/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "r100d.h"
#include "rs100d.h"
#include "rv200d.h"
#include "rv250d.h"
#include "atom.h"

#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/module.h>

#include "r100_reg_safe.h"
#include "rn50_reg_safe.h"

/* Firmware Names */
#define FIRMWARE_R100		"radeon/R100_cp.bin"
#define FIRMWARE_R200		"radeon/R200_cp.bin"
#define FIRMWARE_R300		"radeon/R300_cp.bin"
#define FIRMWARE_R420		"radeon/R420_cp.bin"
#define FIRMWARE_RS690		"radeon/RS690_cp.bin"
#define FIRMWARE_RS600		"radeon/RS600_cp.bin"
#define FIRMWARE_R520		"radeon/R520_cp.bin"

MODULE_FIRMWARE(FIRMWARE_R100);
MODULE_FIRMWARE(FIRMWARE_R200);
MODULE_FIRMWARE(FIRMWARE_R300);
MODULE_FIRMWARE(FIRMWARE_R420);
MODULE_FIRMWARE(FIRMWARE_RS690);
MODULE_FIRMWARE(FIRMWARE_RS600);
MODULE_FIRMWARE(FIRMWARE_R520);

#include "r100_track.h"

/* This file gathers functions specific to
 * r100, rv100, rs100, rv200, rs200, r200, rv250, rs300, rv280,
 * and others in some cases.
 */

static bool r100_is_in_vblank(struct radeon_device *rdev, int crtc)
{
	if (crtc == 0) {
		if (RREG32(RADEON_CRTC_STATUS) & RADEON_CRTC_VBLANK_CUR)
			return true;
		else
			return false;
	} else {
		if (RREG32(RADEON_CRTC2_STATUS) & RADEON_CRTC2_VBLANK_CUR)
			return true;
		else
			return false;
	}
}

static bool r100_is_counter_moving(struct radeon_device *rdev, int crtc)
{
	u32 vline1, vline2;

	if (crtc == 0) {
		vline1 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
		vline2 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
	} else {
		vline1 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
		vline2 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
	}
	if (vline1 != vline2)
		return true;
	else
		return false;
}

/**
 * r100_wait_for_vblank - vblank wait asic callback.
106 * 107 * @rdev: radeon_device pointer 108 * @crtc: crtc to wait for vblank on 109 * 110 * Wait for vblank on the requested crtc (r1xx-r4xx). 111 */ 112 void r100_wait_for_vblank(struct radeon_device *rdev, int crtc) 113 { 114 unsigned i = 0; 115 116 if (crtc >= rdev->num_crtc) 117 return; 118 119 if (crtc == 0) { 120 if (!(RREG32(RADEON_CRTC_GEN_CNTL) & RADEON_CRTC_EN)) 121 return; 122 } else { 123 if (!(RREG32(RADEON_CRTC2_GEN_CNTL) & RADEON_CRTC2_EN)) 124 return; 125 } 126 127 /* depending on when we hit vblank, we may be close to active; if so, 128 * wait for another frame. 129 */ 130 while (r100_is_in_vblank(rdev, crtc)) { 131 if (i++ % 100 == 0) { 132 if (!r100_is_counter_moving(rdev, crtc)) 133 break; 134 } 135 } 136 137 while (!r100_is_in_vblank(rdev, crtc)) { 138 if (i++ % 100 == 0) { 139 if (!r100_is_counter_moving(rdev, crtc)) 140 break; 141 } 142 } 143 } 144 145 /** 146 * r100_pre_page_flip - pre-pageflip callback. 147 * 148 * @rdev: radeon_device pointer 149 * @crtc: crtc to prepare for pageflip on 150 * 151 * Pre-pageflip callback (r1xx-r4xx). 152 * Enables the pageflip irq (vblank irq). 153 */ 154 void r100_pre_page_flip(struct radeon_device *rdev, int crtc) 155 { 156 /* enable the pflip int */ 157 radeon_irq_kms_pflip_irq_get(rdev, crtc); 158 } 159 160 /** 161 * r100_post_page_flip - pos-pageflip callback. 162 * 163 * @rdev: radeon_device pointer 164 * @crtc: crtc to cleanup pageflip on 165 * 166 * Post-pageflip callback (r1xx-r4xx). 167 * Disables the pageflip irq (vblank irq). 168 */ 169 void r100_post_page_flip(struct radeon_device *rdev, int crtc) 170 { 171 /* disable the pflip int */ 172 radeon_irq_kms_pflip_irq_put(rdev, crtc); 173 } 174 175 /** 176 * r100_page_flip - pageflip callback. 177 * 178 * @rdev: radeon_device pointer 179 * @crtc_id: crtc to cleanup pageflip on 180 * @crtc_base: new address of the crtc (GPU MC address) 181 * 182 * Does the actual pageflip (r1xx-r4xx). 183 * During vblank we take the crtc lock and wait for the update_pending 184 * bit to go high, when it does, we release the lock, and allow the 185 * double buffered update to take place. 186 * Returns the current update pending status. 187 */ 188 u32 r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) 189 { 190 struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; 191 u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK; 192 int i; 193 194 /* Lock the graphics update lock */ 195 /* update the scanout addresses */ 196 WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp); 197 198 /* Wait for update_pending to go high. */ 199 for (i = 0; i < rdev->usec_timeout; i++) { 200 if (RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET) 201 break; 202 udelay(1); 203 } 204 DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n"); 205 206 /* Unlock the lock, so double-buffering can take place inside vblank */ 207 tmp &= ~RADEON_CRTC_OFFSET__OFFSET_LOCK; 208 WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp); 209 210 /* Return current update_pending status: */ 211 return RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET; 212 } 213 214 /** 215 * r100_pm_get_dynpm_state - look up dynpm power state callback. 216 * 217 * @rdev: radeon_device pointer 218 * 219 * Look up the optimal power state based on the 220 * current state of the GPU (r1xx-r5xx). 221 * Used for dynpm only. 
222 */ 223 void r100_pm_get_dynpm_state(struct radeon_device *rdev) 224 { 225 int i; 226 rdev->pm.dynpm_can_upclock = true; 227 rdev->pm.dynpm_can_downclock = true; 228 229 switch (rdev->pm.dynpm_planned_action) { 230 case DYNPM_ACTION_MINIMUM: 231 rdev->pm.requested_power_state_index = 0; 232 rdev->pm.dynpm_can_downclock = false; 233 break; 234 case DYNPM_ACTION_DOWNCLOCK: 235 if (rdev->pm.current_power_state_index == 0) { 236 rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index; 237 rdev->pm.dynpm_can_downclock = false; 238 } else { 239 if (rdev->pm.active_crtc_count > 1) { 240 for (i = 0; i < rdev->pm.num_power_states; i++) { 241 if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY) 242 continue; 243 else if (i >= rdev->pm.current_power_state_index) { 244 rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index; 245 break; 246 } else { 247 rdev->pm.requested_power_state_index = i; 248 break; 249 } 250 } 251 } else 252 rdev->pm.requested_power_state_index = 253 rdev->pm.current_power_state_index - 1; 254 } 255 /* don't use the power state if crtcs are active and no display flag is set */ 256 if ((rdev->pm.active_crtc_count > 0) && 257 (rdev->pm.power_state[rdev->pm.requested_power_state_index].clock_info[0].flags & 258 RADEON_PM_MODE_NO_DISPLAY)) { 259 rdev->pm.requested_power_state_index++; 260 } 261 break; 262 case DYNPM_ACTION_UPCLOCK: 263 if (rdev->pm.current_power_state_index == (rdev->pm.num_power_states - 1)) { 264 rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index; 265 rdev->pm.dynpm_can_upclock = false; 266 } else { 267 if (rdev->pm.active_crtc_count > 1) { 268 for (i = (rdev->pm.num_power_states - 1); i >= 0; i--) { 269 if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY) 270 continue; 271 else if (i <= rdev->pm.current_power_state_index) { 272 rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index; 273 break; 274 } else { 275 rdev->pm.requested_power_state_index = i; 276 break; 277 } 278 } 279 } else 280 rdev->pm.requested_power_state_index = 281 rdev->pm.current_power_state_index + 1; 282 } 283 break; 284 case DYNPM_ACTION_DEFAULT: 285 rdev->pm.requested_power_state_index = rdev->pm.default_power_state_index; 286 rdev->pm.dynpm_can_upclock = false; 287 break; 288 case DYNPM_ACTION_NONE: 289 default: 290 DRM_ERROR("Requested mode for not defined action\n"); 291 return; 292 } 293 /* only one clock mode per power state */ 294 rdev->pm.requested_clock_mode_index = 0; 295 296 DRM_DEBUG_DRIVER("Requested: e: %d m: %d p: %d\n", 297 rdev->pm.power_state[rdev->pm.requested_power_state_index]. 298 clock_info[rdev->pm.requested_clock_mode_index].sclk, 299 rdev->pm.power_state[rdev->pm.requested_power_state_index]. 300 clock_info[rdev->pm.requested_clock_mode_index].mclk, 301 rdev->pm.power_state[rdev->pm.requested_power_state_index]. 302 pcie_lanes); 303 } 304 305 /** 306 * r100_pm_init_profile - Initialize power profiles callback. 307 * 308 * @rdev: radeon_device pointer 309 * 310 * Initialize the power states used in profile mode 311 * (r1xx-r3xx). 312 * Used for profile mode only. 
313 */ 314 void r100_pm_init_profile(struct radeon_device *rdev) 315 { 316 /* default */ 317 rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_ps_idx = rdev->pm.default_power_state_index; 318 rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index; 319 rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_cm_idx = 0; 320 rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_cm_idx = 0; 321 /* low sh */ 322 rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_ps_idx = 0; 323 rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_ps_idx = 0; 324 rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_cm_idx = 0; 325 rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_cm_idx = 0; 326 /* mid sh */ 327 rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_ps_idx = 0; 328 rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_ps_idx = 0; 329 rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_cm_idx = 0; 330 rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_cm_idx = 0; 331 /* high sh */ 332 rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_ps_idx = 0; 333 rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index; 334 rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_cm_idx = 0; 335 rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_cm_idx = 0; 336 /* low mh */ 337 rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_ps_idx = 0; 338 rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index; 339 rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_cm_idx = 0; 340 rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_cm_idx = 0; 341 /* mid mh */ 342 rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_ps_idx = 0; 343 rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index; 344 rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_cm_idx = 0; 345 rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_cm_idx = 0; 346 /* high mh */ 347 rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_ps_idx = 0; 348 rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index; 349 rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_cm_idx = 0; 350 rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_cm_idx = 0; 351 } 352 353 /** 354 * r100_pm_misc - set additional pm hw parameters callback. 355 * 356 * @rdev: radeon_device pointer 357 * 358 * Set non-clock parameters associated with a power state 359 * (voltage, pcie lanes, etc.) (r1xx-r4xx). 
360 */ 361 void r100_pm_misc(struct radeon_device *rdev) 362 { 363 int requested_index = rdev->pm.requested_power_state_index; 364 struct radeon_power_state *ps = &rdev->pm.power_state[requested_index]; 365 struct radeon_voltage *voltage = &ps->clock_info[0].voltage; 366 u32 tmp, sclk_cntl, sclk_cntl2, sclk_more_cntl; 367 368 if ((voltage->type == VOLTAGE_GPIO) && (voltage->gpio.valid)) { 369 if (ps->misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_SUPPORT) { 370 tmp = RREG32(voltage->gpio.reg); 371 if (voltage->active_high) 372 tmp |= voltage->gpio.mask; 373 else 374 tmp &= ~(voltage->gpio.mask); 375 WREG32(voltage->gpio.reg, tmp); 376 if (voltage->delay) 377 udelay(voltage->delay); 378 } else { 379 tmp = RREG32(voltage->gpio.reg); 380 if (voltage->active_high) 381 tmp &= ~voltage->gpio.mask; 382 else 383 tmp |= voltage->gpio.mask; 384 WREG32(voltage->gpio.reg, tmp); 385 if (voltage->delay) 386 udelay(voltage->delay); 387 } 388 } 389 390 sclk_cntl = RREG32_PLL(SCLK_CNTL); 391 sclk_cntl2 = RREG32_PLL(SCLK_CNTL2); 392 sclk_cntl2 &= ~REDUCED_SPEED_SCLK_SEL(3); 393 sclk_more_cntl = RREG32_PLL(SCLK_MORE_CNTL); 394 sclk_more_cntl &= ~VOLTAGE_DELAY_SEL(3); 395 if (ps->misc & ATOM_PM_MISCINFO_ASIC_REDUCED_SPEED_SCLK_EN) { 396 sclk_more_cntl |= REDUCED_SPEED_SCLK_EN; 397 if (ps->misc & ATOM_PM_MISCINFO_DYN_CLK_3D_IDLE) 398 sclk_cntl2 |= REDUCED_SPEED_SCLK_MODE; 399 else 400 sclk_cntl2 &= ~REDUCED_SPEED_SCLK_MODE; 401 if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_2) 402 sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(0); 403 else if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_4) 404 sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(2); 405 } else 406 sclk_more_cntl &= ~REDUCED_SPEED_SCLK_EN; 407 408 if (ps->misc & ATOM_PM_MISCINFO_ASIC_DYNAMIC_VOLTAGE_EN) { 409 sclk_more_cntl |= IO_CG_VOLTAGE_DROP; 410 if (voltage->delay) { 411 sclk_more_cntl |= VOLTAGE_DROP_SYNC; 412 switch (voltage->delay) { 413 case 33: 414 sclk_more_cntl |= VOLTAGE_DELAY_SEL(0); 415 break; 416 case 66: 417 sclk_more_cntl |= VOLTAGE_DELAY_SEL(1); 418 break; 419 case 99: 420 sclk_more_cntl |= VOLTAGE_DELAY_SEL(2); 421 break; 422 case 132: 423 sclk_more_cntl |= VOLTAGE_DELAY_SEL(3); 424 break; 425 } 426 } else 427 sclk_more_cntl &= ~VOLTAGE_DROP_SYNC; 428 } else 429 sclk_more_cntl &= ~IO_CG_VOLTAGE_DROP; 430 431 if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_HDP_BLOCK_EN) 432 sclk_cntl &= ~FORCE_HDP; 433 else 434 sclk_cntl |= FORCE_HDP; 435 436 WREG32_PLL(SCLK_CNTL, sclk_cntl); 437 WREG32_PLL(SCLK_CNTL2, sclk_cntl2); 438 WREG32_PLL(SCLK_MORE_CNTL, sclk_more_cntl); 439 440 /* set pcie lanes */ 441 if ((rdev->flags & RADEON_IS_PCIE) && 442 !(rdev->flags & RADEON_IS_IGP) && 443 rdev->asic->pm.set_pcie_lanes && 444 (ps->pcie_lanes != 445 rdev->pm.power_state[rdev->pm.current_power_state_index].pcie_lanes)) { 446 radeon_set_pcie_lanes(rdev, 447 ps->pcie_lanes); 448 DRM_DEBUG_DRIVER("Setting: p: %d\n", ps->pcie_lanes); 449 } 450 } 451 452 /** 453 * r100_pm_prepare - pre-power state change callback. 454 * 455 * @rdev: radeon_device pointer 456 * 457 * Prepare for a power state change (r1xx-r4xx). 
458 */ 459 void r100_pm_prepare(struct radeon_device *rdev) 460 { 461 struct drm_device *ddev = rdev->ddev; 462 struct drm_crtc *crtc; 463 struct radeon_crtc *radeon_crtc; 464 u32 tmp; 465 466 /* disable any active CRTCs */ 467 list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) { 468 radeon_crtc = to_radeon_crtc(crtc); 469 if (radeon_crtc->enabled) { 470 if (radeon_crtc->crtc_id) { 471 tmp = RREG32(RADEON_CRTC2_GEN_CNTL); 472 tmp |= RADEON_CRTC2_DISP_REQ_EN_B; 473 WREG32(RADEON_CRTC2_GEN_CNTL, tmp); 474 } else { 475 tmp = RREG32(RADEON_CRTC_GEN_CNTL); 476 tmp |= RADEON_CRTC_DISP_REQ_EN_B; 477 WREG32(RADEON_CRTC_GEN_CNTL, tmp); 478 } 479 } 480 } 481 } 482 483 /** 484 * r100_pm_finish - post-power state change callback. 485 * 486 * @rdev: radeon_device pointer 487 * 488 * Clean up after a power state change (r1xx-r4xx). 489 */ 490 void r100_pm_finish(struct radeon_device *rdev) 491 { 492 struct drm_device *ddev = rdev->ddev; 493 struct drm_crtc *crtc; 494 struct radeon_crtc *radeon_crtc; 495 u32 tmp; 496 497 /* enable any active CRTCs */ 498 list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) { 499 radeon_crtc = to_radeon_crtc(crtc); 500 if (radeon_crtc->enabled) { 501 if (radeon_crtc->crtc_id) { 502 tmp = RREG32(RADEON_CRTC2_GEN_CNTL); 503 tmp &= ~RADEON_CRTC2_DISP_REQ_EN_B; 504 WREG32(RADEON_CRTC2_GEN_CNTL, tmp); 505 } else { 506 tmp = RREG32(RADEON_CRTC_GEN_CNTL); 507 tmp &= ~RADEON_CRTC_DISP_REQ_EN_B; 508 WREG32(RADEON_CRTC_GEN_CNTL, tmp); 509 } 510 } 511 } 512 } 513 514 /** 515 * r100_gui_idle - gui idle callback. 516 * 517 * @rdev: radeon_device pointer 518 * 519 * Check of the GUI (2D/3D engines) are idle (r1xx-r5xx). 520 * Returns true if idle, false if not. 521 */ 522 bool r100_gui_idle(struct radeon_device *rdev) 523 { 524 if (RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE) 525 return false; 526 else 527 return true; 528 } 529 530 /* hpd for digital panel detect/disconnect */ 531 /** 532 * r100_hpd_sense - hpd sense callback. 533 * 534 * @rdev: radeon_device pointer 535 * @hpd: hpd (hotplug detect) pin 536 * 537 * Checks if a digital monitor is connected (r1xx-r4xx). 538 * Returns true if connected, false if not connected. 539 */ 540 bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd) 541 { 542 bool connected = false; 543 544 switch (hpd) { 545 case RADEON_HPD_1: 546 if (RREG32(RADEON_FP_GEN_CNTL) & RADEON_FP_DETECT_SENSE) 547 connected = true; 548 break; 549 case RADEON_HPD_2: 550 if (RREG32(RADEON_FP2_GEN_CNTL) & RADEON_FP2_DETECT_SENSE) 551 connected = true; 552 break; 553 default: 554 break; 555 } 556 return connected; 557 } 558 559 /** 560 * r100_hpd_set_polarity - hpd set polarity callback. 561 * 562 * @rdev: radeon_device pointer 563 * @hpd: hpd (hotplug detect) pin 564 * 565 * Set the polarity of the hpd pin (r1xx-r4xx). 566 */ 567 void r100_hpd_set_polarity(struct radeon_device *rdev, 568 enum radeon_hpd_id hpd) 569 { 570 u32 tmp; 571 bool connected = r100_hpd_sense(rdev, hpd); 572 573 switch (hpd) { 574 case RADEON_HPD_1: 575 tmp = RREG32(RADEON_FP_GEN_CNTL); 576 if (connected) 577 tmp &= ~RADEON_FP_DETECT_INT_POL; 578 else 579 tmp |= RADEON_FP_DETECT_INT_POL; 580 WREG32(RADEON_FP_GEN_CNTL, tmp); 581 break; 582 case RADEON_HPD_2: 583 tmp = RREG32(RADEON_FP2_GEN_CNTL); 584 if (connected) 585 tmp &= ~RADEON_FP2_DETECT_INT_POL; 586 else 587 tmp |= RADEON_FP2_DETECT_INT_POL; 588 WREG32(RADEON_FP2_GEN_CNTL, tmp); 589 break; 590 default: 591 break; 592 } 593 } 594 595 /** 596 * r100_hpd_init - hpd setup callback. 
597 * 598 * @rdev: radeon_device pointer 599 * 600 * Setup the hpd pins used by the card (r1xx-r4xx). 601 * Set the polarity, and enable the hpd interrupts. 602 */ 603 void r100_hpd_init(struct radeon_device *rdev) 604 { 605 struct drm_device *dev = rdev->ddev; 606 struct drm_connector *connector; 607 unsigned enable = 0; 608 609 list_for_each_entry(connector, &dev->mode_config.connector_list, head) { 610 struct radeon_connector *radeon_connector = to_radeon_connector(connector); 611 enable |= 1 << radeon_connector->hpd.hpd; 612 radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd); 613 } 614 radeon_irq_kms_enable_hpd(rdev, enable); 615 } 616 617 /** 618 * r100_hpd_fini - hpd tear down callback. 619 * 620 * @rdev: radeon_device pointer 621 * 622 * Tear down the hpd pins used by the card (r1xx-r4xx). 623 * Disable the hpd interrupts. 624 */ 625 void r100_hpd_fini(struct radeon_device *rdev) 626 { 627 struct drm_device *dev = rdev->ddev; 628 struct drm_connector *connector; 629 unsigned disable = 0; 630 631 list_for_each_entry(connector, &dev->mode_config.connector_list, head) { 632 struct radeon_connector *radeon_connector = to_radeon_connector(connector); 633 disable |= 1 << radeon_connector->hpd.hpd; 634 } 635 radeon_irq_kms_disable_hpd(rdev, disable); 636 } 637 638 /* 639 * PCI GART 640 */ 641 void r100_pci_gart_tlb_flush(struct radeon_device *rdev) 642 { 643 /* TODO: can we do somethings here ? */ 644 /* It seems hw only cache one entry so we should discard this 645 * entry otherwise if first GPU GART read hit this entry it 646 * could end up in wrong address. */ 647 } 648 649 int r100_pci_gart_init(struct radeon_device *rdev) 650 { 651 int r; 652 653 if (rdev->gart.ptr) { 654 WARN(1, "R100 PCI GART already initialized\n"); 655 return 0; 656 } 657 /* Initialize common gart structure */ 658 r = radeon_gart_init(rdev); 659 if (r) 660 return r; 661 rdev->gart.table_size = rdev->gart.num_gpu_pages * 4; 662 rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush; 663 rdev->asic->gart.set_page = &r100_pci_gart_set_page; 664 return radeon_gart_table_ram_alloc(rdev); 665 } 666 667 int r100_pci_gart_enable(struct radeon_device *rdev) 668 { 669 uint32_t tmp; 670 671 radeon_gart_restore(rdev); 672 /* discard memory request outside of configured range */ 673 tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS; 674 WREG32(RADEON_AIC_CNTL, tmp); 675 /* set address range for PCI address translate */ 676 WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_start); 677 WREG32(RADEON_AIC_HI_ADDR, rdev->mc.gtt_end); 678 /* set PCI GART page-table base address */ 679 WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr); 680 tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN; 681 WREG32(RADEON_AIC_CNTL, tmp); 682 r100_pci_gart_tlb_flush(rdev); 683 DRM_INFO("PCI GART of %uM enabled (table at 0x%016llX).\n", 684 (unsigned)(rdev->mc.gtt_size >> 20), 685 (unsigned long long)rdev->gart.table_addr); 686 rdev->gart.ready = true; 687 return 0; 688 } 689 690 void r100_pci_gart_disable(struct radeon_device *rdev) 691 { 692 uint32_t tmp; 693 694 /* discard memory request outside of configured range */ 695 tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS; 696 WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN); 697 WREG32(RADEON_AIC_LO_ADDR, 0); 698 WREG32(RADEON_AIC_HI_ADDR, 0); 699 } 700 701 int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr) 702 { 703 u32 *gtt = rdev->gart.ptr; 704 705 if (i < 0 || i > rdev->gart.num_gpu_pages) { 706 return -EINVAL; 707 } 708 
	gtt[i] = cpu_to_le32(lower_32_bits(addr));
	return 0;
}

void r100_pci_gart_fini(struct radeon_device *rdev)
{
	radeon_gart_fini(rdev);
	r100_pci_gart_disable(rdev);
	radeon_gart_table_ram_free(rdev);
}

int r100_irq_set(struct radeon_device *rdev)
{
	uint32_t tmp = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		WREG32(R_000040_GEN_INT_CNTL, 0);
		return -EINVAL;
	}
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		tmp |= RADEON_SW_INT_ENABLE;
	}
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		tmp |= RADEON_CRTC_VBLANK_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		tmp |= RADEON_CRTC2_VBLANK_MASK;
	}
	if (rdev->irq.hpd[0]) {
		tmp |= RADEON_FP_DETECT_MASK;
	}
	if (rdev->irq.hpd[1]) {
		tmp |= RADEON_FP2_DETECT_MASK;
	}
	WREG32(RADEON_GEN_INT_CNTL, tmp);
	return 0;
}

void r100_irq_disable(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(R_000040_GEN_INT_CNTL, 0);
	/* Wait and acknowledge irq */
	mdelay(1);
	tmp = RREG32(R_000044_GEN_INT_STATUS);
	WREG32(R_000044_GEN_INT_STATUS, tmp);
}

static uint32_t r100_irq_ack(struct radeon_device *rdev)
{
	uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
	uint32_t irq_mask = RADEON_SW_INT_TEST |
		RADEON_CRTC_VBLANK_STAT | RADEON_CRTC2_VBLANK_STAT |
		RADEON_FP_DETECT_STAT | RADEON_FP2_DETECT_STAT;

	if (irqs) {
		WREG32(RADEON_GEN_INT_STATUS, irqs);
	}
	return irqs & irq_mask;
}

int r100_irq_process(struct radeon_device *rdev)
{
	uint32_t status, msi_rearm;
	bool queue_hotplug = false;

	status = r100_irq_ack(rdev);
	if (!status) {
		return IRQ_NONE;
	}
	if (rdev->shutdown) {
		return IRQ_NONE;
	}
	while (status) {
		/* SW interrupt */
		if (status & RADEON_SW_INT_TEST) {
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
		}
		/* Vertical blank interrupts */
		if (status & RADEON_CRTC_VBLANK_STAT) {
			if (rdev->irq.crtc_vblank_int[0]) {
				drm_handle_vblank(rdev->ddev, 0);
				rdev->pm.vblank_sync = true;
				wake_up(&rdev->irq.vblank_queue);
			}
			if (atomic_read(&rdev->irq.pflip[0]))
				radeon_crtc_handle_flip(rdev, 0);
		}
		if (status & RADEON_CRTC2_VBLANK_STAT) {
			if (rdev->irq.crtc_vblank_int[1]) {
				drm_handle_vblank(rdev->ddev, 1);
				rdev->pm.vblank_sync = true;
				wake_up(&rdev->irq.vblank_queue);
			}
			if (atomic_read(&rdev->irq.pflip[1]))
				radeon_crtc_handle_flip(rdev, 1);
		}
		if (status & RADEON_FP_DETECT_STAT) {
			queue_hotplug = true;
			DRM_DEBUG("HPD1\n");
		}
		if (status & RADEON_FP2_DETECT_STAT) {
			queue_hotplug = true;
			DRM_DEBUG("HPD2\n");
		}
		status = r100_irq_ack(rdev);
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (rdev->msi_enabled) {
		switch (rdev->family) {
		case CHIP_RS400:
		case CHIP_RS480:
			msi_rearm = RREG32(RADEON_AIC_CNTL) & ~RS400_MSI_REARM;
			WREG32(RADEON_AIC_CNTL, msi_rearm);
			WREG32(RADEON_AIC_CNTL, msi_rearm | RS400_MSI_REARM);
			break;
		default:
			WREG32(RADEON_MSI_REARM_EN, RV370_MSI_REARM_EN);
			break;
		}
	}
	return IRQ_HANDLED;
}

u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
{
	if (crtc == 0)
		return RREG32(RADEON_CRTC_CRNT_FRAME);
	else
		return RREG32(RADEON_CRTC2_CRNT_FRAME);
}

/* Whoever calls radeon_fence_emit should call ring_lock and ask
 * for enough space (today callers are ib schedule and buffer move) */
void r100_fence_ring_emit(struct radeon_device *rdev,
			  struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];

	/* We have to make sure that caches are flushed before
	 * CPU might read something from VRAM. */
	radeon_ring_write(ring, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(ring, RADEON_RB3D_DC_FLUSH_ALL);
	radeon_ring_write(ring, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
	radeon_ring_write(ring, RADEON_RB3D_ZC_FLUSH_ALL);
	/* Wait until IDLE & CLEAN */
	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
	radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
			  RADEON_HDP_READ_BUFFER_INVALIDATE);
	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
	radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
	/* Emit fence sequence & fire IRQ */
	radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0));
	radeon_ring_write(ring, RADEON_SW_INT_FIRE);
}

void r100_semaphore_ring_emit(struct radeon_device *rdev,
			      struct radeon_ring *ring,
			      struct radeon_semaphore *semaphore,
			      bool emit_wait)
{
	/* Unused on older asics, since we don't have semaphores or multiple rings */
	BUG();
}

int r100_copy_blit(struct radeon_device *rdev,
		   uint64_t src_offset,
		   uint64_t dst_offset,
		   unsigned num_gpu_pages,
		   struct radeon_fence **fence)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	uint32_t cur_pages;
	uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
	uint32_t pitch;
	uint32_t stride_pixels;
	unsigned ndw;
	int num_loops;
	int r = 0;

	/* radeon limited to 16k stride */
	stride_bytes &= 0x3fff;
	/* radeon pitch is /64 */
	pitch = stride_bytes / 64;
	stride_pixels = stride_bytes / 4;
	num_loops = DIV_ROUND_UP(num_gpu_pages, 8191);

	/* Ask for enough room for blit + flush + fence */
	ndw = 64 + (10 * num_loops);
	r = radeon_ring_lock(rdev, ring, ndw);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
		return -EINVAL;
	}
	while (num_gpu_pages > 0) {
		cur_pages = num_gpu_pages;
		if (cur_pages > 8191) {
			cur_pages = 8191;
		}
		num_gpu_pages -= cur_pages;

		/* pages are in Y direction - height
		   page width in X direction - width */
		radeon_ring_write(ring, PACKET3(PACKET3_BITBLT_MULTI, 8));
		radeon_ring_write(ring,
				  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
				  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
				  RADEON_GMC_SRC_CLIPPING |
				  RADEON_GMC_DST_CLIPPING |
				  RADEON_GMC_BRUSH_NONE |
				  (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
				  RADEON_GMC_SRC_DATATYPE_COLOR |
				  RADEON_ROP3_S |
				  RADEON_DP_SRC_SOURCE_MEMORY |
				  RADEON_GMC_CLR_CMP_CNTL_DIS |
				  RADEON_GMC_WR_MSK_DIS);
		radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10));
		radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10));
		radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
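		/* remaining BITBLT_MULTI operands: coordinates and extent
		 * for this pass; each pass copies at most 8191 GPU pages */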
		radeon_ring_write(ring, num_gpu_pages);
		radeon_ring_write(ring, num_gpu_pages);
		radeon_ring_write(ring, cur_pages | (stride_pixels << 16));
	}
	radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(ring, RADEON_RB2D_DC_FLUSH_ALL);
	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(ring,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_HOST_IDLECLEAN |
			  RADEON_WAIT_DMA_GUI_IDLE);
	if (fence) {
		r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
	}
	radeon_ring_unlock_commit(rdev, ring);
	return r;
}

static int r100_cp_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	u32 tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(R_000E40_RBBM_STATUS);
		if (!G_000E40_CP_CMDSTRM_BUSY(tmp)) {
			return 0;
		}
		udelay(1);
	}
	return -1;
}

void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
{
	int r;

	r = radeon_ring_lock(rdev, ring, 2);
	if (r) {
		return;
	}
	radeon_ring_write(ring, PACKET0(RADEON_ISYNC_CNTL, 0));
	radeon_ring_write(ring,
			  RADEON_ISYNC_ANY2D_IDLE3D |
			  RADEON_ISYNC_ANY3D_IDLE2D |
			  RADEON_ISYNC_WAIT_IDLEGUI |
			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
	radeon_ring_unlock_commit(rdev, ring);
}


/* Load the microcode for the CP */
static int r100_cp_init_microcode(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	const char *fw_name = NULL;
	int err;

	DRM_DEBUG_KMS("\n");

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}
	if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
	    (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
	    (rdev->family == CHIP_RS200)) {
		DRM_INFO("Loading R100 Microcode\n");
		fw_name = FIRMWARE_R100;
	} else if ((rdev->family == CHIP_R200) ||
		   (rdev->family == CHIP_RV250) ||
		   (rdev->family == CHIP_RV280) ||
		   (rdev->family == CHIP_RS300)) {
		DRM_INFO("Loading R200 Microcode\n");
		fw_name = FIRMWARE_R200;
	} else if ((rdev->family == CHIP_R300) ||
		   (rdev->family == CHIP_R350) ||
		   (rdev->family == CHIP_RV350) ||
		   (rdev->family == CHIP_RV380) ||
		   (rdev->family == CHIP_RS400) ||
		   (rdev->family == CHIP_RS480)) {
		DRM_INFO("Loading R300 Microcode\n");
		fw_name = FIRMWARE_R300;
	} else if ((rdev->family == CHIP_R420) ||
		   (rdev->family == CHIP_R423) ||
		   (rdev->family == CHIP_RV410)) {
		DRM_INFO("Loading R400 Microcode\n");
		fw_name = FIRMWARE_R420;
	} else if ((rdev->family == CHIP_RS690) ||
		   (rdev->family == CHIP_RS740)) {
		DRM_INFO("Loading RS690/RS740 Microcode\n");
		fw_name = FIRMWARE_RS690;
	} else if (rdev->family == CHIP_RS600) {
		DRM_INFO("Loading RS600 Microcode\n");
		fw_name = FIRMWARE_RS600;
	} else if ((rdev->family == CHIP_RV515) ||
		   (rdev->family == CHIP_R520) ||
		   (rdev->family == CHIP_RV530) ||
		   (rdev->family == CHIP_R580) ||
		   (rdev->family == CHIP_RV560) ||
		   (rdev->family == CHIP_RV570)) {
		DRM_INFO("Loading R500 Microcode\n");
		fw_name = FIRMWARE_R520;
	}

	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
	platform_device_unregister(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n",
		       fw_name);
	} else if (rdev->me_fw->size % 8) {
		printk(KERN_ERR
		       "radeon_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
	}
	return err;
}

static void r100_cp_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i, size;

	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}

	if (rdev->me_fw) {
		size = rdev->me_fw->size / 4;
		fw_data = (const __be32 *)&rdev->me_fw->data[0];
		WREG32(RADEON_CP_ME_RAM_ADDR, 0);
		for (i = 0; i < size; i += 2) {
			WREG32(RADEON_CP_ME_RAM_DATAH,
			       be32_to_cpup(&fw_data[i]));
			WREG32(RADEON_CP_ME_RAM_DATAL,
			       be32_to_cpup(&fw_data[i + 1]));
		}
	}
}

int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	unsigned rb_bufsz;
	unsigned rb_blksz;
	unsigned max_fetch;
	unsigned pre_write_timer;
	unsigned pre_write_limit;
	unsigned indirect2_start;
	unsigned indirect1_start;
	uint32_t tmp;
	int r;

	if (r100_debugfs_cp_init(rdev)) {
		DRM_ERROR("Failed to register debugfs file for CP !\n");
	}
	if (!rdev->me_fw) {
		r = r100_cp_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	/* Align ring size */
	rb_bufsz = drm_order(ring_size / 8);
	ring_size = (1 << (rb_bufsz + 1)) * 4;
	r100_cp_load_microcode(rdev);
	r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_RB_RPTR, RADEON_CP_RB_WPTR,
			     0, 0x7fffff, RADEON_CP_PACKET2);
	if (r) {
		return r;
	}
	/* Each time the cp reads 1024 bytes (16 dword/quadword), it updates
	 * the rptr copy in system ram */
	rb_blksz = 9;
	/* cp will read 128bytes at a time (4 dwords) */
	max_fetch = 1;
	ring->align_mask = 16 - 1;
	/* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */
	pre_write_timer = 64;
	/* Force CP_RB_WPTR write if written more than one time before the
	 * delay expire
	 */
	pre_write_limit = 0;
	/* Setup the cp cache like this (cache size is 96 dwords) :
	 *	RING		0  to 15
	 *	INDIRECT1	16 to 79
	 *	INDIRECT2	80 to 95
	 * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
	 *    indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords))
	 *    indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
	 * Idea being that most of the gpu cmd will be through indirect1 buffer
	 * so it gets the bigger cache.
1139 */ 1140 indirect2_start = 80; 1141 indirect1_start = 16; 1142 /* cp setup */ 1143 WREG32(0x718, pre_write_timer | (pre_write_limit << 28)); 1144 tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) | 1145 REG_SET(RADEON_RB_BLKSZ, rb_blksz) | 1146 REG_SET(RADEON_MAX_FETCH, max_fetch)); 1147 #ifdef __BIG_ENDIAN 1148 tmp |= RADEON_BUF_SWAP_32BIT; 1149 #endif 1150 WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE); 1151 1152 /* Set ring address */ 1153 DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)ring->gpu_addr); 1154 WREG32(RADEON_CP_RB_BASE, ring->gpu_addr); 1155 /* Force read & write ptr to 0 */ 1156 WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE); 1157 WREG32(RADEON_CP_RB_RPTR_WR, 0); 1158 ring->wptr = 0; 1159 WREG32(RADEON_CP_RB_WPTR, ring->wptr); 1160 1161 /* set the wb address whether it's enabled or not */ 1162 WREG32(R_00070C_CP_RB_RPTR_ADDR, 1163 S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) >> 2)); 1164 WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET); 1165 1166 if (rdev->wb.enabled) 1167 WREG32(R_000770_SCRATCH_UMSK, 0xff); 1168 else { 1169 tmp |= RADEON_RB_NO_UPDATE; 1170 WREG32(R_000770_SCRATCH_UMSK, 0); 1171 } 1172 1173 WREG32(RADEON_CP_RB_CNTL, tmp); 1174 udelay(10); 1175 ring->rptr = RREG32(RADEON_CP_RB_RPTR); 1176 /* Set cp mode to bus mastering & enable cp*/ 1177 WREG32(RADEON_CP_CSQ_MODE, 1178 REG_SET(RADEON_INDIRECT2_START, indirect2_start) | 1179 REG_SET(RADEON_INDIRECT1_START, indirect1_start)); 1180 WREG32(RADEON_CP_RB_WPTR_DELAY, 0); 1181 WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D); 1182 WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM); 1183 1184 /* at this point everything should be setup correctly to enable master */ 1185 pci_set_master(rdev->pdev); 1186 1187 radeon_ring_start(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); 1188 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring); 1189 if (r) { 1190 DRM_ERROR("radeon: cp isn't working (%d).\n", r); 1191 return r; 1192 } 1193 ring->ready = true; 1194 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); 1195 1196 if (!ring->rptr_save_reg /* not resuming from suspend */ 1197 && radeon_ring_supports_scratch_reg(rdev, ring)) { 1198 r = radeon_scratch_get(rdev, &ring->rptr_save_reg); 1199 if (r) { 1200 DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r); 1201 ring->rptr_save_reg = 0; 1202 } 1203 } 1204 return 0; 1205 } 1206 1207 void r100_cp_fini(struct radeon_device *rdev) 1208 { 1209 if (r100_cp_wait_for_idle(rdev)) { 1210 DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n"); 1211 } 1212 /* Disable ring */ 1213 r100_cp_disable(rdev); 1214 radeon_scratch_free(rdev, rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg); 1215 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); 1216 DRM_INFO("radeon: cp finalized\n"); 1217 } 1218 1219 void r100_cp_disable(struct radeon_device *rdev) 1220 { 1221 /* Disable ring */ 1222 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); 1223 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 1224 WREG32(RADEON_CP_CSQ_MODE, 0); 1225 WREG32(RADEON_CP_CSQ_CNTL, 0); 1226 WREG32(R_000770_SCRATCH_UMSK, 0); 1227 if (r100_gui_wait_for_idle(rdev)) { 1228 printk(KERN_WARNING "Failed to wait GUI idle while " 1229 "programming pipes. 
	}
}

/*
 * CS functions
 */
int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
			    struct radeon_cs_packet *pkt,
			    unsigned idx,
			    unsigned reg)
{
	int r;
	u32 tile_flags = 0;
	u32 tmp;
	struct radeon_cs_reloc *reloc;
	u32 value;

	r = radeon_cs_packet_next_reloc(p, &reloc, 0);
	if (r) {
		DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
			  idx, reg);
		radeon_cs_dump_packet(p, pkt);
		return r;
	}

	value = radeon_get_ib_value(p, idx);
	tmp = value & 0x003fffff;
	tmp += (((u32)reloc->lobj.gpu_offset) >> 10);

	if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
			tile_flags |= RADEON_DST_TILE_MACRO;
		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
			if (reg == RADEON_SRC_PITCH_OFFSET) {
				DRM_ERROR("Cannot src blit from microtiled surface\n");
				radeon_cs_dump_packet(p, pkt);
				return -EINVAL;
			}
			tile_flags |= RADEON_DST_TILE_MICRO;
		}

		tmp |= tile_flags;
		p->ib.ptr[idx] = (value & 0x3fc00000) | tmp;
	} else
		p->ib.ptr[idx] = (value & 0xffc00000) | tmp;
	return 0;
}

int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
			     struct radeon_cs_packet *pkt,
			     int idx)
{
	unsigned c, i;
	struct radeon_cs_reloc *reloc;
	struct r100_cs_track *track;
	int r = 0;
	volatile uint32_t *ib;
	u32 idx_value;

	ib = p->ib.ptr;
	track = (struct r100_cs_track *)p->track;
	c = radeon_get_ib_value(p, idx++) & 0x1F;
	if (c > 16) {
		DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
			  pkt->opcode);
		radeon_cs_dump_packet(p, pkt);
		return -EINVAL;
	}
	track->num_arrays = c;
	for (i = 0; i < (c - 1); i += 2, idx += 3) {
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n",
				  pkt->opcode);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		idx_value = radeon_get_ib_value(p, idx);
		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);

		track->arrays[i + 0].esize = idx_value >> 8;
		track->arrays[i + 0].robj = reloc->robj;
		track->arrays[i + 0].esize &= 0x7F;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n",
				  pkt->opcode);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
		track->arrays[i + 1].robj = reloc->robj;
		track->arrays[i + 1].esize = idx_value >> 24;
		track->arrays[i + 1].esize &= 0x7F;
	}
	if (c & 1) {
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n",
				  pkt->opcode);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		idx_value = radeon_get_ib_value(p, idx);
		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
		track->arrays[i + 0].robj = reloc->robj;
		track->arrays[i + 0].esize = idx_value >> 8;
		track->arrays[i + 0].esize &= 0x7F;
	}
	return r;
}

int r100_cs_parse_packet0(struct radeon_cs_parser *p,
			  struct radeon_cs_packet *pkt,
			  const unsigned *auth, unsigned n,
			  radeon_packet0_check_t check)
{
	unsigned reg;
	unsigned i, j, m;
	unsigned idx;
	int r;

	idx = pkt->idx + 1;
	reg = pkt->reg;
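	/* The safe-register bitmap has one bit per 32-bit register, so
	 * the word index is reg >> 7 (32 regs * 4 bytes each) and the
	 * bit within that word is (reg >> 2) & 31. */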
	/* Check that the register falls into the register range
	 * determined by the number of entries (n) in the
	 * safe register bitmap.
	 */
	if (pkt->one_reg_wr) {
		if ((reg >> 7) > n) {
			return -EINVAL;
		}
	} else {
		if (((reg + (pkt->count << 2)) >> 7) > n) {
			return -EINVAL;
		}
	}
	for (i = 0; i <= pkt->count; i++, idx++) {
		j = (reg >> 7);
		m = 1 << ((reg >> 2) & 31);
		if (auth[j] & m) {
			r = check(p, pkt, idx, reg);
			if (r) {
				return r;
			}
		}
		if (pkt->one_reg_wr) {
			if (!(auth[j] & m)) {
				break;
			}
		} else {
			reg += 4;
		}
	}
	return 0;
}

/**
 * r100_cs_packet_parse_vline() - parse userspace VLINE packet
 * @p: parser structure holding parsing context.
 *
 * Userspace sends a special sequence for VLINE waits.
 * PACKET0 - VLINE_START_END + value
 * PACKET0 - WAIT_UNTIL + value
 * RELOC (P3) - crtc_id in reloc.
 *
 * This function parses this and relocates the VLINE START END
 * and WAIT UNTIL packets to the correct crtc.
 * It also detects a switched off crtc and nulls out the
 * wait in that case.
 */
int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
	struct drm_mode_object *obj;
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	struct radeon_cs_packet p3reloc, waitreloc;
	int crtc_id;
	int r;
	uint32_t header, h_idx, reg;
	volatile uint32_t *ib;

	ib = p->ib.ptr;

	/* parse the wait until */
	r = radeon_cs_packet_parse(p, &waitreloc, p->idx);
	if (r)
		return r;

	/* check that it's a wait until and only 1 count */
	if (waitreloc.reg != RADEON_WAIT_UNTIL ||
	    waitreloc.count != 0) {
		DRM_ERROR("vline wait had illegal wait until segment\n");
		return -EINVAL;
	}

	if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) {
		DRM_ERROR("vline wait had illegal wait until\n");
		return -EINVAL;
	}

	/* jump over the NOP */
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx + waitreloc.count + 2);
	if (r)
		return r;

	h_idx = p->idx - 2;
	p->idx += waitreloc.count + 2;
	p->idx += p3reloc.count + 2;

	header = radeon_get_ib_value(p, h_idx);
	crtc_id = radeon_get_ib_value(p, h_idx + 5);
	reg = R100_CP_PACKET0_GET_REG(header);
	obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
	if (!obj) {
		DRM_ERROR("cannot find crtc %d\n", crtc_id);
		return -EINVAL;
	}
	crtc = obj_to_crtc(obj);
	radeon_crtc = to_radeon_crtc(crtc);
	crtc_id = radeon_crtc->crtc_id;

	if (!crtc->enabled) {
		/* if the CRTC isn't enabled - we need to nop out the wait until */
		ib[h_idx + 2] = PACKET2(0);
		ib[h_idx + 3] = PACKET2(0);
	} else if (crtc_id == 1) {
		switch (reg) {
		case AVIVO_D1MODE_VLINE_START_END:
			header &= ~R300_CP_PACKET0_REG_MASK;
			header |= AVIVO_D2MODE_VLINE_START_END >> 2;
			break;
		case RADEON_CRTC_GUI_TRIG_VLINE:
			header &= ~R300_CP_PACKET0_REG_MASK;
			header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
			break;
		default:
			DRM_ERROR("unknown crtc reloc\n");
			return -EINVAL;
		}
		ib[h_idx] = header;
		ib[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
	}

	return 0;
}

static int r100_get_vtx_size(uint32_t vtx_fmt)
{
	int vtx_size;
	vtx_size = 2;
	/* ordered according to bits in spec */
	if (vtx_fmt & RADEON_SE_VTX_FMT_W0)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPCOLOR)
		vtx_size += 3;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPALPHA)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_PKCOLOR)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPSPEC)
		vtx_size += 3;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPFOG)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_PKSPEC)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST0)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST1)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST2)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q2)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST3)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q3)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q0)
		vtx_size++;
	/* blend weight */
	if (vtx_fmt & (0x7 << 15))
		vtx_size += (vtx_fmt >> 15) & 0x7;
	if (vtx_fmt & RADEON_SE_VTX_FMT_N0)
		vtx_size += 3;
	if (vtx_fmt & RADEON_SE_VTX_FMT_XY1)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Z1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_W1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_N1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Z)
		vtx_size++;
	return vtx_size;
}

static int r100_packet0_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt,
			      unsigned idx, unsigned reg)
{
	struct radeon_cs_reloc *reloc;
	struct r100_cs_track *track;
	volatile uint32_t *ib;
	uint32_t tmp;
	int r;
	int i, face;
	u32 tile_flags = 0;
	u32 idx_value;

	ib = p->ib.ptr;
	track = (struct r100_cs_track *)p->track;

	idx_value = radeon_get_ib_value(p, idx);

	switch (reg) {
	case RADEON_CRTC_GUI_TRIG_VLINE:
		r = r100_cs_packet_parse_vline(p);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		break;
		/* FIXME: only allow PACKET3 blit? easier to check for out of
		 * range access */
	case RADEON_DST_PITCH_OFFSET:
	case RADEON_SRC_PITCH_OFFSET:
		r = r100_reloc_pitch_offset(p, pkt, idx, reg);
		if (r)
			return r;
		break;
	case RADEON_RB3D_DEPTHOFFSET:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->zb.robj = reloc->robj;
		track->zb.offset = idx_value;
		track->zb_dirty = true;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		break;
	case RADEON_RB3D_COLOROFFSET:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->cb[0].robj = reloc->robj;
		track->cb[0].offset = idx_value;
		track->cb_dirty = true;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		break;
	case RADEON_PP_TXOFFSET_0:
	case RADEON_PP_TXOFFSET_1:
	case RADEON_PP_TXOFFSET_2:
		i = (reg - RADEON_PP_TXOFFSET_0) / 24;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
				tile_flags |= RADEON_TXO_MACRO_TILE;
			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
				tile_flags |= RADEON_TXO_MICRO_TILE_X2;

			tmp = idx_value & ~(0x7 << 2);
			tmp |= tile_flags;
			ib[idx] = tmp + ((u32)reloc->lobj.gpu_offset);
		} else
			ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_OFFSET_T0_0:
	case RADEON_PP_CUBIC_OFFSET_T0_1:
	case RADEON_PP_CUBIC_OFFSET_T0_2:
	case RADEON_PP_CUBIC_OFFSET_T0_3:
	case RADEON_PP_CUBIC_OFFSET_T0_4:
		i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[0].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[0].cube_info[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_OFFSET_T1_0:
	case RADEON_PP_CUBIC_OFFSET_T1_1:
	case RADEON_PP_CUBIC_OFFSET_T1_2:
	case RADEON_PP_CUBIC_OFFSET_T1_3:
	case RADEON_PP_CUBIC_OFFSET_T1_4:
		i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[1].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[1].cube_info[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_OFFSET_T2_0:
	case RADEON_PP_CUBIC_OFFSET_T2_1:
	case RADEON_PP_CUBIC_OFFSET_T2_2:
	case RADEON_PP_CUBIC_OFFSET_T2_3:
	case RADEON_PP_CUBIC_OFFSET_T2_4:
		i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[2].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[2].cube_info[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_RE_WIDTH_HEIGHT:
		track->maxy = ((idx_value >> 16) & 0x7FF);
		track->cb_dirty = true;
		track->zb_dirty = true;
		break;
	case RADEON_RB3D_COLORPITCH:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
				tile_flags |= RADEON_COLOR_TILE_ENABLE;
			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
				tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;

			tmp = idx_value & ~(0x7 << 16);
			tmp |= tile_flags;
			ib[idx] = tmp;
		} else
			ib[idx] = idx_value;

		track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
		track->cb_dirty = true;
		break;
	case RADEON_RB3D_DEPTHPITCH:
		track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
		track->zb_dirty = true;
		break;
	case RADEON_RB3D_CNTL:
		switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
		case 7:
		case 8:
		case 9:
		case 11:
		case 12:
			track->cb[0].cpp = 1;
			break;
		case 3:
		case 4:
		case 15:
			track->cb[0].cpp = 2;
			break;
		case 6:
			track->cb[0].cpp = 4;
			break;
		default:
			DRM_ERROR("Invalid color buffer format (%d) !\n",
				  ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f));
			return -EINVAL;
		}
		track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
		track->cb_dirty = true;
		track->zb_dirty = true;
		break;
	case RADEON_RB3D_ZSTENCILCNTL:
		switch (idx_value & 0xf) {
		case 0:
			track->zb.cpp = 2;
			break;
		case 2:
		case 3:
		case 4:
		case 5:
		case 9:
		case 11:
			track->zb.cpp = 4;
			break;
		default:
			break;
		}
		track->zb_dirty = true;
		break;
	case RADEON_RB3D_ZPASS_ADDR:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		break;
	case RADEON_PP_CNTL:
		{
			uint32_t temp = idx_value >> 4;
			for (i = 0; i < track->num_texture; i++)
				track->textures[i].enabled = !!(temp & (1 << i));
			track->tex_dirty = true;
		}
		break;
	case RADEON_SE_VF_CNTL:
		track->vap_vf_cntl = idx_value;
		break;
	case RADEON_SE_VTX_FMT:
		track->vtx_size = r100_get_vtx_size(idx_value);
		break;
	case RADEON_PP_TEX_SIZE_0:
	case RADEON_PP_TEX_SIZE_1:
	case RADEON_PP_TEX_SIZE_2:
		i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
		track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
		track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
		track->tex_dirty = true;
		break;
	case RADEON_PP_TEX_PITCH_0:
	case RADEON_PP_TEX_PITCH_1:
	case RADEON_PP_TEX_PITCH_2:
		i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
		track->textures[i].pitch = idx_value + 32;
		track->tex_dirty = true;
		break;
	case RADEON_PP_TXFILTER_0:
	case RADEON_PP_TXFILTER_1:
	case RADEON_PP_TXFILTER_2:
		i = (reg - RADEON_PP_TXFILTER_0) / 24;
		track->textures[i].num_levels = ((idx_value & RADEON_MAX_MIP_LEVEL_MASK)
						 >> RADEON_MAX_MIP_LEVEL_SHIFT);
		tmp = (idx_value >> 23) & 0x7;
		if (tmp == 2 || tmp == 6)
			track->textures[i].roundup_w = false;
		tmp = (idx_value >> 27) & 0x7;
		if (tmp == 2 || tmp == 6)
			track->textures[i].roundup_h = false;
		track->tex_dirty = true;
		break;
	case RADEON_PP_TXFORMAT_0:
	case RADEON_PP_TXFORMAT_1:
	case RADEON_PP_TXFORMAT_2:
		i = (reg - RADEON_PP_TXFORMAT_0) / 24;
		if (idx_value & RADEON_TXFORMAT_NON_POWER2) {
			track->textures[i].use_pitch = 1;
		} else {
			track->textures[i].use_pitch = 0;
			track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK);
			track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK);
		}
		if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE)
			track->textures[i].tex_coord_type = 2;
		switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) {
		case RADEON_TXFORMAT_I8:
		case RADEON_TXFORMAT_RGB332:
		case RADEON_TXFORMAT_Y8:
			track->textures[i].cpp = 1;
			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
			break;
		case RADEON_TXFORMAT_AI88:
		case RADEON_TXFORMAT_ARGB1555:
		case RADEON_TXFORMAT_RGB565:
		case RADEON_TXFORMAT_ARGB4444:
		case RADEON_TXFORMAT_VYUY422:
		case RADEON_TXFORMAT_YVYU422:
		case RADEON_TXFORMAT_SHADOW16:
		case RADEON_TXFORMAT_LDUDV655:
		case RADEON_TXFORMAT_DUDV88:
			track->textures[i].cpp = 2;
			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
			break;
		case RADEON_TXFORMAT_ARGB8888:
		case RADEON_TXFORMAT_RGBA8888:
		case RADEON_TXFORMAT_SHADOW32:
		case RADEON_TXFORMAT_LDUDUV8888:
			track->textures[i].cpp = 4;
			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
			break;
		case RADEON_TXFORMAT_DXT1:
			track->textures[i].cpp = 1;
			track->textures[i].compress_format = R100_TRACK_COMP_DXT1;
			break;
		case RADEON_TXFORMAT_DXT23:
		case RADEON_TXFORMAT_DXT45:
			track->textures[i].cpp = 1;
			track->textures[i].compress_format = R100_TRACK_COMP_DXT35;
			break;
		}
		track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
		track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_FACES_0:
	case RADEON_PP_CUBIC_FACES_1:
	case RADEON_PP_CUBIC_FACES_2:
		tmp = idx_value;
		i = (reg - RADEON_PP_CUBIC_FACES_0) / 4;
		for (face = 0; face < 4; face++) {
			track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
			track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
		}
		track->tex_dirty = true;
		break;
	default:
		printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
		       reg, idx);
		return -EINVAL;
	}
	return 0;
}

int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
					 struct radeon_cs_packet *pkt,
					 struct radeon_bo *robj)
{
	unsigned idx;
	u32 value;
	idx = pkt->idx + 1;
	value = radeon_get_ib_value(p, idx + 2);
	if ((value + 1) > radeon_bo_size(robj)) {
		DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
			  "(need %u have %lu) !\n",
			  value + 1,
			  radeon_bo_size(robj));
		return -EINVAL;
	}
	return 0;
}
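
/* Validate a type-3 packet: patch in relocated GPU offsets for any
 * buffers it references and record draw state in the CS tracker so
 * r100_cs_track_check() can verify buffer sizes before submission. */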
1889 1890 static int r100_packet3_check(struct radeon_cs_parser *p, 1891 struct radeon_cs_packet *pkt) 1892 { 1893 struct radeon_cs_reloc *reloc; 1894 struct r100_cs_track *track; 1895 unsigned idx; 1896 volatile uint32_t *ib; 1897 int r; 1898 1899 ib = p->ib.ptr; 1900 idx = pkt->idx + 1; 1901 track = (struct r100_cs_track *)p->track; 1902 switch (pkt->opcode) { 1903 case PACKET3_3D_LOAD_VBPNTR: 1904 r = r100_packet3_load_vbpntr(p, pkt, idx); 1905 if (r) 1906 return r; 1907 break; 1908 case PACKET3_INDX_BUFFER: 1909 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1910 if (r) { 1911 DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode); 1912 radeon_cs_dump_packet(p, pkt); 1913 return r; 1914 } 1915 ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->lobj.gpu_offset); 1916 r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj); 1917 if (r) { 1918 return r; 1919 } 1920 break; 1921 case 0x23: 1922 /* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */ 1923 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1924 if (r) { 1925 DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode); 1926 radeon_cs_dump_packet(p, pkt); 1927 return r; 1928 } 1929 ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->lobj.gpu_offset); 1930 track->num_arrays = 1; 1931 track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2)); 1932 1933 track->arrays[0].robj = reloc->robj; 1934 track->arrays[0].esize = track->vtx_size; 1935 1936 track->max_indx = radeon_get_ib_value(p, idx+1); 1937 1938 track->vap_vf_cntl = radeon_get_ib_value(p, idx+3); 1939 track->immd_dwords = pkt->count - 1; 1940 r = r100_cs_track_check(p->rdev, track); 1941 if (r) 1942 return r; 1943 break; 1944 case PACKET3_3D_DRAW_IMMD: 1945 if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) { 1946 DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); 1947 return -EINVAL; 1948 } 1949 track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 0)); 1950 track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1); 1951 track->immd_dwords = pkt->count - 1; 1952 r = r100_cs_track_check(p->rdev, track); 1953 if (r) 1954 return r; 1955 break; 1956 /* triggers drawing using in-packet vertex data */ 1957 case PACKET3_3D_DRAW_IMMD_2: 1958 if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) { 1959 DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); 1960 return -EINVAL; 1961 } 1962 track->vap_vf_cntl = radeon_get_ib_value(p, idx); 1963 track->immd_dwords = pkt->count; 1964 r = r100_cs_track_check(p->rdev, track); 1965 if (r) 1966 return r; 1967 break; 1968 /* triggers drawing using in-packet vertex data */ 1969 case PACKET3_3D_DRAW_VBUF_2: 1970 track->vap_vf_cntl = radeon_get_ib_value(p, idx); 1971 r = r100_cs_track_check(p->rdev, track); 1972 if (r) 1973 return r; 1974 break; 1975 /* triggers drawing of vertex buffers setup elsewhere */ 1976 case PACKET3_3D_DRAW_INDX_2: 1977 track->vap_vf_cntl = radeon_get_ib_value(p, idx); 1978 r = r100_cs_track_check(p->rdev, track); 1979 if (r) 1980 return r; 1981 break; 1982 /* triggers drawing using indices to vertex buffer */ 1983 case PACKET3_3D_DRAW_VBUF: 1984 track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1); 1985 r = r100_cs_track_check(p->rdev, track); 1986 if (r) 1987 return r; 1988 break; 1989 /* triggers drawing of vertex buffers setup elsewhere */ 1990 case PACKET3_3D_DRAW_INDX: 1991 track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1); 1992 r = r100_cs_track_check(p->rdev, track); 1993 if (r) 1994 return r; 1995 break; 1996 /* triggers drawing using indices to vertex buffer */ 1997 case PACKET3_3D_CLEAR_HIZ: 1998 case 
PACKET3_3D_CLEAR_ZMASK: 1999 if (p->rdev->hyperz_filp != p->filp) 2000 return -EINVAL; 2001 break; 2002 case PACKET3_NOP: 2003 break; 2004 default: 2005 DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); 2006 return -EINVAL; 2007 } 2008 return 0; 2009 } 2010 2011 int r100_cs_parse(struct radeon_cs_parser *p) 2012 { 2013 struct radeon_cs_packet pkt; 2014 struct r100_cs_track *track; 2015 int r; 2016 2017 track = kzalloc(sizeof(*track), GFP_KERNEL); 2018 if (!track) 2019 return -ENOMEM; 2020 r100_cs_track_clear(p->rdev, track); 2021 p->track = track; 2022 do { 2023 r = radeon_cs_packet_parse(p, &pkt, p->idx); 2024 if (r) { 2025 return r; 2026 } 2027 p->idx += pkt.count + 2; 2028 switch (pkt.type) { 2029 case RADEON_PACKET_TYPE0: 2030 if (p->rdev->family >= CHIP_R200) 2031 r = r100_cs_parse_packet0(p, &pkt, 2032 p->rdev->config.r100.reg_safe_bm, 2033 p->rdev->config.r100.reg_safe_bm_size, 2034 &r200_packet0_check); 2035 else 2036 r = r100_cs_parse_packet0(p, &pkt, 2037 p->rdev->config.r100.reg_safe_bm, 2038 p->rdev->config.r100.reg_safe_bm_size, 2039 &r100_packet0_check); 2040 break; 2041 case RADEON_PACKET_TYPE2: 2042 break; 2043 case RADEON_PACKET_TYPE3: 2044 r = r100_packet3_check(p, &pkt); 2045 break; 2046 default: 2047 DRM_ERROR("Unknown packet type %d !\n", 2048 pkt.type); 2049 return -EINVAL; 2050 } 2051 if (r) 2052 return r; 2053 } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); 2054 return 0; 2055 } 2056 2057 static void r100_cs_track_texture_print(struct r100_cs_track_texture *t) 2058 { 2059 DRM_ERROR("pitch %d\n", t->pitch); 2060 DRM_ERROR("use_pitch %d\n", t->use_pitch); 2061 DRM_ERROR("width %d\n", t->width); 2062 DRM_ERROR("width_11 %d\n", t->width_11); 2063 DRM_ERROR("height %d\n", t->height); 2064 DRM_ERROR("height_11 %d\n", t->height_11); 2065 DRM_ERROR("num levels %d\n", t->num_levels); 2066 DRM_ERROR("depth %d\n", t->txdepth); 2067 DRM_ERROR("bpp %d\n", t->cpp); 2068 DRM_ERROR("coordinate type %d\n", t->tex_coord_type); 2069 DRM_ERROR("width round to power of 2 %d\n", t->roundup_w); 2070 DRM_ERROR("height round to power of 2 %d\n", t->roundup_h); 2071 DRM_ERROR("compress format %d\n", t->compress_format); 2072 } 2073 2074 static int r100_track_compress_size(int compress_format, int w, int h) 2075 { 2076 int block_width, block_height, block_bytes; 2077 int wblocks, hblocks; 2078 int min_wblocks; 2079 int sz; 2080 2081 block_width = 4; 2082 block_height = 4; 2083 2084 switch (compress_format) { 2085 case R100_TRACK_COMP_DXT1: 2086 block_bytes = 8; 2087 min_wblocks = 4; 2088 break; 2089 default: 2090 case R100_TRACK_COMP_DXT35: 2091 block_bytes = 16; 2092 min_wblocks = 2; 2093 break; 2094 } 2095 2096 hblocks = (h + block_height - 1) / block_height; 2097 wblocks = (w + block_width - 1) / block_width; 2098 if (wblocks < min_wblocks) 2099 wblocks = min_wblocks; 2100 sz = wblocks * hblocks * block_bytes; 2101 return sz; 2102 } 2103 2104 static int r100_cs_track_cube(struct radeon_device *rdev, 2105 struct r100_cs_track *track, unsigned idx) 2106 { 2107 unsigned face, w, h; 2108 struct radeon_bo *cube_robj; 2109 unsigned long size; 2110 unsigned compress_format = track->textures[idx].compress_format; 2111 2112 for (face = 0; face < 5; face++) { 2113 cube_robj = track->textures[idx].cube_info[face].robj; 2114 w = track->textures[idx].cube_info[face].width; 2115 h = track->textures[idx].cube_info[face].height; 2116 2117 if (compress_format) { 2118 size = r100_track_compress_size(compress_format, w, h); 2119 } else 2120 size = w * h; 2121 size *= 
track->textures[idx].cpp; 2122 2123 size += track->textures[idx].cube_info[face].offset; 2124 2125 if (size > radeon_bo_size(cube_robj)) { 2126 DRM_ERROR("Cube texture offset greater than object size %lu %lu\n", 2127 size, radeon_bo_size(cube_robj)); 2128 r100_cs_track_texture_print(&track->textures[idx]); 2129 return -1; 2130 } 2131 } 2132 return 0; 2133 } 2134 2135 static int r100_cs_track_texture_check(struct radeon_device *rdev, 2136 struct r100_cs_track *track) 2137 { 2138 struct radeon_bo *robj; 2139 unsigned long size; 2140 unsigned u, i, w, h, d; 2141 int ret; 2142 2143 for (u = 0; u < track->num_texture; u++) { 2144 if (!track->textures[u].enabled) 2145 continue; 2146 if (track->textures[u].lookup_disable) 2147 continue; 2148 robj = track->textures[u].robj; 2149 if (robj == NULL) { 2150 DRM_ERROR("No texture bound to unit %u\n", u); 2151 return -EINVAL; 2152 } 2153 size = 0; 2154 for (i = 0; i <= track->textures[u].num_levels; i++) { 2155 if (track->textures[u].use_pitch) { 2156 if (rdev->family < CHIP_R300) 2157 w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i); 2158 else 2159 w = track->textures[u].pitch / (1 << i); 2160 } else { 2161 w = track->textures[u].width; 2162 if (rdev->family >= CHIP_RV515) 2163 w |= track->textures[u].width_11; 2164 w = w / (1 << i); 2165 if (track->textures[u].roundup_w) 2166 w = roundup_pow_of_two(w); 2167 } 2168 h = track->textures[u].height; 2169 if (rdev->family >= CHIP_RV515) 2170 h |= track->textures[u].height_11; 2171 h = h / (1 << i); 2172 if (track->textures[u].roundup_h) 2173 h = roundup_pow_of_two(h); 2174 if (track->textures[u].tex_coord_type == 1) { 2175 d = (1 << track->textures[u].txdepth) / (1 << i); 2176 if (!d) 2177 d = 1; 2178 } else { 2179 d = 1; 2180 } 2181 if (track->textures[u].compress_format) { 2182 2183 size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d; 2184 /* compressed textures are block based */ 2185 } else 2186 size += w * h * d; 2187 } 2188 size *= track->textures[u].cpp; 2189 2190 switch (track->textures[u].tex_coord_type) { 2191 case 0: 2192 case 1: 2193 break; 2194 case 2: 2195 if (track->separate_cube) { 2196 ret = r100_cs_track_cube(rdev, track, u); 2197 if (ret) 2198 return ret; 2199 } else 2200 size *= 6; 2201 break; 2202 default: 2203 DRM_ERROR("Invalid texture coordinate type %u for unit " 2204 "%u\n", track->textures[u].tex_coord_type, u); 2205 return -EINVAL; 2206 } 2207 if (size > radeon_bo_size(robj)) { 2208 DRM_ERROR("Texture of unit %u needs %lu bytes but is " 2209 "%lu\n", u, size, radeon_bo_size(robj)); 2210 r100_cs_track_texture_print(&track->textures[u]); 2211 return -EINVAL; 2212 } 2213 } 2214 return 0; 2215 } 2216 2217 int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) 2218 { 2219 unsigned i; 2220 unsigned long size; 2221 unsigned prim_walk; 2222 unsigned nverts; 2223 unsigned num_cb = track->cb_dirty ? 
track->num_cb : 0; 2224 2225 if (num_cb && !track->zb_cb_clear && !track->color_channel_mask && 2226 !track->blend_read_enable) 2227 num_cb = 0; 2228 2229 for (i = 0; i < num_cb; i++) { 2230 if (track->cb[i].robj == NULL) { 2231 DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); 2232 return -EINVAL; 2233 } 2234 size = track->cb[i].pitch * track->cb[i].cpp * track->maxy; 2235 size += track->cb[i].offset; 2236 if (size > radeon_bo_size(track->cb[i].robj)) { 2237 DRM_ERROR("[drm] Buffer too small for color buffer %d " 2238 "(need %lu have %lu) !\n", i, size, 2239 radeon_bo_size(track->cb[i].robj)); 2240 DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n", 2241 i, track->cb[i].pitch, track->cb[i].cpp, 2242 track->cb[i].offset, track->maxy); 2243 return -EINVAL; 2244 } 2245 } 2246 track->cb_dirty = false; 2247 2248 if (track->zb_dirty && track->z_enabled) { 2249 if (track->zb.robj == NULL) { 2250 DRM_ERROR("[drm] No buffer for z buffer !\n"); 2251 return -EINVAL; 2252 } 2253 size = track->zb.pitch * track->zb.cpp * track->maxy; 2254 size += track->zb.offset; 2255 if (size > radeon_bo_size(track->zb.robj)) { 2256 DRM_ERROR("[drm] Buffer too small for z buffer " 2257 "(need %lu have %lu) !\n", size, 2258 radeon_bo_size(track->zb.robj)); 2259 DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n", 2260 track->zb.pitch, track->zb.cpp, 2261 track->zb.offset, track->maxy); 2262 return -EINVAL; 2263 } 2264 } 2265 track->zb_dirty = false; 2266 2267 if (track->aa_dirty && track->aaresolve) { 2268 if (track->aa.robj == NULL) { 2269 DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i); 2270 return -EINVAL; 2271 } 2272 /* I believe the format comes from colorbuffer0. */ 2273 size = track->aa.pitch * track->cb[0].cpp * track->maxy; 2274 size += track->aa.offset; 2275 if (size > radeon_bo_size(track->aa.robj)) { 2276 DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d " 2277 "(need %lu have %lu) !\n", i, size, 2278 radeon_bo_size(track->aa.robj)); 2279 DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n", 2280 i, track->aa.pitch, track->cb[0].cpp, 2281 track->aa.offset, track->maxy); 2282 return -EINVAL; 2283 } 2284 } 2285 track->aa_dirty = false; 2286 2287 prim_walk = (track->vap_vf_cntl >> 4) & 0x3; 2288 if (track->vap_vf_cntl & (1 << 14)) { 2289 nverts = track->vap_alt_nverts; 2290 } else { 2291 nverts = (track->vap_vf_cntl >> 16) & 0xFFFF; 2292 } 2293 switch (prim_walk) { 2294 case 1: 2295 for (i = 0; i < track->num_arrays; i++) { 2296 size = track->arrays[i].esize * track->max_indx * 4; 2297 if (track->arrays[i].robj == NULL) { 2298 DRM_ERROR("(PW %u) Vertex array %u no buffer " 2299 "bound\n", prim_walk, i); 2300 return -EINVAL; 2301 } 2302 if (size > radeon_bo_size(track->arrays[i].robj)) { 2303 dev_err(rdev->dev, "(PW %u) Vertex array %u " 2304 "need %lu dwords have %lu dwords\n", 2305 prim_walk, i, size >> 2, 2306 radeon_bo_size(track->arrays[i].robj) 2307 >> 2); 2308 DRM_ERROR("Max indices %u\n", track->max_indx); 2309 return -EINVAL; 2310 } 2311 } 2312 break; 2313 case 2: 2314 for (i = 0; i < track->num_arrays; i++) { 2315 size = track->arrays[i].esize * (nverts - 1) * 4; 2316 if (track->arrays[i].robj == NULL) { 2317 DRM_ERROR("(PW %u) Vertex array %u no buffer " 2318 "bound\n", prim_walk, i); 2319 return -EINVAL; 2320 } 2321 if (size > radeon_bo_size(track->arrays[i].robj)) { 2322 dev_err(rdev->dev, "(PW %u) Vertex array %u " 2323 "need %lu dwords have %lu dwords\n", 2324 prim_walk, i, size >> 2, 2325 radeon_bo_size(track->arrays[i].robj) 2326 >> 2); 2327 return -EINVAL; 2328 } 2329 } 
2330 break; 2331 case 3: 2332 size = track->vtx_size * nverts; 2333 if (size != track->immd_dwords) { 2334 DRM_ERROR("IMMD draw %u dwords but needs %lu dwords\n", 2335 track->immd_dwords, size); 2336 DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", 2337 nverts, track->vtx_size); 2338 return -EINVAL; 2339 } 2340 break; 2341 default: 2342 DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", 2343 prim_walk); 2344 return -EINVAL; 2345 } 2346 2347 if (track->tex_dirty) { 2348 track->tex_dirty = false; 2349 return r100_cs_track_texture_check(rdev, track); 2350 } 2351 return 0; 2352 } 2353 2354 void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track) 2355 { 2356 unsigned i, face; 2357 2358 track->cb_dirty = true; 2359 track->zb_dirty = true; 2360 track->tex_dirty = true; 2361 track->aa_dirty = true; 2362 2363 if (rdev->family < CHIP_R300) { 2364 track->num_cb = 1; 2365 if (rdev->family <= CHIP_RS200) 2366 track->num_texture = 3; 2367 else 2368 track->num_texture = 6; 2369 track->maxy = 2048; 2370 track->separate_cube = 1; 2371 } else { 2372 track->num_cb = 4; 2373 track->num_texture = 16; 2374 track->maxy = 4096; 2375 track->separate_cube = 0; 2376 track->aaresolve = false; 2377 track->aa.robj = NULL; 2378 } 2379 2380 for (i = 0; i < track->num_cb; i++) { 2381 track->cb[i].robj = NULL; 2382 track->cb[i].pitch = 8192; 2383 track->cb[i].cpp = 16; 2384 track->cb[i].offset = 0; 2385 } 2386 track->z_enabled = true; 2387 track->zb.robj = NULL; 2388 track->zb.pitch = 8192; 2389 track->zb.cpp = 4; 2390 track->zb.offset = 0; 2391 track->vtx_size = 0x7F; 2392 track->immd_dwords = 0xFFFFFFFFUL; 2393 track->num_arrays = 11; 2394 track->max_indx = 0x00FFFFFFUL; 2395 for (i = 0; i < track->num_arrays; i++) { 2396 track->arrays[i].robj = NULL; 2397 track->arrays[i].esize = 0x7F; 2398 } 2399 for (i = 0; i < track->num_texture; i++) { 2400 track->textures[i].compress_format = R100_TRACK_COMP_NONE; 2401 track->textures[i].pitch = 16536; 2402 track->textures[i].width = 16536; 2403 track->textures[i].height = 16536; 2404 track->textures[i].width_11 = 1 << 11; 2405 track->textures[i].height_11 = 1 << 11; 2406 track->textures[i].num_levels = 12; 2407 if (rdev->family <= CHIP_RS200) { 2408 track->textures[i].tex_coord_type = 0; 2409 track->textures[i].txdepth = 0; 2410 } else { 2411 track->textures[i].txdepth = 16; 2412 track->textures[i].tex_coord_type = 1; 2413 } 2414 track->textures[i].cpp = 64; 2415 track->textures[i].robj = NULL; 2416 /* CS IB emission code makes sure texture units are disabled */ 2417 track->textures[i].enabled = false; 2418 track->textures[i].lookup_disable = false; 2419 track->textures[i].roundup_w = true; 2420 track->textures[i].roundup_h = true; 2421 if (track->separate_cube) 2422 for (face = 0; face < 5; face++) { 2423 track->textures[i].cube_info[face].robj = NULL; 2424 track->textures[i].cube_info[face].width = 16536; 2425 track->textures[i].cube_info[face].height = 16536; 2426 track->textures[i].cube_info[face].offset = 0; 2427 } 2428 } 2429 } 2430 2431 /* 2432 * Global GPU functions 2433 */ 2434 static void r100_errata(struct radeon_device *rdev) 2435 { 2436 rdev->pll_errata = 0; 2437 2438 if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) { 2439 rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS; 2440 } 2441 2442 if (rdev->family == CHIP_RV100 || 2443 rdev->family == CHIP_RS100 || 2444 rdev->family == CHIP_RS200) { 2445 rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY; 2446 } 2447 } 2448 2449 static int r100_rbbm_fifo_wait_for_entry(struct
radeon_device *rdev, unsigned n) 2450 { 2451 unsigned i; 2452 uint32_t tmp; 2453 2454 for (i = 0; i < rdev->usec_timeout; i++) { 2455 tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK; 2456 if (tmp >= n) { 2457 return 0; 2458 } 2459 DRM_UDELAY(1); 2460 } 2461 return -1; 2462 } 2463 2464 int r100_gui_wait_for_idle(struct radeon_device *rdev) 2465 { 2466 unsigned i; 2467 uint32_t tmp; 2468 2469 if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) { 2470 printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !" 2471 " Bad things might happen.\n"); 2472 } 2473 for (i = 0; i < rdev->usec_timeout; i++) { 2474 tmp = RREG32(RADEON_RBBM_STATUS); 2475 if (!(tmp & RADEON_RBBM_ACTIVE)) { 2476 return 0; 2477 } 2478 DRM_UDELAY(1); 2479 } 2480 return -1; 2481 } 2482 2483 int r100_mc_wait_for_idle(struct radeon_device *rdev) 2484 { 2485 unsigned i; 2486 uint32_t tmp; 2487 2488 for (i = 0; i < rdev->usec_timeout; i++) { 2489 /* read MC_STATUS */ 2490 tmp = RREG32(RADEON_MC_STATUS); 2491 if (tmp & RADEON_MC_IDLE) { 2492 return 0; 2493 } 2494 DRM_UDELAY(1); 2495 } 2496 return -1; 2497 } 2498 2499 bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) 2500 { 2501 u32 rbbm_status; 2502 2503 rbbm_status = RREG32(R_000E40_RBBM_STATUS); 2504 if (!G_000E40_GUI_ACTIVE(rbbm_status)) { 2505 radeon_ring_lockup_update(ring); 2506 return false; 2507 } 2508 /* force CP activities */ 2509 radeon_ring_force_activity(rdev, ring); 2510 return radeon_ring_test_lockup(rdev, ring); 2511 } 2512 2513 /* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */ 2514 void r100_enable_bm(struct radeon_device *rdev) 2515 { 2516 uint32_t tmp; 2517 /* Enable bus mastering */ 2518 tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS; 2519 WREG32(RADEON_BUS_CNTL, tmp); 2520 } 2521 2522 void r100_bm_disable(struct radeon_device *rdev) 2523 { 2524 u32 tmp; 2525 2526 /* disable bus mastering */ 2527 tmp = RREG32(R_000030_BUS_CNTL); 2528 WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044); 2529 mdelay(1); 2530 WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042); 2531 mdelay(1); 2532 WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040); 2533 tmp = RREG32(RADEON_BUS_CNTL); 2534 mdelay(1); 2535 pci_clear_master(rdev->pdev); 2536 mdelay(1); 2537 } 2538 2539 int r100_asic_reset(struct radeon_device *rdev) 2540 { 2541 struct r100_mc_save save; 2542 u32 status, tmp; 2543 int ret = 0; 2544 2545 status = RREG32(R_000E40_RBBM_STATUS); 2546 if (!G_000E40_GUI_ACTIVE(status)) { 2547 return 0; 2548 } 2549 r100_mc_stop(rdev, &save); 2550 status = RREG32(R_000E40_RBBM_STATUS); 2551 dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); 2552 /* stop CP */ 2553 WREG32(RADEON_CP_CSQ_CNTL, 0); 2554 tmp = RREG32(RADEON_CP_RB_CNTL); 2555 WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA); 2556 WREG32(RADEON_CP_RB_RPTR_WR, 0); 2557 WREG32(RADEON_CP_RB_WPTR, 0); 2558 WREG32(RADEON_CP_RB_CNTL, tmp); 2559 /* save PCI state */ 2560 pci_save_state(rdev->pdev); 2561 /* disable bus mastering */ 2562 r100_bm_disable(rdev); 2563 WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) | 2564 S_0000F0_SOFT_RESET_RE(1) | 2565 S_0000F0_SOFT_RESET_PP(1) | 2566 S_0000F0_SOFT_RESET_RB(1)); 2567 RREG32(R_0000F0_RBBM_SOFT_RESET); 2568 mdelay(500); 2569 WREG32(R_0000F0_RBBM_SOFT_RESET, 0); 2570 mdelay(1); 2571 status = RREG32(R_000E40_RBBM_STATUS); 2572 dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); 2573 /* reset CP */ 2574 WREG32(R_0000F0_RBBM_SOFT_RESET, 
S_0000F0_SOFT_RESET_CP(1)); 2575 RREG32(R_0000F0_RBBM_SOFT_RESET); 2576 mdelay(500); 2577 WREG32(R_0000F0_RBBM_SOFT_RESET, 0); 2578 mdelay(1); 2579 status = RREG32(R_000E40_RBBM_STATUS); 2580 dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); 2581 /* restore PCI & busmastering */ 2582 pci_restore_state(rdev->pdev); 2583 r100_enable_bm(rdev); 2584 /* Check if GPU is idle */ 2585 if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) || 2586 G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) { 2587 dev_err(rdev->dev, "failed to reset GPU\n"); 2588 ret = -1; 2589 } else 2590 dev_info(rdev->dev, "GPU reset succeeded\n"); 2591 r100_mc_resume(rdev, &save); 2592 return ret; 2593 } 2594 2595 void r100_set_common_regs(struct radeon_device *rdev) 2596 { 2597 struct drm_device *dev = rdev->ddev; 2598 bool force_dac2 = false; 2599 u32 tmp; 2600 2601 /* set these so they don't interfere with anything */ 2602 WREG32(RADEON_OV0_SCALE_CNTL, 0); 2603 WREG32(RADEON_SUBPIC_CNTL, 0); 2604 WREG32(RADEON_VIPH_CONTROL, 0); 2605 WREG32(RADEON_I2C_CNTL_1, 0); 2606 WREG32(RADEON_DVI_I2C_CNTL_1, 0); 2607 WREG32(RADEON_CAP0_TRIG_CNTL, 0); 2608 WREG32(RADEON_CAP1_TRIG_CNTL, 0); 2609 2610 /* always set up dac2 on rn50 and some rv100 as lots 2611 * of servers seem to wire it up to a VGA port but 2612 * don't report it in the bios connector 2613 * table. 2614 */ 2615 switch (dev->pdev->device) { 2616 /* RN50 */ 2617 case 0x515e: 2618 case 0x5969: 2619 force_dac2 = true; 2620 break; 2621 /* RV100 */ 2622 case 0x5159: 2623 case 0x515a: 2624 /* DELL triple head servers */ 2625 if ((dev->pdev->subsystem_vendor == 0x1028 /* DELL */) && 2626 ((dev->pdev->subsystem_device == 0x016c) || 2627 (dev->pdev->subsystem_device == 0x016d) || 2628 (dev->pdev->subsystem_device == 0x016e) || 2629 (dev->pdev->subsystem_device == 0x016f) || 2630 (dev->pdev->subsystem_device == 0x0170) || 2631 (dev->pdev->subsystem_device == 0x017d) || 2632 (dev->pdev->subsystem_device == 0x017e) || 2633 (dev->pdev->subsystem_device == 0x0183) || 2634 (dev->pdev->subsystem_device == 0x018a) || 2635 (dev->pdev->subsystem_device == 0x019a))) 2636 force_dac2 = true; 2637 break; 2638 } 2639 2640 if (force_dac2) { 2641 u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG); 2642 u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL); 2643 u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2); 2644 2645 /* For CRT on DAC2, don't turn it on if BIOS didn't 2646 enable it, even if it's detected.
2647 */ 2648 2649 /* force it to crtc0 */ 2650 dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL; 2651 dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL; 2652 disp_hw_debug |= RADEON_CRT2_DISP1_SEL; 2653 2654 /* set up the TV DAC */ 2655 tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL | 2656 RADEON_TV_DAC_STD_MASK | 2657 RADEON_TV_DAC_RDACPD | 2658 RADEON_TV_DAC_GDACPD | 2659 RADEON_TV_DAC_BDACPD | 2660 RADEON_TV_DAC_BGADJ_MASK | 2661 RADEON_TV_DAC_DACADJ_MASK); 2662 tv_dac_cntl |= (RADEON_TV_DAC_NBLANK | 2663 RADEON_TV_DAC_NHOLD | 2664 RADEON_TV_DAC_STD_PS2 | 2665 (0x58 << 16)); 2666 2667 WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl); 2668 WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug); 2669 WREG32(RADEON_DAC_CNTL2, dac2_cntl); 2670 } 2671 2672 /* switch PM block to ACPI mode */ 2673 tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL); 2674 tmp &= ~RADEON_PM_MODE_SEL; 2675 WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp); 2676 2677 } 2678 2679 /* 2680 * VRAM info 2681 */ 2682 static void r100_vram_get_type(struct radeon_device *rdev) 2683 { 2684 uint32_t tmp; 2685 2686 rdev->mc.vram_is_ddr = false; 2687 if (rdev->flags & RADEON_IS_IGP) 2688 rdev->mc.vram_is_ddr = true; 2689 else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR) 2690 rdev->mc.vram_is_ddr = true; 2691 if ((rdev->family == CHIP_RV100) || 2692 (rdev->family == CHIP_RS100) || 2693 (rdev->family == CHIP_RS200)) { 2694 tmp = RREG32(RADEON_MEM_CNTL); 2695 if (tmp & RV100_HALF_MODE) { 2696 rdev->mc.vram_width = 32; 2697 } else { 2698 rdev->mc.vram_width = 64; 2699 } 2700 if (rdev->flags & RADEON_SINGLE_CRTC) { 2701 rdev->mc.vram_width /= 4; 2702 rdev->mc.vram_is_ddr = true; 2703 } 2704 } else if (rdev->family <= CHIP_RV280) { 2705 tmp = RREG32(RADEON_MEM_CNTL); 2706 if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) { 2707 rdev->mc.vram_width = 128; 2708 } else { 2709 rdev->mc.vram_width = 64; 2710 } 2711 } else { 2712 /* newer IGPs */ 2713 rdev->mc.vram_width = 128; 2714 } 2715 } 2716 2717 static u32 r100_get_accessible_vram(struct radeon_device *rdev) 2718 { 2719 u32 aper_size; 2720 u8 byte; 2721 2722 aper_size = RREG32(RADEON_CONFIG_APER_SIZE); 2723 2724 /* Set HDP_APER_CNTL only on cards that are known not to be broken, 2725 * that is, those that have the 2nd generation multifunction PCI interface 2726 */ 2727 if (rdev->family == CHIP_RV280 || 2728 rdev->family >= CHIP_RV350) { 2729 WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL, 2730 ~RADEON_HDP_APER_CNTL); 2731 DRM_INFO("Generation 2 PCI interface, using max accessible memory\n"); 2732 return aper_size * 2; 2733 } 2734 2735 /* Older cards have all sorts of funny issues to deal with. First 2736 * check if it's a multifunction card by reading the PCI config 2737 * header type... Limit those to one aperture size 2738 */ 2739 pci_read_config_byte(rdev->pdev, 0xe, &byte); 2740 if (byte & 0x80) { 2741 DRM_INFO("Generation 1 PCI interface in multifunction mode\n"); 2742 DRM_INFO("Limiting VRAM to one aperture\n"); 2743 return aper_size; 2744 } 2745 2746 /* Single function older card. We read HDP_APER_CNTL to see how the BIOS 2747 * has set it up. We don't write this as it's broken on some ASICs but 2748 * we expect the BIOS to have done the right thing (might be too optimistic...)
2749 */ 2750 if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL) 2751 return aper_size * 2; 2752 return aper_size; 2753 } 2754 2755 void r100_vram_init_sizes(struct radeon_device *rdev) 2756 { 2757 u64 config_aper_size; 2758 2759 /* work out accessible VRAM */ 2760 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0); 2761 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0); 2762 rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev); 2763 /* FIXME we don't use the second aperture yet when we could use it */ 2764 if (rdev->mc.visible_vram_size > rdev->mc.aper_size) 2765 rdev->mc.visible_vram_size = rdev->mc.aper_size; 2766 config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE); 2767 if (rdev->flags & RADEON_IS_IGP) { 2768 uint32_t tom; 2769 /* read NB_TOM to get the amount of ram stolen for the GPU */ 2770 tom = RREG32(RADEON_NB_TOM); 2771 rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16); 2772 WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size); 2773 rdev->mc.mc_vram_size = rdev->mc.real_vram_size; 2774 } else { 2775 rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE); 2776 /* Some production boards of m6 will report 0 2777 * if it's 8 MB 2778 */ 2779 if (rdev->mc.real_vram_size == 0) { 2780 rdev->mc.real_vram_size = 8192 * 1024; 2781 WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size); 2782 } 2783 /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM - 2784 * Novell bug 204882 + along with lots of ubuntu ones 2785 */ 2786 if (rdev->mc.aper_size > config_aper_size) 2787 config_aper_size = rdev->mc.aper_size; 2788 2789 if (config_aper_size > rdev->mc.real_vram_size) 2790 rdev->mc.mc_vram_size = config_aper_size; 2791 else 2792 rdev->mc.mc_vram_size = rdev->mc.real_vram_size; 2793 } 2794 } 2795 2796 void r100_vga_set_state(struct radeon_device *rdev, bool state) 2797 { 2798 uint32_t temp; 2799 2800 temp = RREG32(RADEON_CONFIG_CNTL); 2801 if (state == false) { 2802 temp &= ~RADEON_CFG_VGA_RAM_EN; 2803 temp |= RADEON_CFG_VGA_IO_DIS; 2804 } else { 2805 temp &= ~RADEON_CFG_VGA_IO_DIS; 2806 } 2807 WREG32(RADEON_CONFIG_CNTL, temp); 2808 } 2809 2810 static void r100_mc_init(struct radeon_device *rdev) 2811 { 2812 u64 base; 2813 2814 r100_vram_get_type(rdev); 2815 r100_vram_init_sizes(rdev); 2816 base = rdev->mc.aper_base; 2817 if (rdev->flags & RADEON_IS_IGP) 2818 base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16; 2819 radeon_vram_location(rdev, &rdev->mc, base); 2820 rdev->mc.gtt_base_align = 0; 2821 if (!(rdev->flags & RADEON_IS_AGP)) 2822 radeon_gtt_location(rdev, &rdev->mc); 2823 radeon_update_bandwidth_info(rdev); 2824 } 2825 2826 2827 /* 2828 * Indirect register accessors 2829 */ 2830 void r100_pll_errata_after_index(struct radeon_device *rdev) 2831 { 2832 if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) { 2833 (void)RREG32(RADEON_CLOCK_CNTL_DATA); 2834 (void)RREG32(RADEON_CRTC_GEN_CNTL); 2835 } 2836 } 2837 2838 static void r100_pll_errata_after_data(struct radeon_device *rdev) 2839 { 2840 /* This workaround is necessary on RV100, RS100 and RS200 chips 2841 * or the chip could hang on a subsequent access 2842 */ 2843 if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) { 2844 mdelay(5); 2845 } 2846 2847 /* This function is required to work around a hardware bug in some (all?) 2848 * revisions of the R300. This workaround should be called after every 2849 * CLOCK_CNTL_INDEX register access. If not, register reads afterward 2850 * may not be correct.
2851 */ 2852 if (rdev->pll_errata & CHIP_ERRATA_R300_CG) { 2853 uint32_t save, tmp; 2854 2855 save = RREG32(RADEON_CLOCK_CNTL_INDEX); 2856 tmp = save & ~(0x3f | RADEON_PLL_WR_EN); 2857 WREG32(RADEON_CLOCK_CNTL_INDEX, tmp); 2858 tmp = RREG32(RADEON_CLOCK_CNTL_DATA); 2859 WREG32(RADEON_CLOCK_CNTL_INDEX, save); 2860 } 2861 } 2862 2863 uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg) 2864 { 2865 uint32_t data; 2866 2867 WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f); 2868 r100_pll_errata_after_index(rdev); 2869 data = RREG32(RADEON_CLOCK_CNTL_DATA); 2870 r100_pll_errata_after_data(rdev); 2871 return data; 2872 } 2873 2874 void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) 2875 { 2876 WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN)); 2877 r100_pll_errata_after_index(rdev); 2878 WREG32(RADEON_CLOCK_CNTL_DATA, v); 2879 r100_pll_errata_after_data(rdev); 2880 } 2881 2882 static void r100_set_safe_registers(struct radeon_device *rdev) 2883 { 2884 if (ASIC_IS_RN50(rdev)) { 2885 rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm; 2886 rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm); 2887 } else if (rdev->family < CHIP_R200) { 2888 rdev->config.r100.reg_safe_bm = r100_reg_safe_bm; 2889 rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm); 2890 } else { 2891 r200_set_safe_registers(rdev); 2892 } 2893 } 2894 2895 /* 2896 * Debugfs info 2897 */ 2898 #if defined(CONFIG_DEBUG_FS) 2899 static int r100_debugfs_rbbm_info(struct seq_file *m, void *data) 2900 { 2901 struct drm_info_node *node = (struct drm_info_node *) m->private; 2902 struct drm_device *dev = node->minor->dev; 2903 struct radeon_device *rdev = dev->dev_private; 2904 uint32_t reg, value; 2905 unsigned i; 2906 2907 seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS)); 2908 seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C)); 2909 seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); 2910 for (i = 0; i < 64; i++) { 2911 WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100); 2912 reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2; 2913 WREG32(RADEON_RBBM_CMDFIFO_ADDR, i); 2914 value = RREG32(RADEON_RBBM_CMDFIFO_DATA); 2915 seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value); 2916 } 2917 return 0; 2918 } 2919 2920 static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data) 2921 { 2922 struct drm_info_node *node = (struct drm_info_node *) m->private; 2923 struct drm_device *dev = node->minor->dev; 2924 struct radeon_device *rdev = dev->dev_private; 2925 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 2926 uint32_t rdp, wdp; 2927 unsigned count, i, j; 2928 2929 radeon_ring_free_size(rdev, ring); 2930 rdp = RREG32(RADEON_CP_RB_RPTR); 2931 wdp = RREG32(RADEON_CP_RB_WPTR); 2932 count = (rdp + ring->ring_size - wdp) & ring->ptr_mask; 2933 seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); 2934 seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp); 2935 seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp); 2936 seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw); 2937 seq_printf(m, "%u dwords in ring\n", count); 2938 for (j = 0; j <= count; j++) { 2939 i = (rdp + j) & ring->ptr_mask; 2940 seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]); 2941 } 2942 return 0; 2943 } 2944 2945 2946 static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data) 2947 { 2948 struct drm_info_node *node = (struct drm_info_node *) m->private; 2949 struct drm_device *dev = node->minor->dev; 2950 struct radeon_device *rdev = dev->dev_private; 2951 
uint32_t csq_stat, csq2_stat, tmp; 2952 unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr; 2953 unsigned i; 2954 2955 seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); 2956 seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE)); 2957 csq_stat = RREG32(RADEON_CP_CSQ_STAT); 2958 csq2_stat = RREG32(RADEON_CP_CSQ2_STAT); 2959 r_rptr = (csq_stat >> 0) & 0x3ff; 2960 r_wptr = (csq_stat >> 10) & 0x3ff; 2961 ib1_rptr = (csq_stat >> 20) & 0x3ff; 2962 ib1_wptr = (csq2_stat >> 0) & 0x3ff; 2963 ib2_rptr = (csq2_stat >> 10) & 0x3ff; 2964 ib2_wptr = (csq2_stat >> 20) & 0x3ff; 2965 seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat); 2966 seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat); 2967 seq_printf(m, "Ring rptr %u\n", r_rptr); 2968 seq_printf(m, "Ring wptr %u\n", r_wptr); 2969 seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr); 2970 seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr); 2971 seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr); 2972 seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr); 2973 /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms 2974 * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */ 2975 seq_printf(m, "Ring fifo:\n"); 2976 for (i = 0; i < 256; i++) { 2977 WREG32(RADEON_CP_CSQ_ADDR, i << 2); 2978 tmp = RREG32(RADEON_CP_CSQ_DATA); 2979 seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp); 2980 } 2981 seq_printf(m, "Indirect1 fifo:\n"); 2982 for (i = 256; i <= 512; i++) { 2983 WREG32(RADEON_CP_CSQ_ADDR, i << 2); 2984 tmp = RREG32(RADEON_CP_CSQ_DATA); 2985 seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp); 2986 } 2987 seq_printf(m, "Indirect2 fifo:\n"); 2988 for (i = 640; i < ib1_wptr; i++) { 2989 WREG32(RADEON_CP_CSQ_ADDR, i << 2); 2990 tmp = RREG32(RADEON_CP_CSQ_DATA); 2991 seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp); 2992 } 2993 return 0; 2994 } 2995 2996 static int r100_debugfs_mc_info(struct seq_file *m, void *data) 2997 { 2998 struct drm_info_node *node = (struct drm_info_node *) m->private; 2999 struct drm_device *dev = node->minor->dev; 3000 struct radeon_device *rdev = dev->dev_private; 3001 uint32_t tmp; 3002 3003 tmp = RREG32(RADEON_CONFIG_MEMSIZE); 3004 seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp); 3005 tmp = RREG32(RADEON_MC_FB_LOCATION); 3006 seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp); 3007 tmp = RREG32(RADEON_BUS_CNTL); 3008 seq_printf(m, "BUS_CNTL 0x%08x\n", tmp); 3009 tmp = RREG32(RADEON_MC_AGP_LOCATION); 3010 seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp); 3011 tmp = RREG32(RADEON_AGP_BASE); 3012 seq_printf(m, "AGP_BASE 0x%08x\n", tmp); 3013 tmp = RREG32(RADEON_HOST_PATH_CNTL); 3014 seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp); 3015 tmp = RREG32(0x01D0); 3016 seq_printf(m, "AIC_CTRL 0x%08x\n", tmp); 3017 tmp = RREG32(RADEON_AIC_LO_ADDR); 3018 seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp); 3019 tmp = RREG32(RADEON_AIC_HI_ADDR); 3020 seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp); 3021 tmp = RREG32(0x01E4); 3022 seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp); 3023 return 0; 3024 } 3025 3026 static struct drm_info_list r100_debugfs_rbbm_list[] = { 3027 {"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL}, 3028 }; 3029 3030 static struct drm_info_list r100_debugfs_cp_list[] = { 3031 {"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL}, 3032 {"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL}, 3033 }; 3034 3035 static struct drm_info_list r100_debugfs_mc_info_list[] = { 3036 {"r100_mc_info", r100_debugfs_mc_info, 0, NULL}, 3037 }; 3038 #endif 3039 3040 int r100_debugfs_rbbm_init(struct radeon_device *rdev) 3041 { 3042 #if 
defined(CONFIG_DEBUG_FS) 3043 return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1); 3044 #else 3045 return 0; 3046 #endif 3047 } 3048 3049 int r100_debugfs_cp_init(struct radeon_device *rdev) 3050 { 3051 #if defined(CONFIG_DEBUG_FS) 3052 return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2); 3053 #else 3054 return 0; 3055 #endif 3056 } 3057 3058 int r100_debugfs_mc_info_init(struct radeon_device *rdev) 3059 { 3060 #if defined(CONFIG_DEBUG_FS) 3061 return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1); 3062 #else 3063 return 0; 3064 #endif 3065 } 3066 3067 int r100_set_surface_reg(struct radeon_device *rdev, int reg, 3068 uint32_t tiling_flags, uint32_t pitch, 3069 uint32_t offset, uint32_t obj_size) 3070 { 3071 int surf_index = reg * 16; 3072 int flags = 0; 3073 3074 if (rdev->family <= CHIP_RS200) { 3075 if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) 3076 == (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) 3077 flags |= RADEON_SURF_TILE_COLOR_BOTH; 3078 if (tiling_flags & RADEON_TILING_MACRO) 3079 flags |= RADEON_SURF_TILE_COLOR_MACRO; 3080 } else if (rdev->family <= CHIP_RV280) { 3081 if (tiling_flags & (RADEON_TILING_MACRO)) 3082 flags |= R200_SURF_TILE_COLOR_MACRO; 3083 if (tiling_flags & RADEON_TILING_MICRO) 3084 flags |= R200_SURF_TILE_COLOR_MICRO; 3085 } else { 3086 if (tiling_flags & RADEON_TILING_MACRO) 3087 flags |= R300_SURF_TILE_MACRO; 3088 if (tiling_flags & RADEON_TILING_MICRO) 3089 flags |= R300_SURF_TILE_MICRO; 3090 } 3091 3092 if (tiling_flags & RADEON_TILING_SWAP_16BIT) 3093 flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP; 3094 if (tiling_flags & RADEON_TILING_SWAP_32BIT) 3095 flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP; 3096 3097 /* when we aren't tiling, the pitch seems to need to be further divided down.
- tested on power5 + rn50 server */ 3098 if (tiling_flags & (RADEON_TILING_SWAP_16BIT | RADEON_TILING_SWAP_32BIT)) { 3099 if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) 3100 if (ASIC_IS_RN50(rdev)) 3101 pitch /= 16; 3102 } 3103 3104 /* r100/r200 divide by 16 */ 3105 if (rdev->family < CHIP_R300) 3106 flags |= pitch / 16; 3107 else 3108 flags |= pitch / 8; 3109 3110 3111 DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1); 3112 WREG32(RADEON_SURFACE0_INFO + surf_index, flags); 3113 WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset); 3114 WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1); 3115 return 0; 3116 } 3117 3118 void r100_clear_surface_reg(struct radeon_device *rdev, int reg) 3119 { 3120 int surf_index = reg * 16; 3121 WREG32(RADEON_SURFACE0_INFO + surf_index, 0); 3122 } 3123 3124 void r100_bandwidth_update(struct radeon_device *rdev) 3125 { 3126 fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff; 3127 fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff; 3128 fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff; 3129 uint32_t temp, data, mem_trcd, mem_trp, mem_tras; 3130 fixed20_12 memtcas_ff[8] = { 3131 dfixed_init(1), 3132 dfixed_init(2), 3133 dfixed_init(3), 3134 dfixed_init(0), 3135 dfixed_init_half(1), 3136 dfixed_init_half(2), 3137 dfixed_init(0), 3138 }; 3139 fixed20_12 memtcas_rs480_ff[8] = { 3140 dfixed_init(0), 3141 dfixed_init(1), 3142 dfixed_init(2), 3143 dfixed_init(3), 3144 dfixed_init(0), 3145 dfixed_init_half(1), 3146 dfixed_init_half(2), 3147 dfixed_init_half(3), 3148 }; 3149 fixed20_12 memtcas2_ff[8] = { 3150 dfixed_init(0), 3151 dfixed_init(1), 3152 dfixed_init(2), 3153 dfixed_init(3), 3154 dfixed_init(4), 3155 dfixed_init(5), 3156 dfixed_init(6), 3157 dfixed_init(7), 3158 }; 3159 fixed20_12 memtrbs[8] = { 3160 dfixed_init(1), 3161 dfixed_init_half(1), 3162 dfixed_init(2), 3163 dfixed_init_half(2), 3164 dfixed_init(3), 3165 dfixed_init_half(3), 3166 dfixed_init(4), 3167 dfixed_init_half(4) 3168 }; 3169 fixed20_12 memtrbs_r4xx[8] = { 3170 dfixed_init(4), 3171 dfixed_init(5), 3172 dfixed_init(6), 3173 dfixed_init(7), 3174 dfixed_init(8), 3175 dfixed_init(9), 3176 dfixed_init(10), 3177 dfixed_init(11) 3178 }; 3179 fixed20_12 min_mem_eff; 3180 fixed20_12 mc_latency_sclk, mc_latency_mclk, k1; 3181 fixed20_12 cur_latency_mclk, cur_latency_sclk; 3182 fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate, 3183 disp_drain_rate2, read_return_rate; 3184 fixed20_12 time_disp1_drop_priority; 3185 int c; 3186 int cur_size = 16; /* in octawords */ 3187 int critical_point = 0, critical_point2; 3188 /* uint32_t read_return_rate, time_disp1_drop_priority; */ 3189 int stop_req, max_stop_req; 3190 struct drm_display_mode *mode1 = NULL; 3191 struct drm_display_mode *mode2 = NULL; 3192 uint32_t pixel_bytes1 = 0; 3193 uint32_t pixel_bytes2 = 0; 3194 3195 radeon_update_display_priority(rdev); 3196 3197 if (rdev->mode_info.crtcs[0]->base.enabled) { 3198 mode1 = &rdev->mode_info.crtcs[0]->base.mode; 3199 pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8; 3200 } 3201 if (!(rdev->flags & RADEON_SINGLE_CRTC)) { 3202 if (rdev->mode_info.crtcs[1]->base.enabled) { 3203 mode2 = &rdev->mode_info.crtcs[1]->base.mode; 3204 pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8; 3205 } 3206 } 3207 3208 min_mem_eff.full = dfixed_const_8(0); 3209 /* get modes */ 3210 if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) { 3211 uint32_t 
mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER); 3212 mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT); 3213 mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT); 3214 /* check crtc enables */ 3215 if (mode2) 3216 mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT); 3217 if (mode1) 3218 mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT); 3219 WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer); 3220 } 3221 3222 /* 3223 * determine if there is enough bw for current mode 3224 */ 3225 sclk_ff = rdev->pm.sclk; 3226 mclk_ff = rdev->pm.mclk; 3227 3228 temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1); 3229 temp_ff.full = dfixed_const(temp); 3230 mem_bw.full = dfixed_mul(mclk_ff, temp_ff); 3231 3232 pix_clk.full = 0; 3233 pix_clk2.full = 0; 3234 peak_disp_bw.full = 0; 3235 if (mode1) { 3236 temp_ff.full = dfixed_const(1000); 3237 pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */ 3238 pix_clk.full = dfixed_div(pix_clk, temp_ff); 3239 temp_ff.full = dfixed_const(pixel_bytes1); 3240 peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff); 3241 } 3242 if (mode2) { 3243 temp_ff.full = dfixed_const(1000); 3244 pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */ 3245 pix_clk2.full = dfixed_div(pix_clk2, temp_ff); 3246 temp_ff.full = dfixed_const(pixel_bytes2); 3247 peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff); 3248 } 3249 3250 mem_bw.full = dfixed_mul(mem_bw, min_mem_eff); 3251 if (peak_disp_bw.full >= mem_bw.full) { 3252 DRM_ERROR("You may not have enough display bandwidth for current mode\n" 3253 "If you have a flickering problem, try to lower resolution, refresh rate, or color depth\n"); 3254 } 3255 3256 /* Get values from the EXT_MEM_CNTL register...converting its contents.
*/ 3257 temp = RREG32(RADEON_MEM_TIMING_CNTL); 3258 if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */ 3259 mem_trcd = ((temp >> 2) & 0x3) + 1; 3260 mem_trp = ((temp & 0x3)) + 1; 3261 mem_tras = ((temp & 0x70) >> 4) + 1; 3262 } else if (rdev->family == CHIP_R300 || 3263 rdev->family == CHIP_R350) { /* r300, r350 */ 3264 mem_trcd = (temp & 0x7) + 1; 3265 mem_trp = ((temp >> 8) & 0x7) + 1; 3266 mem_tras = ((temp >> 11) & 0xf) + 4; 3267 } else if (rdev->family == CHIP_RV350 || 3268 rdev->family <= CHIP_RV380) { 3269 /* rv3x0 */ 3270 mem_trcd = (temp & 0x7) + 3; 3271 mem_trp = ((temp >> 8) & 0x7) + 3; 3272 mem_tras = ((temp >> 11) & 0xf) + 6; 3273 } else if (rdev->family == CHIP_R420 || 3274 rdev->family == CHIP_R423 || 3275 rdev->family == CHIP_RV410) { 3276 /* r4xx */ 3277 mem_trcd = (temp & 0xf) + 3; 3278 if (mem_trcd > 15) 3279 mem_trcd = 15; 3280 mem_trp = ((temp >> 8) & 0xf) + 3; 3281 if (mem_trp > 15) 3282 mem_trp = 15; 3283 mem_tras = ((temp >> 12) & 0x1f) + 6; 3284 if (mem_tras > 31) 3285 mem_tras = 31; 3286 } else { /* RV200, R200 */ 3287 mem_trcd = (temp & 0x7) + 1; 3288 mem_trp = ((temp >> 8) & 0x7) + 1; 3289 mem_tras = ((temp >> 12) & 0xf) + 4; 3290 } 3291 /* convert to FF */ 3292 trcd_ff.full = dfixed_const(mem_trcd); 3293 trp_ff.full = dfixed_const(mem_trp); 3294 tras_ff.full = dfixed_const(mem_tras); 3295 3296 /* Get values from the MEM_SDRAM_MODE_REG register...converting its */ 3297 temp = RREG32(RADEON_MEM_SDRAM_MODE_REG); 3298 data = (temp & (7 << 20)) >> 20; 3299 if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) { 3300 if (rdev->family == CHIP_RS480) /* don't think rs400 */ 3301 tcas_ff = memtcas_rs480_ff[data]; 3302 else 3303 tcas_ff = memtcas_ff[data]; 3304 } else 3305 tcas_ff = memtcas2_ff[data]; 3306 3307 if (rdev->family == CHIP_RS400 || 3308 rdev->family == CHIP_RS480) { 3309 /* extra cas latency stored in bits 23-25 0-4 clocks */ 3310 data = (temp >> 23) & 0x7; 3311 if (data < 5) 3312 tcas_ff.full += dfixed_const(data); 3313 } 3314 3315 if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) { 3316 /* on the R300, Tcas is included in Trbs. 3317 */ 3318 temp = RREG32(RADEON_MEM_CNTL); 3319 data = (R300_MEM_NUM_CHANNELS_MASK & temp); 3320 if (data == 1) { 3321 if (R300_MEM_USE_CD_CH_ONLY & temp) { 3322 temp = RREG32(R300_MC_IND_INDEX); 3323 temp &= ~R300_MC_IND_ADDR_MASK; 3324 temp |= R300_MC_READ_CNTL_CD_mcind; 3325 WREG32(R300_MC_IND_INDEX, temp); 3326 temp = RREG32(R300_MC_IND_DATA); 3327 data = (R300_MEM_RBS_POSITION_C_MASK & temp); 3328 } else { 3329 temp = RREG32(R300_MC_READ_CNTL_AB); 3330 data = (R300_MEM_RBS_POSITION_A_MASK & temp); 3331 } 3332 } else { 3333 temp = RREG32(R300_MC_READ_CNTL_AB); 3334 data = (R300_MEM_RBS_POSITION_A_MASK & temp); 3335 } 3336 if (rdev->family == CHIP_RV410 || 3337 rdev->family == CHIP_R420 || 3338 rdev->family == CHIP_R423) 3339 trbs_ff = memtrbs_r4xx[data]; 3340 else 3341 trbs_ff = memtrbs[data]; 3342 tcas_ff.full += trbs_ff.full; 3343 } 3344 3345 sclk_eff_ff.full = sclk_ff.full; 3346 3347 if (rdev->flags & RADEON_IS_AGP) { 3348 fixed20_12 agpmode_ff; 3349 agpmode_ff.full = dfixed_const(radeon_agpmode); 3350 temp_ff.full = dfixed_const_666(16); 3351 sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff); 3352 } 3353 /* TODO PCIE lanes may affect this - agpmode == 16?? 
*/ 3354 3355 if (ASIC_IS_R300(rdev)) { 3356 sclk_delay_ff.full = dfixed_const(250); 3357 } else { 3358 if ((rdev->family == CHIP_RV100) || 3359 rdev->flags & RADEON_IS_IGP) { 3360 if (rdev->mc.vram_is_ddr) 3361 sclk_delay_ff.full = dfixed_const(41); 3362 else 3363 sclk_delay_ff.full = dfixed_const(33); 3364 } else { 3365 if (rdev->mc.vram_width == 128) 3366 sclk_delay_ff.full = dfixed_const(57); 3367 else 3368 sclk_delay_ff.full = dfixed_const(41); 3369 } 3370 } 3371 3372 mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff); 3373 3374 if (rdev->mc.vram_is_ddr) { 3375 if (rdev->mc.vram_width == 32) { 3376 k1.full = dfixed_const(40); 3377 c = 3; 3378 } else { 3379 k1.full = dfixed_const(20); 3380 c = 1; 3381 } 3382 } else { 3383 k1.full = dfixed_const(40); 3384 c = 3; 3385 } 3386 3387 temp_ff.full = dfixed_const(2); 3388 mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff); 3389 temp_ff.full = dfixed_const(c); 3390 mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff); 3391 temp_ff.full = dfixed_const(4); 3392 mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff); 3393 mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff); 3394 mc_latency_mclk.full += k1.full; 3395 3396 mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff); 3397 mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff); 3398 3399 /* 3400 HW cursor time assuming worst case of full size colour cursor. 3401 */ 3402 temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1)))); 3403 temp_ff.full += trcd_ff.full; 3404 if (temp_ff.full < tras_ff.full) 3405 temp_ff.full = tras_ff.full; 3406 cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff); 3407 3408 temp_ff.full = dfixed_const(cur_size); 3409 cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff); 3410 /* 3411 Find the total latency for the display data. 3412 */ 3413 disp_latency_overhead.full = dfixed_const(8); 3414 disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff); 3415 mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full; 3416 mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full; 3417 3418 if (mc_latency_mclk.full > mc_latency_sclk.full) 3419 disp_latency.full = mc_latency_mclk.full; 3420 else 3421 disp_latency.full = mc_latency_sclk.full; 3422 3423 /* setup Max GRPH_STOP_REQ default value */ 3424 if (ASIC_IS_RV100(rdev)) 3425 max_stop_req = 0x5c; 3426 else 3427 max_stop_req = 0x7c; 3428 3429 if (mode1) { 3430 /* CRTC1 3431 Set GRPH_BUFFER_CNTL register using h/w defined optimal values. 3432 GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ] 3433 */ 3434 stop_req = mode1->hdisplay * pixel_bytes1 / 16; 3435 3436 if (stop_req > max_stop_req) 3437 stop_req = max_stop_req; 3438 3439 /* 3440 Find the drain rate of the display buffer. 3441 */ 3442 temp_ff.full = dfixed_const((16/pixel_bytes1)); 3443 disp_drain_rate.full = dfixed_div(pix_clk, temp_ff); 3444 3445 /* 3446 Find the critical point of the display buffer. 3447 */ 3448 crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency); 3449 crit_point_ff.full += dfixed_const_half(0); 3450 3451 critical_point = dfixed_trunc(crit_point_ff); 3452 3453 if (rdev->disp_priority == 2) { 3454 critical_point = 0; 3455 } 3456 3457 /* 3458 The critical point should never be above max_stop_req-4. Setting 3459 GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time. 
3460 */ 3461 if (max_stop_req - critical_point < 4) 3462 critical_point = 0; 3463 3464 if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) { 3465 /* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/ 3466 critical_point = 0x10; 3467 } 3468 3469 temp = RREG32(RADEON_GRPH_BUFFER_CNTL); 3470 temp &= ~(RADEON_GRPH_STOP_REQ_MASK); 3471 temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT); 3472 temp &= ~(RADEON_GRPH_START_REQ_MASK); 3473 if ((rdev->family == CHIP_R350) && 3474 (stop_req > 0x15)) { 3475 stop_req -= 0x10; 3476 } 3477 temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT); 3478 temp |= RADEON_GRPH_BUFFER_SIZE; 3479 temp &= ~(RADEON_GRPH_CRITICAL_CNTL | 3480 RADEON_GRPH_CRITICAL_AT_SOF | 3481 RADEON_GRPH_STOP_CNTL); 3482 /* 3483 Write the result into the register. 3484 */ 3485 WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) | 3486 (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT))); 3487 3488 #if 0 3489 if ((rdev->family == CHIP_RS400) || 3490 (rdev->family == CHIP_RS480)) { 3491 /* attempt to program RS400 disp regs correctly ??? */ 3492 temp = RREG32(RS400_DISP1_REG_CNTL); 3493 temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK | 3494 RS400_DISP1_STOP_REQ_LEVEL_MASK); 3495 WREG32(RS400_DISP1_REQ_CNTL1, (temp | 3496 (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) | 3497 (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT))); 3498 temp = RREG32(RS400_DMIF_MEM_CNTL1); 3499 temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK | 3500 RS400_DISP1_CRITICAL_POINT_STOP_MASK); 3501 WREG32(RS400_DMIF_MEM_CNTL1, (temp | 3502 (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) | 3503 (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT))); 3504 } 3505 #endif 3506 3507 DRM_DEBUG_KMS("GRPH_BUFFER_CNTL from to %x\n", 3508 /* (unsigned int)info->SavedReg->grph_buffer_cntl, */ 3509 (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL)); 3510 } 3511 3512 if (mode2) { 3513 u32 grph2_cntl; 3514 stop_req = mode2->hdisplay * pixel_bytes2 / 16; 3515 3516 if (stop_req > max_stop_req) 3517 stop_req = max_stop_req; 3518 3519 /* 3520 Find the drain rate of the display buffer. 
3521 */ 3522 temp_ff.full = dfixed_const((16/pixel_bytes2)); 3523 disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff); 3524 3525 grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL); 3526 grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK); 3527 grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT); 3528 grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK); 3529 if ((rdev->family == CHIP_R350) && 3530 (stop_req > 0x15)) { 3531 stop_req -= 0x10; 3532 } 3533 grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT); 3534 grph2_cntl |= RADEON_GRPH_BUFFER_SIZE; 3535 grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL | 3536 RADEON_GRPH_CRITICAL_AT_SOF | 3537 RADEON_GRPH_STOP_CNTL); 3538 3539 if ((rdev->family == CHIP_RS100) || 3540 (rdev->family == CHIP_RS200)) 3541 critical_point2 = 0; 3542 else { 3543 temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128; 3544 temp_ff.full = dfixed_const(temp); 3545 temp_ff.full = dfixed_mul(mclk_ff, temp_ff); 3546 if (sclk_ff.full < temp_ff.full) 3547 temp_ff.full = sclk_ff.full; 3548 3549 read_return_rate.full = temp_ff.full; 3550 3551 if (mode1) { 3552 temp_ff.full = read_return_rate.full - disp_drain_rate.full; 3553 time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff); 3554 } else { 3555 time_disp1_drop_priority.full = 0; 3556 } 3557 crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full; 3558 crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2); 3559 crit_point_ff.full += dfixed_const_half(0); 3560 3561 critical_point2 = dfixed_trunc(crit_point_ff); 3562 3563 if (rdev->disp_priority == 2) { 3564 critical_point2 = 0; 3565 } 3566 3567 if (max_stop_req - critical_point2 < 4) 3568 critical_point2 = 0; 3569 3570 } 3571 3572 if (critical_point2 == 0 && rdev->family == CHIP_R300) { 3573 /* some R300 cards have problem with this set to 0 */ 3574 critical_point2 = 0x10; 3575 } 3576 3577 WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) | 3578 (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT))); 3579 3580 if ((rdev->family == CHIP_RS400) || 3581 (rdev->family == CHIP_RS480)) { 3582 #if 0 3583 /* attempt to program RS400 disp2 regs correctly ??? 
*/ 3584 temp = RREG32(RS400_DISP2_REQ_CNTL1); 3585 temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK | 3586 RS400_DISP2_STOP_REQ_LEVEL_MASK); 3587 WREG32(RS400_DISP2_REQ_CNTL1, (temp | 3588 (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) | 3589 (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT))); 3590 temp = RREG32(RS400_DISP2_REQ_CNTL2); 3591 temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK | 3592 RS400_DISP2_CRITICAL_POINT_STOP_MASK); 3593 WREG32(RS400_DISP2_REQ_CNTL2, (temp | 3594 (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) | 3595 (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT))); 3596 #endif 3597 WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC); 3598 WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000); 3599 WREG32(RS400_DMIF_MEM_CNTL1, 0x29CA71DC); 3600 WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC); 3601 } 3602 3603 DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n", 3604 (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL)); 3605 } 3606 } 3607 3608 int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) 3609 { 3610 uint32_t scratch; 3611 uint32_t tmp = 0; 3612 unsigned i; 3613 int r; 3614 3615 r = radeon_scratch_get(rdev, &scratch); 3616 if (r) { 3617 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r); 3618 return r; 3619 } 3620 WREG32(scratch, 0xCAFEDEAD); 3621 r = radeon_ring_lock(rdev, ring, 2); 3622 if (r) { 3623 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); 3624 radeon_scratch_free(rdev, scratch); 3625 return r; 3626 } 3627 radeon_ring_write(ring, PACKET0(scratch, 0)); 3628 radeon_ring_write(ring, 0xDEADBEEF); 3629 radeon_ring_unlock_commit(rdev, ring); 3630 for (i = 0; i < rdev->usec_timeout; i++) { 3631 tmp = RREG32(scratch); 3632 if (tmp == 0xDEADBEEF) { 3633 break; 3634 } 3635 DRM_UDELAY(1); 3636 } 3637 if (i < rdev->usec_timeout) { 3638 DRM_INFO("ring test succeeded in %d usecs\n", i); 3639 } else { 3640 DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n", 3641 scratch, tmp); 3642 r = -EINVAL; 3643 } 3644 radeon_scratch_free(rdev, scratch); 3645 return r; 3646 } 3647 3648 void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) 3649 { 3650 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 3651 3652 if (ring->rptr_save_reg) { 3653 u32 next_rptr = ring->wptr + 2 + 3; 3654 radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0)); 3655 radeon_ring_write(ring, next_rptr); 3656 } 3657 3658 radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1)); 3659 radeon_ring_write(ring, ib->gpu_addr); 3660 radeon_ring_write(ring, ib->length_dw); 3661 } 3662 3663 int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) 3664 { 3665 struct radeon_ib ib; 3666 uint32_t scratch; 3667 uint32_t tmp = 0; 3668 unsigned i; 3669 int r; 3670 3671 r = radeon_scratch_get(rdev, &scratch); 3672 if (r) { 3673 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r); 3674 return r; 3675 } 3676 WREG32(scratch, 0xCAFEDEAD); 3677 r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, NULL, 256); 3678 if (r) { 3679 DRM_ERROR("radeon: failed to get ib (%d).\n", r); 3680 goto free_scratch; 3681 } 3682 ib.ptr[0] = PACKET0(scratch, 0); 3683 ib.ptr[1] = 0xDEADBEEF; 3684 ib.ptr[2] = PACKET2(0); 3685 ib.ptr[3] = PACKET2(0); 3686 ib.ptr[4] = PACKET2(0); 3687 ib.ptr[5] = PACKET2(0); 3688 ib.ptr[6] = PACKET2(0); 3689 ib.ptr[7] = PACKET2(0); 3690 ib.length_dw = 8; 3691 r = radeon_ib_schedule(rdev, &ib, NULL); 3692 if (r) { 3693 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); 3694 goto free_ib; 3695 } 3696 r = 
void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];

	if (ring->rptr_save_reg) {
		u32 next_rptr = ring->wptr + 2 + 3;
		radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
		radeon_ring_write(ring, next_rptr);
	}

	radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1));
	radeon_ring_write(ring, ib->gpu_addr);
	radeon_ring_write(ring, ib->length_dw);
}
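/**
 * r100_ib_test - IB submission test.
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocates an IB that writes 0xDEADBEEF to a scratch register,
 * schedules it, waits on its fence, then polls the scratch register
 * for the value (r1xx-r4xx).
 * Returns 0 on success, error on failure.
 */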
int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		goto free_scratch;
	}
	ib.ptr[0] = PACKET0(scratch, 0);
	ib.ptr[1] = 0xDEADBEEF;
	ib.ptr[2] = PACKET2(0);
	ib.ptr[3] = PACKET2(0);
	ib.ptr[4] = PACKET2(0);
	ib.ptr[5] = PACKET2(0);
	ib.ptr[6] = PACKET2(0);
	ib.ptr[7] = PACKET2(0);
	ib.length_dw = 8;
	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		goto free_ib;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		goto free_ib;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF) {
			break;
		}
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test succeeded in %u usecs\n", i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
free_ib:
	radeon_ib_free(rdev, &ib);
free_scratch:
	radeon_scratch_free(rdev, scratch);
	return r;
}

void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
{
	/* Shutdown CP; we shouldn't need to do this, but better safe
	 * than sorry.
	 */
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	WREG32(R_000740_CP_CSQ_CNTL, 0);

	/* Save a few CRTC registers */
	save->GENMO_WT = RREG8(R_0003C2_GENMO_WT);
	save->CRTC_EXT_CNTL = RREG32(R_000054_CRTC_EXT_CNTL);
	save->CRTC_GEN_CNTL = RREG32(R_000050_CRTC_GEN_CNTL);
	save->CUR_OFFSET = RREG32(R_000260_CUR_OFFSET);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		save->CRTC2_GEN_CNTL = RREG32(R_0003F8_CRTC2_GEN_CNTL);
		save->CUR2_OFFSET = RREG32(R_000360_CUR2_OFFSET);
	}

	/* Disable VGA aperture access */
	WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & save->GENMO_WT);
	/* Disable cursor, overlay, crtc */
	WREG32(R_000260_CUR_OFFSET, save->CUR_OFFSET | S_000260_CUR_LOCK(1));
	WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL |
					S_000054_CRTC_DISPLAY_DIS(1));
	WREG32(R_000050_CRTC_GEN_CNTL,
			(C_000050_CRTC_CUR_EN & save->CRTC_GEN_CNTL) |
			S_000050_CRTC_DISP_REQ_EN_B(1));
	WREG32(R_000420_OV0_SCALE_CNTL,
		C_000420_OV0_OVERLAY_EN & RREG32(R_000420_OV0_SCALE_CNTL));
	WREG32(R_000260_CUR_OFFSET, C_000260_CUR_LOCK & save->CUR_OFFSET);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_000360_CUR2_OFFSET, save->CUR2_OFFSET |
						S_000360_CUR2_LOCK(1));
		WREG32(R_0003F8_CRTC2_GEN_CNTL,
			(C_0003F8_CRTC2_CUR_EN & save->CRTC2_GEN_CNTL) |
			S_0003F8_CRTC2_DISPLAY_DIS(1) |
			S_0003F8_CRTC2_DISP_REQ_EN_B(1));
		WREG32(R_000360_CUR2_OFFSET,
			C_000360_CUR2_LOCK & save->CUR2_OFFSET);
	}
}

void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save)
{
	/* Update base address for crtc */
	WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
	}
	/* Restore CRTC registers */
	WREG8(R_0003C2_GENMO_WT, save->GENMO_WT);
	WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL);
	WREG32(R_000050_CRTC_GEN_CNTL, save->CRTC_GEN_CNTL);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_0003F8_CRTC2_GEN_CNTL, save->CRTC2_GEN_CNTL);
	}
}

void r100_vga_render_disable(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG8(R_0003C2_GENMO_WT);
	WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & tmp);
}

static void r100_debugfs(struct radeon_device *rdev)
{
	int r;

	r = r100_debugfs_mc_info_init(rdev);
	if (r)
		dev_warn(rdev->dev, "Failed to create r100_mc debugfs file.\n");
}

static void r100_mc_program(struct radeon_device *rdev)
{
	struct r100_mc_save save;

	/* Stop all MC clients */
	r100_mc_stop(rdev, &save);
	if (rdev->flags & RADEON_IS_AGP) {
		WREG32(R_00014C_MC_AGP_LOCATION,
			S_00014C_MC_AGP_START(rdev->mc.gtt_start >> 16) |
			S_00014C_MC_AGP_TOP(rdev->mc.gtt_end >> 16));
		WREG32(R_000170_AGP_BASE, lower_32_bits(rdev->mc.agp_base));
		if (rdev->family > CHIP_RV200)
			WREG32(R_00015C_AGP_BASE_2,
				upper_32_bits(rdev->mc.agp_base) & 0xff);
	} else {
		WREG32(R_00014C_MC_AGP_LOCATION, 0x0FFFFFFF);
		WREG32(R_000170_AGP_BASE, 0);
		if (rdev->family > CHIP_RV200)
			WREG32(R_00015C_AGP_BASE_2, 0);
	}
	/* Wait for MC idle */
	if (r100_mc_wait_for_idle(rdev))
		dev_warn(rdev->dev, "Wait for MC idle timeout.\n");
	/* Program the MC; the address space is limited to 32 bits */
	WREG32(R_000148_MC_FB_LOCATION,
		S_000148_MC_FB_START(rdev->mc.vram_start >> 16) |
		S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16));
	r100_mc_resume(rdev, &save);
}

static void r100_clock_startup(struct radeon_device *rdev)
{
	u32 tmp;

	if (radeon_dynclks != -1 && radeon_dynclks)
		radeon_legacy_set_clock_gating(rdev, 1);
	/* We need to force on some of the blocks */
	tmp = RREG32_PLL(R_00000D_SCLK_CNTL);
	tmp |= S_00000D_FORCE_CP(1) | S_00000D_FORCE_VIP(1);
	if ((rdev->family == CHIP_RV250) || (rdev->family == CHIP_RV280))
		tmp |= S_00000D_FORCE_DISP1(1) | S_00000D_FORCE_DISP2(1);
	WREG32_PLL(R_00000D_SCLK_CNTL, tmp);
}
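/**
 * r100_startup - program the hw to a working state.
 *
 * @rdev: radeon_device pointer
 *
 * Programs the common registers, MC and clocks, enables bus mastering
 * and the GART, then brings up writeback, fences, IRQs, the CP ring
 * buffer and the IB pool (r1xx-r4xx).
 * Returns 0 on success, error on failure.
 */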
static int r100_startup(struct radeon_device *rdev)
{
	int r;

	/* set common regs */
	r100_set_common_regs(rdev);
	/* program mc */
	r100_mc_program(rdev);
	/* Resume clock */
	r100_clock_startup(rdev);
	/* Initialize GART (initialize after TTM so we can allocate
	 * memory through TTM but finalize after TTM) */
	r100_enable_bm(rdev);
	if (rdev->flags & RADEON_IS_PCI) {
		r = r100_pci_gart_enable(rdev);
		if (r)
			return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r100_irq_set(rdev);
	rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
	/* 1M ring buffer */
	r = r100_cp_init(rdev, 1024 * 1024);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
		return r;
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}

int r100_resume(struct radeon_device *rdev)
{
	int r;

	/* Make sure the GART is not working */
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_disable(rdev);
	/* Resume clock before doing reset */
	r100_clock_startup(rdev);
	/* Reset gpu before posting otherwise ATOM will enter infinite loop */
	if (radeon_asic_reset(rdev)) {
		dev_warn(rdev->dev, "GPU reset failed! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
			RREG32(R_000E40_RBBM_STATUS),
			RREG32(R_0007C0_CP_STAT));
	}
	/* post */
	radeon_combios_asic_init(rdev->ddev);
	/* Resume clock after posting */
	r100_clock_startup(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);

	rdev->accel_working = true;
	r = r100_startup(rdev);
	if (r) {
		rdev->accel_working = false;
	}
	return r;
}

int r100_suspend(struct radeon_device *rdev)
{
	r100_cp_disable(rdev);
	radeon_wb_disable(rdev);
	r100_irq_disable(rdev);
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_disable(rdev);
	return 0;
}

void r100_fini(struct radeon_device *rdev)
{
	r100_cp_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_gem_fini(rdev);
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_fini(rdev);
	radeon_agp_fini(rdev);
	radeon_irq_kms_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}

/*
 * Due to how kexec works, it can leave the hw fully initialised when it
 * boots the new kernel. However doing our init sequence with the CP and
 * WB stuff setup causes GPU hangs on the RN50 at least. So at startup
 * do some quick sanity checks and restore sane values to avoid this
 * problem.
 */
void r100_restore_sanity(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG32(RADEON_CP_CSQ_CNTL);
	if (tmp) {
		WREG32(RADEON_CP_CSQ_CNTL, 0);
	}
	tmp = RREG32(RADEON_CP_RB_CNTL);
	if (tmp) {
		WREG32(RADEON_CP_RB_CNTL, 0);
	}
	tmp = RREG32(RADEON_SCRATCH_UMSK);
	if (tmp) {
		WREG32(RADEON_SCRATCH_UMSK, 0);
	}
}

int r100_init(struct radeon_device *rdev)
{
	int r;

	/* Register debugfs file specific to this group of asics */
	r100_debugfs(rdev);
	/* Disable VGA */
	r100_vga_render_disable(rdev);
	/* Initialize scratch registers */
	radeon_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* sanity check some registers to avoid hangs like after kexec */
	r100_restore_sanity(rdev);
	/* TODO: disable VGA need to use VGA request */
	/* BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	if (rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting combios for RS400/RS480 GPU\n");
		return -EINVAL;
	} else {
		r = radeon_combios_init(rdev);
		if (r)
			return r;
	}
	/* Reset gpu before posting otherwise ATOM will enter infinite loop */
	if (radeon_asic_reset(rdev)) {
		dev_warn(rdev->dev,
			"GPU reset failed! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
			RREG32(R_000E40_RBBM_STATUS),
			RREG32(R_0007C0_CP_STAT));
	}
	/* check if the card is posted */
	if (radeon_boot_test_post_card(rdev) == false)
		return -EINVAL;
	/* Set asic errata */
	r100_errata(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* initialize AGP */
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r) {
			radeon_agp_disable(rdev);
		}
	}
	/* initialize VRAM */
	r100_mc_init(rdev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;
	if (rdev->flags & RADEON_IS_PCI) {
		r = r100_pci_gart_init(rdev);
		if (r)
			return r;
	}
	r100_set_safe_registers(rdev);

	rdev->accel_working = true;
	r = r100_startup(rdev);
	if (r) {
		/* Something went wrong with the accel init; stop acceleration */
		dev_err(rdev->dev, "Disabling GPU acceleration\n");
		r100_cp_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_irq_kms_fini(rdev);
		if (rdev->flags & RADEON_IS_PCI)
			r100_pci_gart_fini(rdev);
		rdev->accel_working = false;
	}
	return 0;
}
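/*
 * MMIO register accessors. Registers that fall inside the remapped
 * MMIO aperture are read/written directly; anything beyond rmmio_size
 * (or any access with always_indirect set) goes through the
 * RADEON_MM_INDEX/RADEON_MM_DATA indirect pair under mmio_idx_lock so
 * that concurrent indirect accesses cannot race on the index register.
 */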
uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
		      bool always_indirect)
{
	if (reg < rdev->rmmio_size && !always_indirect)
		return readl(((void __iomem *)rdev->rmmio) + reg);
	else {
		unsigned long flags;
		uint32_t ret;

		spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
		ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
		spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);

		return ret;
	}
}

void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
		  bool always_indirect)
{
	if (reg < rdev->rmmio_size && !always_indirect)
		writel(v, ((void __iomem *)rdev->rmmio) + reg);
	else {
		unsigned long flags;

		spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
		writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
		spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
	}
}

u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
{
	if (reg < rdev->rio_mem_size)
		return ioread32(rdev->rio_mem + reg);
	else {
		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
		return ioread32(rdev->rio_mem + RADEON_MM_DATA);
	}
}

void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	if (reg < rdev->rio_mem_size)
		iowrite32(v, rdev->rio_mem + reg);
	else {
		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
		iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
	}
}
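/*
 * Note: the RREG32()/WREG32() macros used throughout this file are
 * expected to be thin wrappers around r100_mm_rreg()/r100_mm_wreg()
 * (with always_indirect == false), and the RREG32_IO()/WREG32_IO()
 * variants around r100_io_rreg()/r100_io_wreg(); see the macro
 * definitions in radeon.h.
 */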