/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "r100d.h"
#include "rs100d.h"
#include "rv200d.h"
#include "rv250d.h"
#include "atom.h"

#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/module.h>

#include "r100_reg_safe.h"
#include "rn50_reg_safe.h"

/* Firmware Names */
#define FIRMWARE_R100		"radeon/R100_cp.bin"
#define FIRMWARE_R200		"radeon/R200_cp.bin"
#define FIRMWARE_R300		"radeon/R300_cp.bin"
#define FIRMWARE_R420		"radeon/R420_cp.bin"
#define FIRMWARE_RS690		"radeon/RS690_cp.bin"
#define FIRMWARE_RS600		"radeon/RS600_cp.bin"
#define FIRMWARE_R520		"radeon/R520_cp.bin"

MODULE_FIRMWARE(FIRMWARE_R100);
MODULE_FIRMWARE(FIRMWARE_R200);
MODULE_FIRMWARE(FIRMWARE_R300);
MODULE_FIRMWARE(FIRMWARE_R420);
MODULE_FIRMWARE(FIRMWARE_RS690);
MODULE_FIRMWARE(FIRMWARE_RS600);
MODULE_FIRMWARE(FIRMWARE_R520);

#include "r100_track.h"

/* This file gathers functions specific to:
 * r100, rv100, rs100, rv200, rs200, r200, rv250, rs300, rv280
 * and others in some cases.
 */

/**
 * r100_wait_for_vblank - vblank wait asic callback.
 *
 * @rdev: radeon_device pointer
 * @crtc: crtc to wait for vblank on
 *
 * Wait for vblank on the requested crtc (r1xx-r4xx).
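 * Polls CRTC_STATUS in two phases, first for the current vblank interval
 * (if any) to end and then for the next one to begin; each phase is bounded
 * by rdev->usec_timeout so a disabled or stuck crtc cannot hang the caller.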
 */
void r100_wait_for_vblank(struct radeon_device *rdev, int crtc)
{
	int i;

	if (crtc >= rdev->num_crtc)
		return;

	if (crtc == 0) {
		if (RREG32(RADEON_CRTC_GEN_CNTL) & RADEON_CRTC_EN) {
			for (i = 0; i < rdev->usec_timeout; i++) {
				if (!(RREG32(RADEON_CRTC_STATUS) & RADEON_CRTC_VBLANK_CUR))
					break;
				udelay(1);
			}
			for (i = 0; i < rdev->usec_timeout; i++) {
				if (RREG32(RADEON_CRTC_STATUS) & RADEON_CRTC_VBLANK_CUR)
					break;
				udelay(1);
			}
		}
	} else {
		if (RREG32(RADEON_CRTC2_GEN_CNTL) & RADEON_CRTC2_EN) {
			for (i = 0; i < rdev->usec_timeout; i++) {
				if (!(RREG32(RADEON_CRTC2_STATUS) & RADEON_CRTC2_VBLANK_CUR))
					break;
				udelay(1);
			}
			for (i = 0; i < rdev->usec_timeout; i++) {
				if (RREG32(RADEON_CRTC2_STATUS) & RADEON_CRTC2_VBLANK_CUR)
					break;
				udelay(1);
			}
		}
	}
}

/**
 * r100_pre_page_flip - pre-pageflip callback.
 *
 * @rdev: radeon_device pointer
 * @crtc: crtc to prepare for pageflip on
 *
 * Pre-pageflip callback (r1xx-r4xx).
 * Enables the pageflip irq (vblank irq).
 */
void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
{
	/* enable the pflip int */
	radeon_irq_kms_pflip_irq_get(rdev, crtc);
}

/**
 * r100_post_page_flip - post-pageflip callback.
 *
 * @rdev: radeon_device pointer
 * @crtc: crtc to cleanup pageflip on
 *
 * Post-pageflip callback (r1xx-r4xx).
 * Disables the pageflip irq (vblank irq).
 */
void r100_post_page_flip(struct radeon_device *rdev, int crtc)
{
	/* disable the pflip int */
	radeon_irq_kms_pflip_irq_put(rdev, crtc);
}

/**
 * r100_page_flip - pageflip callback.
 *
 * @rdev: radeon_device pointer
 * @crtc_id: crtc to cleanup pageflip on
 * @crtc_base: new address of the crtc (GPU MC address)
 *
 * Does the actual pageflip (r1xx-r4xx).
 * During vblank we take the crtc lock and wait for the update_pending
 * bit to go high, when it does, we release the lock, and allow the
 * double buffered update to take place.
 * Returns the current update pending status.
 */
u32 r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
{
	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
	u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK;
	int i;

	/* Lock the graphics update lock */
	/* update the scanout addresses */
	WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);

	/* Wait for update_pending to go high. */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET)
			break;
		udelay(1);
	}
	DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");

	/* Unlock the lock, so double-buffering can take place inside vblank */
	tmp &= ~RADEON_CRTC_OFFSET__OFFSET_LOCK;
	WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);

	/* Return current update_pending status: */
	return RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET;
}

/**
 * r100_pm_get_dynpm_state - look up dynpm power state callback.
 *
 * @rdev: radeon_device pointer
 *
 * Look up the optimal power state based on the
 * current state of the GPU (r1xx-r5xx).
 * Used for dynpm only.
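 * The requested state is picked from rdev->pm.dynpm_planned_action; when
 * more than one crtc is active, states flagged SINGLE_DISPLAY_ONLY are
 * skipped, and on downclock a state whose clock mode is flagged NO_DISPLAY
 * is skipped while any crtc is lit.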
 */
void r100_pm_get_dynpm_state(struct radeon_device *rdev)
{
	int i;
	rdev->pm.dynpm_can_upclock = true;
	rdev->pm.dynpm_can_downclock = true;

	switch (rdev->pm.dynpm_planned_action) {
	case DYNPM_ACTION_MINIMUM:
		rdev->pm.requested_power_state_index = 0;
		rdev->pm.dynpm_can_downclock = false;
		break;
	case DYNPM_ACTION_DOWNCLOCK:
		if (rdev->pm.current_power_state_index == 0) {
			rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
			rdev->pm.dynpm_can_downclock = false;
		} else {
			if (rdev->pm.active_crtc_count > 1) {
				for (i = 0; i < rdev->pm.num_power_states; i++) {
					if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
						continue;
					else if (i >= rdev->pm.current_power_state_index) {
						rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
						break;
					} else {
						rdev->pm.requested_power_state_index = i;
						break;
					}
				}
			} else
				rdev->pm.requested_power_state_index =
					rdev->pm.current_power_state_index - 1;
		}
		/* don't use the power state if crtcs are active and no display flag is set */
		if ((rdev->pm.active_crtc_count > 0) &&
		    (rdev->pm.power_state[rdev->pm.requested_power_state_index].clock_info[0].flags &
		     RADEON_PM_MODE_NO_DISPLAY)) {
			rdev->pm.requested_power_state_index++;
		}
		break;
	case DYNPM_ACTION_UPCLOCK:
		if (rdev->pm.current_power_state_index == (rdev->pm.num_power_states - 1)) {
			rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
			rdev->pm.dynpm_can_upclock = false;
		} else {
			if (rdev->pm.active_crtc_count > 1) {
				for (i = (rdev->pm.num_power_states - 1); i >= 0; i--) {
					if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
						continue;
					else if (i <= rdev->pm.current_power_state_index) {
						rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
						break;
					} else {
						rdev->pm.requested_power_state_index = i;
						break;
					}
				}
			} else
				rdev->pm.requested_power_state_index =
					rdev->pm.current_power_state_index + 1;
		}
		break;
	case DYNPM_ACTION_DEFAULT:
		rdev->pm.requested_power_state_index = rdev->pm.default_power_state_index;
		rdev->pm.dynpm_can_upclock = false;
		break;
	case DYNPM_ACTION_NONE:
	default:
		DRM_ERROR("Requested mode for undefined action\n");
		return;
	}
	/* only one clock mode per power state */
	rdev->pm.requested_clock_mode_index = 0;

	DRM_DEBUG_DRIVER("Requested: e: %d m: %d p: %d\n",
			 rdev->pm.power_state[rdev->pm.requested_power_state_index].
			 clock_info[rdev->pm.requested_clock_mode_index].sclk,
			 rdev->pm.power_state[rdev->pm.requested_power_state_index].
			 clock_info[rdev->pm.requested_clock_mode_index].mclk,
			 rdev->pm.power_state[rdev->pm.requested_power_state_index].
			 pcie_lanes);
}

/**
 * r100_pm_init_profile - Initialize power profiles callback.
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the power states used in profile mode
 * (r1xx-r3xx).
 * Used for profile mode only.
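 * Apart from the default profile, every profile below uses power state 0
 * (the lowest) for dpms-off; only the dpms-on index varies between 0 and
 * the default state, and the clock mode index is always 0.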
 */
void r100_pm_init_profile(struct radeon_device *rdev)
{
	/* default */
	rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_ps_idx = rdev->pm.default_power_state_index;
	rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
	rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_cm_idx = 0;
	/* low sh */
	rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_cm_idx = 0;
	/* mid sh */
	rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_cm_idx = 0;
	/* high sh */
	rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
	rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_cm_idx = 0;
	/* low mh */
	rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
	rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_cm_idx = 0;
	/* mid mh */
	rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
	rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_cm_idx = 0;
	/* high mh */
	rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
	rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_cm_idx = 0;
}

/**
 * r100_pm_misc - set additional pm hw parameters callback.
 *
 * @rdev: radeon_device pointer
 *
 * Set non-clock parameters associated with a power state
 * (voltage, pcie lanes, etc.) (r1xx-r4xx).
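 * Voltage is driven through a GPIO when the power state provides one; the
 * SCLK_CNTL/SCLK_CNTL2/SCLK_MORE_CNTL PLL registers control the reduced
 * sclk modes and dynamic voltage drop, and pcie lanes are reconfigured
 * last if the state requests a different width.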
 */
void r100_pm_misc(struct radeon_device *rdev)
{
	int requested_index = rdev->pm.requested_power_state_index;
	struct radeon_power_state *ps = &rdev->pm.power_state[requested_index];
	struct radeon_voltage *voltage = &ps->clock_info[0].voltage;
	u32 tmp, sclk_cntl, sclk_cntl2, sclk_more_cntl;

	if ((voltage->type == VOLTAGE_GPIO) && (voltage->gpio.valid)) {
		if (ps->misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_SUPPORT) {
			tmp = RREG32(voltage->gpio.reg);
			if (voltage->active_high)
				tmp |= voltage->gpio.mask;
			else
				tmp &= ~(voltage->gpio.mask);
			WREG32(voltage->gpio.reg, tmp);
			if (voltage->delay)
				udelay(voltage->delay);
		} else {
			tmp = RREG32(voltage->gpio.reg);
			if (voltage->active_high)
				tmp &= ~voltage->gpio.mask;
			else
				tmp |= voltage->gpio.mask;
			WREG32(voltage->gpio.reg, tmp);
			if (voltage->delay)
				udelay(voltage->delay);
		}
	}

	sclk_cntl = RREG32_PLL(SCLK_CNTL);
	sclk_cntl2 = RREG32_PLL(SCLK_CNTL2);
	sclk_cntl2 &= ~REDUCED_SPEED_SCLK_SEL(3);
	sclk_more_cntl = RREG32_PLL(SCLK_MORE_CNTL);
	sclk_more_cntl &= ~VOLTAGE_DELAY_SEL(3);
	if (ps->misc & ATOM_PM_MISCINFO_ASIC_REDUCED_SPEED_SCLK_EN) {
		sclk_more_cntl |= REDUCED_SPEED_SCLK_EN;
		if (ps->misc & ATOM_PM_MISCINFO_DYN_CLK_3D_IDLE)
			sclk_cntl2 |= REDUCED_SPEED_SCLK_MODE;
		else
			sclk_cntl2 &= ~REDUCED_SPEED_SCLK_MODE;
		if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_2)
			sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(0);
		else if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_4)
			sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(2);
	} else
		sclk_more_cntl &= ~REDUCED_SPEED_SCLK_EN;

	if (ps->misc & ATOM_PM_MISCINFO_ASIC_DYNAMIC_VOLTAGE_EN) {
		sclk_more_cntl |= IO_CG_VOLTAGE_DROP;
		if (voltage->delay) {
			sclk_more_cntl |= VOLTAGE_DROP_SYNC;
			switch (voltage->delay) {
			case 33:
				sclk_more_cntl |= VOLTAGE_DELAY_SEL(0);
				break;
			case 66:
				sclk_more_cntl |= VOLTAGE_DELAY_SEL(1);
				break;
			case 99:
				sclk_more_cntl |= VOLTAGE_DELAY_SEL(2);
				break;
			case 132:
				sclk_more_cntl |= VOLTAGE_DELAY_SEL(3);
				break;
			}
		} else
			sclk_more_cntl &= ~VOLTAGE_DROP_SYNC;
	} else
		sclk_more_cntl &= ~IO_CG_VOLTAGE_DROP;

	if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_HDP_BLOCK_EN)
		sclk_cntl &= ~FORCE_HDP;
	else
		sclk_cntl |= FORCE_HDP;

	WREG32_PLL(SCLK_CNTL, sclk_cntl);
	WREG32_PLL(SCLK_CNTL2, sclk_cntl2);
	WREG32_PLL(SCLK_MORE_CNTL, sclk_more_cntl);

	/* set pcie lanes */
	if ((rdev->flags & RADEON_IS_PCIE) &&
	    !(rdev->flags & RADEON_IS_IGP) &&
	    rdev->asic->pm.set_pcie_lanes &&
	    (ps->pcie_lanes !=
	     rdev->pm.power_state[rdev->pm.current_power_state_index].pcie_lanes)) {
		radeon_set_pcie_lanes(rdev,
				      ps->pcie_lanes);
		DRM_DEBUG_DRIVER("Setting: p: %d\n", ps->pcie_lanes);
	}
}

/**
 * r100_pm_prepare - pre-power state change callback.
 *
 * @rdev: radeon_device pointer
 *
 * Prepare for a power state change (r1xx-r4xx).
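 * Display requests are gated by setting DISP_REQ_EN_B on every enabled
 * crtc so the memory controller is quiescent while clocks are
 * reprogrammed; r100_pm_finish() clears the bit again afterwards.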
 */
void r100_pm_prepare(struct radeon_device *rdev)
{
	struct drm_device *ddev = rdev->ddev;
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	u32 tmp;

	/* disable any active CRTCs */
	list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
		radeon_crtc = to_radeon_crtc(crtc);
		if (radeon_crtc->enabled) {
			if (radeon_crtc->crtc_id) {
				tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
				tmp |= RADEON_CRTC2_DISP_REQ_EN_B;
				WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
			} else {
				tmp = RREG32(RADEON_CRTC_GEN_CNTL);
				tmp |= RADEON_CRTC_DISP_REQ_EN_B;
				WREG32(RADEON_CRTC_GEN_CNTL, tmp);
			}
		}
	}
}

/**
 * r100_pm_finish - post-power state change callback.
 *
 * @rdev: radeon_device pointer
 *
 * Clean up after a power state change (r1xx-r4xx).
 */
void r100_pm_finish(struct radeon_device *rdev)
{
	struct drm_device *ddev = rdev->ddev;
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	u32 tmp;

	/* enable any active CRTCs */
	list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
		radeon_crtc = to_radeon_crtc(crtc);
		if (radeon_crtc->enabled) {
			if (radeon_crtc->crtc_id) {
				tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
				tmp &= ~RADEON_CRTC2_DISP_REQ_EN_B;
				WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
			} else {
				tmp = RREG32(RADEON_CRTC_GEN_CNTL);
				tmp &= ~RADEON_CRTC_DISP_REQ_EN_B;
				WREG32(RADEON_CRTC_GEN_CNTL, tmp);
			}
		}
	}
}

/**
 * r100_gui_idle - gui idle callback.
 *
 * @rdev: radeon_device pointer
 *
 * Check if the GUI (2D/3D engines) is idle (r1xx-r5xx).
 * Returns true if idle, false if not.
 */
bool r100_gui_idle(struct radeon_device *rdev)
{
	if (RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)
		return false;
	else
		return true;
}

/* hpd for digital panel detect/disconnect */
/**
 * r100_hpd_sense - hpd sense callback.
 *
 * @rdev: radeon_device pointer
 * @hpd: hpd (hotplug detect) pin
 *
 * Checks if a digital monitor is connected (r1xx-r4xx).
 * Returns true if connected, false if not connected.
 */
bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
{
	bool connected = false;

	switch (hpd) {
	case RADEON_HPD_1:
		if (RREG32(RADEON_FP_GEN_CNTL) & RADEON_FP_DETECT_SENSE)
			connected = true;
		break;
	case RADEON_HPD_2:
		if (RREG32(RADEON_FP2_GEN_CNTL) & RADEON_FP2_DETECT_SENSE)
			connected = true;
		break;
	default:
		break;
	}
	return connected;
}

/**
 * r100_hpd_set_polarity - hpd set polarity callback.
 *
 * @rdev: radeon_device pointer
 * @hpd: hpd (hotplug detect) pin
 *
 * Set the polarity of the hpd pin (r1xx-r4xx).
 */
void r100_hpd_set_polarity(struct radeon_device *rdev,
			   enum radeon_hpd_id hpd)
{
	u32 tmp;
	bool connected = r100_hpd_sense(rdev, hpd);

	switch (hpd) {
	case RADEON_HPD_1:
		tmp = RREG32(RADEON_FP_GEN_CNTL);
		if (connected)
			tmp &= ~RADEON_FP_DETECT_INT_POL;
		else
			tmp |= RADEON_FP_DETECT_INT_POL;
		WREG32(RADEON_FP_GEN_CNTL, tmp);
		break;
	case RADEON_HPD_2:
		tmp = RREG32(RADEON_FP2_GEN_CNTL);
		if (connected)
			tmp &= ~RADEON_FP2_DETECT_INT_POL;
		else
			tmp |= RADEON_FP2_DETECT_INT_POL;
		WREG32(RADEON_FP2_GEN_CNTL, tmp);
		break;
	default:
		break;
	}
}

/**
 * r100_hpd_init - hpd setup callback.
 *
 * @rdev: radeon_device pointer
 *
 * Setup the hpd pins used by the card (r1xx-r4xx).
 * Set the polarity, and enable the hpd interrupts.
 */
void r100_hpd_init(struct radeon_device *rdev)
{
	struct drm_device *dev = rdev->ddev;
	struct drm_connector *connector;
	unsigned enable = 0;

	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
		enable |= 1 << radeon_connector->hpd.hpd;
		radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
	}
	radeon_irq_kms_enable_hpd(rdev, enable);
}

/**
 * r100_hpd_fini - hpd tear down callback.
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the hpd pins used by the card (r1xx-r4xx).
 * Disable the hpd interrupts.
 */
void r100_hpd_fini(struct radeon_device *rdev)
{
	struct drm_device *dev = rdev->ddev;
	struct drm_connector *connector;
	unsigned disable = 0;

	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
		disable |= 1 << radeon_connector->hpd.hpd;
	}
	radeon_irq_kms_disable_hpd(rdev, disable);
}

/*
 * PCI GART
 */
void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
{
	/* TODO: can we do something here? */
	/* The hw seems to cache only one entry, so we should discard it;
	 * otherwise the first GPU GART read to hit that entry could end
	 * up at the wrong address. */
}

int r100_pci_gart_init(struct radeon_device *rdev)
{
	int r;

	if (rdev->gart.ptr) {
		WARN(1, "R100 PCI GART already initialized\n");
		return 0;
	}
	/* Initialize common gart structure */
	r = radeon_gart_init(rdev);
	if (r)
		return r;
	rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
	rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
	rdev->asic->gart.set_page = &r100_pci_gart_set_page;
	return radeon_gart_table_ram_alloc(rdev);
}

int r100_pci_gart_enable(struct radeon_device *rdev)
{
	uint32_t tmp;

	radeon_gart_restore(rdev);
	/* discard memory request outside of configured range */
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
	WREG32(RADEON_AIC_CNTL, tmp);
	/* set address range for PCI address translate */
	WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_start);
	WREG32(RADEON_AIC_HI_ADDR, rdev->mc.gtt_end);
	/* set PCI GART page-table base address */
	WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
	WREG32(RADEON_AIC_CNTL, tmp);
	r100_pci_gart_tlb_flush(rdev);
	DRM_INFO("PCI GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}

void r100_pci_gart_disable(struct radeon_device *rdev)
{
	uint32_t tmp;

	/* discard memory request outside of configured range */
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
	WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
	WREG32(RADEON_AIC_LO_ADDR, 0);
	WREG32(RADEON_AIC_HI_ADDR, 0);
}

int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
{
	u32 *gtt = rdev->gart.ptr;

	if (i < 0 || i > rdev->gart.num_gpu_pages) {
		return -EINVAL;
	}
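	/* Each table entry is one little-endian 32-bit bus address per GPU
	 * page (hence table_size = num_gpu_pages * 4 above); the AIC only
	 * translates to 32-bit addresses, so the upper half of addr is
	 * dropped. */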
	gtt[i] = cpu_to_le32(lower_32_bits(addr));
	return 0;
}

void r100_pci_gart_fini(struct radeon_device *rdev)
{
	radeon_gart_fini(rdev);
	r100_pci_gart_disable(rdev);
	radeon_gart_table_ram_free(rdev);
}

int r100_irq_set(struct radeon_device *rdev)
{
	uint32_t tmp = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		WREG32(R_000040_GEN_INT_CNTL, 0);
		return -EINVAL;
	}
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		tmp |= RADEON_SW_INT_ENABLE;
	}
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		tmp |= RADEON_CRTC_VBLANK_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		tmp |= RADEON_CRTC2_VBLANK_MASK;
	}
	if (rdev->irq.hpd[0]) {
		tmp |= RADEON_FP_DETECT_MASK;
	}
	if (rdev->irq.hpd[1]) {
		tmp |= RADEON_FP2_DETECT_MASK;
	}
	WREG32(RADEON_GEN_INT_CNTL, tmp);
	return 0;
}

void r100_irq_disable(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(R_000040_GEN_INT_CNTL, 0);
	/* Wait and acknowledge irq */
	mdelay(1);
	tmp = RREG32(R_000044_GEN_INT_STATUS);
	WREG32(R_000044_GEN_INT_STATUS, tmp);
}

static uint32_t r100_irq_ack(struct radeon_device *rdev)
{
	uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
	uint32_t irq_mask = RADEON_SW_INT_TEST |
		RADEON_CRTC_VBLANK_STAT | RADEON_CRTC2_VBLANK_STAT |
		RADEON_FP_DETECT_STAT | RADEON_FP2_DETECT_STAT;

	if (irqs) {
		WREG32(RADEON_GEN_INT_STATUS, irqs);
	}
	return irqs & irq_mask;
}

int r100_irq_process(struct radeon_device *rdev)
{
	uint32_t status, msi_rearm;
	bool queue_hotplug = false;

	status = r100_irq_ack(rdev);
	if (!status) {
		return IRQ_NONE;
	}
	if (rdev->shutdown) {
		return IRQ_NONE;
	}
	while (status) {
		/* SW interrupt */
		if (status & RADEON_SW_INT_TEST) {
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
		}
		/* Vertical blank interrupts */
		if (status & RADEON_CRTC_VBLANK_STAT) {
			if (rdev->irq.crtc_vblank_int[0]) {
				drm_handle_vblank(rdev->ddev, 0);
				rdev->pm.vblank_sync = true;
				wake_up(&rdev->irq.vblank_queue);
			}
			if (atomic_read(&rdev->irq.pflip[0]))
				radeon_crtc_handle_flip(rdev, 0);
		}
		if (status & RADEON_CRTC2_VBLANK_STAT) {
			if (rdev->irq.crtc_vblank_int[1]) {
				drm_handle_vblank(rdev->ddev, 1);
				rdev->pm.vblank_sync = true;
				wake_up(&rdev->irq.vblank_queue);
			}
			if (atomic_read(&rdev->irq.pflip[1]))
				radeon_crtc_handle_flip(rdev, 1);
		}
		if (status & RADEON_FP_DETECT_STAT) {
			queue_hotplug = true;
			DRM_DEBUG("HPD1\n");
		}
		if (status & RADEON_FP2_DETECT_STAT) {
			queue_hotplug = true;
			DRM_DEBUG("HPD2\n");
		}
		status = r100_irq_ack(rdev);
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (rdev->msi_enabled) {
		switch (rdev->family) {
		case CHIP_RS400:
		case CHIP_RS480:
			msi_rearm = RREG32(RADEON_AIC_CNTL) & ~RS400_MSI_REARM;
			WREG32(RADEON_AIC_CNTL, msi_rearm);
			WREG32(RADEON_AIC_CNTL, msi_rearm | RS400_MSI_REARM);
			break;
		default:
			WREG32(RADEON_MSI_REARM_EN, RV370_MSI_REARM_EN);
			break;
		}
	}
	return IRQ_HANDLED;
}

u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
{
	if (crtc == 0)
		return RREG32(RADEON_CRTC_CRNT_FRAME);
	else
		return RREG32(RADEON_CRTC2_CRNT_FRAME);
}

/* Whoever calls radeon_fence_emit should call ring_lock and ask
 * for enough space (today the callers are ib schedule and buffer move) */
void r100_fence_ring_emit(struct radeon_device *rdev,
			  struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];

	/* We have to make sure that caches are flushed before
	 * CPU might read something from VRAM. */
	radeon_ring_write(ring, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(ring, RADEON_RB3D_DC_FLUSH_ALL);
	radeon_ring_write(ring, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
	radeon_ring_write(ring, RADEON_RB3D_ZC_FLUSH_ALL);
	/* Wait until IDLE & CLEAN */
	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
	radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
			  RADEON_HDP_READ_BUFFER_INVALIDATE);
	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
	radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
	/* Emit fence sequence & fire IRQ */
	radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0));
	radeon_ring_write(ring, RADEON_SW_INT_FIRE);
}

void r100_semaphore_ring_emit(struct radeon_device *rdev,
			      struct radeon_ring *ring,
			      struct radeon_semaphore *semaphore,
			      bool emit_wait)
{
	/* Unused on older asics, since we don't have semaphores or multiple rings */
	BUG();
}

int r100_copy_blit(struct radeon_device *rdev,
		   uint64_t src_offset,
		   uint64_t dst_offset,
		   unsigned num_gpu_pages,
		   struct radeon_fence **fence)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	uint32_t cur_pages;
	uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
	uint32_t pitch;
	uint32_t stride_pixels;
	unsigned ndw;
	int num_loops;
	int r = 0;

	/* radeon limited to 16k stride */
	stride_bytes &= 0x3fff;
	/* radeon pitch is /64 */
	pitch = stride_bytes / 64;
	stride_pixels = stride_bytes / 4;
	num_loops = DIV_ROUND_UP(num_gpu_pages, 8191);

	/* Ask for enough room for blit + flush + fence */
	ndw = 64 + (10 * num_loops);
	r = radeon_ring_lock(rdev, ring, ndw);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
		return -EINVAL;
	}
	while (num_gpu_pages > 0) {
		cur_pages = num_gpu_pages;
		if (cur_pages > 8191) {
			cur_pages = 8191;
		}
		num_gpu_pages -= cur_pages;

		/* pages are in Y direction - height
		   page width in X direction - width */
		radeon_ring_write(ring, PACKET3(PACKET3_BITBLT_MULTI, 8));
		radeon_ring_write(ring,
				  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
				  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
				  RADEON_GMC_SRC_CLIPPING |
				  RADEON_GMC_DST_CLIPPING |
				  RADEON_GMC_BRUSH_NONE |
				  (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
				  RADEON_GMC_SRC_DATATYPE_COLOR |
				  RADEON_ROP3_S |
				  RADEON_DP_SRC_SOURCE_MEMORY |
				  RADEON_GMC_CLR_CMP_CNTL_DIS |
				  RADEON_GMC_WR_MSK_DIS);
		radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10));
		radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10));
		radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
		radeon_ring_write(ring, num_gpu_pages);
		radeon_ring_write(ring, num_gpu_pages);
		radeon_ring_write(ring, cur_pages | (stride_pixels << 16));
	}
	radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(ring, RADEON_RB2D_DC_FLUSH_ALL);
	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(ring,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_HOST_IDLECLEAN |
			  RADEON_WAIT_DMA_GUI_IDLE);
	if (fence) {
		r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
	}
	radeon_ring_unlock_commit(rdev, ring);
	return r;
}

static int r100_cp_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	u32 tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(R_000E40_RBBM_STATUS);
		if (!G_000E40_CP_CMDSTRM_BUSY(tmp)) {
			return 0;
		}
		udelay(1);
	}
	return -1;
}

void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
{
	int r;

	r = radeon_ring_lock(rdev, ring, 2);
	if (r) {
		return;
	}
	radeon_ring_write(ring, PACKET0(RADEON_ISYNC_CNTL, 0));
	radeon_ring_write(ring,
			  RADEON_ISYNC_ANY2D_IDLE3D |
			  RADEON_ISYNC_ANY3D_IDLE2D |
			  RADEON_ISYNC_WAIT_IDLEGUI |
			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
	radeon_ring_unlock_commit(rdev, ring);
}


/* Load the microcode for the CP */
static int r100_cp_init_microcode(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	const char *fw_name = NULL;
	int err;

	DRM_DEBUG_KMS("\n");

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}
	if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
	    (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
	    (rdev->family == CHIP_RS200)) {
		DRM_INFO("Loading R100 Microcode\n");
		fw_name = FIRMWARE_R100;
	} else if ((rdev->family == CHIP_R200) ||
		   (rdev->family == CHIP_RV250) ||
		   (rdev->family == CHIP_RV280) ||
		   (rdev->family == CHIP_RS300)) {
		DRM_INFO("Loading R200 Microcode\n");
		fw_name = FIRMWARE_R200;
	} else if ((rdev->family == CHIP_R300) ||
		   (rdev->family == CHIP_R350) ||
		   (rdev->family == CHIP_RV350) ||
		   (rdev->family == CHIP_RV380) ||
		   (rdev->family == CHIP_RS400) ||
		   (rdev->family == CHIP_RS480)) {
		DRM_INFO("Loading R300 Microcode\n");
		fw_name = FIRMWARE_R300;
	} else if ((rdev->family == CHIP_R420) ||
		   (rdev->family == CHIP_R423) ||
		   (rdev->family == CHIP_RV410)) {
		DRM_INFO("Loading R400 Microcode\n");
		fw_name = FIRMWARE_R420;
	} else if ((rdev->family == CHIP_RS690) ||
		   (rdev->family == CHIP_RS740)) {
		DRM_INFO("Loading RS690/RS740 Microcode\n");
		fw_name = FIRMWARE_RS690;
	} else if (rdev->family == CHIP_RS600) {
		DRM_INFO("Loading RS600 Microcode\n");
		fw_name = FIRMWARE_RS600;
	} else if ((rdev->family == CHIP_RV515) ||
		   (rdev->family == CHIP_R520) ||
		   (rdev->family == CHIP_RV530) ||
		   (rdev->family == CHIP_R580) ||
		   (rdev->family == CHIP_RV560) ||
		   (rdev->family == CHIP_RV570)) {
		DRM_INFO("Loading R500 Microcode\n");
		fw_name = FIRMWARE_R520;
	}

	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
	platform_device_unregister(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n",
		       fw_name);
	} else if (rdev->me_fw->size % 8) {
		printk(KERN_ERR
		       "radeon_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
	}
	return err;
}

static void r100_cp_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i, size;

	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}

	if (rdev->me_fw) {
		size = rdev->me_fw->size / 4;
		fw_data = (const __be32 *)&rdev->me_fw->data[0];
		WREG32(RADEON_CP_ME_RAM_ADDR, 0);
		for (i = 0; i < size; i += 2) {
			WREG32(RADEON_CP_ME_RAM_DATAH,
			       be32_to_cpup(&fw_data[i]));
			WREG32(RADEON_CP_ME_RAM_DATAL,
			       be32_to_cpup(&fw_data[i + 1]));
		}
	}
}

int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	unsigned rb_bufsz;
	unsigned rb_blksz;
	unsigned max_fetch;
	unsigned pre_write_timer;
	unsigned pre_write_limit;
	unsigned indirect2_start;
	unsigned indirect1_start;
	uint32_t tmp;
	int r;

	if (r100_debugfs_cp_init(rdev)) {
		DRM_ERROR("Failed to register debugfs file for CP !\n");
	}
	if (!rdev->me_fw) {
		r = r100_cp_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	/* Align ring size */
	rb_bufsz = drm_order(ring_size / 8);
	ring_size = (1 << (rb_bufsz + 1)) * 4;
	r100_cp_load_microcode(rdev);
	r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_RB_RPTR, RADEON_CP_RB_WPTR,
			     0, 0x7fffff, RADEON_CP_PACKET2);
	if (r) {
		return r;
	}
	/* Each time the cp reads 1024 bytes (16 dword/quadword) update
	 * the rptr copy in system ram */
	rb_blksz = 9;
	/* cp will read 128 bytes at a time (4 dwords) */
	max_fetch = 1;
	ring->align_mask = 16 - 1;
	/* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */
	pre_write_timer = 64;
	/* Force CP_RB_WPTR write if written more than one time before the
	 * delay expires
	 */
	pre_write_limit = 0;
	/* Setup the cp cache like this (cache size is 96 dwords):
	 *	RING		0  to 15
	 *	INDIRECT1	16 to 79
	 *	INDIRECT2	80 to 95
	 * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
	 * indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords))
	 * indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
	 * Idea being that most of the gpu cmd will be through indirect1 buffer
	 * so it gets the bigger cache.
	 */
	indirect2_start = 80;
	indirect1_start = 16;
	/* cp setup */
	WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
	tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
	       REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
	       REG_SET(RADEON_MAX_FETCH, max_fetch));
#ifdef __BIG_ENDIAN
	tmp |= RADEON_BUF_SWAP_32BIT;
#endif
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE);

	/* Set ring address */
	DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)ring->gpu_addr);
	WREG32(RADEON_CP_RB_BASE, ring->gpu_addr);
	/* Force read & write ptr to 0 */
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE);
	WREG32(RADEON_CP_RB_RPTR_WR, 0);
	ring->wptr = 0;
	WREG32(RADEON_CP_RB_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(R_00070C_CP_RB_RPTR_ADDR,
	       S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) >> 2));
	WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET);

	if (rdev->wb.enabled)
		WREG32(R_000770_SCRATCH_UMSK, 0xff);
	else {
		tmp |= RADEON_RB_NO_UPDATE;
		WREG32(R_000770_SCRATCH_UMSK, 0);
	}

	WREG32(RADEON_CP_RB_CNTL, tmp);
	udelay(10);
	ring->rptr = RREG32(RADEON_CP_RB_RPTR);
	/* Set cp mode to bus mastering & enable cp */
	WREG32(RADEON_CP_CSQ_MODE,
	       REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
	       REG_SET(RADEON_INDIRECT1_START, indirect1_start));
	WREG32(RADEON_CP_RB_WPTR_DELAY, 0);
	WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D);
	WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);

	/* at this point everything should be setup correctly to enable master */
	pci_set_master(rdev->pdev);

	radeon_ring_start(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring);
	if (r) {
		DRM_ERROR("radeon: cp isn't working (%d).\n", r);
		return r;
	}
	ring->ready = true;
	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	if (!ring->rptr_save_reg /* not resuming from suspend */
	    && radeon_ring_supports_scratch_reg(rdev, ring)) {
		r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
		if (r) {
			DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
			ring->rptr_save_reg = 0;
		}
	}
	return 0;
}

void r100_cp_fini(struct radeon_device *rdev)
{
	if (r100_cp_wait_for_idle(rdev)) {
		DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n");
	}
	/* Disable ring */
	r100_cp_disable(rdev);
	radeon_scratch_free(rdev, rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	DRM_INFO("radeon: cp finalized\n");
}

void r100_cp_disable(struct radeon_device *rdev)
{
	/* Disable ring */
	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	WREG32(RADEON_CP_CSQ_MODE, 0);
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	WREG32(R_000770_SCRATCH_UMSK, 0);
	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}
}

/*
 * CS functions
 */
int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
			    struct radeon_cs_packet *pkt,
			    unsigned idx,
			    unsigned reg)
{
	int r;
	u32 tile_flags = 0;
	u32 tmp;
	struct radeon_cs_reloc *reloc;
	u32 value;

	r = radeon_cs_packet_next_reloc(p, &reloc, 0);
	if (r) {
		DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
			  idx, reg);
		radeon_cs_dump_packet(p, pkt);
		return r;
	}

	value = radeon_get_ib_value(p, idx);
	tmp = value & 0x003fffff;
	tmp += (((u32)reloc->lobj.gpu_offset) >> 10);

	if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
			tile_flags |= RADEON_DST_TILE_MACRO;
		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
			if (reg == RADEON_SRC_PITCH_OFFSET) {
				DRM_ERROR("Cannot src blit from microtiled surface\n");
				radeon_cs_dump_packet(p, pkt);
				return -EINVAL;
			}
			tile_flags |= RADEON_DST_TILE_MICRO;
		}

		tmp |= tile_flags;
		p->ib.ptr[idx] = (value & 0x3fc00000) | tmp;
	} else
		p->ib.ptr[idx] = (value & 0xffc00000) | tmp;
	return 0;
}

int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
			     struct radeon_cs_packet *pkt,
			     int idx)
{
	unsigned c, i;
	struct radeon_cs_reloc *reloc;
	struct r100_cs_track *track;
	int r = 0;
	volatile uint32_t *ib;
	u32 idx_value;

	ib = p->ib.ptr;
	track = (struct r100_cs_track *)p->track;
	c = radeon_get_ib_value(p, idx++) & 0x1F;
	if (c > 16) {
		DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
			  pkt->opcode);
		radeon_cs_dump_packet(p, pkt);
		return -EINVAL;
	}
	track->num_arrays = c;
	for (i = 0; i < (c - 1); i += 2, idx += 3) {
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n",
				  pkt->opcode);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		idx_value = radeon_get_ib_value(p, idx);
		ib[idx + 1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);

		track->arrays[i + 0].esize = idx_value >> 8;
		track->arrays[i + 0].robj = reloc->robj;
		track->arrays[i + 0].esize &= 0x7F;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n",
				  pkt->opcode);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx + 2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
		track->arrays[i + 1].robj = reloc->robj;
		track->arrays[i + 1].esize = idx_value >> 24;
		track->arrays[i + 1].esize &= 0x7F;
	}
	if (c & 1) {
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n",
				  pkt->opcode);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		idx_value = radeon_get_ib_value(p, idx);
		ib[idx + 1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
		track->arrays[i + 0].robj = reloc->robj;
		track->arrays[i + 0].esize = idx_value >> 8;
		track->arrays[i + 0].esize &= 0x7F;
	}
	return r;
}

int r100_cs_parse_packet0(struct radeon_cs_parser *p,
			  struct radeon_cs_packet *pkt,
			  const unsigned *auth, unsigned n,
			  radeon_packet0_check_t check)
{
	unsigned reg;
	unsigned i, j, m;
	unsigned idx;
	int r;

	idx = pkt->idx + 1;
	reg = pkt->reg;
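	/* auth is a bitmap with one bit per 32-bit register: bit
	 * (reg >> 2) & 31 of dword reg >> 7, so each bitmap dword covers
	 * a 128-byte register range. */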
	/* Check that the register falls into the range covered by the
	 * number of entries (n) in the safe register bitmap.
	 */
	if (pkt->one_reg_wr) {
		if ((reg >> 7) > n) {
			return -EINVAL;
		}
	} else {
		if (((reg + (pkt->count << 2)) >> 7) > n) {
			return -EINVAL;
		}
	}
	for (i = 0; i <= pkt->count; i++, idx++) {
		j = (reg >> 7);
		m = 1 << ((reg >> 2) & 31);
		if (auth[j] & m) {
			r = check(p, pkt, idx, reg);
			if (r) {
				return r;
			}
		}
		if (pkt->one_reg_wr) {
			if (!(auth[j] & m)) {
				break;
			}
		} else {
			reg += 4;
		}
	}
	return 0;
}

/**
 * r100_cs_packet_parse_vline() - parse userspace VLINE packet
 * @p: parser structure holding parsing context.
 *
 * Userspace sends a special sequence for VLINE waits.
 * PACKET0 - VLINE_START_END + value
 * PACKET0 - WAIT_UNTIL + value
 * RELOC (P3) - crtc_id in reloc.
 *
 * This function parses this and relocates the VLINE START END
 * and WAIT UNTIL packets to the correct crtc.
 * It also detects a switched off crtc and nulls out the
 * wait in that case.
 */
int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
	struct drm_mode_object *obj;
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	struct radeon_cs_packet p3reloc, waitreloc;
	int crtc_id;
	int r;
	uint32_t header, h_idx, reg;
	volatile uint32_t *ib;

	ib = p->ib.ptr;

	/* parse the wait until */
	r = radeon_cs_packet_parse(p, &waitreloc, p->idx);
	if (r)
		return r;

	/* check it's a wait until and only 1 count */
	if (waitreloc.reg != RADEON_WAIT_UNTIL ||
	    waitreloc.count != 0) {
		DRM_ERROR("vline wait had illegal wait until segment\n");
		return -EINVAL;
	}

	if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) {
		DRM_ERROR("vline wait had illegal wait until\n");
		return -EINVAL;
	}

	/* jump over the NOP */
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx + waitreloc.count + 2);
	if (r)
		return r;

	h_idx = p->idx - 2;
	p->idx += waitreloc.count + 2;
	p->idx += p3reloc.count + 2;

	header = radeon_get_ib_value(p, h_idx);
	crtc_id = radeon_get_ib_value(p, h_idx + 5);
	reg = R100_CP_PACKET0_GET_REG(header);
	obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
	if (!obj) {
		DRM_ERROR("cannot find crtc %d\n", crtc_id);
		return -EINVAL;
	}
	crtc = obj_to_crtc(obj);
	radeon_crtc = to_radeon_crtc(crtc);
	crtc_id = radeon_crtc->crtc_id;

	if (!crtc->enabled) {
		/* if the CRTC isn't enabled - we need to nop out the wait until */
		ib[h_idx + 2] = PACKET2(0);
		ib[h_idx + 3] = PACKET2(0);
	} else if (crtc_id == 1) {
		switch (reg) {
		case AVIVO_D1MODE_VLINE_START_END:
			header &= ~R300_CP_PACKET0_REG_MASK;
			header |= AVIVO_D2MODE_VLINE_START_END >> 2;
			break;
		case RADEON_CRTC_GUI_TRIG_VLINE:
			header &= ~R300_CP_PACKET0_REG_MASK;
			header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
			break;
		default:
			DRM_ERROR("unknown crtc reloc\n");
			return -EINVAL;
		}
		ib[h_idx] = header;
		ib[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
	}

	return 0;
}

static int r100_get_vtx_size(uint32_t vtx_fmt)
{
	int vtx_size;
	vtx_size = 2;
	/* ordered according to bits in spec */
	if (vtx_fmt & RADEON_SE_VTX_FMT_W0)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPCOLOR)
		vtx_size += 3;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPALPHA)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_PKCOLOR)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPSPEC)
		vtx_size += 3;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPFOG)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_PKSPEC)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST0)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST1)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST2)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q2)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST3)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q3)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q0)
		vtx_size++;
	/* blend weight */
	if (vtx_fmt & (0x7 << 15))
		vtx_size += (vtx_fmt >> 15) & 0x7;
	if (vtx_fmt & RADEON_SE_VTX_FMT_N0)
		vtx_size += 3;
	if (vtx_fmt & RADEON_SE_VTX_FMT_XY1)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Z1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_W1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_N1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Z)
		vtx_size++;
	return vtx_size;
}

static int r100_packet0_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt,
			      unsigned idx, unsigned reg)
{
	struct radeon_cs_reloc *reloc;
	struct r100_cs_track *track;
	volatile uint32_t *ib;
	uint32_t tmp;
	int r;
	int i, face;
	u32 tile_flags = 0;
	u32 idx_value;

	ib = p->ib.ptr;
	track = (struct r100_cs_track *)p->track;

	idx_value = radeon_get_ib_value(p, idx);

	switch (reg) {
	case RADEON_CRTC_GUI_TRIG_VLINE:
		r = r100_cs_packet_parse_vline(p);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		break;
	/* FIXME: only allow PACKET3 blit? easier to check for out of
	 * range access */
	case RADEON_DST_PITCH_OFFSET:
	case RADEON_SRC_PITCH_OFFSET:
		r = r100_reloc_pitch_offset(p, pkt, idx, reg);
		if (r)
			return r;
		break;
	case RADEON_RB3D_DEPTHOFFSET:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->zb.robj = reloc->robj;
		track->zb.offset = idx_value;
		track->zb_dirty = true;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		break;
	case RADEON_RB3D_COLOROFFSET:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->cb[0].robj = reloc->robj;
		track->cb[0].offset = idx_value;
		track->cb_dirty = true;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		break;
	case RADEON_PP_TXOFFSET_0:
	case RADEON_PP_TXOFFSET_1:
	case RADEON_PP_TXOFFSET_2:
		i = (reg - RADEON_PP_TXOFFSET_0) / 24;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
				tile_flags |= RADEON_TXO_MACRO_TILE;
			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
				tile_flags |= RADEON_TXO_MICRO_TILE_X2;

			tmp = idx_value & ~(0x7 << 2);
			tmp |= tile_flags;
			ib[idx] = tmp + ((u32)reloc->lobj.gpu_offset);
		} else
			ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_OFFSET_T0_0:
	case RADEON_PP_CUBIC_OFFSET_T0_1:
	case RADEON_PP_CUBIC_OFFSET_T0_2:
	case RADEON_PP_CUBIC_OFFSET_T0_3:
	case RADEON_PP_CUBIC_OFFSET_T0_4:
		i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[0].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[0].cube_info[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_OFFSET_T1_0:
	case RADEON_PP_CUBIC_OFFSET_T1_1:
	case RADEON_PP_CUBIC_OFFSET_T1_2:
	case RADEON_PP_CUBIC_OFFSET_T1_3:
	case RADEON_PP_CUBIC_OFFSET_T1_4:
		i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[1].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[1].cube_info[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_OFFSET_T2_0:
	case RADEON_PP_CUBIC_OFFSET_T2_1:
	case RADEON_PP_CUBIC_OFFSET_T2_2:
	case RADEON_PP_CUBIC_OFFSET_T2_3:
	case RADEON_PP_CUBIC_OFFSET_T2_4:
		i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[2].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[2].cube_info[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_RE_WIDTH_HEIGHT:
		track->maxy = ((idx_value >> 16) & 0x7FF);
		track->cb_dirty = true;
		track->zb_dirty = true;
		break;
	case RADEON_RB3D_COLORPITCH:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
				tile_flags |= RADEON_COLOR_TILE_ENABLE;
			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
				tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;

			tmp = idx_value & ~(0x7 << 16);
			tmp |= tile_flags;
			ib[idx] = tmp;
		} else
			ib[idx] = idx_value;

		track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
		track->cb_dirty = true;
		break;
	case RADEON_RB3D_DEPTHPITCH:
		track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
		track->zb_dirty = true;
		break;
	case RADEON_RB3D_CNTL:
		switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
		case 7:
		case 8:
		case 9:
		case 11:
		case 12:
			track->cb[0].cpp = 1;
			break;
		case 3:
		case 4:
		case 15:
			track->cb[0].cpp = 2;
			break;
		case 6:
			track->cb[0].cpp = 4;
			break;
		default:
			DRM_ERROR("Invalid color buffer format (%d) !\n",
				  ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f));
			return -EINVAL;
		}
		track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
		track->cb_dirty = true;
		track->zb_dirty = true;
		break;
	case RADEON_RB3D_ZSTENCILCNTL:
		switch (idx_value & 0xf) {
		case 0:
			track->zb.cpp = 2;
			break;
		case 2:
		case 3:
		case 4:
		case 5:
		case 9:
		case 11:
			track->zb.cpp = 4;
			break;
		default:
			break;
		}
		track->zb_dirty = true;
		break;
	case RADEON_RB3D_ZPASS_ADDR:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		break;
	case RADEON_PP_CNTL:
		{
			uint32_t temp = idx_value >> 4;
			for (i = 0; i < track->num_texture; i++)
				track->textures[i].enabled = !!(temp & (1 << i));
			track->tex_dirty = true;
		}
		break;
	case RADEON_SE_VF_CNTL:
		track->vap_vf_cntl = idx_value;
		break;
	case RADEON_SE_VTX_FMT:
		track->vtx_size = r100_get_vtx_size(idx_value);
		break;
	case RADEON_PP_TEX_SIZE_0:
	case RADEON_PP_TEX_SIZE_1:
	case RADEON_PP_TEX_SIZE_2:
		i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
		track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
		track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
		track->tex_dirty = true;
		break;
	case RADEON_PP_TEX_PITCH_0:
	case RADEON_PP_TEX_PITCH_1:
	case RADEON_PP_TEX_PITCH_2:
		i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
		track->textures[i].pitch = idx_value + 32;
		track->tex_dirty = true;
		break;
	case RADEON_PP_TXFILTER_0:
	case RADEON_PP_TXFILTER_1:
	case RADEON_PP_TXFILTER_2:
		i = (reg - RADEON_PP_TXFILTER_0) / 24;
		track->textures[i].num_levels = ((idx_value & RADEON_MAX_MIP_LEVEL_MASK)
						 >> RADEON_MAX_MIP_LEVEL_SHIFT);
		tmp = (idx_value >> 23) & 0x7;
		if (tmp == 2 || tmp == 6)
			track->textures[i].roundup_w = false;
		tmp = (idx_value >> 27) & 0x7;
		if (tmp == 2 || tmp == 6)
			track->textures[i].roundup_h = false;
		track->tex_dirty = true;
		break;
	case RADEON_PP_TXFORMAT_0:
	case RADEON_PP_TXFORMAT_1:
	case RADEON_PP_TXFORMAT_2:
		i = (reg - RADEON_PP_TXFORMAT_0) / 24;
		if (idx_value & RADEON_TXFORMAT_NON_POWER2) {
			track->textures[i].use_pitch = 1;
		} else {
			track->textures[i].use_pitch = 0;
			track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK);
			track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK);
		}
		if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE)
			track->textures[i].tex_coord_type = 2;
		switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) {
		case RADEON_TXFORMAT_I8:
		case RADEON_TXFORMAT_RGB332:
		case RADEON_TXFORMAT_Y8:
			track->textures[i].cpp = 1;
			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
			break;
		case RADEON_TXFORMAT_AI88:
		case RADEON_TXFORMAT_ARGB1555:
		case RADEON_TXFORMAT_RGB565:
		case RADEON_TXFORMAT_ARGB4444:
		case RADEON_TXFORMAT_VYUY422:
		case RADEON_TXFORMAT_YVYU422:
		case RADEON_TXFORMAT_SHADOW16:
		case RADEON_TXFORMAT_LDUDV655:
		case RADEON_TXFORMAT_DUDV88:
			track->textures[i].cpp = 2;
			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
			break;
		case RADEON_TXFORMAT_ARGB8888:
		case RADEON_TXFORMAT_RGBA8888:
		case RADEON_TXFORMAT_SHADOW32:
		case RADEON_TXFORMAT_LDUDUV8888:
			track->textures[i].cpp = 4;
			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
			break;
		case RADEON_TXFORMAT_DXT1:
			track->textures[i].cpp = 1;
			track->textures[i].compress_format = R100_TRACK_COMP_DXT1;
			break;
		case RADEON_TXFORMAT_DXT23:
		case RADEON_TXFORMAT_DXT45:
			track->textures[i].cpp = 1;
			track->textures[i].compress_format = R100_TRACK_COMP_DXT35;
			break;
		}
		track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
		track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_FACES_0:
	case RADEON_PP_CUBIC_FACES_1:
	case RADEON_PP_CUBIC_FACES_2:
		tmp = idx_value;
		i = (reg - RADEON_PP_CUBIC_FACES_0) / 4;
		for (face = 0; face < 4; face++) {
			track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
			track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
		}
		track->tex_dirty = true;
		break;
	default:
		printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
		       reg, idx);
		return -EINVAL;
	}
	return 0;
}

int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
					 struct radeon_cs_packet *pkt,
					 struct radeon_bo *robj)
{
	unsigned idx;
	u32 value;
	idx = pkt->idx + 1;
	value = radeon_get_ib_value(p, idx + 2);
	if ((value + 1) > radeon_bo_size(robj)) {
		DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
			  "(need %u have %lu) !\n",
			  value + 1,
			  radeon_bo_size(robj));
		return -EINVAL;
	}
	return 0;
}
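
/* Validate one PACKET3 from the IB: resolve relocs into GPU addresses and
 * record draw state in the CS tracker so r100_cs_track_check() can verify
 * buffer sizes before the packet reaches the hardware. */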
static int r100_packet3_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt)
{
	struct radeon_cs_reloc *reloc;
	struct r100_cs_track *track;
	unsigned idx;
	volatile uint32_t *ib;
	int r;

	ib = p->ib.ptr;
	idx = pkt->idx + 1;
	track = (struct r100_cs_track *)p->track;
	switch (pkt->opcode) {
	case PACKET3_3D_LOAD_VBPNTR:
		r = r100_packet3_load_vbpntr(p, pkt, idx);
		if (r)
			return r;
		break;
	case PACKET3_INDX_BUFFER:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->lobj.gpu_offset);
		r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
		if (r) {
			return r;
		}
		break;
	case 0x23:
		/* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->lobj.gpu_offset);
		track->num_arrays = 1;
		track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2));

		track->arrays[0].robj = reloc->robj;
		track->arrays[0].esize = track->vtx_size;

		track->max_indx = radeon_get_ib_value(p, idx+1);

		track->vap_vf_cntl = radeon_get_ib_value(p, idx+3);
		track->immd_dwords = pkt->count - 1;
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
	case PACKET3_3D_DRAW_IMMD:
		if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) {
			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
			return -EINVAL;
		}
		track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 0));
		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
		track->immd_dwords = pkt->count - 1;
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing using in-packet vertex data */
	case PACKET3_3D_DRAW_IMMD_2:
		if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) {
			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
			return -EINVAL;
		}
		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
		track->immd_dwords = pkt->count;
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing using in-packet vertex data */
	case PACKET3_3D_DRAW_VBUF_2:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing of vertex buffers setup elsewhere */
	case PACKET3_3D_DRAW_INDX_2:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing using indices to vertex buffer */
	case PACKET3_3D_DRAW_VBUF:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing of vertex buffers setup elsewhere */
	case PACKET3_3D_DRAW_INDX:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing using indices to vertex buffer */
	case PACKET3_3D_CLEAR_HIZ:
	case PACKET3_3D_CLEAR_ZMASK:
		if (p->rdev->hyperz_filp != p->filp)
			return -EINVAL;
		break;
	case PACKET3_NOP:
		break;
	default:
		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}

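/**
 * r100_cs_parse - command stream parser (r1xx-r2xx).
 *
 * @p: parser structure holding parsing context
 *
 * Walks the command stream packet by packet, dispatching type-0
 * packets to the family-specific register checker and type-3 packets
 * to r100_packet3_check() until the IB chunk is exhausted.
 */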
int r100_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	struct r100_cs_track *track;
	int r;

	track = kzalloc(sizeof(*track), GFP_KERNEL);
	if (!track)
		return -ENOMEM;
	r100_cs_track_clear(p->rdev, track);
	p->track = track;
	do {
		r = radeon_cs_packet_parse(p, &pkt, p->idx);
		if (r) {
			return r;
		}
		p->idx += pkt.count + 2;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			if (p->rdev->family >= CHIP_R200)
				r = r100_cs_parse_packet0(p, &pkt,
					p->rdev->config.r100.reg_safe_bm,
					p->rdev->config.r100.reg_safe_bm_size,
					&r200_packet0_check);
			else
				r = r100_cs_parse_packet0(p, &pkt,
					p->rdev->config.r100.reg_safe_bm,
					p->rdev->config.r100.reg_safe_bm_size,
					&r100_packet0_check);
			break;
		case RADEON_PACKET_TYPE2:
			break;
		case RADEON_PACKET_TYPE3:
			r = r100_packet3_check(p, &pkt);
			break;
		default:
			DRM_ERROR("Unknown packet type %d !\n",
				  pkt.type);
			return -EINVAL;
		}
		if (r)
			return r;
	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
	return 0;
}

static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
{
	DRM_ERROR("pitch %d\n", t->pitch);
	DRM_ERROR("use_pitch %d\n", t->use_pitch);
	DRM_ERROR("width %d\n", t->width);
	DRM_ERROR("width_11 %d\n", t->width_11);
	DRM_ERROR("height %d\n", t->height);
	DRM_ERROR("height_11 %d\n", t->height_11);
	DRM_ERROR("num levels %d\n", t->num_levels);
	DRM_ERROR("depth %d\n", t->txdepth);
	DRM_ERROR("bpp %d\n", t->cpp);
	DRM_ERROR("coordinate type %d\n", t->tex_coord_type);
	DRM_ERROR("width round to power of 2 %d\n", t->roundup_w);
	DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
	DRM_ERROR("compress format %d\n", t->compress_format);
}

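/**
 * r100_track_compress_size - size in bytes of a compressed mipmap level.
 *
 * @compress_format: R100_TRACK_COMP_DXT1 or R100_TRACK_COMP_DXT35
 * @w: level width in pixels
 * @h: level height in pixels
 *
 * DXT formats are stored as 4x4 pixel blocks (8 bytes per block for
 * DXT1, 16 bytes for DXT3/5), with a minimum row width in blocks.
 * For example, a 64x64 DXT1 level is 16x16 blocks * 8 bytes = 2048 bytes.
 */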
static int r100_track_compress_size(int compress_format, int w, int h)
{
	int block_width, block_height, block_bytes;
	int wblocks, hblocks;
	int min_wblocks;
	int sz;

	block_width = 4;
	block_height = 4;

	switch (compress_format) {
	case R100_TRACK_COMP_DXT1:
		block_bytes = 8;
		min_wblocks = 4;
		break;
	default:
	case R100_TRACK_COMP_DXT35:
		block_bytes = 16;
		min_wblocks = 2;
		break;
	}

	hblocks = (h + block_height - 1) / block_height;
	wblocks = (w + block_width - 1) / block_width;
	if (wblocks < min_wblocks)
		wblocks = min_wblocks;
	sz = wblocks * hblocks * block_bytes;
	return sz;
}

static int r100_cs_track_cube(struct radeon_device *rdev,
			      struct r100_cs_track *track, unsigned idx)
{
	unsigned face, w, h;
	struct radeon_bo *cube_robj;
	unsigned long size;
	unsigned compress_format = track->textures[idx].compress_format;

	for (face = 0; face < 5; face++) {
		cube_robj = track->textures[idx].cube_info[face].robj;
		w = track->textures[idx].cube_info[face].width;
		h = track->textures[idx].cube_info[face].height;

		if (compress_format) {
			size = r100_track_compress_size(compress_format, w, h);
		} else
			size = w * h;
		size *= track->textures[idx].cpp;

		size += track->textures[idx].cube_info[face].offset;

		if (size > radeon_bo_size(cube_robj)) {
			DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
				  size, radeon_bo_size(cube_robj));
			r100_cs_track_texture_print(&track->textures[idx]);
			return -1;
		}
	}
	return 0;
}

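/**
 * r100_cs_track_texture_check - validate the bound textures.
 *
 * @rdev: radeon_device pointer
 * @track: CS tracker state
 *
 * Sums the size of every enabled mipmap level (and cube faces where
 * applicable) for each texture unit and verifies that the backing
 * buffer object is large enough.
 */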
static int r100_cs_track_texture_check(struct radeon_device *rdev,
				       struct r100_cs_track *track)
{
	struct radeon_bo *robj;
	unsigned long size;
	unsigned u, i, w, h, d;
	int ret;

	for (u = 0; u < track->num_texture; u++) {
		if (!track->textures[u].enabled)
			continue;
		if (track->textures[u].lookup_disable)
			continue;
		robj = track->textures[u].robj;
		if (robj == NULL) {
			DRM_ERROR("No texture bound to unit %u\n", u);
			return -EINVAL;
		}
		size = 0;
		for (i = 0; i <= track->textures[u].num_levels; i++) {
			if (track->textures[u].use_pitch) {
				if (rdev->family < CHIP_R300)
					w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
				else
					w = track->textures[u].pitch / (1 << i);
			} else {
				w = track->textures[u].width;
				if (rdev->family >= CHIP_RV515)
					w |= track->textures[u].width_11;
				w = w / (1 << i);
				if (track->textures[u].roundup_w)
					w = roundup_pow_of_two(w);
			}
			h = track->textures[u].height;
			if (rdev->family >= CHIP_RV515)
				h |= track->textures[u].height_11;
			h = h / (1 << i);
			if (track->textures[u].roundup_h)
				h = roundup_pow_of_two(h);
			if (track->textures[u].tex_coord_type == 1) {
				d = (1 << track->textures[u].txdepth) / (1 << i);
				if (!d)
					d = 1;
			} else {
				d = 1;
			}
			if (track->textures[u].compress_format) {
				size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d;
				/* compressed textures are block based */
			} else
				size += w * h * d;
		}
		size *= track->textures[u].cpp;

		switch (track->textures[u].tex_coord_type) {
		case 0:
		case 1:
			break;
		case 2:
			if (track->separate_cube) {
				ret = r100_cs_track_cube(rdev, track, u);
				if (ret)
					return ret;
			} else
				size *= 6;
			break;
		default:
			DRM_ERROR("Invalid texture coordinate type %u for unit "
				  "%u\n", track->textures[u].tex_coord_type, u);
			return -EINVAL;
		}
		if (size > radeon_bo_size(robj)) {
			DRM_ERROR("Texture of unit %u needs %lu bytes but is "
				  "%lu\n", u, size, radeon_bo_size(robj));
			r100_cs_track_texture_print(&track->textures[u]);
			return -EINVAL;
		}
	}
	return 0;
}

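/**
 * r100_cs_track_check - validate the tracked draw state.
 *
 * @rdev: radeon_device pointer
 * @track: CS tracker state
 *
 * Checks that the color buffers, depth buffer, AA resolve buffer,
 * vertex arrays and textures referenced by the pending draw all fit
 * inside their backing buffer objects. Only state marked dirty since
 * the last check is re-validated.
 */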
int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
{
	unsigned i;
	unsigned long size;
	unsigned prim_walk;
	unsigned nverts;
	unsigned num_cb = track->cb_dirty ? track->num_cb : 0;

	if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
	    !track->blend_read_enable)
		num_cb = 0;

	for (i = 0; i < num_cb; i++) {
		if (track->cb[i].robj == NULL) {
			DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
			return -EINVAL;
		}
		size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
		size += track->cb[i].offset;
		if (size > radeon_bo_size(track->cb[i].robj)) {
			DRM_ERROR("[drm] Buffer too small for color buffer %d "
				  "(need %lu have %lu) !\n", i, size,
				  radeon_bo_size(track->cb[i].robj));
			DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
				  i, track->cb[i].pitch, track->cb[i].cpp,
				  track->cb[i].offset, track->maxy);
			return -EINVAL;
		}
	}
	track->cb_dirty = false;

	if (track->zb_dirty && track->z_enabled) {
		if (track->zb.robj == NULL) {
			DRM_ERROR("[drm] No buffer for z buffer !\n");
			return -EINVAL;
		}
		size = track->zb.pitch * track->zb.cpp * track->maxy;
		size += track->zb.offset;
		if (size > radeon_bo_size(track->zb.robj)) {
			DRM_ERROR("[drm] Buffer too small for z buffer "
				  "(need %lu have %lu) !\n", size,
				  radeon_bo_size(track->zb.robj));
			DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
				  track->zb.pitch, track->zb.cpp,
				  track->zb.offset, track->maxy);
			return -EINVAL;
		}
	}
	track->zb_dirty = false;

	if (track->aa_dirty && track->aaresolve) {
		if (track->aa.robj == NULL) {
			DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i);
			return -EINVAL;
		}
		/* I believe the format comes from colorbuffer0. */
		size = track->aa.pitch * track->cb[0].cpp * track->maxy;
		size += track->aa.offset;
		if (size > radeon_bo_size(track->aa.robj)) {
			DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d "
				  "(need %lu have %lu) !\n", i, size,
				  radeon_bo_size(track->aa.robj));
			DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n",
				  i, track->aa.pitch, track->cb[0].cpp,
				  track->aa.offset, track->maxy);
			return -EINVAL;
		}
	}
	track->aa_dirty = false;

	prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
	if (track->vap_vf_cntl & (1 << 14)) {
		nverts = track->vap_alt_nverts;
	} else {
		nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
	}
	switch (prim_walk) {
	case 1:
		for (i = 0; i < track->num_arrays; i++) {
			size = track->arrays[i].esize * track->max_indx * 4;
			if (track->arrays[i].robj == NULL) {
				DRM_ERROR("(PW %u) Vertex array %u no buffer "
					  "bound\n", prim_walk, i);
				return -EINVAL;
			}
			if (size > radeon_bo_size(track->arrays[i].robj)) {
				dev_err(rdev->dev, "(PW %u) Vertex array %u "
					"need %lu dwords have %lu dwords\n",
					prim_walk, i, size >> 2,
					radeon_bo_size(track->arrays[i].robj)
					>> 2);
				DRM_ERROR("Max indices %u\n", track->max_indx);
				return -EINVAL;
			}
		}
		break;
	case 2:
		for (i = 0; i < track->num_arrays; i++) {
			size = track->arrays[i].esize * (nverts - 1) * 4;
			if (track->arrays[i].robj == NULL) {
				DRM_ERROR("(PW %u) Vertex array %u no buffer "
					  "bound\n", prim_walk, i);
				return -EINVAL;
			}
			if (size > radeon_bo_size(track->arrays[i].robj)) {
				dev_err(rdev->dev, "(PW %u) Vertex array %u "
					"need %lu dwords have %lu dwords\n",
					prim_walk, i, size >> 2,
					radeon_bo_size(track->arrays[i].robj)
					>> 2);
				return -EINVAL;
			}
		}
		break;
	case 3:
		size = track->vtx_size * nverts;
		if (size != track->immd_dwords) {
			DRM_ERROR("IMMD draw %u dwords but needs %lu dwords\n",
				  track->immd_dwords, size);
			DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
				  nverts, track->vtx_size);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
			  prim_walk);
		return -EINVAL;
	}

	if (track->tex_dirty) {
		track->tex_dirty = false;
		return r100_cs_track_texture_check(rdev, track);
	}
	return 0;
}

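/**
 * r100_cs_track_clear - reset the CS tracker to worst-case defaults.
 *
 * @rdev: radeon_device pointer
 * @track: CS tracker state to reset
 *
 * Marks all state dirty and fills in family-dependent limits
 * (number of color buffers, texture units, maximum scanline) so that
 * a stream which never programs a register is still checked against
 * safe bounds.
 */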
void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
{
	unsigned i, face;

	track->cb_dirty = true;
	track->zb_dirty = true;
	track->tex_dirty = true;
	track->aa_dirty = true;

	if (rdev->family < CHIP_R300) {
		track->num_cb = 1;
		if (rdev->family <= CHIP_RS200)
			track->num_texture = 3;
		else
			track->num_texture = 6;
		track->maxy = 2048;
		track->separate_cube = 1;
	} else {
		track->num_cb = 4;
		track->num_texture = 16;
		track->maxy = 4096;
		track->separate_cube = 0;
		track->aaresolve = false;
		track->aa.robj = NULL;
	}

	for (i = 0; i < track->num_cb; i++) {
		track->cb[i].robj = NULL;
		track->cb[i].pitch = 8192;
		track->cb[i].cpp = 16;
		track->cb[i].offset = 0;
	}
	track->z_enabled = true;
	track->zb.robj = NULL;
	track->zb.pitch = 8192;
	track->zb.cpp = 4;
	track->zb.offset = 0;
	track->vtx_size = 0x7F;
	track->immd_dwords = 0xFFFFFFFFUL;
	track->num_arrays = 11;
	track->max_indx = 0x00FFFFFFUL;
	for (i = 0; i < track->num_arrays; i++) {
		track->arrays[i].robj = NULL;
		track->arrays[i].esize = 0x7F;
	}
	for (i = 0; i < track->num_texture; i++) {
		track->textures[i].compress_format = R100_TRACK_COMP_NONE;
		track->textures[i].pitch = 16536;
		track->textures[i].width = 16536;
		track->textures[i].height = 16536;
		track->textures[i].width_11 = 1 << 11;
		track->textures[i].height_11 = 1 << 11;
		track->textures[i].num_levels = 12;
		if (rdev->family <= CHIP_RS200) {
			track->textures[i].tex_coord_type = 0;
			track->textures[i].txdepth = 0;
		} else {
			track->textures[i].txdepth = 16;
			track->textures[i].tex_coord_type = 1;
		}
		track->textures[i].cpp = 64;
		track->textures[i].robj = NULL;
		/* CS IB emission code makes sure texture units are disabled */
		track->textures[i].enabled = false;
		track->textures[i].lookup_disable = false;
		track->textures[i].roundup_w = true;
		track->textures[i].roundup_h = true;
		if (track->separate_cube)
			for (face = 0; face < 5; face++) {
				track->textures[i].cube_info[face].robj = NULL;
				track->textures[i].cube_info[face].width = 16536;
				track->textures[i].cube_info[face].height = 16536;
				track->textures[i].cube_info[face].offset = 0;
			}
	}
}

/*
 * Global GPU functions
 */
static void r100_errata(struct radeon_device *rdev)
{
	rdev->pll_errata = 0;

	if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
		rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
	}

	if (rdev->family == CHIP_RV100 ||
	    rdev->family == CHIP_RS100 ||
	    rdev->family == CHIP_RS200) {
		rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
	}
}

static int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
{
	unsigned i;
	uint32_t tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
		if (tmp >= n) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

int r100_gui_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	uint32_t tmp;

	if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
		printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !"
		       " Bad things might happen.\n");
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & RADEON_RBBM_ACTIVE)) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

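/**
 * r100_mc_wait_for_idle - wait for the memory controller to go idle.
 *
 * @rdev: radeon_device pointer
 *
 * Polls MC_STATUS for up to rdev->usec_timeout microseconds.
 * Returns 0 on success, -1 on timeout.
 */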
int r100_mc_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	uint32_t tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		/* read MC_STATUS */
		tmp = RREG32(RADEON_MC_STATUS);
		if (tmp & RADEON_MC_IDLE) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 rbbm_status;

	rbbm_status = RREG32(R_000E40_RBBM_STATUS);
	if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force CP activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}

/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */
void r100_enable_bm(struct radeon_device *rdev)
{
	uint32_t tmp;
	/* Enable bus mastering */
	tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
	WREG32(RADEON_BUS_CNTL, tmp);
}

void r100_bm_disable(struct radeon_device *rdev)
{
	u32 tmp;

	/* disable bus mastering */
	tmp = RREG32(R_000030_BUS_CNTL);
	WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044);
	mdelay(1);
	WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042);
	mdelay(1);
	WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040);
	tmp = RREG32(RADEON_BUS_CNTL);
	mdelay(1);
	pci_clear_master(rdev->pdev);
	mdelay(1);
}

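/**
 * r100_asic_reset - soft reset the GPU.
 *
 * @rdev: radeon_device pointer
 *
 * Stops the CP, disables bus mastering, pulses the RBBM soft reset
 * lines for the GUI blocks and then for the CP, and restores PCI
 * state. Returns 0 if the GPU comes back idle, -1 otherwise.
 */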
int r100_asic_reset(struct radeon_device *rdev)
{
	struct r100_mc_save save;
	u32 status, tmp;
	int ret = 0;

	status = RREG32(R_000E40_RBBM_STATUS);
	if (!G_000E40_GUI_ACTIVE(status)) {
		return 0;
	}
	r100_mc_stop(rdev, &save);
	status = RREG32(R_000E40_RBBM_STATUS);
	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
	/* stop CP */
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	tmp = RREG32(RADEON_CP_RB_CNTL);
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
	WREG32(RADEON_CP_RB_RPTR_WR, 0);
	WREG32(RADEON_CP_RB_WPTR, 0);
	WREG32(RADEON_CP_RB_CNTL, tmp);
	/* save PCI state */
	pci_save_state(rdev->pdev);
	/* disable bus mastering */
	r100_bm_disable(rdev);
	WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) |
					 S_0000F0_SOFT_RESET_RE(1) |
					 S_0000F0_SOFT_RESET_PP(1) |
					 S_0000F0_SOFT_RESET_RB(1));
	RREG32(R_0000F0_RBBM_SOFT_RESET);
	mdelay(500);
	WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
	mdelay(1);
	status = RREG32(R_000E40_RBBM_STATUS);
	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
	/* reset CP */
	WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1));
	RREG32(R_0000F0_RBBM_SOFT_RESET);
	mdelay(500);
	WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
	mdelay(1);
	status = RREG32(R_000E40_RBBM_STATUS);
	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
	/* restore PCI & busmastering */
	pci_restore_state(rdev->pdev);
	r100_enable_bm(rdev);
	/* Check if GPU is idle */
	if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) ||
	    G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) {
		dev_err(rdev->dev, "failed to reset GPU\n");
		ret = -1;
	} else
		dev_info(rdev->dev, "GPU reset succeeded\n");
	r100_mc_resume(rdev, &save);
	return ret;
}

void r100_set_common_regs(struct radeon_device *rdev)
{
	struct drm_device *dev = rdev->ddev;
	bool force_dac2 = false;
	u32 tmp;

	/* set these so they don't interfere with anything */
	WREG32(RADEON_OV0_SCALE_CNTL, 0);
	WREG32(RADEON_SUBPIC_CNTL, 0);
	WREG32(RADEON_VIPH_CONTROL, 0);
	WREG32(RADEON_I2C_CNTL_1, 0);
	WREG32(RADEON_DVI_I2C_CNTL_1, 0);
	WREG32(RADEON_CAP0_TRIG_CNTL, 0);
	WREG32(RADEON_CAP1_TRIG_CNTL, 0);

	/* always set up dac2 on rn50 and some rv100 as lots
	 * of servers seem to wire it up to a VGA port but
	 * don't report it in the bios connector
	 * table.
	 */
	switch (dev->pdev->device) {
	/* RN50 */
	case 0x515e:
	case 0x5969:
		force_dac2 = true;
		break;
	/* RV100 */
	case 0x5159:
	case 0x515a:
		/* DELL triple head servers */
		if ((dev->pdev->subsystem_vendor == 0x1028 /* DELL */) &&
		    ((dev->pdev->subsystem_device == 0x016c) ||
		     (dev->pdev->subsystem_device == 0x016d) ||
		     (dev->pdev->subsystem_device == 0x016e) ||
		     (dev->pdev->subsystem_device == 0x016f) ||
		     (dev->pdev->subsystem_device == 0x0170) ||
		     (dev->pdev->subsystem_device == 0x017d) ||
		     (dev->pdev->subsystem_device == 0x017e) ||
		     (dev->pdev->subsystem_device == 0x0183) ||
		     (dev->pdev->subsystem_device == 0x018a) ||
		     (dev->pdev->subsystem_device == 0x019a)))
			force_dac2 = true;
		break;
	}

	if (force_dac2) {
		u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG);
		u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
		u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2);

		/* For CRT on DAC2, don't turn it on if BIOS didn't
		 * enable it, even if it's detected.
		 */

		/* force it to crtc0 */
		dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL;
		dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL;
		disp_hw_debug |= RADEON_CRT2_DISP1_SEL;

		/* set up the TV DAC */
		tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL |
				 RADEON_TV_DAC_STD_MASK |
				 RADEON_TV_DAC_RDACPD |
				 RADEON_TV_DAC_GDACPD |
				 RADEON_TV_DAC_BDACPD |
				 RADEON_TV_DAC_BGADJ_MASK |
				 RADEON_TV_DAC_DACADJ_MASK);
		tv_dac_cntl |= (RADEON_TV_DAC_NBLANK |
				RADEON_TV_DAC_NHOLD |
				RADEON_TV_DAC_STD_PS2 |
				(0x58 << 16));

		WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
		WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
		WREG32(RADEON_DAC_CNTL2, dac2_cntl);
	}

	/* switch PM block to ACPI mode */
	tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL);
	tmp &= ~RADEON_PM_MODE_SEL;
	WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp);
}

/*
 * VRAM info
 */
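/**
 * r100_vram_get_type - determine VRAM type and bus width.
 *
 * @rdev: radeon_device pointer
 *
 * Works out whether the board uses DDR and how wide the memory bus is
 * from MEM_SDRAM_MODE_REG / MEM_CNTL and stores the result in rdev->mc.
 */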
static void r100_vram_get_type(struct radeon_device *rdev)
{
	uint32_t tmp;

	rdev->mc.vram_is_ddr = false;
	if (rdev->flags & RADEON_IS_IGP)
		rdev->mc.vram_is_ddr = true;
	else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
		rdev->mc.vram_is_ddr = true;
	if ((rdev->family == CHIP_RV100) ||
	    (rdev->family == CHIP_RS100) ||
	    (rdev->family == CHIP_RS200)) {
		tmp = RREG32(RADEON_MEM_CNTL);
		if (tmp & RV100_HALF_MODE) {
			rdev->mc.vram_width = 32;
		} else {
			rdev->mc.vram_width = 64;
		}
		if (rdev->flags & RADEON_SINGLE_CRTC) {
			rdev->mc.vram_width /= 4;
			rdev->mc.vram_is_ddr = true;
		}
	} else if (rdev->family <= CHIP_RV280) {
		tmp = RREG32(RADEON_MEM_CNTL);
		if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
			rdev->mc.vram_width = 128;
		} else {
			rdev->mc.vram_width = 64;
		}
	} else {
		/* newer IGPs */
		rdev->mc.vram_width = 128;
	}
}

static u32 r100_get_accessible_vram(struct radeon_device *rdev)
{
	u32 aper_size;
	u8 byte;

	aper_size = RREG32(RADEON_CONFIG_APER_SIZE);

	/* Set HDP_APER_CNTL only on cards that are known not to be broken,
	 * that is, that have the 2nd generation multifunction PCI interface
	 */
	if (rdev->family == CHIP_RV280 ||
	    rdev->family >= CHIP_RV350) {
		WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL,
			 ~RADEON_HDP_APER_CNTL);
		DRM_INFO("Generation 2 PCI interface, using max accessible memory\n");
		return aper_size * 2;
	}

	/* Older cards have all sorts of funny issues to deal with. First
	 * check if it's a multifunction card by reading the PCI config
	 * header type... Limit those to one aperture size
	 */
	pci_read_config_byte(rdev->pdev, 0xe, &byte);
	if (byte & 0x80) {
		DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
		DRM_INFO("Limiting VRAM to one aperture\n");
		return aper_size;
	}

	/* Single function older card. We read HDP_APER_CNTL to see how the
	 * BIOS has set it up. We don't write this as it's broken on some
	 * ASICs but we expect the BIOS to have done the right thing (might
	 * be too optimistic...)
	 */
	if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL)
		return aper_size * 2;
	return aper_size;
}

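/**
 * r100_vram_init_sizes - work out the usable and visible VRAM sizes.
 *
 * @rdev: radeon_device pointer
 *
 * Fills in the rdev->mc aperture and VRAM sizes, reading stolen
 * memory from NB_TOM on IGPs and applying quirks for boards that
 * report a zero or too-small CONFIG_MEMSIZE.
 */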
void r100_vram_init_sizes(struct radeon_device *rdev)
{
	u64 config_aper_size;

	/* work out accessible VRAM */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev);
	/* FIXME we don't use the second aperture yet when we could use it */
	if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
		rdev->mc.visible_vram_size = rdev->mc.aper_size;
	config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
	if (rdev->flags & RADEON_IS_IGP) {
		uint32_t tom;
		/* read NB_TOM to get the amount of ram stolen for the GPU */
		tom = RREG32(RADEON_NB_TOM);
		rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
		WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
		rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
	} else {
		rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
		/* Some production boards of m6 will report 0
		 * if it's 8 MB
		 */
		if (rdev->mc.real_vram_size == 0) {
			rdev->mc.real_vram_size = 8192 * 1024;
			WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
		}
		/* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM -
		 * Novell bug 204882 + along with lots of ubuntu ones
		 */
		if (rdev->mc.aper_size > config_aper_size)
			config_aper_size = rdev->mc.aper_size;

		if (config_aper_size > rdev->mc.real_vram_size)
			rdev->mc.mc_vram_size = config_aper_size;
		else
			rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
	}
}

void r100_vga_set_state(struct radeon_device *rdev, bool state)
{
	uint32_t temp;

	temp = RREG32(RADEON_CONFIG_CNTL);
	if (state == false) {
		temp &= ~RADEON_CFG_VGA_RAM_EN;
		temp |= RADEON_CFG_VGA_IO_DIS;
	} else {
		temp &= ~RADEON_CFG_VGA_IO_DIS;
	}
	WREG32(RADEON_CONFIG_CNTL, temp);
}

static void r100_mc_init(struct radeon_device *rdev)
{
	u64 base;

	r100_vram_get_type(rdev);
	r100_vram_init_sizes(rdev);
	base = rdev->mc.aper_base;
	if (rdev->flags & RADEON_IS_IGP)
		base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
	radeon_vram_location(rdev, &rdev->mc, base);
	rdev->mc.gtt_base_align = 0;
	if (!(rdev->flags & RADEON_IS_AGP))
		radeon_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);
}

/*
 * Indirect registers accessor
 */
void r100_pll_errata_after_index(struct radeon_device *rdev)
{
	if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) {
		(void)RREG32(RADEON_CLOCK_CNTL_DATA);
		(void)RREG32(RADEON_CRTC_GEN_CNTL);
	}
}

static void r100_pll_errata_after_data(struct radeon_device *rdev)
{
	/* This workaround is necessary on RV100, RS100 and RS200 chips
	 * or the chip could hang on a subsequent access
	 */
	if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
		mdelay(5);
	}

	/* This function is required to workaround a hardware bug in some (all?)
	 * revisions of the R300. This workaround should be called after every
	 * CLOCK_CNTL_INDEX register access. If not, register reads afterward
	 * may not be correct.
	 */
	if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
		uint32_t save, tmp;

		save = RREG32(RADEON_CLOCK_CNTL_INDEX);
		tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
		WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
		tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
		WREG32(RADEON_CLOCK_CNTL_INDEX, save);
	}
}

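/**
 * r100_pll_rreg - read a PLL register through the indirect interface.
 *
 * @rdev: radeon_device pointer
 * @reg: PLL register index (0-0x3f)
 *
 * Writes the index to CLOCK_CNTL_INDEX and reads the value back from
 * CLOCK_CNTL_DATA, applying the per-family PLL errata workarounds
 * around both accesses.
 */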
uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
{
	uint32_t data;

	WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
	r100_pll_errata_after_index(rdev);
	data = RREG32(RADEON_CLOCK_CNTL_DATA);
	r100_pll_errata_after_data(rdev);
	return data;
}

void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
	WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
	r100_pll_errata_after_index(rdev);
	WREG32(RADEON_CLOCK_CNTL_DATA, v);
	r100_pll_errata_after_data(rdev);
}

static void r100_set_safe_registers(struct radeon_device *rdev)
{
	if (ASIC_IS_RN50(rdev)) {
		rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm;
		rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm);
	} else if (rdev->family < CHIP_R200) {
		rdev->config.r100.reg_safe_bm = r100_reg_safe_bm;
		rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm);
	} else {
		r200_set_safe_registers(rdev);
	}
}

/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)
static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t reg, value;
	unsigned i;

	seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
	seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	for (i = 0; i < 64; i++) {
		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
		reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
		value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
		seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
	}
	return 0;
}

static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	uint32_t rdp, wdp;
	unsigned count, i, j;

	radeon_ring_free_size(rdev, ring);
	rdp = RREG32(RADEON_CP_RB_RPTR);
	wdp = RREG32(RADEON_CP_RB_WPTR);
	count = (rdp + ring->ring_size - wdp) & ring->ptr_mask;
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
	seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
	seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
	seq_printf(m, "%u dwords in ring\n", count);
	for (j = 0; j <= count; j++) {
		i = (rdp + j) & ring->ptr_mask;
		seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
	}
	return 0;
}

static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t csq_stat, csq2_stat, tmp;
	unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
	unsigned i;

	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
	csq_stat = RREG32(RADEON_CP_CSQ_STAT);
	csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
	r_rptr = (csq_stat >> 0) & 0x3ff;
	r_wptr = (csq_stat >> 10) & 0x3ff;
	ib1_rptr = (csq_stat >> 20) & 0x3ff;
	ib1_wptr = (csq2_stat >> 0) & 0x3ff;
	ib2_rptr = (csq2_stat >> 10) & 0x3ff;
	ib2_wptr = (csq2_stat >> 20) & 0x3ff;
	seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
	seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
	seq_printf(m, "Ring rptr %u\n", r_rptr);
	seq_printf(m, "Ring wptr %u\n", r_wptr);
	seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
	seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
	seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
	seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
	/* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
	 * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
	seq_printf(m, "Ring fifo:\n");
	for (i = 0; i < 256; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
	}
	seq_printf(m, "Indirect1 fifo:\n");
	for (i = 256; i <= 512; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
	}
	seq_printf(m, "Indirect2 fifo:\n");
	for (i = 640; i < ib1_wptr; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
	}
	return 0;
}

static int r100_debugfs_mc_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t tmp;

	tmp = RREG32(RADEON_CONFIG_MEMSIZE);
	seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
	tmp = RREG32(RADEON_MC_FB_LOCATION);
	seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
	tmp = RREG32(RADEON_BUS_CNTL);
	seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
	tmp = RREG32(RADEON_MC_AGP_LOCATION);
	seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AGP_BASE);
	seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
	tmp = RREG32(RADEON_HOST_PATH_CNTL);
	seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
	tmp = RREG32(0x01D0);
	seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AIC_LO_ADDR);
	seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AIC_HI_ADDR);
	seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
	tmp = RREG32(0x01E4);
	seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
	return 0;
}

static struct drm_info_list r100_debugfs_rbbm_list[] = {
	{"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL},
};

static struct drm_info_list r100_debugfs_cp_list[] = {
	{"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL},
	{"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL},
};

static struct drm_info_list r100_debugfs_mc_info_list[] = {
	{"r100_mc_info", r100_debugfs_mc_info, 0, NULL},
};
#endif

int r100_debugfs_rbbm_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1);
#else
	return 0;
#endif
}

int r100_debugfs_cp_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2);
#else
	return 0;
#endif
}

int r100_debugfs_mc_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1);
#else
	return 0;
#endif
}

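/**
 * r100_set_surface_reg - program a surface register for tiling.
 *
 * @rdev: radeon_device pointer
 * @reg: surface register index (0-7)
 * @tiling_flags: RADEON_TILING_* flags for the buffer
 * @pitch: buffer pitch in bytes
 * @offset: start offset of the surface in VRAM
 * @obj_size: size of the surface in bytes
 *
 * Translates the generic tiling flags into the family-specific
 * SURFACE*_INFO encoding and programs the surface aperture bounds.
 */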
int r100_set_surface_reg(struct radeon_device *rdev, int reg,
			 uint32_t tiling_flags, uint32_t pitch,
			 uint32_t offset, uint32_t obj_size)
{
	int surf_index = reg * 16;
	int flags = 0;

	if (rdev->family <= CHIP_RS200) {
		if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
				 == (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
			flags |= RADEON_SURF_TILE_COLOR_BOTH;
		if (tiling_flags & RADEON_TILING_MACRO)
			flags |= RADEON_SURF_TILE_COLOR_MACRO;
	} else if (rdev->family <= CHIP_RV280) {
		if (tiling_flags & (RADEON_TILING_MACRO))
			flags |= R200_SURF_TILE_COLOR_MACRO;
		if (tiling_flags & RADEON_TILING_MICRO)
			flags |= R200_SURF_TILE_COLOR_MICRO;
	} else {
		if (tiling_flags & RADEON_TILING_MACRO)
			flags |= R300_SURF_TILE_MACRO;
		if (tiling_flags & RADEON_TILING_MICRO)
			flags |= R300_SURF_TILE_MICRO;
	}

	if (tiling_flags & RADEON_TILING_SWAP_16BIT)
		flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP;
	if (tiling_flags & RADEON_TILING_SWAP_32BIT)
		flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP;

	/* when we aren't tiling the pitch seems to need to be further
	 * divided down. - tested on power5 + rn50 server
	 */
	if (tiling_flags & (RADEON_TILING_SWAP_16BIT | RADEON_TILING_SWAP_32BIT)) {
		if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO)))
			if (ASIC_IS_RN50(rdev))
				pitch /= 16;
	}

	/* r100/r200 divide by 16 */
	if (rdev->family < CHIP_R300)
		flags |= pitch / 16;
	else
		flags |= pitch / 8;

	DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1);
	WREG32(RADEON_SURFACE0_INFO + surf_index, flags);
	WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
	WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
	return 0;
}

void r100_clear_surface_reg(struct radeon_device *rdev, int reg)
{
	int surf_index = reg * 16;
	WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
}

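/**
 * r100_bandwidth_update - update display watermarks.
 *
 * @rdev: radeon_device pointer
 *
 * Computes memory bandwidth, memory-controller latency and display
 * drain rates for the active CRTCs, then programs the GRPH_BUFFER_CNTL
 * stop request and critical point watermarks so scanout keeps its
 * priority over other memory clients.
 */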
void r100_bandwidth_update(struct radeon_device *rdev)
{
	fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
	fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
	fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff;
	uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
	fixed20_12 memtcas_ff[8] = {
		dfixed_init(1),
		dfixed_init(2),
		dfixed_init(3),
		dfixed_init(0),
		dfixed_init_half(1),
		dfixed_init_half(2),
		dfixed_init(0),
	};
	fixed20_12 memtcas_rs480_ff[8] = {
		dfixed_init(0),
		dfixed_init(1),
		dfixed_init(2),
		dfixed_init(3),
		dfixed_init(0),
		dfixed_init_half(1),
		dfixed_init_half(2),
		dfixed_init_half(3),
	};
	fixed20_12 memtcas2_ff[8] = {
		dfixed_init(0),
		dfixed_init(1),
		dfixed_init(2),
		dfixed_init(3),
		dfixed_init(4),
		dfixed_init(5),
		dfixed_init(6),
		dfixed_init(7),
	};
	fixed20_12 memtrbs[8] = {
		dfixed_init(1),
		dfixed_init_half(1),
		dfixed_init(2),
		dfixed_init_half(2),
		dfixed_init(3),
		dfixed_init_half(3),
		dfixed_init(4),
		dfixed_init_half(4)
	};
	fixed20_12 memtrbs_r4xx[8] = {
		dfixed_init(4),
		dfixed_init(5),
		dfixed_init(6),
		dfixed_init(7),
		dfixed_init(8),
		dfixed_init(9),
		dfixed_init(10),
		dfixed_init(11)
	};
	fixed20_12 min_mem_eff;
	fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
	fixed20_12 cur_latency_mclk, cur_latency_sclk;
	fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate,
		disp_drain_rate2, read_return_rate;
	fixed20_12 time_disp1_drop_priority;
	int c;
	int cur_size = 16;	/* in octawords */
	int critical_point = 0, critical_point2;
	/* uint32_t read_return_rate, time_disp1_drop_priority; */
	int stop_req, max_stop_req;
	struct drm_display_mode *mode1 = NULL;
	struct drm_display_mode *mode2 = NULL;
	uint32_t pixel_bytes1 = 0;
	uint32_t pixel_bytes2 = 0;

	radeon_update_display_priority(rdev);

	if (rdev->mode_info.crtcs[0]->base.enabled) {
		mode1 = &rdev->mode_info.crtcs[0]->base.mode;
		pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8;
	}
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		if (rdev->mode_info.crtcs[1]->base.enabled) {
			mode2 = &rdev->mode_info.crtcs[1]->base.mode;
			pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8;
		}
	}

	min_mem_eff.full = dfixed_const_8(0);
	/* get modes */
	if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
		uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
		mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
		mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
		/* check crtc enables */
		if (mode2)
			mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
		if (mode1)
			mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
		WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
	}

	/*
	 * determine if there is enough bw for the current mode
	 */
	sclk_ff = rdev->pm.sclk;
	mclk_ff = rdev->pm.mclk;

	temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
	temp_ff.full = dfixed_const(temp);
	mem_bw.full = dfixed_mul(mclk_ff, temp_ff);

	pix_clk.full = 0;
	pix_clk2.full = 0;
	peak_disp_bw.full = 0;
	if (mode1) {
		temp_ff.full = dfixed_const(1000);
		pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */
		pix_clk.full = dfixed_div(pix_clk, temp_ff);
		temp_ff.full = dfixed_const(pixel_bytes1);
		peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff);
	}
	if (mode2) {
		temp_ff.full = dfixed_const(1000);
		pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */
		pix_clk2.full = dfixed_div(pix_clk2, temp_ff);
		temp_ff.full = dfixed_const(pixel_bytes2);
		peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff);
	}

	mem_bw.full = dfixed_mul(mem_bw, min_mem_eff);
	if (peak_disp_bw.full >= mem_bw.full) {
		DRM_ERROR("You may not have enough display bandwidth for current mode\n"
			  "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n");
	}

	/* Get values from the EXT_MEM_CNTL register...converting its contents. */
	temp = RREG32(RADEON_MEM_TIMING_CNTL);
	if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
		mem_trcd = ((temp >> 2) & 0x3) + 1;
		mem_trp = ((temp & 0x3)) + 1;
		mem_tras = ((temp & 0x70) >> 4) + 1;
	} else if (rdev->family == CHIP_R300 ||
		   rdev->family == CHIP_R350) { /* r300, r350 */
		mem_trcd = (temp & 0x7) + 1;
		mem_trp = ((temp >> 8) & 0x7) + 1;
		mem_tras = ((temp >> 11) & 0xf) + 4;
	} else if (rdev->family == CHIP_RV350 ||
		   rdev->family <= CHIP_RV380) {
		/* rv3x0 */
		mem_trcd = (temp & 0x7) + 3;
		mem_trp = ((temp >> 8) & 0x7) + 3;
		mem_tras = ((temp >> 11) & 0xf) + 6;
	} else if (rdev->family == CHIP_R420 ||
		   rdev->family == CHIP_R423 ||
		   rdev->family == CHIP_RV410) {
		/* r4xx */
		mem_trcd = (temp & 0xf) + 3;
		if (mem_trcd > 15)
			mem_trcd = 15;
		mem_trp = ((temp >> 8) & 0xf) + 3;
		if (mem_trp > 15)
			mem_trp = 15;
		mem_tras = ((temp >> 12) & 0x1f) + 6;
		if (mem_tras > 31)
			mem_tras = 31;
	} else { /* RV200, R200 */
		mem_trcd = (temp & 0x7) + 1;
		mem_trp = ((temp >> 8) & 0x7) + 1;
		mem_tras = ((temp >> 12) & 0xf) + 4;
	}
	/* convert to FF */
	trcd_ff.full = dfixed_const(mem_trcd);
	trp_ff.full = dfixed_const(mem_trp);
	tras_ff.full = dfixed_const(mem_tras);

	/* Get values from the MEM_SDRAM_MODE_REG register...converting its */
	temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
	data = (temp & (7 << 20)) >> 20;
	if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
		if (rdev->family == CHIP_RS480) /* don't think rs400 */
			tcas_ff = memtcas_rs480_ff[data];
		else
			tcas_ff = memtcas_ff[data];
	} else
		tcas_ff = memtcas2_ff[data];

	if (rdev->family == CHIP_RS400 ||
	    rdev->family == CHIP_RS480) {
		/* extra cas latency stored in bits 23-25 0-4 clocks */
		data = (temp >> 23) & 0x7;
		if (data < 5)
			tcas_ff.full += dfixed_const(data);
	}

	if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
		/* on the R300, Tcas is included in Trbs.
		 */
		temp = RREG32(RADEON_MEM_CNTL);
		data = (R300_MEM_NUM_CHANNELS_MASK & temp);
		if (data == 1) {
			if (R300_MEM_USE_CD_CH_ONLY & temp) {
				temp = RREG32(R300_MC_IND_INDEX);
				temp &= ~R300_MC_IND_ADDR_MASK;
				temp |= R300_MC_READ_CNTL_CD_mcind;
				WREG32(R300_MC_IND_INDEX, temp);
				temp = RREG32(R300_MC_IND_DATA);
				data = (R300_MEM_RBS_POSITION_C_MASK & temp);
			} else {
				temp = RREG32(R300_MC_READ_CNTL_AB);
				data = (R300_MEM_RBS_POSITION_A_MASK & temp);
			}
		} else {
			temp = RREG32(R300_MC_READ_CNTL_AB);
			data = (R300_MEM_RBS_POSITION_A_MASK & temp);
		}
		if (rdev->family == CHIP_RV410 ||
		    rdev->family == CHIP_R420 ||
		    rdev->family == CHIP_R423)
			trbs_ff = memtrbs_r4xx[data];
		else
			trbs_ff = memtrbs[data];
		tcas_ff.full += trbs_ff.full;
	}

	sclk_eff_ff.full = sclk_ff.full;

	if (rdev->flags & RADEON_IS_AGP) {
		fixed20_12 agpmode_ff;
		agpmode_ff.full = dfixed_const(radeon_agpmode);
		temp_ff.full = dfixed_const_666(16);
		sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff);
	}
	/* TODO PCIE lanes may affect this - agpmode == 16?? */

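	/* The block below estimates the worst-case memory controller
	 * latency seen by the display, roughly:
	 *
	 *   mc_latency_mclk = (2*tRCD + c*tCAS + 4*(tRAS + tRP) + k1) / Mclk
	 *                     + 4 / Sclk_eff
	 *
	 * where k1 and c are picked from the memory type and bus width.
	 */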
	if (ASIC_IS_R300(rdev)) {
		sclk_delay_ff.full = dfixed_const(250);
	} else {
		if ((rdev->family == CHIP_RV100) ||
		    rdev->flags & RADEON_IS_IGP) {
			if (rdev->mc.vram_is_ddr)
				sclk_delay_ff.full = dfixed_const(41);
			else
				sclk_delay_ff.full = dfixed_const(33);
		} else {
			if (rdev->mc.vram_width == 128)
				sclk_delay_ff.full = dfixed_const(57);
			else
				sclk_delay_ff.full = dfixed_const(41);
		}
	}

	mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff);

	if (rdev->mc.vram_is_ddr) {
		if (rdev->mc.vram_width == 32) {
			k1.full = dfixed_const(40);
			c = 3;
		} else {
			k1.full = dfixed_const(20);
			c = 1;
		}
	} else {
		k1.full = dfixed_const(40);
		c = 3;
	}

	temp_ff.full = dfixed_const(2);
	mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff);
	temp_ff.full = dfixed_const(c);
	mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff);
	temp_ff.full = dfixed_const(4);
	mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff);
	mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff);
	mc_latency_mclk.full += k1.full;

	mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff);
	mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff);

	/*
	 * HW cursor time assuming worst case of full size colour cursor.
	 */
	temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
	temp_ff.full += trcd_ff.full;
	if (temp_ff.full < tras_ff.full)
		temp_ff.full = tras_ff.full;
	cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff);

	temp_ff.full = dfixed_const(cur_size);
	cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff);
	/*
	 * Find the total latency for the display data.
	 */
	disp_latency_overhead.full = dfixed_const(8);
	disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff);
	mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
	mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;

	if (mc_latency_mclk.full > mc_latency_sclk.full)
		disp_latency.full = mc_latency_mclk.full;
	else
		disp_latency.full = mc_latency_sclk.full;

	/* setup Max GRPH_STOP_REQ default value */
	if (ASIC_IS_RV100(rdev))
		max_stop_req = 0x5c;
	else
		max_stop_req = 0x7c;

	if (mode1) {
		/* CRTC1
		 * Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
		 * GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
		 */
		stop_req = mode1->hdisplay * pixel_bytes1 / 16;

		if (stop_req > max_stop_req)
			stop_req = max_stop_req;

		/*
		 * Find the drain rate of the display buffer.
		 */
		temp_ff.full = dfixed_const((16/pixel_bytes1));
		disp_drain_rate.full = dfixed_div(pix_clk, temp_ff);

		/*
		 * Find the critical point of the display buffer.
		 */
		crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency);
		crit_point_ff.full += dfixed_const_half(0);

		critical_point = dfixed_trunc(crit_point_ff);

		if (rdev->disp_priority == 2) {
			critical_point = 0;
		}

		/*
		 * The critical point should never be above max_stop_req-4. Setting
		 * GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
		 */
		if (max_stop_req - critical_point < 4)
			critical_point = 0;

		if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
			/* some R300 cards have problem with this set to 0, when CRTC2 is enabled. */
			critical_point = 0x10;
		}

		temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
		temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
		temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
		temp &= ~(RADEON_GRPH_START_REQ_MASK);
		if ((rdev->family == CHIP_R350) &&
		    (stop_req > 0x15)) {
			stop_req -= 0x10;
		}
		temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
		temp |= RADEON_GRPH_BUFFER_SIZE;
		temp &= ~(RADEON_GRPH_CRITICAL_CNTL |
			  RADEON_GRPH_CRITICAL_AT_SOF |
			  RADEON_GRPH_STOP_CNTL);
		/*
		 * Write the result into the register.
		 */
		WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
						 (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));

#if 0
		if ((rdev->family == CHIP_RS400) ||
		    (rdev->family == CHIP_RS480)) {
			/* attempt to program RS400 disp regs correctly ??? */
			temp = RREG32(RS400_DISP1_REG_CNTL);
			temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
				  RS400_DISP1_STOP_REQ_LEVEL_MASK);
			WREG32(RS400_DISP1_REQ_CNTL1, (temp |
				(critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
				(critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
			temp = RREG32(RS400_DMIF_MEM_CNTL1);
			temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
				  RS400_DISP1_CRITICAL_POINT_STOP_MASK);
			WREG32(RS400_DMIF_MEM_CNTL1, (temp |
				(critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
				(critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
		}
#endif

		DRM_DEBUG_KMS("GRPH_BUFFER_CNTL from to %x\n",
			      /* (unsigned int)info->SavedReg->grph_buffer_cntl, */
			      (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
	}

	if (mode2) {
		u32 grph2_cntl;
		stop_req = mode2->hdisplay * pixel_bytes2 / 16;

		if (stop_req > max_stop_req)
			stop_req = max_stop_req;

		/*
		 * Find the drain rate of the display buffer.
		 */
		temp_ff.full = dfixed_const((16/pixel_bytes2));
		disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff);

		grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
		grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
		grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
		grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
		if ((rdev->family == CHIP_R350) &&
		    (stop_req > 0x15)) {
			stop_req -= 0x10;
		}
		grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
		grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
		grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL |
				RADEON_GRPH_CRITICAL_AT_SOF |
				RADEON_GRPH_STOP_CNTL);

		if ((rdev->family == CHIP_RS100) ||
		    (rdev->family == CHIP_RS200))
			critical_point2 = 0;
		else {
			temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128;
			temp_ff.full = dfixed_const(temp);
			temp_ff.full = dfixed_mul(mclk_ff, temp_ff);
			if (sclk_ff.full < temp_ff.full)
				temp_ff.full = sclk_ff.full;

			read_return_rate.full = temp_ff.full;

			if (mode1) {
				temp_ff.full = read_return_rate.full - disp_drain_rate.full;
				time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff);
			} else {
				time_disp1_drop_priority.full = 0;
			}
			crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
			crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2);
			crit_point_ff.full += dfixed_const_half(0);

			critical_point2 = dfixed_trunc(crit_point_ff);

			if (rdev->disp_priority == 2) {
				critical_point2 = 0;
			}

			if (max_stop_req - critical_point2 < 4)
				critical_point2 = 0;

		}

		if (critical_point2 == 0 && rdev->family == CHIP_R300) {
			/* some R300 cards have problem with this set to 0 */
			critical_point2 = 0x10;
		}

		WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
						  (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));

		if ((rdev->family == CHIP_RS400) ||
		    (rdev->family == CHIP_RS480)) {
#if 0
			/* attempt to program RS400 disp2 regs correctly ??? */
			temp = RREG32(RS400_DISP2_REQ_CNTL1);
			temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
				  RS400_DISP2_STOP_REQ_LEVEL_MASK);
			WREG32(RS400_DISP2_REQ_CNTL1, (temp |
				(critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
				(critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
			temp = RREG32(RS400_DISP2_REQ_CNTL2);
			temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
				  RS400_DISP2_CRITICAL_POINT_STOP_MASK);
			WREG32(RS400_DISP2_REQ_CNTL2, (temp |
				(critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
				(critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
#endif
			WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
			WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
			WREG32(RS400_DMIF_MEM_CNTL1, 0x29CA71DC);
			WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
		}

		DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n",
			      (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
	}
}

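/**
 * r100_ring_test - basic sanity test for the CP ring.
 *
 * @rdev: radeon_device pointer
 * @ring: ring to test
 *
 * Writes 0xDEADBEEF to a scratch register through the ring and polls
 * until the value lands, proving that the CP is fetching and
 * executing packets. Returns 0 on success.
 */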
int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 2);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET0(scratch, 0));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF) {
			break;
		}
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test succeeded in %d usecs\n", i);
	} else {
		DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}

void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];

	if (ring->rptr_save_reg) {
		u32 next_rptr = ring->wptr + 2 + 3;
		radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
		radeon_ring_write(ring, next_rptr);
	}

	radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1));
	radeon_ring_write(ring, ib->gpu_addr);
	radeon_ring_write(ring, ib->length_dw);
}

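/**
 * r100_ib_test - sanity test for indirect buffer execution.
 *
 * @rdev: radeon_device pointer
 * @ring: ring the IB is scheduled on
 *
 * Builds a small IB that writes 0xDEADBEEF to a scratch register,
 * schedules it, waits for its fence and polls the scratch register
 * to confirm the IB path works. Returns 0 on success.
 */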
int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 2);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET0(scratch, 0));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF) {
			break;
		}
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test succeeded in %d usecs\n", i);
	} else {
		DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}

/**
 * r100_ring_ib_execute - emit an IB on the GFX ring.
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to schedule
 *
 * Writes the IB base address and length to the CP so the indirect
 * buffer is fetched and executed (r1xx-r4xx).
 */
void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];

	if (ring->rptr_save_reg) {
		u32 next_rptr = ring->wptr + 2 + 3;
		radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
		radeon_ring_write(ring, next_rptr);
	}

	radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1));
	radeon_ring_write(ring, ib->gpu_addr);
	radeon_ring_write(ring, ib->length_dw);
}

/**
 * r100_ib_test - IB path sanity test.
 *
 * @rdev: radeon_device pointer
 * @ring: ring the IB is submitted on
 *
 * Like r100_ring_test(), but performs the scratch register write
 * through an indirect buffer, then waits on the IB fence and polls
 * the scratch register (r1xx-r4xx).
 * Returns 0 on success, or a negative error code on failure.
 */
int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		goto free_scratch;
	}
	ib.ptr[0] = PACKET0(scratch, 0);
	ib.ptr[1] = 0xDEADBEEF;
	ib.ptr[2] = PACKET2(0);
	ib.ptr[3] = PACKET2(0);
	ib.ptr[4] = PACKET2(0);
	ib.ptr[5] = PACKET2(0);
	ib.ptr[6] = PACKET2(0);
	ib.ptr[7] = PACKET2(0);
	ib.length_dw = 8;
	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		goto free_ib;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		goto free_ib;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF) {
			break;
		}
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test succeeded in %u usecs\n", i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
free_ib:
	radeon_ib_free(rdev, &ib);
free_scratch:
	radeon_scratch_free(rdev, scratch);
	return r;
}

/**
 * r100_mc_stop - stop all MC clients and save display state.
 *
 * @rdev: radeon_device pointer
 * @save: storage for the CRTC registers that get clobbered
 *
 * Shuts down the CP, disables VGA memory accesses and turns off
 * cursor, overlay and CRTC display requests so the memory controller
 * can be reprogrammed safely (r1xx-r4xx).
 */
void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
{
	/* Shut down the CP. We shouldn't need to do this, but better
	 * safe than sorry.
	 */
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	WREG32(R_000740_CP_CSQ_CNTL, 0);

	/* Save a few CRTC registers */
	save->GENMO_WT = RREG8(R_0003C2_GENMO_WT);
	save->CRTC_EXT_CNTL = RREG32(R_000054_CRTC_EXT_CNTL);
	save->CRTC_GEN_CNTL = RREG32(R_000050_CRTC_GEN_CNTL);
	save->CUR_OFFSET = RREG32(R_000260_CUR_OFFSET);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		save->CRTC2_GEN_CNTL = RREG32(R_0003F8_CRTC2_GEN_CNTL);
		save->CUR2_OFFSET = RREG32(R_000360_CUR2_OFFSET);
	}

	/* Disable VGA aperture access */
	WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & save->GENMO_WT);
	/* Disable cursor, overlay, crtc */
	WREG32(R_000260_CUR_OFFSET, save->CUR_OFFSET | S_000260_CUR_LOCK(1));
	WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL |
					S_000054_CRTC_DISPLAY_DIS(1));
	WREG32(R_000050_CRTC_GEN_CNTL,
			(C_000050_CRTC_CUR_EN & save->CRTC_GEN_CNTL) |
			S_000050_CRTC_DISP_REQ_EN_B(1));
	WREG32(R_000420_OV0_SCALE_CNTL,
		C_000420_OV0_OVERLAY_EN & RREG32(R_000420_OV0_SCALE_CNTL));
	WREG32(R_000260_CUR_OFFSET, C_000260_CUR_LOCK & save->CUR_OFFSET);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_000360_CUR2_OFFSET, save->CUR2_OFFSET |
						S_000360_CUR2_LOCK(1));
		WREG32(R_0003F8_CRTC2_GEN_CNTL,
			(C_0003F8_CRTC2_CUR_EN & save->CRTC2_GEN_CNTL) |
			S_0003F8_CRTC2_DISPLAY_DIS(1) |
			S_0003F8_CRTC2_DISP_REQ_EN_B(1));
		WREG32(R_000360_CUR2_OFFSET,
			C_000360_CUR2_LOCK & save->CUR2_OFFSET);
	}
}

/**
 * r100_mc_resume - restore display state after MC reprogramming.
 *
 * @rdev: radeon_device pointer
 * @save: CRTC registers saved by r100_mc_stop()
 *
 * Points the CRTCs at the new VRAM base and restores the saved CRTC
 * registers (r1xx-r4xx).
 */
void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save)
{
	/* Update base address for crtc */
	WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
	}
	/* Restore CRTC registers */
	WREG8(R_0003C2_GENMO_WT, save->GENMO_WT);
	WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL);
	WREG32(R_000050_CRTC_GEN_CNTL, save->CRTC_GEN_CNTL);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_0003F8_CRTC2_GEN_CNTL, save->CRTC2_GEN_CNTL);
	}
}

/**
 * r100_vga_render_disable - disable the VGA render path.
 *
 * @rdev: radeon_device pointer
 *
 * Clears the VGA memory enable bit so legacy VGA rendering no longer
 * accesses the framebuffer (r1xx-r4xx).
 */
void r100_vga_render_disable(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG8(R_0003C2_GENMO_WT);
	WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & tmp);
}

static void r100_debugfs(struct radeon_device *rdev)
{
	int r;

	r = r100_debugfs_mc_info_init(rdev);
	if (r)
		dev_warn(rdev->dev, "Failed to create r100_mc debugfs file.\n");
}
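
/**
 * r100_mc_program - program the memory controller.
 *
 * @rdev: radeon_device pointer
 *
 * Stops the MC clients, programs the AGP and framebuffer locations
 * in the MC address space and resumes the clients (r1xx-r4xx).
 */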
static void r100_mc_program(struct radeon_device *rdev)
{
	struct r100_mc_save save;

	/* Stop all MC clients */
	r100_mc_stop(rdev, &save);
	if (rdev->flags & RADEON_IS_AGP) {
		WREG32(R_00014C_MC_AGP_LOCATION,
			S_00014C_MC_AGP_START(rdev->mc.gtt_start >> 16) |
			S_00014C_MC_AGP_TOP(rdev->mc.gtt_end >> 16));
		WREG32(R_000170_AGP_BASE, lower_32_bits(rdev->mc.agp_base));
		if (rdev->family > CHIP_RV200)
			WREG32(R_00015C_AGP_BASE_2,
				upper_32_bits(rdev->mc.agp_base) & 0xff);
	} else {
		WREG32(R_00014C_MC_AGP_LOCATION, 0x0FFFFFFF);
		WREG32(R_000170_AGP_BASE, 0);
		if (rdev->family > CHIP_RV200)
			WREG32(R_00015C_AGP_BASE_2, 0);
	}
	/* Wait for MC idle */
	if (r100_mc_wait_for_idle(rdev))
		dev_warn(rdev->dev, "Wait for MC idle timeout.\n");
	/* Program the MC; the address space is limited to 32 bits */
	WREG32(R_000148_MC_FB_LOCATION,
		S_000148_MC_FB_START(rdev->mc.vram_start >> 16) |
		S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16));
	r100_mc_resume(rdev, &save);
}

static void r100_clock_startup(struct radeon_device *rdev)
{
	u32 tmp;

	if (radeon_dynclks != -1 && radeon_dynclks)
		radeon_legacy_set_clock_gating(rdev, 1);
	/* We need to force on some of the blocks */
	tmp = RREG32_PLL(R_00000D_SCLK_CNTL);
	tmp |= S_00000D_FORCE_CP(1) | S_00000D_FORCE_VIP(1);
	if ((rdev->family == CHIP_RV250) || (rdev->family == CHIP_RV280))
		tmp |= S_00000D_FORCE_DISP1(1) | S_00000D_FORCE_DISP2(1);
	WREG32_PLL(R_00000D_SCLK_CNTL, tmp);
}

static int r100_startup(struct radeon_device *rdev)
{
	int r;

	/* set common regs */
	r100_set_common_regs(rdev);
	/* program mc */
	r100_mc_program(rdev);
	/* Resume clock */
	r100_clock_startup(rdev);
	/* Initialize GART (initialize after TTM so we can allocate
	 * memory through TTM but finalize after TTM) */
	r100_enable_bm(rdev);
	if (rdev->flags & RADEON_IS_PCI) {
		r = r100_pci_gart_enable(rdev);
		if (r)
			return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	/* Enable IRQ */
	r100_irq_set(rdev);
	rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
	/* 1M ring buffer */
	r = r100_cp_init(rdev, 1024 * 1024);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
		return r;
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
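
/**
 * r100_resume - resume from suspend.
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART, resets the GPU, re-posts the card through the
 * combios tables and restarts the engines (r1xx-r4xx).
 * Returns 0 on success, or a negative error code on failure.
 */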
int r100_resume(struct radeon_device *rdev)
{
	int r;

	/* Make sure the GART is disabled */
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_disable(rdev);
	/* Resume clock before doing reset */
	r100_clock_startup(rdev);
	/* Reset gpu before posting otherwise ATOM will enter infinite loop */
	if (radeon_asic_reset(rdev)) {
		dev_warn(rdev->dev, "GPU reset failed! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
			RREG32(R_000E40_RBBM_STATUS),
			RREG32(R_0007C0_CP_STAT));
	}
	/* post */
	radeon_combios_asic_init(rdev->ddev);
	/* Resume clock after posting */
	r100_clock_startup(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);

	rdev->accel_working = true;
	r = r100_startup(rdev);
	if (r) {
		rdev->accel_working = false;
	}
	return r;
}

/**
 * r100_suspend - prepare the asic for suspend.
 *
 * @rdev: radeon_device pointer
 *
 * Disables the CP, writeback, IRQs and the GART (r1xx-r4xx).
 */
int r100_suspend(struct radeon_device *rdev)
{
	r100_cp_disable(rdev);
	radeon_wb_disable(rdev);
	r100_irq_disable(rdev);
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_disable(rdev);
	return 0;
}

/**
 * r100_fini - tear down the asic at driver unload.
 *
 * @rdev: radeon_device pointer
 *
 * Releases everything r100_init() set up (r1xx-r4xx).
 */
void r100_fini(struct radeon_device *rdev)
{
	r100_cp_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_gem_fini(rdev);
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_fini(rdev);
	radeon_agp_fini(rdev);
	radeon_irq_kms_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}

/*
 * Due to how kexec works, it can leave the hw fully initialised when it
 * boots the new kernel. However, doing our init sequence with the CP and
 * WB stuff set up causes GPU hangs on the RN50 at least. So at startup
 * do some quick sanity checks and restore sane values to avoid this
 * problem.
 */
void r100_restore_sanity(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG32(RADEON_CP_CSQ_CNTL);
	if (tmp) {
		WREG32(RADEON_CP_CSQ_CNTL, 0);
	}
	tmp = RREG32(RADEON_CP_RB_CNTL);
	if (tmp) {
		WREG32(RADEON_CP_RB_CNTL, 0);
	}
	tmp = RREG32(RADEON_SCRATCH_UMSK);
	if (tmp) {
		WREG32(RADEON_SCRATCH_UMSK, 0);
	}
}
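
/**
 * r100_init - asic-specific driver and hw init.
 *
 * @rdev: radeon_device pointer
 *
 * Sets up the asic at driver load time: BIOS parsing, clocks, AGP,
 * VRAM, fences, IRQs, the memory manager and the GART, then brings
 * up acceleration via r100_startup() (r1xx-r4xx).
 * Returns 0 on success, or a negative error code on failure.
 */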
int r100_init(struct radeon_device *rdev)
{
	int r;

	/* Register debugfs file specific to this group of asics */
	r100_debugfs(rdev);
	/* Disable VGA */
	r100_vga_render_disable(rdev);
	/* Initialize scratch registers */
	radeon_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* sanity check some registers to avoid hangs like after kexec */
	r100_restore_sanity(rdev);
	/* TODO: disable VGA need to use VGA request */
	/* BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	if (rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting combios for RS400/RS480 GPU\n");
		return -EINVAL;
	} else {
		r = radeon_combios_init(rdev);
		if (r)
			return r;
	}
	/* Reset gpu before posting otherwise ATOM will enter infinite loop */
	if (radeon_asic_reset(rdev)) {
		dev_warn(rdev->dev,
			"GPU reset failed! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
			RREG32(R_000E40_RBBM_STATUS),
			RREG32(R_0007C0_CP_STAT));
	}
	/* check if the card is posted */
	if (radeon_boot_test_post_card(rdev) == false)
		return -EINVAL;
	/* Set asic errata */
	r100_errata(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* initialize AGP */
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r) {
			radeon_agp_disable(rdev);
		}
	}
	/* initialize VRAM */
	r100_mc_init(rdev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	r = radeon_irq_kms_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;
	if (rdev->flags & RADEON_IS_PCI) {
		r = r100_pci_gart_init(rdev);
		if (r)
			return r;
	}
	r100_set_safe_registers(rdev);

	rdev->accel_working = true;
	r = r100_startup(rdev);
	if (r) {
		/* Something went wrong with the accel init; stop accel */
		dev_err(rdev->dev, "Disabling GPU acceleration\n");
		r100_cp_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_irq_kms_fini(rdev);
		if (rdev->flags & RADEON_IS_PCI)
			r100_pci_gart_fini(rdev);
		rdev->accel_working = false;
	}
	return 0;
}

/**
 * r100_mm_rreg - read an MMIO register.
 *
 * @rdev: radeon_device pointer
 * @reg: register offset
 * @always_indirect: force use of the MM_INDEX/MM_DATA indirect path
 *
 * Registers inside the mapped MMIO aperture are read directly;
 * anything else goes through the indirect index/data pair under the
 * mmio_idx_lock spinlock.
 */
uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
		      bool always_indirect)
{
	if (reg < rdev->rmmio_size && !always_indirect)
		return readl(((void __iomem *)rdev->rmmio) + reg);
	else {
		unsigned long flags;
		uint32_t ret;

		spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
		ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
		spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);

		return ret;
	}
}

/* Write counterpart of r100_mm_rreg(). */
void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
		  bool always_indirect)
{
	if (reg < rdev->rmmio_size && !always_indirect)
		writel(v, ((void __iomem *)rdev->rmmio) + reg);
	else {
		unsigned long flags;

		spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
		writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
		spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
	}
}

/* Register access through the PCI I/O BAR, with the same direct vs.
 * indirect split as the MMIO accessors above.
 */
u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
{
	if (reg < rdev->rio_mem_size)
		return ioread32(rdev->rio_mem + reg);
	else {
		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
		return ioread32(rdev->rio_mem + RADEON_MM_DATA);
	}
}

void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	if (reg < rdev->rio_mem_size)
		iowrite32(v, rdev->rio_mem + reg);
	else {
		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
		iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
	}
}