/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
				 struct radeon_ib *ib,
				 uint64_t pe,
				 uint64_t addr, unsigned count,
				 uint32_t incr, uint32_t flags);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
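
/*
 * The accessors above are the usual index/data pair: write the register
 * offset to PCIE_INDEX, read it back so the write posts, then access
 * PCIE_DATA under the spinlock that serializes the sequence.  A minimal
 * sketch of a read-modify-write built on top of them (illustrative only;
 * callers normally go through the driver's RREG32_PCIE/WREG32_PCIE
 * wrappers rather than calling these directly):
 *
 *	u32 tmp = cik_pciep_rreg(rdev, reg);
 *	tmp &= ~mask;
 *	tmp |= value & mask;
 *	cik_pciep_wreg(rdev, reg, tmp);
 */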

/*
 * RLC save/restore register lists.  Each entry pairs a selector,
 * ((instance select << 16) | (register byte offset >> 2)), with a zeroed
 * slot the RLC saves the register contents into; the bare count words
 * (0x3, 0x5) delimit sub-lists.  (Format inferred from the layout below.)
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x829c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x869c >> 2), 0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2), 0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc900 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc904 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc908 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0xae00 << 16) | (0xc90c >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc910 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2), 0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc770 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc774 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc778 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc77c >> 2), 0x00000000,
	(0x0400 << 16) | (0xc780 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc784 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc788 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc78c >> 2), 0x00000000,
	(0x0400 << 16) | (0xc798 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc79c >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x970c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x971c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0xae00 << 16) | (0x31068 >> 2), 0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2), 0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2), 0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x829c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x869c >> 2), 0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2), 0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2), 0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2), 0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc770 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc774 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc798 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc79c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x970c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x971c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2), 0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2), 0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}
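
/*
 * The golden register tables are consumed three dwords at a time:
 * {offset, and_mask, or_value}.  A sketch of the update each triple
 * requests (radeon_program_register_sequence() in radeon_device.c does
 * the equivalent, with an and_mask of 0xffffffff treated as "write
 * or_value verbatim"):
 *
 *	tmp = RREG32(reg);
 *	tmp &= ~and_mask;
 *	tmp |= or_value;
 *	WREG32(reg, tmp);
 */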

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}
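
/*
 * Illustrative use of the doorbell helpers (offsets are byte offsets
 * into the doorbell BAR); "db_offset" below stands in for whatever
 * offset the caller has allocated out of rdev->doorbell:
 *
 *	cik_mm_wdoorbell(rdev, db_offset, ring->wptr);
 *	wptr = cik_mm_rdoorbell(rdev, db_offset);
 */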

#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
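
/*
 * Each pair above is an {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * write; ci_mc_load_microcode() below walks the table before streaming
 * the MC ucode proper into MC_SEQ_SUP_PGM.
 */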

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
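
/*
 * Callers are expected to bracket banked-register accesses with a
 * select and a reset back to instance 0, serialized against other
 * users.  A sketch (rdev->srbm_mutex is the lock the rest of this
 * driver uses for this):
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, vmid);
 *	... program the per-queue / per-vmid registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */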

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/*
		 * Note: within this branch running is always zero, so the
		 * blackout save below (and the matching restore at the end)
		 * can never execute as written.
		 */
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size, smc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}

		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			printk(KERN_ERR
			       "smc: error loading firmware \"%s\"\n",
			       fw_name);
			release_firmware(rdev->smc_fw);
			rdev->smc_fw = NULL;
			err = 0;
		} else if (rdev->smc_fw->size != smc_req_size) {
			printk(KERN_ERR
			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->smc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		/* also drop the MEC and SDMA images so a failed load does not leak them */
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}
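
/*
 * The blobs requested above live under radeon/ in the firmware search
 * path and match the MODULE_FIRMWARE() declarations at the top of this
 * file (e.g. radeon/BONAIRE_pfp.bin).  On APUs (Kaveri/Kabini) no MC or
 * SMC image is loaded, per the RADEON_IS_IGP check in the function.
 */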

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */
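
	/*
	 * Each branch below fills one dword per mode:
	 *
	 *	WREG32(GB_TILE_MODE0 + (i * 4), tile_mode[i]);
	 *	WREG32(GB_MACROTILE_MODE0 + (i * 4), macrotile_mode[i]);
	 *
	 * so surfaces can reference a mode by index instead of carrying
	 * the full tiling parameters (see the function comment above).
	 */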
firmware \"%s\"\n", 1672 rdev->sdma_fw->size, fw_name); 1673 err = -EINVAL; 1674 } 1675 1676 /* No SMC, MC ucode on APUs */ 1677 if (!(rdev->flags & RADEON_IS_IGP)) { 1678 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); 1679 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev); 1680 if (err) 1681 goto out; 1682 if (rdev->mc_fw->size != mc_req_size) { 1683 printk(KERN_ERR 1684 "cik_mc: Bogus length %zu in firmware \"%s\"\n", 1685 rdev->mc_fw->size, fw_name); 1686 err = -EINVAL; 1687 } 1688 1689 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name); 1690 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev); 1691 if (err) { 1692 printk(KERN_ERR 1693 "smc: error loading firmware \"%s\"\n", 1694 fw_name); 1695 release_firmware(rdev->smc_fw); 1696 rdev->smc_fw = NULL; 1697 err = 0; 1698 } else if (rdev->smc_fw->size != smc_req_size) { 1699 printk(KERN_ERR 1700 "cik_smc: Bogus length %zu in firmware \"%s\"\n", 1701 rdev->smc_fw->size, fw_name); 1702 err = -EINVAL; 1703 } 1704 } 1705 1706 out: 1707 if (err) { 1708 if (err != -EINVAL) 1709 printk(KERN_ERR 1710 "cik_cp: Failed to load firmware \"%s\"\n", 1711 fw_name); 1712 release_firmware(rdev->pfp_fw); 1713 rdev->pfp_fw = NULL; 1714 release_firmware(rdev->me_fw); 1715 rdev->me_fw = NULL; 1716 release_firmware(rdev->ce_fw); 1717 rdev->ce_fw = NULL; 1718 release_firmware(rdev->rlc_fw); 1719 rdev->rlc_fw = NULL; 1720 release_firmware(rdev->mc_fw); 1721 rdev->mc_fw = NULL; 1722 release_firmware(rdev->smc_fw); 1723 rdev->smc_fw = NULL; 1724 } 1725 return err; 1726 } 1727 1728 /* 1729 * Core functions 1730 */ 1731 /** 1732 * cik_tiling_mode_table_init - init the hw tiling table 1733 * 1734 * @rdev: radeon_device pointer 1735 * 1736 * Starting with SI, the tiling setup is done globally in a 1737 * set of 32 tiling modes. Rather than selecting each set of 1738 * parameters per surface as on older asics, we just select 1739 * which index in the tiling table we want to use, and the 1740 * surface uses those parameters (CIK). 1741 */ 1742 static void cik_tiling_mode_table_init(struct radeon_device *rdev) 1743 { 1744 const u32 num_tile_mode_states = 32; 1745 const u32 num_secondary_tile_mode_states = 16; 1746 u32 reg_offset, gb_tile_moden, split_equal_to_row_size; 1747 u32 num_pipe_configs; 1748 u32 num_rbs = rdev->config.cik.max_backends_per_se * 1749 rdev->config.cik.max_shader_engines; 1750 1751 switch (rdev->config.cik.mem_row_size_in_kb) { 1752 case 1: 1753 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB; 1754 break; 1755 case 2: 1756 default: 1757 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB; 1758 break; 1759 case 4: 1760 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB; 1761 break; 1762 } 1763 1764 num_pipe_configs = rdev->config.cik.max_tile_pipes; 1765 if (num_pipe_configs > 8) 1766 num_pipe_configs = 8; /* ??? 
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 4) {
		if (num_rbs == 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1959 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1960 NUM_BANKS(ADDR_SURF_16_BANK)); 1961 break; 1962 case 12: 1963 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1964 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1965 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1966 NUM_BANKS(ADDR_SURF_8_BANK)); 1967 break; 1968 case 13: 1969 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1970 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1971 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1972 NUM_BANKS(ADDR_SURF_4_BANK)); 1973 break; 1974 case 14: 1975 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1976 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1977 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1978 NUM_BANKS(ADDR_SURF_2_BANK)); 1979 break; 1980 default: 1981 gb_tile_moden = 0; 1982 break; 1983 } 1984 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 1985 } 1986 } else if (num_pipe_configs == 4) { 1987 if (num_rbs == 4) { 1988 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 1989 switch (reg_offset) { 1990 case 0: 1991 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1992 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1993 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1994 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 1995 break; 1996 case 1: 1997 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1998 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1999 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2000 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2001 break; 2002 case 2: 2003 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2004 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2005 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2006 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2007 break; 2008 case 3: 2009 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2010 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2011 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2012 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2013 break; 2014 case 4: 2015 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2016 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2017 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2018 TILE_SPLIT(split_equal_to_row_size)); 2019 break; 2020 case 5: 2021 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2022 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2023 break; 2024 case 6: 2025 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2026 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2027 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2028 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2029 break; 2030 case 7: 2031 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2032 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2033 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2034 TILE_SPLIT(split_equal_to_row_size)); 2035 break; 2036 case 8: 2037 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2038 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2039 break; 2040 case 9: 2041 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2042 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2043 break; 2044 case 10: 2045 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2046 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2047 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2048 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2049 break; 2050 case 11: 2051 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2052 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2053 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2054 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2055 break; 2056 case 12: 2057 gb_tile_moden = 
(ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2058 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2059 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2060 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2061 break; 2062 case 13: 2063 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2064 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2065 break; 2066 case 14: 2067 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2068 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2069 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2070 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2071 break; 2072 case 16: 2073 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2074 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2075 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2077 break; 2078 case 17: 2079 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2080 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2081 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2082 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2083 break; 2084 case 27: 2085 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2086 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2087 break; 2088 case 28: 2089 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2090 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2091 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2093 break; 2094 case 29: 2095 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2096 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2097 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2099 break; 2100 case 30: 2101 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2102 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2103 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2104 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2105 break; 2106 default: 2107 gb_tile_moden = 0; 2108 break; 2109 } 2110 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 2111 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2112 } 2113 } else if (num_rbs < 4) { 2114 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 2115 switch (reg_offset) { 2116 case 0: 2117 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2118 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2119 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2120 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 2121 break; 2122 case 1: 2123 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2124 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2125 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2126 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2127 break; 2128 case 2: 2129 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2130 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2131 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2132 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2133 break; 2134 case 3: 2135 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2136 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2137 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2138 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2139 break; 2140 case 4: 2141 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2142 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2143 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2144 TILE_SPLIT(split_equal_to_row_size)); 2145 break; 2146 case 5: 2147 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2148 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2149 break; 2150 case 6: 2151 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2152 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2153 
PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2154 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2155 break; 2156 case 7: 2157 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2158 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2159 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2160 TILE_SPLIT(split_equal_to_row_size)); 2161 break; 2162 case 8: 2163 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2164 PIPE_CONFIG(ADDR_SURF_P4_8x16)); 2165 break; 2166 case 9: 2167 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2168 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2169 break; 2170 case 10: 2171 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2172 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2173 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2174 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2175 break; 2176 case 11: 2177 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2178 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2179 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2180 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2181 break; 2182 case 12: 2183 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2184 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2185 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2186 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2187 break; 2188 case 13: 2189 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2190 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2191 break; 2192 case 14: 2193 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2194 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2195 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2197 break; 2198 case 16: 2199 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2200 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2201 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2202 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2203 break; 2204 case 17: 2205 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2206 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2207 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2209 break; 2210 case 27: 2211 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2212 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2213 break; 2214 case 28: 2215 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2216 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2217 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2219 break; 2220 case 29: 2221 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2222 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2223 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2224 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2225 break; 2226 case 30: 2227 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2228 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2229 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2231 break; 2232 default: 2233 gb_tile_moden = 0; 2234 break; 2235 } 2236 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 2237 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2238 } 2239 } 2240 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { 2241 switch (reg_offset) { 2242 case 0: 2243 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2244 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2245 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2246 NUM_BANKS(ADDR_SURF_16_BANK)); 2247 break; 2248 case 1: 2249 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2250 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2251 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2252 NUM_BANKS(ADDR_SURF_16_BANK)); 2253 break; 2254 case 2: 2255 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2256 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2257 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2258 NUM_BANKS(ADDR_SURF_16_BANK)); 2259 break; 2260 case 3: 2261 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2262 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2263 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2264 NUM_BANKS(ADDR_SURF_16_BANK)); 2265 break; 2266 case 4: 2267 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2268 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2269 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2270 NUM_BANKS(ADDR_SURF_16_BANK)); 2271 break; 2272 case 5: 2273 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2274 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2275 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2276 NUM_BANKS(ADDR_SURF_8_BANK)); 2277 break; 2278 case 6: 2279 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2280 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2281 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2282 NUM_BANKS(ADDR_SURF_4_BANK)); 2283 break; 2284 case 8: 2285 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2286 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2287 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2288 NUM_BANKS(ADDR_SURF_16_BANK)); 2289 break; 2290 case 9: 2291 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2292 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2293 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2294 NUM_BANKS(ADDR_SURF_16_BANK)); 2295 break; 2296 case 10: 2297 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2298 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2299 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2300 NUM_BANKS(ADDR_SURF_16_BANK)); 2301 break; 2302 case 11: 2303 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2304 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2305 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2306 NUM_BANKS(ADDR_SURF_16_BANK)); 2307 break; 2308 case 12: 2309 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2310 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2311 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2312 NUM_BANKS(ADDR_SURF_16_BANK)); 2313 break; 2314 case 13: 2315 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2316 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2317 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2318 NUM_BANKS(ADDR_SURF_8_BANK)); 2319 break; 2320 case 14: 2321 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2322 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2323 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2324 NUM_BANKS(ADDR_SURF_4_BANK)); 2325 break; 2326 default: 2327 gb_tile_moden = 0; 2328 break; 2329 } 2330 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2331 } 2332 } else if (num_pipe_configs == 2) { 2333 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 2334 switch (reg_offset) { 2335 case 0: 2336 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2337 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2338 PIPE_CONFIG(ADDR_SURF_P2) | 2339 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 2340 break; 2341 case 1: 2342 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2343 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2344 PIPE_CONFIG(ADDR_SURF_P2) | 2345 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2346 break; 2347 case 2: 2348 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2349 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2350 PIPE_CONFIG(ADDR_SURF_P2) | 2351 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2352 break; 2353 case 3: 2354 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2355 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2356 PIPE_CONFIG(ADDR_SURF_P2) | 2357 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2358 break; 2359 case 4: 2360 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2361 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2362 PIPE_CONFIG(ADDR_SURF_P2) | 2363 TILE_SPLIT(split_equal_to_row_size)); 2364 break; 2365 case 5: 2366 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2367 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2368 break; 2369 case 6: 2370 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2371 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2372 PIPE_CONFIG(ADDR_SURF_P2) | 2373 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2374 break; 2375 case 7: 2376 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2377 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2378 PIPE_CONFIG(ADDR_SURF_P2) | 2379 TILE_SPLIT(split_equal_to_row_size)); 2380 break; 2381 case 8: 2382 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED); 2383 break; 2384 case 9: 2385 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2386 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2387 break; 2388 case 10: 2389 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2390 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2391 PIPE_CONFIG(ADDR_SURF_P2) | 2392 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2393 break; 2394 case 11: 2395 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2396 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2397 PIPE_CONFIG(ADDR_SURF_P2) | 2398 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2399 break; 2400 case 12: 2401 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2402 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2403 PIPE_CONFIG(ADDR_SURF_P2) | 2404 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2405 break; 2406 case 13: 2407 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2408 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2409 break; 2410 case 14: 2411 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2412 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2413 PIPE_CONFIG(ADDR_SURF_P2) | 2414 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2415 break; 2416 case 16: 2417 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2418 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2419 PIPE_CONFIG(ADDR_SURF_P2) | 2420 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2421 break; 2422 case 17: 2423 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2424 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2425 PIPE_CONFIG(ADDR_SURF_P2) | 2426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2427 break; 2428 case 27: 2429 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2430 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2431 break; 2432 case 28: 2433 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2434 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2435 PIPE_CONFIG(ADDR_SURF_P2) | 2436 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2437 break; 2438 case 29: 2439 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2440 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2441 PIPE_CONFIG(ADDR_SURF_P2) | 2442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2443 break; 2444 case 30: 2445 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2446 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2447 PIPE_CONFIG(ADDR_SURF_P2) | 2448 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 
2449 break; 2450 default: 2451 gb_tile_moden = 0; 2452 break; 2453 } 2454 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 2455 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2456 } 2457 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { 2458 switch (reg_offset) { 2459 case 0: 2460 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2461 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2462 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2463 NUM_BANKS(ADDR_SURF_16_BANK)); 2464 break; 2465 case 1: 2466 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2467 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2468 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2469 NUM_BANKS(ADDR_SURF_16_BANK)); 2470 break; 2471 case 2: 2472 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2473 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2474 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2475 NUM_BANKS(ADDR_SURF_16_BANK)); 2476 break; 2477 case 3: 2478 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2479 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2480 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2481 NUM_BANKS(ADDR_SURF_16_BANK)); 2482 break; 2483 case 4: 2484 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2485 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2486 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2487 NUM_BANKS(ADDR_SURF_16_BANK)); 2488 break; 2489 case 5: 2490 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2493 NUM_BANKS(ADDR_SURF_16_BANK)); 2494 break; 2495 case 6: 2496 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2497 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2498 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2499 NUM_BANKS(ADDR_SURF_8_BANK)); 2500 break; 2501 case 8: 2502 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2503 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2504 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2505 NUM_BANKS(ADDR_SURF_16_BANK)); 2506 break; 2507 case 9: 2508 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2509 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2510 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2511 NUM_BANKS(ADDR_SURF_16_BANK)); 2512 break; 2513 case 10: 2514 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2515 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2516 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2517 NUM_BANKS(ADDR_SURF_16_BANK)); 2518 break; 2519 case 11: 2520 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2523 NUM_BANKS(ADDR_SURF_16_BANK)); 2524 break; 2525 case 12: 2526 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2527 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2528 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2529 NUM_BANKS(ADDR_SURF_16_BANK)); 2530 break; 2531 case 13: 2532 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2533 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2534 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2535 NUM_BANKS(ADDR_SURF_16_BANK)); 2536 break; 2537 case 14: 2538 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2539 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2540 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2541 NUM_BANKS(ADDR_SURF_8_BANK)); 2542 break; 2543 default: 2544 gb_tile_moden = 0; 2545 break; 2546 } 2547 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2548 } 2549 } else 2550 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs); 2551 } 
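/*
 * The tile mode words programmed above are plain OR-ed bitfields; the
 * driver mirrors them into tile_mode_array[] so userspace can query the
 * exact values written to GB_TILE_MODE0..31.  A minimal sketch of the
 * packing pattern, reusing the cikd.h macros from the table above (this
 * helper is purely illustrative and is not called by the driver):
 */
static inline u32 __maybe_unused
cik_example_depth_tile_mode(u32 pipe_config, u32 tile_split)
{
	/* 2D thin tiled depth surface; caller picks pipe config and split */
	return (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
		MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
		PIPE_CONFIG(pipe_config) |
		TILE_SPLIT(tile_split));
}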
2552 2553 /** 2554 * cik_select_se_sh - select which SE, SH to address 2555 * 2556 * @rdev: radeon_device pointer 2557 * @se_num: shader engine to address 2558 * @sh_num: sh block to address 2559 * 2560 * Select which SE, SH combinations to address. Certain 2561 * registers are instanced per SE or SH. 0xffffffff means 2562 * broadcast to all SEs or SHs (CIK). 2563 */ 2564 static void cik_select_se_sh(struct radeon_device *rdev, 2565 u32 se_num, u32 sh_num) 2566 { 2567 u32 data = INSTANCE_BROADCAST_WRITES; 2568 2569 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) 2570 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES; 2571 else if (se_num == 0xffffffff) 2572 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num); 2573 else if (sh_num == 0xffffffff) 2574 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num); 2575 else 2576 data |= SH_INDEX(sh_num) | SE_INDEX(se_num); 2577 WREG32(GRBM_GFX_INDEX, data); 2578 } 2579 2580 /** 2581 * cik_create_bitmask - create a bitmask 2582 * 2583 * @bit_width: length of the mask 2584 * 2585 * Create a variable length bit mask (CIK). 2586 * Returns the bitmask. 2587 */ 2588 static u32 cik_create_bitmask(u32 bit_width) 2589 { 2590 u32 i, mask = 0; 2591 2592 for (i = 0; i < bit_width; i++) { 2593 mask <<= 1; 2594 mask |= 1; 2595 } 2596 return mask; 2597 } 2598 2599 /** 2600 * cik_get_rb_disabled - get the bitmask of disabled render backends 2601 * 2602 * @rdev: radeon_device pointer 2603 * @max_rb_num: max RBs (render backends) for the asic 2604 * @se_num: number of SEs (shader engines) for the asic 2605 * @sh_per_se: number of SH blocks per SE for the asic 2606 * 2607 * Calculates the bitmask of disabled RBs (CIK). 2608 * Returns the disabled RB bitmask. 2609 */ 2610 static u32 cik_get_rb_disabled(struct radeon_device *rdev, 2611 u32 max_rb_num, u32 se_num, 2612 u32 sh_per_se) 2613 { 2614 u32 data, mask; 2615 2616 data = RREG32(CC_RB_BACKEND_DISABLE); 2617 if (data & 1) 2618 data &= BACKEND_DISABLE_MASK; 2619 else 2620 data = 0; 2621 data |= RREG32(GC_USER_RB_BACKEND_DISABLE); 2622 2623 data >>= BACKEND_DISABLE_SHIFT; 2624 2625 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se); 2626 2627 return data & mask; 2628 } 2629 2630 /** 2631 * cik_setup_rb - setup the RBs on the asic 2632 * 2633 * @rdev: radeon_device pointer 2634 * @se_num: number of SEs (shader engines) for the asic 2635 * @sh_per_se: number of SH blocks per SE for the asic 2636 * @max_rb_num: max RBs (render backends) for the asic 2637 * 2638 * Configures per-SE/SH RB registers (CIK).
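 *
 * A worked example of the helpers above: with max_rb_num = 4, se_num = 2
 * and sh_per_se = 1, each per-SE/SH read in cik_get_rb_disabled() is
 * masked with cik_create_bitmask(4 / 2 / 1) = 0x3, i.e. two RB bits per
 * SH.  cik_create_bitmask(w) is just ((1 << w) - 1) built with a loop so
 * that w == 32 avoids an undefined full-width shift.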
2639 */ 2640 static void cik_setup_rb(struct radeon_device *rdev, 2641 u32 se_num, u32 sh_per_se, 2642 u32 max_rb_num) 2643 { 2644 int i, j; 2645 u32 data, mask; 2646 u32 disabled_rbs = 0; 2647 u32 enabled_rbs = 0; 2648 2649 for (i = 0; i < se_num; i++) { 2650 for (j = 0; j < sh_per_se; j++) { 2651 cik_select_se_sh(rdev, i, j); 2652 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se); 2653 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH); 2654 } 2655 } 2656 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 2657 2658 mask = 1; 2659 for (i = 0; i < max_rb_num; i++) { 2660 if (!(disabled_rbs & mask)) 2661 enabled_rbs |= mask; 2662 mask <<= 1; 2663 } 2664 2665 for (i = 0; i < se_num; i++) { 2666 cik_select_se_sh(rdev, i, 0xffffffff); 2667 data = 0; 2668 for (j = 0; j < sh_per_se; j++) { 2669 switch (enabled_rbs & 3) { 2670 case 1: 2671 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2); 2672 break; 2673 case 2: 2674 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2); 2675 break; 2676 case 3: 2677 default: 2678 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2); 2679 break; 2680 } 2681 enabled_rbs >>= 2; 2682 } 2683 WREG32(PA_SC_RASTER_CONFIG, data); 2684 } 2685 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 2686 } 2687 2688 /** 2689 * cik_gpu_init - setup the 3D engine 2690 * 2691 * @rdev: radeon_device pointer 2692 * 2693 * Configures the 3D engine and tiling configuration 2694 * registers so that the 3D engine is usable. 2695 */ 2696 static void cik_gpu_init(struct radeon_device *rdev) 2697 { 2698 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG); 2699 u32 mc_shared_chmap, mc_arb_ramcfg; 2700 u32 hdp_host_path_cntl; 2701 u32 tmp; 2702 int i, j; 2703 2704 switch (rdev->family) { 2705 case CHIP_BONAIRE: 2706 rdev->config.cik.max_shader_engines = 2; 2707 rdev->config.cik.max_tile_pipes = 4; 2708 rdev->config.cik.max_cu_per_sh = 7; 2709 rdev->config.cik.max_sh_per_se = 1; 2710 rdev->config.cik.max_backends_per_se = 2; 2711 rdev->config.cik.max_texture_channel_caches = 4; 2712 rdev->config.cik.max_gprs = 256; 2713 rdev->config.cik.max_gs_threads = 32; 2714 rdev->config.cik.max_hw_contexts = 8; 2715 2716 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20; 2717 rdev->config.cik.sc_prim_fifo_size_backend = 0x100; 2718 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30; 2719 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130; 2720 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; 2721 break; 2722 case CHIP_KAVERI: 2723 rdev->config.cik.max_shader_engines = 1; 2724 rdev->config.cik.max_tile_pipes = 4; 2725 if ((rdev->pdev->device == 0x1304) || 2726 (rdev->pdev->device == 0x1305) || 2727 (rdev->pdev->device == 0x130C) || 2728 (rdev->pdev->device == 0x130F) || 2729 (rdev->pdev->device == 0x1310) || 2730 (rdev->pdev->device == 0x1311) || 2731 (rdev->pdev->device == 0x131C)) { 2732 rdev->config.cik.max_cu_per_sh = 8; 2733 rdev->config.cik.max_backends_per_se = 2; 2734 } else if ((rdev->pdev->device == 0x1309) || 2735 (rdev->pdev->device == 0x130A) || 2736 (rdev->pdev->device == 0x130D) || 2737 (rdev->pdev->device == 0x1313) || 2738 (rdev->pdev->device == 0x131D)) { 2739 rdev->config.cik.max_cu_per_sh = 6; 2740 rdev->config.cik.max_backends_per_se = 2; 2741 } else if ((rdev->pdev->device == 0x1306) || 2742 (rdev->pdev->device == 0x1307) || 2743 (rdev->pdev->device == 0x130B) || 2744 (rdev->pdev->device == 0x130E) || 2745 (rdev->pdev->device == 0x1315) || 2746 (rdev->pdev->device == 0x131B)) { 2747 rdev->config.cik.max_cu_per_sh = 4; 2748 
rdev->config.cik.max_backends_per_se = 1; 2749 } else { 2750 rdev->config.cik.max_cu_per_sh = 3; 2751 rdev->config.cik.max_backends_per_se = 1; 2752 } 2753 rdev->config.cik.max_sh_per_se = 1; 2754 rdev->config.cik.max_texture_channel_caches = 4; 2755 rdev->config.cik.max_gprs = 256; 2756 rdev->config.cik.max_gs_threads = 16; 2757 rdev->config.cik.max_hw_contexts = 8; 2758 2759 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20; 2760 rdev->config.cik.sc_prim_fifo_size_backend = 0x100; 2761 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30; 2762 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130; 2763 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; 2764 break; 2765 case CHIP_KABINI: 2766 default: 2767 rdev->config.cik.max_shader_engines = 1; 2768 rdev->config.cik.max_tile_pipes = 2; 2769 rdev->config.cik.max_cu_per_sh = 2; 2770 rdev->config.cik.max_sh_per_se = 1; 2771 rdev->config.cik.max_backends_per_se = 1; 2772 rdev->config.cik.max_texture_channel_caches = 2; 2773 rdev->config.cik.max_gprs = 256; 2774 rdev->config.cik.max_gs_threads = 16; 2775 rdev->config.cik.max_hw_contexts = 8; 2776 2777 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20; 2778 rdev->config.cik.sc_prim_fifo_size_backend = 0x100; 2779 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30; 2780 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130; 2781 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; 2782 break; 2783 } 2784 2785 /* Initialize HDP */ 2786 for (i = 0, j = 0; i < 32; i++, j += 0x18) { 2787 WREG32((0x2c14 + j), 0x00000000); 2788 WREG32((0x2c18 + j), 0x00000000); 2789 WREG32((0x2c1c + j), 0x00000000); 2790 WREG32((0x2c20 + j), 0x00000000); 2791 WREG32((0x2c24 + j), 0x00000000); 2792 } 2793 2794 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff)); 2795 2796 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN); 2797 2798 mc_shared_chmap = RREG32(MC_SHARED_CHMAP); 2799 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); 2800 2801 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes; 2802 rdev->config.cik.mem_max_burst_length_bytes = 256; 2803 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT; 2804 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 2805 if (rdev->config.cik.mem_row_size_in_kb > 4) 2806 rdev->config.cik.mem_row_size_in_kb = 4; 2807 /* XXX use MC settings? */ 2808 rdev->config.cik.shader_engine_tile_size = 32; 2809 rdev->config.cik.num_gpus = 1; 2810 rdev->config.cik.multi_gpu_tile_size = 64; 2811 2812 /* fix up row size */ 2813 gb_addr_config &= ~ROW_SIZE_MASK; 2814 switch (rdev->config.cik.mem_row_size_in_kb) { 2815 case 1: 2816 default: 2817 gb_addr_config |= ROW_SIZE(0); 2818 break; 2819 case 2: 2820 gb_addr_config |= ROW_SIZE(1); 2821 break; 2822 case 4: 2823 gb_addr_config |= ROW_SIZE(2); 2824 break; 2825 } 2826 2827 /* setup tiling info dword. gb_addr_config is not adequate since it does 2828 * not have bank info, so create a custom tiling dword. 2829 * bits 3:0 num_pipes 2830 * bits 7:4 num_banks 2831 * bits 11:8 group_size 2832 * bits 15:12 row_size 2833 */ 2834 rdev->config.cik.tile_config = 0; 2835 switch (rdev->config.cik.num_tile_pipes) { 2836 case 1: 2837 rdev->config.cik.tile_config |= (0 << 0); 2838 break; 2839 case 2: 2840 rdev->config.cik.tile_config |= (1 << 0); 2841 break; 2842 case 4: 2843 rdev->config.cik.tile_config |= (2 << 0); 2844 break; 2845 case 8: 2846 default: 2847 /* XXX what about 12? 
*/ 2848 rdev->config.cik.tile_config |= (3 << 0); 2849 break; 2850 } 2851 rdev->config.cik.tile_config |= 2852 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4; 2853 rdev->config.cik.tile_config |= 2854 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8; 2855 rdev->config.cik.tile_config |= 2856 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12; 2857 2858 WREG32(GB_ADDR_CONFIG, gb_addr_config); 2859 WREG32(HDP_ADDR_CONFIG, gb_addr_config); 2860 WREG32(DMIF_ADDR_CALC, gb_addr_config); 2861 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70); 2862 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70); 2863 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config); 2864 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config); 2865 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config); 2866 2867 cik_tiling_mode_table_init(rdev); 2868 2869 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines, 2870 rdev->config.cik.max_sh_per_se, 2871 rdev->config.cik.max_backends_per_se); 2872 2873 /* set HW defaults for 3D engine */ 2874 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60)); 2875 2876 WREG32(SX_DEBUG_1, 0x20); 2877 2878 WREG32(TA_CNTL_AUX, 0x00010000); 2879 2880 tmp = RREG32(SPI_CONFIG_CNTL); 2881 tmp |= 0x03000000; 2882 WREG32(SPI_CONFIG_CNTL, tmp); 2883 2884 WREG32(SQ_CONFIG, 1); 2885 2886 WREG32(DB_DEBUG, 0); 2887 2888 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff; 2889 tmp |= 0x00000400; 2890 WREG32(DB_DEBUG2, tmp); 2891 2892 tmp = RREG32(DB_DEBUG3) & ~0x0002021c; 2893 tmp |= 0x00020200; 2894 WREG32(DB_DEBUG3, tmp); 2895 2896 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000; 2897 tmp |= 0x00018208; 2898 WREG32(CB_HW_CONTROL, tmp); 2899 2900 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4)); 2901 2902 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) | 2903 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) | 2904 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) | 2905 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size))); 2906 2907 WREG32(VGT_NUM_INSTANCES, 1); 2908 2909 WREG32(CP_PERFMON_CNTL, 0); 2910 2911 WREG32(SQ_CONFIG, 0); 2912 2913 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) | 2914 FORCE_EOV_MAX_REZ_CNT(255))); 2915 2916 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) | 2917 AUTO_INVLD_EN(ES_AND_GS_AUTO)); 2918 2919 WREG32(VGT_GS_VERTEX_REUSE, 16); 2920 WREG32(PA_SC_LINE_STIPPLE_STATE, 0); 2921 2922 tmp = RREG32(HDP_MISC_CNTL); 2923 tmp |= HDP_FLUSH_INVALIDATE_CACHE; 2924 WREG32(HDP_MISC_CNTL, tmp); 2925 2926 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL); 2927 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl); 2928 2929 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3)); 2930 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER); 2931 2932 udelay(50); 2933 } 2934 2935 /* 2936 * GPU scratch registers helper functions. 2937 */ 2938 /** 2939 * cik_scratch_init - setup driver info for CP scratch regs 2940 * 2941 * @rdev: radeon_device pointer 2942 * 2943 * Set up the number and offset of the CP scratch registers. 2944 * NOTE: use of CP scratch registers is a legacy interface and 2945 * is not used by default on newer asics (r6xx+). On newer asics, 2946 * memory buffers are used for fences rather than scratch regs.
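 * The allocator below simply hands out fixed MMIO offsets,
 * reg[i] = SCRATCH_REG0 + i * 4, with a per-register free[] flag, so
 * radeon_scratch_get()/radeon_scratch_free() reduce to a linear scan.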
2947 */ 2948 static void cik_scratch_init(struct radeon_device *rdev) 2949 { 2950 int i; 2951 2952 rdev->scratch.num_reg = 7; 2953 rdev->scratch.reg_base = SCRATCH_REG0; 2954 for (i = 0; i < rdev->scratch.num_reg; i++) { 2955 rdev->scratch.free[i] = true; 2956 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4); 2957 } 2958 } 2959 2960 /** 2961 * cik_ring_test - basic gfx ring test 2962 * 2963 * @rdev: radeon_device pointer 2964 * @ring: radeon_ring structure holding ring information 2965 * 2966 * Allocate a scratch register and write to it using the gfx ring (CIK). 2967 * Provides a basic gfx ring test to verify that the ring is working. 2968 * Used by cik_cp_gfx_resume(). 2969 * Returns 0 on success, error on failure. 2970 */ 2971 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) 2972 { 2973 uint32_t scratch; 2974 uint32_t tmp = 0; 2975 unsigned i; 2976 int r; 2977 2978 r = radeon_scratch_get(rdev, &scratch); 2979 if (r) { 2980 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r); 2981 return r; 2982 } 2983 WREG32(scratch, 0xCAFEDEAD); 2984 r = radeon_ring_lock(rdev, ring, 3); 2985 if (r) { 2986 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r); 2987 radeon_scratch_free(rdev, scratch); 2988 return r; 2989 } 2990 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 2991 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2)); 2992 radeon_ring_write(ring, 0xDEADBEEF); 2993 radeon_ring_unlock_commit(rdev, ring); 2994 2995 for (i = 0; i < rdev->usec_timeout; i++) { 2996 tmp = RREG32(scratch); 2997 if (tmp == 0xDEADBEEF) 2998 break; 2999 DRM_UDELAY(1); 3000 } 3001 if (i < rdev->usec_timeout) { 3002 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); 3003 } else { 3004 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n", 3005 ring->idx, scratch, tmp); 3006 r = -EINVAL; 3007 } 3008 radeon_scratch_free(rdev, scratch); 3009 return r; 3010 } 3011 3012 /** 3013 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring 3014 * 3015 * @rdev: radeon_device pointer 3016 * @fence: radeon fence object 3017 * 3018 * Emits a fence sequence number on the gfx ring and flushes 3019 * GPU caches.
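 *
 * Sketch of what the code below puts on the ring: a six dword
 * EVENT_WRITE_EOP packet,
 *
 *	PACKET3(PACKET3_EVENT_WRITE_EOP, 4)
 *	EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
 *		EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5)
 *	addr & 0xfffffffc
 *	(upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2)
 *	fence->seq
 *	0
 *
 * followed by a WRITE_DATA poke of HDP_MEM_COHERENCY_FLUSH_CNTL as an
 * HDP flush (WAIT_REG_MEM would be the preferred packet but hangs the CP).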
3020 */ 3021 void cik_fence_gfx_ring_emit(struct radeon_device *rdev, 3022 struct radeon_fence *fence) 3023 { 3024 struct radeon_ring *ring = &rdev->ring[fence->ring]; 3025 u64 addr = rdev->fence_drv[fence->ring].gpu_addr; 3026 3027 /* EVENT_WRITE_EOP - flush caches, send int */ 3028 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 3029 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN | 3030 EOP_TC_ACTION_EN | 3031 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 3032 EVENT_INDEX(5))); 3033 radeon_ring_write(ring, addr & 0xfffffffc); 3034 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2)); 3035 radeon_ring_write(ring, fence->seq); 3036 radeon_ring_write(ring, 0); 3037 /* HDP flush */ 3038 /* We should be using the new WAIT_REG_MEM special op packet here 3039 * but it causes the CP to hang 3040 */ 3041 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 3042 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 3043 WRITE_DATA_DST_SEL(0))); 3044 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2); 3045 radeon_ring_write(ring, 0); 3046 radeon_ring_write(ring, 0); 3047 } 3048 3049 /** 3050 * cik_fence_compute_ring_emit - emit a fence on the compute ring 3051 * 3052 * @rdev: radeon_device pointer 3053 * @fence: radeon fence object 3054 * 3055 * Emits a fence sequnce number on the compute ring and flushes 3056 * GPU caches. 3057 */ 3058 void cik_fence_compute_ring_emit(struct radeon_device *rdev, 3059 struct radeon_fence *fence) 3060 { 3061 struct radeon_ring *ring = &rdev->ring[fence->ring]; 3062 u64 addr = rdev->fence_drv[fence->ring].gpu_addr; 3063 3064 /* RELEASE_MEM - flush caches, send int */ 3065 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5)); 3066 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN | 3067 EOP_TC_ACTION_EN | 3068 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 3069 EVENT_INDEX(5))); 3070 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2)); 3071 radeon_ring_write(ring, addr & 0xfffffffc); 3072 radeon_ring_write(ring, upper_32_bits(addr)); 3073 radeon_ring_write(ring, fence->seq); 3074 radeon_ring_write(ring, 0); 3075 /* HDP flush */ 3076 /* We should be using the new WAIT_REG_MEM special op packet here 3077 * but it causes the CP to hang 3078 */ 3079 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 3080 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 3081 WRITE_DATA_DST_SEL(0))); 3082 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2); 3083 radeon_ring_write(ring, 0); 3084 radeon_ring_write(ring, 0); 3085 } 3086 3087 void cik_semaphore_ring_emit(struct radeon_device *rdev, 3088 struct radeon_ring *ring, 3089 struct radeon_semaphore *semaphore, 3090 bool emit_wait) 3091 { 3092 uint64_t addr = semaphore->gpu_addr; 3093 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL; 3094 3095 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1)); 3096 radeon_ring_write(ring, addr & 0xffffffff); 3097 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel); 3098 } 3099 3100 /* 3101 * IB stuff 3102 */ 3103 /** 3104 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring 3105 * 3106 * @rdev: radeon_device pointer 3107 * @ib: radeon indirect buffer object 3108 * 3109 * Emits an DE (drawing engine) or CE (constant engine) IB 3110 * on the gfx ring. IBs are usually generated by userspace 3111 * acceleration drivers and submitted to the kernel for 3112 * sheduling on the ring. This function schedules the IB 3113 * on the gfx ring for execution by the GPU. 
3114 */ 3115 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) 3116 { 3117 struct radeon_ring *ring = &rdev->ring[ib->ring]; 3118 u32 header, control = INDIRECT_BUFFER_VALID; 3119 3120 if (ib->is_const_ib) { 3121 /* set switch buffer packet before const IB */ 3122 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 3123 radeon_ring_write(ring, 0); 3124 3125 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 3126 } else { 3127 u32 next_rptr; 3128 if (ring->rptr_save_reg) { 3129 next_rptr = ring->wptr + 3 + 4; 3130 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 3131 radeon_ring_write(ring, ((ring->rptr_save_reg - 3132 PACKET3_SET_UCONFIG_REG_START) >> 2)); 3133 radeon_ring_write(ring, next_rptr); 3134 } else if (rdev->wb.enabled) { 3135 next_rptr = ring->wptr + 5 + 4; 3136 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 3137 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1)); 3138 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); 3139 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); 3140 radeon_ring_write(ring, next_rptr); 3141 } 3142 3143 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 3144 } 3145 3146 control |= ib->length_dw | 3147 (ib->vm ? (ib->vm->id << 24) : 0); 3148 3149 radeon_ring_write(ring, header); 3150 radeon_ring_write(ring, 3151 #ifdef __BIG_ENDIAN 3152 (2 << 0) | 3153 #endif 3154 (ib->gpu_addr & 0xFFFFFFFC)); 3155 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 3156 radeon_ring_write(ring, control); 3157 } 3158 3159 /** 3160 * cik_ib_test - basic gfx ring IB test 3161 * 3162 * @rdev: radeon_device pointer 3163 * @ring: radeon_ring structure holding ring information 3164 * 3165 * Allocate an IB and execute it on the gfx ring (CIK). 3166 * Provides a basic gfx ring test to verify that IBs are working. 3167 * Returns 0 on success, error on failure. 
3168 */ 3169 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) 3170 { 3171 struct radeon_ib ib; 3172 uint32_t scratch; 3173 uint32_t tmp = 0; 3174 unsigned i; 3175 int r; 3176 3177 r = radeon_scratch_get(rdev, &scratch); 3178 if (r) { 3179 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r); 3180 return r; 3181 } 3182 WREG32(scratch, 0xCAFEDEAD); 3183 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); 3184 if (r) { 3185 DRM_ERROR("radeon: failed to get ib (%d).\n", r); 3186 radeon_scratch_free(rdev, scratch); 3187 return r; 3188 } 3189 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 3190 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2); 3191 ib.ptr[2] = 0xDEADBEEF; 3192 ib.length_dw = 3; 3193 r = radeon_ib_schedule(rdev, &ib, NULL); 3194 if (r) { 3195 radeon_scratch_free(rdev, scratch); 3196 radeon_ib_free(rdev, &ib); 3197 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); 3198 return r; 3199 } 3200 r = radeon_fence_wait(ib.fence, false); 3201 if (r) { 3202 DRM_ERROR("radeon: fence wait failed (%d).\n", r); 3203 radeon_scratch_free(rdev, scratch); 3204 radeon_ib_free(rdev, &ib); 3205 return r; 3206 } 3207 for (i = 0; i < rdev->usec_timeout; i++) { 3208 tmp = RREG32(scratch); 3209 if (tmp == 0xDEADBEEF) 3210 break; 3211 DRM_UDELAY(1); 3212 } 3213 if (i < rdev->usec_timeout) { 3214 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); 3215 } else { 3216 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n", 3217 scratch, tmp); 3218 r = -EINVAL; 3219 } 3220 radeon_scratch_free(rdev, scratch); 3221 radeon_ib_free(rdev, &ib); 3222 return r; 3223 } 3224 3225 /* 3226 * CP. 3227 * On CIK, gfx and compute now have independant command processors. 3228 * 3229 * GFX 3230 * Gfx consists of a single ring and can process both gfx jobs and 3231 * compute jobs. The gfx CP consists of three microengines (ME): 3232 * PFP - Pre-Fetch Parser 3233 * ME - Micro Engine 3234 * CE - Constant Engine 3235 * The PFP and ME make up what is considered the Drawing Engine (DE). 3236 * The CE is an asynchronous engine used for updating buffer desciptors 3237 * used by the DE so that they can be loaded into cache in parallel 3238 * while the DE is processing state update packets. 3239 * 3240 * Compute 3241 * The compute CP consists of two microengines (ME): 3242 * MEC1 - Compute MicroEngine 1 3243 * MEC2 - Compute MicroEngine 2 3244 * Each MEC supports 4 compute pipes and each pipe supports 8 queues. 3245 * The queues are exposed to userspace and are programmed directly 3246 * by the compute runtime. 3247 */ 3248 /** 3249 * cik_cp_gfx_enable - enable/disable the gfx CP MEs 3250 * 3251 * @rdev: radeon_device pointer 3252 * @enable: enable or disable the MEs 3253 * 3254 * Halts or unhalts the gfx MEs. 3255 */ 3256 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable) 3257 { 3258 if (enable) 3259 WREG32(CP_ME_CNTL, 0); 3260 else { 3261 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT)); 3262 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 3263 } 3264 udelay(50); 3265 } 3266 3267 /** 3268 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode 3269 * 3270 * @rdev: radeon_device pointer 3271 * 3272 * Loads the gfx PFP, ME, and CE ucode. 3273 * Returns 0 for success, -EINVAL if the ucode is not available. 
3274 */ 3275 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev) 3276 { 3277 const __be32 *fw_data; 3278 int i; 3279 3280 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw) 3281 return -EINVAL; 3282 3283 cik_cp_gfx_enable(rdev, false); 3284 3285 /* PFP */ 3286 fw_data = (const __be32 *)rdev->pfp_fw->data; 3287 WREG32(CP_PFP_UCODE_ADDR, 0); 3288 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++) 3289 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++)); 3290 WREG32(CP_PFP_UCODE_ADDR, 0); 3291 3292 /* CE */ 3293 fw_data = (const __be32 *)rdev->ce_fw->data; 3294 WREG32(CP_CE_UCODE_ADDR, 0); 3295 for (i = 0; i < CIK_CE_UCODE_SIZE; i++) 3296 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++)); 3297 WREG32(CP_CE_UCODE_ADDR, 0); 3298 3299 /* ME */ 3300 fw_data = (const __be32 *)rdev->me_fw->data; 3301 WREG32(CP_ME_RAM_WADDR, 0); 3302 for (i = 0; i < CIK_ME_UCODE_SIZE; i++) 3303 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++)); 3304 WREG32(CP_ME_RAM_WADDR, 0); 3305 3306 WREG32(CP_PFP_UCODE_ADDR, 0); 3307 WREG32(CP_CE_UCODE_ADDR, 0); 3308 WREG32(CP_ME_RAM_WADDR, 0); 3309 WREG32(CP_ME_RAM_RADDR, 0); 3310 return 0; 3311 } 3312 3313 /** 3314 * cik_cp_gfx_start - start the gfx ring 3315 * 3316 * @rdev: radeon_device pointer 3317 * 3318 * Enables the ring and loads the clear state context and other 3319 * packets required to init the ring. 3320 * Returns 0 for success, error for failure. 3321 */ 3322 static int cik_cp_gfx_start(struct radeon_device *rdev) 3323 { 3324 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 3325 int r, i; 3326 3327 /* init the CP */ 3328 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1); 3329 WREG32(CP_ENDIAN_SWAP, 0); 3330 WREG32(CP_DEVICE_ID, 1); 3331 3332 cik_cp_gfx_enable(rdev, true); 3333 3334 r = radeon_ring_lock(rdev, ring, cik_default_size + 17); 3335 if (r) { 3336 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); 3337 return r; 3338 } 3339 3340 /* init the CE partitions. CE only used for gfx on CIK */ 3341 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 3342 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 3343 radeon_ring_write(ring, 0xc000); 3344 radeon_ring_write(ring, 0xc000); 3345 3346 /* setup clear context state */ 3347 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3348 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3349 3350 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3351 radeon_ring_write(ring, 0x80000000); 3352 radeon_ring_write(ring, 0x80000000); 3353 3354 for (i = 0; i < cik_default_size; i++) 3355 radeon_ring_write(ring, cik_default_state[i]); 3356 3357 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3358 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3359 3360 /* set clear context state */ 3361 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3362 radeon_ring_write(ring, 0); 3363 3364 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 3365 radeon_ring_write(ring, 0x00000316); 3366 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ 3367 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */ 3368 3369 radeon_ring_unlock_commit(rdev, ring); 3370 3371 return 0; 3372 } 3373 3374 /** 3375 * cik_cp_gfx_fini - stop the gfx ring 3376 * 3377 * @rdev: radeon_device pointer 3378 * 3379 * Stop the gfx ring and tear down the driver ring 3380 * info. 
3381 */ 3382 static void cik_cp_gfx_fini(struct radeon_device *rdev) 3383 { 3384 cik_cp_gfx_enable(rdev, false); 3385 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); 3386 } 3387 3388 /** 3389 * cik_cp_gfx_resume - setup the gfx ring buffer registers 3390 * 3391 * @rdev: radeon_device pointer 3392 * 3393 * Program the location and size of the gfx ring buffer 3394 * and test it to make sure it's working. 3395 * Returns 0 for success, error for failure. 3396 */ 3397 static int cik_cp_gfx_resume(struct radeon_device *rdev) 3398 { 3399 struct radeon_ring *ring; 3400 u32 tmp; 3401 u32 rb_bufsz; 3402 u64 rb_addr; 3403 int r; 3404 3405 WREG32(CP_SEM_WAIT_TIMER, 0x0); 3406 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0); 3407 3408 /* Set the write pointer delay */ 3409 WREG32(CP_RB_WPTR_DELAY, 0); 3410 3411 /* set the RB to use vmid 0 */ 3412 WREG32(CP_RB_VMID, 0); 3413 3414 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF); 3415 3416 /* ring 0 - compute and gfx */ 3417 /* Set ring buffer size */ 3418 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 3419 rb_bufsz = order_base_2(ring->ring_size / 8); 3420 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; 3421 #ifdef __BIG_ENDIAN 3422 tmp |= BUF_SWAP_32BIT; 3423 #endif 3424 WREG32(CP_RB0_CNTL, tmp); 3425 3426 /* Initialize the ring buffer's read and write pointers */ 3427 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA); 3428 ring->wptr = 0; 3429 WREG32(CP_RB0_WPTR, ring->wptr); 3430 3431 /* set the wb address wether it's enabled or not */ 3432 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC); 3433 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF); 3434 3435 /* scratch register shadowing is no longer supported */ 3436 WREG32(SCRATCH_UMSK, 0); 3437 3438 if (!rdev->wb.enabled) 3439 tmp |= RB_NO_UPDATE; 3440 3441 mdelay(1); 3442 WREG32(CP_RB0_CNTL, tmp); 3443 3444 rb_addr = ring->gpu_addr >> 8; 3445 WREG32(CP_RB0_BASE, rb_addr); 3446 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3447 3448 ring->rptr = RREG32(CP_RB0_RPTR); 3449 3450 /* start the ring */ 3451 cik_cp_gfx_start(rdev); 3452 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true; 3453 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); 3454 if (r) { 3455 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 3456 return r; 3457 } 3458 return 0; 3459 } 3460 3461 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev, 3462 struct radeon_ring *ring) 3463 { 3464 u32 rptr; 3465 3466 3467 3468 if (rdev->wb.enabled) { 3469 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]); 3470 } else { 3471 mutex_lock(&rdev->srbm_mutex); 3472 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); 3473 rptr = RREG32(CP_HQD_PQ_RPTR); 3474 cik_srbm_select(rdev, 0, 0, 0, 0); 3475 mutex_unlock(&rdev->srbm_mutex); 3476 } 3477 3478 return rptr; 3479 } 3480 3481 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev, 3482 struct radeon_ring *ring) 3483 { 3484 u32 wptr; 3485 3486 if (rdev->wb.enabled) { 3487 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]); 3488 } else { 3489 mutex_lock(&rdev->srbm_mutex); 3490 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); 3491 wptr = RREG32(CP_HQD_PQ_WPTR); 3492 cik_srbm_select(rdev, 0, 0, 0, 0); 3493 mutex_unlock(&rdev->srbm_mutex); 3494 } 3495 3496 return wptr; 3497 } 3498 3499 void cik_compute_ring_set_wptr(struct radeon_device *rdev, 3500 struct radeon_ring *ring) 3501 { 3502 
rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr); 3503 WDOORBELL32(ring->doorbell_offset, ring->wptr); 3504 } 3505 3506 /** 3507 * cik_cp_compute_enable - enable/disable the compute CP MEs 3508 * 3509 * @rdev: radeon_device pointer 3510 * @enable: enable or disable the MEs 3511 * 3512 * Halts or unhalts the compute MEs. 3513 */ 3514 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable) 3515 { 3516 if (enable) 3517 WREG32(CP_MEC_CNTL, 0); 3518 else 3519 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT)); 3520 udelay(50); 3521 } 3522 3523 /** 3524 * cik_cp_compute_load_microcode - load the compute CP ME ucode 3525 * 3526 * @rdev: radeon_device pointer 3527 * 3528 * Loads the compute MEC1&2 ucode. 3529 * Returns 0 for success, -EINVAL if the ucode is not available. 3530 */ 3531 static int cik_cp_compute_load_microcode(struct radeon_device *rdev) 3532 { 3533 const __be32 *fw_data; 3534 int i; 3535 3536 if (!rdev->mec_fw) 3537 return -EINVAL; 3538 3539 cik_cp_compute_enable(rdev, false); 3540 3541 /* MEC1 */ 3542 fw_data = (const __be32 *)rdev->mec_fw->data; 3543 WREG32(CP_MEC_ME1_UCODE_ADDR, 0); 3544 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++) 3545 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++)); 3546 WREG32(CP_MEC_ME1_UCODE_ADDR, 0); 3547 3548 if (rdev->family == CHIP_KAVERI) { 3549 /* MEC2 */ 3550 fw_data = (const __be32 *)rdev->mec_fw->data; 3551 WREG32(CP_MEC_ME2_UCODE_ADDR, 0); 3552 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++) 3553 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++)); 3554 WREG32(CP_MEC_ME2_UCODE_ADDR, 0); 3555 } 3556 3557 return 0; 3558 } 3559 3560 /** 3561 * cik_cp_compute_start - start the compute queues 3562 * 3563 * @rdev: radeon_device pointer 3564 * 3565 * Enable the compute queues. 3566 * Returns 0 for success, error for failure. 3567 */ 3568 static int cik_cp_compute_start(struct radeon_device *rdev) 3569 { 3570 cik_cp_compute_enable(rdev, true); 3571 3572 return 0; 3573 } 3574 3575 /** 3576 * cik_cp_compute_fini - stop the compute queues 3577 * 3578 * @rdev: radeon_device pointer 3579 * 3580 * Stop the compute queues and tear down the driver queue 3581 * info. 
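 * Each queue's MQD buffer object is reserved, unpinned, unreserved and
 * finally unreferenced, mirroring the allocation done in
 * cik_cp_compute_resume().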
3582 */ 3583 static void cik_cp_compute_fini(struct radeon_device *rdev) 3584 { 3585 int i, idx, r; 3586 3587 cik_cp_compute_enable(rdev, false); 3588 3589 for (i = 0; i < 2; i++) { 3590 if (i == 0) 3591 idx = CAYMAN_RING_TYPE_CP1_INDEX; 3592 else 3593 idx = CAYMAN_RING_TYPE_CP2_INDEX; 3594 3595 if (rdev->ring[idx].mqd_obj) { 3596 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false); 3597 if (unlikely(r != 0)) 3598 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r); 3599 3600 radeon_bo_unpin(rdev->ring[idx].mqd_obj); 3601 radeon_bo_unreserve(rdev->ring[idx].mqd_obj); 3602 3603 radeon_bo_unref(&rdev->ring[idx].mqd_obj); 3604 rdev->ring[idx].mqd_obj = NULL; 3605 } 3606 } 3607 } 3608 3609 static void cik_mec_fini(struct radeon_device *rdev) 3610 { 3611 int r; 3612 3613 if (rdev->mec.hpd_eop_obj) { 3614 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false); 3615 if (unlikely(r != 0)) 3616 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r); 3617 radeon_bo_unpin(rdev->mec.hpd_eop_obj); 3618 radeon_bo_unreserve(rdev->mec.hpd_eop_obj); 3619 3620 radeon_bo_unref(&rdev->mec.hpd_eop_obj); 3621 rdev->mec.hpd_eop_obj = NULL; 3622 } 3623 } 3624 3625 #define MEC_HPD_SIZE 2048 3626 3627 static int cik_mec_init(struct radeon_device *rdev) 3628 { 3629 int r; 3630 u32 *hpd; 3631 3632 /* 3633 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total 3634 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total 3635 */ 3636 if (rdev->family == CHIP_KAVERI) 3637 rdev->mec.num_mec = 2; 3638 else 3639 rdev->mec.num_mec = 1; 3640 rdev->mec.num_pipe = 4; 3641 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8; 3642 3643 if (rdev->mec.hpd_eop_obj == NULL) { 3644 r = radeon_bo_create(rdev, 3645 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2, 3646 PAGE_SIZE, true, 3647 RADEON_GEM_DOMAIN_GTT, NULL, 3648 &rdev->mec.hpd_eop_obj); 3649 if (r) { 3650 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r); 3651 return r; 3652 } 3653 } 3654 3655 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false); 3656 if (unlikely(r != 0)) { 3657 cik_mec_fini(rdev); 3658 return r; 3659 } 3660 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT, 3661 &rdev->mec.hpd_eop_gpu_addr); 3662 if (r) { 3663 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r); 3664 cik_mec_fini(rdev); 3665 return r; 3666 } 3667 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd); 3668 if (r) { 3669 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r); 3670 cik_mec_fini(rdev); 3671 return r; 3672 } 3673 3674 /* clear memory. 
Not sure if this is required or not */ 3675 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2); 3676 3677 radeon_bo_kunmap(rdev->mec.hpd_eop_obj); 3678 radeon_bo_unreserve(rdev->mec.hpd_eop_obj); 3679 3680 return 0; 3681 } 3682 3683 struct hqd_registers 3684 { 3685 u32 cp_mqd_base_addr; 3686 u32 cp_mqd_base_addr_hi; 3687 u32 cp_hqd_active; 3688 u32 cp_hqd_vmid; 3689 u32 cp_hqd_persistent_state; 3690 u32 cp_hqd_pipe_priority; 3691 u32 cp_hqd_queue_priority; 3692 u32 cp_hqd_quantum; 3693 u32 cp_hqd_pq_base; 3694 u32 cp_hqd_pq_base_hi; 3695 u32 cp_hqd_pq_rptr; 3696 u32 cp_hqd_pq_rptr_report_addr; 3697 u32 cp_hqd_pq_rptr_report_addr_hi; 3698 u32 cp_hqd_pq_wptr_poll_addr; 3699 u32 cp_hqd_pq_wptr_poll_addr_hi; 3700 u32 cp_hqd_pq_doorbell_control; 3701 u32 cp_hqd_pq_wptr; 3702 u32 cp_hqd_pq_control; 3703 u32 cp_hqd_ib_base_addr; 3704 u32 cp_hqd_ib_base_addr_hi; 3705 u32 cp_hqd_ib_rptr; 3706 u32 cp_hqd_ib_control; 3707 u32 cp_hqd_iq_timer; 3708 u32 cp_hqd_iq_rptr; 3709 u32 cp_hqd_dequeue_request; 3710 u32 cp_hqd_dma_offload; 3711 u32 cp_hqd_sema_cmd; 3712 u32 cp_hqd_msg_type; 3713 u32 cp_hqd_atomic0_preop_lo; 3714 u32 cp_hqd_atomic0_preop_hi; 3715 u32 cp_hqd_atomic1_preop_lo; 3716 u32 cp_hqd_atomic1_preop_hi; 3717 u32 cp_hqd_hq_scheduler0; 3718 u32 cp_hqd_hq_scheduler1; 3719 u32 cp_mqd_control; 3720 }; 3721 3722 struct bonaire_mqd 3723 { 3724 u32 header; 3725 u32 dispatch_initiator; 3726 u32 dimensions[3]; 3727 u32 start_idx[3]; 3728 u32 num_threads[3]; 3729 u32 pipeline_stat_enable; 3730 u32 perf_counter_enable; 3731 u32 pgm[2]; 3732 u32 tba[2]; 3733 u32 tma[2]; 3734 u32 pgm_rsrc[2]; 3735 u32 vmid; 3736 u32 resource_limits; 3737 u32 static_thread_mgmt01[2]; 3738 u32 tmp_ring_size; 3739 u32 static_thread_mgmt23[2]; 3740 u32 restart[3]; 3741 u32 thread_trace_enable; 3742 u32 reserved1; 3743 u32 user_data[16]; 3744 u32 vgtcs_invoke_count[2]; 3745 struct hqd_registers queue_state; 3746 u32 dequeue_cntr; 3747 u32 interrupt_queue[64]; 3748 }; 3749 3750 /** 3751 * cik_cp_compute_resume - setup the compute queue registers 3752 * 3753 * @rdev: radeon_device pointer 3754 * 3755 * Program the compute queues and test them to make sure they 3756 * are working. 3757 * Returns 0 for success, error for failure. 3758 */ 3759 static int cik_cp_compute_resume(struct radeon_device *rdev) 3760 { 3761 int r, i, idx; 3762 u32 tmp; 3763 bool use_doorbell = true; 3764 u64 hqd_gpu_addr; 3765 u64 mqd_gpu_addr; 3766 u64 eop_gpu_addr; 3767 u64 wb_gpu_addr; 3768 u32 *buf; 3769 struct bonaire_mqd *mqd; 3770 3771 r = cik_cp_compute_start(rdev); 3772 if (r) 3773 return r; 3774 3775 /* fix up chicken bits */ 3776 tmp = RREG32(CP_CPF_DEBUG); 3777 tmp |= (1 << 23); 3778 WREG32(CP_CPF_DEBUG, tmp); 3779 3780 /* init the pipes */ 3781 mutex_lock(&rdev->srbm_mutex); 3782 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) { 3783 int me = (i < 4) ? 1 : 2; 3784 int pipe = (i < 4) ? 
i : (i - 4); 3785 3786 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2); 3787 3788 cik_srbm_select(rdev, me, pipe, 0, 0); 3789 3790 /* write the EOP addr */ 3791 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); 3792 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); 3793 3794 /* set the VMID assigned */ 3795 WREG32(CP_HPD_EOP_VMID, 0); 3796 3797 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3798 tmp = RREG32(CP_HPD_EOP_CONTROL); 3799 tmp &= ~EOP_SIZE_MASK; 3800 tmp |= order_base_2(MEC_HPD_SIZE / 8); 3801 WREG32(CP_HPD_EOP_CONTROL, tmp); 3802 } 3803 cik_srbm_select(rdev, 0, 0, 0, 0); 3804 mutex_unlock(&rdev->srbm_mutex); 3805 3806 /* init the queues. Just two for now. */ 3807 for (i = 0; i < 2; i++) { 3808 if (i == 0) 3809 idx = CAYMAN_RING_TYPE_CP1_INDEX; 3810 else 3811 idx = CAYMAN_RING_TYPE_CP2_INDEX; 3812 3813 if (rdev->ring[idx].mqd_obj == NULL) { 3814 r = radeon_bo_create(rdev, 3815 sizeof(struct bonaire_mqd), 3816 PAGE_SIZE, true, 3817 RADEON_GEM_DOMAIN_GTT, NULL, 3818 &rdev->ring[idx].mqd_obj); 3819 if (r) { 3820 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r); 3821 return r; 3822 } 3823 } 3824 3825 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false); 3826 if (unlikely(r != 0)) { 3827 cik_cp_compute_fini(rdev); 3828 return r; 3829 } 3830 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT, 3831 &mqd_gpu_addr); 3832 if (r) { 3833 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r); 3834 cik_cp_compute_fini(rdev); 3835 return r; 3836 } 3837 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf); 3838 if (r) { 3839 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r); 3840 cik_cp_compute_fini(rdev); 3841 return r; 3842 } 3843 3844 /* doorbell offset */ 3845 rdev->ring[idx].doorbell_offset = 3846 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0; 3847 3848 /* init the mqd struct */ 3849 memset(buf, 0, sizeof(struct bonaire_mqd)); 3850 3851 mqd = (struct bonaire_mqd *)buf; 3852 mqd->header = 0xC0310800; 3853 mqd->static_thread_mgmt01[0] = 0xffffffff; 3854 mqd->static_thread_mgmt01[1] = 0xffffffff; 3855 mqd->static_thread_mgmt23[0] = 0xffffffff; 3856 mqd->static_thread_mgmt23[1] = 0xffffffff; 3857 3858 mutex_lock(&rdev->srbm_mutex); 3859 cik_srbm_select(rdev, rdev->ring[idx].me, 3860 rdev->ring[idx].pipe, 3861 rdev->ring[idx].queue, 0); 3862 3863 /* disable wptr polling */ 3864 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL); 3865 tmp &= ~WPTR_POLL_EN; 3866 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp); 3867 3868 /* enable doorbell? 
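The current control value is captured below; the final doorbell offset/enable is programmed again further down once the queue is fully configured.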
*/ 3869 mqd->queue_state.cp_hqd_pq_doorbell_control = 3870 RREG32(CP_HQD_PQ_DOORBELL_CONTROL); 3871 if (use_doorbell) 3872 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN; 3873 else 3874 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN; 3875 WREG32(CP_HQD_PQ_DOORBELL_CONTROL, 3876 mqd->queue_state.cp_hqd_pq_doorbell_control); 3877 3878 /* disable the queue if it's active */ 3879 mqd->queue_state.cp_hqd_dequeue_request = 0; 3880 mqd->queue_state.cp_hqd_pq_rptr = 0; 3881 mqd->queue_state.cp_hqd_pq_wptr = 0; 3882 if (RREG32(CP_HQD_ACTIVE) & 1) { 3883 WREG32(CP_HQD_DEQUEUE_REQUEST, 1); /* use j here: i indexes the outer queue loop and must not be clobbered */ 3884 for (j = 0; j < rdev->usec_timeout; j++) { 3885 if (!(RREG32(CP_HQD_ACTIVE) & 1)) 3886 break; 3887 udelay(1); 3888 } 3889 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request); 3890 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr); 3891 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); 3892 } 3893 3894 /* set the pointer to the MQD */ 3895 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc; 3896 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); 3897 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr); 3898 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi); 3899 /* set MQD vmid to 0 */ 3900 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL); 3901 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK; 3902 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control); 3903 3904 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */ 3905 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8; 3906 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr; 3907 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3908 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base); 3909 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi); 3910 3911 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3912 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL); 3913 mqd->queue_state.cp_hqd_pq_control &= 3914 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK); 3915 3916 mqd->queue_state.cp_hqd_pq_control |= 3917 order_base_2(rdev->ring[idx].ring_size / 8); 3918 mqd->queue_state.cp_hqd_pq_control |= 3919 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8); 3920 #ifdef __BIG_ENDIAN 3921 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT; 3922 #endif 3923 mqd->queue_state.cp_hqd_pq_control &= 3924 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE); 3925 mqd->queue_state.cp_hqd_pq_control |= 3926 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */ 3927 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control); 3928 3929 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */ 3930 if (i == 0) 3931 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET; 3932 else 3933 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET; 3934 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; 3935 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3936 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr); 3937 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI, 3938 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi); 3939 3940 /* set the wb address whether it's enabled or not */ 3941 if (i == 0) 3942 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET; 3943 else 3944 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET; 3945 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc; 3946 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi = 3947
upper_32_bits(wb_gpu_addr) & 0xffff; 3948 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR, 3949 mqd->queue_state.cp_hqd_pq_rptr_report_addr); 3950 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3951 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi); 3952 3953 /* enable the doorbell if requested */ 3954 if (use_doorbell) { 3955 mqd->queue_state.cp_hqd_pq_doorbell_control = 3956 RREG32(CP_HQD_PQ_DOORBELL_CONTROL); 3957 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK; 3958 mqd->queue_state.cp_hqd_pq_doorbell_control |= 3959 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4); 3960 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN; 3961 mqd->queue_state.cp_hqd_pq_doorbell_control &= 3962 ~(DOORBELL_SOURCE | DOORBELL_HIT); 3963 3964 } else { 3965 mqd->queue_state.cp_hqd_pq_doorbell_control = 0; 3966 } 3967 WREG32(CP_HQD_PQ_DOORBELL_CONTROL, 3968 mqd->queue_state.cp_hqd_pq_doorbell_control); 3969 3970 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3971 rdev->ring[idx].wptr = 0; 3972 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr; 3973 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); 3974 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR); 3975 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr; 3976 3977 /* set the vmid for the queue */ 3978 mqd->queue_state.cp_hqd_vmid = 0; 3979 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid); 3980 3981 /* activate the queue */ 3982 mqd->queue_state.cp_hqd_active = 1; 3983 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); 3984 3985 cik_srbm_select(rdev, 0, 0, 0, 0); 3986 mutex_unlock(&rdev->srbm_mutex); 3987 3988 radeon_bo_kunmap(rdev->ring[idx].mqd_obj); 3989 radeon_bo_unreserve(rdev->ring[idx].mqd_obj); 3990 3991 rdev->ring[idx].ready = true; 3992 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]); 3993 if (r) 3994 rdev->ring[idx].ready = false; 3995 } 3996 3997 return 0; 3998 } 3999 4000 static void cik_cp_enable(struct radeon_device *rdev, bool enable) 4001 { 4002 cik_cp_gfx_enable(rdev, enable); 4003 cik_cp_compute_enable(rdev, enable); 4004 } 4005 4006 static int cik_cp_load_microcode(struct radeon_device *rdev) 4007 { 4008 int r; 4009 4010 r = cik_cp_gfx_load_microcode(rdev); 4011 if (r) 4012 return r; 4013 r = cik_cp_compute_load_microcode(rdev); 4014 if (r) 4015 return r; 4016 4017 return 0; 4018 } 4019 4020 static void cik_cp_fini(struct radeon_device *rdev) 4021 { 4022 cik_cp_gfx_fini(rdev); 4023 cik_cp_compute_fini(rdev); 4024 } 4025 4026 static int cik_cp_resume(struct radeon_device *rdev) 4027 { 4028 int r; 4029 4030 cik_enable_gui_idle_interrupt(rdev, false); 4031 4032 r = cik_cp_load_microcode(rdev); 4033 if (r) 4034 return r; 4035 4036 r = cik_cp_gfx_resume(rdev); 4037 if (r) 4038 return r; 4039 r = cik_cp_compute_resume(rdev); 4040 if (r) 4041 return r; 4042 4043 cik_enable_gui_idle_interrupt(rdev, true); 4044 4045 return 0; 4046 } 4047 4048 static void cik_print_gpu_status_regs(struct radeon_device *rdev) 4049 { 4050 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n", 4051 RREG32(GRBM_STATUS)); 4052 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n", 4053 RREG32(GRBM_STATUS2)); 4054 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n", 4055 RREG32(GRBM_STATUS_SE0)); 4056 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n", 4057 RREG32(GRBM_STATUS_SE1)); 4058 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n", 4059 RREG32(GRBM_STATUS_SE2)); 4060 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n", 4061 RREG32(GRBM_STATUS_SE3)); 4062 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", 4063 RREG32(SRBM_STATUS)); 4064 dev_info(rdev->dev, " 
SRBM_STATUS2=0x%08X\n", 4065 RREG32(SRBM_STATUS2)); 4066 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n", 4067 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET)); 4068 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n", 4069 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET)); 4070 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT)); 4071 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n", 4072 RREG32(CP_STALLED_STAT1)); 4073 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n", 4074 RREG32(CP_STALLED_STAT2)); 4075 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n", 4076 RREG32(CP_STALLED_STAT3)); 4077 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n", 4078 RREG32(CP_CPF_BUSY_STAT)); 4079 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n", 4080 RREG32(CP_CPF_STALLED_STAT1)); 4081 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS)); 4082 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT)); 4083 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n", 4084 RREG32(CP_CPC_STALLED_STAT1)); 4085 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS)); 4086 } 4087 4088 /** 4089 * cik_gpu_check_soft_reset - check which blocks are busy 4090 * 4091 * @rdev: radeon_device pointer 4092 * 4093 * Check which blocks are busy and return the relevant reset 4094 * mask to be used by cik_gpu_soft_reset(). 4095 * Returns a mask of the blocks to be reset. 4096 */ 4097 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev) 4098 { 4099 u32 reset_mask = 0; 4100 u32 tmp; 4101 4102 /* GRBM_STATUS */ 4103 tmp = RREG32(GRBM_STATUS); 4104 if (tmp & (PA_BUSY | SC_BUSY | 4105 BCI_BUSY | SX_BUSY | 4106 TA_BUSY | VGT_BUSY | 4107 DB_BUSY | CB_BUSY | 4108 GDS_BUSY | SPI_BUSY | 4109 IA_BUSY | IA_BUSY_NO_DMA)) 4110 reset_mask |= RADEON_RESET_GFX; 4111 4112 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY)) 4113 reset_mask |= RADEON_RESET_CP; 4114 4115 /* GRBM_STATUS2 */ 4116 tmp = RREG32(GRBM_STATUS2); 4117 if (tmp & RLC_BUSY) 4118 reset_mask |= RADEON_RESET_RLC; 4119 4120 /* SDMA0_STATUS_REG */ 4121 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET); 4122 if (!(tmp & SDMA_IDLE)) 4123 reset_mask |= RADEON_RESET_DMA; 4124 4125 /* SDMA1_STATUS_REG */ 4126 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET); 4127 if (!(tmp & SDMA_IDLE)) 4128 reset_mask |= RADEON_RESET_DMA1; 4129 4130 /* SRBM_STATUS2 */ 4131 tmp = RREG32(SRBM_STATUS2); 4132 if (tmp & SDMA_BUSY) 4133 reset_mask |= RADEON_RESET_DMA; 4134 4135 if (tmp & SDMA1_BUSY) 4136 reset_mask |= RADEON_RESET_DMA1; 4137 4138 /* SRBM_STATUS */ 4139 tmp = RREG32(SRBM_STATUS); 4140 4141 if (tmp & IH_BUSY) 4142 reset_mask |= RADEON_RESET_IH; 4143 4144 if (tmp & SEM_BUSY) 4145 reset_mask |= RADEON_RESET_SEM; 4146 4147 if (tmp & GRBM_RQ_PENDING) 4148 reset_mask |= RADEON_RESET_GRBM; 4149 4150 if (tmp & VMC_BUSY) 4151 reset_mask |= RADEON_RESET_VMC; 4152 4153 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY | 4154 MCC_BUSY | MCD_BUSY)) 4155 reset_mask |= RADEON_RESET_MC; 4156 4157 if (evergreen_is_display_hung(rdev)) 4158 reset_mask |= RADEON_RESET_DISPLAY; 4159 4160 /* Skip MC reset as it's most likely not hung, just busy */ 4161 if (reset_mask & RADEON_RESET_MC) { 4162 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); 4163 reset_mask &= ~RADEON_RESET_MC; 4164 } 4165 4166 return reset_mask; 4167 } 4168 4169 /** 4170 * cik_gpu_soft_reset - soft reset GPU 4171 * 4172 * @rdev: radeon_device pointer 4173 * @reset_mask: mask of which blocks to reset 4174 * 4175 * Soft reset the blocks specified in @reset_mask.
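 *
 * As an example of the mapping done below: a hung gfx ring is usually
 * reported by cik_gpu_check_soft_reset() as RADEON_RESET_GFX (often with
 * RADEON_RESET_CP), which becomes SOFT_RESET_CP | SOFT_RESET_GFX in
 * GRBM_SOFT_RESET, plus SOFT_RESET_GRBM in SRBM_SOFT_RESET when the CP
 * itself is flagged.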
4176 */ 4177 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask) 4178 { 4179 struct evergreen_mc_save save; 4180 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 4181 u32 tmp; 4182 4183 if (reset_mask == 0) 4184 return; 4185 4186 dev_info(rdev->dev, "GPU soft reset: 0x%08X\n", reset_mask); 4187 4188 cik_print_gpu_status_regs(rdev); 4189 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", 4190 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR)); 4191 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", 4192 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS)); 4193 4194 /* disable CG/PG */ 4195 cik_fini_pg(rdev); 4196 cik_fini_cg(rdev); 4197 4198 /* stop the rlc */ 4199 cik_rlc_stop(rdev); 4200 4201 /* Disable GFX parsing/prefetching */ 4202 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT); 4203 4204 /* Disable MEC parsing/prefetching */ 4205 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT); 4206 4207 if (reset_mask & RADEON_RESET_DMA) { 4208 /* sdma0 */ 4209 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET); 4210 tmp |= SDMA_HALT; 4211 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp); 4212 } 4213 if (reset_mask & RADEON_RESET_DMA1) { 4214 /* sdma1 */ 4215 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET); 4216 tmp |= SDMA_HALT; 4217 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp); 4218 } 4219 4220 evergreen_mc_stop(rdev, &save); 4221 if (evergreen_mc_wait_for_idle(rdev)) { 4222 dev_warn(rdev->dev, "Wait for MC idle timed out!\n"); 4223 } 4224 4225 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) 4226 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX; 4227 4228 if (reset_mask & RADEON_RESET_CP) { 4229 grbm_soft_reset |= SOFT_RESET_CP; 4230 4231 srbm_soft_reset |= SOFT_RESET_GRBM; 4232 } 4233 4234 if (reset_mask & RADEON_RESET_DMA) 4235 srbm_soft_reset |= SOFT_RESET_SDMA; 4236 4237 if (reset_mask & RADEON_RESET_DMA1) 4238 srbm_soft_reset |= SOFT_RESET_SDMA1; 4239 4240 if (reset_mask & RADEON_RESET_DISPLAY) 4241 srbm_soft_reset |= SOFT_RESET_DC; 4242 4243 if (reset_mask & RADEON_RESET_RLC) 4244 grbm_soft_reset |= SOFT_RESET_RLC; 4245 4246 if (reset_mask & RADEON_RESET_SEM) 4247 srbm_soft_reset |= SOFT_RESET_SEM; 4248 4249 if (reset_mask & RADEON_RESET_IH) 4250 srbm_soft_reset |= SOFT_RESET_IH; 4251 4252 if (reset_mask & RADEON_RESET_GRBM) 4253 srbm_soft_reset |= SOFT_RESET_GRBM; 4254 4255 if (reset_mask & RADEON_RESET_VMC) 4256 srbm_soft_reset |= SOFT_RESET_VMC; 4257 4258 if (!(rdev->flags & RADEON_IS_IGP)) { 4259 if (reset_mask & RADEON_RESET_MC) 4260 srbm_soft_reset |= SOFT_RESET_MC; 4261 } 4262 4263 if (grbm_soft_reset) { 4264 tmp = RREG32(GRBM_SOFT_RESET); 4265 tmp |= grbm_soft_reset; 4266 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 4267 WREG32(GRBM_SOFT_RESET, tmp); 4268 tmp = RREG32(GRBM_SOFT_RESET); 4269 4270 udelay(50); 4271 4272 tmp &= ~grbm_soft_reset; 4273 WREG32(GRBM_SOFT_RESET, tmp); 4274 tmp = RREG32(GRBM_SOFT_RESET); 4275 } 4276 4277 if (srbm_soft_reset) { 4278 tmp = RREG32(SRBM_SOFT_RESET); 4279 tmp |= srbm_soft_reset; 4280 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 4281 WREG32(SRBM_SOFT_RESET, tmp); 4282 tmp = RREG32(SRBM_SOFT_RESET); 4283 4284 udelay(50); 4285 4286 tmp &= ~srbm_soft_reset; 4287 WREG32(SRBM_SOFT_RESET, tmp); 4288 tmp = RREG32(SRBM_SOFT_RESET); 4289 } 4290 4291 /* Wait a little for things to settle down */ 4292 udelay(50); 4293 4294 evergreen_mc_resume(rdev, &save); 4295 udelay(50); 4296 4297 cik_print_gpu_status_regs(rdev); 4298 } 4299 4300 /** 4301
cik_asic_reset - soft reset GPU 4302 * 4303 * @rdev: radeon_device pointer 4304 * 4305 * Look up which blocks are hung and attempt 4306 * to reset them. 4307 * Returns 0 for success. 4308 */ 4309 int cik_asic_reset(struct radeon_device *rdev) 4310 { 4311 u32 reset_mask; 4312 4313 reset_mask = cik_gpu_check_soft_reset(rdev); 4314 4315 if (reset_mask) 4316 r600_set_bios_scratch_engine_hung(rdev, true); 4317 4318 cik_gpu_soft_reset(rdev, reset_mask); 4319 4320 reset_mask = cik_gpu_check_soft_reset(rdev); 4321 4322 if (!reset_mask) 4323 r600_set_bios_scratch_engine_hung(rdev, false); 4324 4325 return 0; 4326 } 4327 4328 /** 4329 * cik_gfx_is_lockup - check if the 3D engine is locked up 4330 * 4331 * @rdev: radeon_device pointer 4332 * @ring: radeon_ring structure holding ring information 4333 * 4334 * Check if the 3D engine is locked up (CIK). 4335 * Returns true if the engine is locked, false if not. 4336 */ 4337 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) 4338 { 4339 u32 reset_mask = cik_gpu_check_soft_reset(rdev); 4340 4341 if (!(reset_mask & (RADEON_RESET_GFX | 4342 RADEON_RESET_COMPUTE | 4343 RADEON_RESET_CP))) { 4344 radeon_ring_lockup_update(ring); 4345 return false; 4346 } 4347 /* force CP activities */ 4348 radeon_ring_force_activity(rdev, ring); 4349 return radeon_ring_test_lockup(rdev, ring); 4350 } 4351 4352 /* MC */ 4353 /** 4354 * cik_mc_program - program the GPU memory controller 4355 * 4356 * @rdev: radeon_device pointer 4357 * 4358 * Set the location of vram, gart, and AGP in the GPU's 4359 * physical address space (CIK). 4360 */ 4361 static void cik_mc_program(struct radeon_device *rdev) 4362 { 4363 struct evergreen_mc_save save; 4364 u32 tmp; 4365 int i, j; 4366 4367 /* Initialize HDP */ 4368 for (i = 0, j = 0; i < 32; i++, j += 0x18) { 4369 WREG32((0x2c14 + j), 0x00000000); 4370 WREG32((0x2c18 + j), 0x00000000); 4371 WREG32((0x2c1c + j), 0x00000000); 4372 WREG32((0x2c20 + j), 0x00000000); 4373 WREG32((0x2c24 + j), 0x00000000); 4374 } 4375 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0); 4376 4377 evergreen_mc_stop(rdev, &save); 4378 if (radeon_mc_wait_for_idle(rdev)) { 4379 dev_warn(rdev->dev, "Wait for MC idle timed out!\n"); 4380 } 4381 /* Lock out access through the VGA aperture */ 4382 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE); 4383 /* Update configuration */ 4384 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, 4385 rdev->mc.vram_start >> 12); 4386 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, 4387 rdev->mc.vram_end >> 12); 4388 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 4389 rdev->vram_scratch.gpu_addr >> 12); 4390 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16; 4391 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF); 4392 WREG32(MC_VM_FB_LOCATION, tmp); 4393 /* XXX double check these!
*/ 4394 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8)); 4395 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30)); 4396 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF); 4397 WREG32(MC_VM_AGP_BASE, 0); 4398 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF); 4399 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF); 4400 if (radeon_mc_wait_for_idle(rdev)) { 4401 dev_warn(rdev->dev, "Wait for MC idle timed out!\n"); 4402 } 4403 evergreen_mc_resume(rdev, &save); 4404 /* we need to own VRAM, so turn off the VGA renderer here 4405 * to stop it overwriting our objects */ 4406 rv515_vga_render_disable(rdev); 4407 } 4408 4409 /** 4410 * cik_mc_init - initialize the memory controller driver params 4411 * 4412 * @rdev: radeon_device pointer 4413 * 4414 * Look up the amount of vram, vram width, and decide how to place 4415 * vram and gart within the GPU's physical address space (CIK). 4416 * Returns 0 for success. 4417 */ 4418 static int cik_mc_init(struct radeon_device *rdev) 4419 { 4420 u32 tmp; 4421 int chansize, numchan; 4422 4423 /* Get VRAM information */ 4424 rdev->mc.vram_is_ddr = true; 4425 tmp = RREG32(MC_ARB_RAMCFG); 4426 if (tmp & CHANSIZE_MASK) { 4427 chansize = 64; 4428 } else { 4429 chansize = 32; 4430 } 4431 tmp = RREG32(MC_SHARED_CHMAP); 4432 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { 4433 case 0: 4434 default: 4435 numchan = 1; 4436 break; 4437 case 1: 4438 numchan = 2; 4439 break; 4440 case 2: 4441 numchan = 4; 4442 break; 4443 case 3: 4444 numchan = 8; 4445 break; 4446 case 4: 4447 numchan = 3; 4448 break; 4449 case 5: 4450 numchan = 6; 4451 break; 4452 case 6: 4453 numchan = 10; 4454 break; 4455 case 7: 4456 numchan = 12; 4457 break; 4458 case 8: 4459 numchan = 16; 4460 break; 4461 } 4462 rdev->mc.vram_width = numchan * chansize; 4463 /* Could the aperture size report 0? */ 4464 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0); 4465 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0); 4466 /* size in MB, as on SI */ 4467 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL; 4468 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL; 4469 rdev->mc.visible_vram_size = rdev->mc.aper_size; 4470 si_vram_gtt_location(rdev, &rdev->mc); 4471 radeon_update_bandwidth_info(rdev); 4472 4473 return 0; 4474 } 4475 4476 /* 4477 * GART 4478 * VMID 0 is the physical GPU address space as used by the kernel. 4479 * VMIDs 1-15 are used for userspace clients and are handled 4480 * by the radeon vm/hsa code. 4481 */ 4482 /** 4483 * cik_pcie_gart_tlb_flush - gart tlb flush callback 4484 * 4485 * @rdev: radeon_device pointer 4486 * 4487 * Flush the TLB for the VMID 0 page table (CIK). 4488 */ 4489 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev) 4490 { 4491 /* flush hdp cache */ 4492 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0); 4493 4494 /* bits 0-15 are the VM contexts 0-15 */ 4495 WREG32(VM_INVALIDATE_REQUEST, 0x1); 4496 } 4497 4498 /** 4499 * cik_pcie_gart_enable - gart enable 4500 * 4501 * @rdev: radeon_device pointer 4502 * 4503 * This sets up the TLBs, programs the page tables for VMID0, 4504 * sets up the hw for VMIDs 1-15 which are allocated on 4505 * demand, and sets up the global locations for the LDS, GDS, 4506 * and GPUVM for FSA64 clients (CIK). 4507 * Returns 0 for success, errors for failure.
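 *
 * VMID 0 uses a single-level page table (PAGE_TABLE_DEPTH(0)) covering
 * [gtt_start, gtt_end]; faulting accesses in that range are redirected
 * to the dummy page via VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, so a
 * stray GPU access cannot scribble over arbitrary system memory.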
4508 */ 4509 static int cik_pcie_gart_enable(struct radeon_device *rdev) 4510 { 4511 int r, i; 4512 4513 if (rdev->gart.robj == NULL) { 4514 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n"); 4515 return -EINVAL; 4516 } 4517 r = radeon_gart_table_vram_pin(rdev); 4518 if (r) 4519 return r; 4520 radeon_gart_restore(rdev); 4521 /* Setup TLB control */ 4522 WREG32(MC_VM_MX_L1_TLB_CNTL, 4523 (0xA << 7) | 4524 ENABLE_L1_TLB | 4525 SYSTEM_ACCESS_MODE_NOT_IN_SYS | 4526 ENABLE_ADVANCED_DRIVER_MODEL | 4527 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); 4528 /* Setup L2 cache */ 4529 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | 4530 ENABLE_L2_FRAGMENT_PROCESSING | 4531 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | 4532 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | 4533 EFFECTIVE_L2_QUEUE_SIZE(7) | 4534 CONTEXT1_IDENTITY_ACCESS_MODE(1)); 4535 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); 4536 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | 4537 L2_CACHE_BIGK_FRAGMENT_SIZE(6)); 4538 /* setup context0 */ 4539 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); 4540 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); 4541 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); 4542 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, 4543 (u32)(rdev->dummy_page.addr >> 12)); 4544 WREG32(VM_CONTEXT0_CNTL2, 0); 4545 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | 4546 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT)); 4547 4548 WREG32(0x15D4, 0); 4549 WREG32(0x15D8, 0); 4550 WREG32(0x15DC, 0); 4551 4552 /* empty context1-15 */ 4553 /* FIXME start with 4G, once using 2 level pt switch to full 4554 * vm size space 4555 */ 4556 /* set vm size, must be a multiple of 4 */ 4557 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0); 4558 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn); 4559 for (i = 1; i < 16; i++) { 4560 if (i < 8) 4561 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2), 4562 rdev->gart.table_addr >> 12); 4563 else 4564 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2), 4565 rdev->gart.table_addr >> 12); 4566 } 4567 4568 /* enable context1-15 */ 4569 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, 4570 (u32)(rdev->dummy_page.addr >> 12)); 4571 WREG32(VM_CONTEXT1_CNTL2, 4); 4572 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) | 4573 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT | 4574 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT | 4575 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT | 4576 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT | 4577 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT | 4578 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT | 4579 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT | 4580 VALID_PROTECTION_FAULT_ENABLE_DEFAULT | 4581 READ_PROTECTION_FAULT_ENABLE_INTERRUPT | 4582 READ_PROTECTION_FAULT_ENABLE_DEFAULT | 4583 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT | 4584 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT); 4585 4586 /* TC cache setup ??? 
*/ 4587 WREG32(TC_CFG_L1_LOAD_POLICY0, 0); 4588 WREG32(TC_CFG_L1_LOAD_POLICY1, 0); 4589 WREG32(TC_CFG_L1_STORE_POLICY, 0); 4590 4591 WREG32(TC_CFG_L2_LOAD_POLICY0, 0); 4592 WREG32(TC_CFG_L2_LOAD_POLICY1, 0); 4593 WREG32(TC_CFG_L2_STORE_POLICY0, 0); 4594 WREG32(TC_CFG_L2_STORE_POLICY1, 0); 4595 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0); 4596 4597 WREG32(TC_CFG_L1_VOLATILE, 0); 4598 WREG32(TC_CFG_L2_VOLATILE, 0); 4599 4600 if (rdev->family == CHIP_KAVERI) { 4601 u32 tmp = RREG32(CHUB_CONTROL); 4602 tmp &= ~BYPASS_VM; 4603 WREG32(CHUB_CONTROL, tmp); 4604 } 4605 4606 /* XXX SH_MEM regs */ 4607 /* where to put LDS, scratch, GPUVM in FSA64 space */ 4608 mutex_lock(&rdev->srbm_mutex); 4609 for (i = 0; i < 16; i++) { 4610 cik_srbm_select(rdev, 0, 0, 0, i); 4611 /* CP and shaders */ 4612 WREG32(SH_MEM_CONFIG, 0); 4613 WREG32(SH_MEM_APE1_BASE, 1); 4614 WREG32(SH_MEM_APE1_LIMIT, 0); 4615 WREG32(SH_MEM_BASES, 0); 4616 /* SDMA GFX */ 4617 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0); 4618 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0); 4619 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0); 4620 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0); 4621 /* XXX SDMA RLC - todo */ 4622 } 4623 cik_srbm_select(rdev, 0, 0, 0, 0); 4624 mutex_unlock(&rdev->srbm_mutex); 4625 4626 cik_pcie_gart_tlb_flush(rdev); 4627 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", 4628 (unsigned)(rdev->mc.gtt_size >> 20), 4629 (unsigned long long)rdev->gart.table_addr); 4630 rdev->gart.ready = true; 4631 return 0; 4632 } 4633 4634 /** 4635 * cik_pcie_gart_disable - gart disable 4636 * 4637 * @rdev: radeon_device pointer 4638 * 4639 * This disables all VM page tables (CIK). 4640 */ 4641 static void cik_pcie_gart_disable(struct radeon_device *rdev) 4642 { 4643 /* Disable all tables */ 4644 WREG32(VM_CONTEXT0_CNTL, 0); 4645 WREG32(VM_CONTEXT1_CNTL, 0); 4646 /* Setup TLB control */ 4647 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS | 4648 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); 4649 /* Setup L2 cache */ 4650 WREG32(VM_L2_CNTL, 4651 ENABLE_L2_FRAGMENT_PROCESSING | 4652 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | 4653 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | 4654 EFFECTIVE_L2_QUEUE_SIZE(7) | 4655 CONTEXT1_IDENTITY_ACCESS_MODE(1)); 4656 WREG32(VM_L2_CNTL2, 0); 4657 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | 4658 L2_CACHE_BIGK_FRAGMENT_SIZE(6)); 4659 radeon_gart_table_vram_unpin(rdev); 4660 } 4661 4662 /** 4663 * cik_pcie_gart_fini - vm fini callback 4664 * 4665 * @rdev: radeon_device pointer 4666 * 4667 * Tears down the driver GART/VM setup (CIK). 4668 */ 4669 static void cik_pcie_gart_fini(struct radeon_device *rdev) 4670 { 4671 cik_pcie_gart_disable(rdev); 4672 radeon_gart_table_vram_free(rdev); 4673 radeon_gart_fini(rdev); 4674 } 4675 4676 /* vm parser */ 4677 /** 4678 * cik_ib_parse - vm ib_parse callback 4679 * 4680 * @rdev: radeon_device pointer 4681 * @ib: indirect buffer pointer 4682 * 4683 * CIK uses hw IB checking so this is a nop (CIK). 4684 */ 4685 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) 4686 { 4687 return 0; 4688 } 4689 4690 /* 4691 * vm 4692 * VMID 0 is the physical GPU address space as used by the kernel. 4693 * VMIDs 1-15 are used for userspace clients and are handled 4694 * by the radeon vm/hsa code. 4695 */ 4696 /** 4697 * cik_vm_init - cik vm init callback 4698 * 4699 * @rdev: radeon_device pointer 4700 * 4701 * Inits cik specific vm parameters (number of VMs, base of vram for 4702 * VMIDs 1-15) (CIK). 4703 * Returns 0 for success.
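 *
 * On IGPs the VRAM base offset comes from MC_VM_FB_OFFSET, which counts
 * in 4 MB units; shifting left by 22 converts it to a byte address
 * (e.g. a register value of 0x100 yields 0x100 << 22 = 1 GiB).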
4704 */ 4705 int cik_vm_init(struct radeon_device *rdev) 4706 { 4707 /* number of VMs */ 4708 rdev->vm_manager.nvm = 16; 4709 /* base offset of vram pages */ 4710 if (rdev->flags & RADEON_IS_IGP) { 4711 u64 tmp = RREG32(MC_VM_FB_OFFSET); 4712 tmp <<= 22; 4713 rdev->vm_manager.vram_base_offset = tmp; 4714 } else 4715 rdev->vm_manager.vram_base_offset = 0; 4716 4717 return 0; 4718 } 4719 4720 /** 4721 * cik_vm_fini - cik vm fini callback 4722 * 4723 * @rdev: radeon_device pointer 4724 * 4725 * Tear down any asic specific VM setup (CIK). 4726 */ 4727 void cik_vm_fini(struct radeon_device *rdev) 4728 { 4729 } 4730 4731 /** 4732 * cik_vm_decode_fault - print human readable fault info 4733 * 4734 * @rdev: radeon_device pointer 4735 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value 4736 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value * @mc_client: memory client information from the fault status 4737 * 4738 * Print human readable fault information (CIK). 4739 */ 4740 static void cik_vm_decode_fault(struct radeon_device *rdev, 4741 u32 status, u32 addr, u32 mc_client) 4742 { 4743 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT; 4744 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT; 4745 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT; 4746 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff, 4747 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 }; 4748 4749 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n", 4750 protections, vmid, addr, 4751 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read", 4752 block, mc_client, mc_id); 4753 } 4754 4755 /** 4756 * cik_vm_flush - cik vm flush using the CP 4757 * 4758 * @rdev: radeon_device pointer * @ridx: ring index to emit the flush on * @vm: vm to flush 4759 * 4760 * Update the page table base and flush the VM TLB 4761 * using the CP (CIK).
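 *
 * Every register update below is emitted as a WRITE_DATA packet; a
 * minimal sketch of the five-dword form used for one register write
 * (DST_SEL(0) selects the register bus, ENGINE_SEL(0) the ME):
 *
 *   radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 *   radeon_ring_write(ring, WRITE_DATA_ENGINE_SEL(0) |
 *                           WRITE_DATA_DST_SEL(0));
 *   radeon_ring_write(ring, reg >> 2);  -- dword offset of the register
 *   radeon_ring_write(ring, 0);         -- high address bits, unused here
 *   radeon_ring_write(ring, val);       -- value to write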
4762 */ 4763 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) 4764 { 4765 struct radeon_ring *ring = &rdev->ring[ridx]; 4766 4767 if (vm == NULL) 4768 return; 4769 4770 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4771 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4772 WRITE_DATA_DST_SEL(0))); 4773 if (vm->id < 8) { 4774 radeon_ring_write(ring, 4775 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); 4776 } else { 4777 radeon_ring_write(ring, 4778 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2); 4779 } 4780 radeon_ring_write(ring, 0); 4781 radeon_ring_write(ring, vm->pd_gpu_addr >> 12); 4782 4783 /* update SH_MEM_* regs */ 4784 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4785 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4786 WRITE_DATA_DST_SEL(0))); 4787 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); 4788 radeon_ring_write(ring, 0); 4789 radeon_ring_write(ring, VMID(vm->id)); 4790 4791 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6)); 4792 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4793 WRITE_DATA_DST_SEL(0))); 4794 radeon_ring_write(ring, SH_MEM_BASES >> 2); 4795 radeon_ring_write(ring, 0); 4796 4797 radeon_ring_write(ring, 0); /* SH_MEM_BASES */ 4798 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */ 4799 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */ 4800 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */ 4801 4802 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4803 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4804 WRITE_DATA_DST_SEL(0))); 4805 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); 4806 radeon_ring_write(ring, 0); 4807 radeon_ring_write(ring, VMID(0)); 4808 4809 /* HDP flush */ 4810 /* We should be using the WAIT_REG_MEM packet here like in 4811 * cik_fence_ring_emit(), but it causes the CP to hang in this 4812 * context... 4813 */ 4814 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4815 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4816 WRITE_DATA_DST_SEL(0))); 4817 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2); 4818 radeon_ring_write(ring, 0); 4819 radeon_ring_write(ring, 0); 4820 4821 /* bits 0-15 are the VM contexts 0-15 */ 4822 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4823 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4824 WRITE_DATA_DST_SEL(0))); 4825 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); 4826 radeon_ring_write(ring, 0); 4827 radeon_ring_write(ring, 1 << vm->id); 4828 4829 /* compute doesn't have PFP */ 4830 if (ridx == RADEON_RING_TYPE_GFX_INDEX) { 4831 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 4832 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 4833 radeon_ring_write(ring, 0x0); 4834 } 4835 } 4836 4837 /** 4838 * cik_vm_set_page - update the page tables using CP or sDMA 4839 * 4840 * @rdev: radeon_device pointer 4841 * @ib: indirect buffer to fill with commands 4842 * @pe: addr of the page entry 4843 * @addr: dst addr to write into pe 4844 * @count: number of page entries to update 4845 * @incr: increase next addr by incr bytes 4846 * @flags: access flags 4847 * 4848 * Update the page tables using CP or sDMA (CIK).
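 *
 * On the CP path each PTE is written as a 64-bit value (two dwords), so
 * a WRITE_DATA packet covering count entries uses ndw = 2 + count * 2
 * dwords, capped at 0x3FFE per packet; larger updates are therefore
 * split across multiple packets by the loop below.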
4849 */ 4850 void cik_vm_set_page(struct radeon_device *rdev, 4851 struct radeon_ib *ib, 4852 uint64_t pe, 4853 uint64_t addr, unsigned count, 4854 uint32_t incr, uint32_t flags) 4855 { 4856 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); 4857 uint64_t value; 4858 unsigned ndw; 4859 4860 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) { 4861 /* CP */ 4862 while (count) { 4863 ndw = 2 + count * 2; 4864 if (ndw > 0x3FFE) 4865 ndw = 0x3FFE; 4866 4867 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw); 4868 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) | 4869 WRITE_DATA_DST_SEL(1)); 4870 ib->ptr[ib->length_dw++] = pe; 4871 ib->ptr[ib->length_dw++] = upper_32_bits(pe); 4872 for (; ndw > 2; ndw -= 2, --count, pe += 8) { 4873 if (flags & RADEON_VM_PAGE_SYSTEM) { 4874 value = radeon_vm_map_gart(rdev, addr); 4875 value &= 0xFFFFFFFFFFFFF000ULL; 4876 } else if (flags & RADEON_VM_PAGE_VALID) { 4877 value = addr; 4878 } else { 4879 value = 0; 4880 } 4881 addr += incr; 4882 value |= r600_flags; 4883 ib->ptr[ib->length_dw++] = value; 4884 ib->ptr[ib->length_dw++] = upper_32_bits(value); 4885 } 4886 } 4887 } else { 4888 /* DMA */ 4889 cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags); 4890 } 4891 } 4892 4893 /* 4894 * RLC 4895 * The RLC is a multi-purpose microengine that handles a 4896 * variety of functions, the most important of which is 4897 * the interrupt controller. 4898 */ 4899 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev, 4900 bool enable) 4901 { 4902 u32 tmp = RREG32(CP_INT_CNTL_RING0); 4903 4904 if (enable) 4905 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 4906 else 4907 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 4908 WREG32(CP_INT_CNTL_RING0, tmp); 4909 } 4910 4911 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable) 4912 { 4913 u32 tmp; 4914 4915 tmp = RREG32(RLC_LB_CNTL); 4916 if (enable) 4917 tmp |= LOAD_BALANCE_ENABLE; 4918 else 4919 tmp &= ~LOAD_BALANCE_ENABLE; 4920 WREG32(RLC_LB_CNTL, tmp); 4921 } 4922 4923 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev) 4924 { 4925 u32 i, j, k; 4926 u32 mask; 4927 4928 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { 4929 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { 4930 cik_select_se_sh(rdev, i, j); 4931 for (k = 0; k < rdev->usec_timeout; k++) { 4932 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0) 4933 break; 4934 udelay(1); 4935 } 4936 } 4937 } 4938 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 4939 4940 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY; 4941 for (k = 0; k < rdev->usec_timeout; k++) { 4942 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 4943 break; 4944 udelay(1); 4945 } 4946 } 4947 4948 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc) 4949 { 4950 u32 tmp; 4951 4952 tmp = RREG32(RLC_CNTL); 4953 if (tmp != rlc) 4954 WREG32(RLC_CNTL, rlc); 4955 } 4956 4957 static u32 cik_halt_rlc(struct radeon_device *rdev) 4958 { 4959 u32 data, orig; 4960 4961 orig = data = RREG32(RLC_CNTL); 4962 4963 if (data & RLC_ENABLE) { 4964 u32 i; 4965 4966 data &= ~RLC_ENABLE; 4967 WREG32(RLC_CNTL, data); 4968 4969 for (i = 0; i < rdev->usec_timeout; i++) { 4970 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0) 4971 break; 4972 udelay(1); 4973 } 4974 4975 cik_wait_for_rlc_serdes(rdev); 4976 } 4977 4978 return orig; 4979 } 4980 4981 void cik_enter_rlc_safe_mode(struct radeon_device *rdev) 4982 { 4983 u32 tmp, i, mask; 4984 4985 tmp = REQ | 
MESSAGE(MSG_ENTER_RLC_SAFE_MODE); 4986 WREG32(RLC_GPR_REG2, tmp); 4987 4988 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS; 4989 for (i = 0; i < rdev->usec_timeout; i++) { 4990 if ((RREG32(RLC_GPM_STAT) & mask) == mask) 4991 break; 4992 udelay(1); 4993 } 4994 4995 for (i = 0; i < rdev->usec_timeout; i++) { 4996 if ((RREG32(RLC_GPR_REG2) & REQ) == 0) 4997 break; 4998 udelay(1); 4999 } 5000 } 5001 5002 void cik_exit_rlc_safe_mode(struct radeon_device *rdev) 5003 { 5004 u32 tmp; 5005 5006 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE); 5007 WREG32(RLC_GPR_REG2, tmp); 5008 } 5009 5010 /** 5011 * cik_rlc_stop - stop the RLC ME 5012 * 5013 * @rdev: radeon_device pointer 5014 * 5015 * Halt the RLC ME (MicroEngine) (CIK). 5016 */ 5017 static void cik_rlc_stop(struct radeon_device *rdev) 5018 { 5019 WREG32(RLC_CNTL, 0); 5020 5021 cik_enable_gui_idle_interrupt(rdev, false); 5022 5023 cik_wait_for_rlc_serdes(rdev); 5024 } 5025 5026 /** 5027 * cik_rlc_start - start the RLC ME 5028 * 5029 * @rdev: radeon_device pointer 5030 * 5031 * Unhalt the RLC ME (MicroEngine) (CIK). 5032 */ 5033 static void cik_rlc_start(struct radeon_device *rdev) 5034 { 5035 WREG32(RLC_CNTL, RLC_ENABLE); 5036 5037 cik_enable_gui_idle_interrupt(rdev, true); 5038 5039 udelay(50); 5040 } 5041 5042 /** 5043 * cik_rlc_resume - setup the RLC hw 5044 * 5045 * @rdev: radeon_device pointer 5046 * 5047 * Initialize the RLC registers, load the ucode, 5048 * and start the RLC (CIK). 5049 * Returns 0 for success, -EINVAL if the ucode is not available. 5050 */ 5051 static int cik_rlc_resume(struct radeon_device *rdev) 5052 { 5053 u32 i, size, tmp; 5054 const __be32 *fw_data; 5055 5056 if (!rdev->rlc_fw) 5057 return -EINVAL; 5058 5059 switch (rdev->family) { 5060 case CHIP_BONAIRE: 5061 default: 5062 size = BONAIRE_RLC_UCODE_SIZE; 5063 break; 5064 case CHIP_KAVERI: 5065 size = KV_RLC_UCODE_SIZE; 5066 break; 5067 case CHIP_KABINI: 5068 size = KB_RLC_UCODE_SIZE; 5069 break; 5070 } 5071 5072 cik_rlc_stop(rdev); 5073 5074 /* disable CG */ 5075 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc; 5076 WREG32(RLC_CGCG_CGLS_CTRL, tmp); 5077 5078 si_rlc_reset(rdev); 5079 5080 cik_init_pg(rdev); 5081 5082 cik_init_cg(rdev); 5083 5084 WREG32(RLC_LB_CNTR_INIT, 0); 5085 WREG32(RLC_LB_CNTR_MAX, 0x00008000); 5086 5087 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5088 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff); 5089 WREG32(RLC_LB_PARAMS, 0x00600408); 5090 WREG32(RLC_LB_CNTL, 0x80000004); 5091 5092 WREG32(RLC_MC_CNTL, 0); 5093 WREG32(RLC_UCODE_CNTL, 0); 5094 5095 fw_data = (const __be32 *)rdev->rlc_fw->data; 5096 WREG32(RLC_GPM_UCODE_ADDR, 0); 5097 for (i = 0; i < size; i++) 5098 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++)); 5099 WREG32(RLC_GPM_UCODE_ADDR, 0); 5100 5101 /* XXX - find out what chips support lbpw */ 5102 cik_enable_lbpw(rdev, false); 5103 5104 if (rdev->family == CHIP_BONAIRE) 5105 WREG32(RLC_DRIVER_DMA_STATUS, 0); 5106 5107 cik_rlc_start(rdev); 5108 5109 return 0; 5110 } 5111 5112 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable) 5113 { 5114 u32 data, orig, tmp, tmp2; 5115 5116 orig = data = RREG32(RLC_CGCG_CGLS_CTRL); 5117 5118 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) { 5119 cik_enable_gui_idle_interrupt(rdev, true); 5120 5121 tmp = cik_halt_rlc(rdev); 5122 5123 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5124 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5125 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5126 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE; 5127 WREG32(RLC_SERDES_WR_CTRL, 
tmp2); 5128 5129 cik_update_rlc(rdev, tmp); 5130 5131 data |= CGCG_EN | CGLS_EN; 5132 } else { 5133 cik_enable_gui_idle_interrupt(rdev, false); 5134 5135 RREG32(CB_CGTT_SCLK_CTRL); 5136 RREG32(CB_CGTT_SCLK_CTRL); 5137 RREG32(CB_CGTT_SCLK_CTRL); 5138 RREG32(CB_CGTT_SCLK_CTRL); 5139 5140 data &= ~(CGCG_EN | CGLS_EN); 5141 } 5142 5143 if (orig != data) 5144 WREG32(RLC_CGCG_CGLS_CTRL, data); 5145 5146 } 5147 5148 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable) 5149 { 5150 u32 data, orig, tmp = 0; 5151 5152 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) { 5153 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) { 5154 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) { 5155 orig = data = RREG32(CP_MEM_SLP_CNTL); 5156 data |= CP_MEM_LS_EN; 5157 if (orig != data) 5158 WREG32(CP_MEM_SLP_CNTL, data); 5159 } 5160 } 5161 5162 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); 5163 data &= 0xfffffffd; 5164 if (orig != data) 5165 WREG32(RLC_CGTT_MGCG_OVERRIDE, data); 5166 5167 tmp = cik_halt_rlc(rdev); 5168 5169 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5170 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5171 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5172 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0; 5173 WREG32(RLC_SERDES_WR_CTRL, data); 5174 5175 cik_update_rlc(rdev, tmp); 5176 5177 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) { 5178 orig = data = RREG32(CGTS_SM_CTRL_REG); 5179 data &= ~SM_MODE_MASK; 5180 data |= SM_MODE(0x2); 5181 data |= SM_MODE_ENABLE; 5182 data &= ~CGTS_OVERRIDE; 5183 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) && 5184 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS)) 5185 data &= ~CGTS_LS_OVERRIDE; 5186 data &= ~ON_MONITOR_ADD_MASK; 5187 data |= ON_MONITOR_ADD_EN; 5188 data |= ON_MONITOR_ADD(0x96); 5189 if (orig != data) 5190 WREG32(CGTS_SM_CTRL_REG, data); 5191 } 5192 } else { 5193 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); 5194 data |= 0x00000002; 5195 if (orig != data) 5196 WREG32(RLC_CGTT_MGCG_OVERRIDE, data); 5197 5198 data = RREG32(RLC_MEM_SLP_CNTL); 5199 if (data & RLC_MEM_LS_EN) { 5200 data &= ~RLC_MEM_LS_EN; 5201 WREG32(RLC_MEM_SLP_CNTL, data); 5202 } 5203 5204 data = RREG32(CP_MEM_SLP_CNTL); 5205 if (data & CP_MEM_LS_EN) { 5206 data &= ~CP_MEM_LS_EN; 5207 WREG32(CP_MEM_SLP_CNTL, data); 5208 } 5209 5210 orig = data = RREG32(CGTS_SM_CTRL_REG); 5211 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE; 5212 if (orig != data) 5213 WREG32(CGTS_SM_CTRL_REG, data); 5214 5215 tmp = cik_halt_rlc(rdev); 5216 5217 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5218 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5219 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5220 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1; 5221 WREG32(RLC_SERDES_WR_CTRL, data); 5222 5223 cik_update_rlc(rdev, tmp); 5224 } 5225 } 5226 5227 static const u32 mc_cg_registers[] = 5228 { 5229 MC_HUB_MISC_HUB_CG, 5230 MC_HUB_MISC_SIP_CG, 5231 MC_HUB_MISC_VM_CG, 5232 MC_XPB_CLK_GAT, 5233 ATC_MISC_CG, 5234 MC_CITF_MISC_WR_CG, 5235 MC_CITF_MISC_RD_CG, 5236 MC_CITF_MISC_VM_CG, 5237 VM_L2_CG, 5238 }; 5239 5240 static void cik_enable_mc_ls(struct radeon_device *rdev, 5241 bool enable) 5242 { 5243 int i; 5244 u32 orig, data; 5245 5246 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { 5247 orig = data = RREG32(mc_cg_registers[i]); 5248 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS)) 5249 data |= MC_LS_ENABLE; 5250 else 5251 data &= ~MC_LS_ENABLE; 5252 if (data != orig) 5253 WREG32(mc_cg_registers[i], data); 5254 } 5255 } 5256 5257 static void 
cik_enable_mc_mgcg(struct radeon_device *rdev, 5258 bool enable) 5259 { 5260 int i; 5261 u32 orig, data; 5262 5263 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { 5264 orig = data = RREG32(mc_cg_registers[i]); 5265 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG)) 5266 data |= MC_CG_ENABLE; 5267 else 5268 data &= ~MC_CG_ENABLE; 5269 if (data != orig) 5270 WREG32(mc_cg_registers[i], data); 5271 } 5272 } 5273 5274 static void cik_enable_sdma_mgcg(struct radeon_device *rdev, 5275 bool enable) 5276 { 5277 u32 orig, data; 5278 5279 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) { 5280 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100); 5281 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100); 5282 } else { 5283 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET); 5284 data |= 0xff000000; 5285 if (data != orig) 5286 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data); 5287 5288 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET); 5289 data |= 0xff000000; 5290 if (data != orig) 5291 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data); 5292 } 5293 } 5294 5295 static void cik_enable_sdma_mgls(struct radeon_device *rdev, 5296 bool enable) 5297 { 5298 u32 orig, data; 5299 5300 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) { 5301 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET); 5302 data |= 0x100; 5303 if (orig != data) 5304 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data); 5305 5306 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET); 5307 data |= 0x100; 5308 if (orig != data) 5309 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data); 5310 } else { 5311 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET); 5312 data &= ~0x100; 5313 if (orig != data) 5314 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data); 5315 5316 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET); 5317 data &= ~0x100; 5318 if (orig != data) 5319 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data); 5320 } 5321 } 5322 5323 static void cik_enable_uvd_mgcg(struct radeon_device *rdev, 5324 bool enable) 5325 { 5326 u32 orig, data; 5327 5328 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) { 5329 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); 5330 data = 0xfff; 5331 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data); 5332 5333 orig = data = RREG32(UVD_CGC_CTRL); 5334 data |= DCM; 5335 if (orig != data) 5336 WREG32(UVD_CGC_CTRL, data); 5337 } else { 5338 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); 5339 data &= ~0xfff; 5340 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data); 5341 5342 orig = data = RREG32(UVD_CGC_CTRL); 5343 data &= ~DCM; 5344 if (orig != data) 5345 WREG32(UVD_CGC_CTRL, data); 5346 } 5347 } 5348 5349 static void cik_enable_bif_mgls(struct radeon_device *rdev, 5350 bool enable) 5351 { 5352 u32 orig, data; 5353 5354 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2); 5355 5356 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS)) 5357 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | 5358 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN; 5359 else 5360 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN | 5361 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN); 5362 5363 if (orig != data) 5364 WREG32_PCIE_PORT(PCIE_CNTL2, data); 5365 } 5366 5367 static void cik_enable_hdp_mgcg(struct radeon_device *rdev, 5368 bool enable) 5369 { 5370 u32 orig, data; 5371 5372 orig = data = RREG32(HDP_HOST_PATH_CNTL); 5373 5374 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG)) 5375 data &= ~CLOCK_GATING_DIS; 5376 else 5377 data |= CLOCK_GATING_DIS; 
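/* as in the other cg/ls helpers above, only write the register back when the value actually changed */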
5378 5379 if (orig != data) 5380 WREG32(HDP_HOST_PATH_CNTL, data); 5381 } 5382 5383 static void cik_enable_hdp_ls(struct radeon_device *rdev, 5384 bool enable) 5385 { 5386 u32 orig, data; 5387 5388 orig = data = RREG32(HDP_MEM_POWER_LS); 5389 5390 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS)) 5391 data |= HDP_LS_ENABLE; 5392 else 5393 data &= ~HDP_LS_ENABLE; 5394 5395 if (orig != data) 5396 WREG32(HDP_MEM_POWER_LS, data); 5397 } 5398 5399 void cik_update_cg(struct radeon_device *rdev, 5400 u32 block, bool enable) 5401 { 5402 5403 if (block & RADEON_CG_BLOCK_GFX) { 5404 cik_enable_gui_idle_interrupt(rdev, false); 5405 /* order matters! */ 5406 if (enable) { 5407 cik_enable_mgcg(rdev, true); 5408 cik_enable_cgcg(rdev, true); 5409 } else { 5410 cik_enable_cgcg(rdev, false); 5411 cik_enable_mgcg(rdev, false); 5412 } 5413 cik_enable_gui_idle_interrupt(rdev, true); 5414 } 5415 5416 if (block & RADEON_CG_BLOCK_MC) { 5417 if (!(rdev->flags & RADEON_IS_IGP)) { 5418 cik_enable_mc_mgcg(rdev, enable); 5419 cik_enable_mc_ls(rdev, enable); 5420 } 5421 } 5422 5423 if (block & RADEON_CG_BLOCK_SDMA) { 5424 cik_enable_sdma_mgcg(rdev, enable); 5425 cik_enable_sdma_mgls(rdev, enable); 5426 } 5427 5428 if (block & RADEON_CG_BLOCK_BIF) { 5429 cik_enable_bif_mgls(rdev, enable); 5430 } 5431 5432 if (block & RADEON_CG_BLOCK_UVD) { 5433 if (rdev->has_uvd) 5434 cik_enable_uvd_mgcg(rdev, enable); 5435 } 5436 5437 if (block & RADEON_CG_BLOCK_HDP) { 5438 cik_enable_hdp_mgcg(rdev, enable); 5439 cik_enable_hdp_ls(rdev, enable); 5440 } 5441 } 5442 5443 static void cik_init_cg(struct radeon_device *rdev) 5444 { 5445 5446 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true); 5447 5448 if (rdev->has_uvd) 5449 si_init_uvd_internal_cg(rdev); 5450 5451 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC | 5452 RADEON_CG_BLOCK_SDMA | 5453 RADEON_CG_BLOCK_BIF | 5454 RADEON_CG_BLOCK_UVD | 5455 RADEON_CG_BLOCK_HDP), true); 5456 } 5457 5458 static void cik_fini_cg(struct radeon_device *rdev) 5459 { 5460 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC | 5461 RADEON_CG_BLOCK_SDMA | 5462 RADEON_CG_BLOCK_BIF | 5463 RADEON_CG_BLOCK_UVD | 5464 RADEON_CG_BLOCK_HDP), false); 5465 5466 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); 5467 } 5468 5469 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev, 5470 bool enable) 5471 { 5472 u32 data, orig; 5473 5474 orig = data = RREG32(RLC_PG_CNTL); 5475 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS)) 5476 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE; 5477 else 5478 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE; 5479 if (orig != data) 5480 WREG32(RLC_PG_CNTL, data); 5481 } 5482 5483 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev, 5484 bool enable) 5485 { 5486 u32 data, orig; 5487 5488 orig = data = RREG32(RLC_PG_CNTL); 5489 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS)) 5490 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE; 5491 else 5492 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE; 5493 if (orig != data) 5494 WREG32(RLC_PG_CNTL, data); 5495 } 5496 5497 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable) 5498 { 5499 u32 data, orig; 5500 5501 orig = data = RREG32(RLC_PG_CNTL); 5502 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP)) 5503 data &= ~DISABLE_CP_PG; 5504 else 5505 data |= DISABLE_CP_PG; 5506 if (orig != data) 5507 WREG32(RLC_PG_CNTL, data); 5508 } 5509 5510 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable) 5511 { 5512 u32 data, orig; 5513 5514 orig = data = RREG32(RLC_PG_CNTL); 5515 if (enable && (rdev->pg_flags & 
RADEON_PG_SUPPORT_GDS)) 5516 data &= ~DISABLE_GDS_PG; 5517 else 5518 data |= DISABLE_GDS_PG; 5519 if (orig != data) 5520 WREG32(RLC_PG_CNTL, data); 5521 } 5522 5523 #define CP_ME_TABLE_SIZE 96 5524 #define CP_ME_TABLE_OFFSET 2048 5525 #define CP_MEC_TABLE_OFFSET 4096 5526 5527 void cik_init_cp_pg_table(struct radeon_device *rdev) 5528 { 5529 const __be32 *fw_data; 5530 volatile u32 *dst_ptr; 5531 int me, i, max_me = 4; 5532 u32 bo_offset = 0; 5533 u32 table_offset; 5534 5535 if (rdev->family == CHIP_KAVERI) 5536 max_me = 5; 5537 5538 if (rdev->rlc.cp_table_ptr == NULL) 5539 return; 5540 5541 /* write the cp table buffer */ 5542 dst_ptr = rdev->rlc.cp_table_ptr; 5543 for (me = 0; me < max_me; me++) { 5544 if (me == 0) { 5545 fw_data = (const __be32 *)rdev->ce_fw->data; 5546 table_offset = CP_ME_TABLE_OFFSET; 5547 } else if (me == 1) { 5548 fw_data = (const __be32 *)rdev->pfp_fw->data; 5549 table_offset = CP_ME_TABLE_OFFSET; 5550 } else if (me == 2) { 5551 fw_data = (const __be32 *)rdev->me_fw->data; 5552 table_offset = CP_ME_TABLE_OFFSET; 5553 } else { 5554 fw_data = (const __be32 *)rdev->mec_fw->data; 5555 table_offset = CP_MEC_TABLE_OFFSET; 5556 } 5557 5558 for (i = 0; i < CP_ME_TABLE_SIZE; i++) { 5559 dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]); 5560 } 5561 bo_offset += CP_ME_TABLE_SIZE; 5562 } 5563 } 5564 5565 static void cik_enable_gfx_cgpg(struct radeon_device *rdev, 5566 bool enable) 5567 { 5568 u32 data, orig; 5569 5570 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) { 5571 orig = data = RREG32(RLC_PG_CNTL); 5572 data |= GFX_PG_ENABLE; 5573 if (orig != data) 5574 WREG32(RLC_PG_CNTL, data); 5575 5576 orig = data = RREG32(RLC_AUTO_PG_CTRL); 5577 data |= AUTO_PG_EN; 5578 if (orig != data) 5579 WREG32(RLC_AUTO_PG_CTRL, data); 5580 } else { 5581 orig = data = RREG32(RLC_PG_CNTL); 5582 data &= ~GFX_PG_ENABLE; 5583 if (orig != data) 5584 WREG32(RLC_PG_CNTL, data); 5585 5586 orig = data = RREG32(RLC_AUTO_PG_CTRL); 5587 data &= ~AUTO_PG_EN; 5588 if (orig != data) 5589 WREG32(RLC_AUTO_PG_CTRL, data); 5590 5591 data = RREG32(DB_RENDER_CONTROL); 5592 } 5593 } 5594 5595 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh) 5596 { 5597 u32 mask = 0, tmp, tmp1; 5598 int i; 5599 5600 cik_select_se_sh(rdev, se, sh); 5601 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG); 5602 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG); 5603 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5604 5605 tmp &= 0xffff0000; 5606 5607 tmp |= tmp1; 5608 tmp >>= 16; 5609 5610 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) { 5611 mask <<= 1; 5612 mask |= 1; 5613 } 5614 5615 return (~tmp) & mask; 5616 } 5617 5618 static void cik_init_ao_cu_mask(struct radeon_device *rdev) 5619 { 5620 u32 i, j, k, active_cu_number = 0; 5621 u32 mask, counter, cu_bitmap; 5622 u32 tmp = 0; 5623 5624 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { 5625 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { 5626 mask = 1; 5627 cu_bitmap = 0; 5628 counter = 0; 5629 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) { 5630 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) { 5631 if (counter < 2) 5632 cu_bitmap |= mask; 5633 counter++; 5634 } 5635 mask <<= 1; 5636 } 5637 5638 active_cu_number += counter; 5639 tmp |= (cu_bitmap << (i * 16 + j * 8)); 5640 } 5641 } 5642 5643 WREG32(RLC_PG_AO_CU_MASK, tmp); 5644 5645 tmp = RREG32(RLC_MAX_PG_CU); 5646 tmp &= ~MAX_PU_CU_MASK; 5647 tmp |= MAX_PU_CU(active_cu_number); 5648 WREG32(RLC_MAX_PG_CU, tmp); 5649 } 5650 5651 static void
cik_enable_gfx_static_mgpg(struct radeon_device *rdev, 5652 bool enable) 5653 { 5654 u32 data, orig; 5655 5656 orig = data = RREG32(RLC_PG_CNTL); 5657 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG)) 5658 data |= STATIC_PER_CU_PG_ENABLE; 5659 else 5660 data &= ~STATIC_PER_CU_PG_ENABLE; 5661 if (orig != data) 5662 WREG32(RLC_PG_CNTL, data); 5663 } 5664 5665 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev, 5666 bool enable) 5667 { 5668 u32 data, orig; 5669 5670 orig = data = RREG32(RLC_PG_CNTL); 5671 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG)) 5672 data |= DYN_PER_CU_PG_ENABLE; 5673 else 5674 data &= ~DYN_PER_CU_PG_ENABLE; 5675 if (orig != data) 5676 WREG32(RLC_PG_CNTL, data); 5677 } 5678 5679 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90 5680 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D 5681 5682 static void cik_init_gfx_cgpg(struct radeon_device *rdev) 5683 { 5684 u32 data, orig; 5685 u32 i; 5686 5687 if (rdev->rlc.cs_data) { 5688 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET); 5689 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr)); 5690 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr)); 5691 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size); 5692 } else { 5693 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET); 5694 for (i = 0; i < 3; i++) 5695 WREG32(RLC_GPM_SCRATCH_DATA, 0); 5696 } 5697 if (rdev->rlc.reg_list) { 5698 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET); 5699 for (i = 0; i < rdev->rlc.reg_list_size; i++) 5700 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]); 5701 } 5702 5703 orig = data = RREG32(RLC_PG_CNTL); 5704 data |= GFX_PG_SRC; 5705 if (orig != data) 5706 WREG32(RLC_PG_CNTL, data); 5707 5708 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8); 5709 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8); 5710 5711 data = RREG32(CP_RB_WPTR_POLL_CNTL); 5712 data &= ~IDLE_POLL_COUNT_MASK; 5713 data |= IDLE_POLL_COUNT(0x60); 5714 WREG32(CP_RB_WPTR_POLL_CNTL, data); 5715 5716 data = 0x10101010; 5717 WREG32(RLC_PG_DELAY, data); 5718 5719 data = RREG32(RLC_PG_DELAY_2); 5720 data &= ~0xff; 5721 data |= 0x3; 5722 WREG32(RLC_PG_DELAY_2, data); 5723 5724 data = RREG32(RLC_AUTO_PG_CTRL); 5725 data &= ~GRBM_REG_SGIT_MASK; 5726 data |= GRBM_REG_SGIT(0x700); 5727 WREG32(RLC_AUTO_PG_CTRL, data); 5728 5729 } 5730 5731 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable) 5732 { 5733 cik_enable_gfx_cgpg(rdev, enable); 5734 cik_enable_gfx_static_mgpg(rdev, enable); 5735 cik_enable_gfx_dynamic_mgpg(rdev, enable); 5736 } 5737 5738 u32 cik_get_csb_size(struct radeon_device *rdev) 5739 { 5740 u32 count = 0; 5741 const struct cs_section_def *sect = NULL; 5742 const struct cs_extent_def *ext = NULL; 5743 5744 if (rdev->rlc.cs_data == NULL) 5745 return 0; 5746 5747 /* begin clear state */ 5748 count += 2; 5749 /* context control state */ 5750 count += 3; 5751 5752 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) { 5753 for (ext = sect->section; ext->extent != NULL; ++ext) { 5754 if (sect->id == SECT_CONTEXT) 5755 count += 2 + ext->reg_count; 5756 else 5757 return 0; 5758 } 5759 } 5760 /* pa_sc_raster_config/pa_sc_raster_config1 */ 5761 count += 4; 5762 /* end clear state */ 5763 count += 2; 5764 /* clear state */ 5765 count += 2; 5766 5767 return count; 5768 } 5769 5770 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer) 5771 { 5772 u32 count = 0, i; 5773 
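/*
 * Note: the dwords emitted below must stay in sync with
 * cik_get_csb_size(): 2 (begin clear state) + 3 (context control) +
 * (2 + reg_count) per SECT_CONTEXT extent + 4 (pa_sc_raster_config
 * pair) + 2 (end clear state) + 2 (clear state).
 */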
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;

	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
	buffer[count++] = 0x80000000;
	buffer[count++] = 0x80000000;

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
				buffer[count++] = ext->reg_index - 0xa000;
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = ext->extent[i];
			} else {
				return;
			}
		}
	}

	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = 0x16000012;
		buffer[count++] = 0x00000000;
		break;
	case CHIP_KAVERI:
		buffer[count++] = 0x00000000; /* XXX */
		buffer[count++] = 0x00000000;
		break;
	case CHIP_KABINI:
		buffer[count++] = 0x00000000; /* XXX */
		buffer[count++] = 0x00000000;
		break;
	default:
		buffer[count++] = 0x00000000;
		buffer[count++] = 0x00000000;
		break;
	}

	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;

	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
	buffer[count++] = 0;
}

static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}

static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}

/*
 * Interrupts
 * Starting with r6xx, interrupts are handled via a ring buffer.
 * Ring buffers are areas of GPU accessible memory that the GPU
 * writes interrupt vectors into and the host reads vectors out of.
 * There is a rptr (read pointer) that determines where the
 * host is currently reading, and a wptr (write pointer)
 * which determines where the GPU has written.  When the
 * pointers are equal, the ring is idle.  When the GPU
 * writes vectors to the ring buffer, it increments the
 * wptr.  When there is an interrupt, the host then starts
 * fetching vectors and processing them until the pointers are
 * equal again, at which point it updates the rptr.
 */
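
/*
 * Editor's illustrative sketch, not part of the driver and not called
 * anywhere: the consumer side of the rptr/wptr scheme described above,
 * assuming 16-byte vectors and a power-of-two ring size (so the mask
 * wrap works).  cik_irq_process() below is the real implementation.
 */
static inline u32 __maybe_unused cik_ih_consume_example(const __le32 *ring,
							u32 rptr, u32 wptr,
							u32 ptr_mask)
{
	while (rptr != wptr) {
		/* one 128-bit vector = four dwords at byte offset rptr */
		u32 src_id = le32_to_cpu(ring[rptr / 4]) & 0xff;

		(void)src_id;	/* a real handler dispatches on src_id */
		rptr = (rptr + 16) & ptr_mask;
	}
	return rptr;	/* caller writes this back to IH_RB_RPTR */
}

/**
 * cik_enable_interrupts - Enable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Enable the interrupt ring buffer (CIK).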
 */
static void cik_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	rdev->ih.enabled = true;
}

/**
 * cik_disable_interrupts - Disable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Disable the interrupt ring buffer (CIK).
 */
static void cik_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}

/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc.
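	 * (one LB_INTERRUPT_MASK per display controller; all cleared here
	 * and selectively re-enabled in cik_irq_set())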
	 */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);
}

/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address.
same on older asics */ 6008 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); 6009 interrupt_cntl = RREG32(INTERRUPT_CNTL); 6010 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi 6011 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN 6012 */ 6013 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE; 6014 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */ 6015 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN; 6016 WREG32(INTERRUPT_CNTL, interrupt_cntl); 6017 6018 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8); 6019 rb_bufsz = order_base_2(rdev->ih.ring_size / 4); 6020 6021 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE | 6022 IH_WPTR_OVERFLOW_CLEAR | 6023 (rb_bufsz << 1)); 6024 6025 if (rdev->wb.enabled) 6026 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE; 6027 6028 /* set the writeback address whether it's enabled or not */ 6029 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC); 6030 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF); 6031 6032 WREG32(IH_RB_CNTL, ih_rb_cntl); 6033 6034 /* set rptr, wptr to 0 */ 6035 WREG32(IH_RB_RPTR, 0); 6036 WREG32(IH_RB_WPTR, 0); 6037 6038 /* Default settings for IH_CNTL (disabled at first) */ 6039 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0); 6040 /* RPTR_REARM only works if msi's are enabled */ 6041 if (rdev->msi_enabled) 6042 ih_cntl |= RPTR_REARM; 6043 WREG32(IH_CNTL, ih_cntl); 6044 6045 /* force the active interrupt state to all disabled */ 6046 cik_disable_interrupt_state(rdev); 6047 6048 pci_set_master(rdev->pdev); 6049 6050 /* enable irqs */ 6051 cik_enable_interrupts(rdev); 6052 6053 return ret; 6054 } 6055 6056 /** 6057 * cik_irq_set - enable/disable interrupt sources 6058 * 6059 * @rdev: radeon_device pointer 6060 * 6061 * Enable interrupt sources on the GPU (vblanks, hpd, 6062 * etc.) (CIK). 6063 * Returns 0 for success, errors for failure. 
 */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;

	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;

	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;

	if (rdev->flags & RADEON_IS_IGP)
		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
			~(THERM_INTH_MASK | THERM_INTL_MASK);
	else
		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
		DRM_DEBUG("cik_irq_set: sw int cp1\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else if (ring->me == 2) {
			switch (ring->pipe) {
			case 0:
				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
		}
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
		DRM_DEBUG("cik_irq_set: sw int cp2\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else if (ring->me == 2) {
			switch (ring->pipe) {
			case 0:
				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
		}
	}

	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("cik_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("cik_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("cik_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("cik_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("cik_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("cik_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("cik_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("cik_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("cik_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("cik_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("cik_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("cik_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		if (rdev->flags & RADEON_IS_IGP)
			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
		else
			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);

	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET,
	       dma_cntl1);

	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	if (rdev->flags & RADEON_IS_IGP)
		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
	else
		WREG32_SMC(CG_THERMAL_INT, thermal_int);

	return 0;
}

/**
 * cik_irq_ack - ack interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
 * generated and do not require an explicit ack.
 */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}

/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}

/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}

/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
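
/*
 * Editor's illustrative sketch, not part of the driver and not called
 * anywhere: unpacking the RINGID byte of an IV ring entry for CP
 * sources, following the layout documented above cik_irq_process()
 * below.  The helper name is local to this example.
 */
static inline void __maybe_unused cik_iv_decode_cp_ringid_example(u32 ring_id,
								  u8 *me_id,
								  u8 *pipe_id,
								  u8 *queue_id)
{
	*me_id = (ring_id & 0x60) >> 5;	/* 0 = gfx, 1/2 = compute MECs */
	*pipe_id = (ring_id & 0x18) >> 3;
	*queue_id = (ring_id & 0x7) >> 0;
}

/**
 * cik_get_ih_wptr - get the IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Get the IH ring buffer wptr from either the register
 * or the writeback memory buffer (CIK).  Also check for
 * ring buffer overflow and deal with it.
 * Used by cik_irq_process().
 * Returns the value of the wptr.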
 */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		/* When a ring buffer overflow happens, start parsing interrupts
		 * from the last not overwritten vector (wptr + 16).  Hopefully
		 * this should allow us to catch up.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}

/*  CIK IV Ring
 * Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 *            CP:
 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
 *            PIPE_ID - ME0 0=3D
 *                    - ME1&2 compute dispatcher (4 pipes each)
 *            SDMA:
 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
 * [79:72]  - VMID
 * [95:80]  - PASID
 * [127:96] - reserved
 */
/**
 * cik_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Interrupt handler (CIK).  Walk the IH ring,
 * ack interrupts and schedule work to handle
 * interrupt events.
 * Returns irq process return code.
 */
int cik_irq_process(struct radeon_device *rdev)
{
	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u8 me_id, pipe_id, queue_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_reset = false;
	u32 addr, status, mc_client;
	bool queue_thermal = false;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = cik_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	cik_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes!
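		 * Each 128-bit vector is 16 bytes (four dwords), so the
		 * dword index below is rptr / 4 and rptr is advanced by
		 * 16 at the bottom of the loop.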
*/ 6556 ring_index = rptr / 4; 6557 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; 6558 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; 6559 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; 6560 6561 switch (src_id) { 6562 case 1: /* D1 vblank/vline */ 6563 switch (src_data) { 6564 case 0: /* D1 vblank */ 6565 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) { 6566 if (rdev->irq.crtc_vblank_int[0]) { 6567 drm_handle_vblank(rdev->ddev, 0); 6568 rdev->pm.vblank_sync = true; 6569 wake_up(&rdev->irq.vblank_queue); 6570 } 6571 if (atomic_read(&rdev->irq.pflip[0])) 6572 radeon_crtc_handle_flip(rdev, 0); 6573 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; 6574 DRM_DEBUG("IH: D1 vblank\n"); 6575 } 6576 break; 6577 case 1: /* D1 vline */ 6578 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) { 6579 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; 6580 DRM_DEBUG("IH: D1 vline\n"); 6581 } 6582 break; 6583 default: 6584 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6585 break; 6586 } 6587 break; 6588 case 2: /* D2 vblank/vline */ 6589 switch (src_data) { 6590 case 0: /* D2 vblank */ 6591 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { 6592 if (rdev->irq.crtc_vblank_int[1]) { 6593 drm_handle_vblank(rdev->ddev, 1); 6594 rdev->pm.vblank_sync = true; 6595 wake_up(&rdev->irq.vblank_queue); 6596 } 6597 if (atomic_read(&rdev->irq.pflip[1])) 6598 radeon_crtc_handle_flip(rdev, 1); 6599 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; 6600 DRM_DEBUG("IH: D2 vblank\n"); 6601 } 6602 break; 6603 case 1: /* D2 vline */ 6604 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) { 6605 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; 6606 DRM_DEBUG("IH: D2 vline\n"); 6607 } 6608 break; 6609 default: 6610 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6611 break; 6612 } 6613 break; 6614 case 3: /* D3 vblank/vline */ 6615 switch (src_data) { 6616 case 0: /* D3 vblank */ 6617 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { 6618 if (rdev->irq.crtc_vblank_int[2]) { 6619 drm_handle_vblank(rdev->ddev, 2); 6620 rdev->pm.vblank_sync = true; 6621 wake_up(&rdev->irq.vblank_queue); 6622 } 6623 if (atomic_read(&rdev->irq.pflip[2])) 6624 radeon_crtc_handle_flip(rdev, 2); 6625 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; 6626 DRM_DEBUG("IH: D3 vblank\n"); 6627 } 6628 break; 6629 case 1: /* D3 vline */ 6630 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { 6631 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; 6632 DRM_DEBUG("IH: D3 vline\n"); 6633 } 6634 break; 6635 default: 6636 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6637 break; 6638 } 6639 break; 6640 case 4: /* D4 vblank/vline */ 6641 switch (src_data) { 6642 case 0: /* D4 vblank */ 6643 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { 6644 if (rdev->irq.crtc_vblank_int[3]) { 6645 drm_handle_vblank(rdev->ddev, 3); 6646 rdev->pm.vblank_sync = true; 6647 wake_up(&rdev->irq.vblank_queue); 6648 } 6649 if (atomic_read(&rdev->irq.pflip[3])) 6650 radeon_crtc_handle_flip(rdev, 3); 6651 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; 6652 DRM_DEBUG("IH: D4 vblank\n"); 6653 } 6654 break; 6655 case 1: /* D4 vline */ 6656 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { 6657 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; 6658 
DRM_DEBUG("IH: D4 vline\n"); 6659 } 6660 break; 6661 default: 6662 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6663 break; 6664 } 6665 break; 6666 case 5: /* D5 vblank/vline */ 6667 switch (src_data) { 6668 case 0: /* D5 vblank */ 6669 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { 6670 if (rdev->irq.crtc_vblank_int[4]) { 6671 drm_handle_vblank(rdev->ddev, 4); 6672 rdev->pm.vblank_sync = true; 6673 wake_up(&rdev->irq.vblank_queue); 6674 } 6675 if (atomic_read(&rdev->irq.pflip[4])) 6676 radeon_crtc_handle_flip(rdev, 4); 6677 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; 6678 DRM_DEBUG("IH: D5 vblank\n"); 6679 } 6680 break; 6681 case 1: /* D5 vline */ 6682 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { 6683 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; 6684 DRM_DEBUG("IH: D5 vline\n"); 6685 } 6686 break; 6687 default: 6688 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6689 break; 6690 } 6691 break; 6692 case 6: /* D6 vblank/vline */ 6693 switch (src_data) { 6694 case 0: /* D6 vblank */ 6695 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { 6696 if (rdev->irq.crtc_vblank_int[5]) { 6697 drm_handle_vblank(rdev->ddev, 5); 6698 rdev->pm.vblank_sync = true; 6699 wake_up(&rdev->irq.vblank_queue); 6700 } 6701 if (atomic_read(&rdev->irq.pflip[5])) 6702 radeon_crtc_handle_flip(rdev, 5); 6703 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; 6704 DRM_DEBUG("IH: D6 vblank\n"); 6705 } 6706 break; 6707 case 1: /* D6 vline */ 6708 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { 6709 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; 6710 DRM_DEBUG("IH: D6 vline\n"); 6711 } 6712 break; 6713 default: 6714 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6715 break; 6716 } 6717 break; 6718 case 42: /* HPD hotplug */ 6719 switch (src_data) { 6720 case 0: 6721 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) { 6722 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; 6723 queue_hotplug = true; 6724 DRM_DEBUG("IH: HPD1\n"); 6725 } 6726 break; 6727 case 1: 6728 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) { 6729 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; 6730 queue_hotplug = true; 6731 DRM_DEBUG("IH: HPD2\n"); 6732 } 6733 break; 6734 case 2: 6735 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) { 6736 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; 6737 queue_hotplug = true; 6738 DRM_DEBUG("IH: HPD3\n"); 6739 } 6740 break; 6741 case 3: 6742 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) { 6743 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; 6744 queue_hotplug = true; 6745 DRM_DEBUG("IH: HPD4\n"); 6746 } 6747 break; 6748 case 4: 6749 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) { 6750 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; 6751 queue_hotplug = true; 6752 DRM_DEBUG("IH: HPD5\n"); 6753 } 6754 break; 6755 case 5: 6756 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { 6757 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; 6758 queue_hotplug = true; 6759 DRM_DEBUG("IH: HPD6\n"); 6760 } 6761 break; 6762 default: 6763 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6764 break; 6765 } 6766 break; 6767 case 124: /* UVD */ 6768 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data); 6769 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX); 
			break;
		case 146:
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			cik_vm_decode_fault(rdev, status, addr, mc_client);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* GFX RB CP_INT */
		case 177: /* GFX IB CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
			case 2:
				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 184: /* CP Privileged reg access */
			DRM_ERROR("Illegal register access in command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				/* This results in a full GPU reset, but all we need to do is soft
				 * reset the CP for gfx
				 */
				queue_reset = true;
				break;
			case 1:
				/* XXX compute */
				queue_reset = true;
				break;
			case 2:
				/* XXX compute */
				queue_reset = true;
				break;
			}
			break;
		case 185: /* CP Privileged inst */
			DRM_ERROR("Illegal instruction in command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				/* This results in a full GPU reset, but all we need to do is soft
				 * reset the CP for gfx
				 */
				queue_reset = true;
				break;
			case 1:
				/* XXX compute */
				queue_reset = true;
				break;
			case 2:
				/* XXX compute */
				queue_reset = true;
				break;
			}
			break;
		case 224: /* SDMA trap event */
			/* XXX check the bitfield order!
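			 * For SDMA sources, ring_id[1:0] is the instance and
			 * ring_id[3:2] the queue, per the IV ring layout
			 * documented above cik_irq_process().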
*/ 6856 me_id = (ring_id & 0x3) >> 0; 6857 queue_id = (ring_id & 0xc) >> 2; 6858 DRM_DEBUG("IH: SDMA trap\n"); 6859 switch (me_id) { 6860 case 0: 6861 switch (queue_id) { 6862 case 0: 6863 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX); 6864 break; 6865 case 1: 6866 /* XXX compute */ 6867 break; 6868 case 2: 6869 /* XXX compute */ 6870 break; 6871 } 6872 break; 6873 case 1: 6874 switch (queue_id) { 6875 case 0: 6876 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); 6877 break; 6878 case 1: 6879 /* XXX compute */ 6880 break; 6881 case 2: 6882 /* XXX compute */ 6883 break; 6884 } 6885 break; 6886 } 6887 break; 6888 case 230: /* thermal low to high */ 6889 DRM_DEBUG("IH: thermal low to high\n"); 6890 rdev->pm.dpm.thermal.high_to_low = false; 6891 queue_thermal = true; 6892 break; 6893 case 231: /* thermal high to low */ 6894 DRM_DEBUG("IH: thermal high to low\n"); 6895 rdev->pm.dpm.thermal.high_to_low = true; 6896 queue_thermal = true; 6897 break; 6898 case 233: /* GUI IDLE */ 6899 DRM_DEBUG("IH: GUI idle\n"); 6900 break; 6901 case 241: /* SDMA Privileged inst */ 6902 case 247: /* SDMA Privileged inst */ 6903 DRM_ERROR("Illegal instruction in SDMA command stream\n"); 6904 /* XXX check the bitfield order! */ 6905 me_id = (ring_id & 0x3) >> 0; 6906 queue_id = (ring_id & 0xc) >> 2; 6907 switch (me_id) { 6908 case 0: 6909 switch (queue_id) { 6910 case 0: 6911 queue_reset = true; 6912 break; 6913 case 1: 6914 /* XXX compute */ 6915 queue_reset = true; 6916 break; 6917 case 2: 6918 /* XXX compute */ 6919 queue_reset = true; 6920 break; 6921 } 6922 break; 6923 case 1: 6924 switch (queue_id) { 6925 case 0: 6926 queue_reset = true; 6927 break; 6928 case 1: 6929 /* XXX compute */ 6930 queue_reset = true; 6931 break; 6932 case 2: 6933 /* XXX compute */ 6934 queue_reset = true; 6935 break; 6936 } 6937 break; 6938 } 6939 break; 6940 default: 6941 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6942 break; 6943 } 6944 6945 /* wptr/rptr are in bytes! */ 6946 rptr += 16; 6947 rptr &= rdev->ih.ptr_mask; 6948 } 6949 if (queue_hotplug) 6950 schedule_work(&rdev->hotplug_work); 6951 if (queue_reset) 6952 schedule_work(&rdev->reset_work); 6953 if (queue_thermal) 6954 schedule_work(&rdev->pm.dpm.thermal.work); 6955 rdev->ih.rptr = rptr; 6956 WREG32(IH_RB_RPTR, rdev->ih.rptr); 6957 atomic_set(&rdev->ih.lock, 0); 6958 6959 /* make sure wptr hasn't changed while processing */ 6960 wptr = cik_get_ih_wptr(rdev); 6961 if (wptr != rptr) 6962 goto restart_ih; 6963 6964 return IRQ_HANDLED; 6965 } 6966 6967 /* 6968 * startup/shutdown callbacks 6969 */ 6970 /** 6971 * cik_startup - program the asic to a functional state 6972 * 6973 * @rdev: radeon_device pointer 6974 * 6975 * Programs the asic to a functional state (CIK). 6976 * Called by cik_init() and cik_resume(). 6977 * Returns 0 for success, error for failure. 
6978 */ 6979 static int cik_startup(struct radeon_device *rdev) 6980 { 6981 struct radeon_ring *ring; 6982 int r; 6983 6984 /* enable pcie gen2/3 link */ 6985 cik_pcie_gen3_enable(rdev); 6986 /* enable aspm */ 6987 cik_program_aspm(rdev); 6988 6989 /* scratch needs to be initialized before MC */ 6990 r = r600_vram_scratch_init(rdev); 6991 if (r) 6992 return r; 6993 6994 cik_mc_program(rdev); 6995 6996 if (rdev->flags & RADEON_IS_IGP) { 6997 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || 6998 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) { 6999 r = cik_init_microcode(rdev); 7000 if (r) { 7001 DRM_ERROR("Failed to load firmware!\n"); 7002 return r; 7003 } 7004 } 7005 } else { 7006 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || 7007 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw || 7008 !rdev->mc_fw) { 7009 r = cik_init_microcode(rdev); 7010 if (r) { 7011 DRM_ERROR("Failed to load firmware!\n"); 7012 return r; 7013 } 7014 } 7015 7016 r = ci_mc_load_microcode(rdev); 7017 if (r) { 7018 DRM_ERROR("Failed to load MC firmware!\n"); 7019 return r; 7020 } 7021 } 7022 7023 r = cik_pcie_gart_enable(rdev); 7024 if (r) 7025 return r; 7026 cik_gpu_init(rdev); 7027 7028 /* allocate rlc buffers */ 7029 if (rdev->flags & RADEON_IS_IGP) { 7030 if (rdev->family == CHIP_KAVERI) { 7031 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list; 7032 rdev->rlc.reg_list_size = 7033 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list); 7034 } else { 7035 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list; 7036 rdev->rlc.reg_list_size = 7037 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list); 7038 } 7039 } 7040 rdev->rlc.cs_data = ci_cs_data; 7041 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4; 7042 r = sumo_rlc_init(rdev); 7043 if (r) { 7044 DRM_ERROR("Failed to init rlc BOs!\n"); 7045 return r; 7046 } 7047 7048 /* allocate wb buffer */ 7049 r = radeon_wb_init(rdev); 7050 if (r) 7051 return r; 7052 7053 /* allocate mec buffers */ 7054 r = cik_mec_init(rdev); 7055 if (r) { 7056 DRM_ERROR("Failed to init MEC BOs!\n"); 7057 return r; 7058 } 7059 7060 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); 7061 if (r) { 7062 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 7063 return r; 7064 } 7065 7066 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); 7067 if (r) { 7068 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 7069 return r; 7070 } 7071 7072 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX); 7073 if (r) { 7074 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 7075 return r; 7076 } 7077 7078 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); 7079 if (r) { 7080 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); 7081 return r; 7082 } 7083 7084 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); 7085 if (r) { 7086 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); 7087 return r; 7088 } 7089 7090 r = radeon_uvd_resume(rdev); 7091 if (!r) { 7092 r = uvd_v4_2_resume(rdev); 7093 if (!r) { 7094 r = radeon_fence_driver_start_ring(rdev, 7095 R600_RING_TYPE_UVD_INDEX); 7096 if (r) 7097 dev_err(rdev->dev, "UVD fences init error (%d).\n", r); 7098 } 7099 } 7100 if (r) 7101 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; 7102 7103 /* Enable IRQ */ 7104 if (!rdev->irq.installed) { 7105 r = radeon_irq_kms_init(rdev); 7106 if (r) 7107 return r; 7108 } 7109 7110 r = cik_irq_init(rdev); 7111 if (r) { 7112 DRM_ERROR("radeon: IH 
init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPUs only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}

/**
 * cik_resume - resume the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called at resume.
 * Returns 0 for success, error for failure.
 */
int cik_resume(struct radeon_device *rdev)
{
	int r;

	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	cik_init_golden_registers(rdev);

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		DRM_ERROR("cik startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;
}

/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}

/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does little more than
 * call asic-specific functions.  This should also allow us to remove
 * a bunch of callback functions like vram_info.
 */
/**
 * cik_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Setup asic specific driver variables and program the hw
 * to a functional state (CIK).
 * Called at driver startup.
 * Returns 0 for success, errors for failure.
 */
int cik_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	cik_init_golden_registers(rdev);
	/* Initialize scratch registers */
	cik_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = cik_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cik_cp_fini(rdev);
		cik_sdma_fini(rdev);
		cik_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		cik_mec_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cik_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for CIK parts.\n");
		return -EINVAL;
	}

	return 0;
}

/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 */
void cik_fini(struct radeon_device *rdev)
{
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}

/* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Set up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controller.
	 * There are 3 partitions per LB.  Select the number of partitions
	 * to enable based on the display width.  For display widths larger
	 * than 4096, you need to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (mode->crtc_hdisplay < 1920) {
			tmp = 1;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 2560) {
			tmp = 2;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 4096) {
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		} else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		}
	} else {
		tmp = 1;
		buffer_alloc = 0;
	}

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
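
/*
 * Editor's illustrative sketch, not called by the driver: the width ->
 * LB_MEMORY_CONFIG mapping implemented above, pulled out so the
 * partition selection is easy to see at a glance.
 */
static inline u32 __maybe_unused dce8_lb_config_for_width_example(u32 crtc_hdisplay)
{
	if (crtc_hdisplay < 1920)
		return 1;	/* lb size 1920 * 2 pixels */
	else if (crtc_hdisplay < 2560)
		return 2;	/* lb size 2560 * 2 pixels */
	else
		return 0;	/* lb size 4096 * 2 pixels */
}

/**
 * cik_get_number_of_dram_channels - get the number of dram channels
 *
 * @rdev: radeon_device pointer
 *
 * Look up the number of video ram channels (CIK).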
7512 * Used for display watermark bandwidth calculations 7513 * Returns the number of dram channels 7514 */ 7515 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev) 7516 { 7517 u32 tmp = RREG32(MC_SHARED_CHMAP); 7518 7519 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { 7520 case 0: 7521 default: 7522 return 1; 7523 case 1: 7524 return 2; 7525 case 2: 7526 return 4; 7527 case 3: 7528 return 8; 7529 case 4: 7530 return 3; 7531 case 5: 7532 return 6; 7533 case 6: 7534 return 10; 7535 case 7: 7536 return 12; 7537 case 8: 7538 return 16; 7539 } 7540 } 7541 7542 struct dce8_wm_params { 7543 u32 dram_channels; /* number of dram channels */ 7544 u32 yclk; /* bandwidth per dram data pin in kHz */ 7545 u32 sclk; /* engine clock in kHz */ 7546 u32 disp_clk; /* display clock in kHz */ 7547 u32 src_width; /* viewport width */ 7548 u32 active_time; /* active display time in ns */ 7549 u32 blank_time; /* blank time in ns */ 7550 bool interlaced; /* mode is interlaced */ 7551 fixed20_12 vsc; /* vertical scale ratio */ 7552 u32 num_heads; /* number of active crtcs */ 7553 u32 bytes_per_pixel; /* bytes per pixel display + overlay */ 7554 u32 lb_size; /* line buffer allocated to pipe */ 7555 u32 vtaps; /* vertical scaler taps */ 7556 }; 7557 7558 /** 7559 * dce8_dram_bandwidth - get the dram bandwidth 7560 * 7561 * @wm: watermark calculation data 7562 * 7563 * Calculate the raw dram bandwidth (CIK). 7564 * Used for display watermark bandwidth calculations 7565 * Returns the dram bandwidth in MBytes/s 7566 */ 7567 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm) 7568 { 7569 /* Calculate raw DRAM Bandwidth */ 7570 fixed20_12 dram_efficiency; /* 0.7 */ 7571 fixed20_12 yclk, dram_channels, bandwidth; 7572 fixed20_12 a; 7573 7574 a.full = dfixed_const(1000); 7575 yclk.full = dfixed_const(wm->yclk); 7576 yclk.full = dfixed_div(yclk, a); 7577 dram_channels.full = dfixed_const(wm->dram_channels * 4); 7578 a.full = dfixed_const(10); 7579 dram_efficiency.full = dfixed_const(7); 7580 dram_efficiency.full = dfixed_div(dram_efficiency, a); 7581 bandwidth.full = dfixed_mul(dram_channels, yclk); 7582 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency); 7583 7584 return dfixed_trunc(bandwidth); 7585 } 7586 7587 /** 7588 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display 7589 * 7590 * @wm: watermark calculation data 7591 * 7592 * Calculate the dram bandwidth used for display (CIK). 7593 * Used for display watermark bandwidth calculations 7594 * Returns the dram bandwidth for display in MBytes/s 7595 */ 7596 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm) 7597 { 7598 /* Calculate DRAM Bandwidth and the part allocated to display. 
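	 * As an illustration with made-up numbers: 2 channels at an
	 * effective yclk of 1000000 kHz give 2 * 4 * 1000 = 8000 MB/s raw,
	 * of which the worst-case 0.3 display allocation below leaves
	 * 2400 MB/s for display.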
	 */
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_data_return_bandwidth - get the data return bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the data return bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the data return bandwidth in MBytes/s
 */
static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display Data return Bandwidth */
	fixed20_12 return_efficiency; /* 0.8 */
	fixed20_12 sclk, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(10);
	return_efficiency.full = dfixed_const(8);
	return_efficiency.full = dfixed_div(return_efficiency, a);
	a.full = dfixed_const(32);
	bandwidth.full = dfixed_mul(a, sclk);
	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_dmif_request_bandwidth - get the dmif bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the dmif bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dmif bandwidth in MBytes/s
 */
static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the DMIF Request Bandwidth */
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
	fixed20_12 disp_clk, bandwidth;
	fixed20_12 a, b;

	a.full = dfixed_const(1000);
	disp_clk.full = dfixed_const(wm->disp_clk);
	disp_clk.full = dfixed_div(disp_clk, a);
	a.full = dfixed_const(32);
	b.full = dfixed_mul(a, disp_clk);

	a.full = dfixed_const(10);
	disp_clk_request_efficiency.full = dfixed_const(8);
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);

	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_available_bandwidth - get the min available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the min available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the min available bandwidth in MBytes/s
 */
static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the Available bandwidth.  Display can use this temporarily but not on average.
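	 * It is simply the min of the raw dram, data return and dmif
	 * request limits, e.g. min(5600, 8000, 7200) = 5600 MB/s with
	 * made-up inputs.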
/**
 * dce8_available_bandwidth - get the min available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the min available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the min available bandwidth in MBytes/s
 */
static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);

	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
}

/**
 * dce8_average_bandwidth - get the average available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the average available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the average available bandwidth in MBytes/s
 */
static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display mode Average Bandwidth
	 * DisplayMode should contain the source and destination dimensions,
	 * timing, etc.
	 */
	fixed20_12 bpp;
	fixed20_12 line_time;
	fixed20_12 src_width;
	fixed20_12 bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
	line_time.full = dfixed_div(line_time, a);
	bpp.full = dfixed_const(wm->bytes_per_pixel);
	src_width.full = dfixed_const(wm->src_width);
	bandwidth.full = dfixed_mul(src_width, bpp);
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
	bandwidth.full = dfixed_div(bandwidth, line_time);

	return dfixed_trunc(bandwidth);
}
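/* Worked example for dce8_average_bandwidth() (illustrative numbers):
 * a 1920 pixel wide source at 4 bytes per pixel, vsc = 1.0 and a line
 * time of 16000 ns (16 us) averages out to
 * 1920 * 4 bytes / 16 us = 480 MBytes/s.
 */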
/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);
}
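/* Worked example for dce8_latency_watermark() (illustrative numbers):
 * with one active head, available bandwidth = 5600 MBytes/s and
 * disp_clk = 300000 kHz, worst_chunk_return_time = 512 * 8 * 1000 /
 * 5600 = 731 ns, cursor_line_pair_return_time = 128 * 4 * 1000 /
 * 5600 = 91 ns and dc_latency = 40000000 / 300000 = 133 ns, giving a
 * base latency of 2000 + (2 * 731 + 91) + 133 = 3686 ns before the
 * line fill time adjustment.
 */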
/**
 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
 * average bandwidth against the dram bandwidth for display
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
		return true;
	else
		return false;
}

/**
 * dce8_average_bandwidth_vs_available_bandwidth - check
 * average bandwidth against the available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * available bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_available_bandwidth(wm) / wm->num_heads))
		return true;
	else
		return false;
}

/**
 * dce8_check_latency_hiding - check latency hiding
 *
 * @wm: watermark calculation data
 *
 * Check latency hiding (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
{
	u32 lb_partitions = wm->lb_size / wm->src_width;
	u32 line_time = wm->active_time + wm->blank_time;
	u32 latency_tolerant_lines;
	u32 latency_hiding;
	fixed20_12 a;

	a.full = dfixed_const(1);
	if (wm->vsc.full > a.full)
		latency_tolerant_lines = 1;
	else {
		if (lb_partitions <= (wm->vtaps + 1))
			latency_tolerant_lines = 1;
		else
			latency_tolerant_lines = 2;
	}

	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);

	if (dce8_latency_watermark(wm) <= latency_hiding)
		return true;
	else
		return false;
}
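/* Worked example for dce8_check_latency_hiding() (illustrative
 * numbers): with a 1920 pixel source, lb_size = 5760 (three 1920
 * pixel lines), vsc <= 1.0 and vtaps = 1, lb_partitions = 3 is
 * greater than vtaps + 1 = 2, so two lines are latency tolerant;
 * with active_time = 14666 ns and blank_time = 1333 ns the watermark
 * must not exceed 2 * 15999 + 1333 = 33331 ns.
 */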
/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
/**
 * dce8_bandwidth_update - program display watermarks
 *
 * @rdev: radeon_device pointer
 *
 * Calculate and program the display watermarks and line
 * buffer allocation (CIK).
 */
void dce8_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	radeon_update_display_priority(rdev);

	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	for (i = 0; i < rdev->num_crtc; i++) {
		mode = &rdev->mode_info.crtcs[i]->base.mode;
		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
	}
}

/**
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (CIK).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	mutex_lock(&rdev->gpu_clock_mutex);
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&rdev->gpu_clock_mutex);
	return clock;
}

static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			     u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}

int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	int r = 0;

	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
	if (r)
		return r;

	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
	return r;
}
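/* Example use of cik_set_uvd_clocks() (illustrative values): the UVD
 * code requests clocks in 10 kHz units, e.g.
 *
 *	r = cik_set_uvd_clocks(rdev, 53300, 40000);
 *
 * would program a ~533 MHz VCLK and ~400 MHz DCLK, returning
 * -ETIMEDOUT if either divider fails to report DCLK_STATUS in time.
 */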
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
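/* Note on the sequence above: the target link speed is written to the
 * GPU's PCIe LNKCTL2 Target Link Speed field (1 = 2.5 GT/s, 2 =
 * 5.0 GT/s, 3 = 8.0 GT/s per the PCIe spec), then the port is told to
 * retrain via LC_INITIATE_LINK_SPEED_CHANGE, which the final loop
 * polls until the hardware clears it or the timeout expires.
 */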
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}
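
			/* If the root port advertises clock power
			 * management, the writes below presumably move
			 * the thermal monitor, deep sleep and mpll
			 * bypass clocks off the PCIe reference clock
			 * so it can be gated via CLKREQ# in L1; this
			 * is an interpretation of the register names,
			 * not documented behavior.
			 */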
			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}