/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
				 struct radeon_ib *ib,
				 uint64_t pe,
				 uint64_t addr, unsigned count,
				 uint32_t incr, uint32_t flags);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	/* bit 9 set means the 9-bit reading saturated; report the max */
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	/* convert the raw SMC reading to degrees C */
	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * Indirect registers accessor
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX); /* readback flushes the posted index write */
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX); /* readback flushes the posted index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
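
/*
 * RLC save/restore register lists.
 *
 * A sketch of the layout, based on how the lists are consumed by the
 * RLC save/restore buffer setup (sumo_rlc_init()); the exact field
 * meanings are an assumption: entries come in pairs of
 * { (instance select << 16) | (register byte offset >> 2), 0 }, where
 * the upper half picks the SE/SH/instance to target (0x0e00 appearing
 * to be the broadcast select) and the lower half is the register
 * offset in dwords. The bare literals (0x3, 0x5) delimit sub-lists.
 */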
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x829c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x869c >> 2), 0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2), 0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc900 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc904 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc908 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0xae00 << 16) | (0xc90c >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc910 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2), 0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc770 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc774 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc778 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc77c >> 2), 0x00000000,
	(0x0400 << 16) | (0xc780 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc784 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc788 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc78c >> 2), 0x00000000,
	(0x0400 << 16) | (0xc798 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc79c >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x970c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x971c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0xae00 << 16) | (0x31068 >> 2), 0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2), 0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2), 0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x829c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x869c >> 2), 0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2), 0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2), 0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2), 0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc770 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc774 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc798 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc79c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x970c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x971c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2), 0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2), 0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
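
/*
 * "Golden" register tables. Each entry is an {offset, mask, value}
 * triple consumed by radeon_program_register_sequence(): the register
 * at 'offset' is read-modify-written so that only the bits set in
 * 'mask' take the new 'value'; a mask of 0xffffffff writes the value
 * directly without reading back.
 */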
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
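
/*
 * Apply the per-ASIC golden register settings. "Spectre" is the GFX
 * codename used here for Kaveri and "Kalindi" for Kabini, matching the
 * table names above.
 */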
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
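
/*
 * Doorbells: the doorbell aperture is a separate MMIO region; writing
 * a dword at a ring's assigned byte offset notifies the GPU of a ring
 * pointer update without a full register write (used by the compute
 * and SDMA rings on CIK).
 */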
/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}

#define BONAIRE_IO_MC_REGS_SIZE 36

/* {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs, programmed
 * before the MC ucode is loaded in ci_mc_load_microcode()
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active registers instances. Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only load the ucode when the MC sequencer is not already running;
	 * since it is idle in this branch, no blackout save/restore is
	 * needed (the dead blackout code that assumed otherwise has been
	 * dropped)
	 */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
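
/*
 * Firmware images are fetched with request_firmware() under names of
 * the form radeon/<CHIP>_<block>.bin (matching the MODULE_FIRMWARE
 * declarations at the top of this file) and each image is validated
 * against its expected per-block size before use.
 */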
/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size, smc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}

		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			printk(KERN_ERR
			       "smc: error loading firmware \"%s\"\n",
			       fw_name);
			release_firmware(rdev->smc_fw);
			rdev->smc_fw = NULL;
		} else if (rdev->smc_fw->size != smc_req_size) {
			printk(KERN_ERR
			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->smc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		/* mec and sdma images were previously leaked on error */
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */
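
	/*
	 * Each GB_TILE_MODE entry below packs the array mode, micro tile
	 * mode, pipe configuration and tile/sample split for one of the 32
	 * global tiling table slots; slots the hardware does not use fall
	 * through to 0. The secondary (GB_MACROTILE_MODE) table packs bank
	 * width/height, macro tile aspect and bank count.
	 */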
	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 4) {
		if (num_rbs == 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		} else if (num_rbs < 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2151 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2152 break; 2153 case 7: 2154 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2155 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2156 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2157 TILE_SPLIT(split_equal_to_row_size)); 2158 break; 2159 case 8: 2160 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2161 PIPE_CONFIG(ADDR_SURF_P4_8x16)); 2162 break; 2163 case 9: 2164 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2165 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2166 break; 2167 case 10: 2168 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2169 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2170 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2171 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2172 break; 2173 case 11: 2174 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2175 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2176 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2177 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2178 break; 2179 case 12: 2180 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2181 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2182 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2183 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2184 break; 2185 case 13: 2186 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2187 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2188 break; 2189 case 14: 2190 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2191 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2192 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2193 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2194 break; 2195 case 16: 2196 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2197 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2198 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2199 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2200 break; 2201 case 17: 2202 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2203 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2204 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2205 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2206 break; 2207 case 27: 2208 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2209 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2210 break; 2211 case 28: 2212 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2213 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2214 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2215 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2216 break; 2217 case 29: 2218 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2219 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2220 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2221 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2222 break; 2223 case 30: 2224 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2225 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2226 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2227 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2228 break; 2229 default: 2230 gb_tile_moden = 0; 2231 break; 2232 } 2233 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 2234 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2235 } 2236 } 2237 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { 2238 switch (reg_offset) { 2239 case 0: 2240 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2241 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2242 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2243 NUM_BANKS(ADDR_SURF_16_BANK)); 2244 break; 2245 case 1: 2246 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2247 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2248 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2249 NUM_BANKS(ADDR_SURF_16_BANK)); 2250 break; 2251 case 2: 2252 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2253 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2254 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2255 NUM_BANKS(ADDR_SURF_16_BANK)); 2256 break; 2257 case 3: 2258 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2259 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2260 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2261 NUM_BANKS(ADDR_SURF_16_BANK)); 2262 break; 2263 case 4: 2264 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2265 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2266 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2267 NUM_BANKS(ADDR_SURF_16_BANK)); 2268 break; 2269 case 5: 2270 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2271 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2272 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2273 NUM_BANKS(ADDR_SURF_8_BANK)); 2274 break; 2275 case 6: 2276 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2277 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2278 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2279 NUM_BANKS(ADDR_SURF_4_BANK)); 2280 break; 2281 case 8: 2282 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2283 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2284 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2285 NUM_BANKS(ADDR_SURF_16_BANK)); 2286 break; 2287 case 9: 2288 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2289 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2290 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2291 NUM_BANKS(ADDR_SURF_16_BANK)); 2292 break; 2293 case 10: 2294 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2295 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2296 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2297 NUM_BANKS(ADDR_SURF_16_BANK)); 2298 break; 2299 case 11: 2300 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2301 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2302 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2303 NUM_BANKS(ADDR_SURF_16_BANK)); 2304 break; 2305 case 12: 2306 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2307 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2308 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2309 NUM_BANKS(ADDR_SURF_16_BANK)); 2310 break; 2311 case 13: 2312 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2313 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2314 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2315 NUM_BANKS(ADDR_SURF_8_BANK)); 2316 break; 2317 case 14: 2318 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2319 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2320 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2321 NUM_BANKS(ADDR_SURF_4_BANK)); 2322 break; 2323 default: 2324 gb_tile_moden = 0; 2325 break; 2326 } 2327 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2328 } 2329 } else if (num_pipe_configs == 2) { 2330 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 2331 switch (reg_offset) { 2332 case 0: 2333 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2334 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2335 PIPE_CONFIG(ADDR_SURF_P2) | 2336 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 2337 break; 2338 case 1: 2339 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2340 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2341 PIPE_CONFIG(ADDR_SURF_P2) | 2342 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2343 break; 2344 case 2: 2345 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2346 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2347 PIPE_CONFIG(ADDR_SURF_P2) | 2348 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2349 break; 2350 case 3: 2351 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2352 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2353 PIPE_CONFIG(ADDR_SURF_P2) | 2354 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2355 break; 2356 case 4: 2357 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2358 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2359 PIPE_CONFIG(ADDR_SURF_P2) | 2360 TILE_SPLIT(split_equal_to_row_size)); 2361 break; 2362 case 5: 2363 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2364 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2365 break; 2366 case 6: 2367 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2368 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2369 PIPE_CONFIG(ADDR_SURF_P2) | 2370 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2371 break; 2372 case 7: 2373 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2374 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2375 PIPE_CONFIG(ADDR_SURF_P2) | 2376 TILE_SPLIT(split_equal_to_row_size)); 2377 break; 2378 case 8: 2379 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED); 2380 break; 2381 case 9: 2382 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2383 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2384 break; 2385 case 10: 2386 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2387 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2388 PIPE_CONFIG(ADDR_SURF_P2) | 2389 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2390 break; 2391 case 11: 2392 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2393 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2394 PIPE_CONFIG(ADDR_SURF_P2) | 2395 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2396 break; 2397 case 12: 2398 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2399 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2400 PIPE_CONFIG(ADDR_SURF_P2) | 2401 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2402 break; 2403 case 13: 2404 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2405 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2406 break; 2407 case 14: 2408 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2409 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2410 PIPE_CONFIG(ADDR_SURF_P2) | 2411 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2412 break; 2413 case 16: 2414 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2415 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2416 PIPE_CONFIG(ADDR_SURF_P2) | 2417 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2418 break; 2419 case 17: 2420 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2421 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2422 PIPE_CONFIG(ADDR_SURF_P2) | 2423 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2424 break; 2425 case 27: 2426 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2427 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2428 break; 2429 case 28: 2430 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2431 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2432 PIPE_CONFIG(ADDR_SURF_P2) | 2433 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2434 break; 2435 case 29: 2436 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2437 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2438 PIPE_CONFIG(ADDR_SURF_P2) | 2439 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2440 break; 2441 case 30: 2442 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2443 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2444 PIPE_CONFIG(ADDR_SURF_P2) | 2445 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 
2446 break; 2447 default: 2448 gb_tile_moden = 0; 2449 break; 2450 } 2451 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 2452 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2453 } 2454 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { 2455 switch (reg_offset) { 2456 case 0: 2457 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2458 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2459 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2460 NUM_BANKS(ADDR_SURF_16_BANK)); 2461 break; 2462 case 1: 2463 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2464 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2465 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2466 NUM_BANKS(ADDR_SURF_16_BANK)); 2467 break; 2468 case 2: 2469 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2470 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2471 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2472 NUM_BANKS(ADDR_SURF_16_BANK)); 2473 break; 2474 case 3: 2475 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2476 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2477 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2478 NUM_BANKS(ADDR_SURF_16_BANK)); 2479 break; 2480 case 4: 2481 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2482 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2483 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2484 NUM_BANKS(ADDR_SURF_16_BANK)); 2485 break; 2486 case 5: 2487 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2488 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2489 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2490 NUM_BANKS(ADDR_SURF_16_BANK)); 2491 break; 2492 case 6: 2493 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2494 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2495 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2496 NUM_BANKS(ADDR_SURF_8_BANK)); 2497 break; 2498 case 8: 2499 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2500 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2501 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2502 NUM_BANKS(ADDR_SURF_16_BANK)); 2503 break; 2504 case 9: 2505 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2506 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2507 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2508 NUM_BANKS(ADDR_SURF_16_BANK)); 2509 break; 2510 case 10: 2511 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2512 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2513 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2514 NUM_BANKS(ADDR_SURF_16_BANK)); 2515 break; 2516 case 11: 2517 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2518 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2519 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2520 NUM_BANKS(ADDR_SURF_16_BANK)); 2521 break; 2522 case 12: 2523 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2524 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2525 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2526 NUM_BANKS(ADDR_SURF_16_BANK)); 2527 break; 2528 case 13: 2529 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2530 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2531 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2532 NUM_BANKS(ADDR_SURF_16_BANK)); 2533 break; 2534 case 14: 2535 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2536 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2537 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2538 NUM_BANKS(ADDR_SURF_8_BANK)); 2539 break; 2540 default: 2541 gb_tile_moden = 0; 2542 break; 2543 } 2544 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2545 } 2546 } else 2547 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs); 2548 } 
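/*
 * For reference (illustrative only, not consumed by the hardware): each
 * GB_TILE_MODE/GB_MACROTILE_MODE word programmed above is simply an OR of
 * shifted bit-fields, e.g. for the 8-pipe case entry 0 is
 *
 *   ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 *   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
 *   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
 *   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)
 *
 * The copy cached in rdev->config.cik.tile_mode_array[] is what the driver
 * can report back to userspace so the userspace drivers can pick matching
 * surface layouts.
 */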
2549
2550 /**
2551 * cik_select_se_sh - select which SE, SH to address
2552 *
2553 * @rdev: radeon_device pointer
2554 * @se_num: shader engine to address
2555 * @sh_num: sh block to address
2556 *
2557 * Select which SE, SH combinations to address. Certain
2558 * registers are instanced per SE or SH. 0xffffffff means
2559 * broadcast to all SEs or SHs (CIK).
2560 */
2561 static void cik_select_se_sh(struct radeon_device *rdev,
2562 u32 se_num, u32 sh_num)
2563 {
2564 u32 data = INSTANCE_BROADCAST_WRITES;
2565
2566 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2567 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2568 else if (se_num == 0xffffffff)
2569 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2570 else if (sh_num == 0xffffffff)
2571 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2572 else
2573 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2574 WREG32(GRBM_GFX_INDEX, data);
2575 }
2576
2577 /**
2578 * cik_create_bitmask - create a bitmask
2579 *
2580 * @bit_width: length of the mask
2581 *
2582 * Create a variable length bit mask (CIK).
2583 * Returns the bitmask.
2584 */
2585 static u32 cik_create_bitmask(u32 bit_width)
2586 {
2587 u32 i, mask = 0;
2588
2589 for (i = 0; i < bit_width; i++) {
2590 mask <<= 1;
2591 mask |= 1;
2592 }
2593 return mask;
2594 }
2595
2596 /**
2597 * cik_get_rb_disabled - compute the mask of disabled RBs
2598 *
2599 * @rdev: radeon_device pointer
2600 * @max_rb_num: max RBs (render backends) for the asic
2601 * @se_num: number of SEs (shader engines) for the asic
2602 * @sh_per_se: number of SH blocks per SE for the asic
2603 *
2604 * Calculates the bitmask of disabled RBs (CIK).
2605 * Returns the disabled RB bitmask.
2606 */
2607 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2608 u32 max_rb_num, u32 se_num,
2609 u32 sh_per_se)
2610 {
2611 u32 data, mask;
2612
2613 data = RREG32(CC_RB_BACKEND_DISABLE);
2614 if (data & 1)
2615 data &= BACKEND_DISABLE_MASK;
2616 else
2617 data = 0;
2618 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2619
2620 data >>= BACKEND_DISABLE_SHIFT;
2621
2622 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2623
2624 return data & mask;
2625 }
2626
2627 /**
2628 * cik_setup_rb - setup the RBs on the asic
2629 *
2630 * @rdev: radeon_device pointer
2631 * @se_num: number of SEs (shader engines) for the asic
2632 * @sh_per_se: number of SH blocks per SE for the asic
2633 * @max_rb_num: max RBs (render backends) for the asic
2634 *
2635 * Configures per-SE/SH RB registers (CIK).
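 *
 * As a rough worked example of the helpers above: cik_create_bitmask(4)
 * yields 0xf, and cik_get_rb_disabled() masks the harvest fuses down to
 * max_rb_num / se_num / sh_per_se bits, so a (hypothetical) part with one
 * RB fused off would return e.g. a mask of 0x2.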
2636 */ 2637 static void cik_setup_rb(struct radeon_device *rdev, 2638 u32 se_num, u32 sh_per_se, 2639 u32 max_rb_num) 2640 { 2641 int i, j; 2642 u32 data, mask; 2643 u32 disabled_rbs = 0; 2644 u32 enabled_rbs = 0; 2645 2646 for (i = 0; i < se_num; i++) { 2647 for (j = 0; j < sh_per_se; j++) { 2648 cik_select_se_sh(rdev, i, j); 2649 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se); 2650 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH); 2651 } 2652 } 2653 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 2654 2655 mask = 1; 2656 for (i = 0; i < max_rb_num; i++) { 2657 if (!(disabled_rbs & mask)) 2658 enabled_rbs |= mask; 2659 mask <<= 1; 2660 } 2661 2662 for (i = 0; i < se_num; i++) { 2663 cik_select_se_sh(rdev, i, 0xffffffff); 2664 data = 0; 2665 for (j = 0; j < sh_per_se; j++) { 2666 switch (enabled_rbs & 3) { 2667 case 1: 2668 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2); 2669 break; 2670 case 2: 2671 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2); 2672 break; 2673 case 3: 2674 default: 2675 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2); 2676 break; 2677 } 2678 enabled_rbs >>= 2; 2679 } 2680 WREG32(PA_SC_RASTER_CONFIG, data); 2681 } 2682 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 2683 } 2684 2685 /** 2686 * cik_gpu_init - setup the 3D engine 2687 * 2688 * @rdev: radeon_device pointer 2689 * 2690 * Configures the 3D engine and tiling configuration 2691 * registers so that the 3D engine is usable. 2692 */ 2693 static void cik_gpu_init(struct radeon_device *rdev) 2694 { 2695 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG); 2696 u32 mc_shared_chmap, mc_arb_ramcfg; 2697 u32 hdp_host_path_cntl; 2698 u32 tmp; 2699 int i, j; 2700 2701 switch (rdev->family) { 2702 case CHIP_BONAIRE: 2703 rdev->config.cik.max_shader_engines = 2; 2704 rdev->config.cik.max_tile_pipes = 4; 2705 rdev->config.cik.max_cu_per_sh = 7; 2706 rdev->config.cik.max_sh_per_se = 1; 2707 rdev->config.cik.max_backends_per_se = 2; 2708 rdev->config.cik.max_texture_channel_caches = 4; 2709 rdev->config.cik.max_gprs = 256; 2710 rdev->config.cik.max_gs_threads = 32; 2711 rdev->config.cik.max_hw_contexts = 8; 2712 2713 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20; 2714 rdev->config.cik.sc_prim_fifo_size_backend = 0x100; 2715 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30; 2716 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130; 2717 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; 2718 break; 2719 case CHIP_KAVERI: 2720 rdev->config.cik.max_shader_engines = 1; 2721 rdev->config.cik.max_tile_pipes = 4; 2722 if ((rdev->pdev->device == 0x1304) || 2723 (rdev->pdev->device == 0x1305) || 2724 (rdev->pdev->device == 0x130C) || 2725 (rdev->pdev->device == 0x130F) || 2726 (rdev->pdev->device == 0x1310) || 2727 (rdev->pdev->device == 0x1311) || 2728 (rdev->pdev->device == 0x131C)) { 2729 rdev->config.cik.max_cu_per_sh = 8; 2730 rdev->config.cik.max_backends_per_se = 2; 2731 } else if ((rdev->pdev->device == 0x1309) || 2732 (rdev->pdev->device == 0x130A) || 2733 (rdev->pdev->device == 0x130D) || 2734 (rdev->pdev->device == 0x1313) || 2735 (rdev->pdev->device == 0x131D)) { 2736 rdev->config.cik.max_cu_per_sh = 6; 2737 rdev->config.cik.max_backends_per_se = 2; 2738 } else if ((rdev->pdev->device == 0x1306) || 2739 (rdev->pdev->device == 0x1307) || 2740 (rdev->pdev->device == 0x130B) || 2741 (rdev->pdev->device == 0x130E) || 2742 (rdev->pdev->device == 0x1315) || 2743 (rdev->pdev->device == 0x131B)) { 2744 rdev->config.cik.max_cu_per_sh = 4; 2745 
rdev->config.cik.max_backends_per_se = 1; 2746 } else { 2747 rdev->config.cik.max_cu_per_sh = 3; 2748 rdev->config.cik.max_backends_per_se = 1; 2749 } 2750 rdev->config.cik.max_sh_per_se = 1; 2751 rdev->config.cik.max_texture_channel_caches = 4; 2752 rdev->config.cik.max_gprs = 256; 2753 rdev->config.cik.max_gs_threads = 16; 2754 rdev->config.cik.max_hw_contexts = 8; 2755 2756 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20; 2757 rdev->config.cik.sc_prim_fifo_size_backend = 0x100; 2758 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30; 2759 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130; 2760 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; 2761 break; 2762 case CHIP_KABINI: 2763 default: 2764 rdev->config.cik.max_shader_engines = 1; 2765 rdev->config.cik.max_tile_pipes = 2; 2766 rdev->config.cik.max_cu_per_sh = 2; 2767 rdev->config.cik.max_sh_per_se = 1; 2768 rdev->config.cik.max_backends_per_se = 1; 2769 rdev->config.cik.max_texture_channel_caches = 2; 2770 rdev->config.cik.max_gprs = 256; 2771 rdev->config.cik.max_gs_threads = 16; 2772 rdev->config.cik.max_hw_contexts = 8; 2773 2774 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20; 2775 rdev->config.cik.sc_prim_fifo_size_backend = 0x100; 2776 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30; 2777 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130; 2778 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; 2779 break; 2780 } 2781 2782 /* Initialize HDP */ 2783 for (i = 0, j = 0; i < 32; i++, j += 0x18) { 2784 WREG32((0x2c14 + j), 0x00000000); 2785 WREG32((0x2c18 + j), 0x00000000); 2786 WREG32((0x2c1c + j), 0x00000000); 2787 WREG32((0x2c20 + j), 0x00000000); 2788 WREG32((0x2c24 + j), 0x00000000); 2789 } 2790 2791 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff)); 2792 2793 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN); 2794 2795 mc_shared_chmap = RREG32(MC_SHARED_CHMAP); 2796 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); 2797 2798 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes; 2799 rdev->config.cik.mem_max_burst_length_bytes = 256; 2800 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT; 2801 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 2802 if (rdev->config.cik.mem_row_size_in_kb > 4) 2803 rdev->config.cik.mem_row_size_in_kb = 4; 2804 /* XXX use MC settings? */ 2805 rdev->config.cik.shader_engine_tile_size = 32; 2806 rdev->config.cik.num_gpus = 1; 2807 rdev->config.cik.multi_gpu_tile_size = 64; 2808 2809 /* fix up row size */ 2810 gb_addr_config &= ~ROW_SIZE_MASK; 2811 switch (rdev->config.cik.mem_row_size_in_kb) { 2812 case 1: 2813 default: 2814 gb_addr_config |= ROW_SIZE(0); 2815 break; 2816 case 2: 2817 gb_addr_config |= ROW_SIZE(1); 2818 break; 2819 case 4: 2820 gb_addr_config |= ROW_SIZE(2); 2821 break; 2822 } 2823 2824 /* setup tiling info dword. gb_addr_config is not adequate since it does 2825 * not have bank info, so create a custom tiling dword. 2826 * bits 3:0 num_pipes 2827 * bits 7:4 num_banks 2828 * bits 11:8 group_size 2829 * bits 15:12 row_size 2830 */ 2831 rdev->config.cik.tile_config = 0; 2832 switch (rdev->config.cik.num_tile_pipes) { 2833 case 1: 2834 rdev->config.cik.tile_config |= (0 << 0); 2835 break; 2836 case 2: 2837 rdev->config.cik.tile_config |= (1 << 0); 2838 break; 2839 case 4: 2840 rdev->config.cik.tile_config |= (2 << 0); 2841 break; 2842 case 8: 2843 default: 2844 /* XXX what about 12? 
*/
2845 rdev->config.cik.tile_config |= (3 << 0);
2846 break;
2847 }
2848 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
2849 rdev->config.cik.tile_config |= 1 << 4;
2850 else
2851 rdev->config.cik.tile_config |= 0 << 4;
2852 rdev->config.cik.tile_config |=
2853 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2854 rdev->config.cik.tile_config |=
2855 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
2856
2857 WREG32(GB_ADDR_CONFIG, gb_addr_config);
2858 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2859 WREG32(DMIF_ADDR_CALC, gb_addr_config);
2860 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2861 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2862 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2863 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2864 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2865
2866 cik_tiling_mode_table_init(rdev);
2867
2868 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2869 rdev->config.cik.max_sh_per_se,
2870 rdev->config.cik.max_backends_per_se);
2871
2872 /* set HW defaults for 3D engine */
2873 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2874
2875 WREG32(SX_DEBUG_1, 0x20);
2876
2877 WREG32(TA_CNTL_AUX, 0x00010000);
2878
2879 tmp = RREG32(SPI_CONFIG_CNTL);
2880 tmp |= 0x03000000;
2881 WREG32(SPI_CONFIG_CNTL, tmp);
2882
2883 WREG32(SQ_CONFIG, 1);
2884
2885 WREG32(DB_DEBUG, 0);
2886
2887 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2888 tmp |= 0x00000400;
2889 WREG32(DB_DEBUG2, tmp);
2890
2891 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2892 tmp |= 0x00020200;
2893 WREG32(DB_DEBUG3, tmp);
2894
2895 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2896 tmp |= 0x00018208;
2897 WREG32(CB_HW_CONTROL, tmp);
2898
2899 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2900
2901 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2902 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2903 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2904 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2905
2906 WREG32(VGT_NUM_INSTANCES, 1);
2907
2908 WREG32(CP_PERFMON_CNTL, 0);
2909
2910 WREG32(SQ_CONFIG, 0);
2911
2912 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2913 FORCE_EOV_MAX_REZ_CNT(255)));
2914
2915 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2916 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2917
2918 WREG32(VGT_GS_VERTEX_REUSE, 16);
2919 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2920
2921 tmp = RREG32(HDP_MISC_CNTL);
2922 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2923 WREG32(HDP_MISC_CNTL, tmp);
2924
2925 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2926 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2927
2928 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2929 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2930
2931 udelay(50);
2932 }
2933
2934 /*
2935 * GPU scratch register helper functions.
2936 */
2937 /**
2938 * cik_scratch_init - setup driver info for CP scratch regs
2939 *
2940 * @rdev: radeon_device pointer
2941 *
2942 * Set up the number and offset of the CP scratch registers.
2943 * NOTE: use of CP scratch registers is a legacy interface and
2944 * is not used by default on newer asics (r6xx+). On newer asics,
2945 * memory buffers are used for fences rather than scratch regs.
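 *
 * The resulting layout is simply seven consecutive registers:
 * rdev->scratch.reg[i] = SCRATCH_REG0 + i * 4 for i = 0..6.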
2946 */
2947 static void cik_scratch_init(struct radeon_device *rdev)
2948 {
2949 int i;
2950
2951 rdev->scratch.num_reg = 7;
2952 rdev->scratch.reg_base = SCRATCH_REG0;
2953 for (i = 0; i < rdev->scratch.num_reg; i++) {
2954 rdev->scratch.free[i] = true;
2955 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2956 }
2957 }
2958
2959 /**
2960 * cik_ring_test - basic gfx ring test
2961 *
2962 * @rdev: radeon_device pointer
2963 * @ring: radeon_ring structure holding ring information
2964 *
2965 * Allocate a scratch register and write to it using the gfx ring (CIK).
2966 * Provides a basic gfx ring test to verify that the ring is working.
2967 * Used by cik_cp_gfx_resume().
2968 * Returns 0 on success, error on failure.
2969 */
2970 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2971 {
2972 uint32_t scratch;
2973 uint32_t tmp = 0;
2974 unsigned i;
2975 int r;
2976
2977 r = radeon_scratch_get(rdev, &scratch);
2978 if (r) {
2979 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2980 return r;
2981 }
2982 WREG32(scratch, 0xCAFEDEAD);
2983 r = radeon_ring_lock(rdev, ring, 3);
2984 if (r) {
2985 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2986 radeon_scratch_free(rdev, scratch);
2987 return r;
2988 }
2989 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2990 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2991 radeon_ring_write(ring, 0xDEADBEEF);
2992 radeon_ring_unlock_commit(rdev, ring);
2993
2994 for (i = 0; i < rdev->usec_timeout; i++) {
2995 tmp = RREG32(scratch);
2996 if (tmp == 0xDEADBEEF)
2997 break;
2998 DRM_UDELAY(1);
2999 }
3000 if (i < rdev->usec_timeout) {
3001 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3002 } else {
3003 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3004 ring->idx, scratch, tmp);
3005 r = -EINVAL;
3006 }
3007 radeon_scratch_free(rdev, scratch);
3008 return r;
3009 }
3010
3011 /**
3012 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3013 *
3014 * @rdev: radeon_device pointer
3015 * @fence: radeon fence object
3016 *
3017 * Emits a fence sequence number on the gfx ring and flushes
3018 * GPU caches.
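 *
 * Rough sketch of the dword stream emitted below:
 *   PACKET3(PACKET3_EVENT_WRITE_EOP, 4)
 *   event control (TC/TCL1 flush, CACHE_FLUSH_AND_INV_TS_EVENT, EVENT_INDEX(5))
 *   fence address low bits (dword aligned)
 *   address high bits | DATA_SEL(1) | INT_SEL(2)
 *   fence sequence number (low dword, then high dword)
 * followed by a WRITE_DATA packet that flushes the HDP cache.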
3019 */
3020 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3021 struct radeon_fence *fence)
3022 {
3023 struct radeon_ring *ring = &rdev->ring[fence->ring];
3024 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3025
3026 /* EVENT_WRITE_EOP - flush caches, send int */
3027 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3028 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3029 EOP_TC_ACTION_EN |
3030 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3031 EVENT_INDEX(5)));
3032 radeon_ring_write(ring, addr & 0xfffffffc);
3033 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3034 radeon_ring_write(ring, fence->seq);
3035 radeon_ring_write(ring, 0);
3036 /* HDP flush */
3037 /* We should be using the new WAIT_REG_MEM special op packet here
3038 * but it causes the CP to hang
3039 */
3040 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3041 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3042 WRITE_DATA_DST_SEL(0)));
3043 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3044 radeon_ring_write(ring, 0);
3045 radeon_ring_write(ring, 0);
3046 }
3047
3048 /**
3049 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3050 *
3051 * @rdev: radeon_device pointer
3052 * @fence: radeon fence object
3053 *
3054 * Emits a fence sequence number on the compute ring and flushes
3055 * GPU caches.
3056 */
3057 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3058 struct radeon_fence *fence)
3059 {
3060 struct radeon_ring *ring = &rdev->ring[fence->ring];
3061 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3062
3063 /* RELEASE_MEM - flush caches, send int */
3064 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3065 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3066 EOP_TC_ACTION_EN |
3067 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3068 EVENT_INDEX(5)));
3069 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3070 radeon_ring_write(ring, addr & 0xfffffffc);
3071 radeon_ring_write(ring, upper_32_bits(addr));
3072 radeon_ring_write(ring, fence->seq);
3073 radeon_ring_write(ring, 0);
3074 /* HDP flush */
3075 /* We should be using the new WAIT_REG_MEM special op packet here
3076 * but it causes the CP to hang
3077 */
3078 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3079 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3080 WRITE_DATA_DST_SEL(0)));
3081 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3082 radeon_ring_write(ring, 0);
3083 radeon_ring_write(ring, 0);
3084 }
3085
3086 void cik_semaphore_ring_emit(struct radeon_device *rdev,
3087 struct radeon_ring *ring,
3088 struct radeon_semaphore *semaphore,
3089 bool emit_wait)
3090 {
3091 uint64_t addr = semaphore->gpu_addr;
3092 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3093
3094 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3095 radeon_ring_write(ring, addr & 0xffffffff);
3096 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3097 }
3098
3099 /*
3100 * IB stuff
3101 */
3102 /**
3103 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3104 *
3105 * @rdev: radeon_device pointer
3106 * @ib: radeon indirect buffer object
3107 *
3108 * Emits a DE (drawing engine) or CE (constant engine) IB
3109 * on the gfx ring. IBs are usually generated by userspace
3110 * acceleration drivers and submitted to the kernel for
3111 * scheduling on the ring. This function schedules the IB
3112 * on the gfx ring for execution by the GPU.
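 *
 * The launch itself is a 4-dword packet, roughly:
 *   PACKET3(PACKET3_INDIRECT_BUFFER[_CONST], 2)
 *   ib->gpu_addr & 0xFFFFFFFC (plus swap bits on big endian)
 *   upper_32_bits(ib->gpu_addr) & 0xFFFF
 *   control = INDIRECT_BUFFER_VALID | length in dwords | VM id << 24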
3113 */
3114 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3115 {
3116 struct radeon_ring *ring = &rdev->ring[ib->ring];
3117 u32 header, control = INDIRECT_BUFFER_VALID;
3118
3119 if (ib->is_const_ib) {
3120 /* set switch buffer packet before const IB */
3121 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3122 radeon_ring_write(ring, 0);
3123
3124 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3125 } else {
3126 u32 next_rptr;
3127 if (ring->rptr_save_reg) {
3128 next_rptr = ring->wptr + 3 + 4;
3129 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3130 radeon_ring_write(ring, ((ring->rptr_save_reg -
3131 PACKET3_SET_UCONFIG_REG_START) >> 2));
3132 radeon_ring_write(ring, next_rptr);
3133 } else if (rdev->wb.enabled) {
3134 next_rptr = ring->wptr + 5 + 4;
3135 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3136 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3137 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3138 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3139 radeon_ring_write(ring, next_rptr);
3140 }
3141
3142 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3143 }
3144
3145 control |= ib->length_dw |
3146 (ib->vm ? (ib->vm->id << 24) : 0);
3147
3148 radeon_ring_write(ring, header);
3149 radeon_ring_write(ring,
3150 #ifdef __BIG_ENDIAN
3151 (2 << 0) |
3152 #endif
3153 (ib->gpu_addr & 0xFFFFFFFC));
3154 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3155 radeon_ring_write(ring, control);
3156 }
3157
3158 /**
3159 * cik_ib_test - basic gfx ring IB test
3160 *
3161 * @rdev: radeon_device pointer
3162 * @ring: radeon_ring structure holding ring information
3163 *
3164 * Allocate an IB and execute it on the gfx ring (CIK).
3165 * Provides a basic gfx ring test to verify that IBs are working.
3166 * Returns 0 on success, error on failure.
3167 */
3168 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3169 {
3170 struct radeon_ib ib;
3171 uint32_t scratch;
3172 uint32_t tmp = 0;
3173 unsigned i;
3174 int r;
3175
3176 r = radeon_scratch_get(rdev, &scratch);
3177 if (r) {
3178 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3179 return r;
3180 }
3181 WREG32(scratch, 0xCAFEDEAD);
3182 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3183 if (r) {
3184 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3185 return r;
3186 }
3187 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3188 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3189 ib.ptr[2] = 0xDEADBEEF;
3190 ib.length_dw = 3;
3191 r = radeon_ib_schedule(rdev, &ib, NULL);
3192 if (r) {
3193 radeon_scratch_free(rdev, scratch);
3194 radeon_ib_free(rdev, &ib);
3195 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3196 return r;
3197 }
3198 r = radeon_fence_wait(ib.fence, false);
3199 if (r) {
3200 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3201 return r;
3202 }
3203 for (i = 0; i < rdev->usec_timeout; i++) {
3204 tmp = RREG32(scratch);
3205 if (tmp == 0xDEADBEEF)
3206 break;
3207 DRM_UDELAY(1);
3208 }
3209 if (i < rdev->usec_timeout) {
3210 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3211 } else {
3212 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3213 scratch, tmp);
3214 r = -EINVAL;
3215 }
3216 radeon_scratch_free(rdev, scratch);
3217 radeon_ib_free(rdev, &ib);
3218 return r;
3219 }
3220
3221 /*
3222 * CP.
3223 * On CIK, gfx and compute now have independent command processors.
3224 *
3225 * GFX
3226 * Gfx consists of a single ring and can process both gfx jobs and
3227 * compute jobs. The gfx CP consists of three microengines (ME):
3228 * PFP - Pre-Fetch Parser
3229 * ME - Micro Engine
3230 * CE - Constant Engine
3231 * The PFP and ME make up what is considered the Drawing Engine (DE).
3232 * The CE is an asynchronous engine used for updating buffer descriptors
3233 * used by the DE so that they can be loaded into cache in parallel
3234 * while the DE is processing state update packets.
3235 *
3236 * Compute
3237 * The compute CP consists of two microengines (ME):
3238 * MEC1 - Compute MicroEngine 1
3239 * MEC2 - Compute MicroEngine 2
3240 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3241 * The queues are exposed to userspace and are programmed directly
3242 * by the compute runtime.
3243 */
3244 /**
3245 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3246 *
3247 * @rdev: radeon_device pointer
3248 * @enable: enable or disable the MEs
3249 *
3250 * Halts or unhalts the gfx MEs.
3251 */
3252 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3253 {
3254 if (enable) {
3255 WREG32(CP_ME_CNTL, 0);
3256 } else {
3257 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3258 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3259 }
3260 udelay(50);
3261 }
3262
3263 /**
3264 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3265 *
3266 * @rdev: radeon_device pointer
3267 *
3268 * Loads the gfx PFP, ME, and CE ucode.
3269 * Returns 0 for success, -EINVAL if the ucode is not available.
3270 */
3271 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3272 {
3273 const __be32 *fw_data;
3274 int i;
3275
3276 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3277 return -EINVAL;
3278
3279 cik_cp_gfx_enable(rdev, false);
3280
3281 /* PFP */
3282 fw_data = (const __be32 *)rdev->pfp_fw->data;
3283 WREG32(CP_PFP_UCODE_ADDR, 0);
3284 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3285 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3286 WREG32(CP_PFP_UCODE_ADDR, 0);
3287
3288 /* CE */
3289 fw_data = (const __be32 *)rdev->ce_fw->data;
3290 WREG32(CP_CE_UCODE_ADDR, 0);
3291 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3292 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3293 WREG32(CP_CE_UCODE_ADDR, 0);
3294
3295 /* ME */
3296 fw_data = (const __be32 *)rdev->me_fw->data;
3297 WREG32(CP_ME_RAM_WADDR, 0);
3298 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3299 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3300 WREG32(CP_ME_RAM_WADDR, 0);
3301
3302 WREG32(CP_PFP_UCODE_ADDR, 0);
3303 WREG32(CP_CE_UCODE_ADDR, 0);
3304 WREG32(CP_ME_RAM_WADDR, 0);
3305 WREG32(CP_ME_RAM_RADDR, 0);
3306 return 0;
3307 }
3308
3309 /**
3310 * cik_cp_gfx_start - start the gfx ring
3311 *
3312 * @rdev: radeon_device pointer
3313 *
3314 * Enables the ring and loads the clear state context and other
3315 * packets required to init the ring.
3316 * Returns 0 for success, error for failure.
3317 */
3318 static int cik_cp_gfx_start(struct radeon_device *rdev)
3319 {
3320 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3321 int r, i;
3322
3323 /* init the CP */
3324 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3325 WREG32(CP_ENDIAN_SWAP, 0);
3326 WREG32(CP_DEVICE_ID, 1);
3327
3328 cik_cp_gfx_enable(rdev, true);
3329
3330 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3331 if (r) {
3332 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3333 return r;
3334 }
3335
3336 /* init the CE partitions.
CE only used for gfx on CIK */ 3337 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 3338 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 3339 radeon_ring_write(ring, 0xc000); 3340 radeon_ring_write(ring, 0xc000); 3341 3342 /* setup clear context state */ 3343 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3344 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3345 3346 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3347 radeon_ring_write(ring, 0x80000000); 3348 radeon_ring_write(ring, 0x80000000); 3349 3350 for (i = 0; i < cik_default_size; i++) 3351 radeon_ring_write(ring, cik_default_state[i]); 3352 3353 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3354 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3355 3356 /* set clear context state */ 3357 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3358 radeon_ring_write(ring, 0); 3359 3360 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 3361 radeon_ring_write(ring, 0x00000316); 3362 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ 3363 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */ 3364 3365 radeon_ring_unlock_commit(rdev, ring); 3366 3367 return 0; 3368 } 3369 3370 /** 3371 * cik_cp_gfx_fini - stop the gfx ring 3372 * 3373 * @rdev: radeon_device pointer 3374 * 3375 * Stop the gfx ring and tear down the driver ring 3376 * info. 3377 */ 3378 static void cik_cp_gfx_fini(struct radeon_device *rdev) 3379 { 3380 cik_cp_gfx_enable(rdev, false); 3381 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); 3382 } 3383 3384 /** 3385 * cik_cp_gfx_resume - setup the gfx ring buffer registers 3386 * 3387 * @rdev: radeon_device pointer 3388 * 3389 * Program the location and size of the gfx ring buffer 3390 * and test it to make sure it's working. 3391 * Returns 0 for success, error for failure. 
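 *
 * For example, assuming a 1 MB ring, the size is encoded as
 * rb_bufsz = order_base_2(ring->ring_size / 8) = order_base_2(131072) = 17
 * in the low bits of CP_RB0_CNTL, with the rptr report block size
 * packed into the field starting at bit 8.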
3392 */
3393 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3394 {
3395 struct radeon_ring *ring;
3396 u32 tmp;
3397 u32 rb_bufsz;
3398 u64 rb_addr;
3399 int r;
3400
3401 WREG32(CP_SEM_WAIT_TIMER, 0x0);
3402 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3403
3404 /* Set the write pointer delay */
3405 WREG32(CP_RB_WPTR_DELAY, 0);
3406
3407 /* set the RB to use vmid 0 */
3408 WREG32(CP_RB_VMID, 0);
3409
3410 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3411
3412 /* ring 0 - compute and gfx */
3413 /* Set ring buffer size */
3414 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3415 rb_bufsz = order_base_2(ring->ring_size / 8);
3416 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3417 #ifdef __BIG_ENDIAN
3418 tmp |= BUF_SWAP_32BIT;
3419 #endif
3420 WREG32(CP_RB0_CNTL, tmp);
3421
3422 /* Initialize the ring buffer's read and write pointers */
3423 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3424 ring->wptr = 0;
3425 WREG32(CP_RB0_WPTR, ring->wptr);
3426
3427 /* set the wb address whether it's enabled or not */
3428 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3429 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3430
3431 /* scratch register shadowing is no longer supported */
3432 WREG32(SCRATCH_UMSK, 0);
3433
3434 if (!rdev->wb.enabled)
3435 tmp |= RB_NO_UPDATE;
3436
3437 mdelay(1);
3438 WREG32(CP_RB0_CNTL, tmp);
3439
3440 rb_addr = ring->gpu_addr >> 8;
3441 WREG32(CP_RB0_BASE, rb_addr);
3442 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3443
3444 ring->rptr = RREG32(CP_RB0_RPTR);
3445
3446 /* start the ring */
3447 cik_cp_gfx_start(rdev);
3448 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3449 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3450 if (r) {
3451 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3452 return r;
3453 }
3454 return 0;
3455 }
3456
3457 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3458 struct radeon_ring *ring)
3459 {
3460 u32 rptr;
3461
3462 if (rdev->wb.enabled) {
3463 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3464 } else {
3465 mutex_lock(&rdev->srbm_mutex);
3466 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3467 rptr = RREG32(CP_HQD_PQ_RPTR);
3468 cik_srbm_select(rdev, 0, 0, 0, 0);
3469 mutex_unlock(&rdev->srbm_mutex);
3470 }
3471
3472 return rptr;
3473 }
3474
3477 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3478 struct radeon_ring *ring)
3479 {
3480 u32 wptr;
3481
3482 if (rdev->wb.enabled) {
3483 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3484 } else {
3485 mutex_lock(&rdev->srbm_mutex);
3486 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3487 wptr = RREG32(CP_HQD_PQ_WPTR);
3488 cik_srbm_select(rdev, 0, 0, 0, 0);
3489 mutex_unlock(&rdev->srbm_mutex);
3490 }
3491
3492 return wptr;
3493 }
3494
3495 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3496 struct radeon_ring *ring)
3497 {
3498 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
3499 WDOORBELL32(ring->doorbell_offset, ring->wptr);
3500 }
3501
3502 /**
3503 * cik_cp_compute_enable - enable/disable the compute CP MEs
3504 *
3505 * @rdev: radeon_device pointer
3506 * @enable: enable or disable the MEs
3507 *
3508 * Halts or unhalts the compute MEs.
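 *
 * Writing (MEC_ME1_HALT | MEC_ME2_HALT) to CP_MEC_CNTL halts both
 * compute micro engines; writing 0 releases them.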
3509 */ 3510 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable) 3511 { 3512 if (enable) 3513 WREG32(CP_MEC_CNTL, 0); 3514 else 3515 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT)); 3516 udelay(50); 3517 } 3518 3519 /** 3520 * cik_cp_compute_load_microcode - load the compute CP ME ucode 3521 * 3522 * @rdev: radeon_device pointer 3523 * 3524 * Loads the compute MEC1&2 ucode. 3525 * Returns 0 for success, -EINVAL if the ucode is not available. 3526 */ 3527 static int cik_cp_compute_load_microcode(struct radeon_device *rdev) 3528 { 3529 const __be32 *fw_data; 3530 int i; 3531 3532 if (!rdev->mec_fw) 3533 return -EINVAL; 3534 3535 cik_cp_compute_enable(rdev, false); 3536 3537 /* MEC1 */ 3538 fw_data = (const __be32 *)rdev->mec_fw->data; 3539 WREG32(CP_MEC_ME1_UCODE_ADDR, 0); 3540 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++) 3541 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++)); 3542 WREG32(CP_MEC_ME1_UCODE_ADDR, 0); 3543 3544 if (rdev->family == CHIP_KAVERI) { 3545 /* MEC2 */ 3546 fw_data = (const __be32 *)rdev->mec_fw->data; 3547 WREG32(CP_MEC_ME2_UCODE_ADDR, 0); 3548 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++) 3549 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++)); 3550 WREG32(CP_MEC_ME2_UCODE_ADDR, 0); 3551 } 3552 3553 return 0; 3554 } 3555 3556 /** 3557 * cik_cp_compute_start - start the compute queues 3558 * 3559 * @rdev: radeon_device pointer 3560 * 3561 * Enable the compute queues. 3562 * Returns 0 for success, error for failure. 3563 */ 3564 static int cik_cp_compute_start(struct radeon_device *rdev) 3565 { 3566 cik_cp_compute_enable(rdev, true); 3567 3568 return 0; 3569 } 3570 3571 /** 3572 * cik_cp_compute_fini - stop the compute queues 3573 * 3574 * @rdev: radeon_device pointer 3575 * 3576 * Stop the compute queues and tear down the driver queue 3577 * info. 
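 *
 * Also unpins and unrefs the per-ring MQD BOs created by
 * cik_cp_compute_resume().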
3578 */
3579 static void cik_cp_compute_fini(struct radeon_device *rdev)
3580 {
3581 int i, idx, r;
3582
3583 cik_cp_compute_enable(rdev, false);
3584
3585 for (i = 0; i < 2; i++) {
3586 if (i == 0)
3587 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3588 else
3589 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3590
3591 if (rdev->ring[idx].mqd_obj) {
3592 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3593 if (unlikely(r != 0))
3594 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3595
3596 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3597 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3598
3599 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3600 rdev->ring[idx].mqd_obj = NULL;
3601 }
3602 }
3603 }
3604
3605 static void cik_mec_fini(struct radeon_device *rdev)
3606 {
3607 int r;
3608
3609 if (rdev->mec.hpd_eop_obj) {
3610 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3611 if (unlikely(r != 0))
3612 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3613 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3614 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3615
3616 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3617 rdev->mec.hpd_eop_obj = NULL;
3618 }
3619 }
3620
3621 #define MEC_HPD_SIZE 2048
3622
3623 static int cik_mec_init(struct radeon_device *rdev)
3624 {
3625 int r;
3626 u32 *hpd;
3627
3628 /*
3629 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3630 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3631 */
3632 if (rdev->family == CHIP_KAVERI)
3633 rdev->mec.num_mec = 2;
3634 else
3635 rdev->mec.num_mec = 1;
3636 rdev->mec.num_pipe = 4;
3637 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3638
3639 if (rdev->mec.hpd_eop_obj == NULL) {
3640 r = radeon_bo_create(rdev,
3641 rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3642 PAGE_SIZE, true,
3643 RADEON_GEM_DOMAIN_GTT, NULL,
3644 &rdev->mec.hpd_eop_obj);
3645 if (r) {
3646 dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
3647 return r;
3648 }
3649 }
3650
3651 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3652 if (unlikely(r != 0)) {
3653 cik_mec_fini(rdev);
3654 return r;
3655 }
3656 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3657 &rdev->mec.hpd_eop_gpu_addr);
3658 if (r) {
3659 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
3660 cik_mec_fini(rdev);
3661 return r;
3662 }
3663 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3664 if (r) {
3665 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
3666 cik_mec_fini(rdev);
3667 return r;
3668 }
3669
3670 /* clear memory.
Not sure if this is required or not */ 3671 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2); 3672 3673 radeon_bo_kunmap(rdev->mec.hpd_eop_obj); 3674 radeon_bo_unreserve(rdev->mec.hpd_eop_obj); 3675 3676 return 0; 3677 } 3678 3679 struct hqd_registers 3680 { 3681 u32 cp_mqd_base_addr; 3682 u32 cp_mqd_base_addr_hi; 3683 u32 cp_hqd_active; 3684 u32 cp_hqd_vmid; 3685 u32 cp_hqd_persistent_state; 3686 u32 cp_hqd_pipe_priority; 3687 u32 cp_hqd_queue_priority; 3688 u32 cp_hqd_quantum; 3689 u32 cp_hqd_pq_base; 3690 u32 cp_hqd_pq_base_hi; 3691 u32 cp_hqd_pq_rptr; 3692 u32 cp_hqd_pq_rptr_report_addr; 3693 u32 cp_hqd_pq_rptr_report_addr_hi; 3694 u32 cp_hqd_pq_wptr_poll_addr; 3695 u32 cp_hqd_pq_wptr_poll_addr_hi; 3696 u32 cp_hqd_pq_doorbell_control; 3697 u32 cp_hqd_pq_wptr; 3698 u32 cp_hqd_pq_control; 3699 u32 cp_hqd_ib_base_addr; 3700 u32 cp_hqd_ib_base_addr_hi; 3701 u32 cp_hqd_ib_rptr; 3702 u32 cp_hqd_ib_control; 3703 u32 cp_hqd_iq_timer; 3704 u32 cp_hqd_iq_rptr; 3705 u32 cp_hqd_dequeue_request; 3706 u32 cp_hqd_dma_offload; 3707 u32 cp_hqd_sema_cmd; 3708 u32 cp_hqd_msg_type; 3709 u32 cp_hqd_atomic0_preop_lo; 3710 u32 cp_hqd_atomic0_preop_hi; 3711 u32 cp_hqd_atomic1_preop_lo; 3712 u32 cp_hqd_atomic1_preop_hi; 3713 u32 cp_hqd_hq_scheduler0; 3714 u32 cp_hqd_hq_scheduler1; 3715 u32 cp_mqd_control; 3716 }; 3717 3718 struct bonaire_mqd 3719 { 3720 u32 header; 3721 u32 dispatch_initiator; 3722 u32 dimensions[3]; 3723 u32 start_idx[3]; 3724 u32 num_threads[3]; 3725 u32 pipeline_stat_enable; 3726 u32 perf_counter_enable; 3727 u32 pgm[2]; 3728 u32 tba[2]; 3729 u32 tma[2]; 3730 u32 pgm_rsrc[2]; 3731 u32 vmid; 3732 u32 resource_limits; 3733 u32 static_thread_mgmt01[2]; 3734 u32 tmp_ring_size; 3735 u32 static_thread_mgmt23[2]; 3736 u32 restart[3]; 3737 u32 thread_trace_enable; 3738 u32 reserved1; 3739 u32 user_data[16]; 3740 u32 vgtcs_invoke_count[2]; 3741 struct hqd_registers queue_state; 3742 u32 dequeue_cntr; 3743 u32 interrupt_queue[64]; 3744 }; 3745 3746 /** 3747 * cik_cp_compute_resume - setup the compute queue registers 3748 * 3749 * @rdev: radeon_device pointer 3750 * 3751 * Program the compute queues and test them to make sure they 3752 * are working. 3753 * Returns 0 for success, error for failure. 3754 */ 3755 static int cik_cp_compute_resume(struct radeon_device *rdev) 3756 { 3757 int r, i, idx; 3758 u32 tmp; 3759 bool use_doorbell = true; 3760 u64 hqd_gpu_addr; 3761 u64 mqd_gpu_addr; 3762 u64 eop_gpu_addr; 3763 u64 wb_gpu_addr; 3764 u32 *buf; 3765 struct bonaire_mqd *mqd; 3766 3767 r = cik_cp_compute_start(rdev); 3768 if (r) 3769 return r; 3770 3771 /* fix up chicken bits */ 3772 tmp = RREG32(CP_CPF_DEBUG); 3773 tmp |= (1 << 23); 3774 WREG32(CP_CPF_DEBUG, tmp); 3775 3776 /* init the pipes */ 3777 mutex_lock(&rdev->srbm_mutex); 3778 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) { 3779 int me = (i < 4) ? 1 : 2; 3780 int pipe = (i < 4) ? 
i : (i - 4); 3781 3782 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2); 3783 3784 cik_srbm_select(rdev, me, pipe, 0, 0); 3785 3786 /* write the EOP addr */ 3787 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); 3788 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); 3789 3790 /* set the VMID assigned */ 3791 WREG32(CP_HPD_EOP_VMID, 0); 3792 3793 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3794 tmp = RREG32(CP_HPD_EOP_CONTROL); 3795 tmp &= ~EOP_SIZE_MASK; 3796 tmp |= order_base_2(MEC_HPD_SIZE / 8); 3797 WREG32(CP_HPD_EOP_CONTROL, tmp); 3798 } 3799 cik_srbm_select(rdev, 0, 0, 0, 0); 3800 mutex_unlock(&rdev->srbm_mutex); 3801 3802 /* init the queues. Just two for now. */ 3803 for (i = 0; i < 2; i++) { 3804 if (i == 0) 3805 idx = CAYMAN_RING_TYPE_CP1_INDEX; 3806 else 3807 idx = CAYMAN_RING_TYPE_CP2_INDEX; 3808 3809 if (rdev->ring[idx].mqd_obj == NULL) { 3810 r = radeon_bo_create(rdev, 3811 sizeof(struct bonaire_mqd), 3812 PAGE_SIZE, true, 3813 RADEON_GEM_DOMAIN_GTT, NULL, 3814 &rdev->ring[idx].mqd_obj); 3815 if (r) { 3816 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r); 3817 return r; 3818 } 3819 } 3820 3821 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false); 3822 if (unlikely(r != 0)) { 3823 cik_cp_compute_fini(rdev); 3824 return r; 3825 } 3826 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT, 3827 &mqd_gpu_addr); 3828 if (r) { 3829 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r); 3830 cik_cp_compute_fini(rdev); 3831 return r; 3832 } 3833 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf); 3834 if (r) { 3835 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r); 3836 cik_cp_compute_fini(rdev); 3837 return r; 3838 } 3839 3840 /* doorbell offset */ 3841 rdev->ring[idx].doorbell_offset = 3842 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0; 3843 3844 /* init the mqd struct */ 3845 memset(buf, 0, sizeof(struct bonaire_mqd)); 3846 3847 mqd = (struct bonaire_mqd *)buf; 3848 mqd->header = 0xC0310800; 3849 mqd->static_thread_mgmt01[0] = 0xffffffff; 3850 mqd->static_thread_mgmt01[1] = 0xffffffff; 3851 mqd->static_thread_mgmt23[0] = 0xffffffff; 3852 mqd->static_thread_mgmt23[1] = 0xffffffff; 3853 3854 mutex_lock(&rdev->srbm_mutex); 3855 cik_srbm_select(rdev, rdev->ring[idx].me, 3856 rdev->ring[idx].pipe, 3857 rdev->ring[idx].queue, 0); 3858 3859 /* disable wptr polling */ 3860 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL); 3861 tmp &= ~WPTR_POLL_EN; 3862 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp); 3863 3864 /* enable doorbell? 
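(a doorbell is a write-only aperture the driver can poke to notify the
CP of a wptr update without an MMIO register round trip; use_doorbell
is hard-coded true above, so this always takes the enable path.)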
*/
3865 mqd->queue_state.cp_hqd_pq_doorbell_control =
3866 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3867 if (use_doorbell)
3868 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3869 else
3870 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3871 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3872 mqd->queue_state.cp_hqd_pq_doorbell_control);
3873
3874 /* disable the queue if it's active */
3875 mqd->queue_state.cp_hqd_dequeue_request = 0;
3876 mqd->queue_state.cp_hqd_pq_rptr = 0;
3877 mqd->queue_state.cp_hqd_pq_wptr = 0;
3878 if (RREG32(CP_HQD_ACTIVE) & 1) {
3879 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3880 for (j = 0; j < rdev->usec_timeout; j++) {
3881 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3882 break;
3883 udelay(1);
3884 }
3885 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3886 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3887 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3888 }
3889
3890 /* set the pointer to the MQD */
3891 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3892 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3893 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3894 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3895 /* set MQD vmid to 0 */
3896 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3897 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3898 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3899
3900 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3901 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3902 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3903 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3904 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3905 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3906
3907 /* set up the HQD, this is similar to CP_RB0_CNTL */
3908 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3909 mqd->queue_state.cp_hqd_pq_control &=
3910 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3911
3912 mqd->queue_state.cp_hqd_pq_control |=
3913 order_base_2(rdev->ring[idx].ring_size / 8);
3914 mqd->queue_state.cp_hqd_pq_control |=
3915 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3916 #ifdef __BIG_ENDIAN
3917 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3918 #endif
3919 mqd->queue_state.cp_hqd_pq_control &=
3920 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3921 mqd->queue_state.cp_hqd_pq_control |=
3922 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3923 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3924
3925 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3926 if (i == 0)
3927 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3928 else
3929 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3930 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3931 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3932 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3933 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3934 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3935
3936 /* set the wb address whether it's enabled or not */
3937 if (i == 0)
3938 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3939 else
3940 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3941 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3942 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3943
upper_32_bits(wb_gpu_addr) & 0xffff; 3944 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR, 3945 mqd->queue_state.cp_hqd_pq_rptr_report_addr); 3946 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3947 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi); 3948 3949 /* enable the doorbell if requested */ 3950 if (use_doorbell) { 3951 mqd->queue_state.cp_hqd_pq_doorbell_control = 3952 RREG32(CP_HQD_PQ_DOORBELL_CONTROL); 3953 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK; 3954 mqd->queue_state.cp_hqd_pq_doorbell_control |= 3955 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4); 3956 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN; 3957 mqd->queue_state.cp_hqd_pq_doorbell_control &= 3958 ~(DOORBELL_SOURCE | DOORBELL_HIT); 3959 3960 } else { 3961 mqd->queue_state.cp_hqd_pq_doorbell_control = 0; 3962 } 3963 WREG32(CP_HQD_PQ_DOORBELL_CONTROL, 3964 mqd->queue_state.cp_hqd_pq_doorbell_control); 3965 3966 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3967 rdev->ring[idx].wptr = 0; 3968 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr; 3969 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); 3970 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR); 3971 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr; 3972 3973 /* set the vmid for the queue */ 3974 mqd->queue_state.cp_hqd_vmid = 0; 3975 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid); 3976 3977 /* activate the queue */ 3978 mqd->queue_state.cp_hqd_active = 1; 3979 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); 3980 3981 cik_srbm_select(rdev, 0, 0, 0, 0); 3982 mutex_unlock(&rdev->srbm_mutex); 3983 3984 radeon_bo_kunmap(rdev->ring[idx].mqd_obj); 3985 radeon_bo_unreserve(rdev->ring[idx].mqd_obj); 3986 3987 rdev->ring[idx].ready = true; 3988 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]); 3989 if (r) 3990 rdev->ring[idx].ready = false; 3991 } 3992 3993 return 0; 3994 } 3995 3996 static void cik_cp_enable(struct radeon_device *rdev, bool enable) 3997 { 3998 cik_cp_gfx_enable(rdev, enable); 3999 cik_cp_compute_enable(rdev, enable); 4000 } 4001 4002 static int cik_cp_load_microcode(struct radeon_device *rdev) 4003 { 4004 int r; 4005 4006 r = cik_cp_gfx_load_microcode(rdev); 4007 if (r) 4008 return r; 4009 r = cik_cp_compute_load_microcode(rdev); 4010 if (r) 4011 return r; 4012 4013 return 0; 4014 } 4015 4016 static void cik_cp_fini(struct radeon_device *rdev) 4017 { 4018 cik_cp_gfx_fini(rdev); 4019 cik_cp_compute_fini(rdev); 4020 } 4021 4022 static int cik_cp_resume(struct radeon_device *rdev) 4023 { 4024 int r; 4025 4026 cik_enable_gui_idle_interrupt(rdev, false); 4027 4028 r = cik_cp_load_microcode(rdev); 4029 if (r) 4030 return r; 4031 4032 r = cik_cp_gfx_resume(rdev); 4033 if (r) 4034 return r; 4035 r = cik_cp_compute_resume(rdev); 4036 if (r) 4037 return r; 4038 4039 cik_enable_gui_idle_interrupt(rdev, true); 4040 4041 return 0; 4042 } 4043 4044 static void cik_print_gpu_status_regs(struct radeon_device *rdev) 4045 { 4046 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n", 4047 RREG32(GRBM_STATUS)); 4048 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n", 4049 RREG32(GRBM_STATUS2)); 4050 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n", 4051 RREG32(GRBM_STATUS_SE0)); 4052 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n", 4053 RREG32(GRBM_STATUS_SE1)); 4054 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n", 4055 RREG32(GRBM_STATUS_SE2)); 4056 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n", 4057 RREG32(GRBM_STATUS_SE3)); 4058 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", 4059 RREG32(SRBM_STATUS)); 4060 dev_info(rdev->dev, " 
SRBM_STATUS2=0x%08X\n",
4061 RREG32(SRBM_STATUS2));
4062 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
4063 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4064 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
4065 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4066 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4067 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
4068 RREG32(CP_STALLED_STAT1));
4069 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
4070 RREG32(CP_STALLED_STAT2));
4071 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
4072 RREG32(CP_STALLED_STAT3));
4073 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
4074 RREG32(CP_CPF_BUSY_STAT));
4075 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
4076 RREG32(CP_CPF_STALLED_STAT1));
4077 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4078 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4079 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
4080 RREG32(CP_CPC_STALLED_STAT1));
4081 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4082 }
4083
4084 /**
4085 * cik_gpu_check_soft_reset - check which blocks are busy
4086 *
4087 * @rdev: radeon_device pointer
4088 *
4089 * Check which blocks are busy and return the relevant reset
4090 * mask to be used by cik_gpu_soft_reset().
4091 * Returns a mask of the blocks to be reset.
4092 */
4093 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4094 {
4095 u32 reset_mask = 0;
4096 u32 tmp;
4097
4098 /* GRBM_STATUS */
4099 tmp = RREG32(GRBM_STATUS);
4100 if (tmp & (PA_BUSY | SC_BUSY |
4101 BCI_BUSY | SX_BUSY |
4102 TA_BUSY | VGT_BUSY |
4103 DB_BUSY | CB_BUSY |
4104 GDS_BUSY | SPI_BUSY |
4105 IA_BUSY | IA_BUSY_NO_DMA))
4106 reset_mask |= RADEON_RESET_GFX;
4107
4108 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4109 reset_mask |= RADEON_RESET_CP;
4110
4111 /* GRBM_STATUS2 */
4112 tmp = RREG32(GRBM_STATUS2);
4113 if (tmp & RLC_BUSY)
4114 reset_mask |= RADEON_RESET_RLC;
4115
4116 /* SDMA0_STATUS_REG */
4117 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4118 if (!(tmp & SDMA_IDLE))
4119 reset_mask |= RADEON_RESET_DMA;
4120
4121 /* SDMA1_STATUS_REG */
4122 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4123 if (!(tmp & SDMA_IDLE))
4124 reset_mask |= RADEON_RESET_DMA1;
4125
4126 /* SRBM_STATUS2 */
4127 tmp = RREG32(SRBM_STATUS2);
4128 if (tmp & SDMA_BUSY)
4129 reset_mask |= RADEON_RESET_DMA;
4130
4131 if (tmp & SDMA1_BUSY)
4132 reset_mask |= RADEON_RESET_DMA1;
4133
4134 /* SRBM_STATUS */
4135 tmp = RREG32(SRBM_STATUS);
4136
4137 if (tmp & IH_BUSY)
4138 reset_mask |= RADEON_RESET_IH;
4139
4140 if (tmp & SEM_BUSY)
4141 reset_mask |= RADEON_RESET_SEM;
4142
4143 if (tmp & GRBM_RQ_PENDING)
4144 reset_mask |= RADEON_RESET_GRBM;
4145
4146 if (tmp & VMC_BUSY)
4147 reset_mask |= RADEON_RESET_VMC;
4148
4149 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4150 MCC_BUSY | MCD_BUSY))
4151 reset_mask |= RADEON_RESET_MC;
4152
4153 if (evergreen_is_display_hung(rdev))
4154 reset_mask |= RADEON_RESET_DISPLAY;
4155
4156 /* Skip MC reset as it's most likely not hung, just busy */
4157 if (reset_mask & RADEON_RESET_MC) {
4158 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4159 reset_mask &= ~RADEON_RESET_MC;
4160 }
4161
4162 return reset_mask;
4163 }
4164
4165 /**
4166 * cik_gpu_soft_reset - soft reset GPU
4167 *
4168 * @rdev: radeon_device pointer
4169 * @reset_mask: mask of which blocks to reset
4170 *
4171 * Soft reset the blocks specified in @reset_mask.
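* For example, RADEON_RESET_GFX or RADEON_RESET_COMPUTE selects
* SOFT_RESET_CP | SOFT_RESET_GFX in GRBM_SOFT_RESET, while the DMA, IH,
* SEM, GRBM and VMC bits are driven through SRBM_SOFT_RESET.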
4172 */
4173 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4174 {
4175 struct evergreen_mc_save save;
4176 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4177 u32 tmp;
4178
4179 if (reset_mask == 0)
4180 return;
4181
4182 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4183
4184 cik_print_gpu_status_regs(rdev);
4185 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4186 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4187 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4188 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4189
4190 /* stop the rlc */
4191 cik_rlc_stop(rdev);
4192
4193 /* Disable GFX parsing/prefetching */
4194 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4195
4196 /* Disable MEC parsing/prefetching */
4197 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4198
4199 if (reset_mask & RADEON_RESET_DMA) {
4200 /* sdma0 */
4201 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4202 tmp |= SDMA_HALT;
4203 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4204 }
4205 if (reset_mask & RADEON_RESET_DMA1) {
4206 /* sdma1 */
4207 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4208 tmp |= SDMA_HALT;
4209 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4210 }
4211
4212 evergreen_mc_stop(rdev, &save);
4213 if (evergreen_mc_wait_for_idle(rdev)) {
4214 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4215 }
4216
4217 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4218 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4219
4220 if (reset_mask & RADEON_RESET_CP) {
4221 grbm_soft_reset |= SOFT_RESET_CP;
4222
4223 srbm_soft_reset |= SOFT_RESET_GRBM;
4224 }
4225
4226 if (reset_mask & RADEON_RESET_DMA)
4227 srbm_soft_reset |= SOFT_RESET_SDMA;
4228
4229 if (reset_mask & RADEON_RESET_DMA1)
4230 srbm_soft_reset |= SOFT_RESET_SDMA1;
4231
4232 if (reset_mask & RADEON_RESET_DISPLAY)
4233 srbm_soft_reset |= SOFT_RESET_DC;
4234
4235 if (reset_mask & RADEON_RESET_RLC)
4236 grbm_soft_reset |= SOFT_RESET_RLC;
4237
4238 if (reset_mask & RADEON_RESET_SEM)
4239 srbm_soft_reset |= SOFT_RESET_SEM;
4240
4241 if (reset_mask & RADEON_RESET_IH)
4242 srbm_soft_reset |= SOFT_RESET_IH;
4243
4244 if (reset_mask & RADEON_RESET_GRBM)
4245 srbm_soft_reset |= SOFT_RESET_GRBM;
4246
4247 if (reset_mask & RADEON_RESET_VMC)
4248 srbm_soft_reset |= SOFT_RESET_VMC;
4249
4250 if (!(rdev->flags & RADEON_IS_IGP)) {
4251 if (reset_mask & RADEON_RESET_MC)
4252 srbm_soft_reset |= SOFT_RESET_MC;
4253 }
4254
4255 if (grbm_soft_reset) {
4256 tmp = RREG32(GRBM_SOFT_RESET);
4257 tmp |= grbm_soft_reset;
4258 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4259 WREG32(GRBM_SOFT_RESET, tmp);
4260 tmp = RREG32(GRBM_SOFT_RESET);
4261
4262 udelay(50);
4263
4264 tmp &= ~grbm_soft_reset;
4265 WREG32(GRBM_SOFT_RESET, tmp);
4266 tmp = RREG32(GRBM_SOFT_RESET);
4267 }
4268
4269 if (srbm_soft_reset) {
4270 tmp = RREG32(SRBM_SOFT_RESET);
4271 tmp |= srbm_soft_reset;
4272 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4273 WREG32(SRBM_SOFT_RESET, tmp);
4274 tmp = RREG32(SRBM_SOFT_RESET);
4275
4276 udelay(50);
4277
4278 tmp &= ~srbm_soft_reset;
4279 WREG32(SRBM_SOFT_RESET, tmp);
4280 tmp = RREG32(SRBM_SOFT_RESET);
4281 }
4282
4283 /* Wait a little for things to settle down */
4284 udelay(50);
4285
4286 evergreen_mc_resume(rdev, &save);
4287 udelay(50);
4288
4289 cik_print_gpu_status_regs(rdev);
4290 }
4291
4292 /**
4293 * cik_asic_reset - soft reset GPU
4294 *
4295 * @rdev: radeon_device pointer
4296 *
4297
* Look up which blocks are hung and attempt
4298 * to reset them.
4299 * Returns 0 for success.
4300 */
4301 int cik_asic_reset(struct radeon_device *rdev)
4302 {
4303 u32 reset_mask;
4304
4305 reset_mask = cik_gpu_check_soft_reset(rdev);
4306
4307 if (reset_mask)
4308 r600_set_bios_scratch_engine_hung(rdev, true);
4309
4310 cik_gpu_soft_reset(rdev, reset_mask);
4311
4312 reset_mask = cik_gpu_check_soft_reset(rdev);
4313
4314 if (!reset_mask)
4315 r600_set_bios_scratch_engine_hung(rdev, false);
4316
4317 return 0;
4318 }
4319
4320 /**
4321 * cik_gfx_is_lockup - check if the 3D engine is locked up
4322 *
4323 * @rdev: radeon_device pointer
4324 * @ring: radeon_ring structure holding ring information
4325 *
4326 * Check if the 3D engine is locked up (CIK).
4327 * Returns true if the engine is locked, false if not.
4328 */
4329 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4330 {
4331 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4332
4333 if (!(reset_mask & (RADEON_RESET_GFX |
4334 RADEON_RESET_COMPUTE |
4335 RADEON_RESET_CP))) {
4336 radeon_ring_lockup_update(ring);
4337 return false;
4338 }
4339 /* force CP activities */
4340 radeon_ring_force_activity(rdev, ring);
4341 return radeon_ring_test_lockup(rdev, ring);
4342 }
4343
4344 /* MC */
4345 /**
4346 * cik_mc_program - program the GPU memory controller
4347 *
4348 * @rdev: radeon_device pointer
4349 *
4350 * Set the location of vram, gart, and AGP in the GPU's
4351 * physical address space (CIK).
4352 */
4353 static void cik_mc_program(struct radeon_device *rdev)
4354 {
4355 struct evergreen_mc_save save;
4356 u32 tmp;
4357 int i, j;
4358
4359 /* Initialize HDP */
4360 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4361 WREG32((0x2c14 + j), 0x00000000);
4362 WREG32((0x2c18 + j), 0x00000000);
4363 WREG32((0x2c1c + j), 0x00000000);
4364 WREG32((0x2c20 + j), 0x00000000);
4365 WREG32((0x2c24 + j), 0x00000000);
4366 }
4367 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4368
4369 evergreen_mc_stop(rdev, &save);
4370 if (radeon_mc_wait_for_idle(rdev)) {
4371 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4372 }
4373 /* Lock out access through VGA aperture */
4374 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4375 /* Update configuration */
4376 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4377 rdev->mc.vram_start >> 12);
4378 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4379 rdev->mc.vram_end >> 12);
4380 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4381 rdev->vram_scratch.gpu_addr >> 12);
4382 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4383 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4384 WREG32(MC_VM_FB_LOCATION, tmp);
4385 /* XXX double check these! */
4386 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4387 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4388 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4389 WREG32(MC_VM_AGP_BASE, 0);
4390 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4391 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4392 if (radeon_mc_wait_for_idle(rdev)) {
4393 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4394 }
4395 evergreen_mc_resume(rdev, &save);
4396 /* we need to own VRAM, so turn off the VGA renderer here
4397 * to stop it overwriting our objects */
4398 rv515_vga_render_disable(rdev);
4399 }
4400
4401 /**
4402 * cik_mc_init - initialize the memory controller driver params
4403 *
4404 * @rdev: radeon_device pointer
4405 *
4406 * Look up the amount of vram, vram width, and decide how to place
4407 * vram and gart within the GPU's physical address space (CIK).
4408 * Returns 0 for success.
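* The effective bus width is numchan * chansize as decoded below from
* MC_SHARED_CHMAP/MC_ARB_RAMCFG; e.g. 4 channels of 64 bits gives a
* vram_width of 256.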
4409 */
4410 static int cik_mc_init(struct radeon_device *rdev)
4411 {
4412 u32 tmp;
4413 int chansize, numchan;
4414
4415 /* Get VRAM information */
4416 rdev->mc.vram_is_ddr = true;
4417 tmp = RREG32(MC_ARB_RAMCFG);
4418 if (tmp & CHANSIZE_MASK) {
4419 chansize = 64;
4420 } else {
4421 chansize = 32;
4422 }
4423 tmp = RREG32(MC_SHARED_CHMAP);
4424 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4425 case 0:
4426 default:
4427 numchan = 1;
4428 break;
4429 case 1:
4430 numchan = 2;
4431 break;
4432 case 2:
4433 numchan = 4;
4434 break;
4435 case 3:
4436 numchan = 8;
4437 break;
4438 case 4:
4439 numchan = 3;
4440 break;
4441 case 5:
4442 numchan = 6;
4443 break;
4444 case 6:
4445 numchan = 10;
4446 break;
4447 case 7:
4448 numchan = 12;
4449 break;
4450 case 8:
4451 numchan = 16;
4452 break;
4453 }
4454 rdev->mc.vram_width = numchan * chansize;
4455 /* Could aper size report 0? */
4456 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4457 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4458 /* size in MB on si and cik */
4459 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4460 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4461 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4462 si_vram_gtt_location(rdev, &rdev->mc);
4463 radeon_update_bandwidth_info(rdev);
4464
4465 return 0;
4466 }
4467
4468 /*
4469 * GART
4470 * VMID 0 is the physical GPU address space as used by the kernel.
4471 * VMIDs 1-15 are used for userspace clients and are handled
4472 * by the radeon vm/hsa code.
4473 */
4474 /**
4475 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4476 *
4477 * @rdev: radeon_device pointer
4478 *
4479 * Flush the TLB for the VMID 0 page table (CIK).
4480 */
4481 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4482 {
4483 /* flush hdp cache */
4484 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4485
4486 /* bits 0-15 are the VM contexts0-15 */
4487 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4488 }
4489
4490 /**
4491 * cik_pcie_gart_enable - gart enable
4492 *
4493 * @rdev: radeon_device pointer
4494 *
4495 * This sets up the TLBs, programs the page tables for VMID0,
4496 * sets up the hw for VMIDs 1-15 which are allocated on
4497 * demand, and sets up the global locations for the LDS, GDS,
4498 * and GPUVM for FSA64 clients (CIK).
4499 * Returns 0 for success, errors for failure.
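* VMID 0 gets a linear page table covering [gtt_start, gtt_end]; contexts
* 1-15 initially point at the same table, and any access outside a valid
* range is redirected to the dummy page (raising a protection fault
* interrupt for contexts 1-15).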
4500 */ 4501 static int cik_pcie_gart_enable(struct radeon_device *rdev) 4502 { 4503 int r, i; 4504 4505 if (rdev->gart.robj == NULL) { 4506 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n"); 4507 return -EINVAL; 4508 } 4509 r = radeon_gart_table_vram_pin(rdev); 4510 if (r) 4511 return r; 4512 radeon_gart_restore(rdev); 4513 /* Setup TLB control */ 4514 WREG32(MC_VM_MX_L1_TLB_CNTL, 4515 (0xA << 7) | 4516 ENABLE_L1_TLB | 4517 SYSTEM_ACCESS_MODE_NOT_IN_SYS | 4518 ENABLE_ADVANCED_DRIVER_MODEL | 4519 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); 4520 /* Setup L2 cache */ 4521 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | 4522 ENABLE_L2_FRAGMENT_PROCESSING | 4523 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | 4524 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | 4525 EFFECTIVE_L2_QUEUE_SIZE(7) | 4526 CONTEXT1_IDENTITY_ACCESS_MODE(1)); 4527 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); 4528 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | 4529 L2_CACHE_BIGK_FRAGMENT_SIZE(6)); 4530 /* setup context0 */ 4531 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); 4532 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); 4533 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); 4534 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, 4535 (u32)(rdev->dummy_page.addr >> 12)); 4536 WREG32(VM_CONTEXT0_CNTL2, 0); 4537 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | 4538 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT)); 4539 4540 WREG32(0x15D4, 0); 4541 WREG32(0x15D8, 0); 4542 WREG32(0x15DC, 0); 4543 4544 /* empty context1-15 */ 4545 /* FIXME start with 4G, once using 2 level pt switch to full 4546 * vm size space 4547 */ 4548 /* set vm size, must be a multiple of 4 */ 4549 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0); 4550 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn); 4551 for (i = 1; i < 16; i++) { 4552 if (i < 8) 4553 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2), 4554 rdev->gart.table_addr >> 12); 4555 else 4556 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2), 4557 rdev->gart.table_addr >> 12); 4558 } 4559 4560 /* enable context1-15 */ 4561 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, 4562 (u32)(rdev->dummy_page.addr >> 12)); 4563 WREG32(VM_CONTEXT1_CNTL2, 4); 4564 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) | 4565 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT | 4566 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT | 4567 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT | 4568 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT | 4569 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT | 4570 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT | 4571 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT | 4572 VALID_PROTECTION_FAULT_ENABLE_DEFAULT | 4573 READ_PROTECTION_FAULT_ENABLE_INTERRUPT | 4574 READ_PROTECTION_FAULT_ENABLE_DEFAULT | 4575 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT | 4576 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT); 4577 4578 /* TC cache setup ??? 
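(every L1/L2 load/store/atomic policy below is just written as 0 and the
volatile masks cleared, presumably selecting the hardware defaults)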
*/
4579 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4580 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4581 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4582
4583 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4584 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4585 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4586 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4587 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4588
4589 WREG32(TC_CFG_L1_VOLATILE, 0);
4590 WREG32(TC_CFG_L2_VOLATILE, 0);
4591
4592 if (rdev->family == CHIP_KAVERI) {
4593 u32 tmp = RREG32(CHUB_CONTROL);
4594 tmp &= ~BYPASS_VM;
4595 WREG32(CHUB_CONTROL, tmp);
4596 }
4597
4598 /* XXX SH_MEM regs */
4599 /* where to put LDS, scratch, GPUVM in FSA64 space */
4600 mutex_lock(&rdev->srbm_mutex);
4601 for (i = 0; i < 16; i++) {
4602 cik_srbm_select(rdev, 0, 0, 0, i);
4603 /* CP and shaders */
4604 WREG32(SH_MEM_CONFIG, 0);
4605 WREG32(SH_MEM_APE1_BASE, 1);
4606 WREG32(SH_MEM_APE1_LIMIT, 0);
4607 WREG32(SH_MEM_BASES, 0);
4608 /* SDMA GFX */
4609 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4610 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4611 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4612 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4613 /* XXX SDMA RLC - todo */
4614 }
4615 cik_srbm_select(rdev, 0, 0, 0, 0);
4616 mutex_unlock(&rdev->srbm_mutex);
4617
4618 cik_pcie_gart_tlb_flush(rdev);
4619 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4620 (unsigned)(rdev->mc.gtt_size >> 20),
4621 (unsigned long long)rdev->gart.table_addr);
4622 rdev->gart.ready = true;
4623 return 0;
4624 }
4625
4626 /**
4627 * cik_pcie_gart_disable - gart disable
4628 *
4629 * @rdev: radeon_device pointer
4630 *
4631 * This disables all VM page tables (CIK).
4632 */
4633 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4634 {
4635 /* Disable all tables */
4636 WREG32(VM_CONTEXT0_CNTL, 0);
4637 WREG32(VM_CONTEXT1_CNTL, 0);
4638 /* Setup TLB control */
4639 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4640 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4641 /* Setup L2 cache */
4642 WREG32(VM_L2_CNTL,
4643 ENABLE_L2_FRAGMENT_PROCESSING |
4644 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4645 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4646 EFFECTIVE_L2_QUEUE_SIZE(7) |
4647 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4648 WREG32(VM_L2_CNTL2, 0);
4649 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4650 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4651 radeon_gart_table_vram_unpin(rdev);
4652 }
4653
4654 /**
4655 * cik_pcie_gart_fini - vm fini callback
4656 *
4657 * @rdev: radeon_device pointer
4658 *
4659 * Tears down the driver GART/VM setup (CIK).
4660 */
4661 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4662 {
4663 cik_pcie_gart_disable(rdev);
4664 radeon_gart_table_vram_free(rdev);
4665 radeon_gart_fini(rdev);
4666 }
4667
4668 /* vm parser */
4669 /**
4670 * cik_ib_parse - vm ib_parse callback
4671 *
4672 * @rdev: radeon_device pointer
4673 * @ib: indirect buffer pointer
4674 *
4675 * CIK uses hw IB checking so this is a nop (CIK).
4676 */
4677 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4678 {
4679 return 0;
4680 }
4681
4682 /*
4683 * vm
4684 * VMID 0 is the physical GPU address space as used by the kernel.
4685 * VMIDs 1-15 are used for userspace clients and are handled
4686 * by the radeon vm/hsa code.
4687 */
4688 /**
4689 * cik_vm_init - cik vm init callback
4690 *
4691 * @rdev: radeon_device pointer
4692 *
4693 * Inits cik specific vm parameters (number of VMs, base of vram for
4694 * VMIDs 1-15) (CIK).
4695 * Returns 0 for success.
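* On IGPs the vram base offset comes from MC_VM_FB_OFFSET, which is
* stored in units of 4 MB (hence the << 22 below).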
4696 */
4697 int cik_vm_init(struct radeon_device *rdev)
4698 {
4699 /* number of VMs */
4700 rdev->vm_manager.nvm = 16;
4701 /* base offset of vram pages */
4702 if (rdev->flags & RADEON_IS_IGP) {
4703 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4704 tmp <<= 22;
4705 rdev->vm_manager.vram_base_offset = tmp;
4706 } else
4707 rdev->vm_manager.vram_base_offset = 0;
4708
4709 return 0;
4710 }
4711
4712 /**
4713 * cik_vm_fini - cik vm fini callback
4714 *
4715 * @rdev: radeon_device pointer
4716 *
4717 * Tear down any asic specific VM setup (CIK).
4718 */
4719 void cik_vm_fini(struct radeon_device *rdev)
4720 {
4721 }
4722
4723 /**
4724 * cik_vm_decode_fault - print human readable fault info
4725 *
4726 * @rdev: radeon_device pointer
4727 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4728 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
* @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
4729 *
4730 * Print human readable fault information (CIK).
4731 */
4732 static void cik_vm_decode_fault(struct radeon_device *rdev,
4733 u32 status, u32 addr, u32 mc_client)
4734 {
4735 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4736 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4737 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4738 char *block = (char *)&mc_client;
4739
4740 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4741 protections, vmid, addr,
4742 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4743 block, mc_id);
4744 }
4745
4746 /**
4747 * cik_vm_flush - cik vm flush using the CP
4748 *
4749 * @rdev: radeon_device pointer
* @ridx: radeon ring index
* @vm: radeon_vm pointer
4750 *
4751 * Update the page table base and flush the VM TLB
4752 * using the CP (CIK).
4753 */
4754 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4755 {
4756 struct radeon_ring *ring = &rdev->ring[ridx];
4757
4758 if (vm == NULL)
4759 return;
4760
4761 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4762 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4763 WRITE_DATA_DST_SEL(0)));
4764 if (vm->id < 8) {
4765 radeon_ring_write(ring,
4766 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4767 } else {
4768 radeon_ring_write(ring,
4769 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4770 }
4771 radeon_ring_write(ring, 0);
4772 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4773
4774 /* update SH_MEM_* regs */
4775 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4776 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4777 WRITE_DATA_DST_SEL(0)));
4778 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4779 radeon_ring_write(ring, 0);
4780 radeon_ring_write(ring, VMID(vm->id));
4781
4782 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4783 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4784 WRITE_DATA_DST_SEL(0)));
4785 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4786 radeon_ring_write(ring, 0);
4787
4788 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4789 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4790 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4791 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4792
4793 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4794 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4795 WRITE_DATA_DST_SEL(0)));
4796 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4797 radeon_ring_write(ring, 0);
4798 radeon_ring_write(ring, VMID(0));
4799
4800 /* HDP flush */
4801 /* We should be using the WAIT_REG_MEM packet here like in
4802 * cik_fence_ring_emit(), but it causes the CP to hang in this
4803
* context...
4804 */
4805 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4806 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4807 WRITE_DATA_DST_SEL(0)));
4808 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4809 radeon_ring_write(ring, 0);
4810 radeon_ring_write(ring, 0);
4811
4812 /* bits 0-15 are the VM contexts0-15 */
4813 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4814 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4815 WRITE_DATA_DST_SEL(0)));
4816 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4817 radeon_ring_write(ring, 0);
4818 radeon_ring_write(ring, 1 << vm->id);
4819
4820 /* compute doesn't have PFP */
4821 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4822 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4823 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4824 radeon_ring_write(ring, 0x0);
4825 }
4826 }
4827
4828 /**
4829 * cik_vm_set_page - update the page tables using CP or sDMA
4830 *
4831 * @rdev: radeon_device pointer
4832 * @ib: indirect buffer to fill with commands
4833 * @pe: addr of the page entry
4834 * @addr: dst addr to write into pe
4835 * @count: number of page entries to update
4836 * @incr: increase next addr by incr bytes
4837 * @flags: access flags
4838 *
4839 * Update the page tables using CP or sDMA (CIK).
4840 */
4841 void cik_vm_set_page(struct radeon_device *rdev,
4842 struct radeon_ib *ib,
4843 uint64_t pe,
4844 uint64_t addr, unsigned count,
4845 uint32_t incr, uint32_t flags)
4846 {
4847 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4848 uint64_t value;
4849 unsigned ndw;
4850
4851 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4852 /* CP */
4853 while (count) {
4854 ndw = 2 + count * 2;
4855 if (ndw > 0x3FFE)
4856 ndw = 0x3FFE;
4857
4858 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4859 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4860 WRITE_DATA_DST_SEL(1));
4861 ib->ptr[ib->length_dw++] = pe;
4862 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4863 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4864 if (flags & RADEON_VM_PAGE_SYSTEM) {
4865 value = radeon_vm_map_gart(rdev, addr);
4866 value &= 0xFFFFFFFFFFFFF000ULL;
4867 } else if (flags & RADEON_VM_PAGE_VALID) {
4868 value = addr;
4869 } else {
4870 value = 0;
4871 }
4872 addr += incr;
4873 value |= r600_flags;
4874 ib->ptr[ib->length_dw++] = value;
4875 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4876 }
4877 }
4878 } else {
4879 /* DMA */
4880 cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4881 }
4882 }
4883
4884 /*
4885 * RLC
4886 * The RLC is a multi-purpose microengine that handles a
4887 * variety of functions, the most important of which is
4888 * the interrupt controller.
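* The RLC also sequences the clock- and power-gating state machines,
* which is why the CG/PG helpers below halt it (cik_halt_rlc()) around
* RLC_SERDES_WR_* reconfiguration and restart it via cik_update_rlc().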
4889 */ 4890 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev, 4891 bool enable) 4892 { 4893 u32 tmp = RREG32(CP_INT_CNTL_RING0); 4894 4895 if (enable) 4896 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 4897 else 4898 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 4899 WREG32(CP_INT_CNTL_RING0, tmp); 4900 } 4901 4902 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable) 4903 { 4904 u32 tmp; 4905 4906 tmp = RREG32(RLC_LB_CNTL); 4907 if (enable) 4908 tmp |= LOAD_BALANCE_ENABLE; 4909 else 4910 tmp &= ~LOAD_BALANCE_ENABLE; 4911 WREG32(RLC_LB_CNTL, tmp); 4912 } 4913 4914 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev) 4915 { 4916 u32 i, j, k; 4917 u32 mask; 4918 4919 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { 4920 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { 4921 cik_select_se_sh(rdev, i, j); 4922 for (k = 0; k < rdev->usec_timeout; k++) { 4923 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0) 4924 break; 4925 udelay(1); 4926 } 4927 } 4928 } 4929 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 4930 4931 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY; 4932 for (k = 0; k < rdev->usec_timeout; k++) { 4933 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 4934 break; 4935 udelay(1); 4936 } 4937 } 4938 4939 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc) 4940 { 4941 u32 tmp; 4942 4943 tmp = RREG32(RLC_CNTL); 4944 if (tmp != rlc) 4945 WREG32(RLC_CNTL, rlc); 4946 } 4947 4948 static u32 cik_halt_rlc(struct radeon_device *rdev) 4949 { 4950 u32 data, orig; 4951 4952 orig = data = RREG32(RLC_CNTL); 4953 4954 if (data & RLC_ENABLE) { 4955 u32 i; 4956 4957 data &= ~RLC_ENABLE; 4958 WREG32(RLC_CNTL, data); 4959 4960 for (i = 0; i < rdev->usec_timeout; i++) { 4961 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0) 4962 break; 4963 udelay(1); 4964 } 4965 4966 cik_wait_for_rlc_serdes(rdev); 4967 } 4968 4969 return orig; 4970 } 4971 4972 void cik_enter_rlc_safe_mode(struct radeon_device *rdev) 4973 { 4974 u32 tmp, i, mask; 4975 4976 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE); 4977 WREG32(RLC_GPR_REG2, tmp); 4978 4979 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS; 4980 for (i = 0; i < rdev->usec_timeout; i++) { 4981 if ((RREG32(RLC_GPM_STAT) & mask) == mask) 4982 break; 4983 udelay(1); 4984 } 4985 4986 for (i = 0; i < rdev->usec_timeout; i++) { 4987 if ((RREG32(RLC_GPR_REG2) & REQ) == 0) 4988 break; 4989 udelay(1); 4990 } 4991 } 4992 4993 void cik_exit_rlc_safe_mode(struct radeon_device *rdev) 4994 { 4995 u32 tmp; 4996 4997 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE); 4998 WREG32(RLC_GPR_REG2, tmp); 4999 } 5000 5001 /** 5002 * cik_rlc_stop - stop the RLC ME 5003 * 5004 * @rdev: radeon_device pointer 5005 * 5006 * Halt the RLC ME (MicroEngine) (CIK). 5007 */ 5008 static void cik_rlc_stop(struct radeon_device *rdev) 5009 { 5010 WREG32(RLC_CNTL, 0); 5011 5012 cik_enable_gui_idle_interrupt(rdev, false); 5013 5014 cik_wait_for_rlc_serdes(rdev); 5015 } 5016 5017 /** 5018 * cik_rlc_start - start the RLC ME 5019 * 5020 * @rdev: radeon_device pointer 5021 * 5022 * Unhalt the RLC ME (MicroEngine) (CIK). 5023 */ 5024 static void cik_rlc_start(struct radeon_device *rdev) 5025 { 5026 WREG32(RLC_CNTL, RLC_ENABLE); 5027 5028 cik_enable_gui_idle_interrupt(rdev, true); 5029 5030 udelay(50); 5031 } 5032 5033 /** 5034 * cik_rlc_resume - setup the RLC hw 5035 * 5036 * @rdev: radeon_device pointer 5037 * 5038 * Initialize the RLC registers, load the ucode, 5039 * and start the RLC (CIK). 
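* The microcode is streamed in one dword at a time through
* RLC_GPM_UCODE_ADDR/RLC_GPM_UCODE_DATA, with the address register reset
* to 0 before and after the upload.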
5040 * Returns 0 for success, -EINVAL if the ucode is not available. 5041 */ 5042 static int cik_rlc_resume(struct radeon_device *rdev) 5043 { 5044 u32 i, size, tmp; 5045 const __be32 *fw_data; 5046 5047 if (!rdev->rlc_fw) 5048 return -EINVAL; 5049 5050 switch (rdev->family) { 5051 case CHIP_BONAIRE: 5052 default: 5053 size = BONAIRE_RLC_UCODE_SIZE; 5054 break; 5055 case CHIP_KAVERI: 5056 size = KV_RLC_UCODE_SIZE; 5057 break; 5058 case CHIP_KABINI: 5059 size = KB_RLC_UCODE_SIZE; 5060 break; 5061 } 5062 5063 cik_rlc_stop(rdev); 5064 5065 /* disable CG */ 5066 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc; 5067 WREG32(RLC_CGCG_CGLS_CTRL, tmp); 5068 5069 si_rlc_reset(rdev); 5070 5071 cik_init_pg(rdev); 5072 5073 cik_init_cg(rdev); 5074 5075 WREG32(RLC_LB_CNTR_INIT, 0); 5076 WREG32(RLC_LB_CNTR_MAX, 0x00008000); 5077 5078 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5079 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff); 5080 WREG32(RLC_LB_PARAMS, 0x00600408); 5081 WREG32(RLC_LB_CNTL, 0x80000004); 5082 5083 WREG32(RLC_MC_CNTL, 0); 5084 WREG32(RLC_UCODE_CNTL, 0); 5085 5086 fw_data = (const __be32 *)rdev->rlc_fw->data; 5087 WREG32(RLC_GPM_UCODE_ADDR, 0); 5088 for (i = 0; i < size; i++) 5089 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++)); 5090 WREG32(RLC_GPM_UCODE_ADDR, 0); 5091 5092 /* XXX - find out what chips support lbpw */ 5093 cik_enable_lbpw(rdev, false); 5094 5095 if (rdev->family == CHIP_BONAIRE) 5096 WREG32(RLC_DRIVER_DMA_STATUS, 0); 5097 5098 cik_rlc_start(rdev); 5099 5100 return 0; 5101 } 5102 5103 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable) 5104 { 5105 u32 data, orig, tmp, tmp2; 5106 5107 orig = data = RREG32(RLC_CGCG_CGLS_CTRL); 5108 5109 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) { 5110 cik_enable_gui_idle_interrupt(rdev, true); 5111 5112 tmp = cik_halt_rlc(rdev); 5113 5114 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5115 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5116 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5117 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE; 5118 WREG32(RLC_SERDES_WR_CTRL, tmp2); 5119 5120 cik_update_rlc(rdev, tmp); 5121 5122 data |= CGCG_EN | CGLS_EN; 5123 } else { 5124 cik_enable_gui_idle_interrupt(rdev, false); 5125 5126 RREG32(CB_CGTT_SCLK_CTRL); 5127 RREG32(CB_CGTT_SCLK_CTRL); 5128 RREG32(CB_CGTT_SCLK_CTRL); 5129 RREG32(CB_CGTT_SCLK_CTRL); 5130 5131 data &= ~(CGCG_EN | CGLS_EN); 5132 } 5133 5134 if (orig != data) 5135 WREG32(RLC_CGCG_CGLS_CTRL, data); 5136 5137 } 5138 5139 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable) 5140 { 5141 u32 data, orig, tmp = 0; 5142 5143 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) { 5144 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) { 5145 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) { 5146 orig = data = RREG32(CP_MEM_SLP_CNTL); 5147 data |= CP_MEM_LS_EN; 5148 if (orig != data) 5149 WREG32(CP_MEM_SLP_CNTL, data); 5150 } 5151 } 5152 5153 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); 5154 data &= 0xfffffffd; 5155 if (orig != data) 5156 WREG32(RLC_CGTT_MGCG_OVERRIDE, data); 5157 5158 tmp = cik_halt_rlc(rdev); 5159 5160 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5161 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5162 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5163 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0; 5164 WREG32(RLC_SERDES_WR_CTRL, data); 5165 5166 cik_update_rlc(rdev, tmp); 5167 5168 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) { 5169 orig = data = RREG32(CGTS_SM_CTRL_REG); 5170 data &= 
~SM_MODE_MASK; 5171 data |= SM_MODE(0x2); 5172 data |= SM_MODE_ENABLE; 5173 data &= ~CGTS_OVERRIDE; 5174 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) && 5175 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS)) 5176 data &= ~CGTS_LS_OVERRIDE; 5177 data &= ~ON_MONITOR_ADD_MASK; 5178 data |= ON_MONITOR_ADD_EN; 5179 data |= ON_MONITOR_ADD(0x96); 5180 if (orig != data) 5181 WREG32(CGTS_SM_CTRL_REG, data); 5182 } 5183 } else { 5184 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); 5185 data |= 0x00000002; 5186 if (orig != data) 5187 WREG32(RLC_CGTT_MGCG_OVERRIDE, data); 5188 5189 data = RREG32(RLC_MEM_SLP_CNTL); 5190 if (data & RLC_MEM_LS_EN) { 5191 data &= ~RLC_MEM_LS_EN; 5192 WREG32(RLC_MEM_SLP_CNTL, data); 5193 } 5194 5195 data = RREG32(CP_MEM_SLP_CNTL); 5196 if (data & CP_MEM_LS_EN) { 5197 data &= ~CP_MEM_LS_EN; 5198 WREG32(CP_MEM_SLP_CNTL, data); 5199 } 5200 5201 orig = data = RREG32(CGTS_SM_CTRL_REG); 5202 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE; 5203 if (orig != data) 5204 WREG32(CGTS_SM_CTRL_REG, data); 5205 5206 tmp = cik_halt_rlc(rdev); 5207 5208 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5209 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5210 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5211 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1; 5212 WREG32(RLC_SERDES_WR_CTRL, data); 5213 5214 cik_update_rlc(rdev, tmp); 5215 } 5216 } 5217 5218 static const u32 mc_cg_registers[] = 5219 { 5220 MC_HUB_MISC_HUB_CG, 5221 MC_HUB_MISC_SIP_CG, 5222 MC_HUB_MISC_VM_CG, 5223 MC_XPB_CLK_GAT, 5224 ATC_MISC_CG, 5225 MC_CITF_MISC_WR_CG, 5226 MC_CITF_MISC_RD_CG, 5227 MC_CITF_MISC_VM_CG, 5228 VM_L2_CG, 5229 }; 5230 5231 static void cik_enable_mc_ls(struct radeon_device *rdev, 5232 bool enable) 5233 { 5234 int i; 5235 u32 orig, data; 5236 5237 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { 5238 orig = data = RREG32(mc_cg_registers[i]); 5239 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS)) 5240 data |= MC_LS_ENABLE; 5241 else 5242 data &= ~MC_LS_ENABLE; 5243 if (data != orig) 5244 WREG32(mc_cg_registers[i], data); 5245 } 5246 } 5247 5248 static void cik_enable_mc_mgcg(struct radeon_device *rdev, 5249 bool enable) 5250 { 5251 int i; 5252 u32 orig, data; 5253 5254 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { 5255 orig = data = RREG32(mc_cg_registers[i]); 5256 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG)) 5257 data |= MC_CG_ENABLE; 5258 else 5259 data &= ~MC_CG_ENABLE; 5260 if (data != orig) 5261 WREG32(mc_cg_registers[i], data); 5262 } 5263 } 5264 5265 static void cik_enable_sdma_mgcg(struct radeon_device *rdev, 5266 bool enable) 5267 { 5268 u32 orig, data; 5269 5270 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) { 5271 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100); 5272 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100); 5273 } else { 5274 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET); 5275 data |= 0xff000000; 5276 if (data != orig) 5277 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data); 5278 5279 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET); 5280 data |= 0xff000000; 5281 if (data != orig) 5282 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data); 5283 } 5284 } 5285 5286 static void cik_enable_sdma_mgls(struct radeon_device *rdev, 5287 bool enable) 5288 { 5289 u32 orig, data; 5290 5291 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) { 5292 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET); 5293 data |= 0x100; 5294 if (orig != data) 5295 WREG32(SDMA0_POWER_CNTL + 
SDMA0_REGISTER_OFFSET, data); 5296 5297 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET); 5298 data |= 0x100; 5299 if (orig != data) 5300 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data); 5301 } else { 5302 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET); 5303 data &= ~0x100; 5304 if (orig != data) 5305 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data); 5306 5307 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET); 5308 data &= ~0x100; 5309 if (orig != data) 5310 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data); 5311 } 5312 } 5313 5314 static void cik_enable_uvd_mgcg(struct radeon_device *rdev, 5315 bool enable) 5316 { 5317 u32 orig, data; 5318 5319 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) { 5320 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); 5321 data = 0xfff; 5322 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data); 5323 5324 orig = data = RREG32(UVD_CGC_CTRL); 5325 data |= DCM; 5326 if (orig != data) 5327 WREG32(UVD_CGC_CTRL, data); 5328 } else { 5329 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); 5330 data &= ~0xfff; 5331 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data); 5332 5333 orig = data = RREG32(UVD_CGC_CTRL); 5334 data &= ~DCM; 5335 if (orig != data) 5336 WREG32(UVD_CGC_CTRL, data); 5337 } 5338 } 5339 5340 static void cik_enable_bif_mgls(struct radeon_device *rdev, 5341 bool enable) 5342 { 5343 u32 orig, data; 5344 5345 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2); 5346 5347 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS)) 5348 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | 5349 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN; 5350 else 5351 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN | 5352 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN); 5353 5354 if (orig != data) 5355 WREG32_PCIE_PORT(PCIE_CNTL2, data); 5356 } 5357 5358 static void cik_enable_hdp_mgcg(struct radeon_device *rdev, 5359 bool enable) 5360 { 5361 u32 orig, data; 5362 5363 orig = data = RREG32(HDP_HOST_PATH_CNTL); 5364 5365 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG)) 5366 data &= ~CLOCK_GATING_DIS; 5367 else 5368 data |= CLOCK_GATING_DIS; 5369 5370 if (orig != data) 5371 WREG32(HDP_HOST_PATH_CNTL, data); 5372 } 5373 5374 static void cik_enable_hdp_ls(struct radeon_device *rdev, 5375 bool enable) 5376 { 5377 u32 orig, data; 5378 5379 orig = data = RREG32(HDP_MEM_POWER_LS); 5380 5381 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS)) 5382 data |= HDP_LS_ENABLE; 5383 else 5384 data &= ~HDP_LS_ENABLE; 5385 5386 if (orig != data) 5387 WREG32(HDP_MEM_POWER_LS, data); 5388 } 5389 5390 void cik_update_cg(struct radeon_device *rdev, 5391 u32 block, bool enable) 5392 { 5393 5394 if (block & RADEON_CG_BLOCK_GFX) { 5395 cik_enable_gui_idle_interrupt(rdev, false); 5396 /* order matters! 
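(when enabling, medium grain gating (MGCG) must come up before coarse
grain gating (CGCG); when disabling, CGCG is torn down first)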
*/ 5397 if (enable) { 5398 cik_enable_mgcg(rdev, true); 5399 cik_enable_cgcg(rdev, true); 5400 } else { 5401 cik_enable_cgcg(rdev, false); 5402 cik_enable_mgcg(rdev, false); 5403 } 5404 cik_enable_gui_idle_interrupt(rdev, true); 5405 } 5406 5407 if (block & RADEON_CG_BLOCK_MC) { 5408 if (!(rdev->flags & RADEON_IS_IGP)) { 5409 cik_enable_mc_mgcg(rdev, enable); 5410 cik_enable_mc_ls(rdev, enable); 5411 } 5412 } 5413 5414 if (block & RADEON_CG_BLOCK_SDMA) { 5415 cik_enable_sdma_mgcg(rdev, enable); 5416 cik_enable_sdma_mgls(rdev, enable); 5417 } 5418 5419 if (block & RADEON_CG_BLOCK_BIF) { 5420 cik_enable_bif_mgls(rdev, enable); 5421 } 5422 5423 if (block & RADEON_CG_BLOCK_UVD) { 5424 if (rdev->has_uvd) 5425 cik_enable_uvd_mgcg(rdev, enable); 5426 } 5427 5428 if (block & RADEON_CG_BLOCK_HDP) { 5429 cik_enable_hdp_mgcg(rdev, enable); 5430 cik_enable_hdp_ls(rdev, enable); 5431 } 5432 } 5433 5434 static void cik_init_cg(struct radeon_device *rdev) 5435 { 5436 5437 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true); 5438 5439 if (rdev->has_uvd) 5440 si_init_uvd_internal_cg(rdev); 5441 5442 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC | 5443 RADEON_CG_BLOCK_SDMA | 5444 RADEON_CG_BLOCK_BIF | 5445 RADEON_CG_BLOCK_UVD | 5446 RADEON_CG_BLOCK_HDP), true); 5447 } 5448 5449 static void cik_fini_cg(struct radeon_device *rdev) 5450 { 5451 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC | 5452 RADEON_CG_BLOCK_SDMA | 5453 RADEON_CG_BLOCK_BIF | 5454 RADEON_CG_BLOCK_UVD | 5455 RADEON_CG_BLOCK_HDP), false); 5456 5457 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); 5458 } 5459 5460 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev, 5461 bool enable) 5462 { 5463 u32 data, orig; 5464 5465 orig = data = RREG32(RLC_PG_CNTL); 5466 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS)) 5467 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE; 5468 else 5469 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE; 5470 if (orig != data) 5471 WREG32(RLC_PG_CNTL, data); 5472 } 5473 5474 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev, 5475 bool enable) 5476 { 5477 u32 data, orig; 5478 5479 orig = data = RREG32(RLC_PG_CNTL); 5480 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS)) 5481 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE; 5482 else 5483 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE; 5484 if (orig != data) 5485 WREG32(RLC_PG_CNTL, data); 5486 } 5487 5488 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable) 5489 { 5490 u32 data, orig; 5491 5492 orig = data = RREG32(RLC_PG_CNTL); 5493 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP)) 5494 data &= ~DISABLE_CP_PG; 5495 else 5496 data |= DISABLE_CP_PG; 5497 if (orig != data) 5498 WREG32(RLC_PG_CNTL, data); 5499 } 5500 5501 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable) 5502 { 5503 u32 data, orig; 5504 5505 orig = data = RREG32(RLC_PG_CNTL); 5506 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS)) 5507 data &= ~DISABLE_GDS_PG; 5508 else 5509 data |= DISABLE_GDS_PG; 5510 if (orig != data) 5511 WREG32(RLC_PG_CNTL, data); 5512 } 5513 5514 #define CP_ME_TABLE_SIZE 96 5515 #define CP_ME_TABLE_OFFSET 2048 5516 #define CP_MEC_TABLE_OFFSET 4096 5517 5518 void cik_init_cp_pg_table(struct radeon_device *rdev) 5519 { 5520 const __be32 *fw_data; 5521 volatile u32 *dst_ptr; 5522 int me, i, max_me = 4; 5523 u32 bo_offset = 0; 5524 u32 table_offset; 5525 5526 if (rdev->family == CHIP_KAVERI) 5527 max_me = 5; 5528 5529 if (rdev->rlc.cp_table_ptr == NULL) 5530 return; 5531 5532 /* write the cp table buffer */ 5533 dst_ptr = 
rdev->rlc.cp_table_ptr;
5534 for (me = 0; me < max_me; me++) {
5535 if (me == 0) {
5536 fw_data = (const __be32 *)rdev->ce_fw->data;
5537 table_offset = CP_ME_TABLE_OFFSET;
5538 } else if (me == 1) {
5539 fw_data = (const __be32 *)rdev->pfp_fw->data;
5540 table_offset = CP_ME_TABLE_OFFSET;
5541 } else if (me == 2) {
5542 fw_data = (const __be32 *)rdev->me_fw->data;
5543 table_offset = CP_ME_TABLE_OFFSET;
5544 } else {
5545 fw_data = (const __be32 *)rdev->mec_fw->data;
5546 table_offset = CP_MEC_TABLE_OFFSET;
5547 }
5548
5549 for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
5550 dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
5551 }
5552 bo_offset += CP_ME_TABLE_SIZE;
5553 }
5554 }
5555
5556 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
5557 bool enable)
5558 {
5559 u32 data, orig;
5560
5561 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5562 orig = data = RREG32(RLC_PG_CNTL);
5563 data |= GFX_PG_ENABLE;
5564 if (orig != data)
5565 WREG32(RLC_PG_CNTL, data);
5566
5567 orig = data = RREG32(RLC_AUTO_PG_CTRL);
5568 data |= AUTO_PG_EN;
5569 if (orig != data)
5570 WREG32(RLC_AUTO_PG_CTRL, data);
5571 } else {
5572 orig = data = RREG32(RLC_PG_CNTL);
5573 data &= ~GFX_PG_ENABLE;
5574 if (orig != data)
5575 WREG32(RLC_PG_CNTL, data);
5576
5577 orig = data = RREG32(RLC_AUTO_PG_CTRL);
5578 data &= ~AUTO_PG_EN;
5579 if (orig != data)
5580 WREG32(RLC_AUTO_PG_CTRL, data);
5581
5582 data = RREG32(DB_RENDER_CONTROL);
5583 }
5584 }
5585
5586 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5587 {
5588 u32 mask = 0, tmp, tmp1;
5589 int i;
5590
5591 cik_select_se_sh(rdev, se, sh);
5592 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5593 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5594 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5595
5596 tmp &= 0xffff0000;
5597
5598 tmp |= tmp1;
5599 tmp >>= 16;
5600
5601 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
5602 mask <<= 1;
5603 mask |= 1;
5604 }
5605
5606 return (~tmp) & mask;
5607 }
5608
5609 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5610 {
5611 u32 i, j, k, active_cu_number = 0;
5612 u32 mask, counter, cu_bitmap;
5613 u32 tmp = 0;
5614
5615 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5616 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5617 mask = 1;
5618 cu_bitmap = 0;
5619 counter = 0;
5620 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
5621 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5622 if (counter < 2)
5623 cu_bitmap |= mask;
5624 counter++;
5625 }
5626 mask <<= 1;
5627 }
5628
5629 active_cu_number += counter;
5630 tmp |= (cu_bitmap << (i * 16 + j * 8));
5631 }
5632 }
5633
5634 WREG32(RLC_PG_AO_CU_MASK, tmp);
5635
5636 tmp = RREG32(RLC_MAX_PG_CU);
5637 tmp &= ~MAX_PU_CU_MASK;
5638 tmp |= MAX_PU_CU(active_cu_number);
5639 WREG32(RLC_MAX_PG_CU, tmp);
5640 }
5641
5642 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5643 bool enable)
5644 {
5645 u32 data, orig;
5646
5647 orig = data = RREG32(RLC_PG_CNTL);
5648 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5649 data |= STATIC_PER_CU_PG_ENABLE;
5650 else
5651 data &= ~STATIC_PER_CU_PG_ENABLE;
5652 if (orig != data)
5653 WREG32(RLC_PG_CNTL, data);
5654 }
5655
5656 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5657 bool enable)
5658 {
5659 u32 data, orig;
5660
5661 orig = data = RREG32(RLC_PG_CNTL);
5662 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5663 data |= DYN_PER_CU_PG_ENABLE;
5664 else
5665 data &=
~DYN_PER_CU_PG_ENABLE; 5666 if (orig != data) 5667 WREG32(RLC_PG_CNTL, data); 5668 } 5669 5670 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90 5671 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D 5672 5673 static void cik_init_gfx_cgpg(struct radeon_device *rdev) 5674 { 5675 u32 data, orig; 5676 u32 i; 5677 5678 if (rdev->rlc.cs_data) { 5679 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET); 5680 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr)); 5681 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr)); 5682 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size); 5683 } else { 5684 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET); 5685 for (i = 0; i < 3; i++) 5686 WREG32(RLC_GPM_SCRATCH_DATA, 0); 5687 } 5688 if (rdev->rlc.reg_list) { 5689 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET); 5690 for (i = 0; i < rdev->rlc.reg_list_size; i++) 5691 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]); 5692 } 5693 5694 orig = data = RREG32(RLC_PG_CNTL); 5695 data |= GFX_PG_SRC; 5696 if (orig != data) 5697 WREG32(RLC_PG_CNTL, data); 5698 5699 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8); 5700 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8); 5701 5702 data = RREG32(CP_RB_WPTR_POLL_CNTL); 5703 data &= ~IDLE_POLL_COUNT_MASK; 5704 data |= IDLE_POLL_COUNT(0x60); 5705 WREG32(CP_RB_WPTR_POLL_CNTL, data); 5706 5707 data = 0x10101010; 5708 WREG32(RLC_PG_DELAY, data); 5709 5710 data = RREG32(RLC_PG_DELAY_2); 5711 data &= ~0xff; 5712 data |= 0x3; 5713 WREG32(RLC_PG_DELAY_2, data); 5714 5715 data = RREG32(RLC_AUTO_PG_CTRL); 5716 data &= ~GRBM_REG_SGIT_MASK; 5717 data |= GRBM_REG_SGIT(0x700); 5718 WREG32(RLC_AUTO_PG_CTRL, data); 5719 5720 } 5721 5722 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable) 5723 { 5724 cik_enable_gfx_cgpg(rdev, enable); 5725 cik_enable_gfx_static_mgpg(rdev, enable); 5726 cik_enable_gfx_dynamic_mgpg(rdev, enable); 5727 } 5728 5729 u32 cik_get_csb_size(struct radeon_device *rdev) 5730 { 5731 u32 count = 0; 5732 const struct cs_section_def *sect = NULL; 5733 const struct cs_extent_def *ext = NULL; 5734 5735 if (rdev->rlc.cs_data == NULL) 5736 return 0; 5737 5738 /* begin clear state */ 5739 count += 2; 5740 /* context control state */ 5741 count += 3; 5742 5743 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) { 5744 for (ext = sect->section; ext->extent != NULL; ++ext) { 5745 if (sect->id == SECT_CONTEXT) 5746 count += 2 + ext->reg_count; 5747 else 5748 return 0; 5749 } 5750 } 5751 /* pa_sc_raster_config/pa_sc_raster_config1 */ 5752 count += 4; 5753 /* end clear state */ 5754 count += 2; 5755 /* clear state */ 5756 count += 2; 5757 5758 return count; 5759 } 5760 5761 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer) 5762 { 5763 u32 count = 0, i; 5764 const struct cs_section_def *sect = NULL; 5765 const struct cs_extent_def *ext = NULL; 5766 5767 if (rdev->rlc.cs_data == NULL) 5768 return; 5769 if (buffer == NULL) 5770 return; 5771 5772 buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0); 5773 buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE; 5774 5775 buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1); 5776 buffer[count++] = 0x80000000; 5777 buffer[count++] = 0x80000000; 5778 5779 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) { 5780 for (ext = sect->section; ext->extent != NULL; ++ext) { 5781 if (sect->id == SECT_CONTEXT) { 5782 buffer[count++] = 
PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count); 5783 buffer[count++] = ext->reg_index - 0xa000; 5784 for (i = 0; i < ext->reg_count; i++) 5785 buffer[count++] = ext->extent[i]; 5786 } else { 5787 return; 5788 } 5789 } 5790 } 5791 5792 buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 5793 buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START; 5794 switch (rdev->family) { 5795 case CHIP_BONAIRE: 5796 buffer[count++] = 0x16000012; 5797 buffer[count++] = 0x00000000; 5798 break; 5799 case CHIP_KAVERI: 5800 buffer[count++] = 0x00000000; /* XXX */ 5801 buffer[count++] = 0x00000000; 5802 break; 5803 case CHIP_KABINI: 5804 buffer[count++] = 0x00000000; /* XXX */ 5805 buffer[count++] = 0x00000000; 5806 break; 5807 default: 5808 buffer[count++] = 0x00000000; 5809 buffer[count++] = 0x00000000; 5810 break; 5811 } 5812 5813 buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0); 5814 buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE; 5815 5816 buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0); 5817 buffer[count++] = 0; 5818 } 5819 5820 static void cik_init_pg(struct radeon_device *rdev) 5821 { 5822 if (rdev->pg_flags) { 5823 cik_enable_sck_slowdown_on_pu(rdev, true); 5824 cik_enable_sck_slowdown_on_pd(rdev, true); 5825 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) { 5826 cik_init_gfx_cgpg(rdev); 5827 cik_enable_cp_pg(rdev, true); 5828 cik_enable_gds_pg(rdev, true); 5829 } 5830 cik_init_ao_cu_mask(rdev); 5831 cik_update_gfx_pg(rdev, true); 5832 } 5833 } 5834 5835 static void cik_fini_pg(struct radeon_device *rdev) 5836 { 5837 if (rdev->pg_flags) { 5838 cik_update_gfx_pg(rdev, false); 5839 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) { 5840 cik_enable_cp_pg(rdev, false); 5841 cik_enable_gds_pg(rdev, false); 5842 } 5843 } 5844 } 5845 5846 /* 5847 * Interrupts 5848 * Starting with r6xx, interrupts are handled via a ring buffer. 5849 * Ring buffers are areas of GPU accessible memory that the GPU 5850 * writes interrupt vectors into and the host reads vectors out of. 5851 * There is a rptr (read pointer) that determines where the 5852 * host is currently reading, and a wptr (write pointer) 5853 * which determines where the GPU has written. When the 5854 * pointers are equal, the ring is idle. When the GPU 5855 * writes vectors to the ring buffer, it increments the 5856 * wptr. When there is an interrupt, the host then starts 5857 * fetching commands and processing them until the pointers are 5858 * equal again at which point it updates the rptr. 5859 */ 5860 5861 /** 5862 * cik_enable_interrupts - Enable the interrupt ring buffer 5863 * 5864 * @rdev: radeon_device pointer 5865 * 5866 * Enable the interrupt ring buffer (CIK). 5867 */ 5868 static void cik_enable_interrupts(struct radeon_device *rdev) 5869 { 5870 u32 ih_cntl = RREG32(IH_CNTL); 5871 u32 ih_rb_cntl = RREG32(IH_RB_CNTL); 5872 5873 ih_cntl |= ENABLE_INTR; 5874 ih_rb_cntl |= IH_RB_ENABLE; 5875 WREG32(IH_CNTL, ih_cntl); 5876 WREG32(IH_RB_CNTL, ih_rb_cntl); 5877 rdev->ih.enabled = true; 5878 } 5879 5880 /** 5881 * cik_disable_interrupts - Disable the interrupt ring buffer 5882 * 5883 * @rdev: radeon_device pointer 5884 * 5885 * Disable the interrupt ring buffer (CIK). 
 */
static void cik_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}

/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);
}
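/* The rptr/wptr protocol described in the comment above cik_enable_interrupts()
 * boils down to a short consumer loop. A minimal sketch of the host side,
 * illustrative only (cik_irq_process() below is the real consumer; the helper
 * name is hypothetical and not part of the driver):
 */
#if 0
static void cik_ih_consume_sketch(struct radeon_device *rdev, u32 wptr)
{
	u32 rptr = rdev->ih.rptr;

	while (rptr != wptr) {
		/* each IV ring entry is 16 bytes on CIK */
		/* decode_iv_entry(&rdev->ih.ring[rptr / 4]); */
		rptr = (rptr + 16) & rdev->ih.ptr_mask;
	}
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rptr); /* publish the new rptr so the ring drains */
}
#endif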
/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
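/* IH_RB_CNTL encodes the ring size as log2 of the size in dwords, placed in
 * the field starting at bit 1 (see cik_irq_init() above). For the default
 * 64 KB ring: 65536 / 4 = 16384 dwords and order_base_2(16384) = 14. A sketch
 * of just that encoding, under the same field layout (hypothetical helper,
 * not part of the driver):
 */
#if 0
static u32 cik_ih_rb_cntl_sketch(u32 ring_size_bytes)
{
	u32 rb_bufsz = order_base_2(ring_size_bytes / 4);

	return IH_WPTR_OVERFLOW_ENABLE |
	       IH_WPTR_OVERFLOW_CLEAR |
	       (rb_bufsz << 1); /* RB_SIZE field starts at bit 1 */
}
#endif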
/**
 * cik_irq_set - enable/disable interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Enable interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).
 * Returns 0 for success, errors for failure.
 */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;

	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;

	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;

	if (rdev->flags & RADEON_IS_IGP)
		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
			~(THERM_INTH_MASK | THERM_INTL_MASK);
	else
		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
		DRM_DEBUG("cik_irq_set: sw int cp1\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else if (ring->me == 2) {
			switch (ring->pipe) {
			case 0:
				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
		}
	}
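	/*
	 * The CP2 ring below uses the same decode as CP1 above: compute
	 * rings live on ME1/ME2 with four pipes each, and the pipe number
	 * selects which CP_MEx_PIPEy_INT_CNTL shadow receives the
	 * timestamp interrupt enable.
	 */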
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
		DRM_DEBUG("cik_irq_set: sw int cp2\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else if (ring->me == 2) {
			switch (ring->pipe) {
			case 0:
				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
		}
	}

	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("cik_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("cik_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("cik_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("cik_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("cik_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("cik_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("cik_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("cik_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("cik_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("cik_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("cik_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("cik_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		if (rdev->flags & RADEON_IS_IGP)
			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
		else
			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);

	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET,
dma_cntl1); 6278 6279 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0); 6280 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1); 6281 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2); 6282 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3); 6283 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0); 6284 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1); 6285 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2); 6286 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3); 6287 6288 WREG32(GRBM_INT_CNTL, grbm_int_cntl); 6289 6290 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); 6291 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2); 6292 if (rdev->num_crtc >= 4) { 6293 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3); 6294 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4); 6295 } 6296 if (rdev->num_crtc >= 6) { 6297 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5); 6298 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6); 6299 } 6300 6301 WREG32(DC_HPD1_INT_CONTROL, hpd1); 6302 WREG32(DC_HPD2_INT_CONTROL, hpd2); 6303 WREG32(DC_HPD3_INT_CONTROL, hpd3); 6304 WREG32(DC_HPD4_INT_CONTROL, hpd4); 6305 WREG32(DC_HPD5_INT_CONTROL, hpd5); 6306 WREG32(DC_HPD6_INT_CONTROL, hpd6); 6307 6308 if (rdev->flags & RADEON_IS_IGP) 6309 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int); 6310 else 6311 WREG32_SMC(CG_THERMAL_INT, thermal_int); 6312 6313 return 0; 6314 } 6315 6316 /** 6317 * cik_irq_ack - ack interrupt sources 6318 * 6319 * @rdev: radeon_device pointer 6320 * 6321 * Ack interrupt sources on the GPU (vblanks, hpd, 6322 * etc.) (CIK). Certain interrupts sources are sw 6323 * generated and do not require an explicit ack. 6324 */ 6325 static inline void cik_irq_ack(struct radeon_device *rdev) 6326 { 6327 u32 tmp; 6328 6329 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS); 6330 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE); 6331 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2); 6332 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3); 6333 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4); 6334 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5); 6335 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6); 6336 6337 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) 6338 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK); 6339 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) 6340 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK); 6341 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) 6342 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK); 6343 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) 6344 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK); 6345 6346 if (rdev->num_crtc >= 4) { 6347 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) 6348 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK); 6349 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) 6350 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK); 6351 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) 6352 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK); 6353 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) 6354 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK); 6355 
	}

	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL); /* was DC_HPD5: read the register we are about to write */
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}

/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}

/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}

/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
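/* Overflow handling in cik_get_ih_wptr() below restarts one entry past the
 * overwritten slot: with a 64 KB ring (ptr_mask 0xffff) and wptr 0x100, the
 * host resumes at (0x100 + 16) & 0xffff = 0x110. A sketch of that computation
 * (hypothetical helper, not part of the driver):
 */
#if 0
static u32 cik_ih_overflow_restart_sketch(u32 wptr, u32 ptr_mask)
{
	/* skip the vector being overwritten and resume at the next entry */
	return (wptr + 16) & ptr_mask;
}
#endif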
/**
 * cik_get_ih_wptr - get the IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Get the IH ring buffer wptr from either the register
 * or the writeback memory buffer (CIK). Also check for
 * ring buffer overflow and deal with it.
 * Used by cik_irq_process().
 * Returns the value of the wptr.
 */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		/* When a ring buffer overflow happens, start parsing interrupts
		 * from the last vector that was not overwritten (wptr + 16).
		 * Hopefully this allows us to catch up.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}

/* CIK IV Ring
 * Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 *            CP:
 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
 *            PIPE_ID - ME0 0=3D
 *                    - ME1&2 compute dispatcher (4 pipes each)
 *            SDMA:
 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
 * [79:72]  - VMID
 * [95:80]  - PASID
 * [127:96] - reserved
 */
/**
 * cik_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Interrupt handler (CIK). Walk the IH ring,
 * ack interrupts and schedule work to handle
 * interrupt events.
 * Returns irq process return code.
 */
int cik_irq_process(struct radeon_device *rdev)
{
	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u8 me_id, pipe_id, queue_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_reset = false;
	u32 addr, status, mc_client;
	bool queue_thermal = false;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = cik_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	cik_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes!
*/ 6547 ring_index = rptr / 4; 6548 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; 6549 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; 6550 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; 6551 6552 switch (src_id) { 6553 case 1: /* D1 vblank/vline */ 6554 switch (src_data) { 6555 case 0: /* D1 vblank */ 6556 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) { 6557 if (rdev->irq.crtc_vblank_int[0]) { 6558 drm_handle_vblank(rdev->ddev, 0); 6559 rdev->pm.vblank_sync = true; 6560 wake_up(&rdev->irq.vblank_queue); 6561 } 6562 if (atomic_read(&rdev->irq.pflip[0])) 6563 radeon_crtc_handle_flip(rdev, 0); 6564 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; 6565 DRM_DEBUG("IH: D1 vblank\n"); 6566 } 6567 break; 6568 case 1: /* D1 vline */ 6569 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) { 6570 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; 6571 DRM_DEBUG("IH: D1 vline\n"); 6572 } 6573 break; 6574 default: 6575 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6576 break; 6577 } 6578 break; 6579 case 2: /* D2 vblank/vline */ 6580 switch (src_data) { 6581 case 0: /* D2 vblank */ 6582 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { 6583 if (rdev->irq.crtc_vblank_int[1]) { 6584 drm_handle_vblank(rdev->ddev, 1); 6585 rdev->pm.vblank_sync = true; 6586 wake_up(&rdev->irq.vblank_queue); 6587 } 6588 if (atomic_read(&rdev->irq.pflip[1])) 6589 radeon_crtc_handle_flip(rdev, 1); 6590 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; 6591 DRM_DEBUG("IH: D2 vblank\n"); 6592 } 6593 break; 6594 case 1: /* D2 vline */ 6595 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) { 6596 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; 6597 DRM_DEBUG("IH: D2 vline\n"); 6598 } 6599 break; 6600 default: 6601 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6602 break; 6603 } 6604 break; 6605 case 3: /* D3 vblank/vline */ 6606 switch (src_data) { 6607 case 0: /* D3 vblank */ 6608 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { 6609 if (rdev->irq.crtc_vblank_int[2]) { 6610 drm_handle_vblank(rdev->ddev, 2); 6611 rdev->pm.vblank_sync = true; 6612 wake_up(&rdev->irq.vblank_queue); 6613 } 6614 if (atomic_read(&rdev->irq.pflip[2])) 6615 radeon_crtc_handle_flip(rdev, 2); 6616 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; 6617 DRM_DEBUG("IH: D3 vblank\n"); 6618 } 6619 break; 6620 case 1: /* D3 vline */ 6621 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { 6622 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; 6623 DRM_DEBUG("IH: D3 vline\n"); 6624 } 6625 break; 6626 default: 6627 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6628 break; 6629 } 6630 break; 6631 case 4: /* D4 vblank/vline */ 6632 switch (src_data) { 6633 case 0: /* D4 vblank */ 6634 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { 6635 if (rdev->irq.crtc_vblank_int[3]) { 6636 drm_handle_vblank(rdev->ddev, 3); 6637 rdev->pm.vblank_sync = true; 6638 wake_up(&rdev->irq.vblank_queue); 6639 } 6640 if (atomic_read(&rdev->irq.pflip[3])) 6641 radeon_crtc_handle_flip(rdev, 3); 6642 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; 6643 DRM_DEBUG("IH: D4 vblank\n"); 6644 } 6645 break; 6646 case 1: /* D4 vline */ 6647 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { 6648 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; 6649 
DRM_DEBUG("IH: D4 vline\n"); 6650 } 6651 break; 6652 default: 6653 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6654 break; 6655 } 6656 break; 6657 case 5: /* D5 vblank/vline */ 6658 switch (src_data) { 6659 case 0: /* D5 vblank */ 6660 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { 6661 if (rdev->irq.crtc_vblank_int[4]) { 6662 drm_handle_vblank(rdev->ddev, 4); 6663 rdev->pm.vblank_sync = true; 6664 wake_up(&rdev->irq.vblank_queue); 6665 } 6666 if (atomic_read(&rdev->irq.pflip[4])) 6667 radeon_crtc_handle_flip(rdev, 4); 6668 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; 6669 DRM_DEBUG("IH: D5 vblank\n"); 6670 } 6671 break; 6672 case 1: /* D5 vline */ 6673 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { 6674 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; 6675 DRM_DEBUG("IH: D5 vline\n"); 6676 } 6677 break; 6678 default: 6679 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6680 break; 6681 } 6682 break; 6683 case 6: /* D6 vblank/vline */ 6684 switch (src_data) { 6685 case 0: /* D6 vblank */ 6686 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { 6687 if (rdev->irq.crtc_vblank_int[5]) { 6688 drm_handle_vblank(rdev->ddev, 5); 6689 rdev->pm.vblank_sync = true; 6690 wake_up(&rdev->irq.vblank_queue); 6691 } 6692 if (atomic_read(&rdev->irq.pflip[5])) 6693 radeon_crtc_handle_flip(rdev, 5); 6694 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; 6695 DRM_DEBUG("IH: D6 vblank\n"); 6696 } 6697 break; 6698 case 1: /* D6 vline */ 6699 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { 6700 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; 6701 DRM_DEBUG("IH: D6 vline\n"); 6702 } 6703 break; 6704 default: 6705 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6706 break; 6707 } 6708 break; 6709 case 42: /* HPD hotplug */ 6710 switch (src_data) { 6711 case 0: 6712 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) { 6713 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; 6714 queue_hotplug = true; 6715 DRM_DEBUG("IH: HPD1\n"); 6716 } 6717 break; 6718 case 1: 6719 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) { 6720 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; 6721 queue_hotplug = true; 6722 DRM_DEBUG("IH: HPD2\n"); 6723 } 6724 break; 6725 case 2: 6726 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) { 6727 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; 6728 queue_hotplug = true; 6729 DRM_DEBUG("IH: HPD3\n"); 6730 } 6731 break; 6732 case 3: 6733 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) { 6734 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; 6735 queue_hotplug = true; 6736 DRM_DEBUG("IH: HPD4\n"); 6737 } 6738 break; 6739 case 4: 6740 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) { 6741 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; 6742 queue_hotplug = true; 6743 DRM_DEBUG("IH: HPD5\n"); 6744 } 6745 break; 6746 case 5: 6747 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { 6748 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; 6749 queue_hotplug = true; 6750 DRM_DEBUG("IH: HPD6\n"); 6751 } 6752 break; 6753 default: 6754 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6755 break; 6756 } 6757 break; 6758 case 124: /* UVD */ 6759 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data); 6760 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX); 
			break;
		case 146:
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			cik_vm_decode_fault(rdev, status, addr, mc_client);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* GFX RB CP_INT */
		case 177: /* GFX IB CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
			case 2:
				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 184: /* CP Privileged reg access */
			DRM_ERROR("Illegal register access in command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				/* This results in a full GPU reset, but all we need to do is soft
				 * reset the CP for gfx
				 */
				queue_reset = true;
				break;
			case 1:
				/* XXX compute */
				queue_reset = true;
				break;
			case 2:
				/* XXX compute */
				queue_reset = true;
				break;
			}
			break;
		case 185: /* CP Privileged inst */
			DRM_ERROR("Illegal instruction in command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				/* This results in a full GPU reset, but all we need to do is soft
				 * reset the CP for gfx
				 */
				queue_reset = true;
				break;
			case 1:
				/* XXX compute */
				queue_reset = true;
				break;
			case 2:
				/* XXX compute */
				queue_reset = true;
				break;
			}
			break;
		case 224: /* SDMA trap event */
			/* XXX check the bitfield order!
*/ 6847 me_id = (ring_id & 0x3) >> 0; 6848 queue_id = (ring_id & 0xc) >> 2; 6849 DRM_DEBUG("IH: SDMA trap\n"); 6850 switch (me_id) { 6851 case 0: 6852 switch (queue_id) { 6853 case 0: 6854 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX); 6855 break; 6856 case 1: 6857 /* XXX compute */ 6858 break; 6859 case 2: 6860 /* XXX compute */ 6861 break; 6862 } 6863 break; 6864 case 1: 6865 switch (queue_id) { 6866 case 0: 6867 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); 6868 break; 6869 case 1: 6870 /* XXX compute */ 6871 break; 6872 case 2: 6873 /* XXX compute */ 6874 break; 6875 } 6876 break; 6877 } 6878 break; 6879 case 230: /* thermal low to high */ 6880 DRM_DEBUG("IH: thermal low to high\n"); 6881 rdev->pm.dpm.thermal.high_to_low = false; 6882 queue_thermal = true; 6883 break; 6884 case 231: /* thermal high to low */ 6885 DRM_DEBUG("IH: thermal high to low\n"); 6886 rdev->pm.dpm.thermal.high_to_low = true; 6887 queue_thermal = true; 6888 break; 6889 case 233: /* GUI IDLE */ 6890 DRM_DEBUG("IH: GUI idle\n"); 6891 break; 6892 case 241: /* SDMA Privileged inst */ 6893 case 247: /* SDMA Privileged inst */ 6894 DRM_ERROR("Illegal instruction in SDMA command stream\n"); 6895 /* XXX check the bitfield order! */ 6896 me_id = (ring_id & 0x3) >> 0; 6897 queue_id = (ring_id & 0xc) >> 2; 6898 switch (me_id) { 6899 case 0: 6900 switch (queue_id) { 6901 case 0: 6902 queue_reset = true; 6903 break; 6904 case 1: 6905 /* XXX compute */ 6906 queue_reset = true; 6907 break; 6908 case 2: 6909 /* XXX compute */ 6910 queue_reset = true; 6911 break; 6912 } 6913 break; 6914 case 1: 6915 switch (queue_id) { 6916 case 0: 6917 queue_reset = true; 6918 break; 6919 case 1: 6920 /* XXX compute */ 6921 queue_reset = true; 6922 break; 6923 case 2: 6924 /* XXX compute */ 6925 queue_reset = true; 6926 break; 6927 } 6928 break; 6929 } 6930 break; 6931 default: 6932 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6933 break; 6934 } 6935 6936 /* wptr/rptr are in bytes! */ 6937 rptr += 16; 6938 rptr &= rdev->ih.ptr_mask; 6939 } 6940 if (queue_hotplug) 6941 schedule_work(&rdev->hotplug_work); 6942 if (queue_reset) 6943 schedule_work(&rdev->reset_work); 6944 if (queue_thermal) 6945 schedule_work(&rdev->pm.dpm.thermal.work); 6946 rdev->ih.rptr = rptr; 6947 WREG32(IH_RB_RPTR, rdev->ih.rptr); 6948 atomic_set(&rdev->ih.lock, 0); 6949 6950 /* make sure wptr hasn't changed while processing */ 6951 wptr = cik_get_ih_wptr(rdev); 6952 if (wptr != rptr) 6953 goto restart_ih; 6954 6955 return IRQ_HANDLED; 6956 } 6957 6958 /* 6959 * startup/shutdown callbacks 6960 */ 6961 /** 6962 * cik_startup - program the asic to a functional state 6963 * 6964 * @rdev: radeon_device pointer 6965 * 6966 * Programs the asic to a functional state (CIK). 6967 * Called by cik_init() and cik_resume(). 6968 * Returns 0 for success, error for failure. 
6969 */ 6970 static int cik_startup(struct radeon_device *rdev) 6971 { 6972 struct radeon_ring *ring; 6973 int r; 6974 6975 /* enable pcie gen2/3 link */ 6976 cik_pcie_gen3_enable(rdev); 6977 /* enable aspm */ 6978 cik_program_aspm(rdev); 6979 6980 /* scratch needs to be initialized before MC */ 6981 r = r600_vram_scratch_init(rdev); 6982 if (r) 6983 return r; 6984 6985 cik_mc_program(rdev); 6986 6987 if (rdev->flags & RADEON_IS_IGP) { 6988 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || 6989 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) { 6990 r = cik_init_microcode(rdev); 6991 if (r) { 6992 DRM_ERROR("Failed to load firmware!\n"); 6993 return r; 6994 } 6995 } 6996 } else { 6997 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || 6998 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw || 6999 !rdev->mc_fw) { 7000 r = cik_init_microcode(rdev); 7001 if (r) { 7002 DRM_ERROR("Failed to load firmware!\n"); 7003 return r; 7004 } 7005 } 7006 7007 r = ci_mc_load_microcode(rdev); 7008 if (r) { 7009 DRM_ERROR("Failed to load MC firmware!\n"); 7010 return r; 7011 } 7012 } 7013 7014 r = cik_pcie_gart_enable(rdev); 7015 if (r) 7016 return r; 7017 cik_gpu_init(rdev); 7018 7019 /* allocate rlc buffers */ 7020 if (rdev->flags & RADEON_IS_IGP) { 7021 if (rdev->family == CHIP_KAVERI) { 7022 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list; 7023 rdev->rlc.reg_list_size = 7024 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list); 7025 } else { 7026 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list; 7027 rdev->rlc.reg_list_size = 7028 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list); 7029 } 7030 } 7031 rdev->rlc.cs_data = ci_cs_data; 7032 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4; 7033 r = sumo_rlc_init(rdev); 7034 if (r) { 7035 DRM_ERROR("Failed to init rlc BOs!\n"); 7036 return r; 7037 } 7038 7039 /* allocate wb buffer */ 7040 r = radeon_wb_init(rdev); 7041 if (r) 7042 return r; 7043 7044 /* allocate mec buffers */ 7045 r = cik_mec_init(rdev); 7046 if (r) { 7047 DRM_ERROR("Failed to init MEC BOs!\n"); 7048 return r; 7049 } 7050 7051 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); 7052 if (r) { 7053 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 7054 return r; 7055 } 7056 7057 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); 7058 if (r) { 7059 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 7060 return r; 7061 } 7062 7063 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX); 7064 if (r) { 7065 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 7066 return r; 7067 } 7068 7069 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); 7070 if (r) { 7071 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); 7072 return r; 7073 } 7074 7075 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); 7076 if (r) { 7077 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); 7078 return r; 7079 } 7080 7081 r = radeon_uvd_resume(rdev); 7082 if (!r) { 7083 r = uvd_v4_2_resume(rdev); 7084 if (!r) { 7085 r = radeon_fence_driver_start_ring(rdev, 7086 R600_RING_TYPE_UVD_INDEX); 7087 if (r) 7088 dev_err(rdev->dev, "UVD fences init error (%d).\n", r); 7089 } 7090 } 7091 if (r) 7092 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; 7093 7094 /* Enable IRQ */ 7095 if (!rdev->irq.installed) { 7096 r = radeon_irq_kms_init(rdev); 7097 if (r) 7098 return r; 7099 } 7100 7101 r = cik_irq_init(rdev); 7102 if (r) { 7103 DRM_ERROR("radeon: IH 
init failed (%d).\n", r); 7104 radeon_irq_kms_fini(rdev); 7105 return r; 7106 } 7107 cik_irq_set(rdev); 7108 7109 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 7110 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, 7111 CP_RB0_RPTR, CP_RB0_WPTR, 7112 RADEON_CP_PACKET2); 7113 if (r) 7114 return r; 7115 7116 /* set up the compute queues */ 7117 /* type-2 packets are deprecated on MEC, use type-3 instead */ 7118 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 7119 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET, 7120 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR, 7121 PACKET3(PACKET3_NOP, 0x3FFF)); 7122 if (r) 7123 return r; 7124 ring->me = 1; /* first MEC */ 7125 ring->pipe = 0; /* first pipe */ 7126 ring->queue = 0; /* first queue */ 7127 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET; 7128 7129 /* type-2 packets are deprecated on MEC, use type-3 instead */ 7130 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 7131 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET, 7132 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR, 7133 PACKET3(PACKET3_NOP, 0x3FFF)); 7134 if (r) 7135 return r; 7136 /* dGPU only have 1 MEC */ 7137 ring->me = 1; /* first MEC */ 7138 ring->pipe = 0; /* first pipe */ 7139 ring->queue = 1; /* second queue */ 7140 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET; 7141 7142 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; 7143 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, 7144 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET, 7145 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET, 7146 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); 7147 if (r) 7148 return r; 7149 7150 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; 7151 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, 7152 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET, 7153 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET, 7154 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); 7155 if (r) 7156 return r; 7157 7158 r = cik_cp_resume(rdev); 7159 if (r) 7160 return r; 7161 7162 r = cik_sdma_resume(rdev); 7163 if (r) 7164 return r; 7165 7166 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; 7167 if (ring->ring_size) { 7168 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 7169 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, 7170 RADEON_CP_PACKET2); 7171 if (!r) 7172 r = uvd_v1_0_init(rdev); 7173 if (r) 7174 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); 7175 } 7176 7177 r = radeon_ib_pool_init(rdev); 7178 if (r) { 7179 dev_err(rdev->dev, "IB initialization failed (%d).\n", r); 7180 return r; 7181 } 7182 7183 r = radeon_vm_manager_init(rdev); 7184 if (r) { 7185 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r); 7186 return r; 7187 } 7188 7189 r = dce6_audio_init(rdev); 7190 if (r) 7191 return r; 7192 7193 return 0; 7194 } 7195 7196 /** 7197 * cik_resume - resume the asic to a functional state 7198 * 7199 * @rdev: radeon_device pointer 7200 * 7201 * Programs the asic to a functional state (CIK). 7202 * Called at resume. 7203 * Returns 0 for success, error for failure. 
 */
int cik_resume(struct radeon_device *rdev)
{
	int r;

	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	cik_init_golden_registers(rdev);

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		DRM_ERROR("cik startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;
}

/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
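/* cik_suspend() above unwinds cik_startup() in roughly reverse order: CP and
 * SDMA are halted before power/clock gating is torn down, then interrupts,
 * writeback, and finally the GART. A caller-side sketch of the pairing,
 * assuming the standard radeon PM paths drive these hooks (illustrative only;
 * the helper name is hypothetical):
 */
#if 0
static int cik_pm_cycle_sketch(struct radeon_device *rdev)
{
	int r = cik_suspend(rdev);	/* quiesce rings, irqs, gart */

	if (r)
		return r;
	/* ... system sleeps and wakes ... */
	return cik_resume(rdev);	/* re-post the card and rerun cik_startup() */
}
#endif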
/* The plan is to move initialization into this function and use helper
 * functions so that radeon_device_init does little more than call the
 * asic-specific functions. This should also allow us to remove a bunch
 * of callbacks like vram_info.
 */
/**
 * cik_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Setup asic specific driver variables and program the hw
 * to a functional state (CIK).
 * Called at driver startup.
 * Returns 0 for success, errors for failure.
 */
int cik_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	cik_init_golden_registers(rdev);
	/* Initialize scratch registers */
	cik_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = cik_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cik_cp_fini(rdev);
		cik_sdma_fini(rdev);
		cik_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		cik_mec_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cik_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for CIK+.\n");
		return -EINVAL;
	}

	return 0;
}
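/* The firmware gate at the end of cik_init() above is asymmetric: only
 * discrete parts (e.g. Bonaire) load MC ucode, while IGPs use the system
 * memory controller and skip it. A sketch of that predicate (hypothetical
 * helper, not part of the driver):
 */
#if 0
static bool cik_needs_mc_ucode_sketch(struct radeon_device *rdev)
{
	/* dGPU: MC ucode required; IGP: no MC firmware to load */
	return !(rdev->flags & RADEON_IS_IGP);
}
#endif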
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 */
void cik_fini(struct radeon_device *rdev)
{
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}

/* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Set up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controller.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width. For display widths larger
	 * than 4096, you need to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (mode->crtc_hdisplay < 1920) {
			tmp = 1;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 2560) {
			tmp = 2;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 4096) {
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		} else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		}
	} else {
		tmp = 1;
		buffer_alloc = 0;
	}

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}

/**
 * cik_get_number_of_dram_channels - get the number of dram channels
 *
 * @rdev: radeon_device pointer
 *
 * Look up the number of video ram channels (CIK).
7503 * Used for display watermark bandwidth calculations 7504 * Returns the number of dram channels 7505 */ 7506 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev) 7507 { 7508 u32 tmp = RREG32(MC_SHARED_CHMAP); 7509 7510 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { 7511 case 0: 7512 default: 7513 return 1; 7514 case 1: 7515 return 2; 7516 case 2: 7517 return 4; 7518 case 3: 7519 return 8; 7520 case 4: 7521 return 3; 7522 case 5: 7523 return 6; 7524 case 6: 7525 return 10; 7526 case 7: 7527 return 12; 7528 case 8: 7529 return 16; 7530 } 7531 } 7532 7533 struct dce8_wm_params { 7534 u32 dram_channels; /* number of dram channels */ 7535 u32 yclk; /* bandwidth per dram data pin in kHz */ 7536 u32 sclk; /* engine clock in kHz */ 7537 u32 disp_clk; /* display clock in kHz */ 7538 u32 src_width; /* viewport width */ 7539 u32 active_time; /* active display time in ns */ 7540 u32 blank_time; /* blank time in ns */ 7541 bool interlaced; /* mode is interlaced */ 7542 fixed20_12 vsc; /* vertical scale ratio */ 7543 u32 num_heads; /* number of active crtcs */ 7544 u32 bytes_per_pixel; /* bytes per pixel display + overlay */ 7545 u32 lb_size; /* line buffer allocated to pipe */ 7546 u32 vtaps; /* vertical scaler taps */ 7547 }; 7548 7549 /** 7550 * dce8_dram_bandwidth - get the dram bandwidth 7551 * 7552 * @wm: watermark calculation data 7553 * 7554 * Calculate the raw dram bandwidth (CIK). 7555 * Used for display watermark bandwidth calculations 7556 * Returns the dram bandwidth in MBytes/s 7557 */ 7558 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm) 7559 { 7560 /* Calculate raw DRAM Bandwidth */ 7561 fixed20_12 dram_efficiency; /* 0.7 */ 7562 fixed20_12 yclk, dram_channels, bandwidth; 7563 fixed20_12 a; 7564 7565 a.full = dfixed_const(1000); 7566 yclk.full = dfixed_const(wm->yclk); 7567 yclk.full = dfixed_div(yclk, a); 7568 dram_channels.full = dfixed_const(wm->dram_channels * 4); 7569 a.full = dfixed_const(10); 7570 dram_efficiency.full = dfixed_const(7); 7571 dram_efficiency.full = dfixed_div(dram_efficiency, a); 7572 bandwidth.full = dfixed_mul(dram_channels, yclk); 7573 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency); 7574 7575 return dfixed_trunc(bandwidth); 7576 } 7577 7578 /** 7579 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display 7580 * 7581 * @wm: watermark calculation data 7582 * 7583 * Calculate the dram bandwidth used for display (CIK). 7584 * Used for display watermark bandwidth calculations 7585 * Returns the dram bandwidth for display in MBytes/s 7586 */ 7587 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm) 7588 { 7589 /* Calculate DRAM Bandwidth and the part allocated to display. 
 */
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	disp_dram_allocation.full = dfixed_const(3); /* XXX worst-case value 0.3 */
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_data_return_bandwidth - get the data return bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the data return bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the data return bandwidth in MBytes/s
 */
static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display Data return Bandwidth */
	fixed20_12 return_efficiency; /* 0.8 */
	fixed20_12 sclk, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(10);
	return_efficiency.full = dfixed_const(8);
	return_efficiency.full = dfixed_div(return_efficiency, a);
	a.full = dfixed_const(32);
	bandwidth.full = dfixed_mul(a, sclk);
	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_dmif_request_bandwidth - get the dmif bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the dmif bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dmif bandwidth in MBytes/s
 */
static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the DMIF Request Bandwidth */
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
	fixed20_12 disp_clk, bandwidth;
	fixed20_12 a, b;

	a.full = dfixed_const(1000);
	disp_clk.full = dfixed_const(wm->disp_clk);
	disp_clk.full = dfixed_div(disp_clk, a);
	a.full = dfixed_const(32);
	b.full = dfixed_mul(a, disp_clk);

	a.full = dfixed_const(10);
	disp_clk_request_efficiency.full = dfixed_const(8);
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);

	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_available_bandwidth - get the min available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the min available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the min available bandwidth in MBytes/s
 */
static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the available bandwidth. The display can use this
	 * temporarily but not on average.
	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);

	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
}

/**
 * dce8_average_bandwidth - get the average available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the average available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the average available bandwidth in MBytes/s
 */
static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display mode Average Bandwidth
	 * DisplayMode should contain the source and destination dimensions,
	 * timing, etc.
	 */
	fixed20_12 bpp;
	fixed20_12 line_time;
	fixed20_12 src_width;
	fixed20_12 bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
	line_time.full = dfixed_div(line_time, a);
	bpp.full = dfixed_const(wm->bytes_per_pixel);
	src_width.full = dfixed_const(wm->src_width);
	bandwidth.full = dfixed_mul(src_width, bpp);
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
	bandwidth.full = dfixed_div(bandwidth, line_time);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
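	/*
	 * Illustrative numbers (not from the source): with 11200 MB/s
	 * available, worst_chunk_return_time = 512 * 8 * 1000 / 11200
	 * ~= 365 ns and cursor_line_pair_return_time = 128 * 4 * 1000 /
	 * 11200 ~= 45 ns; with two heads and disp_clk = 600000 kHz the
	 * base latency is roughly 2000 + (3 * 365 + 2 * 45) +
	 * 40000000 / 600000 = 2000 + 1185 + 66 = 3251 ns, before any
	 * line fill time is added below.
	 */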
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}

/**
 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
 * average and available dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
		return true;
	else
		return false;
}

/**
 * dce8_average_bandwidth_vs_available_bandwidth - check
 * average and available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * available bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_available_bandwidth(wm) / wm->num_heads))
		return true;
	else
		return false;
}

/**
 * dce8_check_latency_hiding - check latency hiding
 *
 * @wm: watermark calculation data
 *
 * Check latency hiding (CIK).
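 * For example (illustrative numbers, not from the source): one latency
 * tolerant line at a 13200 ns line time plus 1680 ns of blank hides
 * 1 * 13200 + 1680 = 14880 ns, comfortably above the ~3251 ns latency
 * watermark from the example above.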
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
{
	u32 lb_partitions = wm->lb_size / wm->src_width;
	u32 line_time = wm->active_time + wm->blank_time;
	u32 latency_tolerant_lines;
	u32 latency_hiding;
	fixed20_12 a;

	a.full = dfixed_const(1);
	if (wm->vsc.full > a.full)
		latency_tolerant_lines = 1;
	else {
		if (lb_partitions <= (wm->vtaps + 1))
			latency_tolerant_lines = 1;
		else
			latency_tolerant_lines = 2;
	}

	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);

	if (dce8_latency_watermark(wm) <= latency_hiding)
		return true;
	else
		return false;
}

/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
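		/*
		 * Note: when one of the checks below fails, only a debug
		 * message is emitted here; the priority value itself comes
		 * from rdev->disp_priority, which is set by
		 * radeon_update_display_priority() before this runs.
		 */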
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}

/**
 * dce8_bandwidth_update - program display watermarks
 *
 * @rdev: radeon_device pointer
 *
 * Calculate and program the display watermarks and line
 * buffer allocation (CIK).
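 * The update runs in two passes: the first loop counts the enabled
 * CRTCs (num_heads), the second loop programs the line buffer split
 * and the watermarks for each CRTC.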
 */
void dce8_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	radeon_update_display_priority(rdev);

	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	for (i = 0; i < rdev->num_crtc; i++) {
		mode = &rdev->mode_info.crtcs[i]->base.mode;
		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
	}
}

/**
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (CIK).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	mutex_lock(&rdev->gpu_clock_mutex);
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&rdev->gpu_clock_mutex);
	return clock;
}

static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			     u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}

int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	int r = 0;

	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
	if (r)
		return r;

	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
	return r;
}

static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
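	/* bridge_pos/gpu_pos are the config space offsets of the PCIe
	 * capability; pci_pcie_cap() returns 0 if the capability is
	 * absent. */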
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
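	/* PCI_EXP_LNKCTL2 target link speed encoding (per the PCIe spec):
	 * 1 = 2.5 GT/s (gen1), 2 = 5.0 GT/s (gen2), 3 = 8.0 GT/s (gen3). */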
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}

static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
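				/* PCI_EXP_LNKCAP_CLKPM on the upstream
				 * bridge indicates CLKREQ# based clock
				 * power management is supported, which is
				 * what allows the REFCLK source switching
				 * in the clk_req_support path below. */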
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
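/*
 * A minimal usage sketch (an assumption about the wiring elsewhere in
 * this file, not shown here): both helpers are expected to run once
 * during hardware init, before the rings are brought up, e.g.:
 *
 *	cik_pcie_gen3_enable(rdev);	// raise link speed if supported
 *	cik_program_aspm(rdev);		// then tune ASPM/L0s/L1 behaviour
 */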