/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
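/*
 * Note: the images named above are not bundled with the module; they are
 * fetched at runtime via request_firmware() in cik_init_microcode() below.
 * MODULE_FIRMWARE() only records the names in the module metadata so that
 * tooling (e.g. initramfs generators) can pick them up.
 */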

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
				 struct radeon_ib *ib,
				 uint64_t pe,
				 uint64_t addr, unsigned count,
				 uint32_t incr, uint32_t flags);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
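/*
 * Worked example for the conversions above: on CI a CTF_TEMP field of
 * 0x062 is 98 degrees C -> 98000 millidegrees, and values with bit 9
 * (0x200) set are clamped to 255 degrees C.  On KV a raw SMC reading of
 * 584 yields 584 / 8 - 49 = 24 degrees C -> 24000 millidegrees.
 */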

/*
 * Indirect registers accessor
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
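
/*
 * RLC save/restore register lists.  Each entry below appears to pack an
 * instance selector in the upper 16 bits and a register dword offset
 * (byte offset >> 2) in the lower 16 bits, followed by one scratch dword,
 * presumably reserved for the saved register value; the bare counts
 * (0x3, 0x5) seem to open indexed sub-lists.  The lists are handed to the
 * RLC through sumo_rlc_init() (declared above).
 */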
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x829c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x869c >> 2), 0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2), 0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc900 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc904 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc908 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0xae00 << 16) | (0xc90c >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc910 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2), 0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc770 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc774 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc778 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc77c >> 2), 0x00000000,
	(0x0400 << 16) | (0xc780 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc784 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc788 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc78c >> 2), 0x00000000,
	(0x0400 << 16) | (0xc798 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc79c >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x970c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x971c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0xae00 << 16) | (0x31068 >> 2), 0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2), 0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2), 0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
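
/*
 * Kalindi (KB) variant of the list above; as a small APU it exposes fewer
 * instances, which is presumably why the per-instance entries stop at the
 * 0x7e00 selectors and a few Kaveri-only registers are absent.
 */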
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x829c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x869c >> 2), 0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2), 0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2), 0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2), 0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc770 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc774 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc798 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc79c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x970c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x971c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2), 0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2), 0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
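
/*
 * "Golden" register tables.  Each entry is a {register offset, AND mask,
 * OR value} triplet as consumed by radeon_program_register_sequence():
 * the masked bits are cleared and replaced by the OR value, and a mask of
 * 0xffffffff writes the value straight through.
 */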
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
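
/*
 * Program the per-family golden settings above; intended to run early in
 * the init/resume paths, before the rest of the hw is set up.
 */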
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}
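
/*
 * MC sequencer io debug table for Bonaire: {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} pairs, written out verbatim by
 * ci_mc_load_microcode() below before the MC ucode itself is loaded.
 */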
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active registers instances. Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
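
/* e.g. cik_srbm_select(rdev, 1, 0, 0, 0) writes MEID(1) into SRBM_GFX_CNTL,
 * pointing the instanced register window at ME 1, pipe 0, queue 0, VMID 0.
 */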

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
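		/* Note: this branch is unreachable as written; the outer
		 * check guarantees running == 0 here, so the MC blackout
		 * save/restore below is dead code.
		 */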
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size, smc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}

		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			printk(KERN_ERR
			       "smc: error loading firmware \"%s\"\n",
			       fw_name);
			release_firmware(rdev->smc_fw);
			rdev->smc_fw = NULL;
		} else if (rdev->smc_fw->size != smc_req_size) {
			printk(KERN_ERR
			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->smc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes. Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */
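
	/*
	 * Each GB_TILE_MODE entry below packs array mode, pipe config,
	 * micro tile mode and tile split for one table index; index 0, for
	 * example, describes a 2D tiled depth surface with a 64B tile split.
	 */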
	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1958 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1959 NUM_BANKS(ADDR_SURF_16_BANK)); 1960 break; 1961 case 12: 1962 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1963 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1964 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1965 NUM_BANKS(ADDR_SURF_8_BANK)); 1966 break; 1967 case 13: 1968 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1969 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1970 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1971 NUM_BANKS(ADDR_SURF_4_BANK)); 1972 break; 1973 case 14: 1974 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1975 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1976 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1977 NUM_BANKS(ADDR_SURF_2_BANK)); 1978 break; 1979 default: 1980 gb_tile_moden = 0; 1981 break; 1982 } 1983 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 1984 } 1985 } else if (num_pipe_configs == 4) { 1986 if (num_rbs == 4) { 1987 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 1988 switch (reg_offset) { 1989 case 0: 1990 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1991 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1992 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1993 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 1994 break; 1995 case 1: 1996 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1997 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1998 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1999 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2000 break; 2001 case 2: 2002 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2003 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2004 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2005 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2006 break; 2007 case 3: 2008 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2009 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2010 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2011 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2012 break; 2013 case 4: 2014 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2015 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2016 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2017 TILE_SPLIT(split_equal_to_row_size)); 2018 break; 2019 case 5: 2020 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2021 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2022 break; 2023 case 6: 2024 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2025 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2026 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2027 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2028 break; 2029 case 7: 2030 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2031 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2032 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2033 TILE_SPLIT(split_equal_to_row_size)); 2034 break; 2035 case 8: 2036 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2037 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2038 break; 2039 case 9: 2040 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2041 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2042 break; 2043 case 10: 2044 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2045 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2046 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2047 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2048 break; 2049 case 11: 2050 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2051 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2052 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2053 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2054 break; 2055 case 12: 2056 gb_tile_moden = 
(ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2057 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2058 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2059 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2060 break; 2061 case 13: 2062 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2063 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2064 break; 2065 case 14: 2066 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2067 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2068 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2069 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2070 break; 2071 case 16: 2072 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2073 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2074 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2075 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2076 break; 2077 case 17: 2078 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2079 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2080 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2081 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2082 break; 2083 case 27: 2084 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2085 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2086 break; 2087 case 28: 2088 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2089 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2090 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2091 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2092 break; 2093 case 29: 2094 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2095 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2096 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2097 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2098 break; 2099 case 30: 2100 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2101 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2102 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2103 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2104 break; 2105 default: 2106 gb_tile_moden = 0; 2107 break; 2108 } 2109 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 2110 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2111 } 2112 } else if (num_rbs < 4) { 2113 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 2114 switch (reg_offset) { 2115 case 0: 2116 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2117 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2118 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2119 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 2120 break; 2121 case 1: 2122 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2123 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2124 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2125 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2126 break; 2127 case 2: 2128 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2129 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2130 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2131 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2132 break; 2133 case 3: 2134 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2135 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2136 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2137 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2138 break; 2139 case 4: 2140 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2141 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2142 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2143 TILE_SPLIT(split_equal_to_row_size)); 2144 break; 2145 case 5: 2146 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2147 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2148 break; 2149 case 6: 2150 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2151 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2152 
PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2153 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2154 break; 2155 case 7: 2156 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2157 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2158 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2159 TILE_SPLIT(split_equal_to_row_size)); 2160 break; 2161 case 8: 2162 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2163 PIPE_CONFIG(ADDR_SURF_P4_8x16)); 2164 break; 2165 case 9: 2166 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2167 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2168 break; 2169 case 10: 2170 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2171 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2172 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2173 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2174 break; 2175 case 11: 2176 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2177 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2178 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2179 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2180 break; 2181 case 12: 2182 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2183 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2184 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2185 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2186 break; 2187 case 13: 2188 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2189 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2190 break; 2191 case 14: 2192 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2193 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2194 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2195 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2196 break; 2197 case 16: 2198 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2199 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2200 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2201 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2202 break; 2203 case 17: 2204 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2205 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2206 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2207 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2208 break; 2209 case 27: 2210 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2211 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2212 break; 2213 case 28: 2214 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2215 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2216 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2217 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2218 break; 2219 case 29: 2220 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2221 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2222 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2223 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2224 break; 2225 case 30: 2226 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2227 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2228 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2229 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2230 break; 2231 default: 2232 gb_tile_moden = 0; 2233 break; 2234 } 2235 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 2236 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2237 } 2238 } 2239 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { 2240 switch (reg_offset) { 2241 case 0: 2242 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2245 NUM_BANKS(ADDR_SURF_16_BANK)); 2246 break; 2247 case 1: 2248 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2249 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2250 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2251 NUM_BANKS(ADDR_SURF_16_BANK)); 2252 break; 2253 case 2: 2254 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2257 NUM_BANKS(ADDR_SURF_16_BANK)); 2258 break; 2259 case 3: 2260 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2261 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2262 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2263 NUM_BANKS(ADDR_SURF_16_BANK)); 2264 break; 2265 case 4: 2266 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2267 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2268 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2269 NUM_BANKS(ADDR_SURF_16_BANK)); 2270 break; 2271 case 5: 2272 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2275 NUM_BANKS(ADDR_SURF_8_BANK)); 2276 break; 2277 case 6: 2278 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2279 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2280 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2281 NUM_BANKS(ADDR_SURF_4_BANK)); 2282 break; 2283 case 8: 2284 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2285 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2286 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2287 NUM_BANKS(ADDR_SURF_16_BANK)); 2288 break; 2289 case 9: 2290 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2291 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2292 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2293 NUM_BANKS(ADDR_SURF_16_BANK)); 2294 break; 2295 case 10: 2296 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2297 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2298 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2299 NUM_BANKS(ADDR_SURF_16_BANK)); 2300 break; 2301 case 11: 2302 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2303 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2304 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2305 NUM_BANKS(ADDR_SURF_16_BANK)); 2306 break; 2307 case 12: 2308 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2309 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2310 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2311 NUM_BANKS(ADDR_SURF_16_BANK)); 2312 break; 2313 case 13: 2314 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2315 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2316 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2317 NUM_BANKS(ADDR_SURF_8_BANK)); 2318 break; 2319 case 14: 2320 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2321 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2322 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2323 NUM_BANKS(ADDR_SURF_4_BANK)); 2324 break; 2325 default: 2326 gb_tile_moden = 0; 2327 break; 2328 } 2329 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2330 } 2331 } else if (num_pipe_configs == 2) { 2332 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 2333 switch (reg_offset) { 2334 case 0: 2335 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2336 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2337 PIPE_CONFIG(ADDR_SURF_P2) | 2338 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 2339 break; 2340 case 1: 2341 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2342 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2343 PIPE_CONFIG(ADDR_SURF_P2) | 2344 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2345 break; 2346 case 2: 2347 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2348 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2349 PIPE_CONFIG(ADDR_SURF_P2) | 2350 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2351 break; 2352 case 3: 2353 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2354 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2355 PIPE_CONFIG(ADDR_SURF_P2) | 2356 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2357 break; 2358 case 4: 2359 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2360 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2361 PIPE_CONFIG(ADDR_SURF_P2) | 2362 TILE_SPLIT(split_equal_to_row_size)); 2363 break; 2364 case 5: 2365 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2366 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2367 break; 2368 case 6: 2369 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2370 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2371 PIPE_CONFIG(ADDR_SURF_P2) | 2372 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2373 break; 2374 case 7: 2375 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2376 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2377 PIPE_CONFIG(ADDR_SURF_P2) | 2378 TILE_SPLIT(split_equal_to_row_size)); 2379 break; 2380 case 8: 2381 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED); 2382 break; 2383 case 9: 2384 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2385 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2386 break; 2387 case 10: 2388 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2389 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2390 PIPE_CONFIG(ADDR_SURF_P2) | 2391 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2392 break; 2393 case 11: 2394 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2395 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2396 PIPE_CONFIG(ADDR_SURF_P2) | 2397 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2398 break; 2399 case 12: 2400 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2401 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2402 PIPE_CONFIG(ADDR_SURF_P2) | 2403 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2404 break; 2405 case 13: 2406 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2407 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2408 break; 2409 case 14: 2410 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2411 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2412 PIPE_CONFIG(ADDR_SURF_P2) | 2413 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2414 break; 2415 case 16: 2416 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2417 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2418 PIPE_CONFIG(ADDR_SURF_P2) | 2419 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2420 break; 2421 case 17: 2422 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2423 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2424 PIPE_CONFIG(ADDR_SURF_P2) | 2425 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2426 break; 2427 case 27: 2428 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2429 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2430 break; 2431 case 28: 2432 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2433 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2434 PIPE_CONFIG(ADDR_SURF_P2) | 2435 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2436 break; 2437 case 29: 2438 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2439 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2440 PIPE_CONFIG(ADDR_SURF_P2) | 2441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2442 break; 2443 case 30: 2444 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2445 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2446 PIPE_CONFIG(ADDR_SURF_P2) | 2447 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 
2448 break; 2449 default: 2450 gb_tile_moden = 0; 2451 break; 2452 } 2453 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 2454 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2455 } 2456 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { 2457 switch (reg_offset) { 2458 case 0: 2459 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2460 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2461 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2462 NUM_BANKS(ADDR_SURF_16_BANK)); 2463 break; 2464 case 1: 2465 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2466 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2467 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2468 NUM_BANKS(ADDR_SURF_16_BANK)); 2469 break; 2470 case 2: 2471 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2472 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2473 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2474 NUM_BANKS(ADDR_SURF_16_BANK)); 2475 break; 2476 case 3: 2477 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2478 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2479 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2480 NUM_BANKS(ADDR_SURF_16_BANK)); 2481 break; 2482 case 4: 2483 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2484 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2485 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2486 NUM_BANKS(ADDR_SURF_16_BANK)); 2487 break; 2488 case 5: 2489 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2490 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2491 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2492 NUM_BANKS(ADDR_SURF_16_BANK)); 2493 break; 2494 case 6: 2495 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2496 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2497 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2498 NUM_BANKS(ADDR_SURF_8_BANK)); 2499 break; 2500 case 8: 2501 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2502 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2503 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2504 NUM_BANKS(ADDR_SURF_16_BANK)); 2505 break; 2506 case 9: 2507 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2508 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2509 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2510 NUM_BANKS(ADDR_SURF_16_BANK)); 2511 break; 2512 case 10: 2513 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2514 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2515 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2516 NUM_BANKS(ADDR_SURF_16_BANK)); 2517 break; 2518 case 11: 2519 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2520 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2521 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2522 NUM_BANKS(ADDR_SURF_16_BANK)); 2523 break; 2524 case 12: 2525 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2526 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2527 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2528 NUM_BANKS(ADDR_SURF_16_BANK)); 2529 break; 2530 case 13: 2531 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2532 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2533 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2534 NUM_BANKS(ADDR_SURF_16_BANK)); 2535 break; 2536 case 14: 2537 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2538 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2539 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2540 NUM_BANKS(ADDR_SURF_8_BANK)); 2541 break; 2542 default: 2543 gb_tile_moden = 0; 2544 break; 2545 } 2546 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2547 } 2548 } else 2549 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs); 2550 } 
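/* Illustrative sketch, not part of the driver: cik_tiling_mode_table_init()
 * above caches each GB_TILE_MODE dword in rdev->config.cik.tile_mode_array[]
 * as it programs the registers.  A hypothetical consumer could fetch an entry
 * like this; the helper name is made up, and indices 15, 18-26 and 31 read
 * back as 0 because the switch statements above program them as 0.
 */
static inline u32 cik_example_tile_mode(struct radeon_device *rdev,
					unsigned int index)
{
	if (index >= ARRAY_SIZE(rdev->config.cik.tile_mode_array))
		return 0;
	return rdev->config.cik.tile_mode_array[index];
}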
2551 2552 /** 2553 * cik_select_se_sh - select which SE, SH to address 2554 * 2555 * @rdev: radeon_device pointer 2556 * @se_num: shader engine to address 2557 * @sh_num: sh block to address 2558 * 2559 * Select which SE, SH combinations to address. Certain 2560 * registers are instanced per SE or SH. 0xffffffff means 2561 * broadcast to all SEs or SHs (CIK). 2562 */ 2563 static void cik_select_se_sh(struct radeon_device *rdev, 2564 u32 se_num, u32 sh_num) 2565 { 2566 u32 data = INSTANCE_BROADCAST_WRITES; 2567 2568 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) 2569 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES; 2570 else if (se_num == 0xffffffff) 2571 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num); 2572 else if (sh_num == 0xffffffff) 2573 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num); 2574 else 2575 data |= SH_INDEX(sh_num) | SE_INDEX(se_num); 2576 WREG32(GRBM_GFX_INDEX, data); 2577 } 2578 2579 /** 2580 * cik_create_bitmask - create a bitmask 2581 * 2582 * @bit_width: length of the mask 2583 * 2584 * create a variable length bit mask (CIK). 2585 * Returns the bitmask. 2586 */ 2587 static u32 cik_create_bitmask(u32 bit_width) 2588 { 2589 u32 i, mask = 0; 2590 2591 for (i = 0; i < bit_width; i++) { 2592 mask <<= 1; 2593 mask |= 1; 2594 } 2595 return mask; 2596 } 2597 2598 /** 2599 * cik_get_rb_disabled - compute the mask of disabled RBs 2600 * 2601 * @rdev: radeon_device pointer 2602 * @max_rb_num: max RBs (render backends) for the asic 2603 * @se_num: number of SEs (shader engines) for the asic 2604 * @sh_per_se: number of SH blocks per SE for the asic 2605 * 2606 * Calculates the bitmask of disabled RBs (CIK). 2607 * Returns the disabled RB bitmask. 2608 */ 2609 static u32 cik_get_rb_disabled(struct radeon_device *rdev, 2610 u32 max_rb_num, u32 se_num, 2611 u32 sh_per_se) 2612 { 2613 u32 data, mask; 2614 2615 data = RREG32(CC_RB_BACKEND_DISABLE); 2616 if (data & 1) 2617 data &= BACKEND_DISABLE_MASK; 2618 else 2619 data = 0; 2620 data |= RREG32(GC_USER_RB_BACKEND_DISABLE); 2621 2622 data >>= BACKEND_DISABLE_SHIFT; 2623 2624 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se); 2625 2626 return data & mask; 2627 } 2628 2629 /** 2630 * cik_setup_rb - setup the RBs on the asic 2631 * 2632 * @rdev: radeon_device pointer 2633 * @se_num: number of SEs (shader engines) for the asic 2634 * @sh_per_se: number of SH blocks per SE for the asic 2635 * @max_rb_num: max RBs (render backends) for the asic 2636 * 2637 * Configures per-SE/SH RB registers (CIK).
2638 */ 2639 static void cik_setup_rb(struct radeon_device *rdev, 2640 u32 se_num, u32 sh_per_se, 2641 u32 max_rb_num) 2642 { 2643 int i, j; 2644 u32 data, mask; 2645 u32 disabled_rbs = 0; 2646 u32 enabled_rbs = 0; 2647 2648 for (i = 0; i < se_num; i++) { 2649 for (j = 0; j < sh_per_se; j++) { 2650 cik_select_se_sh(rdev, i, j); 2651 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se); 2652 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH); 2653 } 2654 } 2655 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 2656 2657 mask = 1; 2658 for (i = 0; i < max_rb_num; i++) { 2659 if (!(disabled_rbs & mask)) 2660 enabled_rbs |= mask; 2661 mask <<= 1; 2662 } 2663 2664 for (i = 0; i < se_num; i++) { 2665 cik_select_se_sh(rdev, i, 0xffffffff); 2666 data = 0; 2667 for (j = 0; j < sh_per_se; j++) { 2668 switch (enabled_rbs & 3) { 2669 case 1: 2670 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2); 2671 break; 2672 case 2: 2673 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2); 2674 break; 2675 case 3: 2676 default: 2677 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2); 2678 break; 2679 } 2680 enabled_rbs >>= 2; 2681 } 2682 WREG32(PA_SC_RASTER_CONFIG, data); 2683 } 2684 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 2685 } 2686 2687 /** 2688 * cik_gpu_init - setup the 3D engine 2689 * 2690 * @rdev: radeon_device pointer 2691 * 2692 * Configures the 3D engine and tiling configuration 2693 * registers so that the 3D engine is usable. 2694 */ 2695 static void cik_gpu_init(struct radeon_device *rdev) 2696 { 2697 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG); 2698 u32 mc_shared_chmap, mc_arb_ramcfg; 2699 u32 hdp_host_path_cntl; 2700 u32 tmp; 2701 int i, j; 2702 2703 switch (rdev->family) { 2704 case CHIP_BONAIRE: 2705 rdev->config.cik.max_shader_engines = 2; 2706 rdev->config.cik.max_tile_pipes = 4; 2707 rdev->config.cik.max_cu_per_sh = 7; 2708 rdev->config.cik.max_sh_per_se = 1; 2709 rdev->config.cik.max_backends_per_se = 2; 2710 rdev->config.cik.max_texture_channel_caches = 4; 2711 rdev->config.cik.max_gprs = 256; 2712 rdev->config.cik.max_gs_threads = 32; 2713 rdev->config.cik.max_hw_contexts = 8; 2714 2715 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20; 2716 rdev->config.cik.sc_prim_fifo_size_backend = 0x100; 2717 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30; 2718 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130; 2719 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; 2720 break; 2721 case CHIP_KAVERI: 2722 rdev->config.cik.max_shader_engines = 1; 2723 rdev->config.cik.max_tile_pipes = 4; 2724 if ((rdev->pdev->device == 0x1304) || 2725 (rdev->pdev->device == 0x1305) || 2726 (rdev->pdev->device == 0x130C) || 2727 (rdev->pdev->device == 0x130F) || 2728 (rdev->pdev->device == 0x1310) || 2729 (rdev->pdev->device == 0x1311) || 2730 (rdev->pdev->device == 0x131C)) { 2731 rdev->config.cik.max_cu_per_sh = 8; 2732 rdev->config.cik.max_backends_per_se = 2; 2733 } else if ((rdev->pdev->device == 0x1309) || 2734 (rdev->pdev->device == 0x130A) || 2735 (rdev->pdev->device == 0x130D) || 2736 (rdev->pdev->device == 0x1313) || 2737 (rdev->pdev->device == 0x131D)) { 2738 rdev->config.cik.max_cu_per_sh = 6; 2739 rdev->config.cik.max_backends_per_se = 2; 2740 } else if ((rdev->pdev->device == 0x1306) || 2741 (rdev->pdev->device == 0x1307) || 2742 (rdev->pdev->device == 0x130B) || 2743 (rdev->pdev->device == 0x130E) || 2744 (rdev->pdev->device == 0x1315) || 2745 (rdev->pdev->device == 0x131B)) { 2746 rdev->config.cik.max_cu_per_sh = 4; 2747 
rdev->config.cik.max_backends_per_se = 1; 2748 } else { 2749 rdev->config.cik.max_cu_per_sh = 3; 2750 rdev->config.cik.max_backends_per_se = 1; 2751 } 2752 rdev->config.cik.max_sh_per_se = 1; 2753 rdev->config.cik.max_texture_channel_caches = 4; 2754 rdev->config.cik.max_gprs = 256; 2755 rdev->config.cik.max_gs_threads = 16; 2756 rdev->config.cik.max_hw_contexts = 8; 2757 2758 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20; 2759 rdev->config.cik.sc_prim_fifo_size_backend = 0x100; 2760 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30; 2761 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130; 2762 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; 2763 break; 2764 case CHIP_KABINI: 2765 default: 2766 rdev->config.cik.max_shader_engines = 1; 2767 rdev->config.cik.max_tile_pipes = 2; 2768 rdev->config.cik.max_cu_per_sh = 2; 2769 rdev->config.cik.max_sh_per_se = 1; 2770 rdev->config.cik.max_backends_per_se = 1; 2771 rdev->config.cik.max_texture_channel_caches = 2; 2772 rdev->config.cik.max_gprs = 256; 2773 rdev->config.cik.max_gs_threads = 16; 2774 rdev->config.cik.max_hw_contexts = 8; 2775 2776 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20; 2777 rdev->config.cik.sc_prim_fifo_size_backend = 0x100; 2778 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30; 2779 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130; 2780 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; 2781 break; 2782 } 2783 2784 /* Initialize HDP */ 2785 for (i = 0, j = 0; i < 32; i++, j += 0x18) { 2786 WREG32((0x2c14 + j), 0x00000000); 2787 WREG32((0x2c18 + j), 0x00000000); 2788 WREG32((0x2c1c + j), 0x00000000); 2789 WREG32((0x2c20 + j), 0x00000000); 2790 WREG32((0x2c24 + j), 0x00000000); 2791 } 2792 2793 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff)); 2794 2795 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN); 2796 2797 mc_shared_chmap = RREG32(MC_SHARED_CHMAP); 2798 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); 2799 2800 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes; 2801 rdev->config.cik.mem_max_burst_length_bytes = 256; 2802 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT; 2803 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 2804 if (rdev->config.cik.mem_row_size_in_kb > 4) 2805 rdev->config.cik.mem_row_size_in_kb = 4; 2806 /* XXX use MC settings? */ 2807 rdev->config.cik.shader_engine_tile_size = 32; 2808 rdev->config.cik.num_gpus = 1; 2809 rdev->config.cik.multi_gpu_tile_size = 64; 2810 2811 /* fix up row size */ 2812 gb_addr_config &= ~ROW_SIZE_MASK; 2813 switch (rdev->config.cik.mem_row_size_in_kb) { 2814 case 1: 2815 default: 2816 gb_addr_config |= ROW_SIZE(0); 2817 break; 2818 case 2: 2819 gb_addr_config |= ROW_SIZE(1); 2820 break; 2821 case 4: 2822 gb_addr_config |= ROW_SIZE(2); 2823 break; 2824 } 2825 2826 /* setup tiling info dword. gb_addr_config is not adequate since it does 2827 * not have bank info, so create a custom tiling dword. 2828 * bits 3:0 num_pipes 2829 * bits 7:4 num_banks 2830 * bits 11:8 group_size 2831 * bits 15:12 row_size 2832 */ 2833 rdev->config.cik.tile_config = 0; 2834 switch (rdev->config.cik.num_tile_pipes) { 2835 case 1: 2836 rdev->config.cik.tile_config |= (0 << 0); 2837 break; 2838 case 2: 2839 rdev->config.cik.tile_config |= (1 << 0); 2840 break; 2841 case 4: 2842 rdev->config.cik.tile_config |= (2 << 0); 2843 break; 2844 case 8: 2845 default: 2846 /* XXX what about 12? 
*/ 2847 rdev->config.cik.tile_config |= (3 << 0); 2848 break; 2849 } 2850 rdev->config.cik.tile_config |= 2851 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4; 2852 rdev->config.cik.tile_config |= 2853 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8; 2854 rdev->config.cik.tile_config |= 2855 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12; 2856 2857 WREG32(GB_ADDR_CONFIG, gb_addr_config); 2858 WREG32(HDP_ADDR_CONFIG, gb_addr_config); 2859 WREG32(DMIF_ADDR_CALC, gb_addr_config); 2860 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70); 2861 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70); 2862 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config); 2863 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config); 2864 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config); 2865 2866 cik_tiling_mode_table_init(rdev); 2867 2868 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines, 2869 rdev->config.cik.max_sh_per_se, 2870 rdev->config.cik.max_backends_per_se); 2871 2872 /* set HW defaults for 3D engine */ 2873 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60)); 2874 2875 WREG32(SX_DEBUG_1, 0x20); 2876 2877 WREG32(TA_CNTL_AUX, 0x00010000); 2878 2879 tmp = RREG32(SPI_CONFIG_CNTL); 2880 tmp |= 0x03000000; 2881 WREG32(SPI_CONFIG_CNTL, tmp); 2882 2883 WREG32(SQ_CONFIG, 1); 2884 2885 WREG32(DB_DEBUG, 0); 2886 2887 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff; 2888 tmp |= 0x00000400; 2889 WREG32(DB_DEBUG2, tmp); 2890 2891 tmp = RREG32(DB_DEBUG3) & ~0x0002021c; 2892 tmp |= 0x00020200; 2893 WREG32(DB_DEBUG3, tmp); 2894 2895 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000; 2896 tmp |= 0x00018208; 2897 WREG32(CB_HW_CONTROL, tmp); 2898 2899 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4)); 2900 2901 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) | 2902 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) | 2903 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) | 2904 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size))); 2905 2906 WREG32(VGT_NUM_INSTANCES, 1); 2907 2908 WREG32(CP_PERFMON_CNTL, 0); 2909 2910 WREG32(SQ_CONFIG, 0); 2911 2912 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) | 2913 FORCE_EOV_MAX_REZ_CNT(255))); 2914 2915 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) | 2916 AUTO_INVLD_EN(ES_AND_GS_AUTO)); 2917 2918 WREG32(VGT_GS_VERTEX_REUSE, 16); 2919 WREG32(PA_SC_LINE_STIPPLE_STATE, 0); 2920 2921 tmp = RREG32(HDP_MISC_CNTL); 2922 tmp |= HDP_FLUSH_INVALIDATE_CACHE; 2923 WREG32(HDP_MISC_CNTL, tmp); 2924 2925 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL); 2926 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl); 2927 2928 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3)); 2929 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER); 2930 2931 udelay(50); 2932 } 2933
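/* Illustrative sketch, not part of the driver: unpacking the custom tiling
 * dword built in cik_gpu_init() above.  Per the packing comment there, bits
 * 3:0 hold log2(num_pipes), bits 7:4 the bank field from MC_ARB_RAMCFG, bits
 * 11:8 the pipe interleave (group size) field and bits 15:12 the row size
 * field.  The helper name is hypothetical.
 */
static inline unsigned cik_example_tile_config_num_pipes(u32 tile_config)
{
	return 1 << (tile_config & 0xf); /* 0 -> 1, 1 -> 2, 2 -> 4, 3 -> 8 pipes */
}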
2934 /* 2935 * GPU scratch register helper functions. 2936 */ 2937 /** 2938 * cik_scratch_init - setup driver info for CP scratch regs 2939 * 2940 * @rdev: radeon_device pointer 2941 * 2942 * Set up the number and offset of the CP scratch registers. 2943 * NOTE: use of CP scratch registers is a legacy interface and 2944 * is not used by default on newer asics (r6xx+). On newer asics, 2945 * memory buffers are used for fences rather than scratch regs. 2946 */ 2947 static void cik_scratch_init(struct radeon_device *rdev) 2948 { 2949 int i; 2950 2951 rdev->scratch.num_reg = 7; 2952 rdev->scratch.reg_base = SCRATCH_REG0; 2953 for (i = 0; i < rdev->scratch.num_reg; i++) { 2954 rdev->scratch.free[i] = true; 2955 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4); 2956 } 2957 } 2958 2959 /** 2960 * cik_ring_test - basic gfx ring test 2961 * 2962 * @rdev: radeon_device pointer 2963 * @ring: radeon_ring structure holding ring information 2964 * 2965 * Allocate a scratch register and write to it using the gfx ring (CIK). 2966 * Provides a basic gfx ring test to verify that the ring is working. 2967 * Used by cik_cp_gfx_resume(). 2968 * Returns 0 on success, error on failure. 2969 */ 2970 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) 2971 { 2972 uint32_t scratch; 2973 uint32_t tmp = 0; 2974 unsigned i; 2975 int r; 2976 2977 r = radeon_scratch_get(rdev, &scratch); 2978 if (r) { 2979 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r); 2980 return r; 2981 } 2982 WREG32(scratch, 0xCAFEDEAD); 2983 r = radeon_ring_lock(rdev, ring, 3); 2984 if (r) { 2985 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r); 2986 radeon_scratch_free(rdev, scratch); 2987 return r; 2988 } 2989 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 2990 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2)); 2991 radeon_ring_write(ring, 0xDEADBEEF); 2992 radeon_ring_unlock_commit(rdev, ring); 2993 2994 for (i = 0; i < rdev->usec_timeout; i++) { 2995 tmp = RREG32(scratch); 2996 if (tmp == 0xDEADBEEF) 2997 break; 2998 DRM_UDELAY(1); 2999 } 3000 if (i < rdev->usec_timeout) { 3001 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); 3002 } else { 3003 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n", 3004 ring->idx, scratch, tmp); 3005 r = -EINVAL; 3006 } 3007 radeon_scratch_free(rdev, scratch); 3008 return r; 3009 } 3010
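/* Illustrative sketch, not part of the driver: the busy-wait pattern used by
 * cik_ring_test() above (and cik_ib_test() below), factored out.  Returns 0
 * if @reg reads back @val within rdev->usec_timeout microseconds, -ETIMEDOUT
 * otherwise.  The helper name is hypothetical.
 */
static int cik_example_poll_scratch(struct radeon_device *rdev,
				    uint32_t reg, uint32_t val)
{
	unsigned i;

	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(reg) == val)
			return 0;
		DRM_UDELAY(1);
	}
	return -ETIMEDOUT;
}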
3011 /** 3012 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring 3013 * 3014 * @rdev: radeon_device pointer 3015 * @fence: radeon fence object 3016 * 3017 * Emits a fence sequence number on the gfx ring and flushes 3018 * GPU caches. 3019 */ 3020 void cik_fence_gfx_ring_emit(struct radeon_device *rdev, 3021 struct radeon_fence *fence) 3022 { 3023 struct radeon_ring *ring = &rdev->ring[fence->ring]; 3024 u64 addr = rdev->fence_drv[fence->ring].gpu_addr; 3025 3026 /* EVENT_WRITE_EOP - flush caches, send int */ 3027 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 3028 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN | 3029 EOP_TC_ACTION_EN | 3030 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 3031 EVENT_INDEX(5))); 3032 radeon_ring_write(ring, addr & 0xfffffffc); 3033 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2)); 3034 radeon_ring_write(ring, fence->seq); 3035 radeon_ring_write(ring, 0); 3036 /* HDP flush */ 3037 /* We should be using the new WAIT_REG_MEM special op packet here 3038 * but it causes the CP to hang 3039 */ 3040 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 3041 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 3042 WRITE_DATA_DST_SEL(0))); 3043 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2); 3044 radeon_ring_write(ring, 0); 3045 radeon_ring_write(ring, 0); 3046 } 3047 3048 /** 3049 * cik_fence_compute_ring_emit - emit a fence on the compute ring 3050 * 3051 * @rdev: radeon_device pointer 3052 * @fence: radeon fence object 3053 * 3054 * Emits a fence sequence number on the compute ring and flushes 3055 * GPU caches. 3056 */ 3057 void cik_fence_compute_ring_emit(struct radeon_device *rdev, 3058 struct radeon_fence *fence) 3059 { 3060 struct radeon_ring *ring = &rdev->ring[fence->ring]; 3061 u64 addr = rdev->fence_drv[fence->ring].gpu_addr; 3062 3063 /* RELEASE_MEM - flush caches, send int */ 3064 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5)); 3065 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN | 3066 EOP_TC_ACTION_EN | 3067 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 3068 EVENT_INDEX(5))); 3069 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2)); 3070 radeon_ring_write(ring, addr & 0xfffffffc); 3071 radeon_ring_write(ring, upper_32_bits(addr)); 3072 radeon_ring_write(ring, fence->seq); 3073 radeon_ring_write(ring, 0); 3074 /* HDP flush */ 3075 /* We should be using the new WAIT_REG_MEM special op packet here 3076 * but it causes the CP to hang 3077 */ 3078 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 3079 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 3080 WRITE_DATA_DST_SEL(0))); 3081 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2); 3082 radeon_ring_write(ring, 0); 3083 radeon_ring_write(ring, 0); 3084 } 3085 3086 void cik_semaphore_ring_emit(struct radeon_device *rdev, 3087 struct radeon_ring *ring, 3088 struct radeon_semaphore *semaphore, 3089 bool emit_wait) 3090 { 3091 uint64_t addr = semaphore->gpu_addr; 3092 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL; 3093 3094 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1)); 3095 radeon_ring_write(ring, addr & 0xffffffff); 3096 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel); 3097 } 3098 3099 /* 3100 * IB stuff 3101 */ 3102 /** 3103 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring 3104 * 3105 * @rdev: radeon_device pointer 3106 * @ib: radeon indirect buffer object 3107 * 3108 * Emits a DE (drawing engine) or CE (constant engine) IB 3109 * on the gfx ring. IBs are usually generated by userspace 3110 * acceleration drivers and submitted to the kernel for 3111 * scheduling on the ring. This function schedules the IB 3112 * on the gfx ring for execution by the GPU.
3113 */ 3114 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) 3115 { 3116 struct radeon_ring *ring = &rdev->ring[ib->ring]; 3117 u32 header, control = INDIRECT_BUFFER_VALID; 3118 3119 if (ib->is_const_ib) { 3120 /* set switch buffer packet before const IB */ 3121 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 3122 radeon_ring_write(ring, 0); 3123 3124 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 3125 } else { 3126 u32 next_rptr; 3127 if (ring->rptr_save_reg) { 3128 next_rptr = ring->wptr + 3 + 4; 3129 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 3130 radeon_ring_write(ring, ((ring->rptr_save_reg - 3131 PACKET3_SET_UCONFIG_REG_START) >> 2)); 3132 radeon_ring_write(ring, next_rptr); 3133 } else if (rdev->wb.enabled) { 3134 next_rptr = ring->wptr + 5 + 4; 3135 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 3136 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1)); 3137 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); 3138 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); 3139 radeon_ring_write(ring, next_rptr); 3140 } 3141 3142 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 3143 } 3144 3145 control |= ib->length_dw | 3146 (ib->vm ? (ib->vm->id << 24) : 0); 3147 3148 radeon_ring_write(ring, header); 3149 radeon_ring_write(ring, 3150 #ifdef __BIG_ENDIAN 3151 (2 << 0) | 3152 #endif 3153 (ib->gpu_addr & 0xFFFFFFFC)); 3154 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 3155 radeon_ring_write(ring, control); 3156 } 3157 3158 /** 3159 * cik_ib_test - basic gfx ring IB test 3160 * 3161 * @rdev: radeon_device pointer 3162 * @ring: radeon_ring structure holding ring information 3163 * 3164 * Allocate an IB and execute it on the gfx ring (CIK). 3165 * Provides a basic gfx ring test to verify that IBs are working. 3166 * Returns 0 on success, error on failure. 3167 */ 3168 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) 3169 { 3170 struct radeon_ib ib; 3171 uint32_t scratch; 3172 uint32_t tmp = 0; 3173 unsigned i; 3174 int r; 3175 3176 r = radeon_scratch_get(rdev, &scratch); 3177 if (r) { 3178 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r); 3179 return r; 3180 } 3181 WREG32(scratch, 0xCAFEDEAD); 3182 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); 3183 if (r) { 3184 DRM_ERROR("radeon: failed to get ib (%d).\n", r); radeon_scratch_free(rdev, scratch); /* don't leak the scratch reg on error */ 3185 return r; 3186 } 3187 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 3188 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2); 3189 ib.ptr[2] = 0xDEADBEEF; 3190 ib.length_dw = 3; 3191 r = radeon_ib_schedule(rdev, &ib, NULL); 3192 if (r) { 3193 radeon_scratch_free(rdev, scratch); 3194 radeon_ib_free(rdev, &ib); 3195 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); 3196 return r; 3197 } 3198 r = radeon_fence_wait(ib.fence, false); 3199 if (r) { 3200 DRM_ERROR("radeon: fence wait failed (%d).\n", r); radeon_scratch_free(rdev, scratch); radeon_ib_free(rdev, &ib); /* don't leak the ib and scratch reg on error */ 3201 return r; 3202 } 3203 for (i = 0; i < rdev->usec_timeout; i++) { 3204 tmp = RREG32(scratch); 3205 if (tmp == 0xDEADBEEF) 3206 break; 3207 DRM_UDELAY(1); 3208 } 3209 if (i < rdev->usec_timeout) { 3210 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); 3211 } else { 3212 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n", 3213 scratch, tmp); 3214 r = -EINVAL; 3215 } 3216 radeon_scratch_free(rdev, scratch); 3217 radeon_ib_free(rdev, &ib); 3218 return r; 3219 } 3220
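/* Illustrative sketch, not part of the driver: the three-dword scratch write
 * emitted by cik_ring_test() and cik_ib_test() above, expressed as a helper.
 * SET_UCONFIG_REG takes the register offset relative to
 * PACKET3_SET_UCONFIG_REG_START, in dwords.  The helper name is hypothetical.
 */
static void cik_example_emit_scratch_write(struct radeon_ring *ring,
					   uint32_t reg, uint32_t val)
{
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, (reg - PACKET3_SET_UCONFIG_REG_START) >> 2);
	radeon_ring_write(ring, val);
}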
3221 /* 3222 * CP. 3223 * On CIK, gfx and compute now have independent command processors. 3224 * 3225 * GFX 3226 * Gfx consists of a single ring and can process both gfx jobs and 3227 * compute jobs. The gfx CP consists of three microengines (ME): 3228 * PFP - Pre-Fetch Parser 3229 * ME - Micro Engine 3230 * CE - Constant Engine 3231 * The PFP and ME make up what is considered the Drawing Engine (DE). 3232 * The CE is an asynchronous engine used for updating buffer descriptors 3233 * used by the DE so that they can be loaded into cache in parallel 3234 * while the DE is processing state update packets. 3235 * 3236 * Compute 3237 * The compute CP consists of two microengines (ME): 3238 * MEC1 - Compute MicroEngine 1 3239 * MEC2 - Compute MicroEngine 2 3240 * Each MEC supports 4 compute pipes and each pipe supports 8 queues. 3241 * The queues are exposed to userspace and are programmed directly 3242 * by the compute runtime. 3243 */ 3244 /** 3245 * cik_cp_gfx_enable - enable/disable the gfx CP MEs 3246 * 3247 * @rdev: radeon_device pointer 3248 * @enable: enable or disable the MEs 3249 * 3250 * Halts or unhalts the gfx MEs. 3251 */ 3252 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable) 3253 { 3254 if (enable) 3255 WREG32(CP_ME_CNTL, 0); 3256 else { 3257 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT)); 3258 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 3259 } 3260 udelay(50); 3261 } 3262 3263 /** 3264 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode 3265 * 3266 * @rdev: radeon_device pointer 3267 * 3268 * Loads the gfx PFP, ME, and CE ucode. 3269 * Returns 0 for success, -EINVAL if the ucode is not available. 3270 */ 3271 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev) 3272 { 3273 const __be32 *fw_data; 3274 int i; 3275 3276 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw) 3277 return -EINVAL; 3278 3279 cik_cp_gfx_enable(rdev, false); 3280 3281 /* PFP */ 3282 fw_data = (const __be32 *)rdev->pfp_fw->data; 3283 WREG32(CP_PFP_UCODE_ADDR, 0); 3284 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++) 3285 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++)); 3286 WREG32(CP_PFP_UCODE_ADDR, 0); 3287 3288 /* CE */ 3289 fw_data = (const __be32 *)rdev->ce_fw->data; 3290 WREG32(CP_CE_UCODE_ADDR, 0); 3291 for (i = 0; i < CIK_CE_UCODE_SIZE; i++) 3292 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++)); 3293 WREG32(CP_CE_UCODE_ADDR, 0); 3294 3295 /* ME */ 3296 fw_data = (const __be32 *)rdev->me_fw->data; 3297 WREG32(CP_ME_RAM_WADDR, 0); 3298 for (i = 0; i < CIK_ME_UCODE_SIZE; i++) 3299 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++)); 3300 WREG32(CP_ME_RAM_WADDR, 0); 3301 3302 WREG32(CP_PFP_UCODE_ADDR, 0); 3303 WREG32(CP_CE_UCODE_ADDR, 0); 3304 WREG32(CP_ME_RAM_WADDR, 0); 3305 WREG32(CP_ME_RAM_RADDR, 0); 3306 return 0; 3307 } 3308 3309 /** 3310 * cik_cp_gfx_start - start the gfx ring 3311 * 3312 * @rdev: radeon_device pointer 3313 * 3314 * Enables the ring and loads the clear state context and other 3315 * packets required to init the ring. 3316 * Returns 0 for success, error for failure. 3317 */ 3318 static int cik_cp_gfx_start(struct radeon_device *rdev) 3319 { 3320 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 3321 int r, i; 3322 3323 /* init the CP */ 3324 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1); 3325 WREG32(CP_ENDIAN_SWAP, 0); 3326 WREG32(CP_DEVICE_ID, 1); 3327 3328 cik_cp_gfx_enable(rdev, true); 3329 3330 r = radeon_ring_lock(rdev, ring, cik_default_size + 17); 3331 if (r) { 3332 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); 3333 return r; 3334 } 3335 3336 /* init the CE partitions.
CE only used for gfx on CIK */ 3337 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 3338 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 3339 radeon_ring_write(ring, 0xc000); 3340 radeon_ring_write(ring, 0xc000); 3341 3342 /* setup clear context state */ 3343 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3344 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3345 3346 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3347 radeon_ring_write(ring, 0x80000000); 3348 radeon_ring_write(ring, 0x80000000); 3349 3350 for (i = 0; i < cik_default_size; i++) 3351 radeon_ring_write(ring, cik_default_state[i]); 3352 3353 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3354 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3355 3356 /* set clear context state */ 3357 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3358 radeon_ring_write(ring, 0); 3359 3360 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 3361 radeon_ring_write(ring, 0x00000316); 3362 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ 3363 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */ 3364 3365 radeon_ring_unlock_commit(rdev, ring); 3366 3367 return 0; 3368 } 3369 3370 /** 3371 * cik_cp_gfx_fini - stop the gfx ring 3372 * 3373 * @rdev: radeon_device pointer 3374 * 3375 * Stop the gfx ring and tear down the driver ring 3376 * info. 3377 */ 3378 static void cik_cp_gfx_fini(struct radeon_device *rdev) 3379 { 3380 cik_cp_gfx_enable(rdev, false); 3381 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); 3382 } 3383 3384 /** 3385 * cik_cp_gfx_resume - setup the gfx ring buffer registers 3386 * 3387 * @rdev: radeon_device pointer 3388 * 3389 * Program the location and size of the gfx ring buffer 3390 * and test it to make sure it's working. 3391 * Returns 0 for success, error for failure. 
3392 */ 3393 static int cik_cp_gfx_resume(struct radeon_device *rdev) 3394 { 3395 struct radeon_ring *ring; 3396 u32 tmp; 3397 u32 rb_bufsz; 3398 u64 rb_addr; 3399 int r; 3400 3401 WREG32(CP_SEM_WAIT_TIMER, 0x0); 3402 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0); 3403 3404 /* Set the write pointer delay */ 3405 WREG32(CP_RB_WPTR_DELAY, 0); 3406 3407 /* set the RB to use vmid 0 */ 3408 WREG32(CP_RB_VMID, 0); 3409 3410 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF); 3411 3412 /* ring 0 - compute and gfx */ 3413 /* Set ring buffer size */ 3414 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 3415 rb_bufsz = order_base_2(ring->ring_size / 8); 3416 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; 3417 #ifdef __BIG_ENDIAN 3418 tmp |= BUF_SWAP_32BIT; 3419 #endif 3420 WREG32(CP_RB0_CNTL, tmp); 3421 3422 /* Initialize the ring buffer's read and write pointers */ 3423 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA); 3424 ring->wptr = 0; 3425 WREG32(CP_RB0_WPTR, ring->wptr); 3426 3427 /* set the wb address whether it's enabled or not */ 3428 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC); 3429 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF); 3430 3431 /* scratch register shadowing is no longer supported */ 3432 WREG32(SCRATCH_UMSK, 0); 3433 3434 if (!rdev->wb.enabled) 3435 tmp |= RB_NO_UPDATE; 3436 3437 mdelay(1); 3438 WREG32(CP_RB0_CNTL, tmp); 3439 3440 rb_addr = ring->gpu_addr >> 8; 3441 WREG32(CP_RB0_BASE, rb_addr); 3442 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3443 3444 ring->rptr = RREG32(CP_RB0_RPTR); 3445 3446 /* start the ring */ 3447 cik_cp_gfx_start(rdev); 3448 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true; 3449 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); 3450 if (r) { 3451 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 3452 return r; 3453 } 3454 return 0; 3455 } 3456 3457 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev, 3458 struct radeon_ring *ring) 3459 { 3460 u32 rptr; 3461 3462 if (rdev->wb.enabled) { 3465 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]); 3466 } else { 3467 mutex_lock(&rdev->srbm_mutex); 3468 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); 3469 rptr = RREG32(CP_HQD_PQ_RPTR); 3470 cik_srbm_select(rdev, 0, 0, 0, 0); 3471 mutex_unlock(&rdev->srbm_mutex); 3472 } 3473 3474 return rptr; 3475 } 3476 3477 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev, 3478 struct radeon_ring *ring) 3479 { 3480 u32 wptr; 3481 3482 if (rdev->wb.enabled) { 3483 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]); 3484 } else { 3485 mutex_lock(&rdev->srbm_mutex); 3486 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); 3487 wptr = RREG32(CP_HQD_PQ_WPTR); 3488 cik_srbm_select(rdev, 0, 0, 0, 0); 3489 mutex_unlock(&rdev->srbm_mutex); 3490 } 3491 3492 return wptr; 3493 } 3494 3495 void cik_compute_ring_set_wptr(struct radeon_device *rdev, 3496 struct radeon_ring *ring) 3497 { 3498 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr); 3499 WDOORBELL32(ring->doorbell_offset, ring->wptr); 3500 } 3501 3502 /** 3503 * cik_cp_compute_enable - enable/disable the compute CP MEs 3504 * 3505 * @rdev: radeon_device pointer 3506 * @enable: enable or disable the MEs 3507 * 3508 * Halts or unhalts the compute MEs.
3509 */ 3510 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable) 3511 { 3512 if (enable) 3513 WREG32(CP_MEC_CNTL, 0); 3514 else 3515 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT)); 3516 udelay(50); 3517 } 3518 3519 /** 3520 * cik_cp_compute_load_microcode - load the compute CP ME ucode 3521 * 3522 * @rdev: radeon_device pointer 3523 * 3524 * Loads the compute MEC1&2 ucode. 3525 * Returns 0 for success, -EINVAL if the ucode is not available. 3526 */ 3527 static int cik_cp_compute_load_microcode(struct radeon_device *rdev) 3528 { 3529 const __be32 *fw_data; 3530 int i; 3531 3532 if (!rdev->mec_fw) 3533 return -EINVAL; 3534 3535 cik_cp_compute_enable(rdev, false); 3536 3537 /* MEC1 */ 3538 fw_data = (const __be32 *)rdev->mec_fw->data; 3539 WREG32(CP_MEC_ME1_UCODE_ADDR, 0); 3540 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++) 3541 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++)); 3542 WREG32(CP_MEC_ME1_UCODE_ADDR, 0); 3543 3544 if (rdev->family == CHIP_KAVERI) { 3545 /* MEC2 */ 3546 fw_data = (const __be32 *)rdev->mec_fw->data; 3547 WREG32(CP_MEC_ME2_UCODE_ADDR, 0); 3548 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++) 3549 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++)); 3550 WREG32(CP_MEC_ME2_UCODE_ADDR, 0); 3551 } 3552 3553 return 0; 3554 } 3555 3556 /** 3557 * cik_cp_compute_start - start the compute queues 3558 * 3559 * @rdev: radeon_device pointer 3560 * 3561 * Enable the compute queues. 3562 * Returns 0 for success, error for failure. 3563 */ 3564 static int cik_cp_compute_start(struct radeon_device *rdev) 3565 { 3566 cik_cp_compute_enable(rdev, true); 3567 3568 return 0; 3569 } 3570 3571 /** 3572 * cik_cp_compute_fini - stop the compute queues 3573 * 3574 * @rdev: radeon_device pointer 3575 * 3576 * Stop the compute queues and tear down the driver queue 3577 * info. 
3578 */ 3579 static void cik_cp_compute_fini(struct radeon_device *rdev) 3580 { 3581 int i, idx, r; 3582 3583 cik_cp_compute_enable(rdev, false); 3584 3585 for (i = 0; i < 2; i++) { 3586 if (i == 0) 3587 idx = CAYMAN_RING_TYPE_CP1_INDEX; 3588 else 3589 idx = CAYMAN_RING_TYPE_CP2_INDEX; 3590 3591 if (rdev->ring[idx].mqd_obj) { 3592 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false); 3593 if (unlikely(r != 0)) 3594 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r); 3595 3596 radeon_bo_unpin(rdev->ring[idx].mqd_obj); 3597 radeon_bo_unreserve(rdev->ring[idx].mqd_obj); 3598 3599 radeon_bo_unref(&rdev->ring[idx].mqd_obj); 3600 rdev->ring[idx].mqd_obj = NULL; 3601 } 3602 } 3603 } 3604 3605 static void cik_mec_fini(struct radeon_device *rdev) 3606 { 3607 int r; 3608 3609 if (rdev->mec.hpd_eop_obj) { 3610 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false); 3611 if (unlikely(r != 0)) 3612 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r); 3613 radeon_bo_unpin(rdev->mec.hpd_eop_obj); 3614 radeon_bo_unreserve(rdev->mec.hpd_eop_obj); 3615 3616 radeon_bo_unref(&rdev->mec.hpd_eop_obj); 3617 rdev->mec.hpd_eop_obj = NULL; 3618 } 3619 } 3620 3621 #define MEC_HPD_SIZE 2048 3622 3623 static int cik_mec_init(struct radeon_device *rdev) 3624 { 3625 int r; 3626 u32 *hpd; 3627 3628 /* 3629 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total 3630 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total 3631 */ 3632 if (rdev->family == CHIP_KAVERI) 3633 rdev->mec.num_mec = 2; 3634 else 3635 rdev->mec.num_mec = 1; 3636 rdev->mec.num_pipe = 4; 3637 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8; 3638 3639 if (rdev->mec.hpd_eop_obj == NULL) { 3640 r = radeon_bo_create(rdev, 3641 rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2, 3642 PAGE_SIZE, true, 3643 RADEON_GEM_DOMAIN_GTT, NULL, 3644 &rdev->mec.hpd_eop_obj); 3645 if (r) { 3646 dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r); 3647 return r; 3648 } 3649 } 3650 3651 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false); 3652 if (unlikely(r != 0)) { 3653 cik_mec_fini(rdev); 3654 return r; 3655 } 3656 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT, 3657 &rdev->mec.hpd_eop_gpu_addr); 3658 if (r) { 3659 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r); 3660 cik_mec_fini(rdev); 3661 return r; 3662 } 3663 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd); 3664 if (r) { 3665 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r); 3666 cik_mec_fini(rdev); 3667 return r; 3668 } 3669 3670 /* clear memory.
Not sure if this is required or not */ 3671 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2); 3672 3673 radeon_bo_kunmap(rdev->mec.hpd_eop_obj); 3674 radeon_bo_unreserve(rdev->mec.hpd_eop_obj); 3675 3676 return 0; 3677 } 3678 3679 struct hqd_registers 3680 { 3681 u32 cp_mqd_base_addr; 3682 u32 cp_mqd_base_addr_hi; 3683 u32 cp_hqd_active; 3684 u32 cp_hqd_vmid; 3685 u32 cp_hqd_persistent_state; 3686 u32 cp_hqd_pipe_priority; 3687 u32 cp_hqd_queue_priority; 3688 u32 cp_hqd_quantum; 3689 u32 cp_hqd_pq_base; 3690 u32 cp_hqd_pq_base_hi; 3691 u32 cp_hqd_pq_rptr; 3692 u32 cp_hqd_pq_rptr_report_addr; 3693 u32 cp_hqd_pq_rptr_report_addr_hi; 3694 u32 cp_hqd_pq_wptr_poll_addr; 3695 u32 cp_hqd_pq_wptr_poll_addr_hi; 3696 u32 cp_hqd_pq_doorbell_control; 3697 u32 cp_hqd_pq_wptr; 3698 u32 cp_hqd_pq_control; 3699 u32 cp_hqd_ib_base_addr; 3700 u32 cp_hqd_ib_base_addr_hi; 3701 u32 cp_hqd_ib_rptr; 3702 u32 cp_hqd_ib_control; 3703 u32 cp_hqd_iq_timer; 3704 u32 cp_hqd_iq_rptr; 3705 u32 cp_hqd_dequeue_request; 3706 u32 cp_hqd_dma_offload; 3707 u32 cp_hqd_sema_cmd; 3708 u32 cp_hqd_msg_type; 3709 u32 cp_hqd_atomic0_preop_lo; 3710 u32 cp_hqd_atomic0_preop_hi; 3711 u32 cp_hqd_atomic1_preop_lo; 3712 u32 cp_hqd_atomic1_preop_hi; 3713 u32 cp_hqd_hq_scheduler0; 3714 u32 cp_hqd_hq_scheduler1; 3715 u32 cp_mqd_control; 3716 }; 3717 3718 struct bonaire_mqd 3719 { 3720 u32 header; 3721 u32 dispatch_initiator; 3722 u32 dimensions[3]; 3723 u32 start_idx[3]; 3724 u32 num_threads[3]; 3725 u32 pipeline_stat_enable; 3726 u32 perf_counter_enable; 3727 u32 pgm[2]; 3728 u32 tba[2]; 3729 u32 tma[2]; 3730 u32 pgm_rsrc[2]; 3731 u32 vmid; 3732 u32 resource_limits; 3733 u32 static_thread_mgmt01[2]; 3734 u32 tmp_ring_size; 3735 u32 static_thread_mgmt23[2]; 3736 u32 restart[3]; 3737 u32 thread_trace_enable; 3738 u32 reserved1; 3739 u32 user_data[16]; 3740 u32 vgtcs_invoke_count[2]; 3741 struct hqd_registers queue_state; 3742 u32 dequeue_cntr; 3743 u32 interrupt_queue[64]; 3744 }; 3745 3746 /** 3747 * cik_cp_compute_resume - setup the compute queue registers 3748 * 3749 * @rdev: radeon_device pointer 3750 * 3751 * Program the compute queues and test them to make sure they 3752 * are working. 3753 * Returns 0 for success, error for failure. 3754 */ 3755 static int cik_cp_compute_resume(struct radeon_device *rdev) 3756 { 3757 int r, i, idx; 3758 u32 tmp; 3759 bool use_doorbell = true; 3760 u64 hqd_gpu_addr; 3761 u64 mqd_gpu_addr; 3762 u64 eop_gpu_addr; 3763 u64 wb_gpu_addr; 3764 u32 *buf; 3765 struct bonaire_mqd *mqd; 3766 3767 r = cik_cp_compute_start(rdev); 3768 if (r) 3769 return r; 3770 3771 /* fix up chicken bits */ 3772 tmp = RREG32(CP_CPF_DEBUG); 3773 tmp |= (1 << 23); 3774 WREG32(CP_CPF_DEBUG, tmp); 3775 3776 /* init the pipes */ 3777 mutex_lock(&rdev->srbm_mutex); 3778 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) { 3779 int me = (i < 4) ? 1 : 2; 3780 int pipe = (i < 4) ? 
i : (i - 4); 3781 3782 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2); 3783 3784 cik_srbm_select(rdev, me, pipe, 0, 0); 3785 3786 /* write the EOP addr */ 3787 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); 3788 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); 3789 3790 /* set the VMID assigned */ 3791 WREG32(CP_HPD_EOP_VMID, 0); 3792 3793 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3794 tmp = RREG32(CP_HPD_EOP_CONTROL); 3795 tmp &= ~EOP_SIZE_MASK; 3796 tmp |= order_base_2(MEC_HPD_SIZE / 8); 3797 WREG32(CP_HPD_EOP_CONTROL, tmp); 3798 } 3799 cik_srbm_select(rdev, 0, 0, 0, 0); 3800 mutex_unlock(&rdev->srbm_mutex); 3801 3802 /* init the queues. Just two for now. */ 3803 for (i = 0; i < 2; i++) { 3804 if (i == 0) 3805 idx = CAYMAN_RING_TYPE_CP1_INDEX; 3806 else 3807 idx = CAYMAN_RING_TYPE_CP2_INDEX; 3808 3809 if (rdev->ring[idx].mqd_obj == NULL) { 3810 r = radeon_bo_create(rdev, 3811 sizeof(struct bonaire_mqd), 3812 PAGE_SIZE, true, 3813 RADEON_GEM_DOMAIN_GTT, NULL, 3814 &rdev->ring[idx].mqd_obj); 3815 if (r) { 3816 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r); 3817 return r; 3818 } 3819 } 3820 3821 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false); 3822 if (unlikely(r != 0)) { 3823 cik_cp_compute_fini(rdev); 3824 return r; 3825 } 3826 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT, 3827 &mqd_gpu_addr); 3828 if (r) { 3829 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r); 3830 cik_cp_compute_fini(rdev); 3831 return r; 3832 } 3833 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf); 3834 if (r) { 3835 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r); 3836 cik_cp_compute_fini(rdev); 3837 return r; 3838 } 3839 3840 /* doorbell offset */ 3841 rdev->ring[idx].doorbell_offset = 3842 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0; 3843 3844 /* init the mqd struct */ 3845 memset(buf, 0, sizeof(struct bonaire_mqd)); 3846 3847 mqd = (struct bonaire_mqd *)buf; 3848 mqd->header = 0xC0310800; 3849 mqd->static_thread_mgmt01[0] = 0xffffffff; 3850 mqd->static_thread_mgmt01[1] = 0xffffffff; 3851 mqd->static_thread_mgmt23[0] = 0xffffffff; 3852 mqd->static_thread_mgmt23[1] = 0xffffffff; 3853 3854 mutex_lock(&rdev->srbm_mutex); 3855 cik_srbm_select(rdev, rdev->ring[idx].me, 3856 rdev->ring[idx].pipe, 3857 rdev->ring[idx].queue, 0); 3858 3859 /* disable wptr polling */ 3860 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL); 3861 tmp &= ~WPTR_POLL_EN; 3862 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp); 3863 3864 /* enable doorbell? 
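		 * (A doorbell is a slot in a special MMIO page through which
		 * the owner of a queue can hand the CP an updated wptr
		 * directly, without a register write through the kernel.
		 * Below we program this ring's offset within the doorbell
		 * aperture and enable it.)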
		 */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			for (i = 0; i < rdev->usec_timeout; i++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff; 3944 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR, 3945 mqd->queue_state.cp_hqd_pq_rptr_report_addr); 3946 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3947 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi); 3948 3949 /* enable the doorbell if requested */ 3950 if (use_doorbell) { 3951 mqd->queue_state.cp_hqd_pq_doorbell_control = 3952 RREG32(CP_HQD_PQ_DOORBELL_CONTROL); 3953 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK; 3954 mqd->queue_state.cp_hqd_pq_doorbell_control |= 3955 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4); 3956 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN; 3957 mqd->queue_state.cp_hqd_pq_doorbell_control &= 3958 ~(DOORBELL_SOURCE | DOORBELL_HIT); 3959 3960 } else { 3961 mqd->queue_state.cp_hqd_pq_doorbell_control = 0; 3962 } 3963 WREG32(CP_HQD_PQ_DOORBELL_CONTROL, 3964 mqd->queue_state.cp_hqd_pq_doorbell_control); 3965 3966 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3967 rdev->ring[idx].wptr = 0; 3968 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr; 3969 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); 3970 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR); 3971 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr; 3972 3973 /* set the vmid for the queue */ 3974 mqd->queue_state.cp_hqd_vmid = 0; 3975 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid); 3976 3977 /* activate the queue */ 3978 mqd->queue_state.cp_hqd_active = 1; 3979 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); 3980 3981 cik_srbm_select(rdev, 0, 0, 0, 0); 3982 mutex_unlock(&rdev->srbm_mutex); 3983 3984 radeon_bo_kunmap(rdev->ring[idx].mqd_obj); 3985 radeon_bo_unreserve(rdev->ring[idx].mqd_obj); 3986 3987 rdev->ring[idx].ready = true; 3988 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]); 3989 if (r) 3990 rdev->ring[idx].ready = false; 3991 } 3992 3993 return 0; 3994 } 3995 3996 static void cik_cp_enable(struct radeon_device *rdev, bool enable) 3997 { 3998 cik_cp_gfx_enable(rdev, enable); 3999 cik_cp_compute_enable(rdev, enable); 4000 } 4001 4002 static int cik_cp_load_microcode(struct radeon_device *rdev) 4003 { 4004 int r; 4005 4006 r = cik_cp_gfx_load_microcode(rdev); 4007 if (r) 4008 return r; 4009 r = cik_cp_compute_load_microcode(rdev); 4010 if (r) 4011 return r; 4012 4013 return 0; 4014 } 4015 4016 static void cik_cp_fini(struct radeon_device *rdev) 4017 { 4018 cik_cp_gfx_fini(rdev); 4019 cik_cp_compute_fini(rdev); 4020 } 4021 4022 static int cik_cp_resume(struct radeon_device *rdev) 4023 { 4024 int r; 4025 4026 cik_enable_gui_idle_interrupt(rdev, false); 4027 4028 r = cik_cp_load_microcode(rdev); 4029 if (r) 4030 return r; 4031 4032 r = cik_cp_gfx_resume(rdev); 4033 if (r) 4034 return r; 4035 r = cik_cp_compute_resume(rdev); 4036 if (r) 4037 return r; 4038 4039 cik_enable_gui_idle_interrupt(rdev, true); 4040 4041 return 0; 4042 } 4043 4044 static void cik_print_gpu_status_regs(struct radeon_device *rdev) 4045 { 4046 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n", 4047 RREG32(GRBM_STATUS)); 4048 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n", 4049 RREG32(GRBM_STATUS2)); 4050 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n", 4051 RREG32(GRBM_STATUS_SE0)); 4052 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n", 4053 RREG32(GRBM_STATUS_SE1)); 4054 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n", 4055 RREG32(GRBM_STATUS_SE2)); 4056 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n", 4057 RREG32(GRBM_STATUS_SE3)); 4058 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", 4059 RREG32(SRBM_STATUS)); 4060 dev_info(rdev->dev, " 
SRBM_STATUS2=0x%08X\n",
		 RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}

/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's most likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}

/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
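 * GFX-internal blocks (CP, GFX, RLC) are reset through GRBM_SOFT_RESET,
 * while system-level blocks (SDMA, IH, SEM, GRBM, VMC, DC, and on dGPUs
 * the MC) go through SRBM_SOFT_RESET; the MC is stopped and its state
 * saved across the reset via evergreen_mc_stop()/evergreen_mc_resume().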
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
	}

	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}

/**
 * cik_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 *
 * Look up which blocks are hung and attempt
 * to reset them.
 * Returns 0 for success.
 */
int cik_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}

/**
 * cik_gfx_is_lockup - check if the 3D engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the 3D engine is locked up (CIK).
 * Returns true if the engine is locked, false if not.
 */
bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);

	if (!(reset_mask & (RADEON_RESET_GFX |
			    RADEON_RESET_COMPUTE |
			    RADEON_RESET_CP))) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force CP activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}

/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
	}
	/* Lock out access through the VGA aperture */
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these!
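	 * (HDP_NONSURFACE_BASE/_INFO/_SIZE describe the host-visible
	 * linear view of VRAM; the values below appear to be carried
	 * over from previous asic generations.)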
	 */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}

/**
 * cik_mc_init - initialize the memory controller driver params
 *
 * @rdev: radeon_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space (CIK).
 * Returns 0 for success.
 */
static int cik_mc_init(struct radeon_device *rdev)
{
	u32 tmp;
	int chansize, numchan;

	/* Get VRAM information */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	tmp = RREG32(MC_SHARED_CHMAP);
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	case 4:
		numchan = 3;
		break;
	case 5:
		numchan = 6;
		break;
	case 6:
		numchan = 10;
		break;
	case 7:
		numchan = 12;
		break;
	case 8:
		numchan = 16;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could aperture size report 0 ? */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* size in MB */
	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	si_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);

	return 0;
}

/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}

/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
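 *
 * Note that the aperture and page-table registers programmed here
 * take 4 KB page frame numbers rather than byte addresses, hence
 * the '>> 12' shifts below (e.g. a table at GPU address 0x80000000
 * is written as 0x80000).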
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME: start with 4GB of address space; once we move to a
	 * two-level page table, switch to the full VM size
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* TC cache setup ???
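	 * (writing 0 presumably leaves the texture-cache L1/L2 load,
	 * store and atomic policies at their hardware defaults)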
	 */
	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L1_STORE_POLICY, 0);

	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);

	WREG32(TC_CFG_L1_VOLATILE, 0);
	WREG32(TC_CFG_L2_VOLATILE, 0);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}

/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page tables (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}

/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}

/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}

/*
 * vm
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_vm_init - cik vm init callback
 *
 * @rdev: radeon_device pointer
 *
 * Inits cik specific vm parameters (number of VMs, base of vram for
 * VMIDs 1-15) (CIK).
 * Returns 0 for success.
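 *
 * On IGPs the framebuffer is carved out of system memory, and
 * MC_VM_FB_OFFSET reports that carve-out's base in 4 MB units;
 * shifting it left by 22 bits (as done below) yields the byte
 * offset used as vram_base_offset for VMIDs 1-15.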
4700 */ 4701 int cik_vm_init(struct radeon_device *rdev) 4702 { 4703 /* number of VMs */ 4704 rdev->vm_manager.nvm = 16; 4705 /* base offset of vram pages */ 4706 if (rdev->flags & RADEON_IS_IGP) { 4707 u64 tmp = RREG32(MC_VM_FB_OFFSET); 4708 tmp <<= 22; 4709 rdev->vm_manager.vram_base_offset = tmp; 4710 } else 4711 rdev->vm_manager.vram_base_offset = 0; 4712 4713 return 0; 4714 } 4715 4716 /** 4717 * cik_vm_fini - cik vm fini callback 4718 * 4719 * @rdev: radeon_device pointer 4720 * 4721 * Tear down any asic specific VM setup (CIK). 4722 */ 4723 void cik_vm_fini(struct radeon_device *rdev) 4724 { 4725 } 4726 4727 /** 4728 * cik_vm_decode_fault - print human readable fault info 4729 * 4730 * @rdev: radeon_device pointer 4731 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value 4732 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value 4733 * 4734 * Print human readable fault information (CIK). 4735 */ 4736 static void cik_vm_decode_fault(struct radeon_device *rdev, 4737 u32 status, u32 addr, u32 mc_client) 4738 { 4739 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT; 4740 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT; 4741 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT; 4742 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff, 4743 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 }; 4744 4745 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n", 4746 protections, vmid, addr, 4747 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read", 4748 block, mc_client, mc_id); 4749 } 4750 4751 /** 4752 * cik_vm_flush - cik vm flush using the CP 4753 * 4754 * @rdev: radeon_device pointer 4755 * 4756 * Update the page table base and flush the VM TLB 4757 * using the CP (CIK). 
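 *
 * Everything is emitted as WRITE_DATA packets on the ring itself,
 * so the page table base update, the SH_MEM_* reprogramming, and
 * the TLB invalidate are ordered with respect to the commands that
 * precede them in the command stream.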
4758 */ 4759 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) 4760 { 4761 struct radeon_ring *ring = &rdev->ring[ridx]; 4762 4763 if (vm == NULL) 4764 return; 4765 4766 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4767 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4768 WRITE_DATA_DST_SEL(0))); 4769 if (vm->id < 8) { 4770 radeon_ring_write(ring, 4771 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); 4772 } else { 4773 radeon_ring_write(ring, 4774 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2); 4775 } 4776 radeon_ring_write(ring, 0); 4777 radeon_ring_write(ring, vm->pd_gpu_addr >> 12); 4778 4779 /* update SH_MEM_* regs */ 4780 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4781 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4782 WRITE_DATA_DST_SEL(0))); 4783 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); 4784 radeon_ring_write(ring, 0); 4785 radeon_ring_write(ring, VMID(vm->id)); 4786 4787 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6)); 4788 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4789 WRITE_DATA_DST_SEL(0))); 4790 radeon_ring_write(ring, SH_MEM_BASES >> 2); 4791 radeon_ring_write(ring, 0); 4792 4793 radeon_ring_write(ring, 0); /* SH_MEM_BASES */ 4794 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */ 4795 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */ 4796 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */ 4797 4798 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4799 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4800 WRITE_DATA_DST_SEL(0))); 4801 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); 4802 radeon_ring_write(ring, 0); 4803 radeon_ring_write(ring, VMID(0)); 4804 4805 /* HDP flush */ 4806 /* We should be using the WAIT_REG_MEM packet here like in 4807 * cik_fence_ring_emit(), but it causes the CP to hang in this 4808 * context... 4809 */ 4810 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4811 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4812 WRITE_DATA_DST_SEL(0))); 4813 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2); 4814 radeon_ring_write(ring, 0); 4815 radeon_ring_write(ring, 0); 4816 4817 /* bits 0-15 are the VM contexts0-15 */ 4818 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4819 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4820 WRITE_DATA_DST_SEL(0))); 4821 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); 4822 radeon_ring_write(ring, 0); 4823 radeon_ring_write(ring, 1 << vm->id); 4824 4825 /* compute doesn't have PFP */ 4826 if (ridx == RADEON_RING_TYPE_GFX_INDEX) { 4827 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 4828 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 4829 radeon_ring_write(ring, 0x0); 4830 } 4831 } 4832 4833 /** 4834 * cik_vm_set_page - update the page tables using sDMA 4835 * 4836 * @rdev: radeon_device pointer 4837 * @ib: indirect buffer to fill with commands 4838 * @pe: addr of the page entry 4839 * @addr: dst addr to write into pe 4840 * @count: number of page entries to update 4841 * @incr: increase next addr by incr bytes 4842 * @flags: access flags 4843 * 4844 * Update the page tables using CP or sDMA (CIK). 
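 *
 * On the CP path each WRITE_DATA packet is capped at 0x3FFE dwords;
 * with a 2 dword header and 2 dwords per 64-bit entry that is at
 * most (0x3FFE - 2) / 2 = 8190 PTEs per packet, so larger updates
 * are split across multiple packets.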
4845 */ 4846 void cik_vm_set_page(struct radeon_device *rdev, 4847 struct radeon_ib *ib, 4848 uint64_t pe, 4849 uint64_t addr, unsigned count, 4850 uint32_t incr, uint32_t flags) 4851 { 4852 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); 4853 uint64_t value; 4854 unsigned ndw; 4855 4856 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) { 4857 /* CP */ 4858 while (count) { 4859 ndw = 2 + count * 2; 4860 if (ndw > 0x3FFE) 4861 ndw = 0x3FFE; 4862 4863 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw); 4864 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) | 4865 WRITE_DATA_DST_SEL(1)); 4866 ib->ptr[ib->length_dw++] = pe; 4867 ib->ptr[ib->length_dw++] = upper_32_bits(pe); 4868 for (; ndw > 2; ndw -= 2, --count, pe += 8) { 4869 if (flags & RADEON_VM_PAGE_SYSTEM) { 4870 value = radeon_vm_map_gart(rdev, addr); 4871 value &= 0xFFFFFFFFFFFFF000ULL; 4872 } else if (flags & RADEON_VM_PAGE_VALID) { 4873 value = addr; 4874 } else { 4875 value = 0; 4876 } 4877 addr += incr; 4878 value |= r600_flags; 4879 ib->ptr[ib->length_dw++] = value; 4880 ib->ptr[ib->length_dw++] = upper_32_bits(value); 4881 } 4882 } 4883 } else { 4884 /* DMA */ 4885 cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags); 4886 } 4887 } 4888 4889 /* 4890 * RLC 4891 * The RLC is a multi-purpose microengine that handles a 4892 * variety of functions, the most important of which is 4893 * the interrupt controller. 4894 */ 4895 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev, 4896 bool enable) 4897 { 4898 u32 tmp = RREG32(CP_INT_CNTL_RING0); 4899 4900 if (enable) 4901 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 4902 else 4903 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 4904 WREG32(CP_INT_CNTL_RING0, tmp); 4905 } 4906 4907 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable) 4908 { 4909 u32 tmp; 4910 4911 tmp = RREG32(RLC_LB_CNTL); 4912 if (enable) 4913 tmp |= LOAD_BALANCE_ENABLE; 4914 else 4915 tmp &= ~LOAD_BALANCE_ENABLE; 4916 WREG32(RLC_LB_CNTL, tmp); 4917 } 4918 4919 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev) 4920 { 4921 u32 i, j, k; 4922 u32 mask; 4923 4924 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { 4925 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { 4926 cik_select_se_sh(rdev, i, j); 4927 for (k = 0; k < rdev->usec_timeout; k++) { 4928 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0) 4929 break; 4930 udelay(1); 4931 } 4932 } 4933 } 4934 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 4935 4936 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY; 4937 for (k = 0; k < rdev->usec_timeout; k++) { 4938 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 4939 break; 4940 udelay(1); 4941 } 4942 } 4943 4944 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc) 4945 { 4946 u32 tmp; 4947 4948 tmp = RREG32(RLC_CNTL); 4949 if (tmp != rlc) 4950 WREG32(RLC_CNTL, rlc); 4951 } 4952 4953 static u32 cik_halt_rlc(struct radeon_device *rdev) 4954 { 4955 u32 data, orig; 4956 4957 orig = data = RREG32(RLC_CNTL); 4958 4959 if (data & RLC_ENABLE) { 4960 u32 i; 4961 4962 data &= ~RLC_ENABLE; 4963 WREG32(RLC_CNTL, data); 4964 4965 for (i = 0; i < rdev->usec_timeout; i++) { 4966 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0) 4967 break; 4968 udelay(1); 4969 } 4970 4971 cik_wait_for_rlc_serdes(rdev); 4972 } 4973 4974 return orig; 4975 } 4976 4977 void cik_enter_rlc_safe_mode(struct radeon_device *rdev) 4978 { 4979 u32 tmp, i, mask; 4980 4981 tmp = REQ | 
MESSAGE(MSG_ENTER_RLC_SAFE_MODE); 4982 WREG32(RLC_GPR_REG2, tmp); 4983 4984 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS; 4985 for (i = 0; i < rdev->usec_timeout; i++) { 4986 if ((RREG32(RLC_GPM_STAT) & mask) == mask) 4987 break; 4988 udelay(1); 4989 } 4990 4991 for (i = 0; i < rdev->usec_timeout; i++) { 4992 if ((RREG32(RLC_GPR_REG2) & REQ) == 0) 4993 break; 4994 udelay(1); 4995 } 4996 } 4997 4998 void cik_exit_rlc_safe_mode(struct radeon_device *rdev) 4999 { 5000 u32 tmp; 5001 5002 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE); 5003 WREG32(RLC_GPR_REG2, tmp); 5004 } 5005 5006 /** 5007 * cik_rlc_stop - stop the RLC ME 5008 * 5009 * @rdev: radeon_device pointer 5010 * 5011 * Halt the RLC ME (MicroEngine) (CIK). 5012 */ 5013 static void cik_rlc_stop(struct radeon_device *rdev) 5014 { 5015 WREG32(RLC_CNTL, 0); 5016 5017 cik_enable_gui_idle_interrupt(rdev, false); 5018 5019 cik_wait_for_rlc_serdes(rdev); 5020 } 5021 5022 /** 5023 * cik_rlc_start - start the RLC ME 5024 * 5025 * @rdev: radeon_device pointer 5026 * 5027 * Unhalt the RLC ME (MicroEngine) (CIK). 5028 */ 5029 static void cik_rlc_start(struct radeon_device *rdev) 5030 { 5031 WREG32(RLC_CNTL, RLC_ENABLE); 5032 5033 cik_enable_gui_idle_interrupt(rdev, true); 5034 5035 udelay(50); 5036 } 5037 5038 /** 5039 * cik_rlc_resume - setup the RLC hw 5040 * 5041 * @rdev: radeon_device pointer 5042 * 5043 * Initialize the RLC registers, load the ucode, 5044 * and start the RLC (CIK). 5045 * Returns 0 for success, -EINVAL if the ucode is not available. 5046 */ 5047 static int cik_rlc_resume(struct radeon_device *rdev) 5048 { 5049 u32 i, size, tmp; 5050 const __be32 *fw_data; 5051 5052 if (!rdev->rlc_fw) 5053 return -EINVAL; 5054 5055 switch (rdev->family) { 5056 case CHIP_BONAIRE: 5057 default: 5058 size = BONAIRE_RLC_UCODE_SIZE; 5059 break; 5060 case CHIP_KAVERI: 5061 size = KV_RLC_UCODE_SIZE; 5062 break; 5063 case CHIP_KABINI: 5064 size = KB_RLC_UCODE_SIZE; 5065 break; 5066 } 5067 5068 cik_rlc_stop(rdev); 5069 5070 /* disable CG */ 5071 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc; 5072 WREG32(RLC_CGCG_CGLS_CTRL, tmp); 5073 5074 si_rlc_reset(rdev); 5075 5076 cik_init_pg(rdev); 5077 5078 cik_init_cg(rdev); 5079 5080 WREG32(RLC_LB_CNTR_INIT, 0); 5081 WREG32(RLC_LB_CNTR_MAX, 0x00008000); 5082 5083 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5084 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff); 5085 WREG32(RLC_LB_PARAMS, 0x00600408); 5086 WREG32(RLC_LB_CNTL, 0x80000004); 5087 5088 WREG32(RLC_MC_CNTL, 0); 5089 WREG32(RLC_UCODE_CNTL, 0); 5090 5091 fw_data = (const __be32 *)rdev->rlc_fw->data; 5092 WREG32(RLC_GPM_UCODE_ADDR, 0); 5093 for (i = 0; i < size; i++) 5094 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++)); 5095 WREG32(RLC_GPM_UCODE_ADDR, 0); 5096 5097 /* XXX - find out what chips support lbpw */ 5098 cik_enable_lbpw(rdev, false); 5099 5100 if (rdev->family == CHIP_BONAIRE) 5101 WREG32(RLC_DRIVER_DMA_STATUS, 0); 5102 5103 cik_rlc_start(rdev); 5104 5105 return 0; 5106 } 5107 5108 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable) 5109 { 5110 u32 data, orig, tmp, tmp2; 5111 5112 orig = data = RREG32(RLC_CGCG_CGLS_CTRL); 5113 5114 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) { 5115 cik_enable_gui_idle_interrupt(rdev, true); 5116 5117 tmp = cik_halt_rlc(rdev); 5118 5119 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5120 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5121 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5122 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE; 5123 WREG32(RLC_SERDES_WR_CTRL, 
tmp2); 5124 5125 cik_update_rlc(rdev, tmp); 5126 5127 data |= CGCG_EN | CGLS_EN; 5128 } else { 5129 cik_enable_gui_idle_interrupt(rdev, false); 5130 5131 RREG32(CB_CGTT_SCLK_CTRL); 5132 RREG32(CB_CGTT_SCLK_CTRL); 5133 RREG32(CB_CGTT_SCLK_CTRL); 5134 RREG32(CB_CGTT_SCLK_CTRL); 5135 5136 data &= ~(CGCG_EN | CGLS_EN); 5137 } 5138 5139 if (orig != data) 5140 WREG32(RLC_CGCG_CGLS_CTRL, data); 5141 5142 } 5143 5144 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable) 5145 { 5146 u32 data, orig, tmp = 0; 5147 5148 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) { 5149 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) { 5150 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) { 5151 orig = data = RREG32(CP_MEM_SLP_CNTL); 5152 data |= CP_MEM_LS_EN; 5153 if (orig != data) 5154 WREG32(CP_MEM_SLP_CNTL, data); 5155 } 5156 } 5157 5158 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); 5159 data &= 0xfffffffd; 5160 if (orig != data) 5161 WREG32(RLC_CGTT_MGCG_OVERRIDE, data); 5162 5163 tmp = cik_halt_rlc(rdev); 5164 5165 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5166 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5167 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5168 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0; 5169 WREG32(RLC_SERDES_WR_CTRL, data); 5170 5171 cik_update_rlc(rdev, tmp); 5172 5173 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) { 5174 orig = data = RREG32(CGTS_SM_CTRL_REG); 5175 data &= ~SM_MODE_MASK; 5176 data |= SM_MODE(0x2); 5177 data |= SM_MODE_ENABLE; 5178 data &= ~CGTS_OVERRIDE; 5179 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) && 5180 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS)) 5181 data &= ~CGTS_LS_OVERRIDE; 5182 data &= ~ON_MONITOR_ADD_MASK; 5183 data |= ON_MONITOR_ADD_EN; 5184 data |= ON_MONITOR_ADD(0x96); 5185 if (orig != data) 5186 WREG32(CGTS_SM_CTRL_REG, data); 5187 } 5188 } else { 5189 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); 5190 data |= 0x00000002; 5191 if (orig != data) 5192 WREG32(RLC_CGTT_MGCG_OVERRIDE, data); 5193 5194 data = RREG32(RLC_MEM_SLP_CNTL); 5195 if (data & RLC_MEM_LS_EN) { 5196 data &= ~RLC_MEM_LS_EN; 5197 WREG32(RLC_MEM_SLP_CNTL, data); 5198 } 5199 5200 data = RREG32(CP_MEM_SLP_CNTL); 5201 if (data & CP_MEM_LS_EN) { 5202 data &= ~CP_MEM_LS_EN; 5203 WREG32(CP_MEM_SLP_CNTL, data); 5204 } 5205 5206 orig = data = RREG32(CGTS_SM_CTRL_REG); 5207 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE; 5208 if (orig != data) 5209 WREG32(CGTS_SM_CTRL_REG, data); 5210 5211 tmp = cik_halt_rlc(rdev); 5212 5213 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5214 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5215 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5216 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1; 5217 WREG32(RLC_SERDES_WR_CTRL, data); 5218 5219 cik_update_rlc(rdev, tmp); 5220 } 5221 } 5222 5223 static const u32 mc_cg_registers[] = 5224 { 5225 MC_HUB_MISC_HUB_CG, 5226 MC_HUB_MISC_SIP_CG, 5227 MC_HUB_MISC_VM_CG, 5228 MC_XPB_CLK_GAT, 5229 ATC_MISC_CG, 5230 MC_CITF_MISC_WR_CG, 5231 MC_CITF_MISC_RD_CG, 5232 MC_CITF_MISC_VM_CG, 5233 VM_L2_CG, 5234 }; 5235 5236 static void cik_enable_mc_ls(struct radeon_device *rdev, 5237 bool enable) 5238 { 5239 int i; 5240 u32 orig, data; 5241 5242 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { 5243 orig = data = RREG32(mc_cg_registers[i]); 5244 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS)) 5245 data |= MC_LS_ENABLE; 5246 else 5247 data &= ~MC_LS_ENABLE; 5248 if (data != orig) 5249 WREG32(mc_cg_registers[i], data); 5250 } 5251 } 5252 5253 static void 
cik_enable_mc_mgcg(struct radeon_device *rdev, 5254 bool enable) 5255 { 5256 int i; 5257 u32 orig, data; 5258 5259 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { 5260 orig = data = RREG32(mc_cg_registers[i]); 5261 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG)) 5262 data |= MC_CG_ENABLE; 5263 else 5264 data &= ~MC_CG_ENABLE; 5265 if (data != orig) 5266 WREG32(mc_cg_registers[i], data); 5267 } 5268 } 5269 5270 static void cik_enable_sdma_mgcg(struct radeon_device *rdev, 5271 bool enable) 5272 { 5273 u32 orig, data; 5274 5275 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) { 5276 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100); 5277 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100); 5278 } else { 5279 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET); 5280 data |= 0xff000000; 5281 if (data != orig) 5282 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data); 5283 5284 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET); 5285 data |= 0xff000000; 5286 if (data != orig) 5287 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data); 5288 } 5289 } 5290 5291 static void cik_enable_sdma_mgls(struct radeon_device *rdev, 5292 bool enable) 5293 { 5294 u32 orig, data; 5295 5296 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) { 5297 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET); 5298 data |= 0x100; 5299 if (orig != data) 5300 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data); 5301 5302 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET); 5303 data |= 0x100; 5304 if (orig != data) 5305 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data); 5306 } else { 5307 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET); 5308 data &= ~0x100; 5309 if (orig != data) 5310 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data); 5311 5312 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET); 5313 data &= ~0x100; 5314 if (orig != data) 5315 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data); 5316 } 5317 } 5318 5319 static void cik_enable_uvd_mgcg(struct radeon_device *rdev, 5320 bool enable) 5321 { 5322 u32 orig, data; 5323 5324 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) { 5325 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); 5326 data = 0xfff; 5327 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data); 5328 5329 orig = data = RREG32(UVD_CGC_CTRL); 5330 data |= DCM; 5331 if (orig != data) 5332 WREG32(UVD_CGC_CTRL, data); 5333 } else { 5334 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); 5335 data &= ~0xfff; 5336 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data); 5337 5338 orig = data = RREG32(UVD_CGC_CTRL); 5339 data &= ~DCM; 5340 if (orig != data) 5341 WREG32(UVD_CGC_CTRL, data); 5342 } 5343 } 5344 5345 static void cik_enable_bif_mgls(struct radeon_device *rdev, 5346 bool enable) 5347 { 5348 u32 orig, data; 5349 5350 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2); 5351 5352 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS)) 5353 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | 5354 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN; 5355 else 5356 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN | 5357 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN); 5358 5359 if (orig != data) 5360 WREG32_PCIE_PORT(PCIE_CNTL2, data); 5361 } 5362 5363 static void cik_enable_hdp_mgcg(struct radeon_device *rdev, 5364 bool enable) 5365 { 5366 u32 orig, data; 5367 5368 orig = data = RREG32(HDP_HOST_PATH_CNTL); 5369 5370 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG)) 5371 data &= ~CLOCK_GATING_DIS; 5372 else 5373 data |= CLOCK_GATING_DIS; 
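	/* only write HDP_HOST_PATH_CNTL back if the gating bit changed */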
5374 5375 if (orig != data) 5376 WREG32(HDP_HOST_PATH_CNTL, data); 5377 } 5378 5379 static void cik_enable_hdp_ls(struct radeon_device *rdev, 5380 bool enable) 5381 { 5382 u32 orig, data; 5383 5384 orig = data = RREG32(HDP_MEM_POWER_LS); 5385 5386 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS)) 5387 data |= HDP_LS_ENABLE; 5388 else 5389 data &= ~HDP_LS_ENABLE; 5390 5391 if (orig != data) 5392 WREG32(HDP_MEM_POWER_LS, data); 5393 } 5394 5395 void cik_update_cg(struct radeon_device *rdev, 5396 u32 block, bool enable) 5397 { 5398 5399 if (block & RADEON_CG_BLOCK_GFX) { 5400 cik_enable_gui_idle_interrupt(rdev, false); 5401 /* order matters! */ 5402 if (enable) { 5403 cik_enable_mgcg(rdev, true); 5404 cik_enable_cgcg(rdev, true); 5405 } else { 5406 cik_enable_cgcg(rdev, false); 5407 cik_enable_mgcg(rdev, false); 5408 } 5409 cik_enable_gui_idle_interrupt(rdev, true); 5410 } 5411 5412 if (block & RADEON_CG_BLOCK_MC) { 5413 if (!(rdev->flags & RADEON_IS_IGP)) { 5414 cik_enable_mc_mgcg(rdev, enable); 5415 cik_enable_mc_ls(rdev, enable); 5416 } 5417 } 5418 5419 if (block & RADEON_CG_BLOCK_SDMA) { 5420 cik_enable_sdma_mgcg(rdev, enable); 5421 cik_enable_sdma_mgls(rdev, enable); 5422 } 5423 5424 if (block & RADEON_CG_BLOCK_BIF) { 5425 cik_enable_bif_mgls(rdev, enable); 5426 } 5427 5428 if (block & RADEON_CG_BLOCK_UVD) { 5429 if (rdev->has_uvd) 5430 cik_enable_uvd_mgcg(rdev, enable); 5431 } 5432 5433 if (block & RADEON_CG_BLOCK_HDP) { 5434 cik_enable_hdp_mgcg(rdev, enable); 5435 cik_enable_hdp_ls(rdev, enable); 5436 } 5437 } 5438 5439 static void cik_init_cg(struct radeon_device *rdev) 5440 { 5441 5442 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true); 5443 5444 if (rdev->has_uvd) 5445 si_init_uvd_internal_cg(rdev); 5446 5447 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC | 5448 RADEON_CG_BLOCK_SDMA | 5449 RADEON_CG_BLOCK_BIF | 5450 RADEON_CG_BLOCK_UVD | 5451 RADEON_CG_BLOCK_HDP), true); 5452 } 5453 5454 static void cik_fini_cg(struct radeon_device *rdev) 5455 { 5456 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC | 5457 RADEON_CG_BLOCK_SDMA | 5458 RADEON_CG_BLOCK_BIF | 5459 RADEON_CG_BLOCK_UVD | 5460 RADEON_CG_BLOCK_HDP), false); 5461 5462 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); 5463 } 5464 5465 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev, 5466 bool enable) 5467 { 5468 u32 data, orig; 5469 5470 orig = data = RREG32(RLC_PG_CNTL); 5471 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS)) 5472 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE; 5473 else 5474 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE; 5475 if (orig != data) 5476 WREG32(RLC_PG_CNTL, data); 5477 } 5478 5479 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev, 5480 bool enable) 5481 { 5482 u32 data, orig; 5483 5484 orig = data = RREG32(RLC_PG_CNTL); 5485 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS)) 5486 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE; 5487 else 5488 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE; 5489 if (orig != data) 5490 WREG32(RLC_PG_CNTL, data); 5491 } 5492 5493 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable) 5494 { 5495 u32 data, orig; 5496 5497 orig = data = RREG32(RLC_PG_CNTL); 5498 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP)) 5499 data &= ~DISABLE_CP_PG; 5500 else 5501 data |= DISABLE_CP_PG; 5502 if (orig != data) 5503 WREG32(RLC_PG_CNTL, data); 5504 } 5505 5506 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable) 5507 { 5508 u32 data, orig; 5509 5510 orig = data = RREG32(RLC_PG_CNTL); 5511 if (enable && (rdev->pg_flags & 
RADEON_PG_SUPPORT_GDS)) 5512 data &= ~DISABLE_GDS_PG; 5513 else 5514 data |= DISABLE_GDS_PG; 5515 if (orig != data) 5516 WREG32(RLC_PG_CNTL, data); 5517 } 5518 5519 #define CP_ME_TABLE_SIZE 96 5520 #define CP_ME_TABLE_OFFSET 2048 5521 #define CP_MEC_TABLE_OFFSET 4096 5522 5523 void cik_init_cp_pg_table(struct radeon_device *rdev) 5524 { 5525 const __be32 *fw_data; 5526 volatile u32 *dst_ptr; 5527 int me, i, max_me = 4; 5528 u32 bo_offset = 0; 5529 u32 table_offset; 5530 5531 if (rdev->family == CHIP_KAVERI) 5532 max_me = 5; 5533 5534 if (rdev->rlc.cp_table_ptr == NULL) 5535 return; 5536 5537 /* write the cp table buffer */ 5538 dst_ptr = rdev->rlc.cp_table_ptr; 5539 for (me = 0; me < max_me; me++) { 5540 if (me == 0) { 5541 fw_data = (const __be32 *)rdev->ce_fw->data; 5542 table_offset = CP_ME_TABLE_OFFSET; 5543 } else if (me == 1) { 5544 fw_data = (const __be32 *)rdev->pfp_fw->data; 5545 table_offset = CP_ME_TABLE_OFFSET; 5546 } else if (me == 2) { 5547 fw_data = (const __be32 *)rdev->me_fw->data; 5548 table_offset = CP_ME_TABLE_OFFSET; 5549 } else { 5550 fw_data = (const __be32 *)rdev->mec_fw->data; 5551 table_offset = CP_MEC_TABLE_OFFSET; 5552 } 5553 5554 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) { 5555 dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]); 5556 } 5557 bo_offset += CP_ME_TABLE_SIZE; 5558 } 5559 } 5560 5561 static void cik_enable_gfx_cgpg(struct radeon_device *rdev, 5562 bool enable) 5563 { 5564 u32 data, orig; 5565 5566 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) { 5567 orig = data = RREG32(RLC_PG_CNTL); 5568 data |= GFX_PG_ENABLE; 5569 if (orig != data) 5570 WREG32(RLC_PG_CNTL, data); 5571 5572 orig = data = RREG32(RLC_AUTO_PG_CTRL); 5573 data |= AUTO_PG_EN; 5574 if (orig != data) 5575 WREG32(RLC_AUTO_PG_CTRL, data); 5576 } else { 5577 orig = data = RREG32(RLC_PG_CNTL); 5578 data &= ~GFX_PG_ENABLE; 5579 if (orig != data) 5580 WREG32(RLC_PG_CNTL, data); 5581 5582 orig = data = RREG32(RLC_AUTO_PG_CTRL); 5583 data &= ~AUTO_PG_EN; 5584 if (orig != data) 5585 WREG32(RLC_AUTO_PG_CTRL, data); 5586 5587 data = RREG32(DB_RENDER_CONTROL); 5588 } 5589 } 5590 5591 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh) 5592 { 5593 u32 mask = 0, tmp, tmp1; 5594 int i; 5595 5596 cik_select_se_sh(rdev, se, sh); 5597 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG); 5598 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG); 5599 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5600 5601 tmp &= 0xffff0000; 5602 5603 tmp |= tmp1; 5604 tmp >>= 16; 5605 5606 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) { 5607 mask <<= 1; 5608 mask |= 1; 5609 } 5610 5611 return (~tmp) & mask; 5612 } 5613 5614 static void cik_init_ao_cu_mask(struct radeon_device *rdev) 5615 { 5616 u32 i, j, k, active_cu_number = 0; 5617 u32 mask, counter, cu_bitmap; 5618 u32 tmp = 0; 5619 5620 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { 5621 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { 5622 mask = 1; 5623 cu_bitmap = 0; 5624 counter = 0; 5625 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) { 5626 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) { 5627 if (counter < 2) 5628 cu_bitmap |= mask; 5629 counter ++; 5630 } 5631 mask <<= 1; 5632 } 5633 5634 active_cu_number += counter; 5635 tmp |= (cu_bitmap << (i * 16 + j * 8)); 5636 } 5637 } 5638 5639 WREG32(RLC_PG_AO_CU_MASK, tmp); 5640 5641 tmp = RREG32(RLC_MAX_PG_CU); 5642 tmp &= ~MAX_PU_CU_MASK; 5643 tmp |= MAX_PU_CU(active_cu_number); 5644 WREG32(RLC_MAX_PG_CU, tmp); 5645 } 5646 5647 static void 
cik_enable_gfx_static_mgpg(struct radeon_device *rdev, 5648 bool enable) 5649 { 5650 u32 data, orig; 5651 5652 orig = data = RREG32(RLC_PG_CNTL); 5653 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG)) 5654 data |= STATIC_PER_CU_PG_ENABLE; 5655 else 5656 data &= ~STATIC_PER_CU_PG_ENABLE; 5657 if (orig != data) 5658 WREG32(RLC_PG_CNTL, data); 5659 } 5660 5661 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev, 5662 bool enable) 5663 { 5664 u32 data, orig; 5665 5666 orig = data = RREG32(RLC_PG_CNTL); 5667 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG)) 5668 data |= DYN_PER_CU_PG_ENABLE; 5669 else 5670 data &= ~DYN_PER_CU_PG_ENABLE; 5671 if (orig != data) 5672 WREG32(RLC_PG_CNTL, data); 5673 } 5674 5675 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90 5676 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D 5677 5678 static void cik_init_gfx_cgpg(struct radeon_device *rdev) 5679 { 5680 u32 data, orig; 5681 u32 i; 5682 5683 if (rdev->rlc.cs_data) { 5684 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET); 5685 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr)); 5686 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr)); 5687 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size); 5688 } else { 5689 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET); 5690 for (i = 0; i < 3; i++) 5691 WREG32(RLC_GPM_SCRATCH_DATA, 0); 5692 } 5693 if (rdev->rlc.reg_list) { 5694 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET); 5695 for (i = 0; i < rdev->rlc.reg_list_size; i++) 5696 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]); 5697 } 5698 5699 orig = data = RREG32(RLC_PG_CNTL); 5700 data |= GFX_PG_SRC; 5701 if (orig != data) 5702 WREG32(RLC_PG_CNTL, data); 5703 5704 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8); 5705 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8); 5706 5707 data = RREG32(CP_RB_WPTR_POLL_CNTL); 5708 data &= ~IDLE_POLL_COUNT_MASK; 5709 data |= IDLE_POLL_COUNT(0x60); 5710 WREG32(CP_RB_WPTR_POLL_CNTL, data); 5711 5712 data = 0x10101010; 5713 WREG32(RLC_PG_DELAY, data); 5714 5715 data = RREG32(RLC_PG_DELAY_2); 5716 data &= ~0xff; 5717 data |= 0x3; 5718 WREG32(RLC_PG_DELAY_2, data); 5719 5720 data = RREG32(RLC_AUTO_PG_CTRL); 5721 data &= ~GRBM_REG_SGIT_MASK; 5722 data |= GRBM_REG_SGIT(0x700); 5723 WREG32(RLC_AUTO_PG_CTRL, data); 5724 5725 } 5726 5727 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable) 5728 { 5729 cik_enable_gfx_cgpg(rdev, enable); 5730 cik_enable_gfx_static_mgpg(rdev, enable); 5731 cik_enable_gfx_dynamic_mgpg(rdev, enable); 5732 } 5733 5734 u32 cik_get_csb_size(struct radeon_device *rdev) 5735 { 5736 u32 count = 0; 5737 const struct cs_section_def *sect = NULL; 5738 const struct cs_extent_def *ext = NULL; 5739 5740 if (rdev->rlc.cs_data == NULL) 5741 return 0; 5742 5743 /* begin clear state */ 5744 count += 2; 5745 /* context control state */ 5746 count += 3; 5747 5748 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) { 5749 for (ext = sect->section; ext->extent != NULL; ++ext) { 5750 if (sect->id == SECT_CONTEXT) 5751 count += 2 + ext->reg_count; 5752 else 5753 return 0; 5754 } 5755 } 5756 /* pa_sc_raster_config/pa_sc_raster_config1 */ 5757 count += 4; 5758 /* end clear state */ 5759 count += 2; 5760 /* clear state */ 5761 count += 2; 5762 5763 return count; 5764 } 5765 5766 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer) 5767 { 5768 u32 count = 0, i; 5769 
const struct cs_section_def *sect = NULL; 5770 const struct cs_extent_def *ext = NULL; 5771 5772 if (rdev->rlc.cs_data == NULL) 5773 return; 5774 if (buffer == NULL) 5775 return; 5776 5777 buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0); 5778 buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE; 5779 5780 buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1); 5781 buffer[count++] = 0x80000000; 5782 buffer[count++] = 0x80000000; 5783 5784 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) { 5785 for (ext = sect->section; ext->extent != NULL; ++ext) { 5786 if (sect->id == SECT_CONTEXT) { 5787 buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count); 5788 buffer[count++] = ext->reg_index - 0xa000; 5789 for (i = 0; i < ext->reg_count; i++) 5790 buffer[count++] = ext->extent[i]; 5791 } else { 5792 return; 5793 } 5794 } 5795 } 5796 5797 buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 5798 buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START; 5799 switch (rdev->family) { 5800 case CHIP_BONAIRE: 5801 buffer[count++] = 0x16000012; 5802 buffer[count++] = 0x00000000; 5803 break; 5804 case CHIP_KAVERI: 5805 buffer[count++] = 0x00000000; /* XXX */ 5806 buffer[count++] = 0x00000000; 5807 break; 5808 case CHIP_KABINI: 5809 buffer[count++] = 0x00000000; /* XXX */ 5810 buffer[count++] = 0x00000000; 5811 break; 5812 default: 5813 buffer[count++] = 0x00000000; 5814 buffer[count++] = 0x00000000; 5815 break; 5816 } 5817 5818 buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0); 5819 buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE; 5820 5821 buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0); 5822 buffer[count++] = 0; 5823 } 5824 5825 static void cik_init_pg(struct radeon_device *rdev) 5826 { 5827 if (rdev->pg_flags) { 5828 cik_enable_sck_slowdown_on_pu(rdev, true); 5829 cik_enable_sck_slowdown_on_pd(rdev, true); 5830 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) { 5831 cik_init_gfx_cgpg(rdev); 5832 cik_enable_cp_pg(rdev, true); 5833 cik_enable_gds_pg(rdev, true); 5834 } 5835 cik_init_ao_cu_mask(rdev); 5836 cik_update_gfx_pg(rdev, true); 5837 } 5838 } 5839 5840 static void cik_fini_pg(struct radeon_device *rdev) 5841 { 5842 if (rdev->pg_flags) { 5843 cik_update_gfx_pg(rdev, false); 5844 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) { 5845 cik_enable_cp_pg(rdev, false); 5846 cik_enable_gds_pg(rdev, false); 5847 } 5848 } 5849 } 5850 5851 /* 5852 * Interrupts 5853 * Starting with r6xx, interrupts are handled via a ring buffer. 5854 * Ring buffers are areas of GPU accessible memory that the GPU 5855 * writes interrupt vectors into and the host reads vectors out of. 5856 * There is a rptr (read pointer) that determines where the 5857 * host is currently reading, and a wptr (write pointer) 5858 * which determines where the GPU has written. When the 5859 * pointers are equal, the ring is idle. When the GPU 5860 * writes vectors to the ring buffer, it increments the 5861 * wptr. When there is an interrupt, the host then starts 5862 * fetching commands and processing them until the pointers are 5863 * equal again at which point it updates the rptr. 5864 */ 5865 5866 /** 5867 * cik_enable_interrupts - Enable the interrupt ring buffer 5868 * 5869 * @rdev: radeon_device pointer 5870 * 5871 * Enable the interrupt ring buffer (CIK). 
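* Sets ENABLE_INTR in IH_CNTL and IH_RB_ENABLE in IH_RB_CNTL; both registers are read back first so other fields are preserved, and rdev->ih.enabled is updated to match.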
5872 */ 5873 static void cik_enable_interrupts(struct radeon_device *rdev) 5874 { 5875 u32 ih_cntl = RREG32(IH_CNTL); 5876 u32 ih_rb_cntl = RREG32(IH_RB_CNTL); 5877 5878 ih_cntl |= ENABLE_INTR; 5879 ih_rb_cntl |= IH_RB_ENABLE; 5880 WREG32(IH_CNTL, ih_cntl); 5881 WREG32(IH_RB_CNTL, ih_rb_cntl); 5882 rdev->ih.enabled = true; 5883 } 5884 5885 /** 5886 * cik_disable_interrupts - Disable the interrupt ring buffer 5887 * 5888 * @rdev: radeon_device pointer 5889 * 5890 * Disable the interrupt ring buffer (CIK). 5891 */ 5892 static void cik_disable_interrupts(struct radeon_device *rdev) 5893 { 5894 u32 ih_rb_cntl = RREG32(IH_RB_CNTL); 5895 u32 ih_cntl = RREG32(IH_CNTL); 5896 5897 ih_rb_cntl &= ~IH_RB_ENABLE; 5898 ih_cntl &= ~ENABLE_INTR; 5899 WREG32(IH_RB_CNTL, ih_rb_cntl); 5900 WREG32(IH_CNTL, ih_cntl); 5901 /* set rptr, wptr to 0 */ 5902 WREG32(IH_RB_RPTR, 0); 5903 WREG32(IH_RB_WPTR, 0); 5904 rdev->ih.enabled = false; 5905 rdev->ih.rptr = 0; 5906 } 5907 5908 /** 5909 * cik_disable_interrupt_state - Disable all interrupt sources 5910 * 5911 * @rdev: radeon_device pointer 5912 * 5913 * Clear all interrupt enable bits used by the driver (CIK). 5914 */ 5915 static void cik_disable_interrupt_state(struct radeon_device *rdev) 5916 { 5917 u32 tmp; 5918 5919 /* gfx ring */ 5920 tmp = RREG32(CP_INT_CNTL_RING0) & 5921 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 5922 WREG32(CP_INT_CNTL_RING0, tmp); 5923 /* sdma */ 5924 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE; 5925 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp); 5926 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE; 5927 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp); 5928 /* compute queues */ 5929 WREG32(CP_ME1_PIPE0_INT_CNTL, 0); 5930 WREG32(CP_ME1_PIPE1_INT_CNTL, 0); 5931 WREG32(CP_ME1_PIPE2_INT_CNTL, 0); 5932 WREG32(CP_ME1_PIPE3_INT_CNTL, 0); 5933 WREG32(CP_ME2_PIPE0_INT_CNTL, 0); 5934 WREG32(CP_ME2_PIPE1_INT_CNTL, 0); 5935 WREG32(CP_ME2_PIPE2_INT_CNTL, 0); 5936 WREG32(CP_ME2_PIPE3_INT_CNTL, 0); 5937 /* grbm */ 5938 WREG32(GRBM_INT_CNTL, 0); 5939 /* vline/vblank, etc. 
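(each CRTC has its own LB_INTERRUPT_MASK, cleared per controller below)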
*/ 5940 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); 5941 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); 5942 if (rdev->num_crtc >= 4) { 5943 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0); 5944 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0); 5945 } 5946 if (rdev->num_crtc >= 6) { 5947 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0); 5948 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0); 5949 } 5950 5951 /* dac hotplug */ 5952 WREG32(DAC_AUTODETECT_INT_CONTROL, 0); 5953 5954 /* digital hotplug */ 5955 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY; 5956 WREG32(DC_HPD1_INT_CONTROL, tmp); 5957 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY; 5958 WREG32(DC_HPD2_INT_CONTROL, tmp); 5959 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY; 5960 WREG32(DC_HPD3_INT_CONTROL, tmp); 5961 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY; 5962 WREG32(DC_HPD4_INT_CONTROL, tmp); 5963 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY; 5964 WREG32(DC_HPD5_INT_CONTROL, tmp); 5965 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY; 5966 WREG32(DC_HPD6_INT_CONTROL, tmp); 5967 5968 } 5969 5970 /** 5971 * cik_irq_init - init and enable the interrupt ring 5972 * 5973 * @rdev: radeon_device pointer 5974 * 5975 * Allocate a ring buffer for the interrupt controller, 5976 * enable the RLC, disable interrupts, set up the IH 5977 * ring buffer and enable it (CIK). 5978 * Called at device load and resume. 5979 * Returns 0 for success, errors for failure. 5980 */ 5981 static int cik_irq_init(struct radeon_device *rdev) 5982 { 5983 int ret = 0; 5984 int rb_bufsz; 5985 u32 interrupt_cntl, ih_cntl, ih_rb_cntl; 5986 5987 /* allocate ring */ 5988 ret = r600_ih_ring_alloc(rdev); 5989 if (ret) 5990 return ret; 5991 5992 /* disable irqs */ 5993 cik_disable_interrupts(rdev); 5994 5995 /* init rlc */ 5996 ret = cik_rlc_resume(rdev); 5997 if (ret) { 5998 r600_ih_ring_fini(rdev); 5999 return ret; 6000 } 6001 6002 /* setup interrupt control */ 6003 /* XXX this should actually be a bus address, not an MC address. 
same on older asics */ 6004 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); 6005 interrupt_cntl = RREG32(INTERRUPT_CNTL); 6006 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi 6007 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN 6008 */ 6009 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE; 6010 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */ 6011 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN; 6012 WREG32(INTERRUPT_CNTL, interrupt_cntl); 6013 6014 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8); 6015 rb_bufsz = order_base_2(rdev->ih.ring_size / 4); 6016 6017 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE | 6018 IH_WPTR_OVERFLOW_CLEAR | 6019 (rb_bufsz << 1)); 6020 6021 if (rdev->wb.enabled) 6022 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE; 6023 6024 /* set the writeback address whether it's enabled or not */ 6025 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC); 6026 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF); 6027 6028 WREG32(IH_RB_CNTL, ih_rb_cntl); 6029 6030 /* set rptr, wptr to 0 */ 6031 WREG32(IH_RB_RPTR, 0); 6032 WREG32(IH_RB_WPTR, 0); 6033 6034 /* Default settings for IH_CNTL (disabled at first) */ 6035 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0); 6036 /* RPTR_REARM only works if msi's are enabled */ 6037 if (rdev->msi_enabled) 6038 ih_cntl |= RPTR_REARM; 6039 WREG32(IH_CNTL, ih_cntl); 6040 6041 /* force the active interrupt state to all disabled */ 6042 cik_disable_interrupt_state(rdev); 6043 6044 pci_set_master(rdev->pdev); 6045 6046 /* enable irqs */ 6047 cik_enable_interrupts(rdev); 6048 6049 return ret; 6050 } 6051 6052 /** 6053 * cik_irq_set - enable/disable interrupt sources 6054 * 6055 * @rdev: radeon_device pointer 6056 * 6057 * Enable interrupt sources on the GPU (vblanks, hpd, 6058 * etc.) (CIK). 6059 * Returns 0 for success, errors for failure. 
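* Each enable bit is first masked off in a local copy of its control register, OR'd back in based on the rdev->irq state, and the registers are then written in one pass so unrelated bits survive untouched.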
6060 */ 6061 int cik_irq_set(struct radeon_device *rdev) 6062 { 6063 u32 cp_int_cntl; 6064 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3; 6065 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3; 6066 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0; 6067 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6; 6068 u32 grbm_int_cntl = 0; 6069 u32 dma_cntl, dma_cntl1; 6070 u32 thermal_int; 6071 6072 if (!rdev->irq.installed) { 6073 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); 6074 return -EINVAL; 6075 } 6076 /* don't enable anything if the ih is disabled */ 6077 if (!rdev->ih.enabled) { 6078 cik_disable_interrupts(rdev); 6079 /* force the active interrupt state to all disabled */ 6080 cik_disable_interrupt_state(rdev); 6081 return 0; 6082 } 6083 6084 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) & 6085 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 6086 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE; 6087 6088 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN; 6089 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN; 6090 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN; 6091 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN; 6092 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN; 6093 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN; 6094 6095 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE; 6096 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE; 6097 6098 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 6099 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 6100 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 6101 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 6102 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 6103 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 6104 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 6105 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 6106 6107 if (rdev->flags & RADEON_IS_IGP) 6108 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) & 6109 ~(THERM_INTH_MASK | THERM_INTL_MASK); 6110 else 6111 thermal_int = RREG32_SMC(CG_THERMAL_INT) & 6112 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW); 6113 6114 /* enable CP interrupts on all rings */ 6115 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { 6116 DRM_DEBUG("cik_irq_set: sw int gfx\n"); 6117 cp_int_cntl |= TIME_STAMP_INT_ENABLE; 6118 } 6119 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) { 6120 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 6121 DRM_DEBUG("cik_irq_set: sw int cp1\n"); 6122 if (ring->me == 1) { 6123 switch (ring->pipe) { 6124 case 0: 6125 cp_m1p0 |= TIME_STAMP_INT_ENABLE; 6126 break; 6127 case 1: 6128 cp_m1p1 |= TIME_STAMP_INT_ENABLE; 6129 break; 6130 case 2: 6131 cp_m1p2 |= TIME_STAMP_INT_ENABLE; 6132 break; 6133 case 3: 6134 cp_m1p3 |= TIME_STAMP_INT_ENABLE; 6135 break; 6136 default: 6137 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe); 6138 break; 6139 } 6140 } else if (ring->me == 2) { 6141 switch (ring->pipe) { 6142 case 0: 6143 cp_m2p0 |= TIME_STAMP_INT_ENABLE; 6144 break; 6145 case 1: 6146 cp_m2p1 |= TIME_STAMP_INT_ENABLE; 6147 break; 6148 case 2: 6149 cp_m2p2 |= TIME_STAMP_INT_ENABLE; 6150 break; 6151 case 3: 6152 cp_m2p3 |= TIME_STAMP_INT_ENABLE; 6153 break; 6154 default: 6155 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe); 6156 break; 6157 } 6158 } else { 6159 DRM_DEBUG("cik_irq_set: sw int 
cp1 invalid me %d\n", ring->me); 6160 } 6161 } 6162 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) { 6163 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 6164 DRM_DEBUG("cik_irq_set: sw int cp2\n"); 6165 if (ring->me == 1) { 6166 switch (ring->pipe) { 6167 case 0: 6168 cp_m1p0 |= TIME_STAMP_INT_ENABLE; 6169 break; 6170 case 1: 6171 cp_m1p1 |= TIME_STAMP_INT_ENABLE; 6172 break; 6173 case 2: 6174 cp_m1p2 |= TIME_STAMP_INT_ENABLE; 6175 break; 6176 case 3: 6177 cp_m1p3 |= TIME_STAMP_INT_ENABLE; 6178 break; 6179 default: 6180 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe); 6181 break; 6182 } 6183 } else if (ring->me == 2) { 6184 switch (ring->pipe) { 6185 case 0: 6186 cp_m2p0 |= TIME_STAMP_INT_ENABLE; 6187 break; 6188 case 1: 6189 cp_m2p1 |= TIME_STAMP_INT_ENABLE; 6190 break; 6191 case 2: 6192 cp_m2p2 |= TIME_STAMP_INT_ENABLE; 6193 break; 6194 case 3: 6195 cp_m2p3 |= TIME_STAMP_INT_ENABLE; 6196 break; 6197 default: 6198 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe); 6199 break; 6200 } 6201 } else { 6202 DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me); 6203 } 6204 } 6205 6206 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) { 6207 DRM_DEBUG("cik_irq_set: sw int dma\n"); 6208 dma_cntl |= TRAP_ENABLE; 6209 } 6210 6211 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) { 6212 DRM_DEBUG("cik_irq_set: sw int dma1\n"); 6213 dma_cntl1 |= TRAP_ENABLE; 6214 } 6215 6216 if (rdev->irq.crtc_vblank_int[0] || 6217 atomic_read(&rdev->irq.pflip[0])) { 6218 DRM_DEBUG("cik_irq_set: vblank 0\n"); 6219 crtc1 |= VBLANK_INTERRUPT_MASK; 6220 } 6221 if (rdev->irq.crtc_vblank_int[1] || 6222 atomic_read(&rdev->irq.pflip[1])) { 6223 DRM_DEBUG("cik_irq_set: vblank 1\n"); 6224 crtc2 |= VBLANK_INTERRUPT_MASK; 6225 } 6226 if (rdev->irq.crtc_vblank_int[2] || 6227 atomic_read(&rdev->irq.pflip[2])) { 6228 DRM_DEBUG("cik_irq_set: vblank 2\n"); 6229 crtc3 |= VBLANK_INTERRUPT_MASK; 6230 } 6231 if (rdev->irq.crtc_vblank_int[3] || 6232 atomic_read(&rdev->irq.pflip[3])) { 6233 DRM_DEBUG("cik_irq_set: vblank 3\n"); 6234 crtc4 |= VBLANK_INTERRUPT_MASK; 6235 } 6236 if (rdev->irq.crtc_vblank_int[4] || 6237 atomic_read(&rdev->irq.pflip[4])) { 6238 DRM_DEBUG("cik_irq_set: vblank 4\n"); 6239 crtc5 |= VBLANK_INTERRUPT_MASK; 6240 } 6241 if (rdev->irq.crtc_vblank_int[5] || 6242 atomic_read(&rdev->irq.pflip[5])) { 6243 DRM_DEBUG("cik_irq_set: vblank 5\n"); 6244 crtc6 |= VBLANK_INTERRUPT_MASK; 6245 } 6246 if (rdev->irq.hpd[0]) { 6247 DRM_DEBUG("cik_irq_set: hpd 1\n"); 6248 hpd1 |= DC_HPDx_INT_EN; 6249 } 6250 if (rdev->irq.hpd[1]) { 6251 DRM_DEBUG("cik_irq_set: hpd 2\n"); 6252 hpd2 |= DC_HPDx_INT_EN; 6253 } 6254 if (rdev->irq.hpd[2]) { 6255 DRM_DEBUG("cik_irq_set: hpd 3\n"); 6256 hpd3 |= DC_HPDx_INT_EN; 6257 } 6258 if (rdev->irq.hpd[3]) { 6259 DRM_DEBUG("cik_irq_set: hpd 4\n"); 6260 hpd4 |= DC_HPDx_INT_EN; 6261 } 6262 if (rdev->irq.hpd[4]) { 6263 DRM_DEBUG("cik_irq_set: hpd 5\n"); 6264 hpd5 |= DC_HPDx_INT_EN; 6265 } 6266 if (rdev->irq.hpd[5]) { 6267 DRM_DEBUG("cik_irq_set: hpd 6\n"); 6268 hpd6 |= DC_HPDx_INT_EN; 6269 } 6270 6271 if (rdev->irq.dpm_thermal) { 6272 DRM_DEBUG("dpm thermal\n"); 6273 if (rdev->flags & RADEON_IS_IGP) 6274 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK; 6275 else 6276 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW; 6277 } 6278 6279 WREG32(CP_INT_CNTL_RING0, cp_int_cntl); 6280 6281 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl); 6282 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, 
dma_cntl1); 6283 6284 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0); 6285 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1); 6286 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2); 6287 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3); 6288 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0); 6289 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1); 6290 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2); 6291 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3); 6292 6293 WREG32(GRBM_INT_CNTL, grbm_int_cntl); 6294 6295 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); 6296 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2); 6297 if (rdev->num_crtc >= 4) { 6298 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3); 6299 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4); 6300 } 6301 if (rdev->num_crtc >= 6) { 6302 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5); 6303 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6); 6304 } 6305 6306 WREG32(DC_HPD1_INT_CONTROL, hpd1); 6307 WREG32(DC_HPD2_INT_CONTROL, hpd2); 6308 WREG32(DC_HPD3_INT_CONTROL, hpd3); 6309 WREG32(DC_HPD4_INT_CONTROL, hpd4); 6310 WREG32(DC_HPD5_INT_CONTROL, hpd5); 6311 WREG32(DC_HPD6_INT_CONTROL, hpd6); 6312 6313 if (rdev->flags & RADEON_IS_IGP) 6314 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int); 6315 else 6316 WREG32_SMC(CG_THERMAL_INT, thermal_int); 6317 6318 return 0; 6319 } 6320 6321 /** 6322 * cik_irq_ack - ack interrupt sources 6323 * 6324 * @rdev: radeon_device pointer 6325 * 6326 * Ack interrupt sources on the GPU (vblanks, hpd, 6327 * etc.) (CIK). Certain interrupt sources are sw 6328 * generated and do not require an explicit ack. 6329 */ 6330 static inline void cik_irq_ack(struct radeon_device *rdev) 6331 { 6332 u32 tmp; 6333 6334 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS); 6335 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE); 6336 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2); 6337 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3); 6338 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4); 6339 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5); 6340 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6); 6341 6342 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) 6343 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK); 6344 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) 6345 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK); 6346 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) 6347 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK); 6348 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) 6349 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK); 6350 6351 if (rdev->num_crtc >= 4) { 6352 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) 6353 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK); 6354 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) 6355 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK); 6356 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) 6357 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK); 6358 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) 6359 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK); 6360 
} 6361 6362 if (rdev->num_crtc >= 6) { 6363 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) 6364 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK); 6365 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) 6366 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK); 6367 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) 6368 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK); 6369 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) 6370 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK); 6371 } 6372 6373 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) { 6374 tmp = RREG32(DC_HPD1_INT_CONTROL); 6375 tmp |= DC_HPDx_INT_ACK; 6376 WREG32(DC_HPD1_INT_CONTROL, tmp); 6377 } 6378 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) { 6379 tmp = RREG32(DC_HPD2_INT_CONTROL); 6380 tmp |= DC_HPDx_INT_ACK; 6381 WREG32(DC_HPD2_INT_CONTROL, tmp); 6382 } 6383 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) { 6384 tmp = RREG32(DC_HPD3_INT_CONTROL); 6385 tmp |= DC_HPDx_INT_ACK; 6386 WREG32(DC_HPD3_INT_CONTROL, tmp); 6387 } 6388 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) { 6389 tmp = RREG32(DC_HPD4_INT_CONTROL); 6390 tmp |= DC_HPDx_INT_ACK; 6391 WREG32(DC_HPD4_INT_CONTROL, tmp); 6392 } 6393 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) { 6394 tmp = RREG32(DC_HPD5_INT_CONTROL); 6395 tmp |= DC_HPDx_INT_ACK; 6396 WREG32(DC_HPD5_INT_CONTROL, tmp); 6397 } 6398 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { 6399 tmp = RREG32(DC_HPD6_INT_CONTROL); 6400 tmp |= DC_HPDx_INT_ACK; 6401 WREG32(DC_HPD6_INT_CONTROL, tmp); 6402 } 6403 } 6404 6405 /** 6406 * cik_irq_disable - disable interrupts 6407 * 6408 * @rdev: radeon_device pointer 6409 * 6410 * Disable interrupts on the hw (CIK). 6411 */ 6412 static void cik_irq_disable(struct radeon_device *rdev) 6413 { 6414 cik_disable_interrupts(rdev); 6415 /* Wait and acknowledge irq */ 6416 mdelay(1); 6417 cik_irq_ack(rdev); 6418 cik_disable_interrupt_state(rdev); 6419 } 6420 6421 /** 6422 * cik_irq_suspend - disable interrupts for suspend 6423 * 6424 * @rdev: radeon_device pointer 6425 * 6426 * Disable interrupts and stop the RLC (CIK). 6427 * Used for suspend. 6428 */ 6429 static void cik_irq_suspend(struct radeon_device *rdev) 6430 { 6431 cik_irq_disable(rdev); 6432 cik_rlc_stop(rdev); 6433 } 6434 6435 /** 6436 * cik_irq_fini - tear down interrupt support 6437 * 6438 * @rdev: radeon_device pointer 6439 * 6440 * Disable interrupts on the hw and free the IH ring 6441 * buffer (CIK). 6442 * Used for driver unload. 6443 */ 6444 static void cik_irq_fini(struct radeon_device *rdev) 6445 { 6446 cik_irq_suspend(rdev); 6447 r600_ih_ring_fini(rdev); 6448 } 6449 6450 /** 6451 * cik_get_ih_wptr - get the IH ring buffer wptr 6452 * 6453 * @rdev: radeon_device pointer 6454 * 6455 * Get the IH ring buffer wptr from either the register 6456 * or the writeback memory buffer (CIK). Also check for 6457 * ring buffer overflow and deal with it. 6458 * Used by cik_irq_process(). 6459 * Returns the value of the wptr. 
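* On overflow the hardware sets RB_OVERFLOW in the wptr; the rptr is then moved to 16 bytes past the returned wptr (the oldest vector not yet overwritten) and the flag is cleared via IH_WPTR_OVERFLOW_CLEAR.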
6460 */ 6461 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev) 6462 { 6463 u32 wptr, tmp; 6464 6465 if (rdev->wb.enabled) 6466 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]); 6467 else 6468 wptr = RREG32(IH_RB_WPTR); 6469 6470 if (wptr & RB_OVERFLOW) { 6471 /* When a ring buffer overflow happens, start parsing interrupts 6472 * from the last not-overwritten vector (wptr + 16). Hopefully 6473 * this should allow us to catch up. 6474 */ 6475 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n", 6476 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask); 6477 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask; 6478 tmp = RREG32(IH_RB_CNTL); 6479 tmp |= IH_WPTR_OVERFLOW_CLEAR; 6480 WREG32(IH_RB_CNTL, tmp); 6481 } 6482 return (wptr & rdev->ih.ptr_mask); 6483 } 6484 6485 /* CIK IV Ring 6486 * Each IV ring entry is 128 bits: 6487 * [7:0] - interrupt source id 6488 * [31:8] - reserved 6489 * [59:32] - interrupt source data 6490 * [63:60] - reserved 6491 * [71:64] - RINGID 6492 * CP: 6493 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0] 6494 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher 6495 * - for gfx, hw shader state (0=PS...5=LS, 6=CS) 6496 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes 6497 * PIPE_ID - ME0 0=3D 6498 * - ME1&2 compute dispatcher (4 pipes each) 6499 * SDMA: 6500 * INSTANCE_ID [1:0], QUEUE_ID[1:0] 6501 * INSTANCE_ID - 0 = sdma0, 1 = sdma1 6502 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1 6503 * [79:72] - VMID 6504 * [95:80] - PASID 6505 * [127:96] - reserved 6506 */ 6507 /** 6508 * cik_irq_process - interrupt handler 6509 * 6510 * @rdev: radeon_device pointer 6511 * 6512 * Interrupt handler (CIK). Walk the IH ring, 6513 * ack interrupts and schedule work to handle 6514 * interrupt events. 6515 * Returns irq process return code. 6516 */ 6517 int cik_irq_process(struct radeon_device *rdev) 6518 { 6519 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 6520 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 6521 u32 wptr; 6522 u32 rptr; 6523 u32 src_id, src_data, ring_id; 6524 u8 me_id, pipe_id, queue_id; 6525 u32 ring_index; 6526 bool queue_hotplug = false; 6527 bool queue_reset = false; 6528 u32 addr, status, mc_client; 6529 bool queue_thermal = false; 6530 6531 if (!rdev->ih.enabled || rdev->shutdown) 6532 return IRQ_NONE; 6533 6534 wptr = cik_get_ih_wptr(rdev); 6535 6536 restart_ih: 6537 /* is somebody else already processing irqs? */ 6538 if (atomic_xchg(&rdev->ih.lock, 1)) 6539 return IRQ_NONE; 6540 6541 rptr = rdev->ih.rptr; 6542 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr); 6543 6544 /* Order reading of wptr vs. reading of IH ring data */ 6545 rmb(); 6546 6547 /* display interrupts */ 6548 cik_irq_ack(rdev); 6549 6550 while (rptr != wptr) { 6551 /* wptr/rptr are in bytes! 
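* Each IV entry is 16 bytes (four dwords), so the dword index below is rptr / 4 and rptr advances by 16 per entry, wrapping through rdev->ih.ptr_mask.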
*/ 6552 ring_index = rptr / 4; 6553 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; 6554 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; 6555 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; 6556 6557 switch (src_id) { 6558 case 1: /* D1 vblank/vline */ 6559 switch (src_data) { 6560 case 0: /* D1 vblank */ 6561 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) { 6562 if (rdev->irq.crtc_vblank_int[0]) { 6563 drm_handle_vblank(rdev->ddev, 0); 6564 rdev->pm.vblank_sync = true; 6565 wake_up(&rdev->irq.vblank_queue); 6566 } 6567 if (atomic_read(&rdev->irq.pflip[0])) 6568 radeon_crtc_handle_flip(rdev, 0); 6569 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; 6570 DRM_DEBUG("IH: D1 vblank\n"); 6571 } 6572 break; 6573 case 1: /* D1 vline */ 6574 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) { 6575 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; 6576 DRM_DEBUG("IH: D1 vline\n"); 6577 } 6578 break; 6579 default: 6580 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6581 break; 6582 } 6583 break; 6584 case 2: /* D2 vblank/vline */ 6585 switch (src_data) { 6586 case 0: /* D2 vblank */ 6587 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { 6588 if (rdev->irq.crtc_vblank_int[1]) { 6589 drm_handle_vblank(rdev->ddev, 1); 6590 rdev->pm.vblank_sync = true; 6591 wake_up(&rdev->irq.vblank_queue); 6592 } 6593 if (atomic_read(&rdev->irq.pflip[1])) 6594 radeon_crtc_handle_flip(rdev, 1); 6595 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; 6596 DRM_DEBUG("IH: D2 vblank\n"); 6597 } 6598 break; 6599 case 1: /* D2 vline */ 6600 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) { 6601 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; 6602 DRM_DEBUG("IH: D2 vline\n"); 6603 } 6604 break; 6605 default: 6606 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6607 break; 6608 } 6609 break; 6610 case 3: /* D3 vblank/vline */ 6611 switch (src_data) { 6612 case 0: /* D3 vblank */ 6613 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { 6614 if (rdev->irq.crtc_vblank_int[2]) { 6615 drm_handle_vblank(rdev->ddev, 2); 6616 rdev->pm.vblank_sync = true; 6617 wake_up(&rdev->irq.vblank_queue); 6618 } 6619 if (atomic_read(&rdev->irq.pflip[2])) 6620 radeon_crtc_handle_flip(rdev, 2); 6621 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; 6622 DRM_DEBUG("IH: D3 vblank\n"); 6623 } 6624 break; 6625 case 1: /* D3 vline */ 6626 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { 6627 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; 6628 DRM_DEBUG("IH: D3 vline\n"); 6629 } 6630 break; 6631 default: 6632 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6633 break; 6634 } 6635 break; 6636 case 4: /* D4 vblank/vline */ 6637 switch (src_data) { 6638 case 0: /* D4 vblank */ 6639 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { 6640 if (rdev->irq.crtc_vblank_int[3]) { 6641 drm_handle_vblank(rdev->ddev, 3); 6642 rdev->pm.vblank_sync = true; 6643 wake_up(&rdev->irq.vblank_queue); 6644 } 6645 if (atomic_read(&rdev->irq.pflip[3])) 6646 radeon_crtc_handle_flip(rdev, 3); 6647 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; 6648 DRM_DEBUG("IH: D4 vblank\n"); 6649 } 6650 break; 6651 case 1: /* D4 vline */ 6652 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { 6653 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; 6654 
DRM_DEBUG("IH: D4 vline\n"); 6655 } 6656 break; 6657 default: 6658 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6659 break; 6660 } 6661 break; 6662 case 5: /* D5 vblank/vline */ 6663 switch (src_data) { 6664 case 0: /* D5 vblank */ 6665 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { 6666 if (rdev->irq.crtc_vblank_int[4]) { 6667 drm_handle_vblank(rdev->ddev, 4); 6668 rdev->pm.vblank_sync = true; 6669 wake_up(&rdev->irq.vblank_queue); 6670 } 6671 if (atomic_read(&rdev->irq.pflip[4])) 6672 radeon_crtc_handle_flip(rdev, 4); 6673 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; 6674 DRM_DEBUG("IH: D5 vblank\n"); 6675 } 6676 break; 6677 case 1: /* D5 vline */ 6678 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { 6679 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; 6680 DRM_DEBUG("IH: D5 vline\n"); 6681 } 6682 break; 6683 default: 6684 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6685 break; 6686 } 6687 break; 6688 case 6: /* D6 vblank/vline */ 6689 switch (src_data) { 6690 case 0: /* D6 vblank */ 6691 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { 6692 if (rdev->irq.crtc_vblank_int[5]) { 6693 drm_handle_vblank(rdev->ddev, 5); 6694 rdev->pm.vblank_sync = true; 6695 wake_up(&rdev->irq.vblank_queue); 6696 } 6697 if (atomic_read(&rdev->irq.pflip[5])) 6698 radeon_crtc_handle_flip(rdev, 5); 6699 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; 6700 DRM_DEBUG("IH: D6 vblank\n"); 6701 } 6702 break; 6703 case 1: /* D6 vline */ 6704 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { 6705 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; 6706 DRM_DEBUG("IH: D6 vline\n"); 6707 } 6708 break; 6709 default: 6710 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6711 break; 6712 } 6713 break; 6714 case 42: /* HPD hotplug */ 6715 switch (src_data) { 6716 case 0: 6717 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) { 6718 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; 6719 queue_hotplug = true; 6720 DRM_DEBUG("IH: HPD1\n"); 6721 } 6722 break; 6723 case 1: 6724 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) { 6725 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; 6726 queue_hotplug = true; 6727 DRM_DEBUG("IH: HPD2\n"); 6728 } 6729 break; 6730 case 2: 6731 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) { 6732 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; 6733 queue_hotplug = true; 6734 DRM_DEBUG("IH: HPD3\n"); 6735 } 6736 break; 6737 case 3: 6738 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) { 6739 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; 6740 queue_hotplug = true; 6741 DRM_DEBUG("IH: HPD4\n"); 6742 } 6743 break; 6744 case 4: 6745 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) { 6746 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; 6747 queue_hotplug = true; 6748 DRM_DEBUG("IH: HPD5\n"); 6749 } 6750 break; 6751 case 5: 6752 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { 6753 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; 6754 queue_hotplug = true; 6755 DRM_DEBUG("IH: HPD6\n"); 6756 } 6757 break; 6758 default: 6759 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6760 break; 6761 } 6762 break; 6763 case 124: /* UVD */ 6764 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data); 6765 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX); 
6766 break; 6767 case 146: 6768 case 147: 6769 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR); 6770 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS); 6771 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT); 6772 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data); 6773 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", 6774 addr); 6775 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", 6776 status); 6777 cik_vm_decode_fault(rdev, status, addr, mc_client); 6778 /* reset addr and status */ 6779 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1); 6780 break; 6781 case 176: /* GFX RB CP_INT */ 6782 case 177: /* GFX IB CP_INT */ 6783 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); 6784 break; 6785 case 181: /* CP EOP event */ 6786 DRM_DEBUG("IH: CP EOP\n"); 6787 /* XXX check the bitfield order! */ 6788 me_id = (ring_id & 0x60) >> 5; 6789 pipe_id = (ring_id & 0x18) >> 3; 6790 queue_id = (ring_id & 0x7) >> 0; 6791 switch (me_id) { 6792 case 0: 6793 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); 6794 break; 6795 case 1: 6796 case 2: 6797 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id)) 6798 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX); 6799 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id)) 6800 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX); 6801 break; 6802 } 6803 break; 6804 case 184: /* CP Privileged reg access */ 6805 DRM_ERROR("Illegal register access in command stream\n"); 6806 /* XXX check the bitfield order! */ 6807 me_id = (ring_id & 0x60) >> 5; 6808 pipe_id = (ring_id & 0x18) >> 3; 6809 queue_id = (ring_id & 0x7) >> 0; 6810 switch (me_id) { 6811 case 0: 6812 /* This results in a full GPU reset, but all we need to do is soft 6813 * reset the CP for gfx 6814 */ 6815 queue_reset = true; 6816 break; 6817 case 1: 6818 /* XXX compute */ 6819 queue_reset = true; 6820 break; 6821 case 2: 6822 /* XXX compute */ 6823 queue_reset = true; 6824 break; 6825 } 6826 break; 6827 case 185: /* CP Privileged inst */ 6828 DRM_ERROR("Illegal instruction in command stream\n"); 6829 /* XXX check the bitfield order! */ 6830 me_id = (ring_id & 0x60) >> 5; 6831 pipe_id = (ring_id & 0x18) >> 3; 6832 queue_id = (ring_id & 0x7) >> 0; 6833 switch (me_id) { 6834 case 0: 6835 /* This results in a full GPU reset, but all we need to do is soft 6836 * reset the CP for gfx 6837 */ 6838 queue_reset = true; 6839 break; 6840 case 1: 6841 /* XXX compute */ 6842 queue_reset = true; 6843 break; 6844 case 2: 6845 /* XXX compute */ 6846 queue_reset = true; 6847 break; 6848 } 6849 break; 6850 case 224: /* SDMA trap event */ 6851 /* XXX check the bitfield order! 
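* For SDMA the RINGID encodes the instance in bits [1:0] and the queue in bits [3:2], per the IV ring layout above.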
*/ 6852 me_id = (ring_id & 0x3) >> 0; 6853 queue_id = (ring_id & 0xc) >> 2; 6854 DRM_DEBUG("IH: SDMA trap\n"); 6855 switch (me_id) { 6856 case 0: 6857 switch (queue_id) { 6858 case 0: 6859 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX); 6860 break; 6861 case 1: 6862 /* XXX compute */ 6863 break; 6864 case 2: 6865 /* XXX compute */ 6866 break; 6867 } 6868 break; 6869 case 1: 6870 switch (queue_id) { 6871 case 0: 6872 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); 6873 break; 6874 case 1: 6875 /* XXX compute */ 6876 break; 6877 case 2: 6878 /* XXX compute */ 6879 break; 6880 } 6881 break; 6882 } 6883 break; 6884 case 230: /* thermal low to high */ 6885 DRM_DEBUG("IH: thermal low to high\n"); 6886 rdev->pm.dpm.thermal.high_to_low = false; 6887 queue_thermal = true; 6888 break; 6889 case 231: /* thermal high to low */ 6890 DRM_DEBUG("IH: thermal high to low\n"); 6891 rdev->pm.dpm.thermal.high_to_low = true; 6892 queue_thermal = true; 6893 break; 6894 case 233: /* GUI IDLE */ 6895 DRM_DEBUG("IH: GUI idle\n"); 6896 break; 6897 case 241: /* SDMA Privileged inst */ 6898 case 247: /* SDMA Privileged inst */ 6899 DRM_ERROR("Illegal instruction in SDMA command stream\n"); 6900 /* XXX check the bitfield order! */ 6901 me_id = (ring_id & 0x3) >> 0; 6902 queue_id = (ring_id & 0xc) >> 2; 6903 switch (me_id) { 6904 case 0: 6905 switch (queue_id) { 6906 case 0: 6907 queue_reset = true; 6908 break; 6909 case 1: 6910 /* XXX compute */ 6911 queue_reset = true; 6912 break; 6913 case 2: 6914 /* XXX compute */ 6915 queue_reset = true; 6916 break; 6917 } 6918 break; 6919 case 1: 6920 switch (queue_id) { 6921 case 0: 6922 queue_reset = true; 6923 break; 6924 case 1: 6925 /* XXX compute */ 6926 queue_reset = true; 6927 break; 6928 case 2: 6929 /* XXX compute */ 6930 queue_reset = true; 6931 break; 6932 } 6933 break; 6934 } 6935 break; 6936 default: 6937 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6938 break; 6939 } 6940 6941 /* wptr/rptr are in bytes! */ 6942 rptr += 16; 6943 rptr &= rdev->ih.ptr_mask; 6944 } 6945 if (queue_hotplug) 6946 schedule_work(&rdev->hotplug_work); 6947 if (queue_reset) 6948 schedule_work(&rdev->reset_work); 6949 if (queue_thermal) 6950 schedule_work(&rdev->pm.dpm.thermal.work); 6951 rdev->ih.rptr = rptr; 6952 WREG32(IH_RB_RPTR, rdev->ih.rptr); 6953 atomic_set(&rdev->ih.lock, 0); 6954 6955 /* make sure wptr hasn't changed while processing */ 6956 wptr = cik_get_ih_wptr(rdev); 6957 if (wptr != rptr) 6958 goto restart_ih; 6959 6960 return IRQ_HANDLED; 6961 } 6962 6963 /* 6964 * startup/shutdown callbacks 6965 */ 6966 /** 6967 * cik_startup - program the asic to a functional state 6968 * 6969 * @rdev: radeon_device pointer 6970 * 6971 * Programs the asic to a functional state (CIK). 6972 * Called by cik_init() and cik_resume(). 6973 * Returns 0 for success, error for failure. 
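* The bring-up order matters: vram scratch and MC programming come first, then microcode load, GART, RLC/WB/MEC buffers and fence drivers, then IRQs, and finally the CP, SDMA and UVD rings followed by the IB pool, VM manager and audio.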
6974 */ 6975 static int cik_startup(struct radeon_device *rdev) 6976 { 6977 struct radeon_ring *ring; 6978 int r; 6979 6980 /* enable pcie gen2/3 link */ 6981 cik_pcie_gen3_enable(rdev); 6982 /* enable aspm */ 6983 cik_program_aspm(rdev); 6984 6985 /* scratch needs to be initialized before MC */ 6986 r = r600_vram_scratch_init(rdev); 6987 if (r) 6988 return r; 6989 6990 cik_mc_program(rdev); 6991 6992 if (rdev->flags & RADEON_IS_IGP) { 6993 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || 6994 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) { 6995 r = cik_init_microcode(rdev); 6996 if (r) { 6997 DRM_ERROR("Failed to load firmware!\n"); 6998 return r; 6999 } 7000 } 7001 } else { 7002 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || 7003 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw || 7004 !rdev->mc_fw) { 7005 r = cik_init_microcode(rdev); 7006 if (r) { 7007 DRM_ERROR("Failed to load firmware!\n"); 7008 return r; 7009 } 7010 } 7011 7012 r = ci_mc_load_microcode(rdev); 7013 if (r) { 7014 DRM_ERROR("Failed to load MC firmware!\n"); 7015 return r; 7016 } 7017 } 7018 7019 r = cik_pcie_gart_enable(rdev); 7020 if (r) 7021 return r; 7022 cik_gpu_init(rdev); 7023 7024 /* allocate rlc buffers */ 7025 if (rdev->flags & RADEON_IS_IGP) { 7026 if (rdev->family == CHIP_KAVERI) { 7027 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list; 7028 rdev->rlc.reg_list_size = 7029 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list); 7030 } else { 7031 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list; 7032 rdev->rlc.reg_list_size = 7033 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list); 7034 } 7035 } 7036 rdev->rlc.cs_data = ci_cs_data; 7037 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4; 7038 r = sumo_rlc_init(rdev); 7039 if (r) { 7040 DRM_ERROR("Failed to init rlc BOs!\n"); 7041 return r; 7042 } 7043 7044 /* allocate wb buffer */ 7045 r = radeon_wb_init(rdev); 7046 if (r) 7047 return r; 7048 7049 /* allocate mec buffers */ 7050 r = cik_mec_init(rdev); 7051 if (r) { 7052 DRM_ERROR("Failed to init MEC BOs!\n"); 7053 return r; 7054 } 7055 7056 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); 7057 if (r) { 7058 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 7059 return r; 7060 } 7061 7062 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); 7063 if (r) { 7064 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 7065 return r; 7066 } 7067 7068 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX); 7069 if (r) { 7070 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 7071 return r; 7072 } 7073 7074 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); 7075 if (r) { 7076 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); 7077 return r; 7078 } 7079 7080 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); 7081 if (r) { 7082 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); 7083 return r; 7084 } 7085 7086 r = radeon_uvd_resume(rdev); 7087 if (!r) { 7088 r = uvd_v4_2_resume(rdev); 7089 if (!r) { 7090 r = radeon_fence_driver_start_ring(rdev, 7091 R600_RING_TYPE_UVD_INDEX); 7092 if (r) 7093 dev_err(rdev->dev, "UVD fences init error (%d).\n", r); 7094 } 7095 } 7096 if (r) 7097 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; 7098 7099 /* Enable IRQ */ 7100 if (!rdev->irq.installed) { 7101 r = radeon_irq_kms_init(rdev); 7102 if (r) 7103 return r; 7104 } 7105 7106 r = cik_irq_init(rdev); 7107 if (r) { 7108 DRM_ERROR("radeon: IH 
init failed (%d).\n", r); 7109 radeon_irq_kms_fini(rdev); 7110 return r; 7111 } 7112 cik_irq_set(rdev); 7113 7114 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 7115 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, 7116 CP_RB0_RPTR, CP_RB0_WPTR, 7117 RADEON_CP_PACKET2); 7118 if (r) 7119 return r; 7120 7121 /* set up the compute queues */ 7122 /* type-2 packets are deprecated on MEC, use type-3 instead */ 7123 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 7124 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET, 7125 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR, 7126 PACKET3(PACKET3_NOP, 0x3FFF)); 7127 if (r) 7128 return r; 7129 ring->me = 1; /* first MEC */ 7130 ring->pipe = 0; /* first pipe */ 7131 ring->queue = 0; /* first queue */ 7132 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET; 7133 7134 /* type-2 packets are deprecated on MEC, use type-3 instead */ 7135 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 7136 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET, 7137 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR, 7138 PACKET3(PACKET3_NOP, 0x3FFF)); 7139 if (r) 7140 return r; 7141 /* dGPU only have 1 MEC */ 7142 ring->me = 1; /* first MEC */ 7143 ring->pipe = 0; /* first pipe */ 7144 ring->queue = 1; /* second queue */ 7145 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET; 7146 7147 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; 7148 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, 7149 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET, 7150 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET, 7151 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); 7152 if (r) 7153 return r; 7154 7155 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; 7156 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, 7157 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET, 7158 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET, 7159 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); 7160 if (r) 7161 return r; 7162 7163 r = cik_cp_resume(rdev); 7164 if (r) 7165 return r; 7166 7167 r = cik_sdma_resume(rdev); 7168 if (r) 7169 return r; 7170 7171 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; 7172 if (ring->ring_size) { 7173 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 7174 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, 7175 RADEON_CP_PACKET2); 7176 if (!r) 7177 r = uvd_v1_0_init(rdev); 7178 if (r) 7179 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); 7180 } 7181 7182 r = radeon_ib_pool_init(rdev); 7183 if (r) { 7184 dev_err(rdev->dev, "IB initialization failed (%d).\n", r); 7185 return r; 7186 } 7187 7188 r = radeon_vm_manager_init(rdev); 7189 if (r) { 7190 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r); 7191 return r; 7192 } 7193 7194 r = dce6_audio_init(rdev); 7195 if (r) 7196 return r; 7197 7198 return 0; 7199 } 7200 7201 /** 7202 * cik_resume - resume the asic to a functional state 7203 * 7204 * @rdev: radeon_device pointer 7205 * 7206 * Programs the asic to a functional state (CIK). 7207 * Called at resume. 7208 * Returns 0 for success, error for failure. 
7209 */ 7210 int cik_resume(struct radeon_device *rdev) 7211 { 7212 int r; 7213 7214 /* post card */ 7215 atom_asic_init(rdev->mode_info.atom_context); 7216 7217 /* init golden registers */ 7218 cik_init_golden_registers(rdev); 7219 7220 rdev->accel_working = true; 7221 r = cik_startup(rdev); 7222 if (r) { 7223 DRM_ERROR("cik startup failed on resume\n"); 7224 rdev->accel_working = false; 7225 return r; 7226 } 7227 7228 return r; 7229 7230 } 7231 7232 /** 7233 * cik_suspend - suspend the asic 7234 * 7235 * @rdev: radeon_device pointer 7236 * 7237 * Bring the chip into a state suitable for suspend (CIK). 7238 * Called at suspend. 7239 * Returns 0 for success. 7240 */ 7241 int cik_suspend(struct radeon_device *rdev) 7242 { 7243 dce6_audio_fini(rdev); 7244 radeon_vm_manager_fini(rdev); 7245 cik_cp_enable(rdev, false); 7246 cik_sdma_enable(rdev, false); 7247 uvd_v1_0_fini(rdev); 7248 radeon_uvd_suspend(rdev); 7249 cik_fini_pg(rdev); 7250 cik_fini_cg(rdev); 7251 cik_irq_suspend(rdev); 7252 radeon_wb_disable(rdev); 7253 cik_pcie_gart_disable(rdev); 7254 return 0; 7255 } 7256 7257 /* The plan is to move initialization into that function and use 7258 * helper functions so that radeon_device_init does pretty much 7259 * nothing more than call asic specific functions. This 7260 * should also allow us to remove a bunch of callback functions 7261 * like vram_info. 7262 */ 7263 /** 7264 * cik_init - asic specific driver and hw init 7265 * 7266 * @rdev: radeon_device pointer 7267 * 7268 * Set up asic specific driver variables and program the hw 7269 * to a functional state (CIK). 7270 * Called at driver startup. 7271 * Returns 0 for success, errors for failure. 7272 */ 7273 int cik_init(struct radeon_device *rdev) 7274 { 7275 struct radeon_ring *ring; 7276 int r; 7277 7278 /* Read BIOS */ 7279 if (!radeon_get_bios(rdev)) { 7280 if (ASIC_IS_AVIVO(rdev)) 7281 return -EINVAL; 7282 } 7283 /* Must be an ATOMBIOS */ 7284 if (!rdev->is_atom_bios) { 7285 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n"); 7286 return -EINVAL; 7287 } 7288 r = radeon_atombios_init(rdev); 7289 if (r) 7290 return r; 7291 7292 /* Post card if necessary */ 7293 if (!radeon_card_posted(rdev)) { 7294 if (!rdev->bios) { 7295 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n"); 7296 return -EINVAL; 7297 } 7298 DRM_INFO("GPU not posted. 
posting now...\n"); 7299 atom_asic_init(rdev->mode_info.atom_context); 7300 } 7301 /* init golden registers */ 7302 cik_init_golden_registers(rdev); 7303 /* Initialize scratch registers */ 7304 cik_scratch_init(rdev); 7305 /* Initialize surface registers */ 7306 radeon_surface_init(rdev); 7307 /* Initialize clocks */ 7308 radeon_get_clock_info(rdev->ddev); 7309 7310 /* Fence driver */ 7311 r = radeon_fence_driver_init(rdev); 7312 if (r) 7313 return r; 7314 7315 /* initialize memory controller */ 7316 r = cik_mc_init(rdev); 7317 if (r) 7318 return r; 7319 /* Memory manager */ 7320 r = radeon_bo_init(rdev); 7321 if (r) 7322 return r; 7323 7324 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 7325 ring->ring_obj = NULL; 7326 r600_ring_init(rdev, ring, 1024 * 1024); 7327 7328 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 7329 ring->ring_obj = NULL; 7330 r600_ring_init(rdev, ring, 1024 * 1024); 7331 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num); 7332 if (r) 7333 return r; 7334 7335 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 7336 ring->ring_obj = NULL; 7337 r600_ring_init(rdev, ring, 1024 * 1024); 7338 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num); 7339 if (r) 7340 return r; 7341 7342 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; 7343 ring->ring_obj = NULL; 7344 r600_ring_init(rdev, ring, 256 * 1024); 7345 7346 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; 7347 ring->ring_obj = NULL; 7348 r600_ring_init(rdev, ring, 256 * 1024); 7349 7350 r = radeon_uvd_init(rdev); 7351 if (!r) { 7352 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; 7353 ring->ring_obj = NULL; 7354 r600_ring_init(rdev, ring, 4096); 7355 } 7356 7357 rdev->ih.ring_obj = NULL; 7358 r600_ih_ring_init(rdev, 64 * 1024); 7359 7360 r = r600_pcie_gart_init(rdev); 7361 if (r) 7362 return r; 7363 7364 rdev->accel_working = true; 7365 r = cik_startup(rdev); 7366 if (r) { 7367 dev_err(rdev->dev, "disabling GPU acceleration\n"); 7368 cik_cp_fini(rdev); 7369 cik_sdma_fini(rdev); 7370 cik_irq_fini(rdev); 7371 sumo_rlc_fini(rdev); 7372 cik_mec_fini(rdev); 7373 radeon_wb_fini(rdev); 7374 radeon_ib_pool_fini(rdev); 7375 radeon_vm_manager_fini(rdev); 7376 radeon_irq_kms_fini(rdev); 7377 cik_pcie_gart_fini(rdev); 7378 rdev->accel_working = false; 7379 } 7380 7381 /* Don't start up if the MC ucode is missing. 7382 * The default clocks and voltages before the MC ucode 7383 * is loaded are not suffient for advanced operations. 7384 */ 7385 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) { 7386 DRM_ERROR("radeon: MC ucode required for NI+.\n"); 7387 return -EINVAL; 7388 } 7389 7390 return 0; 7391 } 7392 7393 /** 7394 * cik_fini - asic specific driver and hw fini 7395 * 7396 * @rdev: radeon_device pointer 7397 * 7398 * Tear down the asic specific driver variables and program the hw 7399 * to an idle state (CIK). 7400 * Called at driver unload. 
*/ 7402 void cik_fini(struct radeon_device *rdev) 7403 { 7404 cik_cp_fini(rdev); 7405 cik_sdma_fini(rdev); 7406 cik_fini_pg(rdev); 7407 cik_fini_cg(rdev); 7408 cik_irq_fini(rdev); 7409 sumo_rlc_fini(rdev); 7410 cik_mec_fini(rdev); 7411 radeon_wb_fini(rdev); 7412 radeon_vm_manager_fini(rdev); 7413 radeon_ib_pool_fini(rdev); 7414 radeon_irq_kms_fini(rdev); 7415 uvd_v1_0_fini(rdev); 7416 radeon_uvd_fini(rdev); 7417 cik_pcie_gart_fini(rdev); 7418 r600_vram_scratch_fini(rdev); 7419 radeon_gem_fini(rdev); 7420 radeon_fence_driver_fini(rdev); 7421 radeon_bo_fini(rdev); 7422 radeon_atombios_fini(rdev); 7423 kfree(rdev->bios); 7424 rdev->bios = NULL; 7425 } 7426 7427 /* display watermark setup */ 7428 /** 7429 * dce8_line_buffer_adjust - Set up the line buffer 7430 * 7431 * @rdev: radeon_device pointer 7432 * @radeon_crtc: the selected display controller 7433 * @mode: the current display mode on the selected display 7434 * controller 7435 * 7436 * Set up the line buffer allocation for 7437 * the selected display controller (CIK). 7438 * Returns the line buffer size in pixels. 7439 */ 7440 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev, 7441 struct radeon_crtc *radeon_crtc, 7442 struct drm_display_mode *mode) 7443 { 7444 u32 tmp, buffer_alloc, i; 7445 u32 pipe_offset = radeon_crtc->crtc_id * 0x20; 7446 /* 7447 * Line Buffer Setup 7448 * There are 6 line buffers, one for each display controller. 7449 * There are 3 partitions per LB. Select the number of partitions 7450 * to enable based on the display width. For display widths larger 7451 * than 4096, you need to use 2 display controllers and combine 7452 * them using the stereo blender. 7453 */ 7454 if (radeon_crtc->base.enabled && mode) { 7455 if (mode->crtc_hdisplay < 1920) { 7456 tmp = 1; 7457 buffer_alloc = 2; 7458 } else if (mode->crtc_hdisplay < 2560) { 7459 tmp = 2; 7460 buffer_alloc = 2; 7461 } else if (mode->crtc_hdisplay < 4096) { 7462 tmp = 0; 7463 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4; 7464 } else { 7465 DRM_DEBUG_KMS("Mode too big for LB!\n"); 7466 tmp = 0; 7467 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4; 7468 } 7469 } else { 7470 tmp = 1; 7471 buffer_alloc = 0; 7472 } 7473 7474 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset, 7475 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0)); 7476 7477 WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset, 7478 DMIF_BUFFERS_ALLOCATED(buffer_alloc)); 7479 for (i = 0; i < rdev->usec_timeout; i++) { 7480 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) & 7481 DMIF_BUFFERS_ALLOCATED_COMPLETED) 7482 break; 7483 udelay(1); 7484 } 7485 7486 if (radeon_crtc->base.enabled && mode) { 7487 switch (tmp) { 7488 case 0: 7489 default: 7490 return 4096 * 2; 7491 case 1: 7492 return 1920 * 2; 7493 case 2: 7494 return 2560 * 2; 7495 } 7496 } 7497 7498 /* controller not enabled, so no lb used */ 7499 return 0; 7500 } 7501 7502 /** 7503 * cik_get_number_of_dram_channels - get the number of dram channels 7504 * 7505 * @rdev: radeon_device pointer 7506 * 7507 * Look up the number of video ram channels (CIK). 
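* The NOOFCHAN field of MC_SHARED_CHMAP encodes the count non-linearly (e.g. a field value of 3 means 8 channels while 4 means 3), hence the lookup below.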
7508 * Used for display watermark bandwidth calculations 7509 * Returns the number of dram channels 7510 */ 7511 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev) 7512 { 7513 u32 tmp = RREG32(MC_SHARED_CHMAP); 7514 7515 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { 7516 case 0: 7517 default: 7518 return 1; 7519 case 1: 7520 return 2; 7521 case 2: 7522 return 4; 7523 case 3: 7524 return 8; 7525 case 4: 7526 return 3; 7527 case 5: 7528 return 6; 7529 case 6: 7530 return 10; 7531 case 7: 7532 return 12; 7533 case 8: 7534 return 16; 7535 } 7536 } 7537 7538 struct dce8_wm_params { 7539 u32 dram_channels; /* number of dram channels */ 7540 u32 yclk; /* bandwidth per dram data pin in kHz */ 7541 u32 sclk; /* engine clock in kHz */ 7542 u32 disp_clk; /* display clock in kHz */ 7543 u32 src_width; /* viewport width */ 7544 u32 active_time; /* active display time in ns */ 7545 u32 blank_time; /* blank time in ns */ 7546 bool interlaced; /* mode is interlaced */ 7547 fixed20_12 vsc; /* vertical scale ratio */ 7548 u32 num_heads; /* number of active crtcs */ 7549 u32 bytes_per_pixel; /* bytes per pixel display + overlay */ 7550 u32 lb_size; /* line buffer allocated to pipe */ 7551 u32 vtaps; /* vertical scaler taps */ 7552 }; 7553 7554 /** 7555 * dce8_dram_bandwidth - get the dram bandwidth 7556 * 7557 * @wm: watermark calculation data 7558 * 7559 * Calculate the raw dram bandwidth (CIK). 7560 * Used for display watermark bandwidth calculations 7561 * Returns the dram bandwidth in MBytes/s 7562 */ 7563 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm) 7564 { 7565 /* Calculate raw DRAM Bandwidth */ 7566 fixed20_12 dram_efficiency; /* 0.7 */ 7567 fixed20_12 yclk, dram_channels, bandwidth; 7568 fixed20_12 a; 7569 7570 a.full = dfixed_const(1000); 7571 yclk.full = dfixed_const(wm->yclk); 7572 yclk.full = dfixed_div(yclk, a); 7573 dram_channels.full = dfixed_const(wm->dram_channels * 4); 7574 a.full = dfixed_const(10); 7575 dram_efficiency.full = dfixed_const(7); 7576 dram_efficiency.full = dfixed_div(dram_efficiency, a); 7577 bandwidth.full = dfixed_mul(dram_channels, yclk); 7578 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency); 7579 7580 return dfixed_trunc(bandwidth); 7581 } 7582 7583 /** 7584 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display 7585 * 7586 * @wm: watermark calculation data 7587 * 7588 * Calculate the dram bandwidth used for display (CIK). 7589 * Used for display watermark bandwidth calculations 7590 * Returns the dram bandwidth for display in MBytes/s 7591 */ 7592 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm) 7593 { 7594 /* Calculate DRAM Bandwidth and the part allocated to display. 
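* Same formula as above but with the worst case 0.3 display allocation in place of the 0.7 efficiency: e.g. with 8 channels and wm->yclk = 1000000 (1 GHz per pin), (8 * 4) * 1000 MHz * 0.3 = 9600 MB/s for display.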
*/ 7595 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */ 7596 fixed20_12 yclk, dram_channels, bandwidth; 7597 fixed20_12 a; 7598 7599 a.full = dfixed_const(1000); 7600 yclk.full = dfixed_const(wm->yclk); 7601 yclk.full = dfixed_div(yclk, a); 7602 dram_channels.full = dfixed_const(wm->dram_channels * 4); 7603 a.full = dfixed_const(10); 7604 disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */ 7605 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a); 7606 bandwidth.full = dfixed_mul(dram_channels, yclk); 7607 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation); 7608 7609 return dfixed_trunc(bandwidth); 7610 } 7611 7612 /** 7613 * dce8_data_return_bandwidth - get the data return bandwidth 7614 * 7615 * @wm: watermark calculation data 7616 * 7617 * Calculate the data return bandwidth used for display (CIK). 7618 * Used for display watermark bandwidth calculations 7619 * Returns the data return bandwidth in MBytes/s 7620 */ 7621 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm) 7622 { 7623 /* Calculate the display Data return Bandwidth */ 7624 fixed20_12 return_efficiency; /* 0.8 */ 7625 fixed20_12 sclk, bandwidth; 7626 fixed20_12 a; 7627 7628 a.full = dfixed_const(1000); 7629 sclk.full = dfixed_const(wm->sclk); 7630 sclk.full = dfixed_div(sclk, a); 7631 a.full = dfixed_const(10); 7632 return_efficiency.full = dfixed_const(8); 7633 return_efficiency.full = dfixed_div(return_efficiency, a); 7634 a.full = dfixed_const(32); 7635 bandwidth.full = dfixed_mul(a, sclk); 7636 bandwidth.full = dfixed_mul(bandwidth, return_efficiency); 7637 7638 return dfixed_trunc(bandwidth); 7639 } 7640 7641 /** 7642 * dce8_dmif_request_bandwidth - get the dmif bandwidth 7643 * 7644 * @wm: watermark calculation data 7645 * 7646 * Calculate the dmif bandwidth used for display (CIK). 7647 * Used for display watermark bandwidth calculations 7648 * Returns the dmif bandwidth in MBytes/s 7649 */ 7650 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm) 7651 { 7652 /* Calculate the DMIF Request Bandwidth */ 7653 fixed20_12 disp_clk_request_efficiency; /* 0.8 */ 7654 fixed20_12 disp_clk, bandwidth; 7655 fixed20_12 a, b; 7656 7657 a.full = dfixed_const(1000); 7658 disp_clk.full = dfixed_const(wm->disp_clk); 7659 disp_clk.full = dfixed_div(disp_clk, a); 7660 a.full = dfixed_const(32); 7661 b.full = dfixed_mul(a, disp_clk); 7662 7663 a.full = dfixed_const(10); 7664 disp_clk_request_efficiency.full = dfixed_const(8); 7665 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a); 7666 7667 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency); 7668 7669 return dfixed_trunc(bandwidth); 7670 } 7671 7672 /** 7673 * dce8_available_bandwidth - get the min available bandwidth 7674 * 7675 * @wm: watermark calculation data 7676 * 7677 * Calculate the min available bandwidth used for display (CIK). 7678 * Used for display watermark bandwidth calculations 7679 * Returns the min available bandwidth in MBytes/s 7680 */ 7681 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm) 7682 { 7683 /* Calculate the Available bandwidth. Display can use this temporarily but not on average. 
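* It is the minimum of the raw DRAM bandwidth, the data return bandwidth (32 bytes * sclk in MHz * 0.8) and the DMIF request bandwidth (32 bytes * disp_clk in MHz * 0.8).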
/**
 * dce8_available_bandwidth - get the min available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the min available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the min available bandwidth in MBytes/s
 */
static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);

	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
}

/**
 * dce8_average_bandwidth - get the average bandwidth consumed by the mode
 *
 * @wm: watermark calculation data
 *
 * Calculate the average bandwidth consumed by the current display
 * mode (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the average bandwidth in MBytes/s
 */
static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display mode Average Bandwidth
	 * DisplayMode should contain the source and destination dimensions,
	 * timing, etc.
	 */
	fixed20_12 bpp;
	fixed20_12 line_time;
	fixed20_12 src_width;
	fixed20_12 bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
	line_time.full = dfixed_div(line_time, a);
	bpp.full = dfixed_const(wm->bytes_per_pixel);
	src_width.full = dfixed_const(wm->src_width);
	bandwidth.full = dfixed_mul(src_width, bpp);
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
	bandwidth.full = dfixed_div(bandwidth, line_time);

	return dfixed_trunc(bandwidth);
}
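/* Rough sanity check (illustrative, assuming a CEA 1080p60 mode):
 * src_width = 1920, bytes_per_pixel = 4, vsc = 1.0 and a line time of
 * roughly 14810 ns give 1920 * 4 bytes / 14.81 us ~= 519 MBytes/s,
 * well under the 5600 MBytes/s DRAM figure in the example above.
 */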
/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);
}
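/* For orientation, with the illustrative 5600 MBytes/s available
 * bandwidth from above: a worst-case 512 * 8 = 4096 byte chunk takes
 * ~731 ns to return, a 128 * 4 = 512 byte cursor line pair ~91 ns,
 * and dc_latency at a 148500 kHz disp_clk is ~269 ns, all added on
 * top of the assumed 2000 ns MC latency.
 */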
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
{
	u32 lb_partitions = wm->lb_size / wm->src_width;
	u32 line_time = wm->active_time + wm->blank_time;
	u32 latency_tolerant_lines;
	u32 latency_hiding;
	fixed20_12 a;

	a.full = dfixed_const(1);
	if (wm->vsc.full > a.full)
		latency_tolerant_lines = 1;
	else {
		if (lb_partitions <= (wm->vtaps + 1))
			latency_tolerant_lines = 1;
		else
			latency_tolerant_lines = 2;
	}

	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);

	if (dce8_latency_watermark(wm) <= latency_hiding)
		return true;
	else
		return false;
}
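/* The helpers above feed the fit decision in dce8_program_watermarks()
 * below: per-head average bandwidth against the display share of DRAM
 * bandwidth, against total available bandwidth, and whether the line
 * buffer can hide the latency watermark. Note that on a failed check
 * the code below currently only emits a debug message.
 */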
/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
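/* Two watermark sets are maintained: set A above is computed from the
 * high (performance) clocks and set B from the low clocks, and the
 * results are cached on the radeon_crtc ("save values for DPM") so the
 * power-management code can apply the set matching the current clock
 * level. Which set the hardware uses is selected through
 * DPG_WATERMARK_MASK_CONTROL, as programmed above.
 */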
/**
 * dce8_bandwidth_update - program display watermarks
 *
 * @rdev: radeon_device pointer
 *
 * Calculate and program the display watermarks and line
 * buffer allocation (CIK).
 */
void dce8_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	radeon_update_display_priority(rdev);

	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	for (i = 0; i < rdev->num_crtc; i++) {
		mode = &rdev->mode_info.crtcs[i]->base.mode;
		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
	}
}

/**
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (CIK).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	mutex_lock(&rdev->gpu_clock_mutex);
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&rdev->gpu_clock_mutex);
	return clock;
}

static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			     u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}

int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	int r = 0;

	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
	if (r)
		return r;

	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
	return r;
}
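/* cik_set_uvd_clock() asks the ATOM tables for dividers approximating
 * the requested clock, programs the post divider, then polls the
 * status register for up to 100 * 10 ms = 1 s before returning
 * -ETIMEDOUT. The VCLK and DCLK control/status registers share the
 * same field layout, which is why cik_set_uvd_clocks() can drive both
 * through the one helper.
 */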
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}
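	/* The actual speed change: force a software-driven speed change,
	 * write the target rate into the GPU's Link Control 2 register
	 * (1 = 2.5GT/s, 2 = 5.0GT/s, 3 = 8.0GT/s), then set
	 * LC_INITIATE_LINK_SPEED_CHANGE and wait for the hardware to
	 * clear it again.
	 */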
	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
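/* cik_program_aspm() below tunes PCIe Active State Power Management:
 * it programs L0s/L1 inactivity timers, lets the PCIE PLLs power down
 * in L1, and, when the upstream bridge advertises clock power
 * management (CLKREQ#), re-parents several auxiliary clocks so the
 * bus reference clock can be gated. All of it is skipped on IGPs and
 * when booted with radeon.aspm=0.
 */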
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}