1 /* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22 * Authors: Alex Deucher 23 */ 24 #include <linux/firmware.h> 25 #include <linux/slab.h> 26 #include <linux/module.h> 27 #include "drmP.h" 28 #include "radeon.h" 29 #include "radeon_asic.h" 30 #include "cikd.h" 31 #include "atom.h" 32 #include "cik_blit_shaders.h" 33 34 /* GFX */ 35 #define CIK_PFP_UCODE_SIZE 2144 36 #define CIK_ME_UCODE_SIZE 2144 37 #define CIK_CE_UCODE_SIZE 2144 38 /* compute */ 39 #define CIK_MEC_UCODE_SIZE 4192 40 /* interrupts */ 41 #define BONAIRE_RLC_UCODE_SIZE 2048 42 #define KB_RLC_UCODE_SIZE 2560 43 #define KV_RLC_UCODE_SIZE 2560 44 /* gddr controller */ 45 #define CIK_MC_UCODE_SIZE 7866 46 /* sdma */ 47 #define CIK_SDMA_UCODE_SIZE 1050 48 #define CIK_SDMA_UCODE_VERSION 64 49 50 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin"); 51 MODULE_FIRMWARE("radeon/BONAIRE_me.bin"); 52 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin"); 53 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin"); 54 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin"); 55 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin"); 56 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin"); 57 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin"); 58 MODULE_FIRMWARE("radeon/KAVERI_me.bin"); 59 MODULE_FIRMWARE("radeon/KAVERI_ce.bin"); 60 MODULE_FIRMWARE("radeon/KAVERI_mec.bin"); 61 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin"); 62 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin"); 63 MODULE_FIRMWARE("radeon/KABINI_pfp.bin"); 64 MODULE_FIRMWARE("radeon/KABINI_me.bin"); 65 MODULE_FIRMWARE("radeon/KABINI_ce.bin"); 66 MODULE_FIRMWARE("radeon/KABINI_mec.bin"); 67 MODULE_FIRMWARE("radeon/KABINI_rlc.bin"); 68 MODULE_FIRMWARE("radeon/KABINI_sdma.bin"); 69 70 extern int r600_ih_ring_alloc(struct radeon_device *rdev); 71 extern void r600_ih_ring_fini(struct radeon_device *rdev); 72 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save); 73 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save); 74 extern bool evergreen_is_display_hung(struct radeon_device *rdev); 75 extern void 
si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_fini(struct radeon_device *rdev);
extern int si_rlc_init(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);

/*
 * Indirect registers accessor
 */
/*
 * Read a PCIE indirect register: program the index register, read it
 * back (to flush the posted index write), then read the data register.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* readback flushes the index write */
	r = RREG32(PCIE_DATA);
	return r;
}

/*
 * Write a PCIE indirect register: program the index, flush it with a
 * readback, write the data, then flush the data write the same way.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* readback flushes the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* readback flushes the data write */
}

/*
 * "Golden" register tables for BONAIRE.  Each row appears to be an
 * { offset, mask, value } triple consumed by
 * radeon_program_register_sequence() (see cik_init_golden_registers()).
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c,
0x000007ff, 0x00000000, 143 0x8a14, 0xf000003f, 0x00000007, 144 0x8bf0, 0x00002001, 0x00000001, 145 0x8b24, 0xffffffff, 0x00ffffff, 146 0x30a04, 0x0000ff0f, 0x00000000, 147 0x28a4c, 0x07ffffff, 0x06000000, 148 0x4d8, 0x00000fff, 0x00000100, 149 0x3e78, 0x00000001, 0x00000002, 150 0x9100, 0x03000000, 0x0362c688, 151 0x8c00, 0x000000ff, 0x00000001, 152 0xe40, 0x00001fff, 0x00001fff, 153 0x9060, 0x0000007f, 0x00000020, 154 0x9508, 0x00010000, 0x00010000, 155 0xac14, 0x000003ff, 0x000000f3, 156 0xac0c, 0xffffffff, 0x00001032 157 }; 158 159 static const u32 bonaire_mgcg_cgcg_init[] = 160 { 161 0xc420, 0xffffffff, 0xfffffffc, 162 0x30800, 0xffffffff, 0xe0000000, 163 0x3c2a0, 0xffffffff, 0x00000100, 164 0x3c208, 0xffffffff, 0x00000100, 165 0x3c2c0, 0xffffffff, 0xc0000100, 166 0x3c2c8, 0xffffffff, 0xc0000100, 167 0x3c2c4, 0xffffffff, 0xc0000100, 168 0x55e4, 0xffffffff, 0x00600100, 169 0x3c280, 0xffffffff, 0x00000100, 170 0x3c214, 0xffffffff, 0x06000100, 171 0x3c220, 0xffffffff, 0x00000100, 172 0x3c218, 0xffffffff, 0x06000100, 173 0x3c204, 0xffffffff, 0x00000100, 174 0x3c2e0, 0xffffffff, 0x00000100, 175 0x3c224, 0xffffffff, 0x00000100, 176 0x3c200, 0xffffffff, 0x00000100, 177 0x3c230, 0xffffffff, 0x00000100, 178 0x3c234, 0xffffffff, 0x00000100, 179 0x3c250, 0xffffffff, 0x00000100, 180 0x3c254, 0xffffffff, 0x00000100, 181 0x3c258, 0xffffffff, 0x00000100, 182 0x3c25c, 0xffffffff, 0x00000100, 183 0x3c260, 0xffffffff, 0x00000100, 184 0x3c27c, 0xffffffff, 0x00000100, 185 0x3c278, 0xffffffff, 0x00000100, 186 0x3c210, 0xffffffff, 0x06000100, 187 0x3c290, 0xffffffff, 0x00000100, 188 0x3c274, 0xffffffff, 0x00000100, 189 0x3c2b4, 0xffffffff, 0x00000100, 190 0x3c2b0, 0xffffffff, 0x00000100, 191 0x3c270, 0xffffffff, 0x00000100, 192 0x30800, 0xffffffff, 0xe0000000, 193 0x3c020, 0xffffffff, 0x00010000, 194 0x3c024, 0xffffffff, 0x00030002, 195 0x3c028, 0xffffffff, 0x00040007, 196 0x3c02c, 0xffffffff, 0x00060005, 197 0x3c030, 0xffffffff, 0x00090008, 198 0x3c034, 0xffffffff, 0x00010000, 199 
0x3c038, 0xffffffff, 0x00030002, 200 0x3c03c, 0xffffffff, 0x00040007, 201 0x3c040, 0xffffffff, 0x00060005, 202 0x3c044, 0xffffffff, 0x00090008, 203 0x3c048, 0xffffffff, 0x00010000, 204 0x3c04c, 0xffffffff, 0x00030002, 205 0x3c050, 0xffffffff, 0x00040007, 206 0x3c054, 0xffffffff, 0x00060005, 207 0x3c058, 0xffffffff, 0x00090008, 208 0x3c05c, 0xffffffff, 0x00010000, 209 0x3c060, 0xffffffff, 0x00030002, 210 0x3c064, 0xffffffff, 0x00040007, 211 0x3c068, 0xffffffff, 0x00060005, 212 0x3c06c, 0xffffffff, 0x00090008, 213 0x3c070, 0xffffffff, 0x00010000, 214 0x3c074, 0xffffffff, 0x00030002, 215 0x3c078, 0xffffffff, 0x00040007, 216 0x3c07c, 0xffffffff, 0x00060005, 217 0x3c080, 0xffffffff, 0x00090008, 218 0x3c084, 0xffffffff, 0x00010000, 219 0x3c088, 0xffffffff, 0x00030002, 220 0x3c08c, 0xffffffff, 0x00040007, 221 0x3c090, 0xffffffff, 0x00060005, 222 0x3c094, 0xffffffff, 0x00090008, 223 0x3c098, 0xffffffff, 0x00010000, 224 0x3c09c, 0xffffffff, 0x00030002, 225 0x3c0a0, 0xffffffff, 0x00040007, 226 0x3c0a4, 0xffffffff, 0x00060005, 227 0x3c0a8, 0xffffffff, 0x00090008, 228 0x3c000, 0xffffffff, 0x96e00200, 229 0x8708, 0xffffffff, 0x00900100, 230 0xc424, 0xffffffff, 0x0020003f, 231 0x38, 0xffffffff, 0x0140001c, 232 0x3c, 0x000f0000, 0x000f0000, 233 0x220, 0xffffffff, 0xC060000C, 234 0x224, 0xc0000fff, 0x00000100, 235 0xf90, 0xffffffff, 0x00000100, 236 0xf98, 0x00000101, 0x00000000, 237 0x20a8, 0xffffffff, 0x00000104, 238 0x55e4, 0xff000fff, 0x00000100, 239 0x30cc, 0xc0000fff, 0x00000104, 240 0xc1e4, 0x00000001, 0x00000001, 241 0xd00c, 0xff000ff0, 0x00000100, 242 0xd80c, 0xff000ff0, 0x00000100 243 }; 244 245 static const u32 spectre_golden_spm_registers[] = 246 { 247 0x30800, 0xe0ffffff, 0xe0000000 248 }; 249 250 static const u32 spectre_golden_common_registers[] = 251 { 252 0xc770, 0xffffffff, 0x00000800, 253 0xc774, 0xffffffff, 0x00000800, 254 0xc798, 0xffffffff, 0x00007fbf, 255 0xc79c, 0xffffffff, 0x00007faf 256 }; 257 258 static const u32 spectre_golden_registers[] = 259 { 260 
0x3c000, 0xffff1fff, 0x96940200, 261 0x3c00c, 0xffff0001, 0xff000000, 262 0x3c200, 0xfffc0fff, 0x00000100, 263 0x6ed8, 0x00010101, 0x00010000, 264 0x9834, 0xf00fffff, 0x00000400, 265 0x9838, 0xfffffffc, 0x00020200, 266 0x5bb0, 0x000000f0, 0x00000070, 267 0x5bc0, 0xf0311fff, 0x80300000, 268 0x98f8, 0x73773777, 0x12010001, 269 0x9b7c, 0x00ff0000, 0x00fc0000, 270 0x2f48, 0x73773777, 0x12010001, 271 0x8a14, 0xf000003f, 0x00000007, 272 0x8b24, 0xffffffff, 0x00ffffff, 273 0x28350, 0x3f3f3fff, 0x00000082, 274 0x28355, 0x0000003f, 0x00000000, 275 0x3e78, 0x00000001, 0x00000002, 276 0x913c, 0xffff03df, 0x00000004, 277 0xc768, 0x00000008, 0x00000008, 278 0x8c00, 0x000008ff, 0x00000800, 279 0x9508, 0x00010000, 0x00010000, 280 0xac0c, 0xffffffff, 0x54763210, 281 0x214f8, 0x01ff01ff, 0x00000002, 282 0x21498, 0x007ff800, 0x00200000, 283 0x2015c, 0xffffffff, 0x00000f40, 284 0x30934, 0xffffffff, 0x00000001 285 }; 286 287 static const u32 spectre_mgcg_cgcg_init[] = 288 { 289 0xc420, 0xffffffff, 0xfffffffc, 290 0x30800, 0xffffffff, 0xe0000000, 291 0x3c2a0, 0xffffffff, 0x00000100, 292 0x3c208, 0xffffffff, 0x00000100, 293 0x3c2c0, 0xffffffff, 0x00000100, 294 0x3c2c8, 0xffffffff, 0x00000100, 295 0x3c2c4, 0xffffffff, 0x00000100, 296 0x55e4, 0xffffffff, 0x00600100, 297 0x3c280, 0xffffffff, 0x00000100, 298 0x3c214, 0xffffffff, 0x06000100, 299 0x3c220, 0xffffffff, 0x00000100, 300 0x3c218, 0xffffffff, 0x06000100, 301 0x3c204, 0xffffffff, 0x00000100, 302 0x3c2e0, 0xffffffff, 0x00000100, 303 0x3c224, 0xffffffff, 0x00000100, 304 0x3c200, 0xffffffff, 0x00000100, 305 0x3c230, 0xffffffff, 0x00000100, 306 0x3c234, 0xffffffff, 0x00000100, 307 0x3c250, 0xffffffff, 0x00000100, 308 0x3c254, 0xffffffff, 0x00000100, 309 0x3c258, 0xffffffff, 0x00000100, 310 0x3c25c, 0xffffffff, 0x00000100, 311 0x3c260, 0xffffffff, 0x00000100, 312 0x3c27c, 0xffffffff, 0x00000100, 313 0x3c278, 0xffffffff, 0x00000100, 314 0x3c210, 0xffffffff, 0x06000100, 315 0x3c290, 0xffffffff, 0x00000100, 316 0x3c274, 0xffffffff, 
0x00000100, 317 0x3c2b4, 0xffffffff, 0x00000100, 318 0x3c2b0, 0xffffffff, 0x00000100, 319 0x3c270, 0xffffffff, 0x00000100, 320 0x30800, 0xffffffff, 0xe0000000, 321 0x3c020, 0xffffffff, 0x00010000, 322 0x3c024, 0xffffffff, 0x00030002, 323 0x3c028, 0xffffffff, 0x00040007, 324 0x3c02c, 0xffffffff, 0x00060005, 325 0x3c030, 0xffffffff, 0x00090008, 326 0x3c034, 0xffffffff, 0x00010000, 327 0x3c038, 0xffffffff, 0x00030002, 328 0x3c03c, 0xffffffff, 0x00040007, 329 0x3c040, 0xffffffff, 0x00060005, 330 0x3c044, 0xffffffff, 0x00090008, 331 0x3c048, 0xffffffff, 0x00010000, 332 0x3c04c, 0xffffffff, 0x00030002, 333 0x3c050, 0xffffffff, 0x00040007, 334 0x3c054, 0xffffffff, 0x00060005, 335 0x3c058, 0xffffffff, 0x00090008, 336 0x3c05c, 0xffffffff, 0x00010000, 337 0x3c060, 0xffffffff, 0x00030002, 338 0x3c064, 0xffffffff, 0x00040007, 339 0x3c068, 0xffffffff, 0x00060005, 340 0x3c06c, 0xffffffff, 0x00090008, 341 0x3c070, 0xffffffff, 0x00010000, 342 0x3c074, 0xffffffff, 0x00030002, 343 0x3c078, 0xffffffff, 0x00040007, 344 0x3c07c, 0xffffffff, 0x00060005, 345 0x3c080, 0xffffffff, 0x00090008, 346 0x3c084, 0xffffffff, 0x00010000, 347 0x3c088, 0xffffffff, 0x00030002, 348 0x3c08c, 0xffffffff, 0x00040007, 349 0x3c090, 0xffffffff, 0x00060005, 350 0x3c094, 0xffffffff, 0x00090008, 351 0x3c098, 0xffffffff, 0x00010000, 352 0x3c09c, 0xffffffff, 0x00030002, 353 0x3c0a0, 0xffffffff, 0x00040007, 354 0x3c0a4, 0xffffffff, 0x00060005, 355 0x3c0a8, 0xffffffff, 0x00090008, 356 0x3c0ac, 0xffffffff, 0x00010000, 357 0x3c0b0, 0xffffffff, 0x00030002, 358 0x3c0b4, 0xffffffff, 0x00040007, 359 0x3c0b8, 0xffffffff, 0x00060005, 360 0x3c0bc, 0xffffffff, 0x00090008, 361 0x3c000, 0xffffffff, 0x96e00200, 362 0x8708, 0xffffffff, 0x00900100, 363 0xc424, 0xffffffff, 0x0020003f, 364 0x38, 0xffffffff, 0x0140001c, 365 0x3c, 0x000f0000, 0x000f0000, 366 0x220, 0xffffffff, 0xC060000C, 367 0x224, 0xc0000fff, 0x00000100, 368 0xf90, 0xffffffff, 0x00000100, 369 0xf98, 0x00000101, 0x00000000, 370 0x20a8, 0xffffffff, 0x00000104, 371 
0x55e4, 0xff000fff, 0x00000100, 372 0x30cc, 0xc0000fff, 0x00000104, 373 0xc1e4, 0x00000001, 0x00000001, 374 0xd00c, 0xff000ff0, 0x00000100, 375 0xd80c, 0xff000ff0, 0x00000100 376 }; 377 378 static const u32 kalindi_golden_spm_registers[] = 379 { 380 0x30800, 0xe0ffffff, 0xe0000000 381 }; 382 383 static const u32 kalindi_golden_common_registers[] = 384 { 385 0xc770, 0xffffffff, 0x00000800, 386 0xc774, 0xffffffff, 0x00000800, 387 0xc798, 0xffffffff, 0x00007fbf, 388 0xc79c, 0xffffffff, 0x00007faf 389 }; 390 391 static const u32 kalindi_golden_registers[] = 392 { 393 0x3c000, 0xffffdfff, 0x6e944040, 394 0x55e4, 0xff607fff, 0xfc000100, 395 0x3c220, 0xff000fff, 0x00000100, 396 0x3c224, 0xff000fff, 0x00000100, 397 0x3c200, 0xfffc0fff, 0x00000100, 398 0x6ed8, 0x00010101, 0x00010000, 399 0x9830, 0xffffffff, 0x00000000, 400 0x9834, 0xf00fffff, 0x00000400, 401 0x5bb0, 0x000000f0, 0x00000070, 402 0x5bc0, 0xf0311fff, 0x80300000, 403 0x98f8, 0x73773777, 0x12010001, 404 0x98fc, 0xffffffff, 0x00000010, 405 0x9b7c, 0x00ff0000, 0x00fc0000, 406 0x8030, 0x00001f0f, 0x0000100a, 407 0x2f48, 0x73773777, 0x12010001, 408 0x2408, 0x000fffff, 0x000c007f, 409 0x8a14, 0xf000003f, 0x00000007, 410 0x8b24, 0x3fff3fff, 0x00ffcfff, 411 0x30a04, 0x0000ff0f, 0x00000000, 412 0x28a4c, 0x07ffffff, 0x06000000, 413 0x4d8, 0x00000fff, 0x00000100, 414 0x3e78, 0x00000001, 0x00000002, 415 0xc768, 0x00000008, 0x00000008, 416 0x8c00, 0x000000ff, 0x00000003, 417 0x214f8, 0x01ff01ff, 0x00000002, 418 0x21498, 0x007ff800, 0x00200000, 419 0x2015c, 0xffffffff, 0x00000f40, 420 0x88c4, 0x001f3ae3, 0x00000082, 421 0x88d4, 0x0000001f, 0x00000010, 422 0x30934, 0xffffffff, 0x00000000 423 }; 424 425 static const u32 kalindi_mgcg_cgcg_init[] = 426 { 427 0xc420, 0xffffffff, 0xfffffffc, 428 0x30800, 0xffffffff, 0xe0000000, 429 0x3c2a0, 0xffffffff, 0x00000100, 430 0x3c208, 0xffffffff, 0x00000100, 431 0x3c2c0, 0xffffffff, 0x00000100, 432 0x3c2c8, 0xffffffff, 0x00000100, 433 0x3c2c4, 0xffffffff, 0x00000100, 434 0x55e4, 
0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

/*
 * Program the per-ASIC "golden" register settings.  For each supported
 * family the mgcg/cgcg init sequence is applied first, then the golden,
 * common and spm register tables.  Unknown families are left untouched.
 * Note KAVERI uses the "spectre" tables and KABINI the "kalindi" tables.
 */
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		/* on IGPs the counter clock bit halves the reference clock */
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		/* on dGPUs the xtalin divider quarters the reference clock */
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).  Out-of-range offsets are rejected
 * with an error message and read back as 0.
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).  Out-of-range offsets are rejected
 * with an error message and the write is dropped.
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}

#define BONAIRE_IO_MC_REGS_SIZE 36

/*
 * MC io { debug index, data } pairs programmed through
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA before loading the MC
 * ucode (see ci_mc_load_microcode()).
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active registers instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
648 */ 649 static void cik_srbm_select(struct radeon_device *rdev, 650 u32 me, u32 pipe, u32 queue, u32 vmid) 651 { 652 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) | 653 MEID(me & 0x3) | 654 VMID(vmid & 0xf) | 655 QUEUEID(queue & 0x7)); 656 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl); 657 } 658 659 /* ucode loading */ 660 /** 661 * ci_mc_load_microcode - load MC ucode into the hw 662 * 663 * @rdev: radeon_device pointer 664 * 665 * Load the GDDR MC ucode into the hw (CIK). 666 * Returns 0 on success, error on failure. 667 */ 668 static int ci_mc_load_microcode(struct radeon_device *rdev) 669 { 670 const __be32 *fw_data; 671 u32 running, blackout = 0; 672 u32 *io_mc_regs; 673 int i, ucode_size, regs_size; 674 675 if (!rdev->mc_fw) 676 return -EINVAL; 677 678 switch (rdev->family) { 679 case CHIP_BONAIRE: 680 default: 681 io_mc_regs = (u32 *)&bonaire_io_mc_regs; 682 ucode_size = CIK_MC_UCODE_SIZE; 683 regs_size = BONAIRE_IO_MC_REGS_SIZE; 684 break; 685 } 686 687 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK; 688 689 if (running == 0) { 690 if (running) { 691 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL); 692 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1); 693 } 694 695 /* reset the engine and set to writable */ 696 WREG32(MC_SEQ_SUP_CNTL, 0x00000008); 697 WREG32(MC_SEQ_SUP_CNTL, 0x00000010); 698 699 /* load mc io regs */ 700 for (i = 0; i < regs_size; i++) { 701 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]); 702 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]); 703 } 704 /* load the MC ucode */ 705 fw_data = (const __be32 *)rdev->mc_fw->data; 706 for (i = 0; i < ucode_size; i++) 707 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++)); 708 709 /* put the engine back into the active state */ 710 WREG32(MC_SEQ_SUP_CNTL, 0x00000008); 711 WREG32(MC_SEQ_SUP_CNTL, 0x00000004); 712 WREG32(MC_SEQ_SUP_CNTL, 0x00000001); 713 714 /* wait for training to complete */ 715 for (i = 0; i < rdev->usec_timeout; i++) { 716 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0) 717 break; 718 
udelay(1); 719 } 720 for (i = 0; i < rdev->usec_timeout; i++) { 721 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1) 722 break; 723 udelay(1); 724 } 725 726 if (running) 727 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout); 728 } 729 730 return 0; 731 } 732 733 /** 734 * cik_init_microcode - load ucode images from disk 735 * 736 * @rdev: radeon_device pointer 737 * 738 * Use the firmware interface to load the ucode images into 739 * the driver (not loaded into hw). 740 * Returns 0 on success, error on failure. 741 */ 742 static int cik_init_microcode(struct radeon_device *rdev) 743 { 744 const char *chip_name; 745 size_t pfp_req_size, me_req_size, ce_req_size, 746 mec_req_size, rlc_req_size, mc_req_size, 747 sdma_req_size; 748 char fw_name[30]; 749 int err; 750 751 DRM_DEBUG("\n"); 752 753 switch (rdev->family) { 754 case CHIP_BONAIRE: 755 chip_name = "BONAIRE"; 756 pfp_req_size = CIK_PFP_UCODE_SIZE * 4; 757 me_req_size = CIK_ME_UCODE_SIZE * 4; 758 ce_req_size = CIK_CE_UCODE_SIZE * 4; 759 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 760 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4; 761 mc_req_size = CIK_MC_UCODE_SIZE * 4; 762 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 763 break; 764 case CHIP_KAVERI: 765 chip_name = "KAVERI"; 766 pfp_req_size = CIK_PFP_UCODE_SIZE * 4; 767 me_req_size = CIK_ME_UCODE_SIZE * 4; 768 ce_req_size = CIK_CE_UCODE_SIZE * 4; 769 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 770 rlc_req_size = KV_RLC_UCODE_SIZE * 4; 771 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 772 break; 773 case CHIP_KABINI: 774 chip_name = "KABINI"; 775 pfp_req_size = CIK_PFP_UCODE_SIZE * 4; 776 me_req_size = CIK_ME_UCODE_SIZE * 4; 777 ce_req_size = CIK_CE_UCODE_SIZE * 4; 778 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 779 rlc_req_size = KB_RLC_UCODE_SIZE * 4; 780 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 781 break; 782 default: BUG(); 783 } 784 785 DRM_INFO("Loading %s Microcode\n", chip_name); 786 787 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name); 788 err = 
request_firmware(&rdev->pfp_fw, fw_name, rdev->dev); 789 if (err) 790 goto out; 791 if (rdev->pfp_fw->size != pfp_req_size) { 792 printk(KERN_ERR 793 "cik_cp: Bogus length %zu in firmware \"%s\"\n", 794 rdev->pfp_fw->size, fw_name); 795 err = -EINVAL; 796 goto out; 797 } 798 799 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name); 800 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev); 801 if (err) 802 goto out; 803 if (rdev->me_fw->size != me_req_size) { 804 printk(KERN_ERR 805 "cik_cp: Bogus length %zu in firmware \"%s\"\n", 806 rdev->me_fw->size, fw_name); 807 err = -EINVAL; 808 } 809 810 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name); 811 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev); 812 if (err) 813 goto out; 814 if (rdev->ce_fw->size != ce_req_size) { 815 printk(KERN_ERR 816 "cik_cp: Bogus length %zu in firmware \"%s\"\n", 817 rdev->ce_fw->size, fw_name); 818 err = -EINVAL; 819 } 820 821 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name); 822 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev); 823 if (err) 824 goto out; 825 if (rdev->mec_fw->size != mec_req_size) { 826 printk(KERN_ERR 827 "cik_cp: Bogus length %zu in firmware \"%s\"\n", 828 rdev->mec_fw->size, fw_name); 829 err = -EINVAL; 830 } 831 832 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name); 833 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev); 834 if (err) 835 goto out; 836 if (rdev->rlc_fw->size != rlc_req_size) { 837 printk(KERN_ERR 838 "cik_rlc: Bogus length %zu in firmware \"%s\"\n", 839 rdev->rlc_fw->size, fw_name); 840 err = -EINVAL; 841 } 842 843 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name); 844 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev); 845 if (err) 846 goto out; 847 if (rdev->sdma_fw->size != sdma_req_size) { 848 printk(KERN_ERR 849 "cik_sdma: Bogus length %zu in firmware \"%s\"\n", 850 rdev->sdma_fw->size, fw_name); 851 err = -EINVAL; 852 } 853 854 
/* No MC ucode on APUs */ 855 if (!(rdev->flags & RADEON_IS_IGP)) { 856 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); 857 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev); 858 if (err) 859 goto out; 860 if (rdev->mc_fw->size != mc_req_size) { 861 printk(KERN_ERR 862 "cik_mc: Bogus length %zu in firmware \"%s\"\n", 863 rdev->mc_fw->size, fw_name); 864 err = -EINVAL; 865 } 866 } 867 868 out: 869 if (err) { 870 if (err != -EINVAL) 871 printk(KERN_ERR 872 "cik_cp: Failed to load firmware \"%s\"\n", 873 fw_name); 874 release_firmware(rdev->pfp_fw); 875 rdev->pfp_fw = NULL; 876 release_firmware(rdev->me_fw); 877 rdev->me_fw = NULL; 878 release_firmware(rdev->ce_fw); 879 rdev->ce_fw = NULL; 880 release_firmware(rdev->rlc_fw); 881 rdev->rlc_fw = NULL; 882 release_firmware(rdev->mc_fw); 883 rdev->mc_fw = NULL; 884 } 885 return err; 886 } 887 888 /* 889 * Core functions 890 */ 891 /** 892 * cik_tiling_mode_table_init - init the hw tiling table 893 * 894 * @rdev: radeon_device pointer 895 * 896 * Starting with SI, the tiling setup is done globally in a 897 * set of 32 tiling modes. Rather than selecting each set of 898 * parameters per surface as on older asics, we just select 899 * which index in the tiling table we want to use, and the 900 * surface uses those parameters (CIK). 
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	/* counts of GB_TILE_MODE* / GB_MACROTILE_MODE* registers written below */
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	/* total render backends = per-SE backends x shader engines */
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	/* pick the TILE_SPLIT encoding matching the DRAM row size */
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */

	if (num_pipe_configs == 8) {
		/* 8 tile pipes: program the 32 tile modes, caching each value
		 * in tile_mode_array as it is written */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				/* unused table entries are programmed to 0 */
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		/* macrotile (bank) modes; not cached in tile_mode_array */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 4) {
		/* 4 tile pipes: pipe config depends on the RB count */
		if (num_rbs == 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		} else if (num_rbs < 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		}
		/* macrotile (bank) modes shared by both 4-pipe variants */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 2) {
		/* 2 tile pipes */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else
		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
}

/**
 * cik_select_se_sh - select which SE, SH to address
 *
 * @rdev: radeon_device pointer
 * @se_num: shader engine to address
 * @sh_num: sh block to address
 *
 * Select which SE, SH combinations to address. Certain
 * registers are instanced per SE or SH. 0xffffffff means
 * broadcast to all SEs or SHs (CIK).
1723 */ 1724 static void cik_select_se_sh(struct radeon_device *rdev, 1725 u32 se_num, u32 sh_num) 1726 { 1727 u32 data = INSTANCE_BROADCAST_WRITES; 1728 1729 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) 1730 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES; 1731 else if (se_num == 0xffffffff) 1732 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num); 1733 else if (sh_num == 0xffffffff) 1734 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num); 1735 else 1736 data |= SH_INDEX(sh_num) | SE_INDEX(se_num); 1737 WREG32(GRBM_GFX_INDEX, data); 1738 } 1739 1740 /** 1741 * cik_create_bitmask - create a bitmask 1742 * 1743 * @bit_width: length of the mask 1744 * 1745 * create a variable length bit mask (CIK). 1746 * Returns the bitmask. 1747 */ 1748 static u32 cik_create_bitmask(u32 bit_width) 1749 { 1750 u32 i, mask = 0; 1751 1752 for (i = 0; i < bit_width; i++) { 1753 mask <<= 1; 1754 mask |= 1; 1755 } 1756 return mask; 1757 } 1758 1759 /** 1760 * cik_select_se_sh - select which SE, SH to address 1761 * 1762 * @rdev: radeon_device pointer 1763 * @max_rb_num: max RBs (render backends) for the asic 1764 * @se_num: number of SEs (shader engines) for the asic 1765 * @sh_per_se: number of SH blocks per SE for the asic 1766 * 1767 * Calculates the bitmask of disabled RBs (CIK). 1768 * Returns the disabled RB bitmask. 
1769 */ 1770 static u32 cik_get_rb_disabled(struct radeon_device *rdev, 1771 u32 max_rb_num, u32 se_num, 1772 u32 sh_per_se) 1773 { 1774 u32 data, mask; 1775 1776 data = RREG32(CC_RB_BACKEND_DISABLE); 1777 if (data & 1) 1778 data &= BACKEND_DISABLE_MASK; 1779 else 1780 data = 0; 1781 data |= RREG32(GC_USER_RB_BACKEND_DISABLE); 1782 1783 data >>= BACKEND_DISABLE_SHIFT; 1784 1785 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se); 1786 1787 return data & mask; 1788 } 1789 1790 /** 1791 * cik_setup_rb - setup the RBs on the asic 1792 * 1793 * @rdev: radeon_device pointer 1794 * @se_num: number of SEs (shader engines) for the asic 1795 * @sh_per_se: number of SH blocks per SE for the asic 1796 * @max_rb_num: max RBs (render backends) for the asic 1797 * 1798 * Configures per-SE/SH RB registers (CIK). 1799 */ 1800 static void cik_setup_rb(struct radeon_device *rdev, 1801 u32 se_num, u32 sh_per_se, 1802 u32 max_rb_num) 1803 { 1804 int i, j; 1805 u32 data, mask; 1806 u32 disabled_rbs = 0; 1807 u32 enabled_rbs = 0; 1808 1809 for (i = 0; i < se_num; i++) { 1810 for (j = 0; j < sh_per_se; j++) { 1811 cik_select_se_sh(rdev, i, j); 1812 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se); 1813 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH); 1814 } 1815 } 1816 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 1817 1818 mask = 1; 1819 for (i = 0; i < max_rb_num; i++) { 1820 if (!(disabled_rbs & mask)) 1821 enabled_rbs |= mask; 1822 mask <<= 1; 1823 } 1824 1825 for (i = 0; i < se_num; i++) { 1826 cik_select_se_sh(rdev, i, 0xffffffff); 1827 data = 0; 1828 for (j = 0; j < sh_per_se; j++) { 1829 switch (enabled_rbs & 3) { 1830 case 1: 1831 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2); 1832 break; 1833 case 2: 1834 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2); 1835 break; 1836 case 3: 1837 default: 1838 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2); 1839 break; 1840 } 1841 enabled_rbs >>= 2; 
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* restore broadcast so later register writes hit all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}

/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-ASIC shader/pipe topology and golden GB_ADDR_CONFIG value */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		/* TODO */
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive the DRAM row size from the memory controller column count */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
		rdev->config.cik.tile_config |= 1 << 4;
	else
		rdev->config.cik.tile_config |= 0 << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* mirror the address config into every block that decodes addresses */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* scan converter FIFO sizes come from the per-ASIC config above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write with no change latches the host path defaults */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	udelay(50);
}

/*
 * GPU scratch registers helpers function.
 */
/**
 * cik_scratch_init - setup driver info for CP scratch regs
 *
 * @rdev: radeon_device pointer
 *
 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
 * is not used by default on newer asics (r6xx+).  On newer asics,
 * memory buffers are used for fences rather than scratch regs.
 */
static void cik_scratch_init(struct radeon_device *rdev)
{
	int i;

	/* 7 scratch registers are set aside for driver use, starting at
	 * SCRATCH_REG0; each register is 4 bytes apart.
	 */
	rdev->scratch.num_reg = 7;
	rdev->scratch.reg_base = SCRATCH_REG0;
	for (i = 0; i < rdev->scratch.num_reg; i++) {
		rdev->scratch.free[i] = true;
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
	}
}

/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume();
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the register, then ask the CP to overwrite it */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	/* poll until the CP's write lands or we time out */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}

/**
 *
cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL(1) = write the 32-bit seq, INT_SEL(2) = irq after write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}

/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL(1) = write the 32-bit seq, INT_SEL(2) = irq after write */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}

/**
 * cik_semaphore_ring_emit - emit a semaphore packet on a ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: true to emit a semaphore wait, false to emit a signal
 *
 * Emits a MEM_SEMAPHORE packet referencing the semaphore's GPU address.
 */
void cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}

/*
 * IB stuff
 */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits an DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.
IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring. This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this write + 4 for the IB packet below */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this write + 4 for the IB packet below */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}

/**
 * cik_ib_test - basic gfx ring IB test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate an IB and execute it on the gfx ring (CIK).
2288 * Provides a basic gfx ring test to verify that IBs are working. 2289 * Returns 0 on success, error on failure. 2290 */ 2291 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) 2292 { 2293 struct radeon_ib ib; 2294 uint32_t scratch; 2295 uint32_t tmp = 0; 2296 unsigned i; 2297 int r; 2298 2299 r = radeon_scratch_get(rdev, &scratch); 2300 if (r) { 2301 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r); 2302 return r; 2303 } 2304 WREG32(scratch, 0xCAFEDEAD); 2305 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); 2306 if (r) { 2307 DRM_ERROR("radeon: failed to get ib (%d).\n", r); 2308 return r; 2309 } 2310 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 2311 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2); 2312 ib.ptr[2] = 0xDEADBEEF; 2313 ib.length_dw = 3; 2314 r = radeon_ib_schedule(rdev, &ib, NULL); 2315 if (r) { 2316 radeon_scratch_free(rdev, scratch); 2317 radeon_ib_free(rdev, &ib); 2318 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); 2319 return r; 2320 } 2321 r = radeon_fence_wait(ib.fence, false); 2322 if (r) { 2323 DRM_ERROR("radeon: fence wait failed (%d).\n", r); 2324 return r; 2325 } 2326 for (i = 0; i < rdev->usec_timeout; i++) { 2327 tmp = RREG32(scratch); 2328 if (tmp == 0xDEADBEEF) 2329 break; 2330 DRM_UDELAY(1); 2331 } 2332 if (i < rdev->usec_timeout) { 2333 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); 2334 } else { 2335 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n", 2336 scratch, tmp); 2337 r = -EINVAL; 2338 } 2339 radeon_scratch_free(rdev, scratch); 2340 radeon_ib_free(rdev, &ib); 2341 return r; 2342 } 2343 2344 /* 2345 * CP. 2346 * On CIK, gfx and compute now have independant command processors. 2347 * 2348 * GFX 2349 * Gfx consists of a single ring and can process both gfx jobs and 2350 * compute jobs. 
The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
/**
 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the gfx MEs.
 */
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_ME_CNTL, 0);
	else {
		/* halt all three gfx microengines and mark the ring unusable */
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	}
	udelay(50);
}

/**
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx PFP, ME, and CE ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	/* halt the MEs before touching their ucode RAM */
	cik_cp_gfx_enable(rdev, false);

	/* PFP */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	/* reset all the ucode read/write addresses */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}

/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* clear state payload plus 17 dwords of fixed packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}

/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}

/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* re-write CNTL without RB_RPTR_WR_ENA to latch rptr = 0 */
	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}

2580 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev, 2581 struct radeon_ring *ring) 2582 { 2583 u32 rptr; 2584 2585 2586 2587 if (rdev->wb.enabled) { 2588 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]); 2589 } else { 2590 mutex_lock(&rdev->srbm_mutex); 2591 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); 2592 rptr = RREG32(CP_HQD_PQ_RPTR); 2593 cik_srbm_select(rdev, 0, 0, 0, 0); 2594 mutex_unlock(&rdev->srbm_mutex); 2595 } 2596 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift; 2597 2598 return rptr; 2599 } 2600 2601 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev, 2602 struct radeon_ring *ring) 2603 { 2604 u32 wptr; 2605 2606 if (rdev->wb.enabled) { 2607 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]); 2608 } else { 2609 mutex_lock(&rdev->srbm_mutex); 2610 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); 2611 wptr = RREG32(CP_HQD_PQ_WPTR); 2612 cik_srbm_select(rdev, 0, 0, 0, 0); 2613 mutex_unlock(&rdev->srbm_mutex); 2614 } 2615 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift; 2616 2617 return wptr; 2618 } 2619 2620 void cik_compute_ring_set_wptr(struct radeon_device *rdev, 2621 struct radeon_ring *ring) 2622 { 2623 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask; 2624 2625 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr); 2626 WDOORBELL32(ring->doorbell_offset, wptr); 2627 } 2628 2629 /** 2630 * cik_cp_compute_enable - enable/disable the compute CP MEs 2631 * 2632 * @rdev: radeon_device pointer 2633 * @enable: enable or disable the MEs 2634 * 2635 * Halts or unhalts the compute MEs. 
 */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_MEC_CNTL, 0);
	else
		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
	udelay(50);
}

/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before touching their ucode RAM */
	cik_cp_compute_enable(rdev, false);

	/* MEC1 */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	if (rdev->family == CHIP_KAVERI) {
		/* MEC2 - only KV has a second MEC; same ucode image as MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}

/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	cik_cp_compute_enable(rdev, true);

	return 0;
}

/**
 * cik_cp_compute_fini - stop the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute queues and tear down the driver queue
 * info.
 */
static void cik_cp_compute_fini(struct radeon_device *rdev)
{
	int i, idx, r;

	cik_cp_compute_enable(rdev, false);

	/* free the MQD (memory queue descriptor) BO of each compute ring */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj) {
			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);

			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
			rdev->ring[idx].mqd_obj = NULL;
		}
	}
}

/* Tear down the HPD EOP buffer allocated by cik_mec_init(). */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}

/* per-pipe HPD EOP buffer size in bytes */
#define MEC_HPD_SIZE 2048

/* Set up the MEC topology for this ASIC and allocate, pin and clear the
 * HPD EOP buffer shared by all compute pipes.
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}

/* CPU-side shadow of the CP HQD/MQD register state saved in the MQD */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};

/* memory queue descriptor (MQD) layout for Bonaire-class compute queues */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32
dimensions[3]; 2850 u32 start_idx[3]; 2851 u32 num_threads[3]; 2852 u32 pipeline_stat_enable; 2853 u32 perf_counter_enable; 2854 u32 pgm[2]; 2855 u32 tba[2]; 2856 u32 tma[2]; 2857 u32 pgm_rsrc[2]; 2858 u32 vmid; 2859 u32 resource_limits; 2860 u32 static_thread_mgmt01[2]; 2861 u32 tmp_ring_size; 2862 u32 static_thread_mgmt23[2]; 2863 u32 restart[3]; 2864 u32 thread_trace_enable; 2865 u32 reserved1; 2866 u32 user_data[16]; 2867 u32 vgtcs_invoke_count[2]; 2868 struct hqd_registers queue_state; 2869 u32 dequeue_cntr; 2870 u32 interrupt_queue[64]; 2871 }; 2872 2873 /** 2874 * cik_cp_compute_resume - setup the compute queue registers 2875 * 2876 * @rdev: radeon_device pointer 2877 * 2878 * Program the compute queues and test them to make sure they 2879 * are working. 2880 * Returns 0 for success, error for failure. 2881 */ 2882 static int cik_cp_compute_resume(struct radeon_device *rdev) 2883 { 2884 int r, i, idx; 2885 u32 tmp; 2886 bool use_doorbell = true; 2887 u64 hqd_gpu_addr; 2888 u64 mqd_gpu_addr; 2889 u64 eop_gpu_addr; 2890 u64 wb_gpu_addr; 2891 u32 *buf; 2892 struct bonaire_mqd *mqd; 2893 2894 r = cik_cp_compute_start(rdev); 2895 if (r) 2896 return r; 2897 2898 /* fix up chicken bits */ 2899 tmp = RREG32(CP_CPF_DEBUG); 2900 tmp |= (1 << 23); 2901 WREG32(CP_CPF_DEBUG, tmp); 2902 2903 /* init the pipes */ 2904 mutex_lock(&rdev->srbm_mutex); 2905 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) { 2906 int me = (i < 4) ? 1 : 2; 2907 int pipe = (i < 4) ? 
i : (i - 4); 2908 2909 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2); 2910 2911 cik_srbm_select(rdev, me, pipe, 0, 0); 2912 2913 /* write the EOP addr */ 2914 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); 2915 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); 2916 2917 /* set the VMID assigned */ 2918 WREG32(CP_HPD_EOP_VMID, 0); 2919 2920 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2921 tmp = RREG32(CP_HPD_EOP_CONTROL); 2922 tmp &= ~EOP_SIZE_MASK; 2923 tmp |= drm_order(MEC_HPD_SIZE / 8); 2924 WREG32(CP_HPD_EOP_CONTROL, tmp); 2925 } 2926 cik_srbm_select(rdev, 0, 0, 0, 0); 2927 mutex_unlock(&rdev->srbm_mutex); 2928 2929 /* init the queues. Just two for now. */ 2930 for (i = 0; i < 2; i++) { 2931 if (i == 0) 2932 idx = CAYMAN_RING_TYPE_CP1_INDEX; 2933 else 2934 idx = CAYMAN_RING_TYPE_CP2_INDEX; 2935 2936 if (rdev->ring[idx].mqd_obj == NULL) { 2937 r = radeon_bo_create(rdev, 2938 sizeof(struct bonaire_mqd), 2939 PAGE_SIZE, true, 2940 RADEON_GEM_DOMAIN_GTT, NULL, 2941 &rdev->ring[idx].mqd_obj); 2942 if (r) { 2943 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r); 2944 return r; 2945 } 2946 } 2947 2948 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false); 2949 if (unlikely(r != 0)) { 2950 cik_cp_compute_fini(rdev); 2951 return r; 2952 } 2953 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT, 2954 &mqd_gpu_addr); 2955 if (r) { 2956 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r); 2957 cik_cp_compute_fini(rdev); 2958 return r; 2959 } 2960 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf); 2961 if (r) { 2962 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r); 2963 cik_cp_compute_fini(rdev); 2964 return r; 2965 } 2966 2967 /* doorbell offset */ 2968 rdev->ring[idx].doorbell_offset = 2969 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0; 2970 2971 /* init the mqd struct */ 2972 memset(buf, 0, sizeof(struct bonaire_mqd)); 2973 2974 mqd = (struct bonaire_mqd *)buf; 2975 mqd->header = 
0xC0310800; 2976 mqd->static_thread_mgmt01[0] = 0xffffffff; 2977 mqd->static_thread_mgmt01[1] = 0xffffffff; 2978 mqd->static_thread_mgmt23[0] = 0xffffffff; 2979 mqd->static_thread_mgmt23[1] = 0xffffffff; 2980 2981 mutex_lock(&rdev->srbm_mutex); 2982 cik_srbm_select(rdev, rdev->ring[idx].me, 2983 rdev->ring[idx].pipe, 2984 rdev->ring[idx].queue, 0); 2985 2986 /* disable wptr polling */ 2987 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL); 2988 tmp &= ~WPTR_POLL_EN; 2989 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp); 2990 2991 /* enable doorbell? */ 2992 mqd->queue_state.cp_hqd_pq_doorbell_control = 2993 RREG32(CP_HQD_PQ_DOORBELL_CONTROL); 2994 if (use_doorbell) 2995 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN; 2996 else 2997 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN; 2998 WREG32(CP_HQD_PQ_DOORBELL_CONTROL, 2999 mqd->queue_state.cp_hqd_pq_doorbell_control); 3000 3001 /* disable the queue if it's active */ 3002 mqd->queue_state.cp_hqd_dequeue_request = 0; 3003 mqd->queue_state.cp_hqd_pq_rptr = 0; 3004 mqd->queue_state.cp_hqd_pq_wptr= 0; 3005 if (RREG32(CP_HQD_ACTIVE) & 1) { 3006 WREG32(CP_HQD_DEQUEUE_REQUEST, 1); 3007 for (i = 0; i < rdev->usec_timeout; i++) { 3008 if (!(RREG32(CP_HQD_ACTIVE) & 1)) 3009 break; 3010 udelay(1); 3011 } 3012 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request); 3013 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr); 3014 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); 3015 } 3016 3017 /* set the pointer to the MQD */ 3018 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc; 3019 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); 3020 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr); 3021 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi); 3022 /* set MQD vmid to 0 */ 3023 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL); 3024 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK; 3025 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control); 
3026 3027 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3028 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8; 3029 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr; 3030 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3031 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base); 3032 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi); 3033 3034 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3035 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL); 3036 mqd->queue_state.cp_hqd_pq_control &= 3037 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK); 3038 3039 mqd->queue_state.cp_hqd_pq_control |= 3040 drm_order(rdev->ring[idx].ring_size / 8); 3041 mqd->queue_state.cp_hqd_pq_control |= 3042 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8); 3043 #ifdef __BIG_ENDIAN 3044 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT; 3045 #endif 3046 mqd->queue_state.cp_hqd_pq_control &= 3047 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE); 3048 mqd->queue_state.cp_hqd_pq_control |= 3049 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */ 3050 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control); 3051 3052 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */ 3053 if (i == 0) 3054 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET; 3055 else 3056 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET; 3057 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; 3058 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3059 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr); 3060 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI, 3061 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi); 3062 3063 /* set the wb address wether it's enabled or not */ 3064 if (i == 0) 3065 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET; 3066 else 3067 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET; 3068 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr 
& 0xfffffffc; 3069 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi = 3070 upper_32_bits(wb_gpu_addr) & 0xffff; 3071 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR, 3072 mqd->queue_state.cp_hqd_pq_rptr_report_addr); 3073 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3074 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi); 3075 3076 /* enable the doorbell if requested */ 3077 if (use_doorbell) { 3078 mqd->queue_state.cp_hqd_pq_doorbell_control = 3079 RREG32(CP_HQD_PQ_DOORBELL_CONTROL); 3080 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK; 3081 mqd->queue_state.cp_hqd_pq_doorbell_control |= 3082 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4); 3083 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN; 3084 mqd->queue_state.cp_hqd_pq_doorbell_control &= 3085 ~(DOORBELL_SOURCE | DOORBELL_HIT); 3086 3087 } else { 3088 mqd->queue_state.cp_hqd_pq_doorbell_control = 0; 3089 } 3090 WREG32(CP_HQD_PQ_DOORBELL_CONTROL, 3091 mqd->queue_state.cp_hqd_pq_doorbell_control); 3092 3093 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3094 rdev->ring[idx].wptr = 0; 3095 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr; 3096 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); 3097 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR); 3098 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr; 3099 3100 /* set the vmid for the queue */ 3101 mqd->queue_state.cp_hqd_vmid = 0; 3102 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid); 3103 3104 /* activate the queue */ 3105 mqd->queue_state.cp_hqd_active = 1; 3106 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); 3107 3108 cik_srbm_select(rdev, 0, 0, 0, 0); 3109 mutex_unlock(&rdev->srbm_mutex); 3110 3111 radeon_bo_kunmap(rdev->ring[idx].mqd_obj); 3112 radeon_bo_unreserve(rdev->ring[idx].mqd_obj); 3113 3114 rdev->ring[idx].ready = true; 3115 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]); 3116 if (r) 3117 rdev->ring[idx].ready = false; 3118 } 3119 3120 return 0; 3121 } 3122 3123 static void cik_cp_enable(struct 
radeon_device *rdev, bool enable)
{
	/* Gate the GFX and compute command processors together. */
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}

/**
 * cik_cp_load_microcode - load the GFX and compute CP microcode (CIK)
 *
 * @rdev: radeon_device pointer
 *
 * Loads the GFX (PFP/ME/CE) and then the compute (MEC) CP ucode.
 * Returns 0 for success, error on failure.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;
	r = cik_cp_compute_load_microcode(rdev);
	if (r)
		return r;

	return 0;
}

/**
 * cik_cp_fini - tear down the GFX and compute command processors (CIK)
 *
 * @rdev: radeon_device pointer
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}

/**
 * cik_cp_resume - reset, reload and restart the command processors (CIK)
 *
 * @rdev: radeon_device pointer
 *
 * Soft resets the CP block, reloads the CP microcode and brings the
 * GFX ring and the compute queues back up.
 * Returns 0 for success, error on failure.
 */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	/* Reset all cp blocks */
	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
	RREG32(GRBM_SOFT_RESET); /* read back to post the write */
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	return 0;
}

/*
 * sDMA - System DMA
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines. These engines are used for compute
 * and gfx. There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has it's own
 * packet format that is different from the PM4 format
 * used by the CP. sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things. It also has support for tiling/detiling of
 * buffers.
 */
/**
 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (CIK).
 */
void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
			      struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	/* VM id (0 = no VM) rides in the low bits of the packet header */
	u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;

	if (rdev->wb.enabled) {
		/* Pick the rptr value so that the 5 DW write packet below,
		 * plus the NOP padding emitted before the IB packet, lands
		 * the IB packet at offset 4 mod 8 (the 8 DW rule below). */
		u32 next_rptr = ring->wptr + 5;
		while ((next_rptr & 7) != 4)
			next_rptr++;
		next_rptr += 4;
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
		radeon_ring_write(ring, 1); /* number of DWs to follow */
		radeon_ring_write(ring, next_rptr);
	}

	/* IB packet must end on a 8 DW boundary */
	while ((ring->wptr & 7) != 4)
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
	radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
	radeon_ring_write(ring, ib->length_dw);

}

/**
 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed (CIK).
 */
void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
			      struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
	/* POLL_REG_MEM configured as a register compare-equal wait,
	 * used below to wait for the HDP flush to complete */
	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
	u32 ref_and_mask;

	if (fence->ring == R600_RING_TYPE_DMA_INDEX)
		ref_and_mask = SDMA0;
	else
		ref_and_mask = SDMA1;

	/* write the fence */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	radeon_ring_write(ring, fence->seq);
	/* generate an interrupt */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
	/* flush HDP */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
	radeon_ring_write(ring, ref_and_mask); /* MASK */
	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
}

/**
 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a DMA semaphore packet to the ring wait on or signal
 * other rings (CIK).
 */
void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
				  struct radeon_ring *ring,
				  struct radeon_semaphore *semaphore,
				  bool emit_wait)
{
	u64 addr = semaphore->gpu_addr;
	u32 extra_bits = emit_wait ?
0 : SDMA_SEMAPHORE_EXTRA_S;

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
	radeon_ring_write(ring, addr & 0xfffffff8); /* semaphore addr is 8 byte aligned */
	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
}

/**
 * cik_sdma_gfx_stop - stop the gfx async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx async dma ring buffers (CIK).
 */
static void cik_sdma_gfx_stop(struct radeon_device *rdev)
{
	u32 rb_cntl, reg_offset;
	int i;

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	/* disable the ring buffer and IB execution on both engines */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
		rb_cntl &= ~SDMA_RB_ENABLE;
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
	}
}

/**
 * cik_sdma_rlc_stop - stop the compute async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute async dma queues (CIK).
 */
static void cik_sdma_rlc_stop(struct radeon_device *rdev)
{
	/* XXX todo */
}

/**
 * cik_sdma_enable - stop the async dma engines
 *
 * @rdev: radeon_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines (CIK).
 */
static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
{
	u32 me_cntl, reg_offset;
	int i;

	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
		if (enable)
			me_cntl &= ~SDMA_HALT;
		else
			me_cntl |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
	}
}

/**
 * cik_sdma_gfx_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = SDMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = SDMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = drm_order(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;

		WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
		WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);

		ring->wptr = 0;
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);

		ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;

		/* enable DMA RB (writeback enable bit included if set above) */
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);

		ib_cntl = SDMA_IB_ENABLE;
#ifdef __BIG_ENDIAN
		ib_cntl |= SDMA_IB_SWAP_ENABLE;
#endif
		/* enable DMA IBs */
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}

/**
 * cik_sdma_rlc_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the compute DMA queues and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_rlc_resume(struct radeon_device *rdev)
{
	/* XXX todo */
	return 0;
}

/**
 * cik_sdma_load_microcode - load the sDMA ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_sdma_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->sdma_fw)
		return -EINVAL;

	/* stop the gfx rings and rlc compute queues */
	cik_sdma_gfx_stop(rdev);
	cik_sdma_rlc_stop(rdev);

	/* halt the MEs */
	cik_sdma_enable(rdev, false);

	/* sdma0 - both engines run the same image, streamed in big-endian DWs */
	fw_data = (const __be32 *)rdev->sdma_fw->data;
	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
	WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

	/* sdma1 */
	fw_data = (const __be32 *)rdev->sdma_fw->data;
	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
	WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

	/* reset the ucode write pointers */
	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
	return 0;
}

/**
 * cik_sdma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA engines and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_resume(struct radeon_device *rdev)
{
	int r;

	/* Reset dma */
	WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
	RREG32(SRBM_SOFT_RESET); /* read back to post the write */
	udelay(50);
	WREG32(SRBM_SOFT_RESET, 0);
	RREG32(SRBM_SOFT_RESET);

	r = cik_sdma_load_microcode(rdev);
	if (r)
		return r;

	/* unhalt the MEs */
	cik_sdma_enable(rdev, true);

	/* start the gfx rings and rlc compute queues */
	r = cik_sdma_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_sdma_rlc_resume(rdev);
	if (r)
		return r;

	return 0;
}

/**
 * cik_sdma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (CIK).
 */
static void cik_sdma_fini(struct radeon_device *rdev)
{
	/* stop the gfx rings and rlc compute queues */
	cik_sdma_gfx_stop(rdev);
	cik_sdma_rlc_stop(rdev);
	/* halt the MEs */
	cik_sdma_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
	/* XXX - compute dma queue tear down */
}

/**
 * cik_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @fence: radeon fence object
 *
 * Copy GPU paging using the DMA engine (CIK).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
3562 */ 3563 int cik_copy_dma(struct radeon_device *rdev, 3564 uint64_t src_offset, uint64_t dst_offset, 3565 unsigned num_gpu_pages, 3566 struct radeon_fence **fence) 3567 { 3568 struct radeon_semaphore *sem = NULL; 3569 int ring_index = rdev->asic->copy.dma_ring_index; 3570 struct radeon_ring *ring = &rdev->ring[ring_index]; 3571 u32 size_in_bytes, cur_size_in_bytes; 3572 int i, num_loops; 3573 int r = 0; 3574 3575 r = radeon_semaphore_create(rdev, &sem); 3576 if (r) { 3577 DRM_ERROR("radeon: moving bo (%d).\n", r); 3578 return r; 3579 } 3580 3581 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); 3582 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); 3583 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14); 3584 if (r) { 3585 DRM_ERROR("radeon: moving bo (%d).\n", r); 3586 radeon_semaphore_free(rdev, &sem, NULL); 3587 return r; 3588 } 3589 3590 if (radeon_fence_need_sync(*fence, ring->idx)) { 3591 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, 3592 ring->idx); 3593 radeon_fence_note_sync(*fence, ring->idx); 3594 } else { 3595 radeon_semaphore_free(rdev, &sem, NULL); 3596 } 3597 3598 for (i = 0; i < num_loops; i++) { 3599 cur_size_in_bytes = size_in_bytes; 3600 if (cur_size_in_bytes > 0x1fffff) 3601 cur_size_in_bytes = 0x1fffff; 3602 size_in_bytes -= cur_size_in_bytes; 3603 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)); 3604 radeon_ring_write(ring, cur_size_in_bytes); 3605 radeon_ring_write(ring, 0); /* src/dst endian swap */ 3606 radeon_ring_write(ring, src_offset & 0xffffffff); 3607 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff); 3608 radeon_ring_write(ring, dst_offset & 0xfffffffc); 3609 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff); 3610 src_offset += cur_size_in_bytes; 3611 dst_offset += cur_size_in_bytes; 3612 } 3613 3614 r = radeon_fence_emit(rdev, fence, ring->idx); 3615 if (r) { 3616 radeon_ring_unlock_undo(rdev, ring); 3617 return r; 3618 } 3619 3620 
radeon_ring_unlock_commit(rdev, ring); 3621 radeon_semaphore_free(rdev, &sem, *fence); 3622 3623 return r; 3624 } 3625 3626 /** 3627 * cik_sdma_ring_test - simple async dma engine test 3628 * 3629 * @rdev: radeon_device pointer 3630 * @ring: radeon_ring structure holding ring information 3631 * 3632 * Test the DMA engine by writing using it to write an 3633 * value to memory. (CIK). 3634 * Returns 0 for success, error for failure. 3635 */ 3636 int cik_sdma_ring_test(struct radeon_device *rdev, 3637 struct radeon_ring *ring) 3638 { 3639 unsigned i; 3640 int r; 3641 void __iomem *ptr = (void *)rdev->vram_scratch.ptr; 3642 u32 tmp; 3643 3644 if (!ptr) { 3645 DRM_ERROR("invalid vram scratch pointer\n"); 3646 return -EINVAL; 3647 } 3648 3649 tmp = 0xCAFEDEAD; 3650 writel(tmp, ptr); 3651 3652 r = radeon_ring_lock(rdev, ring, 4); 3653 if (r) { 3654 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); 3655 return r; 3656 } 3657 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); 3658 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc); 3659 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff); 3660 radeon_ring_write(ring, 1); /* number of DWs to follow */ 3661 radeon_ring_write(ring, 0xDEADBEEF); 3662 radeon_ring_unlock_commit(rdev, ring); 3663 3664 for (i = 0; i < rdev->usec_timeout; i++) { 3665 tmp = readl(ptr); 3666 if (tmp == 0xDEADBEEF) 3667 break; 3668 DRM_UDELAY(1); 3669 } 3670 3671 if (i < rdev->usec_timeout) { 3672 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); 3673 } else { 3674 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", 3675 ring->idx, tmp); 3676 r = -EINVAL; 3677 } 3678 return r; 3679 } 3680 3681 /** 3682 * cik_sdma_ib_test - test an IB on the DMA engine 3683 * 3684 * @rdev: radeon_device pointer 3685 * @ring: radeon_ring structure holding ring information 3686 * 3687 * Test a simple IB in the DMA ring (CIK). 
3688 * Returns 0 on success, error on failure. 3689 */ 3690 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) 3691 { 3692 struct radeon_ib ib; 3693 unsigned i; 3694 int r; 3695 void __iomem *ptr = (void *)rdev->vram_scratch.ptr; 3696 u32 tmp = 0; 3697 3698 if (!ptr) { 3699 DRM_ERROR("invalid vram scratch pointer\n"); 3700 return -EINVAL; 3701 } 3702 3703 tmp = 0xCAFEDEAD; 3704 writel(tmp, ptr); 3705 3706 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); 3707 if (r) { 3708 DRM_ERROR("radeon: failed to get ib (%d).\n", r); 3709 return r; 3710 } 3711 3712 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 3713 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc; 3714 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff; 3715 ib.ptr[3] = 1; 3716 ib.ptr[4] = 0xDEADBEEF; 3717 ib.length_dw = 5; 3718 3719 r = radeon_ib_schedule(rdev, &ib, NULL); 3720 if (r) { 3721 radeon_ib_free(rdev, &ib); 3722 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); 3723 return r; 3724 } 3725 r = radeon_fence_wait(ib.fence, false); 3726 if (r) { 3727 DRM_ERROR("radeon: fence wait failed (%d).\n", r); 3728 return r; 3729 } 3730 for (i = 0; i < rdev->usec_timeout; i++) { 3731 tmp = readl(ptr); 3732 if (tmp == 0xDEADBEEF) 3733 break; 3734 DRM_UDELAY(1); 3735 } 3736 if (i < rdev->usec_timeout) { 3737 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); 3738 } else { 3739 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp); 3740 r = -EINVAL; 3741 } 3742 radeon_ib_free(rdev, &ib); 3743 return r; 3744 } 3745 3746 3747 static void cik_print_gpu_status_regs(struct radeon_device *rdev) 3748 { 3749 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n", 3750 RREG32(GRBM_STATUS)); 3751 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n", 3752 RREG32(GRBM_STATUS2)); 3753 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n", 3754 RREG32(GRBM_STATUS_SE0)); 3755 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n", 3756 
RREG32(GRBM_STATUS_SE1)); 3757 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n", 3758 RREG32(GRBM_STATUS_SE2)); 3759 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n", 3760 RREG32(GRBM_STATUS_SE3)); 3761 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", 3762 RREG32(SRBM_STATUS)); 3763 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n", 3764 RREG32(SRBM_STATUS2)); 3765 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n", 3766 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET)); 3767 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n", 3768 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET)); 3769 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT)); 3770 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n", 3771 RREG32(CP_STALLED_STAT1)); 3772 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n", 3773 RREG32(CP_STALLED_STAT2)); 3774 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n", 3775 RREG32(CP_STALLED_STAT3)); 3776 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n", 3777 RREG32(CP_CPF_BUSY_STAT)); 3778 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n", 3779 RREG32(CP_CPF_STALLED_STAT1)); 3780 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS)); 3781 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT)); 3782 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n", 3783 RREG32(CP_CPC_STALLED_STAT1)); 3784 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS)); 3785 } 3786 3787 /** 3788 * cik_gpu_check_soft_reset - check which blocks are busy 3789 * 3790 * @rdev: radeon_device pointer 3791 * 3792 * Check which blocks are busy and return the relevant reset 3793 * mask to be used by cik_gpu_soft_reset(). 3794 * Returns a mask of the blocks to be reset. 
 */
static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}

/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* on IGPs there is no MC to reset */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}

/**
 * cik_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 *
 * Look up which blocks are hung and attempt
 * to reset them.
 * Returns 0 for success.
4003 */ 4004 int cik_asic_reset(struct radeon_device *rdev) 4005 { 4006 u32 reset_mask; 4007 4008 reset_mask = cik_gpu_check_soft_reset(rdev); 4009 4010 if (reset_mask) 4011 r600_set_bios_scratch_engine_hung(rdev, true); 4012 4013 cik_gpu_soft_reset(rdev, reset_mask); 4014 4015 reset_mask = cik_gpu_check_soft_reset(rdev); 4016 4017 if (!reset_mask) 4018 r600_set_bios_scratch_engine_hung(rdev, false); 4019 4020 return 0; 4021 } 4022 4023 /** 4024 * cik_gfx_is_lockup - check if the 3D engine is locked up 4025 * 4026 * @rdev: radeon_device pointer 4027 * @ring: radeon_ring structure holding ring information 4028 * 4029 * Check if the 3D engine is locked up (CIK). 4030 * Returns true if the engine is locked, false if not. 4031 */ 4032 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) 4033 { 4034 u32 reset_mask = cik_gpu_check_soft_reset(rdev); 4035 4036 if (!(reset_mask & (RADEON_RESET_GFX | 4037 RADEON_RESET_COMPUTE | 4038 RADEON_RESET_CP))) { 4039 radeon_ring_lockup_update(ring); 4040 return false; 4041 } 4042 /* force CP activities */ 4043 radeon_ring_force_activity(rdev, ring); 4044 return radeon_ring_test_lockup(rdev, ring); 4045 } 4046 4047 /** 4048 * cik_sdma_is_lockup - Check if the DMA engine is locked up 4049 * 4050 * @rdev: radeon_device pointer 4051 * @ring: radeon_ring structure holding ring information 4052 * 4053 * Check if the async DMA engine is locked up (CIK). 4054 * Returns true if the engine appears to be locked up, false if not. 
 */
bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
	u32 mask;

	/* each SDMA engine has its own reset bit; pick the one
	 * matching the ring being checked */
	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force ring activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}

/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop display access to the MC while we reprogram it */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs vram top and base into two 16-bit fields
	 * (units of 16MB, hence the >> 24) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture disabled (BOT > TOP, BASE = 0) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}

/**
 * cik_mc_init - initialize the memory controller driver params
 *
 * @rdev: radeon_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space (CIK).
 * Returns 0 for success.
 */
static int cik_mc_init(struct radeon_device *rdev)
{
	u32 tmp;
	int chansize, numchan;

	/* Get VRAM informations */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	tmp = RREG32(MC_SHARED_CHMAP);
	/* decode the NOOFCHAN field into an actual channel count;
	 * the mapping is non-linear (odd counts exist on some SKUs) */
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	case 4:
		numchan = 3;
		break;
	case 5:
		numchan = 6;
		break;
	case 6:
		numchan = 10;
		break;
	case 7:
		numchan = 12;
		break;
	case 8:
		numchan = 16;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0 ? */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* size in MB on si */
	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	/* CIK reuses the SI vram/gtt placement logic */
	si_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);

	return 0;
}

/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}

/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* magic registers, purpose undocumented — TODO confirm against
	 * the register spec */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* contexts 1-7 and 8-15 live in two separate register banks */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* TC cache setup ??? */
	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L1_STORE_POLICY, 0);

	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);

	WREG32(TC_CFG_L1_VOLATILE, 0);
	WREG32(TC_CFG_L2_VOLATILE, 0);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	/* program the per-VMID SH_MEM/SDMA state for all 16 VMIDs */
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	/* restore VMID 0 selection before dropping the lock */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}

/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page tables (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}

/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}

/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}

/*
 * vm
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_vm_init - cik vm init callback
 *
 * @rdev: radeon_device pointer
 *
 * Inits cik specific vm parameters (number of VMs, base of vram for
 * VMIDs 1-15) (CIK).
 * Returns 0 for success.
 */
int cik_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	if (rdev->flags & RADEON_IS_IGP) {
		/* APUs carve vram out of system memory; MC_VM_FB_OFFSET
		 * is in 4MB units (hence << 22) */
		u64 tmp = RREG32(MC_VM_FB_OFFSET);
		tmp <<= 22;
		rdev->vm_manager.vram_base_offset = tmp;
	} else
		rdev->vm_manager.vram_base_offset = 0;

	return 0;
}

/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}

/**
 * cik_vm_decode_fault - print human readable fault info
 *
 * @rdev: radeon_device pointer
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 *
 * Print human readable fault information (CIK).
 */
static void cik_vm_decode_fault(struct radeon_device *rdev,
				u32 status, u32 addr, u32 mc_client)
{
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	/* mc_client is reinterpreted as a 4-character block name —
	 * presumably filled in by the caller from a client-id register;
	 * note this is not NUL-terminated by this function */
	char *block = (char *)&mc_client;

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}

/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write the new page directory base for this VMID */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	/* We should be using the WAIT_REG_MEM packet here like in
	 * cik_fence_ring_emit(), but it causes the CP to hang in this
	 * context...
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}

/**
 * cik_vm_set_page - update the page tables using sDMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using CP or sDMA (CIK).
 */
void cik_vm_set_page(struct radeon_device *rdev,
		     struct radeon_ib *ib,
		     uint64_t pe,
		     uint64_t addr, unsigned count,
		     uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* CP */
		while (count) {
			/* 2 header dwords (pe lo/hi) + 2 dwords per PTE,
			 * capped at the max WRITE_DATA payload */
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
						    WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system pages go through the GART */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		if (flags & RADEON_VM_PAGE_SYSTEM) {
			while (count) {
				/* 2 dwords per PTE, capped at the max
				 * sDMA linear-write payload */
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				/* for non-physically contiguous pages (system) */
				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				ib->ptr[ib->length_dw++] = pe;
				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
				ib->ptr[ib->length_dw++] = ndw;
				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
					if (flags & RADEON_VM_PAGE_SYSTEM) {
						value = radeon_vm_map_gart(rdev, addr);
						value &= 0xFFFFFFFFFFFFF000ULL;
					} else if (flags & RADEON_VM_PAGE_VALID) {
						value = addr;
					} else {
						value = 0;
					}
					addr += incr;
					value |= r600_flags;
					ib->ptr[ib->length_dw++] = value;
					ib->ptr[ib->length_dw++] = upper_32_bits(value);
				}
			}
		} else {
			while (count) {
				ndw = count;
				if (ndw > 0x7FFFF)
					ndw = 0x7FFFF;

				if (flags & RADEON_VM_PAGE_VALID)
					value = addr;
				else
					value = 0;
				/* for physically contiguous pages (vram) */
				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
				ib->ptr[ib->length_dw++] = pe; /* dst addr */
				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = value; /* value */
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
				ib->ptr[ib->length_dw++] = incr; /* increment size */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = ndw; /* number of entries */
				pe += ndw * 8;
				addr += ndw * incr;
				count -= ndw;
			}
		}
		/* pad the IB to a multiple of 8 dwords with sDMA NOPs */
		while (ib->length_dw & 0x7)
			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
	}
}

/**
 * cik_dma_vm_flush - cik vm flush using sDMA
 *
 * @rdev: radeon_device pointer
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (CIK).
 */
void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	/* POLL_REG_MEM configured as a "wait until equal" */
	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
	u32 ref_and_mask;

	if (vm == NULL)
		return;

	/* pick the HDP flush done/req bit for this SDMA engine */
	if (ridx == R600_RING_TYPE_DMA_INDEX)
		ref_and_mask = SDMA0;
	else
		ref_and_mask = SDMA1;

	/* write the new page directory base for this VMID */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	if (vm->id < 8) {
		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
	radeon_ring_write(ring, 1);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
	radeon_ring_write(ring, 0);

	/* switch SRBM back to VMID 0 */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(0));

	/* flush HDP */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
	radeon_ring_write(ring, ref_and_mask); /* MASK */
	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */

	/* flush TLB */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 1 << vm->id);
}

/*
 * RLC
 * The RLC is a multi-purpose microengine that handles a
 * variety of functions, the most important of which is
 * the interrupt controller.
 */
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	int i, j, k;
	u32 mask, tmp;

	/* disable the gfx context busy/empty interrupts */
	tmp = RREG32(CP_INT_CNTL_RING0);
	tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	/* repeated reads — presumably a settle/flush delay; TODO confirm */
	RREG32(CB_CGTT_SCLK_CTRL);
	RREG32(CB_CGTT_SCLK_CTRL);
	RREG32(CB_CGTT_SCLK_CTRL);
	RREG32(CB_CGTT_SCLK_CTRL);

	/* clear the low two bits of the CGCG/CGLS control */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	/* halt the RLC */
	WREG32(RLC_CNTL, 0);

	/* wait for the CU master to go idle on every SE/SH */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* broadcast: select all SEs/SHs again */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* wait for the non-CU masters to go idle as well */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev:
radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(RLC_CNTL, RLC_ENABLE);

	/* re-enable the gfx context busy/empty interrupts that
	 * cik_rlc_stop() turned off */
	tmp = RREG32(CP_INT_CNTL_RING0);
	tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	udelay(50);
}

/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size;
	u32 clear_state_info[3];
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	/* ucode size differs per family */
	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		size = BONAIRE_RLC_UCODE_SIZE;
		break;
	case CHIP_KAVERI:
		size = KV_RLC_UCODE_SIZE;
		break;
	case CHIP_KABINI:
		size = KB_RLC_UCODE_SIZE;
		break;
	}

	cik_rlc_stop(rdev);

	/* pulse a soft reset of the RLC; the read-backs post the writes */
	WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
	RREG32(GRBM_SOFT_RESET);
	udelay(50);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);
	udelay(50);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* broadcast load-balancer setup to all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* load the big-endian firmware image word by word */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	WREG32(RLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < size; i++)
		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(RLC_GPM_UCODE_ADDR, 0);

	/* XXX */
	clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
	clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
	clear_state_info[2] = 0;//cik_default_size;
	WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
	for (i = 0; i < 3; i++)
		WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
	WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}

/*
 * Interrupts
 * Starting with r6xx, interrupts are handled via a ring buffer.
 * Ring buffers are areas of GPU accessible memory that the GPU
 * writes interrupt vectors into and the host reads vectors out of.
 * There is a rptr (read pointer) that determines where the
 * host is currently reading, and a wptr (write pointer)
 * which determines where the GPU has written.  When the
 * pointers are equal, the ring is idle.  When the GPU
 * writes vectors to the ring buffer, it increments the
 * wptr.  When there is an interrupt, the host then starts
 * fetching commands and processing them until the pointers are
 * equal again at which point it updates the rptr.
 */

/**
 * cik_enable_interrupts - Enable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Enable the interrupt ring buffer (CIK).
 */
static void cik_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	rdev->ih.enabled = true;
}

/**
 * cik_disable_interrupts - Disable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Disable the interrupt ring buffer (CIK).
 */
static void cik_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}

/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: keep only the polarity bit, clearing the
	 * enable bit on each HPD pin */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}

/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}

/**
 * cik_irq_set - enable/disable interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Enable interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).
 * Returns 0 for success, errors for failure.
 */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
		PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current register state with the enable bits cleared */
	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;

	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5124 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 5125 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 5126 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 5127 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 5128 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 5129 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 5130 5131 /* enable CP interrupts on all rings */ 5132 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { 5133 DRM_DEBUG("cik_irq_set: sw int gfx\n"); 5134 cp_int_cntl |= TIME_STAMP_INT_ENABLE; 5135 } 5136 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) { 5137 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 5138 DRM_DEBUG("si_irq_set: sw int cp1\n"); 5139 if (ring->me == 1) { 5140 switch (ring->pipe) { 5141 case 0: 5142 cp_m1p0 |= TIME_STAMP_INT_ENABLE; 5143 break; 5144 case 1: 5145 cp_m1p1 |= TIME_STAMP_INT_ENABLE; 5146 break; 5147 case 2: 5148 cp_m1p2 |= TIME_STAMP_INT_ENABLE; 5149 break; 5150 case 3: 5151 cp_m1p2 |= TIME_STAMP_INT_ENABLE; 5152 break; 5153 default: 5154 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe); 5155 break; 5156 } 5157 } else if (ring->me == 2) { 5158 switch (ring->pipe) { 5159 case 0: 5160 cp_m2p0 |= TIME_STAMP_INT_ENABLE; 5161 break; 5162 case 1: 5163 cp_m2p1 |= TIME_STAMP_INT_ENABLE; 5164 break; 5165 case 2: 5166 cp_m2p2 |= TIME_STAMP_INT_ENABLE; 5167 break; 5168 case 3: 5169 cp_m2p2 |= TIME_STAMP_INT_ENABLE; 5170 break; 5171 default: 5172 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe); 5173 break; 5174 } 5175 } else { 5176 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me); 5177 } 5178 } 5179 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) { 5180 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 5181 DRM_DEBUG("si_irq_set: sw int cp2\n"); 5182 if (ring->me == 1) { 5183 switch 
(ring->pipe) { 5184 case 0: 5185 cp_m1p0 |= TIME_STAMP_INT_ENABLE; 5186 break; 5187 case 1: 5188 cp_m1p1 |= TIME_STAMP_INT_ENABLE; 5189 break; 5190 case 2: 5191 cp_m1p2 |= TIME_STAMP_INT_ENABLE; 5192 break; 5193 case 3: 5194 cp_m1p2 |= TIME_STAMP_INT_ENABLE; 5195 break; 5196 default: 5197 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe); 5198 break; 5199 } 5200 } else if (ring->me == 2) { 5201 switch (ring->pipe) { 5202 case 0: 5203 cp_m2p0 |= TIME_STAMP_INT_ENABLE; 5204 break; 5205 case 1: 5206 cp_m2p1 |= TIME_STAMP_INT_ENABLE; 5207 break; 5208 case 2: 5209 cp_m2p2 |= TIME_STAMP_INT_ENABLE; 5210 break; 5211 case 3: 5212 cp_m2p2 |= TIME_STAMP_INT_ENABLE; 5213 break; 5214 default: 5215 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe); 5216 break; 5217 } 5218 } else { 5219 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me); 5220 } 5221 } 5222 5223 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) { 5224 DRM_DEBUG("cik_irq_set: sw int dma\n"); 5225 dma_cntl |= TRAP_ENABLE; 5226 } 5227 5228 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) { 5229 DRM_DEBUG("cik_irq_set: sw int dma1\n"); 5230 dma_cntl1 |= TRAP_ENABLE; 5231 } 5232 5233 if (rdev->irq.crtc_vblank_int[0] || 5234 atomic_read(&rdev->irq.pflip[0])) { 5235 DRM_DEBUG("cik_irq_set: vblank 0\n"); 5236 crtc1 |= VBLANK_INTERRUPT_MASK; 5237 } 5238 if (rdev->irq.crtc_vblank_int[1] || 5239 atomic_read(&rdev->irq.pflip[1])) { 5240 DRM_DEBUG("cik_irq_set: vblank 1\n"); 5241 crtc2 |= VBLANK_INTERRUPT_MASK; 5242 } 5243 if (rdev->irq.crtc_vblank_int[2] || 5244 atomic_read(&rdev->irq.pflip[2])) { 5245 DRM_DEBUG("cik_irq_set: vblank 2\n"); 5246 crtc3 |= VBLANK_INTERRUPT_MASK; 5247 } 5248 if (rdev->irq.crtc_vblank_int[3] || 5249 atomic_read(&rdev->irq.pflip[3])) { 5250 DRM_DEBUG("cik_irq_set: vblank 3\n"); 5251 crtc4 |= VBLANK_INTERRUPT_MASK; 5252 } 5253 if (rdev->irq.crtc_vblank_int[4] || 5254 atomic_read(&rdev->irq.pflip[4])) { 5255 
DRM_DEBUG("cik_irq_set: vblank 4\n"); 5256 crtc5 |= VBLANK_INTERRUPT_MASK; 5257 } 5258 if (rdev->irq.crtc_vblank_int[5] || 5259 atomic_read(&rdev->irq.pflip[5])) { 5260 DRM_DEBUG("cik_irq_set: vblank 5\n"); 5261 crtc6 |= VBLANK_INTERRUPT_MASK; 5262 } 5263 if (rdev->irq.hpd[0]) { 5264 DRM_DEBUG("cik_irq_set: hpd 1\n"); 5265 hpd1 |= DC_HPDx_INT_EN; 5266 } 5267 if (rdev->irq.hpd[1]) { 5268 DRM_DEBUG("cik_irq_set: hpd 2\n"); 5269 hpd2 |= DC_HPDx_INT_EN; 5270 } 5271 if (rdev->irq.hpd[2]) { 5272 DRM_DEBUG("cik_irq_set: hpd 3\n"); 5273 hpd3 |= DC_HPDx_INT_EN; 5274 } 5275 if (rdev->irq.hpd[3]) { 5276 DRM_DEBUG("cik_irq_set: hpd 4\n"); 5277 hpd4 |= DC_HPDx_INT_EN; 5278 } 5279 if (rdev->irq.hpd[4]) { 5280 DRM_DEBUG("cik_irq_set: hpd 5\n"); 5281 hpd5 |= DC_HPDx_INT_EN; 5282 } 5283 if (rdev->irq.hpd[5]) { 5284 DRM_DEBUG("cik_irq_set: hpd 6\n"); 5285 hpd6 |= DC_HPDx_INT_EN; 5286 } 5287 5288 WREG32(CP_INT_CNTL_RING0, cp_int_cntl); 5289 5290 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl); 5291 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1); 5292 5293 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0); 5294 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1); 5295 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2); 5296 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3); 5297 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0); 5298 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1); 5299 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2); 5300 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3); 5301 5302 WREG32(GRBM_INT_CNTL, grbm_int_cntl); 5303 5304 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); 5305 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2); 5306 if (rdev->num_crtc >= 4) { 5307 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3); 5308 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4); 5309 } 5310 if (rdev->num_crtc >= 6) { 5311 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5); 5312 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6); 5313 } 5314 5315 
WREG32(DC_HPD1_INT_CONTROL, hpd1); 5316 WREG32(DC_HPD2_INT_CONTROL, hpd2); 5317 WREG32(DC_HPD3_INT_CONTROL, hpd3); 5318 WREG32(DC_HPD4_INT_CONTROL, hpd4); 5319 WREG32(DC_HPD5_INT_CONTROL, hpd5); 5320 WREG32(DC_HPD6_INT_CONTROL, hpd6); 5321 5322 return 0; 5323 } 5324 5325 /** 5326 * cik_irq_ack - ack interrupt sources 5327 * 5328 * @rdev: radeon_device pointer 5329 * 5330 * Ack interrupt sources on the GPU (vblanks, hpd, 5331 * etc.) (CIK). Certain interrupts sources are sw 5332 * generated and do not require an explicit ack. 5333 */ 5334 static inline void cik_irq_ack(struct radeon_device *rdev) 5335 { 5336 u32 tmp; 5337 5338 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS); 5339 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE); 5340 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2); 5341 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3); 5342 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4); 5343 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5); 5344 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6); 5345 5346 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) 5347 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK); 5348 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) 5349 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK); 5350 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) 5351 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK); 5352 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) 5353 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK); 5354 5355 if (rdev->num_crtc >= 4) { 5356 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) 5357 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK); 
5358 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) 5359 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK); 5360 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) 5361 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK); 5362 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) 5363 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK); 5364 } 5365 5366 if (rdev->num_crtc >= 6) { 5367 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) 5368 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK); 5369 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) 5370 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK); 5371 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) 5372 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK); 5373 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) 5374 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK); 5375 } 5376 5377 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) { 5378 tmp = RREG32(DC_HPD1_INT_CONTROL); 5379 tmp |= DC_HPDx_INT_ACK; 5380 WREG32(DC_HPD1_INT_CONTROL, tmp); 5381 } 5382 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) { 5383 tmp = RREG32(DC_HPD2_INT_CONTROL); 5384 tmp |= DC_HPDx_INT_ACK; 5385 WREG32(DC_HPD2_INT_CONTROL, tmp); 5386 } 5387 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) { 5388 tmp = RREG32(DC_HPD3_INT_CONTROL); 5389 tmp |= DC_HPDx_INT_ACK; 5390 WREG32(DC_HPD3_INT_CONTROL, tmp); 5391 } 5392 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) { 5393 tmp = RREG32(DC_HPD4_INT_CONTROL); 5394 tmp |= DC_HPDx_INT_ACK; 5395 WREG32(DC_HPD4_INT_CONTROL, tmp); 5396 } 5397 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) { 5398 tmp = RREG32(DC_HPD5_INT_CONTROL); 5399 tmp |= DC_HPDx_INT_ACK; 5400 
WREG32(DC_HPD5_INT_CONTROL, tmp); 5401 } 5402 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { 5403 tmp = RREG32(DC_HPD5_INT_CONTROL); 5404 tmp |= DC_HPDx_INT_ACK; 5405 WREG32(DC_HPD6_INT_CONTROL, tmp); 5406 } 5407 } 5408 5409 /** 5410 * cik_irq_disable - disable interrupts 5411 * 5412 * @rdev: radeon_device pointer 5413 * 5414 * Disable interrupts on the hw (CIK). 5415 */ 5416 static void cik_irq_disable(struct radeon_device *rdev) 5417 { 5418 cik_disable_interrupts(rdev); 5419 /* Wait and acknowledge irq */ 5420 mdelay(1); 5421 cik_irq_ack(rdev); 5422 cik_disable_interrupt_state(rdev); 5423 } 5424 5425 /** 5426 * cik_irq_disable - disable interrupts for suspend 5427 * 5428 * @rdev: radeon_device pointer 5429 * 5430 * Disable interrupts and stop the RLC (CIK). 5431 * Used for suspend. 5432 */ 5433 static void cik_irq_suspend(struct radeon_device *rdev) 5434 { 5435 cik_irq_disable(rdev); 5436 cik_rlc_stop(rdev); 5437 } 5438 5439 /** 5440 * cik_irq_fini - tear down interrupt support 5441 * 5442 * @rdev: radeon_device pointer 5443 * 5444 * Disable interrupts on the hw and free the IH ring 5445 * buffer (CIK). 5446 * Used for driver unload. 5447 */ 5448 static void cik_irq_fini(struct radeon_device *rdev) 5449 { 5450 cik_irq_suspend(rdev); 5451 r600_ih_ring_fini(rdev); 5452 } 5453 5454 /** 5455 * cik_get_ih_wptr - get the IH ring buffer wptr 5456 * 5457 * @rdev: radeon_device pointer 5458 * 5459 * Get the IH ring buffer wptr from either the register 5460 * or the writeback memory buffer (CIK). Also check for 5461 * ring buffer overflow and deal with it. 5462 * Used by cik_irq_process(). 5463 * Returns the value of the wptr. 
5464 */ 5465 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev) 5466 { 5467 u32 wptr, tmp; 5468 5469 if (rdev->wb.enabled) 5470 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]); 5471 else 5472 wptr = RREG32(IH_RB_WPTR); 5473 5474 if (wptr & RB_OVERFLOW) { 5475 /* When a ring buffer overflow happen start parsing interrupt 5476 * from the last not overwritten vector (wptr + 16). Hopefully 5477 * this should allow us to catchup. 5478 */ 5479 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n", 5480 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask); 5481 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask; 5482 tmp = RREG32(IH_RB_CNTL); 5483 tmp |= IH_WPTR_OVERFLOW_CLEAR; 5484 WREG32(IH_RB_CNTL, tmp); 5485 } 5486 return (wptr & rdev->ih.ptr_mask); 5487 } 5488 5489 /* CIK IV Ring 5490 * Each IV ring entry is 128 bits: 5491 * [7:0] - interrupt source id 5492 * [31:8] - reserved 5493 * [59:32] - interrupt source data 5494 * [63:60] - reserved 5495 * [71:64] - RINGID 5496 * CP: 5497 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0] 5498 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher 5499 * - for gfx, hw shader state (0=PS...5=LS, 6=CS) 5500 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes 5501 * PIPE_ID - ME0 0=3D 5502 * - ME1&2 compute dispatcher (4 pipes each) 5503 * SDMA: 5504 * INSTANCE_ID [1:0], QUEUE_ID[1:0] 5505 * INSTANCE_ID - 0 = sdma0, 1 = sdma1 5506 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1 5507 * [79:72] - VMID 5508 * [95:80] - PASID 5509 * [127:96] - reserved 5510 */ 5511 /** 5512 * cik_irq_process - interrupt handler 5513 * 5514 * @rdev: radeon_device pointer 5515 * 5516 * Interrupt hander (CIK). Walk the IH ring, 5517 * ack interrupts and schedule work to handle 5518 * interrupt events. 5519 * Returns irq process return code. 
5520 */ 5521 int cik_irq_process(struct radeon_device *rdev) 5522 { 5523 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 5524 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 5525 u32 wptr; 5526 u32 rptr; 5527 u32 src_id, src_data, ring_id; 5528 u8 me_id, pipe_id, queue_id; 5529 u32 ring_index; 5530 bool queue_hotplug = false; 5531 bool queue_reset = false; 5532 u32 addr, status, mc_client; 5533 5534 if (!rdev->ih.enabled || rdev->shutdown) 5535 return IRQ_NONE; 5536 5537 wptr = cik_get_ih_wptr(rdev); 5538 5539 restart_ih: 5540 /* is somebody else already processing irqs? */ 5541 if (atomic_xchg(&rdev->ih.lock, 1)) 5542 return IRQ_NONE; 5543 5544 rptr = rdev->ih.rptr; 5545 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr); 5546 5547 /* Order reading of wptr vs. reading of IH ring data */ 5548 rmb(); 5549 5550 /* display interrupts */ 5551 cik_irq_ack(rdev); 5552 5553 while (rptr != wptr) { 5554 /* wptr/rptr are in bytes! */ 5555 ring_index = rptr / 4; 5556 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; 5557 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; 5558 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; 5559 5560 switch (src_id) { 5561 case 1: /* D1 vblank/vline */ 5562 switch (src_data) { 5563 case 0: /* D1 vblank */ 5564 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) { 5565 if (rdev->irq.crtc_vblank_int[0]) { 5566 drm_handle_vblank(rdev->ddev, 0); 5567 rdev->pm.vblank_sync = true; 5568 wake_up(&rdev->irq.vblank_queue); 5569 } 5570 if (atomic_read(&rdev->irq.pflip[0])) 5571 radeon_crtc_handle_flip(rdev, 0); 5572 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; 5573 DRM_DEBUG("IH: D1 vblank\n"); 5574 } 5575 break; 5576 case 1: /* D1 vline */ 5577 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) { 5578 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; 5579 DRM_DEBUG("IH: D1 vline\n"); 5580 } 5581 break; 5582 
default: 5583 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 5584 break; 5585 } 5586 break; 5587 case 2: /* D2 vblank/vline */ 5588 switch (src_data) { 5589 case 0: /* D2 vblank */ 5590 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { 5591 if (rdev->irq.crtc_vblank_int[1]) { 5592 drm_handle_vblank(rdev->ddev, 1); 5593 rdev->pm.vblank_sync = true; 5594 wake_up(&rdev->irq.vblank_queue); 5595 } 5596 if (atomic_read(&rdev->irq.pflip[1])) 5597 radeon_crtc_handle_flip(rdev, 1); 5598 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; 5599 DRM_DEBUG("IH: D2 vblank\n"); 5600 } 5601 break; 5602 case 1: /* D2 vline */ 5603 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) { 5604 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; 5605 DRM_DEBUG("IH: D2 vline\n"); 5606 } 5607 break; 5608 default: 5609 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 5610 break; 5611 } 5612 break; 5613 case 3: /* D3 vblank/vline */ 5614 switch (src_data) { 5615 case 0: /* D3 vblank */ 5616 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { 5617 if (rdev->irq.crtc_vblank_int[2]) { 5618 drm_handle_vblank(rdev->ddev, 2); 5619 rdev->pm.vblank_sync = true; 5620 wake_up(&rdev->irq.vblank_queue); 5621 } 5622 if (atomic_read(&rdev->irq.pflip[2])) 5623 radeon_crtc_handle_flip(rdev, 2); 5624 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; 5625 DRM_DEBUG("IH: D3 vblank\n"); 5626 } 5627 break; 5628 case 1: /* D3 vline */ 5629 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { 5630 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; 5631 DRM_DEBUG("IH: D3 vline\n"); 5632 } 5633 break; 5634 default: 5635 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 5636 break; 5637 } 5638 break; 5639 case 4: /* D4 vblank/vline */ 5640 switch (src_data) { 5641 case 0: /* D4 vblank */ 5642 if (rdev->irq.stat_regs.cik.disp_int_cont3 & 
LB_D4_VBLANK_INTERRUPT) { 5643 if (rdev->irq.crtc_vblank_int[3]) { 5644 drm_handle_vblank(rdev->ddev, 3); 5645 rdev->pm.vblank_sync = true; 5646 wake_up(&rdev->irq.vblank_queue); 5647 } 5648 if (atomic_read(&rdev->irq.pflip[3])) 5649 radeon_crtc_handle_flip(rdev, 3); 5650 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; 5651 DRM_DEBUG("IH: D4 vblank\n"); 5652 } 5653 break; 5654 case 1: /* D4 vline */ 5655 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { 5656 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; 5657 DRM_DEBUG("IH: D4 vline\n"); 5658 } 5659 break; 5660 default: 5661 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 5662 break; 5663 } 5664 break; 5665 case 5: /* D5 vblank/vline */ 5666 switch (src_data) { 5667 case 0: /* D5 vblank */ 5668 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { 5669 if (rdev->irq.crtc_vblank_int[4]) { 5670 drm_handle_vblank(rdev->ddev, 4); 5671 rdev->pm.vblank_sync = true; 5672 wake_up(&rdev->irq.vblank_queue); 5673 } 5674 if (atomic_read(&rdev->irq.pflip[4])) 5675 radeon_crtc_handle_flip(rdev, 4); 5676 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; 5677 DRM_DEBUG("IH: D5 vblank\n"); 5678 } 5679 break; 5680 case 1: /* D5 vline */ 5681 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { 5682 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; 5683 DRM_DEBUG("IH: D5 vline\n"); 5684 } 5685 break; 5686 default: 5687 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 5688 break; 5689 } 5690 break; 5691 case 6: /* D6 vblank/vline */ 5692 switch (src_data) { 5693 case 0: /* D6 vblank */ 5694 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { 5695 if (rdev->irq.crtc_vblank_int[5]) { 5696 drm_handle_vblank(rdev->ddev, 5); 5697 rdev->pm.vblank_sync = true; 5698 wake_up(&rdev->irq.vblank_queue); 5699 } 5700 if (atomic_read(&rdev->irq.pflip[5])) 5701 
radeon_crtc_handle_flip(rdev, 5); 5702 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; 5703 DRM_DEBUG("IH: D6 vblank\n"); 5704 } 5705 break; 5706 case 1: /* D6 vline */ 5707 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { 5708 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; 5709 DRM_DEBUG("IH: D6 vline\n"); 5710 } 5711 break; 5712 default: 5713 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 5714 break; 5715 } 5716 break; 5717 case 42: /* HPD hotplug */ 5718 switch (src_data) { 5719 case 0: 5720 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) { 5721 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; 5722 queue_hotplug = true; 5723 DRM_DEBUG("IH: HPD1\n"); 5724 } 5725 break; 5726 case 1: 5727 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) { 5728 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; 5729 queue_hotplug = true; 5730 DRM_DEBUG("IH: HPD2\n"); 5731 } 5732 break; 5733 case 2: 5734 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) { 5735 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; 5736 queue_hotplug = true; 5737 DRM_DEBUG("IH: HPD3\n"); 5738 } 5739 break; 5740 case 3: 5741 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) { 5742 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; 5743 queue_hotplug = true; 5744 DRM_DEBUG("IH: HPD4\n"); 5745 } 5746 break; 5747 case 4: 5748 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) { 5749 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; 5750 queue_hotplug = true; 5751 DRM_DEBUG("IH: HPD5\n"); 5752 } 5753 break; 5754 case 5: 5755 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { 5756 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; 5757 queue_hotplug = true; 5758 DRM_DEBUG("IH: HPD6\n"); 5759 } 5760 break; 5761 default: 5762 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 5763 break; 5764 
} 5765 break; 5766 case 146: 5767 case 147: 5768 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR); 5769 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS); 5770 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT); 5771 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data); 5772 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", 5773 addr); 5774 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", 5775 status); 5776 cik_vm_decode_fault(rdev, status, addr, mc_client); 5777 /* reset addr and status */ 5778 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1); 5779 break; 5780 case 176: /* GFX RB CP_INT */ 5781 case 177: /* GFX IB CP_INT */ 5782 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); 5783 break; 5784 case 181: /* CP EOP event */ 5785 DRM_DEBUG("IH: CP EOP\n"); 5786 /* XXX check the bitfield order! */ 5787 me_id = (ring_id & 0x60) >> 5; 5788 pipe_id = (ring_id & 0x18) >> 3; 5789 queue_id = (ring_id & 0x7) >> 0; 5790 switch (me_id) { 5791 case 0: 5792 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); 5793 break; 5794 case 1: 5795 case 2: 5796 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id)) 5797 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX); 5798 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id)) 5799 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX); 5800 break; 5801 } 5802 break; 5803 case 184: /* CP Privileged reg access */ 5804 DRM_ERROR("Illegal register access in command stream\n"); 5805 /* XXX check the bitfield order! 
*/ 5806 me_id = (ring_id & 0x60) >> 5; 5807 pipe_id = (ring_id & 0x18) >> 3; 5808 queue_id = (ring_id & 0x7) >> 0; 5809 switch (me_id) { 5810 case 0: 5811 /* This results in a full GPU reset, but all we need to do is soft 5812 * reset the CP for gfx 5813 */ 5814 queue_reset = true; 5815 break; 5816 case 1: 5817 /* XXX compute */ 5818 queue_reset = true; 5819 break; 5820 case 2: 5821 /* XXX compute */ 5822 queue_reset = true; 5823 break; 5824 } 5825 break; 5826 case 185: /* CP Privileged inst */ 5827 DRM_ERROR("Illegal instruction in command stream\n"); 5828 /* XXX check the bitfield order! */ 5829 me_id = (ring_id & 0x60) >> 5; 5830 pipe_id = (ring_id & 0x18) >> 3; 5831 queue_id = (ring_id & 0x7) >> 0; 5832 switch (me_id) { 5833 case 0: 5834 /* This results in a full GPU reset, but all we need to do is soft 5835 * reset the CP for gfx 5836 */ 5837 queue_reset = true; 5838 break; 5839 case 1: 5840 /* XXX compute */ 5841 queue_reset = true; 5842 break; 5843 case 2: 5844 /* XXX compute */ 5845 queue_reset = true; 5846 break; 5847 } 5848 break; 5849 case 224: /* SDMA trap event */ 5850 /* XXX check the bitfield order! */ 5851 me_id = (ring_id & 0x3) >> 0; 5852 queue_id = (ring_id & 0xc) >> 2; 5853 DRM_DEBUG("IH: SDMA trap\n"); 5854 switch (me_id) { 5855 case 0: 5856 switch (queue_id) { 5857 case 0: 5858 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX); 5859 break; 5860 case 1: 5861 /* XXX compute */ 5862 break; 5863 case 2: 5864 /* XXX compute */ 5865 break; 5866 } 5867 break; 5868 case 1: 5869 switch (queue_id) { 5870 case 0: 5871 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); 5872 break; 5873 case 1: 5874 /* XXX compute */ 5875 break; 5876 case 2: 5877 /* XXX compute */ 5878 break; 5879 } 5880 break; 5881 } 5882 break; 5883 case 241: /* SDMA Privileged inst */ 5884 case 247: /* SDMA Privileged inst */ 5885 DRM_ERROR("Illegal instruction in SDMA command stream\n"); 5886 /* XXX check the bitfield order! 
*/ 5887 me_id = (ring_id & 0x3) >> 0; 5888 queue_id = (ring_id & 0xc) >> 2; 5889 switch (me_id) { 5890 case 0: 5891 switch (queue_id) { 5892 case 0: 5893 queue_reset = true; 5894 break; 5895 case 1: 5896 /* XXX compute */ 5897 queue_reset = true; 5898 break; 5899 case 2: 5900 /* XXX compute */ 5901 queue_reset = true; 5902 break; 5903 } 5904 break; 5905 case 1: 5906 switch (queue_id) { 5907 case 0: 5908 queue_reset = true; 5909 break; 5910 case 1: 5911 /* XXX compute */ 5912 queue_reset = true; 5913 break; 5914 case 2: 5915 /* XXX compute */ 5916 queue_reset = true; 5917 break; 5918 } 5919 break; 5920 } 5921 break; 5922 case 233: /* GUI IDLE */ 5923 DRM_DEBUG("IH: GUI idle\n"); 5924 break; 5925 default: 5926 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 5927 break; 5928 } 5929 5930 /* wptr/rptr are in bytes! */ 5931 rptr += 16; 5932 rptr &= rdev->ih.ptr_mask; 5933 } 5934 if (queue_hotplug) 5935 schedule_work(&rdev->hotplug_work); 5936 if (queue_reset) 5937 schedule_work(&rdev->reset_work); 5938 rdev->ih.rptr = rptr; 5939 WREG32(IH_RB_RPTR, rdev->ih.rptr); 5940 atomic_set(&rdev->ih.lock, 0); 5941 5942 /* make sure wptr hasn't changed while processing */ 5943 wptr = cik_get_ih_wptr(rdev); 5944 if (wptr != rptr) 5945 goto restart_ih; 5946 5947 return IRQ_HANDLED; 5948 } 5949 5950 /* 5951 * startup/shutdown callbacks 5952 */ 5953 /** 5954 * cik_startup - program the asic to a functional state 5955 * 5956 * @rdev: radeon_device pointer 5957 * 5958 * Programs the asic to a functional state (CIK). 5959 * Called by cik_init() and cik_resume(). 5960 * Returns 0 for success, error for failure. 
5961 */ 5962 static int cik_startup(struct radeon_device *rdev) 5963 { 5964 struct radeon_ring *ring; 5965 int r; 5966 5967 cik_mc_program(rdev); 5968 5969 if (rdev->flags & RADEON_IS_IGP) { 5970 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || 5971 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) { 5972 r = cik_init_microcode(rdev); 5973 if (r) { 5974 DRM_ERROR("Failed to load firmware!\n"); 5975 return r; 5976 } 5977 } 5978 } else { 5979 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || 5980 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw || 5981 !rdev->mc_fw) { 5982 r = cik_init_microcode(rdev); 5983 if (r) { 5984 DRM_ERROR("Failed to load firmware!\n"); 5985 return r; 5986 } 5987 } 5988 5989 r = ci_mc_load_microcode(rdev); 5990 if (r) { 5991 DRM_ERROR("Failed to load MC firmware!\n"); 5992 return r; 5993 } 5994 } 5995 5996 r = r600_vram_scratch_init(rdev); 5997 if (r) 5998 return r; 5999 6000 r = cik_pcie_gart_enable(rdev); 6001 if (r) 6002 return r; 6003 cik_gpu_init(rdev); 6004 6005 /* allocate rlc buffers */ 6006 r = si_rlc_init(rdev); 6007 if (r) { 6008 DRM_ERROR("Failed to init rlc BOs!\n"); 6009 return r; 6010 } 6011 6012 /* allocate wb buffer */ 6013 r = radeon_wb_init(rdev); 6014 if (r) 6015 return r; 6016 6017 /* allocate mec buffers */ 6018 r = cik_mec_init(rdev); 6019 if (r) { 6020 DRM_ERROR("Failed to init MEC BOs!\n"); 6021 return r; 6022 } 6023 6024 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); 6025 if (r) { 6026 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 6027 return r; 6028 } 6029 6030 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); 6031 if (r) { 6032 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 6033 return r; 6034 } 6035 6036 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX); 6037 if (r) { 6038 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 6039 return r; 6040 } 6041 6042 r = radeon_fence_driver_start_ring(rdev, 
R600_RING_TYPE_DMA_INDEX); 6043 if (r) { 6044 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); 6045 return r; 6046 } 6047 6048 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); 6049 if (r) { 6050 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); 6051 return r; 6052 } 6053 6054 r = cik_uvd_resume(rdev); 6055 if (!r) { 6056 r = radeon_fence_driver_start_ring(rdev, 6057 R600_RING_TYPE_UVD_INDEX); 6058 if (r) 6059 dev_err(rdev->dev, "UVD fences init error (%d).\n", r); 6060 } 6061 if (r) 6062 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; 6063 6064 /* Enable IRQ */ 6065 if (!rdev->irq.installed) { 6066 r = radeon_irq_kms_init(rdev); 6067 if (r) 6068 return r; 6069 } 6070 6071 r = cik_irq_init(rdev); 6072 if (r) { 6073 DRM_ERROR("radeon: IH init failed (%d).\n", r); 6074 radeon_irq_kms_fini(rdev); 6075 return r; 6076 } 6077 cik_irq_set(rdev); 6078 6079 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 6080 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, 6081 CP_RB0_RPTR, CP_RB0_WPTR, 6082 0, 0xfffff, RADEON_CP_PACKET2); 6083 if (r) 6084 return r; 6085 6086 /* set up the compute queues */ 6087 /* type-2 packets are deprecated on MEC, use type-3 instead */ 6088 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 6089 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET, 6090 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR, 6091 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF)); 6092 if (r) 6093 return r; 6094 ring->me = 1; /* first MEC */ 6095 ring->pipe = 0; /* first pipe */ 6096 ring->queue = 0; /* first queue */ 6097 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET; 6098 6099 /* type-2 packets are deprecated on MEC, use type-3 instead */ 6100 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 6101 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET, 6102 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR, 6103 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF)); 6104 if (r) 6105 return r; 6106 /* dGPU only 
have 1 MEC */ 6107 ring->me = 1; /* first MEC */ 6108 ring->pipe = 0; /* first pipe */ 6109 ring->queue = 1; /* second queue */ 6110 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET; 6111 6112 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; 6113 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, 6114 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET, 6115 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET, 6116 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); 6117 if (r) 6118 return r; 6119 6120 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; 6121 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, 6122 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET, 6123 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET, 6124 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); 6125 if (r) 6126 return r; 6127 6128 r = cik_cp_resume(rdev); 6129 if (r) 6130 return r; 6131 6132 r = cik_sdma_resume(rdev); 6133 if (r) 6134 return r; 6135 6136 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; 6137 if (ring->ring_size) { 6138 r = radeon_ring_init(rdev, ring, ring->ring_size, 6139 R600_WB_UVD_RPTR_OFFSET, 6140 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, 6141 0, 0xfffff, RADEON_CP_PACKET2); 6142 if (!r) 6143 r = r600_uvd_init(rdev); 6144 if (r) 6145 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); 6146 } 6147 6148 r = radeon_ib_pool_init(rdev); 6149 if (r) { 6150 dev_err(rdev->dev, "IB initialization failed (%d).\n", r); 6151 return r; 6152 } 6153 6154 r = radeon_vm_manager_init(rdev); 6155 if (r) { 6156 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r); 6157 return r; 6158 } 6159 6160 return 0; 6161 } 6162 6163 /** 6164 * cik_resume - resume the asic to a functional state 6165 * 6166 * @rdev: radeon_device pointer 6167 * 6168 * Programs the asic to a functional state (CIK). 6169 * Called at resume. 6170 * Returns 0 for success, error for failure. 
6171 */ 6172 int cik_resume(struct radeon_device *rdev) 6173 { 6174 int r; 6175 6176 /* post card */ 6177 atom_asic_init(rdev->mode_info.atom_context); 6178 6179 /* init golden registers */ 6180 cik_init_golden_registers(rdev); 6181 6182 rdev->accel_working = true; 6183 r = cik_startup(rdev); 6184 if (r) { 6185 DRM_ERROR("cik startup failed on resume\n"); 6186 rdev->accel_working = false; 6187 return r; 6188 } 6189 6190 return r; 6191 6192 } 6193 6194 /** 6195 * cik_suspend - suspend the asic 6196 * 6197 * @rdev: radeon_device pointer 6198 * 6199 * Bring the chip into a state suitable for suspend (CIK). 6200 * Called at suspend. 6201 * Returns 0 for success. 6202 */ 6203 int cik_suspend(struct radeon_device *rdev) 6204 { 6205 radeon_vm_manager_fini(rdev); 6206 cik_cp_enable(rdev, false); 6207 cik_sdma_enable(rdev, false); 6208 r600_uvd_stop(rdev); 6209 radeon_uvd_suspend(rdev); 6210 cik_irq_suspend(rdev); 6211 radeon_wb_disable(rdev); 6212 cik_pcie_gart_disable(rdev); 6213 return 0; 6214 } 6215 6216 /* Plan is to move initialization in that function and use 6217 * helper function so that radeon_device_init pretty much 6218 * do nothing more than calling asic specific function. This 6219 * should also allow to remove a bunch of callback function 6220 * like vram_info. 6221 */ 6222 /** 6223 * cik_init - asic specific driver and hw init 6224 * 6225 * @rdev: radeon_device pointer 6226 * 6227 * Setup asic specific driver variables and program the hw 6228 * to a functional state (CIK). 6229 * Called at driver startup. 6230 * Returns 0 for success, errors for failure. 
6231 */ 6232 int cik_init(struct radeon_device *rdev) 6233 { 6234 struct radeon_ring *ring; 6235 int r; 6236 6237 /* Read BIOS */ 6238 if (!radeon_get_bios(rdev)) { 6239 if (ASIC_IS_AVIVO(rdev)) 6240 return -EINVAL; 6241 } 6242 /* Must be an ATOMBIOS */ 6243 if (!rdev->is_atom_bios) { 6244 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n"); 6245 return -EINVAL; 6246 } 6247 r = radeon_atombios_init(rdev); 6248 if (r) 6249 return r; 6250 6251 /* Post card if necessary */ 6252 if (!radeon_card_posted(rdev)) { 6253 if (!rdev->bios) { 6254 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n"); 6255 return -EINVAL; 6256 } 6257 DRM_INFO("GPU not posted. posting now...\n"); 6258 atom_asic_init(rdev->mode_info.atom_context); 6259 } 6260 /* init golden registers */ 6261 cik_init_golden_registers(rdev); 6262 /* Initialize scratch registers */ 6263 cik_scratch_init(rdev); 6264 /* Initialize surface registers */ 6265 radeon_surface_init(rdev); 6266 /* Initialize clocks */ 6267 radeon_get_clock_info(rdev->ddev); 6268 6269 /* Fence driver */ 6270 r = radeon_fence_driver_init(rdev); 6271 if (r) 6272 return r; 6273 6274 /* initialize memory controller */ 6275 r = cik_mc_init(rdev); 6276 if (r) 6277 return r; 6278 /* Memory manager */ 6279 r = radeon_bo_init(rdev); 6280 if (r) 6281 return r; 6282 6283 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 6284 ring->ring_obj = NULL; 6285 r600_ring_init(rdev, ring, 1024 * 1024); 6286 6287 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 6288 ring->ring_obj = NULL; 6289 r600_ring_init(rdev, ring, 1024 * 1024); 6290 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num); 6291 if (r) 6292 return r; 6293 6294 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 6295 ring->ring_obj = NULL; 6296 r600_ring_init(rdev, ring, 1024 * 1024); 6297 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num); 6298 if (r) 6299 return r; 6300 6301 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; 6302 ring->ring_obj = NULL; 6303 r600_ring_init(rdev, 
ring, 256 * 1024); 6304 6305 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; 6306 ring->ring_obj = NULL; 6307 r600_ring_init(rdev, ring, 256 * 1024); 6308 6309 r = radeon_uvd_init(rdev); 6310 if (!r) { 6311 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; 6312 ring->ring_obj = NULL; 6313 r600_ring_init(rdev, ring, 4096); 6314 } 6315 6316 rdev->ih.ring_obj = NULL; 6317 r600_ih_ring_init(rdev, 64 * 1024); 6318 6319 r = r600_pcie_gart_init(rdev); 6320 if (r) 6321 return r; 6322 6323 rdev->accel_working = true; 6324 r = cik_startup(rdev); 6325 if (r) { 6326 dev_err(rdev->dev, "disabling GPU acceleration\n"); 6327 cik_cp_fini(rdev); 6328 cik_sdma_fini(rdev); 6329 cik_irq_fini(rdev); 6330 si_rlc_fini(rdev); 6331 cik_mec_fini(rdev); 6332 radeon_wb_fini(rdev); 6333 radeon_ib_pool_fini(rdev); 6334 radeon_vm_manager_fini(rdev); 6335 radeon_irq_kms_fini(rdev); 6336 cik_pcie_gart_fini(rdev); 6337 rdev->accel_working = false; 6338 } 6339 6340 /* Don't start up if the MC ucode is missing. 6341 * The default clocks and voltages before the MC ucode 6342 * is loaded are not suffient for advanced operations. 6343 */ 6344 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) { 6345 DRM_ERROR("radeon: MC ucode required for NI+.\n"); 6346 return -EINVAL; 6347 } 6348 6349 return 0; 6350 } 6351 6352 /** 6353 * cik_fini - asic specific driver and hw fini 6354 * 6355 * @rdev: radeon_device pointer 6356 * 6357 * Tear down the asic specific driver variables and program the hw 6358 * to an idle state (CIK). 6359 * Called at driver unload. 
 */
void cik_fini(struct radeon_device *rdev)
{
	/* teardown mirrors the failure path of cik_init/cik_startup:
	 * rings first, then interrupts, writeback, pools, GART, and
	 * finally the generic driver state
	 */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_irq_fini(rdev);
	si_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	r600_uvd_stop(rdev);
	radeon_uvd_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}

/* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Setup up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode)
{
	u32 tmp;  /* LB_MEMORY_CONFIG partition selector */

	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controllers.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width. For display widths larger
	 * than 4096, you need use to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		/* config 1 covers up to 1920px, 2 up to 2560px, 0 up to 4096px */
		if (mode->crtc_hdisplay < 1920)
			tmp = 1;
		else if (mode->crtc_hdisplay < 2560)
			tmp = 2;
		else if (mode->crtc_hdisplay < 4096)
			tmp = 0;
		else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
		}
	} else
		tmp = 1;

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	/* map the chosen config back to the LB capacity in pixels */
	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}

/**
 * cik_get_number_of_dram_channels - get the number of dram channels
 *
 * @rdev: radeon_device pointer
 *
 * Look up the number of video ram channels (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the number of dram channels
 */
static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
{
	u32 tmp = RREG32(MC_SHARED_CHMAP);

	/* translate the NOOFCHAN field encoding into a channel count */
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	case 3:
		return 8;
	case 4:
		return 3;
	case 5:
		return 6;
	case 6:
		return 10;
	case 7:
		return 12;
	case 8:
		return 16;
	}
}

/* input parameters for the DCE8 watermark calculations below */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk; /* bandwidth per dram data pin in kHz */
	u32 sclk; /* engine clock in kHz */
	u32 disp_clk; /* display clock in kHz */
	u32 src_width; /* viewport width */
	u32 active_time; /* active display time in ns */
	u32 blank_time; /* blank time in ns */
	bool interlaced; /* mode is interlaced */
	fixed20_12 vsc; /* vertical scale ratio */
	u32 num_heads; /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size; /* line buffer allocated to pipe */
	u32 vtaps; /* vertical scaler taps */
};

/**
 * dce8_dram_bandwidth - get the dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the raw dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dram bandwidth in MBytes/s
 */
static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate raw DRAM Bandwidth: channels * 4 bytes * yclk * 0.7 */
	fixed20_12 dram_efficiency; /* 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);	/* kHz -> MHz */
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	dram_efficiency.full = dfixed_const(7);
	dram_efficiency.full = dfixed_div(dram_efficiency, a); /* 7/10 = 0.7 */
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
 *
 * @wm: watermark calculation data
 *
 * Calculate the dram bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dram bandwidth for display in MBytes/s
 */
static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	/* Calculate DRAM Bandwidth and the part allocated to display. */
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);	/* kHz -> MHz */
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_data_return_bandwidth - get the data return bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the data return bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the data return bandwidth in MBytes/s
 */
static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display Data return Bandwidth:
	 * 32 bytes/clk * sclk * 0.8 efficiency
	 */
	fixed20_12 return_efficiency; /* 0.8 */
	fixed20_12 sclk, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);	/* kHz -> MHz */
	a.full = dfixed_const(10);
	return_efficiency.full = dfixed_const(8);
	return_efficiency.full = dfixed_div(return_efficiency, a); /* 8/10 = 0.8 */
	a.full = dfixed_const(32);
	bandwidth.full = dfixed_mul(a, sclk);
	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_dmif_request_bandwidth - get the dmif bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the dmif bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dmif bandwidth in MBytes/s
 */
static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the DMIF Request Bandwidth:
	 * 32 bytes/request * disp_clk * 0.8 efficiency
	 */
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
	fixed20_12 disp_clk, bandwidth;
	fixed20_12 a, b;

	a.full = dfixed_const(1000);
	disp_clk.full = dfixed_const(wm->disp_clk);
	disp_clk.full = dfixed_div(disp_clk, a);	/* kHz -> MHz */
	a.full = dfixed_const(32);
	b.full = dfixed_mul(a, disp_clk);

	a.full = dfixed_const(10);
	disp_clk_request_efficiency.full = dfixed_const(8);
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);

	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_available_bandwidth - get the min available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the min available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the min available bandwidth in MBytes/s
 */
static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);

	/* the pipeline is limited by its narrowest stage */
	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
}

/**
 * dce8_average_bandwidth - get the average available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the average available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the average available bandwidth in MBytes/s
 */
static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display mode Average Bandwidth
	 * DisplayMode should contain the source and destination dimensions,
	 * timing, etc.
	 */
	fixed20_12 bpp;
	fixed20_12 line_time;
	fixed20_12 src_width;
	fixed20_12 bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
	line_time.full = dfixed_div(line_time, a);	/* ns -> us */
	bpp.full = dfixed_const(wm->bytes_per_pixel);
	src_width.full = dfixed_const(wm->src_width);
	/* bytes per line, scaled by the vertical scale ratio, per line time */
	bandwidth.full = dfixed_mul(src_width, bpp);
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
	bandwidth.full = dfixed_div(bandwidth, line_time);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* time to return one 512-byte chunk (x8, scaled to ns) */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* other active heads compete for the same return path */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* downscaling/interlacing needs more source lines per output line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* bandwidth implied by draining the DMIF within the latency window */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* cap at what the display clock can consume (bytes per pixel clock) */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill the line buffer at lb_fill_bw */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the LB cannot be refilled within the active time, the excess
	 * adds to the latency the watermark must cover
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}

/**
 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
 * average and available dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
		return true;
	else
		return false;
}

/**
 * dce8_average_bandwidth_vs_available_bandwidth - check
 * average and available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * available bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_available_bandwidth(wm) / wm->num_heads))
		return true;
	else
		return false;
}

/**
 * dce8_check_latency_hiding - check latency hiding
 *
 * @wm: watermark calculation data
 *
 * Check latency hiding (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
{
	/* how many full source lines fit in this head's LB allocation */
	u32 lb_partitions = wm->lb_size / wm->src_width;
	u32 line_time = wm->active_time + wm->blank_time;
	u32 latency_tolerant_lines;
	u32 latency_hiding;
	fixed20_12 a;

	a.full = dfixed_const(1);
	if (wm->vsc.full > a.full)
		/* downscaling: only one line of slack */
		latency_tolerant_lines = 1;
	else {
		if (lb_partitions <= (wm->vtaps + 1))
			latency_tolerant_lines = 1;
		else
			latency_tolerant_lines = 2;
	}

	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);

	if (dce8_latency_watermark(wm) <= latency_hiding)
		return true;
	else
		return false;
}

/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		/* line time in ns, clamped to the 16-bit register field */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		wm.yclk = rdev->pm.current_mclk * 10;
		wm.sclk = rdev->pm.current_sclk * 10;
		wm.disp_clk = mode->clock;
		wm.src_width = mode->crtc_hdisplay;
		wm.active_time = mode->crtc_hdisplay * pixel_period;
		wm.blank_time = line_time - wm.active_time;
		wm.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm.interlaced = true;
		wm.vsc = radeon_crtc->vsc;
		wm.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm.vtaps = 2;
		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm.lb_size = lb_size;
		wm.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
		/* set for low clocks */
		/* wm.yclk = low clk; wm.sclk = low clk — XXX currently reuses the
		 * high-clock values, so watermark B equals watermark A
		 */
		latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
		    !dce8_check_latency_hiding(&wm) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
}

/**
 * dce8_bandwidth_update - program display watermarks
 *
 * @rdev: radeon_device pointer
 *
 * Calculate and program the display watermarks and line
 * buffer allocation (CIK).
6901 */ 6902 void dce8_bandwidth_update(struct radeon_device *rdev) 6903 { 6904 struct drm_display_mode *mode = NULL; 6905 u32 num_heads = 0, lb_size; 6906 int i; 6907 6908 radeon_update_display_priority(rdev); 6909 6910 for (i = 0; i < rdev->num_crtc; i++) { 6911 if (rdev->mode_info.crtcs[i]->base.enabled) 6912 num_heads++; 6913 } 6914 for (i = 0; i < rdev->num_crtc; i++) { 6915 mode = &rdev->mode_info.crtcs[i]->base.mode; 6916 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode); 6917 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads); 6918 } 6919 } 6920 6921 /** 6922 * cik_get_gpu_clock_counter - return GPU clock counter snapshot 6923 * 6924 * @rdev: radeon_device pointer 6925 * 6926 * Fetches a GPU clock counter snapshot (SI). 6927 * Returns the 64 bit clock counter snapshot. 6928 */ 6929 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev) 6930 { 6931 uint64_t clock; 6932 6933 mutex_lock(&rdev->gpu_clock_mutex); 6934 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1); 6935 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) | 6936 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 6937 mutex_unlock(&rdev->gpu_clock_mutex); 6938 return clock; 6939 } 6940 6941 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock, 6942 u32 cntl_reg, u32 status_reg) 6943 { 6944 int r, i; 6945 struct atom_clock_dividers dividers; 6946 uint32_t tmp; 6947 6948 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, 6949 clock, false, ÷rs); 6950 if (r) 6951 return r; 6952 6953 tmp = RREG32_SMC(cntl_reg); 6954 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK); 6955 tmp |= dividers.post_divider; 6956 WREG32_SMC(cntl_reg, tmp); 6957 6958 for (i = 0; i < 100; i++) { 6959 if (RREG32_SMC(status_reg) & DCLK_STATUS) 6960 break; 6961 mdelay(10); 6962 } 6963 if (i == 100) 6964 return -ETIMEDOUT; 6965 6966 return 0; 6967 } 6968 6969 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) 6970 { 6971 int 
r = 0; 6972 6973 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS); 6974 if (r) 6975 return r; 6976 6977 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS); 6978 return r; 6979 } 6980 6981 int cik_uvd_resume(struct radeon_device *rdev) 6982 { 6983 uint64_t addr; 6984 uint32_t size; 6985 int r; 6986 6987 r = radeon_uvd_resume(rdev); 6988 if (r) 6989 return r; 6990 6991 /* programm the VCPU memory controller bits 0-27 */ 6992 addr = rdev->uvd.gpu_addr >> 3; 6993 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3; 6994 WREG32(UVD_VCPU_CACHE_OFFSET0, addr); 6995 WREG32(UVD_VCPU_CACHE_SIZE0, size); 6996 6997 addr += size; 6998 size = RADEON_UVD_STACK_SIZE >> 3; 6999 WREG32(UVD_VCPU_CACHE_OFFSET1, addr); 7000 WREG32(UVD_VCPU_CACHE_SIZE1, size); 7001 7002 addr += size; 7003 size = RADEON_UVD_HEAP_SIZE >> 3; 7004 WREG32(UVD_VCPU_CACHE_OFFSET2, addr); 7005 WREG32(UVD_VCPU_CACHE_SIZE2, size); 7006 7007 /* bits 28-31 */ 7008 addr = (rdev->uvd.gpu_addr >> 28) & 0xF; 7009 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); 7010 7011 /* bits 32-39 */ 7012 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF; 7013 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); 7014 7015 return 0; 7016 } 7017