1 /* 2 * Copyright (C) 2013 Red Hat 3 * Author: Rob Clark <robdclark@gmail.com> 4 * 5 * Copyright (c) 2014 The Linux Foundation. All rights reserved. 6 * 7 * This program is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 as published by 9 * the Free Software Foundation. 10 * 11 * This program is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 * more details. 15 * 16 * You should have received a copy of the GNU General Public License along with 17 * this program. If not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include <linux/pm_opp.h> 21 #include "adreno_gpu.h" 22 #include "msm_gem.h" 23 #include "msm_mmu.h" 24 25 int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) 26 { 27 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 28 29 switch (param) { 30 case MSM_PARAM_GPU_ID: 31 *value = adreno_gpu->info->revn; 32 return 0; 33 case MSM_PARAM_GMEM_SIZE: 34 *value = adreno_gpu->gmem; 35 return 0; 36 case MSM_PARAM_GMEM_BASE: 37 *value = 0x100000; 38 return 0; 39 case MSM_PARAM_CHIP_ID: 40 *value = adreno_gpu->rev.patchid | 41 (adreno_gpu->rev.minor << 8) | 42 (adreno_gpu->rev.major << 16) | 43 (adreno_gpu->rev.core << 24); 44 return 0; 45 case MSM_PARAM_MAX_FREQ: 46 *value = adreno_gpu->base.fast_rate; 47 return 0; 48 case MSM_PARAM_TIMESTAMP: 49 if (adreno_gpu->funcs->get_timestamp) { 50 int ret; 51 52 pm_runtime_get_sync(&gpu->pdev->dev); 53 ret = adreno_gpu->funcs->get_timestamp(gpu, value); 54 pm_runtime_put_autosuspend(&gpu->pdev->dev); 55 56 return ret; 57 } 58 return -EINVAL; 59 case MSM_PARAM_NR_RINGS: 60 *value = gpu->nr_rings; 61 return 0; 62 default: 63 DBG("%s: invalid param: %u", gpu->name, param); 64 return -EINVAL; 65 } 66 } 67 68 const struct firmware * 69 adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname) 70 { 71 struct drm_device *drm = adreno_gpu->base.dev; 72 const struct firmware *fw = NULL; 73 char newname[strlen("qcom/") + strlen(fwname) + 1]; 74 int ret; 75 76 sprintf(newname, "qcom/%s", fwname); 77 78 /* 79 * Try first to load from qcom/$fwfile using a direct load (to avoid 80 * a potential timeout waiting for usermode helper) 81 */ 82 if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) || 83 (adreno_gpu->fwloc == FW_LOCATION_NEW)) { 84 85 ret = request_firmware_direct(&fw, newname, drm->dev); 86 if (!ret) { 87 dev_info(drm->dev, "loaded %s from new location\n", 88 newname); 89 adreno_gpu->fwloc = FW_LOCATION_NEW; 90 return fw; 91 } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { 92 dev_err(drm->dev, "failed to load %s: %d\n", 93 newname, ret); 94 return ERR_PTR(ret); 95 } 96 } 97 98 /* 99 * Then try the legacy location without qcom/ prefix 100 */ 101 if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) || 102 (adreno_gpu->fwloc == FW_LOCATION_LEGACY)) { 103 104 ret = request_firmware_direct(&fw, fwname, drm->dev); 105 if (!ret) { 106 dev_info(drm->dev, "loaded %s from legacy location\n", 107 newname); 108 adreno_gpu->fwloc = FW_LOCATION_LEGACY; 109 return fw; 110 } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { 111 dev_err(drm->dev, "failed to load %s: %d\n", 112 fwname, ret); 113 return ERR_PTR(ret); 114 } 115 } 116 117 /* 118 * Finally fall back to request_firmware() for cases where the 119 * usermode helper is needed (I think mainly android) 120 */ 121 if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) || 122 (adreno_gpu->fwloc == FW_LOCATION_HELPER)) { 123 124 ret = request_firmware(&fw, newname, drm->dev); 125 if (!ret) { 126 dev_info(drm->dev, "loaded %s with helper\n", 127 newname); 128 adreno_gpu->fwloc = FW_LOCATION_HELPER; 129 return fw; 130 } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { 131 dev_err(drm->dev, "failed to load %s: %d\n", 132 newname, ret); 133 return ERR_PTR(ret); 134 } 135 } 136 137 dev_err(drm->dev, "failed to load %s\n", fwname); 138 return ERR_PTR(-ENOENT); 139 } 140 141 static int adreno_load_fw(struct adreno_gpu *adreno_gpu) 142 { 143 const struct firmware *fw; 144 145 if (adreno_gpu->pm4) 146 return 0; 147 148 fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pm4fw); 149 if (IS_ERR(fw)) 150 return PTR_ERR(fw); 151 adreno_gpu->pm4 = fw; 152 153 fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pfpfw); 154 if (IS_ERR(fw)) { 155 release_firmware(adreno_gpu->pm4); 156 adreno_gpu->pm4 = NULL; 157 return PTR_ERR(fw); 158 } 159 adreno_gpu->pfp = fw; 160 161 return 0; 162 } 163 164 int adreno_hw_init(struct msm_gpu *gpu) 165 { 166 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 167 int ret, i; 168 169 DBG("%s", gpu->name); 170 171 ret = adreno_load_fw(adreno_gpu); 172 if (ret) 173 return ret; 174 175 for (i = 0; i < gpu->nr_rings; i++) { 176 struct msm_ringbuffer *ring = gpu->rb[i]; 177 178 if (!ring) 179 continue; 180 181 ret = msm_gem_get_iova(ring->bo, gpu->aspace, &ring->iova); 182 if (ret) { 183 ring->iova = 0; 184 dev_err(gpu->dev->dev, 185 "could not map ringbuffer %d: %d\n", i, ret); 186 return ret; 187 } 188 189 ring->cur = ring->start; 190 ring->next = ring->start; 191 192 /* reset completed fence seqno: */ 193 ring->memptrs->fence = ring->seqno; 194 ring->memptrs->rptr = 0; 195 } 196 197 /* 198 * Setup REG_CP_RB_CNTL. The same value is used across targets (with 199 * the excpetion of A430 that disables the RPTR shadow) - the cacluation 200 * for the ringbuffer size and block size is moved to msm_gpu.h for the 201 * pre-processor to deal with and the A430 variant is ORed in here 202 */ 203 adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL, 204 MSM_GPU_RB_CNTL_DEFAULT | 205 (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0)); 206 207 /* Setup ringbuffer address - use ringbuffer[0] for GPU init */ 208 adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE, 209 REG_ADRENO_CP_RB_BASE_HI, gpu->rb[0]->iova); 210 211 if (!adreno_is_a430(adreno_gpu)) { 212 adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR, 213 REG_ADRENO_CP_RB_RPTR_ADDR_HI, 214 rbmemptr(gpu->rb[0], rptr)); 215 } 216 217 return 0; 218 } 219 220 /* Use this helper to read rptr, since a430 doesn't update rptr in memory */ 221 static uint32_t get_rptr(struct adreno_gpu *adreno_gpu, 222 struct msm_ringbuffer *ring) 223 { 224 if (adreno_is_a430(adreno_gpu)) 225 return ring->memptrs->rptr = adreno_gpu_read( 226 adreno_gpu, REG_ADRENO_CP_RB_RPTR); 227 else 228 return ring->memptrs->rptr; 229 } 230 231 struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu) 232 { 233 return gpu->rb[0]; 234 } 235 236 void adreno_recover(struct msm_gpu *gpu) 237 { 238 struct drm_device *dev = gpu->dev; 239 int ret; 240 241 // XXX pm-runtime?? we *need* the device to be off after this 242 // so maybe continuing to call ->pm_suspend/resume() is better? 243 244 gpu->funcs->pm_suspend(gpu); 245 gpu->funcs->pm_resume(gpu); 246 247 ret = msm_gpu_hw_init(gpu); 248 if (ret) { 249 dev_err(dev->dev, "gpu hw init failed: %d\n", ret); 250 /* hmm, oh well? */ 251 } 252 } 253 254 void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, 255 struct msm_file_private *ctx) 256 { 257 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 258 struct msm_drm_private *priv = gpu->dev->dev_private; 259 struct msm_ringbuffer *ring = submit->ring; 260 unsigned i; 261 262 for (i = 0; i < submit->nr_cmds; i++) { 263 switch (submit->cmd[i].type) { 264 case MSM_SUBMIT_CMD_IB_TARGET_BUF: 265 /* ignore IB-targets */ 266 break; 267 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: 268 /* ignore if there has not been a ctx switch: */ 269 if (priv->lastctx == ctx) 270 break; 271 case MSM_SUBMIT_CMD_BUF: 272 OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ? 273 CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2); 274 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); 275 OUT_RING(ring, submit->cmd[i].size); 276 OUT_PKT2(ring); 277 break; 278 } 279 } 280 281 OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); 282 OUT_RING(ring, submit->seqno); 283 284 if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) { 285 /* Flush HLSQ lazy updates to make sure there is nothing 286 * pending for indirect loads after the timestamp has 287 * passed: 288 */ 289 OUT_PKT3(ring, CP_EVENT_WRITE, 1); 290 OUT_RING(ring, HLSQ_FLUSH); 291 292 OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); 293 OUT_RING(ring, 0x00000000); 294 } 295 296 OUT_PKT3(ring, CP_EVENT_WRITE, 3); 297 OUT_RING(ring, CACHE_FLUSH_TS); 298 OUT_RING(ring, rbmemptr(ring, fence)); 299 OUT_RING(ring, submit->seqno); 300 301 /* we could maybe be clever and only CP_COND_EXEC the interrupt: */ 302 OUT_PKT3(ring, CP_INTERRUPT, 1); 303 OUT_RING(ring, 0x80000000); 304 305 /* Workaround for missing irq issue on 8x16/a306. Unsure if the 306 * root cause is a platform issue or some a306 quirk, but this 307 * keeps things humming along: 308 */ 309 if (adreno_is_a306(adreno_gpu)) { 310 OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); 311 OUT_RING(ring, 0x00000000); 312 OUT_PKT3(ring, CP_INTERRUPT, 1); 313 OUT_RING(ring, 0x80000000); 314 } 315 316 #if 0 317 if (adreno_is_a3xx(adreno_gpu)) { 318 /* Dummy set-constant to trigger context rollover */ 319 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 320 OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG)); 321 OUT_RING(ring, 0x00000000); 322 } 323 #endif 324 325 gpu->funcs->flush(gpu, ring); 326 } 327 328 void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 329 { 330 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 331 uint32_t wptr; 332 333 /* Copy the shadow to the actual register */ 334 ring->cur = ring->next; 335 336 /* 337 * Mask wptr value that we calculate to fit in the HW range. This is 338 * to account for the possibility that the last command fit exactly into 339 * the ringbuffer and rb->next hasn't wrapped to zero yet 340 */ 341 wptr = get_wptr(ring); 342 343 /* ensure writes to ringbuffer have hit system memory: */ 344 mb(); 345 346 adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr); 347 } 348 349 bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 350 { 351 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 352 uint32_t wptr = get_wptr(ring); 353 354 /* wait for CP to drain ringbuffer: */ 355 if (!spin_until(get_rptr(adreno_gpu, ring) == wptr)) 356 return true; 357 358 /* TODO maybe we need to reset GPU here to recover from hang? */ 359 DRM_ERROR("%s: timeout waiting to drain ringbuffer %d rptr/wptr = %X/%X\n", 360 gpu->name, ring->id, get_rptr(adreno_gpu, ring), wptr); 361 362 return false; 363 } 364 365 #ifdef CONFIG_DEBUG_FS 366 void adreno_show(struct msm_gpu *gpu, struct seq_file *m) 367 { 368 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 369 int i; 370 371 seq_printf(m, "revision: %d (%d.%d.%d.%d)\n", 372 adreno_gpu->info->revn, adreno_gpu->rev.core, 373 adreno_gpu->rev.major, adreno_gpu->rev.minor, 374 adreno_gpu->rev.patchid); 375 376 for (i = 0; i < gpu->nr_rings; i++) { 377 struct msm_ringbuffer *ring = gpu->rb[i]; 378 379 seq_printf(m, "rb %d: fence: %d/%d\n", i, 380 ring->memptrs->fence, ring->seqno); 381 382 seq_printf(m, " rptr: %d\n", 383 get_rptr(adreno_gpu, ring)); 384 seq_printf(m, "rb wptr: %d\n", get_wptr(ring)); 385 } 386 387 /* dump these out in a form that can be parsed by demsm: */ 388 seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name); 389 for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) { 390 uint32_t start = adreno_gpu->registers[i]; 391 uint32_t end = adreno_gpu->registers[i+1]; 392 uint32_t addr; 393 394 for (addr = start; addr <= end; addr++) { 395 uint32_t val = gpu_read(gpu, addr); 396 seq_printf(m, "IO:R %08x %08x\n", addr<<2, val); 397 } 398 } 399 } 400 #endif 401 402 /* Dump common gpu status and scratch registers on any hang, to make 403 * the hangcheck logs more useful. The scratch registers seem always 404 * safe to read when GPU has hung (unlike some other regs, depending 405 * on how the GPU hung), and they are useful to match up to cmdstream 406 * dumps when debugging hangs: 407 */ 408 void adreno_dump_info(struct msm_gpu *gpu) 409 { 410 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 411 int i; 412 413 printk("revision: %d (%d.%d.%d.%d)\n", 414 adreno_gpu->info->revn, adreno_gpu->rev.core, 415 adreno_gpu->rev.major, adreno_gpu->rev.minor, 416 adreno_gpu->rev.patchid); 417 418 for (i = 0; i < gpu->nr_rings; i++) { 419 struct msm_ringbuffer *ring = gpu->rb[i]; 420 421 printk("rb %d: fence: %d/%d\n", i, 422 ring->memptrs->fence, 423 ring->seqno); 424 425 printk("rptr: %d\n", get_rptr(adreno_gpu, ring)); 426 printk("rb wptr: %d\n", get_wptr(ring)); 427 } 428 } 429 430 /* would be nice to not have to duplicate the _show() stuff with printk(): */ 431 void adreno_dump(struct msm_gpu *gpu) 432 { 433 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 434 int i; 435 436 /* dump these out in a form that can be parsed by demsm: */ 437 printk("IO:region %s 00000000 00020000\n", gpu->name); 438 for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) { 439 uint32_t start = adreno_gpu->registers[i]; 440 uint32_t end = adreno_gpu->registers[i+1]; 441 uint32_t addr; 442 443 for (addr = start; addr <= end; addr++) { 444 uint32_t val = gpu_read(gpu, addr); 445 printk("IO:R %08x %08x\n", addr<<2, val); 446 } 447 } 448 } 449 450 static uint32_t ring_freewords(struct msm_ringbuffer *ring) 451 { 452 struct adreno_gpu *adreno_gpu = to_adreno_gpu(ring->gpu); 453 uint32_t size = MSM_GPU_RINGBUFFER_SZ >> 2; 454 /* Use ring->next to calculate free size */ 455 uint32_t wptr = ring->next - ring->start; 456 uint32_t rptr = get_rptr(adreno_gpu, ring); 457 return (rptr + (size - 1) - wptr) % size; 458 } 459 460 void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords) 461 { 462 if (spin_until(ring_freewords(ring) >= ndwords)) 463 DRM_DEV_ERROR(ring->gpu->dev->dev, 464 "timeout waiting for space in ringbuffer %d\n", 465 ring->id); 466 } 467 468 /* Get legacy powerlevels from qcom,gpu-pwrlevels and populate the opp table */ 469 static int adreno_get_legacy_pwrlevels(struct device *dev) 470 { 471 struct device_node *child, *node; 472 int ret; 473 474 node = of_find_compatible_node(dev->of_node, NULL, 475 "qcom,gpu-pwrlevels"); 476 if (!node) { 477 dev_err(dev, "Could not find the GPU powerlevels\n"); 478 return -ENXIO; 479 } 480 481 for_each_child_of_node(node, child) { 482 unsigned int val; 483 484 ret = of_property_read_u32(child, "qcom,gpu-freq", &val); 485 if (ret) 486 continue; 487 488 /* 489 * Skip the intentionally bogus clock value found at the bottom 490 * of most legacy frequency tables 491 */ 492 if (val != 27000000) 493 dev_pm_opp_add(dev, val, 0); 494 } 495 496 return 0; 497 } 498 499 static int adreno_get_pwrlevels(struct device *dev, 500 struct msm_gpu *gpu) 501 { 502 unsigned long freq = ULONG_MAX; 503 struct dev_pm_opp *opp; 504 int ret; 505 506 gpu->fast_rate = 0; 507 508 /* You down with OPP? */ 509 if (!of_find_property(dev->of_node, "operating-points-v2", NULL)) 510 ret = adreno_get_legacy_pwrlevels(dev); 511 else { 512 ret = dev_pm_opp_of_add_table(dev); 513 if (ret) 514 dev_err(dev, "Unable to set the OPP table\n"); 515 } 516 517 if (!ret) { 518 /* Find the fastest defined rate */ 519 opp = dev_pm_opp_find_freq_floor(dev, &freq); 520 if (!IS_ERR(opp)) { 521 gpu->fast_rate = freq; 522 dev_pm_opp_put(opp); 523 } 524 } 525 526 if (!gpu->fast_rate) { 527 dev_warn(dev, 528 "Could not find a clock rate. Using a reasonable default\n"); 529 /* Pick a suitably safe clock speed for any target */ 530 gpu->fast_rate = 200000000; 531 } 532 533 DBG("fast_rate=%u, slow_rate=27000000", gpu->fast_rate); 534 535 return 0; 536 } 537 538 int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, 539 struct adreno_gpu *adreno_gpu, 540 const struct adreno_gpu_funcs *funcs, int nr_rings) 541 { 542 struct adreno_platform_config *config = pdev->dev.platform_data; 543 struct msm_gpu_config adreno_gpu_config = { 0 }; 544 struct msm_gpu *gpu = &adreno_gpu->base; 545 546 adreno_gpu->funcs = funcs; 547 adreno_gpu->info = adreno_info(config->rev); 548 adreno_gpu->gmem = adreno_gpu->info->gmem; 549 adreno_gpu->revn = adreno_gpu->info->revn; 550 adreno_gpu->rev = config->rev; 551 552 adreno_gpu_config.ioname = "kgsl_3d0_reg_memory"; 553 adreno_gpu_config.irqname = "kgsl_3d0_irq"; 554 555 adreno_gpu_config.va_start = SZ_16M; 556 adreno_gpu_config.va_end = 0xffffffff; 557 558 adreno_gpu_config.nr_rings = nr_rings; 559 560 adreno_get_pwrlevels(&pdev->dev, gpu); 561 562 pm_runtime_set_autosuspend_delay(&pdev->dev, DRM_MSM_INACTIVE_PERIOD); 563 pm_runtime_use_autosuspend(&pdev->dev); 564 pm_runtime_enable(&pdev->dev); 565 566 return msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base, 567 adreno_gpu->info->name, &adreno_gpu_config); 568 } 569 570 void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu) 571 { 572 release_firmware(adreno_gpu->pm4); 573 release_firmware(adreno_gpu->pfp); 574 575 msm_gpu_cleanup(&adreno_gpu->base); 576 } 577