1 /* 2 * Copyright (C) 2013 Red Hat 3 * Author: Rob Clark <robdclark@gmail.com> 4 * 5 * Copyright (c) 2014 The Linux Foundation. All rights reserved. 6 * 7 * This program is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 as published by 9 * the Free Software Foundation. 10 * 11 * This program is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 * more details. 15 * 16 * You should have received a copy of the GNU General Public License along with 17 * this program. If not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include <linux/pm_opp.h> 21 #include "adreno_gpu.h" 22 #include "msm_gem.h" 23 #include "msm_mmu.h" 24 25 int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) 26 { 27 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 28 29 switch (param) { 30 case MSM_PARAM_GPU_ID: 31 *value = adreno_gpu->info->revn; 32 return 0; 33 case MSM_PARAM_GMEM_SIZE: 34 *value = adreno_gpu->gmem; 35 return 0; 36 case MSM_PARAM_GMEM_BASE: 37 *value = 0x100000; 38 return 0; 39 case MSM_PARAM_CHIP_ID: 40 *value = adreno_gpu->rev.patchid | 41 (adreno_gpu->rev.minor << 8) | 42 (adreno_gpu->rev.major << 16) | 43 (adreno_gpu->rev.core << 24); 44 return 0; 45 case MSM_PARAM_MAX_FREQ: 46 *value = adreno_gpu->base.fast_rate; 47 return 0; 48 case MSM_PARAM_TIMESTAMP: 49 if (adreno_gpu->funcs->get_timestamp) { 50 int ret; 51 52 pm_runtime_get_sync(&gpu->pdev->dev); 53 ret = adreno_gpu->funcs->get_timestamp(gpu, value); 54 pm_runtime_put_autosuspend(&gpu->pdev->dev); 55 56 return ret; 57 } 58 return -EINVAL; 59 case MSM_PARAM_NR_RINGS: 60 *value = gpu->nr_rings; 61 return 0; 62 default: 63 DBG("%s: invalid param: %u", gpu->name, param); 64 return -EINVAL; 65 } 66 } 67 68 const struct firmware * 69 adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname) 70 { 71 struct drm_device *drm = adreno_gpu->base.dev; 72 const struct firmware *fw = NULL; 73 char newname[strlen("qcom/") + strlen(fwname) + 1]; 74 int ret; 75 76 sprintf(newname, "qcom/%s", fwname); 77 78 /* 79 * Try first to load from qcom/$fwfile using a direct load (to avoid 80 * a potential timeout waiting for usermode helper) 81 */ 82 if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) || 83 (adreno_gpu->fwloc == FW_LOCATION_NEW)) { 84 85 ret = request_firmware_direct(&fw, newname, drm->dev); 86 if (!ret) { 87 dev_info(drm->dev, "loaded %s from new location\n", 88 newname); 89 adreno_gpu->fwloc = FW_LOCATION_NEW; 90 return fw; 91 } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { 92 dev_err(drm->dev, "failed to load %s: %d\n", 93 newname, ret); 94 return ERR_PTR(ret); 95 } 96 } 97 98 /* 99 * Then try the legacy location without qcom/ prefix 100 */ 101 if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) || 102 (adreno_gpu->fwloc == FW_LOCATION_LEGACY)) { 103 104 ret = request_firmware_direct(&fw, fwname, drm->dev); 105 if (!ret) { 106 dev_info(drm->dev, "loaded %s from legacy location\n", 107 newname); 108 adreno_gpu->fwloc = FW_LOCATION_LEGACY; 109 return fw; 110 } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { 111 dev_err(drm->dev, "failed to load %s: %d\n", 112 fwname, ret); 113 return ERR_PTR(ret); 114 } 115 } 116 117 /* 118 * Finally fall back to request_firmware() for cases where the 119 * usermode helper is needed (I think mainly android) 120 */ 121 if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) || 122 (adreno_gpu->fwloc == FW_LOCATION_HELPER)) { 123 124 ret = request_firmware(&fw, newname, drm->dev); 125 if (!ret) { 126 dev_info(drm->dev, "loaded %s with helper\n", 127 newname); 128 adreno_gpu->fwloc = FW_LOCATION_HELPER; 129 return fw; 130 } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { 131 dev_err(drm->dev, "failed to load %s: %d\n", 132 newname, ret); 133 return ERR_PTR(ret); 134 } 135 } 136 137 dev_err(drm->dev, "failed to load %s\n", fwname); 138 return ERR_PTR(-ENOENT); 139 } 140 141 static int adreno_load_fw(struct adreno_gpu *adreno_gpu) 142 { 143 int i; 144 145 for (i = 0; i < ARRAY_SIZE(adreno_gpu->info->fw); i++) { 146 const struct firmware *fw; 147 148 if (!adreno_gpu->info->fw[i]) 149 continue; 150 151 /* Skip if the firmware has already been loaded */ 152 if (adreno_gpu->fw[i]) 153 continue; 154 155 fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->fw[i]); 156 if (IS_ERR(fw)) 157 return PTR_ERR(fw); 158 159 adreno_gpu->fw[i] = fw; 160 } 161 162 return 0; 163 } 164 165 struct drm_gem_object *adreno_fw_create_bo(struct msm_gpu *gpu, 166 const struct firmware *fw, u64 *iova) 167 { 168 struct drm_gem_object *bo; 169 void *ptr; 170 171 ptr = msm_gem_kernel_new_locked(gpu->dev, fw->size - 4, 172 MSM_BO_UNCACHED | MSM_BO_GPU_READONLY, gpu->aspace, &bo, iova); 173 174 if (IS_ERR(ptr)) 175 return ERR_CAST(ptr); 176 177 memcpy(ptr, &fw->data[4], fw->size - 4); 178 179 msm_gem_put_vaddr(bo); 180 181 return bo; 182 } 183 184 int adreno_hw_init(struct msm_gpu *gpu) 185 { 186 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 187 int ret, i; 188 189 DBG("%s", gpu->name); 190 191 ret = adreno_load_fw(adreno_gpu); 192 if (ret) 193 return ret; 194 195 for (i = 0; i < gpu->nr_rings; i++) { 196 struct msm_ringbuffer *ring = gpu->rb[i]; 197 198 if (!ring) 199 continue; 200 201 ret = msm_gem_get_iova(ring->bo, gpu->aspace, &ring->iova); 202 if (ret) { 203 ring->iova = 0; 204 dev_err(gpu->dev->dev, 205 "could not map ringbuffer %d: %d\n", i, ret); 206 return ret; 207 } 208 209 ring->cur = ring->start; 210 ring->next = ring->start; 211 212 /* reset completed fence seqno: */ 213 ring->memptrs->fence = ring->seqno; 214 ring->memptrs->rptr = 0; 215 } 216 217 /* 218 * Setup REG_CP_RB_CNTL. The same value is used across targets (with 219 * the excpetion of A430 that disables the RPTR shadow) - the cacluation 220 * for the ringbuffer size and block size is moved to msm_gpu.h for the 221 * pre-processor to deal with and the A430 variant is ORed in here 222 */ 223 adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL, 224 MSM_GPU_RB_CNTL_DEFAULT | 225 (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0)); 226 227 /* Setup ringbuffer address - use ringbuffer[0] for GPU init */ 228 adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE, 229 REG_ADRENO_CP_RB_BASE_HI, gpu->rb[0]->iova); 230 231 if (!adreno_is_a430(adreno_gpu)) { 232 adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR, 233 REG_ADRENO_CP_RB_RPTR_ADDR_HI, 234 rbmemptr(gpu->rb[0], rptr)); 235 } 236 237 return 0; 238 } 239 240 /* Use this helper to read rptr, since a430 doesn't update rptr in memory */ 241 static uint32_t get_rptr(struct adreno_gpu *adreno_gpu, 242 struct msm_ringbuffer *ring) 243 { 244 if (adreno_is_a430(adreno_gpu)) 245 return ring->memptrs->rptr = adreno_gpu_read( 246 adreno_gpu, REG_ADRENO_CP_RB_RPTR); 247 else 248 return ring->memptrs->rptr; 249 } 250 251 struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu) 252 { 253 return gpu->rb[0]; 254 } 255 256 void adreno_recover(struct msm_gpu *gpu) 257 { 258 struct drm_device *dev = gpu->dev; 259 int ret; 260 261 // XXX pm-runtime?? we *need* the device to be off after this 262 // so maybe continuing to call ->pm_suspend/resume() is better? 263 264 gpu->funcs->pm_suspend(gpu); 265 gpu->funcs->pm_resume(gpu); 266 267 ret = msm_gpu_hw_init(gpu); 268 if (ret) { 269 dev_err(dev->dev, "gpu hw init failed: %d\n", ret); 270 /* hmm, oh well? */ 271 } 272 } 273 274 void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, 275 struct msm_file_private *ctx) 276 { 277 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 278 struct msm_drm_private *priv = gpu->dev->dev_private; 279 struct msm_ringbuffer *ring = submit->ring; 280 unsigned i; 281 282 for (i = 0; i < submit->nr_cmds; i++) { 283 switch (submit->cmd[i].type) { 284 case MSM_SUBMIT_CMD_IB_TARGET_BUF: 285 /* ignore IB-targets */ 286 break; 287 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: 288 /* ignore if there has not been a ctx switch: */ 289 if (priv->lastctx == ctx) 290 break; 291 case MSM_SUBMIT_CMD_BUF: 292 OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ? 293 CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2); 294 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); 295 OUT_RING(ring, submit->cmd[i].size); 296 OUT_PKT2(ring); 297 break; 298 } 299 } 300 301 OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); 302 OUT_RING(ring, submit->seqno); 303 304 if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) { 305 /* Flush HLSQ lazy updates to make sure there is nothing 306 * pending for indirect loads after the timestamp has 307 * passed: 308 */ 309 OUT_PKT3(ring, CP_EVENT_WRITE, 1); 310 OUT_RING(ring, HLSQ_FLUSH); 311 312 OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); 313 OUT_RING(ring, 0x00000000); 314 } 315 316 /* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */ 317 OUT_PKT3(ring, CP_EVENT_WRITE, 3); 318 OUT_RING(ring, CACHE_FLUSH_TS | BIT(31)); 319 OUT_RING(ring, rbmemptr(ring, fence)); 320 OUT_RING(ring, submit->seqno); 321 322 #if 0 323 if (adreno_is_a3xx(adreno_gpu)) { 324 /* Dummy set-constant to trigger context rollover */ 325 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 326 OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG)); 327 OUT_RING(ring, 0x00000000); 328 } 329 #endif 330 331 gpu->funcs->flush(gpu, ring); 332 } 333 334 void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 335 { 336 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 337 uint32_t wptr; 338 339 /* Copy the shadow to the actual register */ 340 ring->cur = ring->next; 341 342 /* 343 * Mask wptr value that we calculate to fit in the HW range. This is 344 * to account for the possibility that the last command fit exactly into 345 * the ringbuffer and rb->next hasn't wrapped to zero yet 346 */ 347 wptr = get_wptr(ring); 348 349 /* ensure writes to ringbuffer have hit system memory: */ 350 mb(); 351 352 adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr); 353 } 354 355 bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 356 { 357 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 358 uint32_t wptr = get_wptr(ring); 359 360 /* wait for CP to drain ringbuffer: */ 361 if (!spin_until(get_rptr(adreno_gpu, ring) == wptr)) 362 return true; 363 364 /* TODO maybe we need to reset GPU here to recover from hang? */ 365 DRM_ERROR("%s: timeout waiting to drain ringbuffer %d rptr/wptr = %X/%X\n", 366 gpu->name, ring->id, get_rptr(adreno_gpu, ring), wptr); 367 368 return false; 369 } 370 371 #ifdef CONFIG_DEBUG_FS 372 void adreno_show(struct msm_gpu *gpu, struct seq_file *m) 373 { 374 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 375 int i; 376 377 seq_printf(m, "revision: %d (%d.%d.%d.%d)\n", 378 adreno_gpu->info->revn, adreno_gpu->rev.core, 379 adreno_gpu->rev.major, adreno_gpu->rev.minor, 380 adreno_gpu->rev.patchid); 381 382 for (i = 0; i < gpu->nr_rings; i++) { 383 struct msm_ringbuffer *ring = gpu->rb[i]; 384 385 seq_printf(m, "rb %d: fence: %d/%d\n", i, 386 ring->memptrs->fence, ring->seqno); 387 388 seq_printf(m, " rptr: %d\n", 389 get_rptr(adreno_gpu, ring)); 390 seq_printf(m, "rb wptr: %d\n", get_wptr(ring)); 391 } 392 393 /* dump these out in a form that can be parsed by demsm: */ 394 seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name); 395 for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) { 396 uint32_t start = adreno_gpu->registers[i]; 397 uint32_t end = adreno_gpu->registers[i+1]; 398 uint32_t addr; 399 400 for (addr = start; addr <= end; addr++) { 401 uint32_t val = gpu_read(gpu, addr); 402 seq_printf(m, "IO:R %08x %08x\n", addr<<2, val); 403 } 404 } 405 } 406 #endif 407 408 /* Dump common gpu status and scratch registers on any hang, to make 409 * the hangcheck logs more useful. The scratch registers seem always 410 * safe to read when GPU has hung (unlike some other regs, depending 411 * on how the GPU hung), and they are useful to match up to cmdstream 412 * dumps when debugging hangs: 413 */ 414 void adreno_dump_info(struct msm_gpu *gpu) 415 { 416 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 417 int i; 418 419 printk("revision: %d (%d.%d.%d.%d)\n", 420 adreno_gpu->info->revn, adreno_gpu->rev.core, 421 adreno_gpu->rev.major, adreno_gpu->rev.minor, 422 adreno_gpu->rev.patchid); 423 424 for (i = 0; i < gpu->nr_rings; i++) { 425 struct msm_ringbuffer *ring = gpu->rb[i]; 426 427 printk("rb %d: fence: %d/%d\n", i, 428 ring->memptrs->fence, 429 ring->seqno); 430 431 printk("rptr: %d\n", get_rptr(adreno_gpu, ring)); 432 printk("rb wptr: %d\n", get_wptr(ring)); 433 } 434 } 435 436 /* would be nice to not have to duplicate the _show() stuff with printk(): */ 437 void adreno_dump(struct msm_gpu *gpu) 438 { 439 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 440 int i; 441 442 /* dump these out in a form that can be parsed by demsm: */ 443 printk("IO:region %s 00000000 00020000\n", gpu->name); 444 for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) { 445 uint32_t start = adreno_gpu->registers[i]; 446 uint32_t end = adreno_gpu->registers[i+1]; 447 uint32_t addr; 448 449 for (addr = start; addr <= end; addr++) { 450 uint32_t val = gpu_read(gpu, addr); 451 printk("IO:R %08x %08x\n", addr<<2, val); 452 } 453 } 454 } 455 456 static uint32_t ring_freewords(struct msm_ringbuffer *ring) 457 { 458 struct adreno_gpu *adreno_gpu = to_adreno_gpu(ring->gpu); 459 uint32_t size = MSM_GPU_RINGBUFFER_SZ >> 2; 460 /* Use ring->next to calculate free size */ 461 uint32_t wptr = ring->next - ring->start; 462 uint32_t rptr = get_rptr(adreno_gpu, ring); 463 return (rptr + (size - 1) - wptr) % size; 464 } 465 466 void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords) 467 { 468 if (spin_until(ring_freewords(ring) >= ndwords)) 469 DRM_DEV_ERROR(ring->gpu->dev->dev, 470 "timeout waiting for space in ringbuffer %d\n", 471 ring->id); 472 } 473 474 /* Get legacy powerlevels from qcom,gpu-pwrlevels and populate the opp table */ 475 static int adreno_get_legacy_pwrlevels(struct device *dev) 476 { 477 struct device_node *child, *node; 478 int ret; 479 480 node = of_find_compatible_node(dev->of_node, NULL, 481 "qcom,gpu-pwrlevels"); 482 if (!node) { 483 dev_err(dev, "Could not find the GPU powerlevels\n"); 484 return -ENXIO; 485 } 486 487 for_each_child_of_node(node, child) { 488 unsigned int val; 489 490 ret = of_property_read_u32(child, "qcom,gpu-freq", &val); 491 if (ret) 492 continue; 493 494 /* 495 * Skip the intentionally bogus clock value found at the bottom 496 * of most legacy frequency tables 497 */ 498 if (val != 27000000) 499 dev_pm_opp_add(dev, val, 0); 500 } 501 502 return 0; 503 } 504 505 static int adreno_get_pwrlevels(struct device *dev, 506 struct msm_gpu *gpu) 507 { 508 unsigned long freq = ULONG_MAX; 509 struct dev_pm_opp *opp; 510 int ret; 511 512 gpu->fast_rate = 0; 513 514 /* You down with OPP? */ 515 if (!of_find_property(dev->of_node, "operating-points-v2", NULL)) 516 ret = adreno_get_legacy_pwrlevels(dev); 517 else { 518 ret = dev_pm_opp_of_add_table(dev); 519 if (ret) 520 dev_err(dev, "Unable to set the OPP table\n"); 521 } 522 523 if (!ret) { 524 /* Find the fastest defined rate */ 525 opp = dev_pm_opp_find_freq_floor(dev, &freq); 526 if (!IS_ERR(opp)) { 527 gpu->fast_rate = freq; 528 dev_pm_opp_put(opp); 529 } 530 } 531 532 if (!gpu->fast_rate) { 533 dev_warn(dev, 534 "Could not find a clock rate. Using a reasonable default\n"); 535 /* Pick a suitably safe clock speed for any target */ 536 gpu->fast_rate = 200000000; 537 } 538 539 DBG("fast_rate=%u, slow_rate=27000000", gpu->fast_rate); 540 541 return 0; 542 } 543 544 int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, 545 struct adreno_gpu *adreno_gpu, 546 const struct adreno_gpu_funcs *funcs, int nr_rings) 547 { 548 struct adreno_platform_config *config = pdev->dev.platform_data; 549 struct msm_gpu_config adreno_gpu_config = { 0 }; 550 struct msm_gpu *gpu = &adreno_gpu->base; 551 552 adreno_gpu->funcs = funcs; 553 adreno_gpu->info = adreno_info(config->rev); 554 adreno_gpu->gmem = adreno_gpu->info->gmem; 555 adreno_gpu->revn = adreno_gpu->info->revn; 556 adreno_gpu->rev = config->rev; 557 558 adreno_gpu_config.ioname = "kgsl_3d0_reg_memory"; 559 adreno_gpu_config.irqname = "kgsl_3d0_irq"; 560 561 adreno_gpu_config.va_start = SZ_16M; 562 adreno_gpu_config.va_end = 0xffffffff; 563 564 adreno_gpu_config.nr_rings = nr_rings; 565 566 adreno_get_pwrlevels(&pdev->dev, gpu); 567 568 pm_runtime_set_autosuspend_delay(&pdev->dev, DRM_MSM_INACTIVE_PERIOD); 569 pm_runtime_use_autosuspend(&pdev->dev); 570 pm_runtime_enable(&pdev->dev); 571 572 return msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base, 573 adreno_gpu->info->name, &adreno_gpu_config); 574 } 575 576 void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu) 577 { 578 unsigned int i; 579 580 for (i = 0; i < ARRAY_SIZE(adreno_gpu->info->fw); i++) 581 release_firmware(adreno_gpu->fw[i]); 582 583 msm_gpu_cleanup(&adreno_gpu->base); 584 } 585