// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/drm/habanalabs_accel.h>
#include "habanalabs.h"

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

#define CB_VA_POOL_SIZE		(4UL * SZ_1G)

static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 page_size = prop->pmmu.page_size;
	int rc;

	if (!hdev->supports_cb_mapping) {
		dev_err_ratelimited(hdev->dev,
				"Mapping a CB to the device's MMU is not supported\n");
		return -EINVAL;
	}

	if (!hdev->mmu_enable) {
		dev_err_ratelimited(hdev->dev,
				"Cannot map CB because MMU is disabled\n");
		return -EINVAL;
	}

	if (cb->is_mmu_mapped)
		return 0;

	cb->roundup_size = roundup(cb->size, page_size);

	cb->virtual_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, cb->roundup_size);
	if (!cb->virtual_addr) {
		dev_err(hdev->dev, "Failed to allocate device virtual address for CB\n");
		return -ENOMEM;
	}

	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size);
	if (rc) {
		dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr);
		goto err_va_pool_free;
	}

	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV);
	if (rc)
		goto err_mmu_unmap;

	mutex_unlock(&hdev->mmu_lock);

	cb->is_mmu_mapped = true;

	return 0;

err_mmu_unmap:
	hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
err_va_pool_free:
	mutex_unlock(&hdev->mmu_lock);
	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);

	return rc;
}

static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
}

static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
{
	if (cb->is_internal)
		gen_pool_free(hdev->internal_cb_pool,
				(uintptr_t)cb->kernel_address, cb->size);
	else
		hl_asic_dma_free_coherent(hdev, cb->size, cb->kernel_address, cb->bus_address);

	kfree(cb);
}

static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb)
{
	if (cb->is_pool) {
		atomic_set(&cb->is_handle_destroyed, 0);
		spin_lock(&hdev->cb_pool_lock);
		list_add(&cb->pool_list, &hdev->cb_pool);
		spin_unlock(&hdev->cb_pool_lock);
	} else {
		cb_fini(hdev, cb);
	}
}

static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
					int ctx_id, bool internal_cb)
{
	struct hl_cb *cb = NULL;
	u32 cb_offset;
	void *p;

	/*
	 * We use GFP_ATOMIC here because this function can be called from
	 * the latency-sensitive code path for command submission. Due to H/W
	 * limitations in some of the ASICs, the kernel must copy the user CB
	 * that is designated for an external queue and actually enqueue
	 * the kernel's copy. Hence, we must never sleep in this code section
	 * and must use GFP_ATOMIC for all memory allocations.
	 */
	if (ctx_id == HL_KERNEL_ASID_ID && !hdev->disabled)
		cb = kzalloc(sizeof(*cb), GFP_ATOMIC);

	if (!cb)
		cb = kzalloc(sizeof(*cb), GFP_KERNEL);

	if (!cb)
		return NULL;

	if (internal_cb) {
		p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size);
		if (!p) {
			kfree(cb);
			return NULL;
		}

		cb_offset = p - hdev->internal_cb_pool_virt_addr;
		cb->is_internal = true;
		cb->bus_address = hdev->internal_cb_va_base + cb_offset;
	} else if (ctx_id == HL_KERNEL_ASID_ID) {
		p = hl_asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, GFP_ATOMIC);
		if (!p)
			p = hl_asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, GFP_KERNEL);
	} else {
		p = hl_asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address,
						GFP_USER | __GFP_ZERO);
	}

	if (!p) {
		dev_err(hdev->dev,
			"failed to allocate %d bytes of dma memory for CB\n",
			cb_size);
		kfree(cb);
		return NULL;
	}

	cb->kernel_address = p;
	cb->size = cb_size;

	return cb;
}
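/*
 * Parameters that hl_cb_create() packs and hands to the memory manager's
 * allocation callback, hl_cb_mmap_mem_alloc() below.
 */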
struct hl_cb_mmap_mem_alloc_args {
	struct hl_device *hdev;
	struct hl_ctx *ctx;
	u32 cb_size;
	bool internal_cb;
	bool map_cb;
};

static void hl_cb_mmap_mem_release(struct hl_mmap_mem_buf *buf)
{
	struct hl_cb *cb = buf->private;

	hl_debugfs_remove_cb(cb);

	if (cb->is_mmu_mapped)
		cb_unmap_mem(cb->ctx, cb);

	hl_ctx_put(cb->ctx);

	cb_do_release(cb->hdev, cb);
}

static int hl_cb_mmap_mem_alloc(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
{
	struct hl_cb_mmap_mem_alloc_args *cb_args = args;
	struct hl_cb *cb;
	int rc, ctx_id = cb_args->ctx->asid;
	bool alloc_new_cb = true;

	if (!cb_args->internal_cb) {
		/* Minimum allocation must be PAGE SIZE */
		if (cb_args->cb_size < PAGE_SIZE)
			cb_args->cb_size = PAGE_SIZE;

		if (ctx_id == HL_KERNEL_ASID_ID &&
				cb_args->cb_size <= cb_args->hdev->asic_prop.cb_pool_cb_size) {

			spin_lock(&cb_args->hdev->cb_pool_lock);
			if (!list_empty(&cb_args->hdev->cb_pool)) {
				cb = list_first_entry(&cb_args->hdev->cb_pool,
						typeof(*cb), pool_list);
				list_del(&cb->pool_list);
				spin_unlock(&cb_args->hdev->cb_pool_lock);
				alloc_new_cb = false;
			} else {
				spin_unlock(&cb_args->hdev->cb_pool_lock);
				dev_dbg(cb_args->hdev->dev, "CB pool is empty\n");
			}
		}
	}

	if (alloc_new_cb) {
		cb = hl_cb_alloc(cb_args->hdev, cb_args->cb_size, ctx_id, cb_args->internal_cb);
		if (!cb)
			return -ENOMEM;
	}

	cb->hdev = cb_args->hdev;
	cb->ctx = cb_args->ctx;
	cb->buf = buf;
	cb->buf->mappable_size = cb->size;
	cb->buf->private = cb;

	hl_ctx_get(cb->ctx);

	if (cb_args->map_cb) {
		if (ctx_id == HL_KERNEL_ASID_ID) {
			dev_err(cb_args->hdev->dev,
				"CB mapping is not supported for kernel context\n");
			rc = -EINVAL;
			goto release_cb;
		}

		rc = cb_map_mem(cb_args->ctx, cb);
		if (rc)
			goto release_cb;
	}

	hl_debugfs_add_cb(cb);

	return 0;

release_cb:
	hl_ctx_put(cb->ctx);
	cb_do_release(cb_args->hdev, cb);

	return rc;
}

static int hl_cb_mmap(struct hl_mmap_mem_buf *buf,
		      struct vm_area_struct *vma, void *args)
{
	struct hl_cb *cb = buf->private;

	return cb->hdev->asic_funcs->mmap(cb->hdev, vma, cb->kernel_address,
					cb->bus_address, cb->size);
}

static struct hl_mmap_mem_buf_behavior cb_behavior = {
	.topic = "CB",
	.mem_id = HL_MMAP_TYPE_CB,
	.alloc = hl_cb_mmap_mem_alloc,
	.release = hl_cb_mmap_mem_release,
	.mmap = hl_cb_mmap,
};
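/**
 * hl_cb_create() - allocate a command buffer and register it with the
 *                  memory manager.
 * @hdev: pointer to the habanalabs device structure.
 * @mmg: memory manager that will own the buffer handle.
 * @ctx: context that owns the CB.
 * @cb_size: requested CB size in bytes.
 * @internal_cb: true to carve the CB out of the internal CB pool.
 * @map_cb: true to also map the CB to the device's MMU.
 * @handle: out parameter for the memory-manager handle of the new CB.
 *
 * Return: 0 on success, negative error code on failure.
 */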
int hl_cb_create(struct hl_device *hdev, struct hl_mem_mgr *mmg,
			struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
			bool map_cb, u64 *handle)
{
	struct hl_cb_mmap_mem_alloc_args args = {
		.hdev = hdev,
		.ctx = ctx,
		.cb_size = cb_size,
		.internal_cb = internal_cb,
		.map_cb = map_cb,
	};
	struct hl_mmap_mem_buf *buf;
	int ctx_id = ctx->asid;

	if ((hdev->disabled) || (hdev->reset_info.in_reset && (ctx_id != HL_KERNEL_ASID_ID))) {
		dev_warn_ratelimited(hdev->dev,
			"Device is disabled or in reset. Can't create new CBs\n");
		return -EBUSY;
	}

	if (cb_size > SZ_2M) {
		dev_err(hdev->dev, "CB size %d must not exceed %d\n",
			cb_size, SZ_2M);
		return -EINVAL;
	}

	buf = hl_mmap_mem_buf_alloc(
		mmg, &cb_behavior,
		ctx_id == HL_KERNEL_ASID_ID ? GFP_ATOMIC : GFP_KERNEL, &args);
	if (!buf)
		return -ENOMEM;

	*handle = buf->handle;

	return 0;
}

int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle)
{
	struct hl_cb *cb;
	int rc;

	cb = hl_cb_get(mmg, cb_handle);
	if (!cb) {
		dev_dbg(mmg->dev, "CB destroy failed, no CB was found for handle %#llx\n",
			cb_handle);
		return -EINVAL;
	}

	/* Make sure that the CB handle isn't destroyed more than once */
	rc = atomic_cmpxchg(&cb->is_handle_destroyed, 0, 1);
	hl_cb_put(cb);
	if (rc) {
		dev_dbg(mmg->dev, "CB destroy failed, handle %#llx was already destroyed\n",
			cb_handle);
		return -EINVAL;
	}

	rc = hl_mmap_mem_buf_put_handle(mmg, cb_handle);
	if (rc < 0)
		return rc; /* Invalid handle */

	if (rc == 0)
		dev_dbg(mmg->dev, "CB 0x%llx is destroyed while still in use\n", cb_handle);

	return 0;
}

static int hl_cb_info(struct hl_mem_mgr *mmg,
			u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va)
{
	struct hl_cb *cb;
	int rc = 0;

	cb = hl_cb_get(mmg, handle);
	if (!cb) {
		dev_err(mmg->dev,
			"CB info failed, no match to handle 0x%llx\n", handle);
		return -EINVAL;
	}

	if (flags & HL_CB_FLAGS_GET_DEVICE_VA) {
		if (cb->is_mmu_mapped) {
			*device_va = cb->virtual_addr;
		} else {
			dev_err(mmg->dev, "CB is not mapped to the device's MMU\n");
			rc = -EINVAL;
			goto out;
		}
	} else {
		*usage_cnt = atomic_read(&cb->cs_cnt);
	}

out:
	hl_cb_put(cb);
	return rc;
}
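/*
 * hl_cb_ioctl() - command buffer IOCTL handler.
 *
 * Dispatches the HL_CB_OP_CREATE, HL_CB_OP_DESTROY and HL_CB_OP_INFO
 * operations requested by user space, after checking that the device is
 * operational.
 */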
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_cb_args *args = data;
	struct hl_device *hdev = hpriv->hdev;
	u64 handle = 0, device_va = 0;
	enum hl_device_status status;
	u32 usage_cnt = 0;
	int rc;

	if (!hl_device_operational(hdev, &status)) {
		dev_dbg_ratelimited(hdev->dev,
			"Device is %s. Can't execute CB IOCTL\n",
			hdev->status[status]);
		return -EBUSY;
	}

	switch (args->in.op) {
	case HL_CB_OP_CREATE:
		if (args->in.cb_size > HL_MAX_CB_SIZE) {
			dev_err(hdev->dev,
				"User requested CB size %d must not exceed %d\n",
				args->in.cb_size, HL_MAX_CB_SIZE);
			rc = -EINVAL;
		} else {
			rc = hl_cb_create(hdev, &hpriv->mem_mgr, hpriv->ctx,
					args->in.cb_size, false,
					!!(args->in.flags & HL_CB_FLAGS_MAP),
					&handle);
		}

		memset(args, 0, sizeof(*args));
		args->out.cb_handle = handle;
		break;

	case HL_CB_OP_DESTROY:
		rc = hl_cb_destroy(&hpriv->mem_mgr,
					args->in.cb_handle);
		break;

	case HL_CB_OP_INFO:
		rc = hl_cb_info(&hpriv->mem_mgr, args->in.cb_handle,
				args->in.flags,
				&usage_cnt,
				&device_va);
		if (rc)
			break;

		memset(&args->out, 0, sizeof(args->out));

		if (args->in.flags & HL_CB_FLAGS_GET_DEVICE_VA)
			args->out.device_va = device_va;
		else
			args->out.usage_cnt = usage_cnt;
		break;

	default:
		rc = -EINVAL;
		break;
	}

	return rc;
}

struct hl_cb *hl_cb_get(struct hl_mem_mgr *mmg, u64 handle)
{
	struct hl_mmap_mem_buf *buf;

	buf = hl_mmap_mem_buf_get(mmg, handle);
	if (!buf)
		return NULL;

	return buf->private;
}

void hl_cb_put(struct hl_cb *cb)
{
	hl_mmap_mem_buf_put(cb->buf);
}

struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
					bool internal_cb)
{
	u64 cb_handle;
	struct hl_cb *cb;
	int rc;

	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, cb_size,
				internal_cb, false, &cb_handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate CB for the kernel driver %d\n", rc);
		return NULL;
	}

	cb = hl_cb_get(&hdev->kernel_mem_mgr, cb_handle);
	/* hl_cb_get should never fail here */
	if (!cb) {
		dev_crit(hdev->dev, "Kernel CB handle invalid 0x%x\n",
				(u32) cb_handle);
		goto destroy_cb;
	}

	return cb;

destroy_cb:
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb_handle);

	return NULL;
}

int hl_cb_pool_init(struct hl_device *hdev)
{
	struct hl_cb *cb;
	int i;

	INIT_LIST_HEAD(&hdev->cb_pool);
	spin_lock_init(&hdev->cb_pool_lock);

	for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
		cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
				HL_KERNEL_ASID_ID, false);
		if (cb) {
			cb->is_pool = true;
			list_add(&cb->pool_list, &hdev->cb_pool);
		} else {
			hl_cb_pool_fini(hdev);
			return -ENOMEM;
		}
	}

	return 0;
}

int hl_cb_pool_fini(struct hl_device *hdev)
{
	struct hl_cb *cb, *tmp;

	list_for_each_entry_safe(cb, tmp, &hdev->cb_pool, pool_list) {
		list_del(&cb->pool_list);
		cb_fini(hdev, cb);
	}

	return 0;
}
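/*
 * hl_cb_va_pool_init() - reserve a CB_VA_POOL_SIZE block of device virtual
 * addresses for this context and expose it through a gen_pool, from which
 * cb_map_mem() later allocates the virtual address of each mapped CB.
 * Does nothing if the ASIC doesn't support CB mapping.
 */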
int hl_cb_va_pool_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!hdev->supports_cb_mapping)
		return 0;

	ctx->cb_va_pool = gen_pool_create(__ffs(prop->pmmu.page_size), -1);
	if (!ctx->cb_va_pool) {
		dev_err(hdev->dev,
			"Failed to create VA gen pool for CB mapping\n");
		return -ENOMEM;
	}

	ctx->cb_va_pool_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
					CB_VA_POOL_SIZE, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
	if (!ctx->cb_va_pool_base) {
		rc = -ENOMEM;
		goto err_pool_destroy;
	}

	rc = gen_pool_add(ctx->cb_va_pool, ctx->cb_va_pool_base, CB_VA_POOL_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to VA gen pool for CB mapping\n");
		goto err_unreserve_va_block;
	}

	return 0;

err_unreserve_va_block:
	hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
err_pool_destroy:
	gen_pool_destroy(ctx->cb_va_pool);

	return rc;
}

void hl_cb_va_pool_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	if (!hdev->supports_cb_mapping)
		return;

	gen_pool_destroy(ctx->cb_va_pool);
	hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
}