// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/drm/habanalabs_accel.h>
#include "habanalabs.h"

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

#define CB_VA_POOL_SIZE		(4UL * SZ_1G)

static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 page_size = prop->pmmu.page_size;
	int rc;

	if (!hdev->supports_cb_mapping) {
		dev_err_ratelimited(hdev->dev,
				"Mapping a CB to the device's MMU is not supported\n");
		return -EINVAL;
	}

	if (!hdev->mmu_enable) {
		dev_err_ratelimited(hdev->dev,
				"Cannot map CB because MMU is disabled\n");
		return -EINVAL;
	}

	if (cb->is_mmu_mapped)
		return 0;

	cb->roundup_size = roundup(cb->size, page_size);

	cb->virtual_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, cb->roundup_size);
	if (!cb->virtual_addr) {
		dev_err(hdev->dev, "Failed to allocate device virtual address for CB\n");
		return -ENOMEM;
	}

	mutex_lock(&hdev->mmu_lock);
	rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size);
	if (rc) {
		dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr);
		goto err_va_umap;
	}
	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV);
	mutex_unlock(&hdev->mmu_lock);

	cb->is_mmu_mapped = true;
	return rc;

err_va_umap:
	mutex_unlock(&hdev->mmu_lock);
	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
	return rc;
}

static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
}

static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
{
	if (cb->is_internal)
		gen_pool_free(hdev->internal_cb_pool,
				(uintptr_t)cb->kernel_address, cb->size);
	else
		hl_asic_dma_free_coherent(hdev, cb->size, cb->kernel_address, cb->bus_address);

	kfree(cb);
}

static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb)
{
	if (cb->is_pool) {
		spin_lock(&hdev->cb_pool_lock);
		list_add(&cb->pool_list, &hdev->cb_pool);
		spin_unlock(&hdev->cb_pool_lock);
	} else {
		cb_fini(hdev, cb);
	}
}

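/*
 * hl_cb_alloc() - allocate the CB descriptor and its backing memory.
 * Internal CBs are carved out of the device's internal CB pool, kernel-context
 * CBs use coherent DMA memory (atomic first, blocking as a fallback), and user
 * CBs use zeroed coherent DMA memory.
 */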
static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
					int ctx_id, bool internal_cb)
{
	struct hl_cb *cb = NULL;
	u32 cb_offset;
	void *p;

	/*
	 * We use GFP_ATOMIC here because this function can be called from
	 * the latency-sensitive code path for command submission. Due to H/W
	 * limitations in some of the ASICs, the kernel must copy the user CB
	 * that is designated for an external queue and actually enqueue
	 * the kernel's copy. Hence, we must never sleep in this code section
	 * and must use GFP_ATOMIC for all memory allocations.
	 */
	if (ctx_id == HL_KERNEL_ASID_ID && !hdev->disabled)
		cb = kzalloc(sizeof(*cb), GFP_ATOMIC);

	if (!cb)
		cb = kzalloc(sizeof(*cb), GFP_KERNEL);

	if (!cb)
		return NULL;

	if (internal_cb) {
		p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size);
		if (!p) {
			kfree(cb);
			return NULL;
		}

		cb_offset = p - hdev->internal_cb_pool_virt_addr;
		cb->is_internal = true;
		cb->bus_address = hdev->internal_cb_va_base + cb_offset;
	} else if (ctx_id == HL_KERNEL_ASID_ID) {
		p = hl_asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, GFP_ATOMIC);
		if (!p)
			p = hl_asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, GFP_KERNEL);
	} else {
		p = hl_asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address,
						GFP_USER | __GFP_ZERO);
	}

	if (!p) {
		dev_err(hdev->dev,
			"failed to allocate %d bytes of dma memory for CB\n",
			cb_size);
		kfree(cb);
		return NULL;
	}

	cb->kernel_address = p;
	cb->size = cb_size;

	return cb;
}

struct hl_cb_mmap_mem_alloc_args {
	struct hl_device *hdev;
	struct hl_ctx *ctx;
	u32 cb_size;
	bool internal_cb;
	bool map_cb;
};

static void hl_cb_mmap_mem_release(struct hl_mmap_mem_buf *buf)
{
	struct hl_cb *cb = buf->private;

	hl_debugfs_remove_cb(cb);

	if (cb->is_mmu_mapped)
		cb_unmap_mem(cb->ctx, cb);

	hl_ctx_put(cb->ctx);

	cb_do_release(cb->hdev, cb);
}

static int hl_cb_mmap_mem_alloc(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
{
	struct hl_cb_mmap_mem_alloc_args *cb_args = args;
	struct hl_cb *cb;
	int rc, ctx_id = cb_args->ctx->asid;
	bool alloc_new_cb = true;

	if (!cb_args->internal_cb) {
		/* Minimum allocation must be PAGE_SIZE */
		if (cb_args->cb_size < PAGE_SIZE)
			cb_args->cb_size = PAGE_SIZE;

		if (ctx_id == HL_KERNEL_ASID_ID &&
				cb_args->cb_size <= cb_args->hdev->asic_prop.cb_pool_cb_size) {

			spin_lock(&cb_args->hdev->cb_pool_lock);
			if (!list_empty(&cb_args->hdev->cb_pool)) {
				cb = list_first_entry(&cb_args->hdev->cb_pool,
						typeof(*cb), pool_list);
				list_del(&cb->pool_list);
				spin_unlock(&cb_args->hdev->cb_pool_lock);
				alloc_new_cb = false;
			} else {
				spin_unlock(&cb_args->hdev->cb_pool_lock);
				dev_dbg(cb_args->hdev->dev, "CB pool is empty\n");
			}
		}
	}

	if (alloc_new_cb) {
		cb = hl_cb_alloc(cb_args->hdev, cb_args->cb_size, ctx_id, cb_args->internal_cb);
		if (!cb)
			return -ENOMEM;
	}

	cb->hdev = cb_args->hdev;
	cb->ctx = cb_args->ctx;
	cb->buf = buf;
	cb->buf->mappable_size = cb->size;
	cb->buf->private = cb;

	hl_ctx_get(cb->ctx);

	if (cb_args->map_cb) {
		if (ctx_id == HL_KERNEL_ASID_ID) {
			dev_err(cb_args->hdev->dev,
				"CB mapping is not supported for kernel context\n");
			rc = -EINVAL;
			goto release_cb;
		}

		rc = cb_map_mem(cb_args->ctx, cb);
		if (rc)
			goto release_cb;
	}

	hl_debugfs_add_cb(cb);

	return 0;

release_cb:
	hl_ctx_put(cb->ctx);
	cb_do_release(cb_args->hdev, cb);

	return rc;
}

static int hl_cb_mmap(struct hl_mmap_mem_buf *buf,
				struct vm_area_struct *vma, void *args)
{
	struct hl_cb *cb = buf->private;

	return cb->hdev->asic_funcs->mmap(cb->hdev, vma, cb->kernel_address,
					cb->bus_address, cb->size);
}

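/*
 * CBs are exposed to user-space through the generic mmap memory manager.
 * These callbacks implement allocation, mmap and final release of a CB buffer.
 */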
static struct hl_mmap_mem_buf_behavior cb_behavior = {
	.topic = "CB",
	.mem_id = HL_MMAP_TYPE_CB,
	.alloc = hl_cb_mmap_mem_alloc,
	.release = hl_cb_mmap_mem_release,
	.mmap = hl_cb_mmap,
};

int hl_cb_create(struct hl_device *hdev, struct hl_mem_mgr *mmg,
			struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
			bool map_cb, u64 *handle)
{
	struct hl_cb_mmap_mem_alloc_args args = {
		.hdev = hdev,
		.ctx = ctx,
		.cb_size = cb_size,
		.internal_cb = internal_cb,
		.map_cb = map_cb,
	};
	struct hl_mmap_mem_buf *buf;
	int ctx_id = ctx->asid;

	if ((hdev->disabled) || (hdev->reset_info.in_reset && (ctx_id != HL_KERNEL_ASID_ID))) {
		dev_warn_ratelimited(hdev->dev,
			"Device is disabled or in reset. Can't create new CBs\n");
		return -EBUSY;
	}

	if (cb_size > SZ_2M) {
		dev_err(hdev->dev, "CB size %d must be less than %d\n",
			cb_size, SZ_2M);
		return -EINVAL;
	}

	buf = hl_mmap_mem_buf_alloc(
		mmg, &cb_behavior,
		ctx_id == HL_KERNEL_ASID_ID ? GFP_ATOMIC : GFP_KERNEL, &args);
	if (!buf)
		return -ENOMEM;

	*handle = buf->handle;

	return 0;
}

int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle)
{
	struct hl_cb *cb;
	int rc;

	/* Make sure that a CB handle isn't destroyed by user more than once */
	if (!mmg->is_kernel_mem_mgr) {
		cb = hl_cb_get(mmg, cb_handle);
		if (!cb) {
			dev_dbg(mmg->dev, "CB destroy failed, no CB was found for handle %#llx\n",
					cb_handle);
			rc = -EINVAL;
			goto out;
		}

		rc = atomic_cmpxchg(&cb->is_handle_destroyed, 0, 1);
		hl_cb_put(cb);
		if (rc) {
			dev_dbg(mmg->dev, "CB destroy failed, handle %#llx was already destroyed\n",
					cb_handle);
			rc = -EINVAL;
			goto out;
		}
	}

	rc = hl_mmap_mem_buf_put_handle(mmg, cb_handle);
out:
	if (rc < 0)
		return rc; /* Invalid handle */

	if (rc == 0)
		dev_dbg(mmg->dev, "CB 0x%llx is destroyed while still in use\n", cb_handle);

	return 0;
}

static int hl_cb_info(struct hl_mem_mgr *mmg,
			u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va)
{
	struct hl_cb *cb;
	int rc = 0;

	cb = hl_cb_get(mmg, handle);
	if (!cb) {
		dev_err(mmg->dev,
			"CB info failed, no match to handle 0x%llx\n", handle);
		return -EINVAL;
	}

	if (flags & HL_CB_FLAGS_GET_DEVICE_VA) {
		if (cb->is_mmu_mapped) {
			*device_va = cb->virtual_addr;
		} else {
			dev_err(mmg->dev, "CB is not mapped to the device's MMU\n");
			rc = -EINVAL;
			goto out;
		}
	} else {
		*usage_cnt = atomic_read(&cb->cs_cnt);
	}

out:
	hl_cb_put(cb);
	return rc;
}

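/*
 * hl_cb_ioctl() - the CB IOCTL handler. Dispatches the user request to CB
 * create, destroy or info, and copies the result back through the
 * input/output arguments structure.
 */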
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_cb_args *args = data;
	struct hl_device *hdev = hpriv->hdev;
	u64 handle = 0, device_va = 0;
	enum hl_device_status status;
	u32 usage_cnt = 0;
	int rc;

	if (!hl_device_operational(hdev, &status)) {
		dev_warn_ratelimited(hdev->dev,
			"Device is %s. Can't execute CB IOCTL\n",
			hdev->status[status]);
		return -EBUSY;
	}

	switch (args->in.op) {
	case HL_CB_OP_CREATE:
		if (args->in.cb_size > HL_MAX_CB_SIZE) {
			dev_err(hdev->dev,
				"User requested CB size %d must be less than %d\n",
				args->in.cb_size, HL_MAX_CB_SIZE);
			rc = -EINVAL;
		} else {
			rc = hl_cb_create(hdev, &hpriv->mem_mgr, hpriv->ctx,
					args->in.cb_size, false,
					!!(args->in.flags & HL_CB_FLAGS_MAP),
					&handle);
		}

		memset(args, 0, sizeof(*args));
		args->out.cb_handle = handle;
		break;

	case HL_CB_OP_DESTROY:
		rc = hl_cb_destroy(&hpriv->mem_mgr,
					args->in.cb_handle);
		break;

	case HL_CB_OP_INFO:
		rc = hl_cb_info(&hpriv->mem_mgr, args->in.cb_handle,
				args->in.flags,
				&usage_cnt,
				&device_va);
		if (rc)
			break;

		memset(&args->out, 0, sizeof(args->out));

		if (args->in.flags & HL_CB_FLAGS_GET_DEVICE_VA)
			args->out.device_va = device_va;
		else
			args->out.usage_cnt = usage_cnt;
		break;

	default:
		rc = -EINVAL;
		break;
	}

	return rc;
}

struct hl_cb *hl_cb_get(struct hl_mem_mgr *mmg, u64 handle)
{
	struct hl_mmap_mem_buf *buf;

	buf = hl_mmap_mem_buf_get(mmg, handle);
	if (!buf)
		return NULL;

	return buf->private;
}

void hl_cb_put(struct hl_cb *cb)
{
	hl_mmap_mem_buf_put(cb->buf);
}

struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
					bool internal_cb)
{
	u64 cb_handle;
	struct hl_cb *cb;
	int rc;

	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, cb_size,
				internal_cb, false, &cb_handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate CB for the kernel driver %d\n", rc);
		return NULL;
	}

	cb = hl_cb_get(&hdev->kernel_mem_mgr, cb_handle);
	/* hl_cb_get should never fail here */
	if (!cb) {
		dev_crit(hdev->dev, "Kernel CB handle invalid 0x%x\n",
				(u32) cb_handle);
		goto destroy_cb;
	}

	return cb;

destroy_cb:
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb_handle);

	return NULL;
}

int hl_cb_pool_init(struct hl_device *hdev)
{
	struct hl_cb *cb;
	int i;

	INIT_LIST_HEAD(&hdev->cb_pool);
	spin_lock_init(&hdev->cb_pool_lock);

	for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
		cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
				HL_KERNEL_ASID_ID, false);
		if (cb) {
			cb->is_pool = true;
			list_add(&cb->pool_list, &hdev->cb_pool);
		} else {
			hl_cb_pool_fini(hdev);
			return -ENOMEM;
		}
	}

	return 0;
}

int hl_cb_pool_fini(struct hl_device *hdev)
{
	struct hl_cb *cb, *tmp;

	list_for_each_entry_safe(cb, tmp, &hdev->cb_pool, pool_list) {
		list_del(&cb->pool_list);
		cb_fini(hdev, cb);
	}

	return 0;
}

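/*
 * The per-context CB VA pool reserves a block of the host VA range on the
 * device MMU and hands out device virtual addresses for CBs that are mapped
 * through cb_map_mem().
 */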
int hl_cb_va_pool_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!hdev->supports_cb_mapping)
		return 0;

	ctx->cb_va_pool = gen_pool_create(__ffs(prop->pmmu.page_size), -1);
	if (!ctx->cb_va_pool) {
		dev_err(hdev->dev,
			"Failed to create VA gen pool for CB mapping\n");
		return -ENOMEM;
	}

	ctx->cb_va_pool_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
					CB_VA_POOL_SIZE, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
	if (!ctx->cb_va_pool_base) {
		rc = -ENOMEM;
		goto err_pool_destroy;
	}

	rc = gen_pool_add(ctx->cb_va_pool, ctx->cb_va_pool_base, CB_VA_POOL_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to VA gen pool for CB mapping\n");
		goto err_unreserve_va_block;
	}

	return 0;

err_unreserve_va_block:
	hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
err_pool_destroy:
	gen_pool_destroy(ctx->cb_va_pool);

	return rc;
}

void hl_cb_va_pool_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	if (!hdev->supports_cb_mapping)
		return;

	gen_pool_destroy(ctx->cb_va_pool);
	hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
}