// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/drm/habanalabs_accel.h>
#include "habanalabs.h"

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

#define CB_VA_POOL_SIZE		(4UL * SZ_1G)

static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 page_size = prop->pmmu.page_size;
	int rc;

	if (!hdev->supports_cb_mapping) {
		dev_err_ratelimited(hdev->dev,
				"Mapping a CB to the device's MMU is not supported\n");
		return -EINVAL;
	}

	if (cb->is_mmu_mapped)
		return 0;

	cb->roundup_size = roundup(cb->size, page_size);

	cb->virtual_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, cb->roundup_size);
	if (!cb->virtual_addr) {
		dev_err(hdev->dev, "Failed to allocate device virtual address for CB\n");
		return -ENOMEM;
	}

	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size);
	if (rc) {
		dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr);
		goto err_va_pool_free;
	}

	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV);
	if (rc)
		goto err_mmu_unmap;

	mutex_unlock(&hdev->mmu_lock);

	cb->is_mmu_mapped = true;

	return 0;

err_mmu_unmap:
	hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
err_va_pool_free:
	mutex_unlock(&hdev->mmu_lock);
	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);

	return rc;
}

static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
}

static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
{
	if (cb->is_internal)
		gen_pool_free(hdev->internal_cb_pool,
				(uintptr_t)cb->kernel_address, cb->size);
	else
		hl_asic_dma_free_coherent(hdev, cb->size, cb->kernel_address, cb->bus_address);

	kfree(cb);
}

static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb)
{
	if (cb->is_pool) {
		atomic_set(&cb->is_handle_destroyed, 0);
		spin_lock(&hdev->cb_pool_lock);
		list_add(&cb->pool_list, &hdev->cb_pool);
		spin_unlock(&hdev->cb_pool_lock);
	} else {
		cb_fini(hdev, cb);
	}
}

static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
					int ctx_id, bool internal_cb)
{
	struct hl_cb *cb = NULL;
	u32 cb_offset;
	void *p;

	/*
	 * We use GFP_ATOMIC here because this function can be called from
	 * the latency-sensitive code path for command submission. Due to H/W
	 * limitations in some of the ASICs, the kernel must copy the user CB
	 * that is designated for an external queue and actually enqueue
	 * the kernel's copy. Hence, we must never sleep in this code section
	 * and must use GFP_ATOMIC for all memory allocations.
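	 *
	 * Note that if the atomic allocation fails, the code below falls back
	 * to GFP_KERNEL, in which case the allocation may sleep.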
	 */
	if (ctx_id == HL_KERNEL_ASID_ID && !hdev->disabled)
		cb = kzalloc(sizeof(*cb), GFP_ATOMIC);

	if (!cb)
		cb = kzalloc(sizeof(*cb), GFP_KERNEL);

	if (!cb)
		return NULL;

	if (internal_cb) {
		p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size);
		if (!p) {
			kfree(cb);
			return NULL;
		}

		cb_offset = p - hdev->internal_cb_pool_virt_addr;
		cb->is_internal = true;
		cb->bus_address = hdev->internal_cb_va_base + cb_offset;
	} else if (ctx_id == HL_KERNEL_ASID_ID) {
		p = hl_asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, GFP_ATOMIC);
		if (!p)
			p = hl_asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, GFP_KERNEL);
	} else {
		p = hl_asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address,
						GFP_USER | __GFP_ZERO);
	}

	if (!p) {
		dev_err(hdev->dev,
			"failed to allocate %d bytes of DMA memory for CB\n",
			cb_size);
		kfree(cb);
		return NULL;
	}

	cb->kernel_address = p;
	cb->size = cb_size;

	return cb;
}

struct hl_cb_mmap_mem_alloc_args {
	struct hl_device *hdev;
	struct hl_ctx *ctx;
	u32 cb_size;
	bool internal_cb;
	bool map_cb;
};

static void hl_cb_mmap_mem_release(struct hl_mmap_mem_buf *buf)
{
	struct hl_cb *cb = buf->private;

	hl_debugfs_remove_cb(cb);

	if (cb->is_mmu_mapped)
		cb_unmap_mem(cb->ctx, cb);

	hl_ctx_put(cb->ctx);

	cb_do_release(cb->hdev, cb);
}

static int hl_cb_mmap_mem_alloc(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
{
	struct hl_cb_mmap_mem_alloc_args *cb_args = args;
	struct hl_cb *cb;
	int rc, ctx_id = cb_args->ctx->asid;
	bool alloc_new_cb = true;

	if (!cb_args->internal_cb) {
		/* Minimum allocation must be PAGE SIZE */
		if (cb_args->cb_size < PAGE_SIZE)
			cb_args->cb_size = PAGE_SIZE;

		if (ctx_id == HL_KERNEL_ASID_ID &&
				cb_args->cb_size <= cb_args->hdev->asic_prop.cb_pool_cb_size) {

			spin_lock(&cb_args->hdev->cb_pool_lock);
			if (!list_empty(&cb_args->hdev->cb_pool)) {
				cb = list_first_entry(&cb_args->hdev->cb_pool,
						typeof(*cb), pool_list);
				list_del(&cb->pool_list);
				spin_unlock(&cb_args->hdev->cb_pool_lock);
				alloc_new_cb = false;
			} else {
				spin_unlock(&cb_args->hdev->cb_pool_lock);
				dev_dbg(cb_args->hdev->dev, "CB pool is empty\n");
			}
		}
	}

	if (alloc_new_cb) {
		cb = hl_cb_alloc(cb_args->hdev, cb_args->cb_size, ctx_id, cb_args->internal_cb);
		if (!cb)
			return -ENOMEM;
	}

	cb->hdev = cb_args->hdev;
	cb->ctx = cb_args->ctx;
	cb->buf = buf;
	cb->buf->mappable_size = cb->size;
	cb->buf->private = cb;

	hl_ctx_get(cb->ctx);

	if (cb_args->map_cb) {
		if (ctx_id == HL_KERNEL_ASID_ID) {
			dev_err(cb_args->hdev->dev,
				"CB mapping is not supported for kernel context\n");
			rc = -EINVAL;
			goto release_cb;
		}

		rc = cb_map_mem(cb_args->ctx, cb);
		if (rc)
			goto release_cb;
	}

	hl_debugfs_add_cb(cb);

	return 0;

release_cb:
	hl_ctx_put(cb->ctx);
	cb_do_release(cb_args->hdev, cb);

	return rc;
}

static int hl_cb_mmap(struct hl_mmap_mem_buf *buf,
				struct vm_area_struct *vma, void *args)
{
	struct hl_cb *cb = buf->private;

	return cb->hdev->asic_funcs->mmap(cb->hdev, vma, cb->kernel_address,
					cb->bus_address, cb->size);
}

static struct hl_mmap_mem_buf_behavior cb_behavior = {
	.topic = "CB",
	.mem_id = HL_MMAP_TYPE_CB,
	.alloc = hl_cb_mmap_mem_alloc,
	.release = hl_cb_mmap_mem_release,
	.mmap = hl_cb_mmap,
};
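/*
 * hl_cb_create() - create a new command buffer and return its handle.
 *
 * Internal CBs are carved from the internal CB pool, kernel CBs may be
 * recycled from the device CB pool and user CBs are allocated as
 * DMA-coherent memory. The new CB is registered with the given memory
 * manager, which provides the handle returned in 'handle', and is
 * optionally mapped to the device's MMU when 'map_cb' is set.
 */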
int hl_cb_create(struct hl_device *hdev, struct hl_mem_mgr *mmg,
			struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
			bool map_cb, u64 *handle)
{
	struct hl_cb_mmap_mem_alloc_args args = {
		.hdev = hdev,
		.ctx = ctx,
		.cb_size = cb_size,
		.internal_cb = internal_cb,
		.map_cb = map_cb,
	};
	struct hl_mmap_mem_buf *buf;
	int ctx_id = ctx->asid;

	if ((hdev->disabled) || (hdev->reset_info.in_reset && (ctx_id != HL_KERNEL_ASID_ID))) {
		dev_warn_ratelimited(hdev->dev,
			"Device is disabled or in reset. Can't create new CBs\n");
		return -EBUSY;
	}

	if (cb_size > SZ_2M) {
		dev_err(hdev->dev, "CB size %d must not exceed %d\n",
			cb_size, SZ_2M);
		return -EINVAL;
	}

	buf = hl_mmap_mem_buf_alloc(
		mmg, &cb_behavior,
		ctx_id == HL_KERNEL_ASID_ID ? GFP_ATOMIC : GFP_KERNEL, &args);
	if (!buf)
		return -ENOMEM;

	*handle = buf->handle;

	return 0;
}

int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle)
{
	struct hl_cb *cb;
	int rc;

	cb = hl_cb_get(mmg, cb_handle);
	if (!cb) {
		dev_dbg(mmg->dev, "CB destroy failed, no CB was found for handle %#llx\n",
			cb_handle);
		return -EINVAL;
	}

	/* Make sure that CB handle isn't destroyed more than once */
	rc = atomic_cmpxchg(&cb->is_handle_destroyed, 0, 1);
	hl_cb_put(cb);
	if (rc) {
		dev_dbg(mmg->dev, "CB destroy failed, handle %#llx was already destroyed\n",
			cb_handle);
		return -EINVAL;
	}

	rc = hl_mmap_mem_buf_put_handle(mmg, cb_handle);
	if (rc < 0)
		return rc; /* Invalid handle */

	if (rc == 0)
		dev_dbg(mmg->dev, "CB 0x%llx is destroyed while still in use\n", cb_handle);

	return 0;
}

static int hl_cb_info(struct hl_mem_mgr *mmg,
			u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va)
{
	struct hl_cb *cb;
	int rc = 0;

	cb = hl_cb_get(mmg, handle);
	if (!cb) {
		dev_err(mmg->dev,
			"CB info failed, no match to handle 0x%llx\n", handle);
		return -EINVAL;
	}

	if (flags & HL_CB_FLAGS_GET_DEVICE_VA) {
		if (cb->is_mmu_mapped) {
			*device_va = cb->virtual_addr;
		} else {
			dev_err(mmg->dev, "CB is not mapped to the device's MMU\n");
			rc = -EINVAL;
			goto out;
		}
	} else {
		*usage_cnt = atomic_read(&cb->cs_cnt);
	}

out:
	hl_cb_put(cb);
	return rc;
}
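/*
 * hl_cb_ioctl() - handler of the command buffer IOCTL.
 *
 * Dispatches the HL_CB_OP_CREATE, HL_CB_OP_DESTROY and HL_CB_OP_INFO
 * operations to hl_cb_create(), hl_cb_destroy() and hl_cb_info()
 * respectively, and fails with -EBUSY while the device is not operational.
 */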
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_cb_args *args = data;
	struct hl_device *hdev = hpriv->hdev;
	u64 handle = 0, device_va = 0;
	enum hl_device_status status;
	u32 usage_cnt = 0;
	int rc;

	if (!hl_device_operational(hdev, &status)) {
		dev_dbg_ratelimited(hdev->dev,
			"Device is %s. Can't execute CB IOCTL\n",
			hdev->status[status]);
		return -EBUSY;
	}

	switch (args->in.op) {
	case HL_CB_OP_CREATE:
		if (args->in.cb_size > HL_MAX_CB_SIZE) {
			dev_err(hdev->dev,
				"User requested CB size %d must not exceed %d\n",
				args->in.cb_size, HL_MAX_CB_SIZE);
			rc = -EINVAL;
		} else {
			rc = hl_cb_create(hdev, &hpriv->mem_mgr, hpriv->ctx,
					args->in.cb_size, false,
					!!(args->in.flags & HL_CB_FLAGS_MAP),
					&handle);
		}

		memset(args, 0, sizeof(*args));
		args->out.cb_handle = handle;
		break;

	case HL_CB_OP_DESTROY:
		rc = hl_cb_destroy(&hpriv->mem_mgr,
					args->in.cb_handle);
		break;

	case HL_CB_OP_INFO:
		rc = hl_cb_info(&hpriv->mem_mgr, args->in.cb_handle,
				args->in.flags,
				&usage_cnt,
				&device_va);
		if (rc)
			break;

		memset(&args->out, 0, sizeof(args->out));

		if (args->in.flags & HL_CB_FLAGS_GET_DEVICE_VA)
			args->out.device_va = device_va;
		else
			args->out.usage_cnt = usage_cnt;
		break;

	default:
		rc = -EINVAL;
		break;
	}

	return rc;
}

struct hl_cb *hl_cb_get(struct hl_mem_mgr *mmg, u64 handle)
{
	struct hl_mmap_mem_buf *buf;

	buf = hl_mmap_mem_buf_get(mmg, handle);
	if (!buf)
		return NULL;

	return buf->private;
}

void hl_cb_put(struct hl_cb *cb)
{
	hl_mmap_mem_buf_put(cb->buf);
}

struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
					bool internal_cb)
{
	u64 cb_handle;
	struct hl_cb *cb;
	int rc;

	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, cb_size,
				internal_cb, false, &cb_handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate CB for the kernel driver %d\n", rc);
		return NULL;
	}

	cb = hl_cb_get(&hdev->kernel_mem_mgr, cb_handle);
	/* hl_cb_get should never fail here */
	if (!cb) {
		dev_crit(hdev->dev, "Kernel CB handle invalid 0x%x\n",
				(u32) cb_handle);
		goto destroy_cb;
	}

	return cb;

destroy_cb:
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb_handle);

	return NULL;
}

int hl_cb_pool_init(struct hl_device *hdev)
{
	struct hl_cb *cb;
	int i;

	INIT_LIST_HEAD(&hdev->cb_pool);
	spin_lock_init(&hdev->cb_pool_lock);

	for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
		cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
				HL_KERNEL_ASID_ID, false);
		if (cb) {
			cb->is_pool = true;
			list_add(&cb->pool_list, &hdev->cb_pool);
		} else {
			hl_cb_pool_fini(hdev);
			return -ENOMEM;
		}
	}

	return 0;
}

int hl_cb_pool_fini(struct hl_device *hdev)
{
	struct hl_cb *cb, *tmp;

	list_for_each_entry_safe(cb, tmp, &hdev->cb_pool, pool_list) {
		list_del(&cb->pool_list);
		cb_fini(hdev, cb);
	}

	return 0;
}
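/*
 * On ASICs that support CB mapping, a context reserves a CB_VA_POOL_SIZE
 * block of device virtual addresses in the host VA range and manages it
 * through a genalloc pool. cb_map_mem() later allocates CB virtual
 * addresses from this pool when a CB is mapped to the device's MMU.
 */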
int hl_cb_va_pool_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!hdev->supports_cb_mapping)
		return 0;

	ctx->cb_va_pool = gen_pool_create(__ffs(prop->pmmu.page_size), -1);
	if (!ctx->cb_va_pool) {
		dev_err(hdev->dev,
			"Failed to create VA gen pool for CB mapping\n");
		return -ENOMEM;
	}

	ctx->cb_va_pool_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
					CB_VA_POOL_SIZE, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
	if (!ctx->cb_va_pool_base) {
		rc = -ENOMEM;
		goto err_pool_destroy;
	}

	rc = gen_pool_add(ctx->cb_va_pool, ctx->cb_va_pool_base, CB_VA_POOL_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to VA gen pool for CB mapping\n");
		goto err_unreserve_va_block;
	}

	return 0;

err_unreserve_va_block:
	hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
err_pool_destroy:
	gen_pool_destroy(ctx->cb_va_pool);

	return rc;
}

void hl_cb_va_pool_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	if (!hdev->supports_cb_mapping)
		return;

	gen_pool_destroy(ctx->cb_va_pool);
	hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
}