/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
				  unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
			      enum kfd_unmap_queues_filter filter,
			      uint32_t filter_param);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
				     struct queue *q,
				     struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				  unsigned int sdma_queue_id);

static void kfd_process_hw_exception(struct work_struct *work);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			     dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
			     KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines;
}

unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines
		* dqm->dev->device_info->num_sdma_queues_per_engine;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
			     struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
		/* On pre-SOC15 chips we need to use the queue ID to
		 * preserve the user mode ABI.
		 */
		q->doorbell_id = q->properties.queue_id;
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		/* For SDMA queues on SOC15, use static doorbell
		 * assignments based on the engine and queue.
		 */
		q->doorbell_id = dev->shared_resources.sdma_doorbell
			[q->properties.sdma_engine_id]
			[q->properties.sdma_queue_id];
	} else {
		/* For CP queues on SOC15 reserve a free doorbell ID */
		unsigned int found;

		found = find_first_zero_bit(qpd->doorbell_bitmap,
					    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
		if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
			pr_debug("No doorbells available");
			return -EBUSY;
		}
		set_bit(found, qpd->doorbell_bitmap);
		q->doorbell_id = found;
	}

	q->properties.doorbell_off =
		kfd_doorbell_id_to_offset(dev, q->process,
					  q->doorbell_id);

	return 0;
}

static void deallocate_doorbell(struct qcm_process_device *qpd,
				struct queue *q)
{
	unsigned int old;
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA)
		return;

	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
	WARN_ON(!old);
}

static int allocate_vmid(struct device_queue_manager *dqm,
			 struct qcm_process_device *qpd,
			 struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->vmid_bitmap) - 1;
	dqm->vmid_bitmap &= ~(1 << bit);

	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
	pr_debug("vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
			qpd->vmid,
			qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd));

	return 0;
}

static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
				       struct qcm_process_device *qpd)
{
	const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
	int ret;

	if (!qpd->ib_kaddr)
		return -ENOMEM;

	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
	if (ret)
		return ret;

	return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
				pmf->release_mem_size / sizeof(uint32_t));
}

static void deallocate_vmid(struct device_queue_manager *dqm,
			    struct qcm_process_device *qpd,
			    struct queue *q)
{
	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;

	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->device_info->asic_family == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			pr_err("Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd));

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	dqm->vmid_bitmap |= (1 << bit);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd)
{
	int retval;

	print_queue(q);

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
			dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
	else
		retval = -EINVAL;

	if (retval) {
		if (list_empty(&qpd->queues_list))
			deallocate_vmid(dqm, qpd, q);
		goto out_unlock;
	}

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
		 dqm->total_queue_count);

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
	     i < get_pipes_per_mec(dqm);
	     pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				  struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (!mqd_mgr)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval)
		return retval;

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_hqd;

	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				   &q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
		 q->pipe, q->queue);

	dqm->dev->kfd2kgd->set_scratch_backing_va(
			dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);

	if (!q->properties.is_active)
		return 0;

	if (WARN(q->process->mm != current->mm,
		 "should only run in user thread"))
		retval = -EFAULT;
	else
		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
					   &q->properties, current->mm);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_hqd:
	deallocate_hqd(dqm, q);

	return retval;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd,
					struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
		get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr)
		return -ENOMEM;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type %d is invalid\n",
			 q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	deallocate_doorbell(qpd, q);

	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				      KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				      KFD_UNMAP_LATENCY_MS,
				      q->pipe, q->queue);
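	/* A -ETIME return means the HQD could not be dequeued in time; flag
	 * the process so its wavefronts are reset below before the VMID is
	 * released.
	 */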
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
				dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
						     qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active)
		dqm->queue_count--;

	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				 struct qcm_process_device *qpd,
				 struct queue *q)
{
	int retval;

	dqm_lock(dqm);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	dqm_unlock(dqm);

	return retval;
}

static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	dqm_lock(dqm);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr) {
		retval = -ENOMEM;
		goto out_unlock;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (pdd->qpd.evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		retval = unmap_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval) {
			pr_err("unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			pr_err("destroy mqd failed\n");
			goto out_unlock;
		}
	}

	retval = mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active)
		dqm->queue_count++;
	else if (!q->properties.is_active && prev_active)
		dqm->queue_count--;

	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = map_queues_cpsch(dqm);
	else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
		if (WARN(q->process->mm != current->mm,
			 "should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
						   q->pipe, q->queue,
						   &q->properties, current->mm);
	}

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

static struct mqd_manager *get_mqd_manager(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd_mgr;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	pr_debug("mqd type %d\n", type);

	mqd_mgr = dqm->mqd_mgrs[type];
	if (!mqd_mgr) {
		mqd_mgr = mqd_manager_init(type, dqm->dev);
		if (!mqd_mgr)
			pr_err("mqd manager is NULL");
		dqm->mqd_mgrs[type] = mqd_mgr;
	}

	return mqd_mgr;
}

static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* Deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd_mgr) { /* should not be here */
			pr_err("Cannot evict queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval)
			goto out;
		dqm->queue_count--;
	}

out:
	dqm_unlock(dqm);
	return retval;
}

static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
				      struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* Deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		dqm->queue_count--;
	}
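	/* For a debug process the unmap filter must cover all queues,
	 * including its kernel queue; otherwise preempting the dynamic
	 * queues is sufficient.
	 */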
	retval = execute_queues_cpsch(dqm,
				qpd->is_debug ?
				KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);

out:
	dqm_unlock(dqm);
	return retval;
}

static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	struct mm_struct *mm = NULL;
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	if (!list_empty(&qpd->queues_list)) {
		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
				dqm->dev->kgd,
				qpd->vmid,
				qpd->page_table_base);
		kfd_flush_tlb(pdd);
	}

	/* Take a safe reference to the mm_struct, which may otherwise
	 * disappear even while the kfd_process is still referenced.
	 */
	mm = get_task_mm(pdd->process->lead_thread);
	if (!mm) {
		retval = -EFAULT;
		goto out;
	}

	/* activate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd_mgr) { /* should not be here */
			pr_err("Cannot restore queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
					   q->queue, &q->properties, mm);
		if (retval)
			goto out;
		dqm->queue_count++;
	}
	qpd->evicted = 0;
out:
	if (mm)
		mmput(mm);
	dqm_unlock(dqm);
	return retval;
}

static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	/* activate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		dqm->queue_count++;
	}
	retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	if (!retval)
		qpd->evicted = 0;
out:
	dqm_unlock(dqm);
	return retval;
}

static int register_process(struct device_queue_manager *dqm,
			    struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	list_add(&n->list, &dqm->queues);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	retval = dqm->asic_ops.update_qpd(dqm, qpd);

	if (dqm->processes_count++ == 0)
		amdgpu_amdkfd_set_compute_idle(dqm->dev->kgd, false);

	dqm_unlock(dqm);

	return retval;
}

static int unregister_process(struct device_queue_manager *dqm,
			      struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
		 list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	dqm_lock(dqm);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			if (--dqm->processes_count == 0)
				amdgpu_amdkfd_set_compute_idle(
					dqm->dev->kgd, true);
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	dqm_unlock(dqm);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
		       unsigned int vmid)
{
	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid, vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->shared_resources.queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
	dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;

	return 0;
}

static void uninitialize(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqd_mgrs[i]);
	mutex_destroy(&dqm->lock_hidden);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return pm_init(&dqm->packets, dqm);
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	pm_uninit(&dqm->packets);
	return 0;
}

static int allocate_sdma_queue(struct device_queue_manager *dqm,
			       unsigned int *sdma_queue_id)
{
	int bit;

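	/* Each set bit in sdma_bitmap marks a free SDMA queue; an all-zero
	 * bitmap means every SDMA queue is already allocated.
	 */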
	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->sdma_bitmap) - 1;
	dqm->sdma_bitmap &= ~(1 << bit);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				  unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= get_num_sdma_queues(dqm))
		return;
	dqm->sdma_bitmap |= (1 << sdma_queue_id);
}

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
				     struct queue *q,
				     struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd_mgr)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
	q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_sdma_queue;

	pr_debug("SDMA id is: %d\n", q->sdma_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				   &q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 0, 0, &q->properties,
				   NULL);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	deallocate_sdma_queue(dqm, q->sdma_id);

	return retval;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
		res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
		 "vmid mask: 0x%8X\n"
		 "queue mask: 0x%8llX\n",
		 res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;

	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);

	return 0;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval)
		goto fail_set_sched_resources;

	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
				     &dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	dqm_lock(dqm);
	/* clear hang status when the driver tries to start the hw scheduler */
	dqm->is_hws_hang = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	dqm_lock(dqm);
	unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	dqm_unlock(dqm);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
				     struct kernel_queue *kq,
				     struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
			dqm->total_queue_count);
		dqm_unlock(dqm);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
		 dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
				       struct kernel_queue *kq,
				       struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
		 dqm->total_queue_count);
	dqm_unlock(dqm);
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			      struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	retval = 0;

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
			dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		retval = allocate_sdma_queue(dqm, &q->sdma_id);
		if (retval)
			goto out_unlock;
		q->properties.sdma_queue_id =
			q->sdma_id / get_num_sdma_engines(dqm);
		q->properties.sdma_engine_id =
			q->sdma_id % get_num_sdma_engines(dqm);
	}

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_sdma_queue;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (!mqd_mgr) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;
	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				   &q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
		 dqm->total_queue_count);

	dqm_unlock(dqm);
	return retval;

out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q->sdma_id);
out_unlock:
	dqm_unlock(dqm);

	return retval;
}

int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
			      unsigned int fence_value,
			      unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, end_jiffies)) {
			pr_err("qcm fence wait loop timeout expired\n");
			/* In HWS case, this is used to halt the driver thread
			 * in order not to mess up CP states before doing
			 * scandumps for FW debugging.
			 */
			while (halt_if_hws_hang)
				schedule();

			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int unmap_sdma_queues(struct device_queue_manager *dqm,
			     unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
		return 0;

	if (dqm->active_runlist)
		return 0;

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval) {
		pr_err("failed to execute runlist\n");
		return retval;
	}
	dqm->active_runlist = true;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
			      enum kfd_unmap_queues_filter filter,
			      uint32_t filter_param)
{
	int retval = 0;

	if (dqm->is_hws_hang)
		return -EIO;
	if (!dqm->active_runlist)
		return retval;

	pr_debug("Before destroying queues, sdma queue count is : %u\n",
		 dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		unmap_sdma_queues(dqm, 0);
		unmap_sdma_queues(dqm, 1);
	}

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			filter, filter_param, false, 0);
	if (retval)
		return retval;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* should be timed out */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval)
		return retval;

	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval;

	if (dqm->is_hws_hang)
		return -EIO;
	retval = unmap_queues_cpsch(dqm, filter, filter_param);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
		dqm->is_hws_hang = true;
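		/* Schedule a GPU reset from a worker thread;
		 * kfd_process_hw_exception() calls amdgpu_amdkfd_gpu_reset().
		 */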
		schedule_work(&dqm->hw_exception_work);
		return retval;
	}

	return map_queues_cpsch(dqm);
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
			       struct qcm_process_device *qpd,
			       struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	dqm_lock(dqm);

	if (qpd->is_debug) {
		/*
		 * error: we currently do not allow destroying a queue
		 * of a process that is being debugged
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr) {
		retval = -ENOMEM;
		goto failed;
	}

	deallocate_doorbell(qpd, q);

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	}

	list_del(&q->list);
	qpd->queue_count--;
	if (q->properties.is_active) {
		dqm->queue_count--;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval == -ETIME)
			qpd->reset_wavefronts = true;
	}

	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
		 dqm->total_queue_count);

	dqm_unlock(dqm);

	return retval;

failed:
failed_try_destroy_debugged_queue:

	dqm_unlock(dqm);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				    struct qcm_process_device *qpd,
				    enum cache_policy default_policy,
				    enum cache_policy alternate_policy,
				    void __user *alternate_aperture_base,
				    uint64_t alternate_aperture_size)
{
	bool retval = true;

	if (!dqm->asic_ops.set_cache_memory_policy)
		return retval;

	dqm_lock(dqm);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		    (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		 qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		 qpd->sh_mem_ape1_limit);

out:
	dqm_unlock(dqm);
	return retval;
}

static int set_trap_handler(struct device_queue_manager *dqm,
			    struct qcm_process_device *qpd,
			    uint64_t tba_addr,
			    uint64_t tma_addr)
{
	uint64_t *tma;

	if (dqm->dev->cwsr_enabled) {
		/* Jump from CWSR trap handler to user trap */
		tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
		tma[0] = tba_addr;
		tma[1] = tma_addr;
	} else {
		qpd->tba_addr = tba_addr;
		qpd->tma_addr = tma_addr;
	}

	return 0;
}

static int process_termination_nocpsch(struct device_queue_manager *dqm,
				       struct qcm_process_device *qpd)
{
	struct queue *q, *next;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;

	dqm_lock(dqm);

	/* Clear all user mode queues */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		int ret;

		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	dqm_unlock(dqm);
	return retval;
}

static int get_wave_state(struct device_queue_manager *dqm,
			  struct queue *q,
			  void __user *ctl_stack,
			  u32 *ctl_stack_used_size,
			  u32 *save_area_used_size)
{
	struct mqd_manager *mqd_mgr;
	int r;

	dqm_lock(dqm);

	if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.is_active || !q->device->cwsr_enabled) {
		r = -EINVAL;
		goto dqm_unlock;
	}

	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (!mqd_mgr) {
		r = -ENOMEM;
		goto dqm_unlock;
	}

	if (!mqd_mgr->get_wave_state) {
		r = -EINVAL;
		goto dqm_unlock;
	}

	r = mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
			ctl_stack_used_size, save_area_used_size);

dqm_unlock:
	dqm_unlock(dqm);
	return r;
}

static int process_termination_cpsch(struct device_queue_manager *dqm,
				     struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q, *next;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd_mgr;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;

	retval = 0;

	dqm_lock(dqm);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		dqm->queue_count--;
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
			dqm->sdma_queue_count--;
			deallocate_sdma_queue(dqm, q->sdma_id);
		}

		if (q->properties.is_active)
			dqm->queue_count--;

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	retval = execute_queues_cpsch(dqm, filter, 0);
	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* lastly, free mqd resources */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd_mgr) {
			retval = -ENOMEM;
			goto out;
		}
		list_del(&q->list);
		qpd->queue_count--;
		mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
	}

out:
	dqm_unlock(dqm);
	return retval;
}

struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->device_info->asic_family) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		dqm->ops.get_wave_state = get_wave_state;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		dqm->ops.get_wave_state = get_wave_state;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_HAWAII:
		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
		break;

	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
		break;

	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		device_queue_manager_init_v9(&dqm->asic_ops);
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dev->device_info->asic_family);
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}

int kfd_process_vm_fault(struct device_queue_manager *dqm,
			 unsigned int pasid)
{
	struct kfd_process_device *pdd;
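	/* kfd_lookup_process_by_pasid() takes a reference on the process;
	 * it is dropped below via kfd_unref_process().
	 */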
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
	int ret = 0;

	if (!p)
		return -EINVAL;
	pdd = kfd_get_process_device_data(dqm->dev, p);
	if (pdd)
		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
	kfd_unref_process(p);

	return ret;
}

static void kfd_process_hw_exception(struct work_struct *work)
{
	struct device_queue_manager *dqm = container_of(work,
			struct device_queue_manager, hw_exception_work);
	amdgpu_amdkfd_gpu_reset(dqm->dev->kgd);
}

#if defined(CONFIG_DEBUG_FS)

static void seq_reg_dump(struct seq_file *m,
			 uint32_t (*dump)[2], uint32_t n_regs)
{
	uint32_t i, count;

	for (i = 0, count = 0; i < n_regs; i++) {
		if (count == 0 ||
		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
			seq_printf(m, "%s %08x: %08x",
				   i ? "\n" : "",
				   dump[i][0], dump[i][1]);
			count = 7;
		} else {
			seq_printf(m, " %08x", dump[i][1]);
			count--;
		}
	}

	seq_puts(m, "\n");
}

int dqm_debugfs_hqds(struct seq_file *m, void *data)
{
	struct device_queue_manager *dqm = data;
	uint32_t (*dump)[2], n_regs;
	int pipe, queue;
	int r = 0;

	r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
			KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs);
	if (!r) {
		seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n",
			   KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
			   KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
			   KFD_CIK_HIQ_QUEUE);
		seq_reg_dump(m, dump, n_regs);

		kfree(dump);
	}

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
			if (!test_bit(pipe_offset + queue,
				      dqm->dev->shared_resources.queue_bitmap))
				continue;

			r = dqm->dev->kfd2kgd->hqd_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, " CP Pipe %d, Queue %d\n",
				   pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) {
		for (queue = 0;
		     queue < dqm->dev->device_info->num_sdma_queues_per_engine;
		     queue++) {
			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, " SDMA Engine %d, RLC %d\n",
				   pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	return r;
}

int dqm_debugfs_execute_queues(struct device_queue_manager *dqm)
{
	int r = 0;

	dqm_lock(dqm);
	dqm->active_runlist = true;
	r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	dqm_unlock(dqm);

	return r;
}

#endif