/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id);

static void kfd_process_hw_exception(struct work_struct *work);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			     dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines;
}

unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines
			* dqm->dev->device_info->num_sdma_queues_per_engine;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
		/* On pre-SOC15 chips we need to use the queue ID to
		 * preserve the user mode ABI.
		 */
		q->doorbell_id = q->properties.queue_id;
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
		 * doorbell assignments based on the engine and queue id.
		 * The doorbell index distance between RLC (2*i) and (2*i+1)
		 * for a SDMA engine is 512.
		 */
		uint32_t *idx_offset =
				dev->shared_resources.sdma_doorbell_idx;

		q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
			+ (q->properties.sdma_queue_id & 1)
			* KFD_QUEUE_DOORBELL_MIRROR_OFFSET
			+ (q->properties.sdma_queue_id >> 1);
	} else {
		/* For CP queues on SOC15 reserve a free doorbell ID */
		unsigned int found;

		found = find_first_zero_bit(qpd->doorbell_bitmap,
					    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
		if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
			pr_debug("No doorbells available");
			return -EBUSY;
		}
		set_bit(found, qpd->doorbell_bitmap);
		q->doorbell_id = found;
	}

	q->properties.doorbell_off =
		kfd_doorbell_id_to_offset(dev, q->process,
					  q->doorbell_id);

	return 0;
}
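
/*
 * Illustrative example of the SOC15 SDMA doorbell mapping above (values are
 * hypothetical; the real per-engine offsets come from
 * shared_resources.sdma_doorbell_idx). Assuming idx_offset[1] == 0x18 and
 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET == 512 (the RLC pair distance noted
 * above), sdma_engine_id 1 / sdma_queue_id 3 maps to
 * 0x18 + (3 & 1) * 512 + (3 >> 1), i.e. odd RLC queues land in the mirrored
 * upper doorbell range and the remaining bits index queue pairs within the
 * engine.
 */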

static void deallocate_doorbell(struct qcm_process_device *qpd,
				struct queue *q)
{
	unsigned int old;
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA)
		return;

	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
	WARN_ON(!old);
}

static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->vmid_bitmap) - 1;
	dqm->vmid_bitmap &= ~(1 << bit);

	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
	pr_debug("vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
			qpd->vmid,
			qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd));

	return 0;
}

static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
				struct qcm_process_device *qpd)
{
	const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
	int ret;

	if (!qpd->ib_kaddr)
		return -ENOMEM;

	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
	if (ret)
		return ret;

	return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
				pmf->release_mem_size / sizeof(uint32_t));
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;

	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->device_info->asic_family == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			pr_err("Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd));

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	dqm->vmid_bitmap |= (1 << bit);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd)
{
	int retval;

	print_queue(q);

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
	else
		retval = -EINVAL;

	if (retval) {
		if (list_empty(&qpd->queues_list))
			deallocate_vmid(dqm, qpd, q);
		goto out_unlock;
	}

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out_unlock:
	dqm_unlock(dqm);
	return retval;
}
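
/*
 * HQD slot allocation below is round-robin across the pipes of MEC 0
 * ("horizontal" allocation): starting at next_pipe_to_allocate, the first
 * enabled pipe with a free queue bit is taken, and the starting pipe is then
 * advanced by one so successive queues spread across pipes rather than
 * filling up a single pipe first.
 */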

static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (!mqd_mgr)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval)
		return retval;

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_hqd;

	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);

	dqm->dev->kfd2kgd->set_scratch_backing_va(
			dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);

	if (!q->properties.is_active)
		return 0;

	if (WARN(q->process->mm != current->mm,
		 "should only run in user thread"))
		retval = -EFAULT;
	else
		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
					   &q->properties, current->mm);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_hqd:
	deallocate_hqd(dqm, q);

	return retval;
}
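
/*
 * Note on the q->process->mm check above (and the matching one in
 * update_queue()): current->mm is handed to load_mqd(), which suggests the
 * HQD load path dereferences the queue's user-space write pointer through
 * that address space; the details live in the per-ASIC mqd_manager
 * implementations. The WARN enforces that queue activation happens in the
 * owning user thread.
 */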

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
		get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr)
		return -ENOMEM;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	deallocate_doorbell(qpd, q);

	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
					qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active)
		dqm->queue_count--;

	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;

	dqm_lock(dqm);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	dqm_unlock(dqm);

	return retval;
}

static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	dqm_lock(dqm);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr) {
		retval = -ENOMEM;
		goto out_unlock;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (pdd->qpd.evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		retval = unmap_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval) {
			pr_err("unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			pr_err("destroy mqd failed\n");
			goto out_unlock;
		}
	}

	retval = mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active)
		dqm->queue_count++;
	else if (!q->properties.is_active && prev_active)
		dqm->queue_count--;

	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = map_queues_cpsch(dqm);
	else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
		if (WARN(q->process->mm != current->mm,
			 "should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
						   q->pipe, q->queue,
						   &q->properties, current->mm);
	}

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

static struct mqd_manager *get_mqd_manager(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd_mgr;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	pr_debug("mqd type %d\n", type);

	mqd_mgr = dqm->mqd_mgrs[type];
	if (!mqd_mgr) {
		mqd_mgr = mqd_manager_init(type, dqm->dev);
		if (!mqd_mgr)
			pr_err("mqd manager is NULL");
		dqm->mqd_mgrs[type] = mqd_mgr;
	}

	return mqd_mgr;
}
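
/*
 * qpd->evicted acts as a reference count: each evict_process_queues_*()
 * call increments it and only the first one actually deactivates the
 * queues; restore_process_queues_*() decrements it and only restores once
 * the count drops back to zero. The helpers below implement this for the
 * no-HWS and HWS paths respectively.
 */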

static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd_mgr) { /* should not be here */
			pr_err("Cannot evict queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval)
			goto out;
		dqm->queue_count--;
	}

out:
	dqm_unlock(dqm);
	return retval;
}

static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
				      struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		dqm->queue_count--;
	}
	retval = execute_queues_cpsch(dqm,
				qpd->is_debug ?
				KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);

out:
	dqm_unlock(dqm);
	return retval;
}

static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	struct mm_struct *mm = NULL;
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	if (!list_empty(&qpd->queues_list)) {
		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
				dqm->dev->kgd,
				qpd->vmid,
				qpd->page_table_base);
		kfd_flush_tlb(pdd);
	}

	/* Take a safe reference to the mm_struct, which may otherwise
	 * disappear even while the kfd_process is still referenced.
	 */
	mm = get_task_mm(pdd->process->lead_thread);
	if (!mm) {
		retval = -EFAULT;
		goto out;
	}

	/* Re-activate all evicted queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd_mgr) { /* should not be here */
			pr_err("Cannot restore queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
				q->queue, &q->properties, mm);
		if (retval)
			goto out;
		dqm->queue_count++;
	}
	qpd->evicted = 0;
out:
	if (mm)
		mmput(mm);
	dqm_unlock(dqm);
	return retval;
}

static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	/* Re-activate all evicted queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		dqm->queue_count++;
	}
	retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	if (!retval)
		qpd->evicted = 0;
out:
	dqm_unlock(dqm);
	return retval;
}

static int register_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	list_add(&n->list, &dqm->queues);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	retval = dqm->asic_ops.update_qpd(dqm, qpd);

	dqm->processes_count++;
	kfd_inc_compute_active(dqm->dev);

	dqm_unlock(dqm);

	return retval;
}

static int unregister_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	dqm_lock(dqm);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			kfd_dec_compute_active(dqm->dev);
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	dqm_unlock(dqm);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid, vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->shared_resources.queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
	dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;

	return 0;
}

static void uninitialize(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqd_mgrs[i]);
	mutex_destroy(&dqm->lock_hidden);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return pm_init(&dqm->packets, dqm);
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	pm_uninit(&dqm->packets);
	return 0;
}

static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->sdma_bitmap) - 1;
	dqm->sdma_bitmap &= ~(1 << bit);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= get_num_sdma_queues(dqm))
		return;
	dqm->sdma_bitmap |= (1 << sdma_queue_id);
}
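
/*
 * Illustrative example of the sdma_id split used below (values assumed for
 * illustration): with get_num_sdma_engines() == 2, sdma_id 5 becomes
 * sdma_queue_id = 5 / 2 = 2 and sdma_engine_id = 5 % 2 = 1, i.e. queue 2 on
 * engine 1, so consecutive allocations alternate between the engines.
 */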

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd_mgr)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
	q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_sdma_queue;

	pr_debug("SDMA id is: %d\n", q->sdma_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 0, 0, &q->properties,
				NULL);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	deallocate_sdma_queue(dqm, q->sdma_id);

	return retval;
}
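
/*
 * Naming note: the *_nocpsch functions above implement the path where the
 * driver programs HQDs directly (no CP hardware scheduler), while the
 * *_cpsch functions below drive the CP scheduler (HWS) by sending runlists
 * and unmap requests through the packet manager.
 */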

/*
 * Device Queue Manager implementation for cp scheduler
 */

static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
						res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
			"vmid mask: 0x%8X\n"
			"queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;

	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);

	return 0;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval)
		goto fail_set_sched_resources;

	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	dqm_lock(dqm);
	/* clear hang status when the driver tries to start the hw scheduler */
	dqm->is_hws_hang = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	dqm_lock(dqm);
	unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	dqm_unlock(dqm);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}
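
/*
 * create_kernel_queue_cpsch()/destroy_kernel_queue_cpsch() back the debug
 * interface queue (DIQ) path, hence the qpd->is_debug toggling; the HIQ
 * used internally by the packet manager does not go through the DQM.
 */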

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		dqm_unlock(dqm);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	dqm_unlock(dqm);
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	retval = 0;

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		retval = allocate_sdma_queue(dqm, &q->sdma_id);
		if (retval)
			goto out_unlock;
		q->properties.sdma_queue_id =
			q->sdma_id / get_num_sdma_engines(dqm);
		q->properties.sdma_engine_id =
			q->sdma_id % get_num_sdma_engines(dqm);
	}

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_sdma_queue;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (!mqd_mgr) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;
	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);
	return retval;

out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q->sdma_id);
out_unlock:
	dqm_unlock(dqm);

	return retval;
}
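
/*
 * Fence handshake used by the HWS path: unmap_queues_cpsch() resets
 * *fence_addr to KFD_FENCE_INIT, asks the CP firmware (via
 * pm_send_query_status()) to write KFD_FENCE_COMPLETED once the preceding
 * unmap has been processed, and then polls the location with the helper
 * below until it changes or the timeout expires.
 */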

int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, end_jiffies)) {
			pr_err("qcm fence wait loop timeout expired\n");
			/* In HWS case, this is used to halt the driver thread
			 * in order not to mess up CP states before doing
			 * scandumps for FW debugging.
			 */
			while (halt_if_hws_hang)
				schedule();

			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int unmap_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
		return 0;

	if (dqm->active_runlist)
		return 0;

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval) {
		pr_err("failed to execute runlist\n");
		return retval;
	}
	dqm->active_runlist = true;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval = 0;

	if (dqm->is_hws_hang)
		return -EIO;
	if (!dqm->active_runlist)
		return retval;

	pr_debug("Before destroying queues, sdma queue count is : %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		unmap_sdma_queues(dqm, 0);
		unmap_sdma_queues(dqm, 1);
	}

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			filter, filter_param, false, 0);
	if (retval)
		return retval;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* Wait for the fence; this times out if the CP never signals
	 * completion of the unmap.
	 */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval)
		return retval;

	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval;

	if (dqm->is_hws_hang)
		return -EIO;
	retval = unmap_queues_cpsch(dqm, filter, filter_param);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
		dqm->is_hws_hang = true;
		schedule_work(&dqm->hw_exception_work);
		return retval;
	}

	return map_queues_cpsch(dqm);
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	dqm_lock(dqm);

	if (qpd->is_debug) {
		/*
		 * error: we currently do not allow destroying a queue of a
		 * process that is being debugged
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr) {
		retval = -ENOMEM;
		goto failed;
	}

	deallocate_doorbell(qpd, q);

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	}

	list_del(&q->list);
	qpd->queue_count--;
	if (q->properties.is_active) {
		dqm->queue_count--;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval == -ETIME)
			qpd->reset_wavefronts = true;
	}

	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);

	return retval;

failed:
failed_try_destroy_debugged_queue:

	dqm_unlock(dqm);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF
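
/*
 * Worked example for the validity checks in set_cache_memory_policy() below
 * (values are illustrative only): alternate_aperture_base = 0x100000000
 * with alternate_aperture_size = 0x200000000 gives limit = 0x2FFFFFFFF.
 * base & APE1_FIXED_BITS_MASK == 0 and
 * limit & APE1_FIXED_BITS_MASK == APE1_LIMIT_ALIGNMENT, so the pair is
 * accepted and programmed as sh_mem_ape1_base = base >> 16 = 0x10000 and
 * sh_mem_ape1_limit = limit >> 16 = 0x2FFFF.
 */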

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval = true;

	if (!dqm->asic_ops.set_cache_memory_policy)
		return retval;

	dqm_lock(dqm);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		    (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	dqm_unlock(dqm);
	return retval;
}

static int set_trap_handler(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				uint64_t tba_addr,
				uint64_t tma_addr)
{
	uint64_t *tma;

	if (dqm->dev->cwsr_enabled) {
		/* Jump from CWSR trap handler to user trap */
		tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
		tma[0] = tba_addr;
		tma[1] = tma_addr;
	} else {
		qpd->tba_addr = tba_addr;
		qpd->tma_addr = tma_addr;
	}

	return 0;
}

static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q, *next;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;

	dqm_lock(dqm);

	/* Clear all user mode queues */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		int ret;

		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			kfd_dec_compute_active(dqm->dev);
			break;
		}
	}

	dqm_unlock(dqm);
	return retval;
}

static int get_wave_state(struct device_queue_manager *dqm,
			  struct queue *q,
			  void __user *ctl_stack,
			  u32 *ctl_stack_used_size,
			  u32 *save_area_used_size)
{
	struct mqd_manager *mqd_mgr;
	int r;

	dqm_lock(dqm);

	if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.is_active || !q->device->cwsr_enabled) {
		r = -EINVAL;
		goto dqm_unlock;
	}

	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (!mqd_mgr) {
		r = -ENOMEM;
		goto dqm_unlock;
	}

	if (!mqd_mgr->get_wave_state) {
		r = -EINVAL;
		goto dqm_unlock;
	}

	r = mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
			ctl_stack_used_size, save_area_used_size);

dqm_unlock:
	dqm_unlock(dqm);
	return r;
}

static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q, *next;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd_mgr;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;

	retval = 0;

	dqm_lock(dqm);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		dqm->queue_count--;
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
			dqm->sdma_queue_count--;
			deallocate_sdma_queue(dqm, q->sdma_id);
		}

		if (q->properties.is_active)
			dqm->queue_count--;

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			kfd_dec_compute_active(dqm->dev);
			break;
		}
	}

	retval = execute_queues_cpsch(dqm, filter, 0);
	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* lastly, free mqd resources */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd_mgr) {
			retval = -ENOMEM;
			goto out;
		}
		list_del(&q->list);
		qpd->queue_count--;
		mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
	}

out:
	dqm_unlock(dqm);
	return retval;
}

struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->device_info->asic_family) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		dqm->ops.get_wave_state = get_wave_state;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		dqm->ops.get_wave_state = get_wave_state;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_HAWAII:
		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
		break;

	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
		break;

	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		device_queue_manager_init_v9(&dqm->asic_ops);
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dev->device_info->asic_family);
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}
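
/*
 * Handle a GPU VM fault: look up the process behind the faulting PASID and
 * evict all of its queues on this device so the offending work is taken off
 * the hardware. Any restore is driven elsewhere by the eviction reference
 * counting described above.
 */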

int kfd_process_vm_fault(struct device_queue_manager *dqm,
			 unsigned int pasid)
{
	struct kfd_process_device *pdd;
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
	int ret = 0;

	if (!p)
		return -EINVAL;
	pdd = kfd_get_process_device_data(dqm->dev, p);
	if (pdd)
		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
	kfd_unref_process(p);

	return ret;
}

static void kfd_process_hw_exception(struct work_struct *work)
{
	struct device_queue_manager *dqm = container_of(work,
			struct device_queue_manager, hw_exception_work);
	amdgpu_amdkfd_gpu_reset(dqm->dev->kgd);
}

#if defined(CONFIG_DEBUG_FS)

static void seq_reg_dump(struct seq_file *m,
			 uint32_t (*dump)[2], uint32_t n_regs)
{
	uint32_t i, count;

	for (i = 0, count = 0; i < n_regs; i++) {
		if (count == 0 ||
		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
			seq_printf(m, "%s %08x: %08x",
				   i ? "\n" : "",
				   dump[i][0], dump[i][1]);
			count = 7;
		} else {
			seq_printf(m, " %08x", dump[i][1]);
			count--;
		}
	}

	seq_puts(m, "\n");
}

int dqm_debugfs_hqds(struct seq_file *m, void *data)
{
	struct device_queue_manager *dqm = data;
	uint32_t (*dump)[2], n_regs;
	int pipe, queue;
	int r = 0;

	r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
		KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs);
	if (!r) {
		seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n",
				KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
				KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
				KFD_CIK_HIQ_QUEUE);
		seq_reg_dump(m, dump, n_regs);

		kfree(dump);
	}

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
			if (!test_bit(pipe_offset + queue,
				      dqm->dev->shared_resources.queue_bitmap))
				continue;

			r = dqm->dev->kfd2kgd->hqd_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, " CP Pipe %d, Queue %d\n",
					pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) {
		for (queue = 0;
		     queue < dqm->dev->device_info->num_sdma_queues_per_engine;
		     queue++) {
			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, " SDMA Engine %d, RLC %d\n",
					pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	return r;
}

int dqm_debugfs_execute_queues(struct device_queue_manager *dqm)
{
	int r = 0;

	dqm_lock(dqm);
	dqm->active_runlist = true;
	r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	dqm_unlock(dqm);

	return r;
}

#endif