1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <linux/ratelimit.h> 25 #include <linux/printk.h> 26 #include <linux/slab.h> 27 #include <linux/list.h> 28 #include <linux/types.h> 29 #include <linux/bitops.h> 30 #include <linux/sched.h> 31 #include "kfd_priv.h" 32 #include "kfd_device_queue_manager.h" 33 #include "kfd_mqd_manager.h" 34 #include "cik_regs.h" 35 #include "kfd_kernel_queue.h" 36 #include "amdgpu_amdkfd.h" 37 38 /* Size of the per-pipe EOP queue */ 39 #define CIK_HPD_EOP_BYTES_LOG2 11 40 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) 41 42 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, 43 u32 pasid, unsigned int vmid); 44 45 static int execute_queues_cpsch(struct device_queue_manager *dqm, 46 enum kfd_unmap_queues_filter filter, 47 uint32_t filter_param); 48 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 49 enum kfd_unmap_queues_filter filter, 50 uint32_t filter_param); 51 52 static int map_queues_cpsch(struct device_queue_manager *dqm); 53 54 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 55 struct queue *q); 56 57 static inline void deallocate_hqd(struct device_queue_manager *dqm, 58 struct queue *q); 59 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q); 60 static int allocate_sdma_queue(struct device_queue_manager *dqm, 61 struct queue *q); 62 static void kfd_process_hw_exception(struct work_struct *work); 63 64 static inline 65 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) 66 { 67 if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) 68 return KFD_MQD_TYPE_SDMA; 69 return KFD_MQD_TYPE_CP; 70 } 71 72 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) 73 { 74 int i; 75 int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec 76 + pipe) * dqm->dev->shared_resources.num_queue_per_pipe; 77 78 /* queue is available for KFD usage if bit is 1 */ 79 for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i) 80 if (test_bit(pipe_offset + i, 81 dqm->dev->shared_resources.cp_queue_bitmap)) 82 return true; 83 return false; 84 } 85 86 unsigned int get_cp_queues_num(struct device_queue_manager *dqm) 87 { 88 return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap, 89 KGD_MAX_QUEUES); 90 } 91 92 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) 93 { 94 return 
dqm->dev->shared_resources.num_queue_per_pipe; 95 } 96 97 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) 98 { 99 return dqm->dev->shared_resources.num_pipe_per_mec; 100 } 101 102 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) 103 { 104 return kfd_get_num_sdma_engines(dqm->dev) + 105 kfd_get_num_xgmi_sdma_engines(dqm->dev); 106 } 107 108 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) 109 { 110 return kfd_get_num_sdma_engines(dqm->dev) * 111 dqm->dev->device_info.num_sdma_queues_per_engine; 112 } 113 114 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) 115 { 116 return kfd_get_num_xgmi_sdma_engines(dqm->dev) * 117 dqm->dev->device_info.num_sdma_queues_per_engine; 118 } 119 120 void program_sh_mem_settings(struct device_queue_manager *dqm, 121 struct qcm_process_device *qpd) 122 { 123 return dqm->dev->kfd2kgd->program_sh_mem_settings( 124 dqm->dev->adev, qpd->vmid, 125 qpd->sh_mem_config, 126 qpd->sh_mem_ape1_base, 127 qpd->sh_mem_ape1_limit, 128 qpd->sh_mem_bases); 129 } 130 131 static void increment_queue_count(struct device_queue_manager *dqm, 132 enum kfd_queue_type type) 133 { 134 dqm->active_queue_count++; 135 if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) 136 dqm->active_cp_queue_count++; 137 } 138 139 static void decrement_queue_count(struct device_queue_manager *dqm, 140 enum kfd_queue_type type) 141 { 142 dqm->active_queue_count--; 143 if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) 144 dqm->active_cp_queue_count--; 145 } 146 147 static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) 148 { 149 struct kfd_dev *dev = qpd->dqm->dev; 150 151 if (!KFD_IS_SOC15(dev)) { 152 /* On pre-SOC15 chips we need to use the queue ID to 153 * preserve the user mode ABI. 154 */ 155 q->doorbell_id = q->properties.queue_id; 156 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 157 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 158 /* For SDMA queues on SOC15 with 8-byte doorbell, use static 159 * doorbell assignments based on the engine and queue id. 160 * The doorbell index distance between RLC queues (2*i) and (2*i+1) 161 * of an SDMA engine is 512.
162 */ 163 uint32_t *idx_offset = 164 dev->shared_resources.sdma_doorbell_idx; 165 166 q->doorbell_id = idx_offset[q->properties.sdma_engine_id] 167 + (q->properties.sdma_queue_id & 1) 168 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET 169 + (q->properties.sdma_queue_id >> 1); 170 } else { 171 /* For CP queues on SOC15 reserve a free doorbell ID */ 172 unsigned int found; 173 174 found = find_first_zero_bit(qpd->doorbell_bitmap, 175 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); 176 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { 177 pr_debug("No doorbells available"); 178 return -EBUSY; 179 } 180 set_bit(found, qpd->doorbell_bitmap); 181 q->doorbell_id = found; 182 } 183 184 q->properties.doorbell_off = 185 kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd), 186 q->doorbell_id); 187 return 0; 188 } 189 190 static void deallocate_doorbell(struct qcm_process_device *qpd, 191 struct queue *q) 192 { 193 unsigned int old; 194 struct kfd_dev *dev = qpd->dqm->dev; 195 196 if (!KFD_IS_SOC15(dev) || 197 q->properties.type == KFD_QUEUE_TYPE_SDMA || 198 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 199 return; 200 201 old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); 202 WARN_ON(!old); 203 } 204 205 static void program_trap_handler_settings(struct device_queue_manager *dqm, 206 struct qcm_process_device *qpd) 207 { 208 if (dqm->dev->kfd2kgd->program_trap_handler_settings) 209 dqm->dev->kfd2kgd->program_trap_handler_settings( 210 dqm->dev->adev, qpd->vmid, 211 qpd->tba_addr, qpd->tma_addr); 212 } 213 214 static int allocate_vmid(struct device_queue_manager *dqm, 215 struct qcm_process_device *qpd, 216 struct queue *q) 217 { 218 int allocated_vmid = -1, i; 219 220 for (i = dqm->dev->vm_info.first_vmid_kfd; 221 i <= dqm->dev->vm_info.last_vmid_kfd; i++) { 222 if (!dqm->vmid_pasid[i]) { 223 allocated_vmid = i; 224 break; 225 } 226 } 227 228 if (allocated_vmid < 0) { 229 pr_err("no more vmid to allocate\n"); 230 return -ENOSPC; 231 } 232 233 pr_debug("vmid allocated: %d\n", allocated_vmid); 234 235 dqm->vmid_pasid[allocated_vmid] = q->process->pasid; 236 237 set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid); 238 239 qpd->vmid = allocated_vmid; 240 q->properties.vmid = allocated_vmid; 241 242 program_sh_mem_settings(dqm, qpd); 243 244 if (KFD_IS_SOC15(dqm->dev) && dqm->dev->cwsr_enabled) 245 program_trap_handler_settings(dqm, qpd); 246 247 /* qpd->page_table_base is set earlier when register_process() 248 * is called, i.e. when the first queue is created. 
249 */ 250 dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev, 251 qpd->vmid, 252 qpd->page_table_base); 253 /* invalidate the VM context after pasid and vmid mapping is set up */ 254 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); 255 256 if (dqm->dev->kfd2kgd->set_scratch_backing_va) 257 dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev, 258 qpd->sh_hidden_private_base, qpd->vmid); 259 260 return 0; 261 } 262 263 static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, 264 struct qcm_process_device *qpd) 265 { 266 const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf; 267 int ret; 268 269 if (!qpd->ib_kaddr) 270 return -ENOMEM; 271 272 ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); 273 if (ret) 274 return ret; 275 276 return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid, 277 qpd->ib_base, (uint32_t *)qpd->ib_kaddr, 278 pmf->release_mem_size / sizeof(uint32_t)); 279 } 280 281 static void deallocate_vmid(struct device_queue_manager *dqm, 282 struct qcm_process_device *qpd, 283 struct queue *q) 284 { 285 /* On GFX v7, CP doesn't flush TC at dequeue */ 286 if (q->device->adev->asic_type == CHIP_HAWAII) 287 if (flush_texture_cache_nocpsch(q->device, qpd)) 288 pr_err("Failed to flush TC\n"); 289 290 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); 291 292 /* Release the vmid mapping */ 293 set_pasid_vmid_mapping(dqm, 0, qpd->vmid); 294 dqm->vmid_pasid[qpd->vmid] = 0; 295 296 qpd->vmid = 0; 297 q->properties.vmid = 0; 298 } 299 300 static int create_queue_nocpsch(struct device_queue_manager *dqm, 301 struct queue *q, 302 struct qcm_process_device *qpd) 303 { 304 struct mqd_manager *mqd_mgr; 305 int retval; 306 307 dqm_lock(dqm); 308 309 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 310 pr_warn("Can't create new usermode queue because %d queues were already created\n", 311 dqm->total_queue_count); 312 retval = -EPERM; 313 goto out_unlock; 314 } 315 316 if (list_empty(&qpd->queues_list)) { 317 retval = allocate_vmid(dqm, qpd, q); 318 if (retval) 319 goto out_unlock; 320 } 321 q->properties.vmid = qpd->vmid; 322 /* 323 * Eviction state logic: mark all queues as evicted, even ones 324 * not currently active. Restoring inactive queues later only 325 * updates the is_evicted flag but is a no-op otherwise. 
326 */ 327 q->properties.is_evicted = !!qpd->evicted; 328 329 q->properties.tba_addr = qpd->tba_addr; 330 q->properties.tma_addr = qpd->tma_addr; 331 332 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 333 q->properties.type)]; 334 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { 335 retval = allocate_hqd(dqm, q); 336 if (retval) 337 goto deallocate_vmid; 338 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", 339 q->pipe, q->queue); 340 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 341 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 342 retval = allocate_sdma_queue(dqm, q); 343 if (retval) 344 goto deallocate_vmid; 345 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 346 } 347 348 retval = allocate_doorbell(qpd, q); 349 if (retval) 350 goto out_deallocate_hqd; 351 352 /* Temporarily release dqm lock to avoid a circular lock dependency */ 353 dqm_unlock(dqm); 354 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); 355 dqm_lock(dqm); 356 357 if (!q->mqd_mem_obj) { 358 retval = -ENOMEM; 359 goto out_deallocate_doorbell; 360 } 361 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 362 &q->gart_mqd_addr, &q->properties); 363 if (q->properties.is_active) { 364 if (!dqm->sched_running) { 365 WARN_ONCE(1, "Load non-HWS mqd while stopped\n"); 366 goto add_queue_to_list; 367 } 368 369 if (WARN(q->process->mm != current->mm, 370 "should only run in user thread")) 371 retval = -EFAULT; 372 else 373 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 374 q->queue, &q->properties, current->mm); 375 if (retval) 376 goto out_free_mqd; 377 } 378 379 add_queue_to_list: 380 list_add(&q->list, &qpd->queues_list); 381 qpd->queue_count++; 382 if (q->properties.is_active) 383 increment_queue_count(dqm, q->properties.type); 384 385 /* 386 * Unconditionally increment this counter, regardless of the queue's 387 * type or whether the queue is active. 
388 */ 389 dqm->total_queue_count++; 390 pr_debug("Total of %d queues are accountable so far\n", 391 dqm->total_queue_count); 392 goto out_unlock; 393 394 out_free_mqd: 395 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 396 out_deallocate_doorbell: 397 deallocate_doorbell(qpd, q); 398 out_deallocate_hqd: 399 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 400 deallocate_hqd(dqm, q); 401 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 402 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 403 deallocate_sdma_queue(dqm, q); 404 deallocate_vmid: 405 if (list_empty(&qpd->queues_list)) 406 deallocate_vmid(dqm, qpd, q); 407 out_unlock: 408 dqm_unlock(dqm); 409 return retval; 410 } 411 412 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) 413 { 414 bool set; 415 int pipe, bit, i; 416 417 set = false; 418 419 for (pipe = dqm->next_pipe_to_allocate, i = 0; 420 i < get_pipes_per_mec(dqm); 421 pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) { 422 423 if (!is_pipe_enabled(dqm, 0, pipe)) 424 continue; 425 426 if (dqm->allocated_queues[pipe] != 0) { 427 bit = ffs(dqm->allocated_queues[pipe]) - 1; 428 dqm->allocated_queues[pipe] &= ~(1 << bit); 429 q->pipe = pipe; 430 q->queue = bit; 431 set = true; 432 break; 433 } 434 } 435 436 if (!set) 437 return -EBUSY; 438 439 pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue); 440 /* horizontal hqd allocation */ 441 dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm); 442 443 return 0; 444 } 445 446 static inline void deallocate_hqd(struct device_queue_manager *dqm, 447 struct queue *q) 448 { 449 dqm->allocated_queues[q->pipe] |= (1 << q->queue); 450 } 451 452 /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked 453 * to avoid asynchronized access 454 */ 455 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, 456 struct qcm_process_device *qpd, 457 struct queue *q) 458 { 459 int retval; 460 struct mqd_manager *mqd_mgr; 461 462 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 463 q->properties.type)]; 464 465 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 466 deallocate_hqd(dqm, q); 467 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 468 deallocate_sdma_queue(dqm, q); 469 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 470 deallocate_sdma_queue(dqm, q); 471 else { 472 pr_debug("q->properties.type %d is invalid\n", 473 q->properties.type); 474 return -EINVAL; 475 } 476 dqm->total_queue_count--; 477 478 deallocate_doorbell(qpd, q); 479 480 if (!dqm->sched_running) { 481 WARN_ONCE(1, "Destroy non-HWS queue while stopped\n"); 482 return 0; 483 } 484 485 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 486 KFD_PREEMPT_TYPE_WAVEFRONT_RESET, 487 KFD_UNMAP_LATENCY_MS, 488 q->pipe, q->queue); 489 if (retval == -ETIME) 490 qpd->reset_wavefronts = true; 491 492 list_del(&q->list); 493 if (list_empty(&qpd->queues_list)) { 494 if (qpd->reset_wavefronts) { 495 pr_warn("Resetting wave fronts (nocpsch) on dev %p\n", 496 dqm->dev); 497 /* dbgdev_wave_reset_wavefronts has to be called before 498 * deallocate_vmid(), i.e. when vmid is still in use. 
499 */ 500 dbgdev_wave_reset_wavefronts(dqm->dev, 501 qpd->pqm->process); 502 qpd->reset_wavefronts = false; 503 } 504 505 deallocate_vmid(dqm, qpd, q); 506 } 507 qpd->queue_count--; 508 if (q->properties.is_active) { 509 decrement_queue_count(dqm, q->properties.type); 510 if (q->properties.is_gws) { 511 dqm->gws_queue_count--; 512 qpd->mapped_gws_queue = false; 513 } 514 } 515 516 return retval; 517 } 518 519 static int destroy_queue_nocpsch(struct device_queue_manager *dqm, 520 struct qcm_process_device *qpd, 521 struct queue *q) 522 { 523 int retval; 524 uint64_t sdma_val = 0; 525 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 526 struct mqd_manager *mqd_mgr = 527 dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)]; 528 529 /* Get the SDMA queue stats */ 530 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 531 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 532 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 533 &sdma_val); 534 if (retval) 535 pr_err("Failed to read SDMA queue counter for queue: %d\n", 536 q->properties.queue_id); 537 } 538 539 dqm_lock(dqm); 540 retval = destroy_queue_nocpsch_locked(dqm, qpd, q); 541 if (!retval) 542 pdd->sdma_past_activity_counter += sdma_val; 543 dqm_unlock(dqm); 544 545 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 546 547 return retval; 548 } 549 550 static int update_queue(struct device_queue_manager *dqm, struct queue *q, 551 struct mqd_update_info *minfo) 552 { 553 int retval = 0; 554 struct mqd_manager *mqd_mgr; 555 struct kfd_process_device *pdd; 556 bool prev_active = false; 557 558 dqm_lock(dqm); 559 pdd = kfd_get_process_device_data(q->device, q->process); 560 if (!pdd) { 561 retval = -ENODEV; 562 goto out_unlock; 563 } 564 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 565 q->properties.type)]; 566 567 /* Save previous activity state for counters */ 568 prev_active = q->properties.is_active; 569 570 /* Make sure the queue is unmapped before updating the MQD */ 571 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 572 retval = unmap_queues_cpsch(dqm, 573 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 574 if (retval) { 575 pr_err("unmap queue failed\n"); 576 goto out_unlock; 577 } 578 } else if (prev_active && 579 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 580 q->properties.type == KFD_QUEUE_TYPE_SDMA || 581 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 582 583 if (!dqm->sched_running) { 584 WARN_ONCE(1, "Update non-HWS queue while stopped\n"); 585 goto out_unlock; 586 } 587 588 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 589 (dqm->dev->cwsr_enabled? 590 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: 591 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 592 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 593 if (retval) { 594 pr_err("destroy mqd failed\n"); 595 goto out_unlock; 596 } 597 } 598 599 mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo); 600 601 /* 602 * check active state vs. the previous state and modify 603 * counter accordingly. map_queues_cpsch uses the 604 * dqm->active_queue_count to determine whether a new runlist must be 605 * uploaded. 
606 */ 607 if (q->properties.is_active && !prev_active) 608 increment_queue_count(dqm, q->properties.type); 609 else if (!q->properties.is_active && prev_active) 610 decrement_queue_count(dqm, q->properties.type); 611 612 if (q->gws && !q->properties.is_gws) { 613 if (q->properties.is_active) { 614 dqm->gws_queue_count++; 615 pdd->qpd.mapped_gws_queue = true; 616 } 617 q->properties.is_gws = true; 618 } else if (!q->gws && q->properties.is_gws) { 619 if (q->properties.is_active) { 620 dqm->gws_queue_count--; 621 pdd->qpd.mapped_gws_queue = false; 622 } 623 q->properties.is_gws = false; 624 } 625 626 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) 627 retval = map_queues_cpsch(dqm); 628 else if (q->properties.is_active && 629 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 630 q->properties.type == KFD_QUEUE_TYPE_SDMA || 631 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 632 if (WARN(q->process->mm != current->mm, 633 "should only run in user thread")) 634 retval = -EFAULT; 635 else 636 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 637 q->pipe, q->queue, 638 &q->properties, current->mm); 639 } 640 641 out_unlock: 642 dqm_unlock(dqm); 643 return retval; 644 } 645 646 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, 647 struct qcm_process_device *qpd) 648 { 649 struct queue *q; 650 struct mqd_manager *mqd_mgr; 651 struct kfd_process_device *pdd; 652 int retval, ret = 0; 653 654 dqm_lock(dqm); 655 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 656 goto out; 657 658 pdd = qpd_to_pdd(qpd); 659 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 660 pdd->process->pasid); 661 662 pdd->last_evict_timestamp = get_jiffies_64(); 663 /* Mark all queues as evicted. Deactivate all active queues on 664 * the qpd. 665 */ 666 list_for_each_entry(q, &qpd->queues_list, list) { 667 q->properties.is_evicted = true; 668 if (!q->properties.is_active) 669 continue; 670 671 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 672 q->properties.type)]; 673 q->properties.is_active = false; 674 decrement_queue_count(dqm, q->properties.type); 675 if (q->properties.is_gws) { 676 dqm->gws_queue_count--; 677 qpd->mapped_gws_queue = false; 678 } 679 680 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) 681 continue; 682 683 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 684 (dqm->dev->cwsr_enabled? 685 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: 686 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 687 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 688 if (retval && !ret) 689 /* Return the first error, but keep going to 690 * maintain a consistent eviction state 691 */ 692 ret = retval; 693 } 694 695 out: 696 dqm_unlock(dqm); 697 return ret; 698 } 699 700 static int evict_process_queues_cpsch(struct device_queue_manager *dqm, 701 struct qcm_process_device *qpd) 702 { 703 struct queue *q; 704 struct kfd_process_device *pdd; 705 int retval = 0; 706 707 dqm_lock(dqm); 708 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 709 goto out; 710 711 pdd = qpd_to_pdd(qpd); 712 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 713 pdd->process->pasid); 714 715 /* Mark all queues as evicted. Deactivate all active queues on 716 * the qpd. 717 */ 718 list_for_each_entry(q, &qpd->queues_list, list) { 719 q->properties.is_evicted = true; 720 if (!q->properties.is_active) 721 continue; 722 723 q->properties.is_active = false; 724 decrement_queue_count(dqm, q->properties.type); 725 } 726 pdd->last_evict_timestamp = get_jiffies_64(); 727 retval = execute_queues_cpsch(dqm, 728 qpd->is_debug ? 
729 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : 730 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 731 732 out: 733 dqm_unlock(dqm); 734 return retval; 735 } 736 737 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, 738 struct qcm_process_device *qpd) 739 { 740 struct mm_struct *mm = NULL; 741 struct queue *q; 742 struct mqd_manager *mqd_mgr; 743 struct kfd_process_device *pdd; 744 uint64_t pd_base; 745 uint64_t eviction_duration; 746 int retval, ret = 0; 747 748 pdd = qpd_to_pdd(qpd); 749 /* Retrieve PD base */ 750 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 751 752 dqm_lock(dqm); 753 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 754 goto out; 755 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 756 qpd->evicted--; 757 goto out; 758 } 759 760 pr_debug_ratelimited("Restoring PASID 0x%x queues\n", 761 pdd->process->pasid); 762 763 /* Update PD Base in QPD */ 764 qpd->page_table_base = pd_base; 765 pr_debug("Updated PD address to 0x%llx\n", pd_base); 766 767 if (!list_empty(&qpd->queues_list)) { 768 dqm->dev->kfd2kgd->set_vm_context_page_table_base( 769 dqm->dev->adev, 770 qpd->vmid, 771 qpd->page_table_base); 772 kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); 773 } 774 775 /* Take a safe reference to the mm_struct, which may otherwise 776 * disappear even while the kfd_process is still referenced. 777 */ 778 mm = get_task_mm(pdd->process->lead_thread); 779 if (!mm) { 780 ret = -EFAULT; 781 goto out; 782 } 783 784 /* Remove the eviction flags. Activate queues that are not 785 * inactive for other reasons. 786 */ 787 list_for_each_entry(q, &qpd->queues_list, list) { 788 q->properties.is_evicted = false; 789 if (!QUEUE_IS_ACTIVE(q->properties)) 790 continue; 791 792 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 793 q->properties.type)]; 794 q->properties.is_active = true; 795 increment_queue_count(dqm, q->properties.type); 796 if (q->properties.is_gws) { 797 dqm->gws_queue_count++; 798 qpd->mapped_gws_queue = true; 799 } 800 801 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) 802 continue; 803 804 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 805 q->queue, &q->properties, mm); 806 if (retval && !ret) 807 /* Return the first error, but keep going to 808 * maintain a consistent eviction state 809 */ 810 ret = retval; 811 } 812 qpd->evicted = 0; 813 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 814 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 815 out: 816 if (mm) 817 mmput(mm); 818 dqm_unlock(dqm); 819 return ret; 820 } 821 822 static int restore_process_queues_cpsch(struct device_queue_manager *dqm, 823 struct qcm_process_device *qpd) 824 { 825 struct queue *q; 826 struct kfd_process_device *pdd; 827 uint64_t pd_base; 828 uint64_t eviction_duration; 829 int retval = 0; 830 831 pdd = qpd_to_pdd(qpd); 832 /* Retrieve PD base */ 833 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 834 835 dqm_lock(dqm); 836 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 837 goto out; 838 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 839 qpd->evicted--; 840 goto out; 841 } 842 843 pr_debug_ratelimited("Restoring PASID 0x%x queues\n", 844 pdd->process->pasid); 845 846 /* Update PD Base in QPD */ 847 qpd->page_table_base = pd_base; 848 pr_debug("Updated PD address to 0x%llx\n", pd_base); 849 850 /* activate all active queues on the qpd */ 851 list_for_each_entry(q, &qpd->queues_list, list) { 852 q->properties.is_evicted = 
false; 853 if (!QUEUE_IS_ACTIVE(q->properties)) 854 continue; 855 856 q->properties.is_active = true; 857 increment_queue_count(dqm, q->properties.type); 858 } 859 retval = execute_queues_cpsch(dqm, 860 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 861 qpd->evicted = 0; 862 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 863 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 864 out: 865 dqm_unlock(dqm); 866 return retval; 867 } 868 869 static int register_process(struct device_queue_manager *dqm, 870 struct qcm_process_device *qpd) 871 { 872 struct device_process_node *n; 873 struct kfd_process_device *pdd; 874 uint64_t pd_base; 875 int retval; 876 877 n = kzalloc(sizeof(*n), GFP_KERNEL); 878 if (!n) 879 return -ENOMEM; 880 881 n->qpd = qpd; 882 883 pdd = qpd_to_pdd(qpd); 884 /* Retrieve PD base */ 885 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 886 887 dqm_lock(dqm); 888 list_add(&n->list, &dqm->queues); 889 890 /* Update PD Base in QPD */ 891 qpd->page_table_base = pd_base; 892 pr_debug("Updated PD address to 0x%llx\n", pd_base); 893 894 retval = dqm->asic_ops.update_qpd(dqm, qpd); 895 896 dqm->processes_count++; 897 898 dqm_unlock(dqm); 899 900 /* Outside the DQM lock because under the DQM lock we can't do 901 * reclaim or take other locks that others hold while reclaiming. 902 */ 903 kfd_inc_compute_active(dqm->dev); 904 905 return retval; 906 } 907 908 static int unregister_process(struct device_queue_manager *dqm, 909 struct qcm_process_device *qpd) 910 { 911 int retval; 912 struct device_process_node *cur, *next; 913 914 pr_debug("qpd->queues_list is %s\n", 915 list_empty(&qpd->queues_list) ? "empty" : "not empty"); 916 917 retval = 0; 918 dqm_lock(dqm); 919 920 list_for_each_entry_safe(cur, next, &dqm->queues, list) { 921 if (qpd == cur->qpd) { 922 list_del(&cur->list); 923 kfree(cur); 924 dqm->processes_count--; 925 goto out; 926 } 927 } 928 /* qpd not found in dqm list */ 929 retval = 1; 930 out: 931 dqm_unlock(dqm); 932 933 /* Outside the DQM lock because under the DQM lock we can't do 934 * reclaim or take other locks that others hold while reclaiming. 
935 */ 936 if (!retval) 937 kfd_dec_compute_active(dqm->dev); 938 939 return retval; 940 } 941 942 static int 943 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, 944 unsigned int vmid) 945 { 946 return dqm->dev->kfd2kgd->set_pasid_vmid_mapping( 947 dqm->dev->adev, pasid, vmid); 948 } 949 950 static void init_interrupts(struct device_queue_manager *dqm) 951 { 952 unsigned int i; 953 954 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) 955 if (is_pipe_enabled(dqm, 0, i)) 956 dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, i); 957 } 958 959 static int initialize_nocpsch(struct device_queue_manager *dqm) 960 { 961 int pipe, queue; 962 963 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 964 965 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), 966 sizeof(unsigned int), GFP_KERNEL); 967 if (!dqm->allocated_queues) 968 return -ENOMEM; 969 970 mutex_init(&dqm->lock_hidden); 971 INIT_LIST_HEAD(&dqm->queues); 972 dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; 973 dqm->active_cp_queue_count = 0; 974 dqm->gws_queue_count = 0; 975 976 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 977 int pipe_offset = pipe * get_queues_per_pipe(dqm); 978 979 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) 980 if (test_bit(pipe_offset + queue, 981 dqm->dev->shared_resources.cp_queue_bitmap)) 982 dqm->allocated_queues[pipe] |= 1 << queue; 983 } 984 985 memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); 986 987 dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm)); 988 dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm)); 989 990 return 0; 991 } 992 993 static void uninitialize(struct device_queue_manager *dqm) 994 { 995 int i; 996 997 WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0); 998 999 kfree(dqm->allocated_queues); 1000 for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) 1001 kfree(dqm->mqd_mgrs[i]); 1002 mutex_destroy(&dqm->lock_hidden); 1003 } 1004 1005 static int start_nocpsch(struct device_queue_manager *dqm) 1006 { 1007 pr_info("SW scheduler is used"); 1008 init_interrupts(dqm); 1009 1010 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1011 return pm_init(&dqm->packet_mgr, dqm); 1012 dqm->sched_running = true; 1013 1014 return 0; 1015 } 1016 1017 static int stop_nocpsch(struct device_queue_manager *dqm) 1018 { 1019 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1020 pm_uninit(&dqm->packet_mgr, false); 1021 dqm->sched_running = false; 1022 1023 return 0; 1024 } 1025 1026 static void pre_reset(struct device_queue_manager *dqm) 1027 { 1028 dqm_lock(dqm); 1029 dqm->is_resetting = true; 1030 dqm_unlock(dqm); 1031 } 1032 1033 static int allocate_sdma_queue(struct device_queue_manager *dqm, 1034 struct queue *q) 1035 { 1036 int bit; 1037 1038 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1039 if (dqm->sdma_bitmap == 0) { 1040 pr_err("No more SDMA queue to allocate\n"); 1041 return -ENOMEM; 1042 } 1043 1044 bit = __ffs64(dqm->sdma_bitmap); 1045 dqm->sdma_bitmap &= ~(1ULL << bit); 1046 q->sdma_id = bit; 1047 q->properties.sdma_engine_id = q->sdma_id % 1048 kfd_get_num_sdma_engines(dqm->dev); 1049 q->properties.sdma_queue_id = q->sdma_id / 1050 kfd_get_num_sdma_engines(dqm->dev); 1051 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1052 if (dqm->xgmi_sdma_bitmap == 0) { 1053 pr_err("No more XGMI SDMA queue to allocate\n"); 1054 return -ENOMEM; 1055 } 1056 bit = __ffs64(dqm->xgmi_sdma_bitmap); 1057 dqm->xgmi_sdma_bitmap &= ~(1ULL << bit); 1058 q->sdma_id = bit; 1059 /* sdma_engine_id is sdma id including 1060 * 
both PCIe-optimized SDMAs and XGMI- 1061 * optimized SDMAs. The calculation below 1062 * assumes the first N engines are always 1063 * PCIe-optimized ones 1064 */ 1065 q->properties.sdma_engine_id = 1066 kfd_get_num_sdma_engines(dqm->dev) + 1067 q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); 1068 q->properties.sdma_queue_id = q->sdma_id / 1069 kfd_get_num_xgmi_sdma_engines(dqm->dev); 1070 } 1071 1072 pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); 1073 pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); 1074 1075 return 0; 1076 } 1077 1078 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 1079 struct queue *q) 1080 { 1081 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1082 if (q->sdma_id >= get_num_sdma_queues(dqm)) 1083 return; 1084 dqm->sdma_bitmap |= (1ULL << q->sdma_id); 1085 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1086 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) 1087 return; 1088 dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id); 1089 } 1090 } 1091 1092 /* 1093 * Device Queue Manager implementation for cp scheduler 1094 */ 1095 1096 static int set_sched_resources(struct device_queue_manager *dqm) 1097 { 1098 int i, mec; 1099 struct scheduling_resources res; 1100 1101 res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap; 1102 1103 res.queue_mask = 0; 1104 for (i = 0; i < KGD_MAX_QUEUES; ++i) { 1105 mec = (i / dqm->dev->shared_resources.num_queue_per_pipe) 1106 / dqm->dev->shared_resources.num_pipe_per_mec; 1107 1108 if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap)) 1109 continue; 1110 1111 /* only acquire queues from the first MEC */ 1112 if (mec > 0) 1113 continue; 1114 1115 /* This situation may be hit in the future if a new HW 1116 * generation exposes more than 64 queues. 
If so, the 1117 * definition of res.queue_mask needs updating 1118 */ 1119 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) { 1120 pr_err("Invalid queue enabled by amdgpu: %d\n", i); 1121 break; 1122 } 1123 1124 res.queue_mask |= 1ull 1125 << amdgpu_queue_mask_bit_to_set_resource_bit( 1126 dqm->dev->adev, i); 1127 } 1128 res.gws_mask = ~0ull; 1129 res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; 1130 1131 pr_debug("Scheduling resources:\n" 1132 "vmid mask: 0x%8X\n" 1133 "queue mask: 0x%8llX\n", 1134 res.vmid_mask, res.queue_mask); 1135 1136 return pm_send_set_resources(&dqm->packet_mgr, &res); 1137 } 1138 1139 static int initialize_cpsch(struct device_queue_manager *dqm) 1140 { 1141 uint64_t num_sdma_queues; 1142 uint64_t num_xgmi_sdma_queues; 1143 1144 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1145 1146 mutex_init(&dqm->lock_hidden); 1147 INIT_LIST_HEAD(&dqm->queues); 1148 dqm->active_queue_count = dqm->processes_count = 0; 1149 dqm->active_cp_queue_count = 0; 1150 dqm->gws_queue_count = 0; 1151 dqm->active_runlist = false; 1152 1153 num_sdma_queues = get_num_sdma_queues(dqm); 1154 if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap)) 1155 dqm->sdma_bitmap = ULLONG_MAX; 1156 else 1157 dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1); 1158 1159 num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm); 1160 if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap)) 1161 dqm->xgmi_sdma_bitmap = ULLONG_MAX; 1162 else 1163 dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) - 1); 1164 1165 INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); 1166 1167 return 0; 1168 } 1169 1170 static int start_cpsch(struct device_queue_manager *dqm) 1171 { 1172 int retval; 1173 1174 retval = 0; 1175 1176 dqm_lock(dqm); 1177 retval = pm_init(&dqm->packet_mgr, dqm); 1178 if (retval) 1179 goto fail_packet_manager_init; 1180 1181 retval = set_sched_resources(dqm); 1182 if (retval) 1183 goto fail_set_sched_resources; 1184 1185 pr_debug("Allocating fence memory\n"); 1186 1187 /* allocate fence memory on the gart */ 1188 retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), 1189 &dqm->fence_mem); 1190 1191 if (retval) 1192 goto fail_allocate_vidmem; 1193 1194 dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr; 1195 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; 1196 1197 init_interrupts(dqm); 1198 1199 /* clear hang status when driver try to start the hw scheduler */ 1200 dqm->is_hws_hang = false; 1201 dqm->is_resetting = false; 1202 dqm->sched_running = true; 1203 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1204 dqm_unlock(dqm); 1205 1206 return 0; 1207 fail_allocate_vidmem: 1208 fail_set_sched_resources: 1209 pm_uninit(&dqm->packet_mgr, false); 1210 fail_packet_manager_init: 1211 dqm_unlock(dqm); 1212 return retval; 1213 } 1214 1215 static int stop_cpsch(struct device_queue_manager *dqm) 1216 { 1217 bool hanging; 1218 1219 dqm_lock(dqm); 1220 if (!dqm->sched_running) { 1221 dqm_unlock(dqm); 1222 return 0; 1223 } 1224 1225 if (!dqm->is_hws_hang) 1226 unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); 1227 hanging = dqm->is_hws_hang || dqm->is_resetting; 1228 dqm->sched_running = false; 1229 1230 pm_release_ib(&dqm->packet_mgr); 1231 1232 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 1233 pm_uninit(&dqm->packet_mgr, hanging); 1234 dqm_unlock(dqm); 1235 1236 return 0; 1237 } 1238 1239 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, 1240 struct kernel_queue *kq, 1241 struct qcm_process_device *qpd) 1242 { 
1243 dqm_lock(dqm); 1244 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1245 pr_warn("Can't create new kernel queue because %d queues were already created\n", 1246 dqm->total_queue_count); 1247 dqm_unlock(dqm); 1248 return -EPERM; 1249 } 1250 1251 /* 1252 * Unconditionally increment this counter, regardless of the queue's 1253 * type or whether the queue is active. 1254 */ 1255 dqm->total_queue_count++; 1256 pr_debug("Total of %d queues are accountable so far\n", 1257 dqm->total_queue_count); 1258 1259 list_add(&kq->list, &qpd->priv_queue_list); 1260 increment_queue_count(dqm, kq->queue->properties.type); 1261 qpd->is_debug = true; 1262 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1263 dqm_unlock(dqm); 1264 1265 return 0; 1266 } 1267 1268 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, 1269 struct kernel_queue *kq, 1270 struct qcm_process_device *qpd) 1271 { 1272 dqm_lock(dqm); 1273 list_del(&kq->list); 1274 decrement_queue_count(dqm, kq->queue->properties.type); 1275 qpd->is_debug = false; 1276 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); 1277 /* 1278 * Unconditionally decrement this counter, regardless of the queue's 1279 * type. 1280 */ 1281 dqm->total_queue_count--; 1282 pr_debug("Total of %d queues are accountable so far\n", 1283 dqm->total_queue_count); 1284 dqm_unlock(dqm); 1285 } 1286 1287 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, 1288 struct qcm_process_device *qpd) 1289 { 1290 int retval; 1291 struct mqd_manager *mqd_mgr; 1292 1293 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1294 pr_warn("Can't create new usermode queue because %d queues were already created\n", 1295 dqm->total_queue_count); 1296 retval = -EPERM; 1297 goto out; 1298 } 1299 1300 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1301 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1302 dqm_lock(dqm); 1303 retval = allocate_sdma_queue(dqm, q); 1304 dqm_unlock(dqm); 1305 if (retval) 1306 goto out; 1307 } 1308 1309 retval = allocate_doorbell(qpd, q); 1310 if (retval) 1311 goto out_deallocate_sdma_queue; 1312 1313 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1314 q->properties.type)]; 1315 1316 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1317 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 1318 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 1319 q->properties.tba_addr = qpd->tba_addr; 1320 q->properties.tma_addr = qpd->tma_addr; 1321 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); 1322 if (!q->mqd_mem_obj) { 1323 retval = -ENOMEM; 1324 goto out_deallocate_doorbell; 1325 } 1326 1327 dqm_lock(dqm); 1328 /* 1329 * Eviction state logic: mark all queues as evicted, even ones 1330 * not currently active. Restoring inactive queues later only 1331 * updates the is_evicted flag but is a no-op otherwise. 1332 */ 1333 q->properties.is_evicted = !!qpd->evicted; 1334 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 1335 &q->gart_mqd_addr, &q->properties); 1336 1337 list_add(&q->list, &qpd->queues_list); 1338 qpd->queue_count++; 1339 1340 if (q->properties.is_active) { 1341 increment_queue_count(dqm, q->properties.type); 1342 1343 execute_queues_cpsch(dqm, 1344 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1345 } 1346 1347 /* 1348 * Unconditionally increment this counter, regardless of the queue's 1349 * type or whether the queue is active. 
1350 */ 1351 dqm->total_queue_count++; 1352 1353 pr_debug("Total of %d queues are accountable so far\n", 1354 dqm->total_queue_count); 1355 1356 dqm_unlock(dqm); 1357 return retval; 1358 1359 out_deallocate_doorbell: 1360 deallocate_doorbell(qpd, q); 1361 out_deallocate_sdma_queue: 1362 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1363 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1364 dqm_lock(dqm); 1365 deallocate_sdma_queue(dqm, q); 1366 dqm_unlock(dqm); 1367 } 1368 out: 1369 return retval; 1370 } 1371 1372 int amdkfd_fence_wait_timeout(uint64_t *fence_addr, 1373 uint64_t fence_value, 1374 unsigned int timeout_ms) 1375 { 1376 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; 1377 1378 while (*fence_addr != fence_value) { 1379 if (time_after(jiffies, end_jiffies)) { 1380 pr_err("qcm fence wait loop timeout expired\n"); 1381 /* In HWS case, this is used to halt the driver thread 1382 * in order not to mess up CP states before doing 1383 * scandumps for FW debugging. 1384 */ 1385 while (halt_if_hws_hang) 1386 schedule(); 1387 1388 return -ETIME; 1389 } 1390 schedule(); 1391 } 1392 1393 return 0; 1394 } 1395 1396 /* dqm->lock mutex has to be locked before calling this function */ 1397 static int map_queues_cpsch(struct device_queue_manager *dqm) 1398 { 1399 int retval; 1400 1401 if (!dqm->sched_running) 1402 return 0; 1403 if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) 1404 return 0; 1405 if (dqm->active_runlist) 1406 return 0; 1407 1408 retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues); 1409 pr_debug("%s sent runlist\n", __func__); 1410 if (retval) { 1411 pr_err("failed to execute runlist\n"); 1412 return retval; 1413 } 1414 dqm->active_runlist = true; 1415 1416 return retval; 1417 } 1418 1419 /* dqm->lock mutex has to be locked before calling this function */ 1420 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 1421 enum kfd_unmap_queues_filter filter, 1422 uint32_t filter_param) 1423 { 1424 int retval = 0; 1425 struct mqd_manager *mqd_mgr; 1426 1427 if (!dqm->sched_running) 1428 return 0; 1429 if (dqm->is_hws_hang || dqm->is_resetting) 1430 return -EIO; 1431 if (!dqm->active_runlist) 1432 return retval; 1433 1434 retval = pm_send_unmap_queue(&dqm->packet_mgr, KFD_QUEUE_TYPE_COMPUTE, 1435 filter, filter_param, false, 0); 1436 if (retval) 1437 return retval; 1438 1439 *dqm->fence_addr = KFD_FENCE_INIT; 1440 pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr, 1441 KFD_FENCE_COMPLETED); 1442 /* should be timed out */ 1443 retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, 1444 queue_preemption_timeout_ms); 1445 if (retval) { 1446 pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); 1447 dqm->is_hws_hang = true; 1448 /* It's possible we're detecting a HWS hang in the 1449 * middle of a GPU reset. No need to schedule another 1450 * reset in this case. 1451 */ 1452 if (!dqm->is_resetting) 1453 schedule_work(&dqm->hw_exception_work); 1454 return retval; 1455 } 1456 1457 /* In the current MEC firmware implementation, if compute queue 1458 * doesn't response to the preemption request in time, HIQ will 1459 * abandon the unmap request without returning any timeout error 1460 * to driver. Instead, MEC firmware will log the doorbell of the 1461 * unresponding compute queue to HIQ.MQD.queue_doorbell_id fields. 
1462 * To make sure the queue unmap was successful, driver need to 1463 * check those fields 1464 */ 1465 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; 1466 if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) { 1467 pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n"); 1468 while (halt_if_hws_hang) 1469 schedule(); 1470 return -ETIME; 1471 } 1472 1473 pm_release_ib(&dqm->packet_mgr); 1474 dqm->active_runlist = false; 1475 1476 return retval; 1477 } 1478 1479 /* dqm->lock mutex has to be locked before calling this function */ 1480 static int execute_queues_cpsch(struct device_queue_manager *dqm, 1481 enum kfd_unmap_queues_filter filter, 1482 uint32_t filter_param) 1483 { 1484 int retval; 1485 1486 if (dqm->is_hws_hang) 1487 return -EIO; 1488 retval = unmap_queues_cpsch(dqm, filter, filter_param); 1489 if (retval) 1490 return retval; 1491 1492 return map_queues_cpsch(dqm); 1493 } 1494 1495 static int destroy_queue_cpsch(struct device_queue_manager *dqm, 1496 struct qcm_process_device *qpd, 1497 struct queue *q) 1498 { 1499 int retval; 1500 struct mqd_manager *mqd_mgr; 1501 uint64_t sdma_val = 0; 1502 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 1503 1504 /* Get the SDMA queue stats */ 1505 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 1506 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 1507 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 1508 &sdma_val); 1509 if (retval) 1510 pr_err("Failed to read SDMA queue counter for queue: %d\n", 1511 q->properties.queue_id); 1512 } 1513 1514 retval = 0; 1515 1516 /* remove queue from list to prevent rescheduling after preemption */ 1517 dqm_lock(dqm); 1518 1519 if (qpd->is_debug) { 1520 /* 1521 * error, currently we do not allow to destroy a queue 1522 * of a currently debugged process 1523 */ 1524 retval = -EBUSY; 1525 goto failed_try_destroy_debugged_queue; 1526 1527 } 1528 1529 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1530 q->properties.type)]; 1531 1532 deallocate_doorbell(qpd, q); 1533 1534 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 1535 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 1536 deallocate_sdma_queue(dqm, q); 1537 pdd->sdma_past_activity_counter += sdma_val; 1538 } 1539 1540 list_del(&q->list); 1541 qpd->queue_count--; 1542 if (q->properties.is_active) { 1543 decrement_queue_count(dqm, q->properties.type); 1544 retval = execute_queues_cpsch(dqm, 1545 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1546 if (retval == -ETIME) 1547 qpd->reset_wavefronts = true; 1548 if (q->properties.is_gws) { 1549 dqm->gws_queue_count--; 1550 qpd->mapped_gws_queue = false; 1551 } 1552 } 1553 1554 /* 1555 * Unconditionally decrement this counter, regardless of the queue's 1556 * type 1557 */ 1558 dqm->total_queue_count--; 1559 pr_debug("Total of %d queues are accountable so far\n", 1560 dqm->total_queue_count); 1561 1562 dqm_unlock(dqm); 1563 1564 /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */ 1565 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1566 1567 return retval; 1568 1569 failed_try_destroy_debugged_queue: 1570 1571 dqm_unlock(dqm); 1572 return retval; 1573 } 1574 1575 /* 1576 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to 1577 * stay in user mode. 1578 */ 1579 #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL 1580 /* APE1 limit is inclusive and 64K aligned. 
*/ 1581 #define APE1_LIMIT_ALIGNMENT 0xFFFF 1582 1583 static bool set_cache_memory_policy(struct device_queue_manager *dqm, 1584 struct qcm_process_device *qpd, 1585 enum cache_policy default_policy, 1586 enum cache_policy alternate_policy, 1587 void __user *alternate_aperture_base, 1588 uint64_t alternate_aperture_size) 1589 { 1590 bool retval = true; 1591 1592 if (!dqm->asic_ops.set_cache_memory_policy) 1593 return retval; 1594 1595 dqm_lock(dqm); 1596 1597 if (alternate_aperture_size == 0) { 1598 /* base > limit disables APE1 */ 1599 qpd->sh_mem_ape1_base = 1; 1600 qpd->sh_mem_ape1_limit = 0; 1601 } else { 1602 /* 1603 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]}, 1604 * SH_MEM_APE1_BASE[31:0], 0x0000 } 1605 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]}, 1606 * SH_MEM_APE1_LIMIT[31:0], 0xFFFF } 1607 * Verify that the base and size parameters can be 1608 * represented in this format and convert them. 1609 * Additionally restrict APE1 to user-mode addresses. 1610 */ 1611 1612 uint64_t base = (uintptr_t)alternate_aperture_base; 1613 uint64_t limit = base + alternate_aperture_size - 1; 1614 1615 if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 || 1616 (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) { 1617 retval = false; 1618 goto out; 1619 } 1620 1621 qpd->sh_mem_ape1_base = base >> 16; 1622 qpd->sh_mem_ape1_limit = limit >> 16; 1623 } 1624 1625 retval = dqm->asic_ops.set_cache_memory_policy( 1626 dqm, 1627 qpd, 1628 default_policy, 1629 alternate_policy, 1630 alternate_aperture_base, 1631 alternate_aperture_size); 1632 1633 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) 1634 program_sh_mem_settings(dqm, qpd); 1635 1636 pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", 1637 qpd->sh_mem_config, qpd->sh_mem_ape1_base, 1638 qpd->sh_mem_ape1_limit); 1639 1640 out: 1641 dqm_unlock(dqm); 1642 return retval; 1643 } 1644 1645 static int process_termination_nocpsch(struct device_queue_manager *dqm, 1646 struct qcm_process_device *qpd) 1647 { 1648 struct queue *q; 1649 struct device_process_node *cur, *next_dpn; 1650 int retval = 0; 1651 bool found = false; 1652 1653 dqm_lock(dqm); 1654 1655 /* Clear all user mode queues */ 1656 while (!list_empty(&qpd->queues_list)) { 1657 struct mqd_manager *mqd_mgr; 1658 int ret; 1659 1660 q = list_first_entry(&qpd->queues_list, struct queue, list); 1661 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1662 q->properties.type)]; 1663 ret = destroy_queue_nocpsch_locked(dqm, qpd, q); 1664 if (ret) 1665 retval = ret; 1666 dqm_unlock(dqm); 1667 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1668 dqm_lock(dqm); 1669 } 1670 1671 /* Unregister process */ 1672 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 1673 if (qpd == cur->qpd) { 1674 list_del(&cur->list); 1675 kfree(cur); 1676 dqm->processes_count--; 1677 found = true; 1678 break; 1679 } 1680 } 1681 1682 dqm_unlock(dqm); 1683 1684 /* Outside the DQM lock because under the DQM lock we can't do 1685 * reclaim or take other locks that others hold while reclaiming. 
1686 */ 1687 if (found) 1688 kfd_dec_compute_active(dqm->dev); 1689 1690 return retval; 1691 } 1692 1693 static int get_wave_state(struct device_queue_manager *dqm, 1694 struct queue *q, 1695 void __user *ctl_stack, 1696 u32 *ctl_stack_used_size, 1697 u32 *save_area_used_size) 1698 { 1699 struct mqd_manager *mqd_mgr; 1700 1701 dqm_lock(dqm); 1702 1703 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 1704 1705 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || 1706 q->properties.is_active || !q->device->cwsr_enabled || 1707 !mqd_mgr->get_wave_state) { 1708 dqm_unlock(dqm); 1709 return -EINVAL; 1710 } 1711 1712 dqm_unlock(dqm); 1713 1714 /* 1715 * get_wave_state is outside the dqm lock to prevent circular locking 1716 * and the queue should be protected against destruction by the process 1717 * lock. 1718 */ 1719 return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack, 1720 ctl_stack_used_size, save_area_used_size); 1721 } 1722 1723 static int process_termination_cpsch(struct device_queue_manager *dqm, 1724 struct qcm_process_device *qpd) 1725 { 1726 int retval; 1727 struct queue *q; 1728 struct kernel_queue *kq, *kq_next; 1729 struct mqd_manager *mqd_mgr; 1730 struct device_process_node *cur, *next_dpn; 1731 enum kfd_unmap_queues_filter filter = 1732 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; 1733 bool found = false; 1734 1735 retval = 0; 1736 1737 dqm_lock(dqm); 1738 1739 /* Clean all kernel queues */ 1740 list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { 1741 list_del(&kq->list); 1742 decrement_queue_count(dqm, kq->queue->properties.type); 1743 qpd->is_debug = false; 1744 dqm->total_queue_count--; 1745 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; 1746 } 1747 1748 /* Clear all user mode queues */ 1749 list_for_each_entry(q, &qpd->queues_list, list) { 1750 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 1751 deallocate_sdma_queue(dqm, q); 1752 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 1753 deallocate_sdma_queue(dqm, q); 1754 1755 if (q->properties.is_active) { 1756 decrement_queue_count(dqm, q->properties.type); 1757 if (q->properties.is_gws) { 1758 dqm->gws_queue_count--; 1759 qpd->mapped_gws_queue = false; 1760 } 1761 } 1762 1763 dqm->total_queue_count--; 1764 } 1765 1766 /* Unregister process */ 1767 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 1768 if (qpd == cur->qpd) { 1769 list_del(&cur->list); 1770 kfree(cur); 1771 dqm->processes_count--; 1772 found = true; 1773 break; 1774 } 1775 } 1776 1777 retval = execute_queues_cpsch(dqm, filter, 0); 1778 if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) { 1779 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); 1780 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); 1781 qpd->reset_wavefronts = false; 1782 } 1783 1784 /* Lastly, free mqd resources. 1785 * Do free_mqd() after dqm_unlock to avoid circular locking. 1786 */ 1787 while (!list_empty(&qpd->queues_list)) { 1788 q = list_first_entry(&qpd->queues_list, struct queue, list); 1789 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1790 q->properties.type)]; 1791 list_del(&q->list); 1792 qpd->queue_count--; 1793 dqm_unlock(dqm); 1794 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1795 dqm_lock(dqm); 1796 } 1797 dqm_unlock(dqm); 1798 1799 /* Outside the DQM lock because under the DQM lock we can't do 1800 * reclaim or take other locks that others hold while reclaiming. 
1801 */ 1802 if (found) 1803 kfd_dec_compute_active(dqm->dev); 1804 1805 return retval; 1806 } 1807 1808 static int init_mqd_managers(struct device_queue_manager *dqm) 1809 { 1810 int i, j; 1811 struct mqd_manager *mqd_mgr; 1812 1813 for (i = 0; i < KFD_MQD_TYPE_MAX; i++) { 1814 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev); 1815 if (!mqd_mgr) { 1816 pr_err("mqd manager [%d] initialization failed\n", i); 1817 goto out_free; 1818 } 1819 dqm->mqd_mgrs[i] = mqd_mgr; 1820 } 1821 1822 return 0; 1823 1824 out_free: 1825 for (j = 0; j < i; j++) { 1826 kfree(dqm->mqd_mgrs[j]); 1827 dqm->mqd_mgrs[j] = NULL; 1828 } 1829 1830 return -ENOMEM; 1831 } 1832 1833 /* Allocate one hiq mqd (HWS) and all SDMA mqd in one contiguous chunk */ 1834 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) 1835 { 1836 int retval; 1837 struct kfd_dev *dev = dqm->dev; 1838 struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; 1839 uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * 1840 get_num_all_sdma_engines(dqm) * 1841 dev->device_info.num_sdma_queues_per_engine + 1842 dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; 1843 1844 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size, 1845 &(mem_obj->gtt_mem), &(mem_obj->gpu_addr), 1846 (void *)&(mem_obj->cpu_ptr), false); 1847 1848 return retval; 1849 } 1850 1851 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) 1852 { 1853 struct device_queue_manager *dqm; 1854 1855 pr_debug("Loading device queue manager\n"); 1856 1857 dqm = kzalloc(sizeof(*dqm), GFP_KERNEL); 1858 if (!dqm) 1859 return NULL; 1860 1861 switch (dev->adev->asic_type) { 1862 /* HWS is not available on Hawaii. */ 1863 case CHIP_HAWAII: 1864 /* HWS depends on CWSR for timely dequeue. CWSR is not 1865 * available on Tonga. 1866 * 1867 * FIXME: This argument also applies to Kaveri.
1868 */ 1869 case CHIP_TONGA: 1870 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; 1871 break; 1872 default: 1873 dqm->sched_policy = sched_policy; 1874 break; 1875 } 1876 1877 dqm->dev = dev; 1878 switch (dqm->sched_policy) { 1879 case KFD_SCHED_POLICY_HWS: 1880 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: 1881 /* initialize dqm for cp scheduling */ 1882 dqm->ops.create_queue = create_queue_cpsch; 1883 dqm->ops.initialize = initialize_cpsch; 1884 dqm->ops.start = start_cpsch; 1885 dqm->ops.stop = stop_cpsch; 1886 dqm->ops.pre_reset = pre_reset; 1887 dqm->ops.destroy_queue = destroy_queue_cpsch; 1888 dqm->ops.update_queue = update_queue; 1889 dqm->ops.register_process = register_process; 1890 dqm->ops.unregister_process = unregister_process; 1891 dqm->ops.uninitialize = uninitialize; 1892 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; 1893 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; 1894 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 1895 dqm->ops.process_termination = process_termination_cpsch; 1896 dqm->ops.evict_process_queues = evict_process_queues_cpsch; 1897 dqm->ops.restore_process_queues = restore_process_queues_cpsch; 1898 dqm->ops.get_wave_state = get_wave_state; 1899 break; 1900 case KFD_SCHED_POLICY_NO_HWS: 1901 /* initialize dqm for no cp scheduling */ 1902 dqm->ops.start = start_nocpsch; 1903 dqm->ops.stop = stop_nocpsch; 1904 dqm->ops.pre_reset = pre_reset; 1905 dqm->ops.create_queue = create_queue_nocpsch; 1906 dqm->ops.destroy_queue = destroy_queue_nocpsch; 1907 dqm->ops.update_queue = update_queue; 1908 dqm->ops.register_process = register_process; 1909 dqm->ops.unregister_process = unregister_process; 1910 dqm->ops.initialize = initialize_nocpsch; 1911 dqm->ops.uninitialize = uninitialize; 1912 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 1913 dqm->ops.process_termination = process_termination_nocpsch; 1914 dqm->ops.evict_process_queues = evict_process_queues_nocpsch; 1915 dqm->ops.restore_process_queues = 1916 restore_process_queues_nocpsch; 1917 dqm->ops.get_wave_state = get_wave_state; 1918 break; 1919 default: 1920 pr_err("Invalid scheduling policy %d\n", dqm->sched_policy); 1921 goto out_free; 1922 } 1923 1924 switch (dev->adev->asic_type) { 1925 case CHIP_CARRIZO: 1926 device_queue_manager_init_vi(&dqm->asic_ops); 1927 break; 1928 1929 case CHIP_KAVERI: 1930 device_queue_manager_init_cik(&dqm->asic_ops); 1931 break; 1932 1933 case CHIP_HAWAII: 1934 device_queue_manager_init_cik_hawaii(&dqm->asic_ops); 1935 break; 1936 1937 case CHIP_TONGA: 1938 case CHIP_FIJI: 1939 case CHIP_POLARIS10: 1940 case CHIP_POLARIS11: 1941 case CHIP_POLARIS12: 1942 case CHIP_VEGAM: 1943 device_queue_manager_init_vi_tonga(&dqm->asic_ops); 1944 break; 1945 1946 default: 1947 if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) 1948 device_queue_manager_init_v10_navi10(&dqm->asic_ops); 1949 else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) 1950 device_queue_manager_init_v9(&dqm->asic_ops); 1951 else { 1952 WARN(1, "Unexpected ASIC family %u", 1953 dev->adev->asic_type); 1954 goto out_free; 1955 } 1956 } 1957 1958 if (init_mqd_managers(dqm)) 1959 goto out_free; 1960 1961 if (allocate_hiq_sdma_mqd(dqm)) { 1962 pr_err("Failed to allocate hiq sdma mqd trunk buffer\n"); 1963 goto out_free; 1964 } 1965 1966 if (!dqm->ops.initialize(dqm)) 1967 return dqm; 1968 1969 out_free: 1970 kfree(dqm); 1971 return NULL; 1972 } 1973 1974 static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, 1975 struct kfd_mem_obj *mqd) 1976 { 1977 WARN(!mqd, "No hiq sdma 
mqd trunk to free"); 1978 1979 amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem); 1980 } 1981 1982 void device_queue_manager_uninit(struct device_queue_manager *dqm) 1983 { 1984 dqm->ops.uninitialize(dqm); 1985 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); 1986 kfree(dqm); 1987 } 1988 1989 int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid) 1990 { 1991 struct kfd_process_device *pdd; 1992 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); 1993 int ret = 0; 1994 1995 if (!p) 1996 return -EINVAL; 1997 WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); 1998 pdd = kfd_get_process_device_data(dqm->dev, p); 1999 if (pdd) 2000 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); 2001 kfd_unref_process(p); 2002 2003 return ret; 2004 } 2005 2006 static void kfd_process_hw_exception(struct work_struct *work) 2007 { 2008 struct device_queue_manager *dqm = container_of(work, 2009 struct device_queue_manager, hw_exception_work); 2010 amdgpu_amdkfd_gpu_reset(dqm->dev->adev); 2011 } 2012 2013 #if defined(CONFIG_DEBUG_FS) 2014 2015 static void seq_reg_dump(struct seq_file *m, 2016 uint32_t (*dump)[2], uint32_t n_regs) 2017 { 2018 uint32_t i, count; 2019 2020 for (i = 0, count = 0; i < n_regs; i++) { 2021 if (count == 0 || 2022 dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) { 2023 seq_printf(m, "%s %08x: %08x", 2024 i ? "\n" : "", 2025 dump[i][0], dump[i][1]); 2026 count = 7; 2027 } else { 2028 seq_printf(m, " %08x", dump[i][1]); 2029 count--; 2030 } 2031 } 2032 2033 seq_puts(m, "\n"); 2034 } 2035 2036 int dqm_debugfs_hqds(struct seq_file *m, void *data) 2037 { 2038 struct device_queue_manager *dqm = data; 2039 uint32_t (*dump)[2], n_regs; 2040 int pipe, queue; 2041 int r = 0; 2042 2043 if (!dqm->sched_running) { 2044 seq_printf(m, " Device is stopped\n"); 2045 2046 return 0; 2047 } 2048 2049 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 2050 KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, 2051 &dump, &n_regs); 2052 if (!r) { 2053 seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n", 2054 KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1, 2055 KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm), 2056 KFD_CIK_HIQ_QUEUE); 2057 seq_reg_dump(m, dump, n_regs); 2058 2059 kfree(dump); 2060 } 2061 2062 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 2063 int pipe_offset = pipe * get_queues_per_pipe(dqm); 2064 2065 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 2066 if (!test_bit(pipe_offset + queue, 2067 dqm->dev->shared_resources.cp_queue_bitmap)) 2068 continue; 2069 2070 r = dqm->dev->kfd2kgd->hqd_dump( 2071 dqm->dev->adev, pipe, queue, &dump, &n_regs); 2072 if (r) 2073 break; 2074 2075 seq_printf(m, " CP Pipe %d, Queue %d\n", 2076 pipe, queue); 2077 seq_reg_dump(m, dump, n_regs); 2078 2079 kfree(dump); 2080 } 2081 } 2082 2083 for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) { 2084 for (queue = 0; 2085 queue < dqm->dev->device_info.num_sdma_queues_per_engine; 2086 queue++) { 2087 r = dqm->dev->kfd2kgd->hqd_sdma_dump( 2088 dqm->dev->adev, pipe, queue, &dump, &n_regs); 2089 if (r) 2090 break; 2091 2092 seq_printf(m, " SDMA Engine %d, RLC %d\n", 2093 pipe, queue); 2094 seq_reg_dump(m, dump, n_regs); 2095 2096 kfree(dump); 2097 } 2098 } 2099 2100 return r; 2101 } 2102 2103 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) 2104 { 2105 int r = 0; 2106 2107 dqm_lock(dqm); 2108 r = pm_debugfs_hang_hws(&dqm->packet_mgr); 2109 if (r) { 2110 dqm_unlock(dqm); 2111 return r; 2112 } 2113 dqm->active_runlist = true; 2114 r = execute_queues_cpsch(dqm, 
KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); 2115 dqm_unlock(dqm); 2116 2117 return r; 2118 } 2119 2120 #endif 2121
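/*
 * Editor's note: the sketch below is illustrative only and is not built
 * with the driver (hence the #if 0 guard).  It restates the static SDMA
 * doorbell assignment computed in allocate_doorbell(); the idx_offset[]
 * values used in the example are hypothetical.
 */
#if 0
static uint32_t example_sdma_doorbell(const uint32_t *idx_offset,
				      uint32_t engine_id, uint32_t queue_id)
{
	/* Odd and even RLC queues of one engine sit 512 doorbells apart */
	return idx_offset[engine_id]
		+ (queue_id & 1) * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
		+ (queue_id >> 1);
}
/*
 * With a hypothetical idx_offset[] = { 0x00, 0x10 }:
 *   engine 1, queue 0 -> 0x10 + 0 * 512 + 0 = 0x010
 *   engine 1, queue 1 -> 0x10 + 1 * 512 + 0 = 0x210
 *   engine 1, queue 2 -> 0x10 + 0 * 512 + 1 = 0x011
 */
#endif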
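/*
 * Editor's note (illustrative, not part of the driver): allocate_hqd()
 * keeps one bitmask of free HQD slots per pipe (a set bit means the slot
 * is free) and rotates the starting pipe for fairness.  For example, with
 * 4 pipes, allocated_queues = { 0x00, 0x06, 0xff, 0x01 } and
 * next_pipe_to_allocate = 1: pipe 1 is tried first, ffs(0x06) - 1 = 1, so
 * (pipe 1, queue 1) is handed out, allocated_queues[1] becomes 0x04 and
 * next_pipe_to_allocate advances to 2.
 */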
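/*
 * Editor's note (illustrative, not part of the driver): qpd->evicted is a
 * reference count, so evict/restore calls may nest.  Only the first evict
 * actually deactivates queues and only the matching last restore
 * reactivates them:
 *
 *   evict_process_queues_*()    evicted 0 -> 1   queues deactivated
 *   evict_process_queues_*()    evicted 1 -> 2   no-op
 *   restore_process_queues_*()  evicted 2 -> 1   no-op
 *   restore_process_queues_*()  evicted 1 -> 0   queues reactivated
 */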
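/*
 * Editor's note: illustrative sketch (not built with the driver) of how
 * allocate_sdma_queue() maps a flat bitmap index onto an (engine, queue)
 * pair by striping across engines first.  XGMI SDMA queues use the same
 * striping with the engine id offset by the number of PCIe-optimized
 * engines.
 */
#if 0
static void example_sdma_id_to_engine_queue(unsigned int sdma_id,
					    unsigned int num_engines,
					    unsigned int *engine,
					    unsigned int *queue)
{
	*engine = sdma_id % num_engines;	/* stripe across engines */
	*queue = sdma_id / num_engines;		/* then fill each engine */
}
/* With 2 engines: id 0 -> (0,0), id 1 -> (1,0), id 2 -> (0,1), ... */
#endif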
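/*
 * Editor's note (illustrative, not part of the driver): both schedulers
 * seed the SDMA bitmaps with one set bit per available queue:
 *   initialize_nocpsch:  ~0ULL >> (64 - n)               n = 8 -> 0xff
 *   initialize_cpsch:    n >= 64 ? ULLONG_MAX : BIT_ULL(n) - 1
 * The cpsch variant guards n == 64 explicitly because BIT_ULL(64) would
 * shift past the width of the 64-bit bitmap.
 */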
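/*
 * Editor's note (illustrative, not part of the driver):
 * set_sched_resources() hands the HWS firmware a 64-bit queue_mask built
 * from cp_queue_bitmap, restricted to the first MEC and remapped through
 * amdgpu_queue_mask_bit_to_set_resource_bit().  For example, with
 * 4 pipes x 8 queues on MEC0 all reserved for KFD, 32 bits of queue_mask
 * end up set; their exact positions depend on the remapping helper.
 */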
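/*
 * Editor's note: illustrative sketch (not built with the driver) of the
 * preemption fence handshake used by unmap_queues_cpsch(): the driver
 * writes KFD_FENCE_INIT, asks the firmware (via a query-status packet)
 * to write KFD_FENCE_COMPLETED once all queues are unmapped, and then
 * polls the fence with a deadline, treating a timeout as an HWS hang.
 */
#if 0
static int example_wait_for_fence(volatile uint64_t *fence_addr,
				  uint64_t expected,
				  unsigned long end_jiffies)
{
	while (*fence_addr != expected) {
		if (time_after(jiffies, end_jiffies))
			return -ETIME;	/* scheduler did not respond */
		schedule();		/* yield the CPU while polling */
	}
	return 0;
}
#endif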
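/*
 * Editor's note (illustrative, not part of the driver): a worked example
 * of the APE1 encoding in set_cache_memory_policy().  A 64K-aligned
 * aperture with base 0x200000000 and size 0x100000000 gives
 * limit = 0x2FFFFFFFF.  Both values pass the APE1_FIXED_BITS_MASK check
 * (the base ends in 0x0000, the limit ends in 0xFFFF, the high bits are
 * clear), and they are programmed as:
 *   sh_mem_ape1_base  = 0x200000000 >> 16 = 0x20000
 *   sh_mem_ape1_limit = 0x2FFFFFFFF >> 16 = 0x2FFFF
 */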