/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
				  unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
			      enum kfd_unmap_queues_filter filter,
			      uint32_t filter_param);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
				     struct queue *q,
				     struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				  unsigned int sdma_queue_id);

static void kfd_process_hw_exception(struct work_struct *work);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			     dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

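/*
 * The helpers above and below translate between the flat queue_bitmap
 * published by amdgpu in kgd2kfd_shared_resources and the MEC/pipe/queue
 * hierarchy used by the hardware; a set bit marks an HQD slot that is
 * reserved for KFD use (see is_pipe_enabled() above).
 */
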
unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines;
}

unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines
			* KFD_SDMA_QUEUES_PER_ENGINE;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
		/* On pre-SOC15 chips we need to use the queue ID to
		 * preserve the user mode ABI.
		 */
		q->doorbell_id = q->properties.queue_id;
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		/* For SDMA queues on SOC15, use static doorbell
		 * assignments based on the engine and queue.
		 */
		q->doorbell_id = dev->shared_resources.sdma_doorbell
			[q->properties.sdma_engine_id]
			[q->properties.sdma_queue_id];
	} else {
		/* For CP queues on SOC15 reserve a free doorbell ID */
		unsigned int found;

		found = find_first_zero_bit(qpd->doorbell_bitmap,
					    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
		if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
			pr_debug("No doorbells available");
			return -EBUSY;
		}
		set_bit(found, qpd->doorbell_bitmap);
		q->doorbell_id = found;
	}

	q->properties.doorbell_off =
		kfd_doorbell_id_to_offset(dev, q->process,
					  q->doorbell_id);

	return 0;
}

static void deallocate_doorbell(struct qcm_process_device *qpd,
				struct queue *q)
{
	unsigned int old;
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA)
		return;

	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
	WARN_ON(!old);
}

static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->vmid_bitmap) - 1;
	dqm->vmid_bitmap &= ~(1 << bit);

	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
	pr_debug("vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
			qpd->vmid,
			qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd));

	return 0;
}

static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
				struct qcm_process_device *qpd)
{
	const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
	int ret;

	if (!qpd->ib_kaddr)
		return -ENOMEM;

	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
	if (ret)
		return ret;

	return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
				pmf->release_mem_size / sizeof(uint32_t));
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;

	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->device_info->asic_family == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			pr_err("Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd));

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	dqm->vmid_bitmap |= (1 << bit);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd)
{
	int retval;

	print_queue(q);

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
	else
		retval = -EINVAL;

	if (retval) {
		if (list_empty(&qpd->queues_list))
			deallocate_vmid(dqm, qpd, q);
		goto out_unlock;
	}

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (!mqd_mgr)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval)
		return retval;

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_hqd;

	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);

	dqm->dev->kfd2kgd->set_scratch_backing_va(
			dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);

	if (!q->properties.is_active)
		return 0;

	retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
			&q->properties, q->process->mm);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_hqd:
	deallocate_hqd(dqm, q);

	return retval;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
		get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr)
		return -ENOMEM;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	deallocate_doorbell(qpd, q);

	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd,
			q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
					qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active)
		dqm->queue_count--;

	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;

	dqm_lock(dqm);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	dqm_unlock(dqm);

	return retval;
}

static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	dqm_lock(dqm);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr) {
		retval = -ENOMEM;
		goto out_unlock;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (pdd->qpd.evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		retval = unmap_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval) {
			pr_err("unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			pr_err("destroy mqd failed\n");
			goto out_unlock;
		}
	}

	retval = mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active)
		dqm->queue_count++;
	else if (!q->properties.is_active && prev_active)
		dqm->queue_count--;

	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = map_queues_cpsch(dqm);
	else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA))
		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
				&q->properties, q->process->mm);

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

static struct mqd_manager *get_mqd_manager(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd_mgr;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	pr_debug("mqd type %d\n", type);

	mqd_mgr = dqm->mqd_mgrs[type];
	if (!mqd_mgr) {
		mqd_mgr = mqd_manager_init(type, dqm->dev);
		if (!mqd_mgr)
			pr_err("mqd manager is NULL");
		dqm->mqd_mgrs[type] = mqd_mgr;
	}

	return mqd_mgr;
}

static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* Deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd_mgr) { /* should not be here */
			pr_err("Cannot evict queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval)
			goto out;
		dqm->queue_count--;
	}

out:
	dqm_unlock(dqm);
	return retval;
}

static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
				      struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* Deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		dqm->queue_count--;
	}
	retval = execute_queues_cpsch(dqm,
				qpd->is_debug ?
				KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);

out:
	dqm_unlock(dqm);
	return retval;
}

static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	uint32_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%08x\n", pd_base);

	if (!list_empty(&qpd->queues_list)) {
		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
				dqm->dev->kgd,
				qpd->vmid,
				qpd->page_table_base);
		kfd_flush_tlb(pdd);
	}

	/* Re-activate all evicted queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd_mgr) { /* should not be here */
			pr_err("Cannot restore queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
				q->queue, &q->properties,
				q->process->mm);
		if (retval)
			goto out;
		dqm->queue_count++;
	}
	qpd->evicted = 0;
out:
	dqm_unlock(dqm);
	return retval;
}

static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	uint32_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%08x\n", pd_base);

	/* Re-activate all evicted queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		dqm->queue_count++;
	}
	retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	if (!retval)
		qpd->evicted = 0;
out:
	dqm_unlock(dqm);
	return retval;
}

static int register_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	struct kfd_process_device *pdd;
	uint32_t pd_base;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	list_add(&n->list, &dqm->queues);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;

	retval = dqm->asic_ops.update_qpd(dqm, qpd);

	if (dqm->processes_count++ == 0)
		dqm->dev->kfd2kgd->set_compute_idle(dqm->dev->kgd, false);

	dqm_unlock(dqm);

	return retval;
}

static int unregister_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	dqm_lock(dqm);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			if (--dqm->processes_count == 0)
				dqm->dev->kfd2kgd->set_compute_idle(
					dqm->dev->kgd, true);
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	dqm_unlock(dqm);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 :
		(uint32_t)pasid |
		ATC_VMID_PASID_MAPPING_VALID;

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->shared_resources.queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
	dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;

	return 0;
}

static void uninitialize(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqd_mgrs[i]);
	mutex_destroy(&dqm->lock_hidden);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return pm_init(&dqm->packets, dqm);
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	pm_uninit(&dqm->packets);
	return 0;
}

static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->sdma_bitmap) - 1;
	dqm->sdma_bitmap &= ~(1 << bit);
	*sdma_queue_id = bit;

	return 0;
}

static void
deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= get_num_sdma_queues(dqm))
		return;
	dqm->sdma_bitmap |= (1 << sdma_queue_id);
}

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd_mgr)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
	q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_sdma_queue;

	pr_debug("SDMA id is: %d\n", q->sdma_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 0, 0, &q->properties,
				NULL);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	deallocate_sdma_queue(dqm, q->sdma_id);

	return retval;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues.
		 * If so, the definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
						res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
			"vmid mask: 0x%8X\n"
			"queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;

	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);

	return 0;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval)
		goto fail_set_sched_resources;

	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	dqm_lock(dqm);
	/* Clear the hang status when the driver tries to start the HW scheduler */
	dqm->is_hws_hang = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	dqm_lock(dqm);
	unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	dqm_unlock(dqm);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		dqm_unlock(dqm);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	dqm_unlock(dqm);
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	retval = 0;

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		retval = allocate_sdma_queue(dqm, &q->sdma_id);
		if (retval)
			goto out_unlock;
		q->properties.sdma_queue_id =
			q->sdma_id / get_num_sdma_engines(dqm);
		q->properties.sdma_engine_id =
			q->sdma_id % get_num_sdma_engines(dqm);
	}

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_sdma_queue;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (!mqd_mgr) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;
	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);
	return retval;

out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q->sdma_id);
out_unlock:
	dqm_unlock(dqm);

	return retval;
}

int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, end_jiffies)) {
			pr_err("qcm fence wait loop timeout expired\n");
			/* In HWS case, this is used to halt the driver thread
			 * in order not to mess up CP states before doing
			 * scandumps for FW debugging.
			 */
			while (halt_if_hws_hang)
				schedule();

			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int unmap_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
		return 0;

	if (dqm->active_runlist)
		return 0;

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval) {
		pr_err("failed to execute runlist\n");
		return retval;
	}
	dqm->active_runlist = true;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval = 0;

	if (dqm->is_hws_hang)
		return -EIO;
	if (!dqm->active_runlist)
		return retval;

	pr_debug("Before destroying queues, sdma queue count is : %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		unmap_sdma_queues(dqm, 0);
		unmap_sdma_queues(dqm, 1);
	}

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			filter, filter_param, false, 0);
	if (retval)
		return retval;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* should be timed out */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval)
		return retval;

	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval;

	if (dqm->is_hws_hang)
		return -EIO;
	retval = unmap_queues_cpsch(dqm, filter, filter_param);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
		dqm->is_hws_hang = true;
		schedule_work(&dqm->hw_exception_work);
		return retval;
	}

	return map_queues_cpsch(dqm);
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;
	bool preempt_all_queues;

	preempt_all_queues = false;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	dqm_lock(dqm);

	if (qpd->is_debug) {
		/*
		 * error: we currently do not allow destroying a queue
		 * of a process that is being debugged
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr) {
		retval = -ENOMEM;
		goto failed;
	}

	deallocate_doorbell(qpd, q);

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	}

	list_del(&q->list);
	qpd->queue_count--;
	if (q->properties.is_active) {
		dqm->queue_count--;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval == -ETIME)
			qpd->reset_wavefronts = true;
	}

	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);

	return retval;

failed:
failed_try_destroy_debugged_queue:

	dqm_unlock(dqm);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval = true;

	if (!dqm->asic_ops.set_cache_memory_policy)
		return retval;

	dqm_lock(dqm);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	dqm_unlock(dqm);
	return retval;
}

static int set_trap_handler(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				uint64_t tba_addr,
				uint64_t tma_addr)
{
	uint64_t *tma;

	if (dqm->dev->cwsr_enabled) {
		/* Jump from CWSR trap handler to user trap */
		tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
		tma[0] = tba_addr;
		tma[1] = tma_addr;
	} else {
		qpd->tba_addr = tba_addr;
		qpd->tma_addr = tma_addr;
	}

	return 0;
}

static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q, *next;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;

	dqm_lock(dqm);

	/* Clear all user mode queues */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		int ret;

		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	dqm_unlock(dqm);
	return retval;
}


static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q, *next;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd_mgr;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;

	retval = 0;

	dqm_lock(dqm);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		dqm->queue_count--;
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
			dqm->sdma_queue_count--;
			deallocate_sdma_queue(dqm, q->sdma_id);
		}

		if (q->properties.is_active)
			dqm->queue_count--;

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	retval = execute_queues_cpsch(dqm, filter, 0);
	if ((!dqm->is_hws_hang) &&
	    (retval || qpd->reset_wavefronts)) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* lastly, free mqd resources */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd_mgr) {
			retval = -ENOMEM;
			goto out;
		}
		list_del(&q->list);
		qpd->queue_count--;
		mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
	}

out:
	dqm_unlock(dqm);
	return retval;
}

struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->device_info->asic_family) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_HAWAII:
		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
		break;

	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
		break;

	case CHIP_VEGA10:
	case CHIP_RAVEN:
		device_queue_manager_init_v9(&dqm->asic_ops);
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dev->device_info->asic_family);
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}

int kfd_process_vm_fault(struct device_queue_manager *dqm,
			 unsigned int pasid)
{
	struct kfd_process_device *pdd;
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
	int ret = 0;

	if (!p)
		return -EINVAL;
	pdd = kfd_get_process_device_data(dqm->dev, p);
	if (pdd)
		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
	kfd_unref_process(p);

	return ret;
}

static void kfd_process_hw_exception(struct work_struct *work)
{
	struct device_queue_manager *dqm = container_of(work,
			struct device_queue_manager, hw_exception_work);
	dqm->dev->kfd2kgd->gpu_recover(dqm->dev->kgd);
}

#if defined(CONFIG_DEBUG_FS)

static void seq_reg_dump(struct seq_file *m,
			 uint32_t (*dump)[2], uint32_t n_regs)
{
	uint32_t i, count;

	for (i = 0, count = 0; i < n_regs; i++) {
		if (count == 0 ||
		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
			seq_printf(m, "%s %08x: %08x",
				   i ?
"\n" : "", 1757 dump[i][0], dump[i][1]); 1758 count = 7; 1759 } else { 1760 seq_printf(m, " %08x", dump[i][1]); 1761 count--; 1762 } 1763 } 1764 1765 seq_puts(m, "\n"); 1766 } 1767 1768 int dqm_debugfs_hqds(struct seq_file *m, void *data) 1769 { 1770 struct device_queue_manager *dqm = data; 1771 uint32_t (*dump)[2], n_regs; 1772 int pipe, queue; 1773 int r = 0; 1774 1775 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd, 1776 KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs); 1777 if (!r) { 1778 seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n", 1779 KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1, 1780 KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm), 1781 KFD_CIK_HIQ_QUEUE); 1782 seq_reg_dump(m, dump, n_regs); 1783 1784 kfree(dump); 1785 } 1786 1787 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 1788 int pipe_offset = pipe * get_queues_per_pipe(dqm); 1789 1790 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 1791 if (!test_bit(pipe_offset + queue, 1792 dqm->dev->shared_resources.queue_bitmap)) 1793 continue; 1794 1795 r = dqm->dev->kfd2kgd->hqd_dump( 1796 dqm->dev->kgd, pipe, queue, &dump, &n_regs); 1797 if (r) 1798 break; 1799 1800 seq_printf(m, " CP Pipe %d, Queue %d\n", 1801 pipe, queue); 1802 seq_reg_dump(m, dump, n_regs); 1803 1804 kfree(dump); 1805 } 1806 } 1807 1808 for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) { 1809 for (queue = 0; queue < KFD_SDMA_QUEUES_PER_ENGINE; queue++) { 1810 r = dqm->dev->kfd2kgd->hqd_sdma_dump( 1811 dqm->dev->kgd, pipe, queue, &dump, &n_regs); 1812 if (r) 1813 break; 1814 1815 seq_printf(m, " SDMA Engine %d, RLC %d\n", 1816 pipe, queue); 1817 seq_reg_dump(m, dump, n_regs); 1818 1819 kfree(dump); 1820 } 1821 } 1822 1823 return r; 1824 } 1825 1826 int dqm_debugfs_execute_queues(struct device_queue_manager *dqm) 1827 { 1828 int r = 0; 1829 1830 dqm_lock(dqm); 1831 dqm->active_runlist = true; 1832 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); 1833 dqm_unlock(dqm); 1834 1835 return r; 1836 } 1837 1838 #endif 1839