/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			     dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->vmid_bitmap) - 1;
	dqm->vmid_bitmap &= ~(1 << bit);

	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
	pr_debug("vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
			qpd->vmid,
			qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd));

	return 0;
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;

	kfd_flush_tlb(qpd_to_pdd(qpd));

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	dqm->vmid_bitmap |= (1 << bit);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd)
{
	int retval;

	print_queue(q);

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
	else
		retval = -EINVAL;

	if (retval) {
		if (list_empty(&qpd->queues_list))
			deallocate_vmid(dqm, qpd, q);
		goto out_unlock;
	}

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
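	 * total_queue_count is the value compared against
	 * max_num_of_queues_per_device at queue-creation time.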
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_hqd;

	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);

	dqm->dev->kfd2kgd->set_scratch_backing_va(
			dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);

	if (!q->properties.is_active)
		return 0;

	retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties,
			       q->process->mm);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
out_deallocate_hqd:
	deallocate_hqd(dqm, q);

	return retval;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	mqd = dqm->ops.get_mqd_manager(dqm,
		get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd)
		return -ENOMEM;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
					qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active)
		dqm->queue_count--;

	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;

	mutex_lock(&dqm->lock);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	mutex_unlock(&dqm->lock);

	return retval;
}

static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	mutex_lock(&dqm->lock);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto out_unlock;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (pdd->qpd.evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		retval = unmap_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval) {
			pr_err("unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
		retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			pr_err("destroy mqd failed\n");
			goto out_unlock;
		}
	}

	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->queue_count to determine whether a new runlist must be
	 * uploaded.
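	 * (map_queues_cpsch uploads a runlist only when at least one queue
	 * is active and at least one process is registered.)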
	 */
	if (q->properties.is_active && !prev_active)
		dqm->queue_count++;
	else if (!q->properties.is_active && prev_active)
		dqm->queue_count--;

	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = map_queues_cpsch(dqm);
	else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA))
		retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
				       &q->properties, q->process->mm);

out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

static struct mqd_manager *get_mqd_manager(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	pr_debug("mqd type %d\n", type);

	mqd = dqm->mqds[type];
	if (!mqd) {
		mqd = mqd_manager_init(type, dqm->dev);
		if (!mqd)
			pr_err("mqd manager is NULL");
		dqm->mqds[type] = mqd;
	}

	return mqd;
}

static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd;
	struct kfd_process_device *pdd;
	int retval = 0;

	mutex_lock(&dqm->lock);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* Deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd) { /* should not be here */
			pr_err("Cannot evict queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval)
			goto out;
		dqm->queue_count--;
	}

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
				      struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	int retval = 0;

	mutex_lock(&dqm->lock);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* Deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		dqm->queue_count--;
	}
	retval = execute_queues_cpsch(dqm,
				qpd->is_debug ?
				KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd;
	struct kfd_process_device *pdd;
	uint32_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	mutex_lock(&dqm->lock);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%08x\n", pd_base);

	if (!list_empty(&qpd->queues_list)) {
		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
				dqm->dev->kgd,
				qpd->vmid,
				qpd->page_table_base);
		kfd_flush_tlb(pdd);
	}

	/* Re-activate all evicted queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd) { /* should not be here */
			pr_err("Cannot restore queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
				       q->queue, &q->properties,
				       q->process->mm);
		if (retval)
			goto out;
		dqm->queue_count++;
	}
	qpd->evicted = 0;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	uint32_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	mutex_lock(&dqm->lock);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%08x\n", pd_base);

	/* Re-activate all evicted queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		dqm->queue_count++;
	}
	retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	if (!retval)
		qpd->evicted = 0;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int register_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	struct kfd_process_device *pdd;
	uint32_t pd_base;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	mutex_lock(&dqm->lock);
	list_add(&n->list, &dqm->queues);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;

	retval = dqm->asic_ops.update_qpd(dqm, qpd);

	dqm->processes_count++;

	mutex_unlock(&dqm->lock);

	return retval;
}

static int unregister_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	mutex_lock(&dqm->lock);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 :
		(uint32_t)pasid |
		ATC_VMID_PASID_MAPPING_VALID;

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->shared_resources.queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	return 0;
}

static void uninitialize(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return 0;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->sdma_bitmap) - 1;
	dqm->sdma_bitmap &= ~(1 << bit);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= CIK_SDMA_QUEUES)
		return;
	dqm->sdma_bitmap |= (1 << sdma_queue_id);
}

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd;
	int retval;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
	q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;

	pr_debug("SDMA id is: %d\n", q->sdma_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_sdma_queue;

	retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
out_deallocate_sdma_queue:
	deallocate_sdma_queue(dqm, q->sdma_id);

	return retval;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
						res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
			"vmid mask: 0x%8X\n"
			"queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	return 0;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval)
		goto fail_set_sched_resources;

	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	mutex_lock(&dqm->lock);
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	mutex_unlock(&dqm->lock);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	mutex_lock(&dqm->lock);
	unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	mutex_unlock(&dqm->lock);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	mutex_lock(&dqm->lock);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	mutex_lock(&dqm->lock);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	mutex_unlock(&dqm->lock);
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	retval = 0;

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		retval = allocate_sdma_queue(dqm, &q->sdma_id);
		if (retval)
			goto out;
		q->properties.sdma_queue_id =
			q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
		q->properties.sdma_engine_id =
			q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
	}
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (!mqd) {
		retval = -ENOMEM;
		goto out;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out;

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, end_jiffies)) {
			pr_err("qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int unmap_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
		return 0;

	if (dqm->active_runlist)
		return 0;

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval) {
		pr_err("failed to execute runlist\n");
		return retval;
	}
	dqm->active_runlist = true;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval = 0;

	if (!dqm->active_runlist)
		return retval;

	pr_debug("Before destroying queues, sdma queue count is : %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		unmap_sdma_queues(dqm, 0);
		unmap_sdma_queues(dqm, 1);
	}

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			filter, filter_param, false, 0);
	if (retval)
		return retval;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* should be timed out */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval)
		return retval;

	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval;

	retval = unmap_queues_cpsch(dqm, filter, filter_param);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
		return retval;
	}

	return map_queues_cpsch(dqm);
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool preempt_all_queues;

	preempt_all_queues = false;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	if (qpd->is_debug) {
		/*
		 * error, currently we do not allow destroying a queue
		 * of a process that is currently being debugged
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	}

	list_del(&q->list);
	qpd->queue_count--;
	if (q->properties.is_active) {
		dqm->queue_count--;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval == -ETIME)
			qpd->reset_wavefronts = true;
	}

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);

	return retval;

failed:
failed_try_destroy_debugged_queue:

	mutex_unlock(&dqm->lock);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval;

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
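		 *
		 * For example (illustrative values only): base 0x100000000
		 * with size 0x10000 gives an inclusive limit of 0x10000ffff
		 * and programs SH_MEM_APE1_BASE = 0x10000 and
		 * SH_MEM_APE1_LIMIT = 0x10000.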
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int set_trap_handler(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				uint64_t tba_addr,
				uint64_t tma_addr)
{
	uint64_t *tma;

	if (dqm->dev->cwsr_enabled) {
		/* Jump from CWSR trap handler to user trap */
		tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
		tma[0] = tba_addr;
		tma[1] = tma_addr;
	} else {
		qpd->tba_addr = tba_addr;
		qpd->tma_addr = tma_addr;
	}

	return 0;
}

static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q, *next;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;

	mutex_lock(&dqm->lock);

	/* Clear all user mode queues */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		int ret;

		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	mutex_unlock(&dqm->lock);
	return retval;
}


static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q, *next;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;

	retval = 0;

	mutex_lock(&dqm->lock);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		dqm->queue_count--;
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
			dqm->sdma_queue_count--;

		if (q->properties.is_active)
			dqm->queue_count--;

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	retval = execute_queues_cpsch(dqm, filter, 0);
	if (retval || qpd->reset_wavefronts) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* lastly, free mqd resources */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd) {
			retval = -ENOMEM;
			goto out;
		}
		list_del(&q->list);
		qpd->queue_count--;
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
	}

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->device_info->asic_family) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_HAWAII:
		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
		break;

	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dev->device_info->asic_family);
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}

#if defined(CONFIG_DEBUG_FS)

static void seq_reg_dump(struct seq_file *m,
			 uint32_t (*dump)[2], uint32_t n_regs)
{
	uint32_t i, count;

	for (i = 0, count = 0; i < n_regs; i++) {
		if (count == 0 ||
		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
			seq_printf(m, "%s %08x: %08x",
				   i ? "\n" : "",
				   dump[i][0], dump[i][1]);
			count = 7;
		} else {
			seq_printf(m, " %08x", dump[i][1]);
			count--;
		}
	}

	seq_puts(m, "\n");
}

int dqm_debugfs_hqds(struct seq_file *m, void *data)
{
	struct device_queue_manager *dqm = data;
	uint32_t (*dump)[2], n_regs;
	int pipe, queue;
	int r = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
			if (!test_bit(pipe_offset + queue,
				      dqm->dev->shared_resources.queue_bitmap))
				continue;

			r = dqm->dev->kfd2kgd->hqd_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, " CP Pipe %d, Queue %d\n",
				   pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	for (pipe = 0; pipe < CIK_SDMA_ENGINE_NUM; pipe++) {
		for (queue = 0; queue < CIK_SDMA_QUEUES_PER_ENGINE; queue++) {
			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, " SDMA Engine %d, RLC %d\n",
				   pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	return r;
}

#endif