/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm,
				bool preempt_static_queues, bool lock);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			     dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

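/*
 * Program the SH_MEM_* configuration (memory bases and the APE1 alternate
 * aperture) for the process's VMID through the kfd2kgd interface.
 */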
void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM);
	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);

	/* Kaveri KFD VMIDs start from VMID 8 */
	allocated_vmid = bit + KFD_VMID_START_OFFSET;
	pr_debug("vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	return 0;
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - KFD_VMID_START_OFFSET;

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				int *allocated_vmid)
{
	int retval;

	print_queue(q);

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	*allocated_vmid = qpd->vmid;
	q->properties.vmid = qpd->vmid;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
	else
		retval = -EINVAL;

	if (retval) {
		if (list_empty(&qpd->queues_list)) {
			deallocate_vmid(dqm, qpd, q);
			*allocated_vmid = 0;
		}
		goto out_unlock;
	}

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

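/*
 * Reserve an HQD slot (pipe, queue) for a compute queue. Pipes are scanned
 * round-robin starting at next_pipe_to_allocate so queues are spread across
 * the enabled pipes of the first MEC.
 */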
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = find_first_bit(
				(unsigned long *)&dqm->allocated_queues[pipe],
				get_queues_per_pipe(dqm));

			clear_bit(bit,
				(unsigned long *)&dqm->allocated_queues[pipe]);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_hqd;

	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);

	dqm->dev->kfd2kgd->set_scratch_backing_va(
			dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);

	retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties,
			       q->process->mm);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
out_deallocate_hqd:
	deallocate_hqd(dqm, q);

	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	retval = 0;

	mutex_lock(&dqm->lock);

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		retval = -EINVAL;
		goto out;
	}

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
				q->pipe, q->queue);

	if (retval)
		goto out;

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
	if (q->properties.is_active)
		dqm->queue_count--;

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

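/*
 * Apply changed queue properties to the MQD and adjust the active-queue
 * count; when the HW scheduler is in use, the runlist is re-submitted.
 */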
static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool prev_active = false;

	mutex_lock(&dqm->lock);
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto out_unlock;
	}

	if (q->properties.is_active)
		prev_active = true;

	/*
	 *
	 * check active state vs. the previous state
	 * and modify counter accordingly
	 */
	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
	if ((q->properties.is_active) && (!prev_active))
		dqm->queue_count++;
	else if (!q->properties.is_active && prev_active)
		dqm->queue_count--;

	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = execute_queues_cpsch(dqm, false);

out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

static struct mqd_manager *get_mqd_manager_nocpsch(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	pr_debug("mqd type %d\n", type);

	mqd = dqm->mqds[type];
	if (!mqd) {
		mqd = mqd_manager_init(type, dqm->dev);
		if (!mqd)
			pr_err("mqd manager is NULL");
		dqm->mqds[type] = mqd;
	}

	return mqd;
}

static int register_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	mutex_lock(&dqm->lock);
	list_add(&n->list, &dqm->queues);

	retval = dqm->ops_asic_specific.register_process(dqm, qpd);

	dqm->processes_count++;

	mutex_unlock(&dqm->lock);

	return retval;
}

static int unregister_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	mutex_lock(&dqm->lock);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 :
		(uint32_t)pasid |
		ATC_VMID_PASID_MAPPING_VALID;

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
}

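/* Enable interrupt handling on each KFD-owned pipe of the first MEC. */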
static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->shared_resources.queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	return 0;
}

static void uninitialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return 0;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap,
				CIK_SDMA_QUEUES);

	clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= CIK_SDMA_QUEUES)
		return;
	set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
}

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd;
	int retval;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
	q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM;

	pr_debug("SDMA id is: %d\n", q->sdma_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);

	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_sdma_queue;

	retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
out_deallocate_sdma_queue:
	deallocate_sdma_queue(dqm, q->sdma_id);

	return retval;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

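/*
 * Report the VMID range and the usable HQDs of the first MEC to the HW
 * scheduler via a SET_RESOURCES packet.
 */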
static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
	res.vmid_mask <<= KFD_VMID_START_OFFSET;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
						res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
			"vmid mask: 0x%8X\n"
			"queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	retval = dqm->ops_asic_specific.initialize(dqm);
	if (retval)
		mutex_destroy(&dqm->lock);

	return retval;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	int retval;

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval)
		goto fail_set_sched_resources;

	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	list_for_each_entry(node, &dqm->queues, list)
		if (node->qpd->pqm->process && dqm->dev)
			kfd_bind_process_to_device(dqm->dev,
						node->qpd->pqm->process);

	execute_queues_cpsch(dqm, true);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	struct kfd_process_device *pdd;

	destroy_queues_cpsch(dqm, true, true);

	list_for_each_entry(node, &dqm->queues, list) {
		pdd = qpd_to_pdd(node->qpd);
		pdd->bound = false;
	}
	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

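/*
 * Account for a kernel queue (such as the debugger's DIQ), add it to the
 * process's privileged queue list and schedule it through the HW scheduler.
 */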
static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	mutex_lock(&dqm->lock);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, false);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	mutex_lock(&dqm->lock);
	/* here we actually preempt the DIQ */
	destroy_queues_cpsch(dqm, true, false);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, false);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	mutex_unlock(&dqm->lock);
}

static void select_sdma_engine_id(struct queue *q)
{
	static int sdma_id;

	q->sdma_id = sdma_id;
	sdma_id = (sdma_id + 1) % 2;
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd, int *allocate_vmid)
{
	int retval;
	struct mqd_manager *mqd;

	retval = 0;

	if (allocate_vmid)
		*allocate_vmid = 0;

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		select_sdma_engine_id(q);

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (!mqd) {
		retval = -ENOMEM;
		goto out;
	}

	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out;

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm, false);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

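/*
 * Poll the fence location until it reaches the expected value, yielding the
 * CPU between reads, or fail with -ETIME once the timeout (in jiffies)
 * expires.
 */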
int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned long timeout)
{
	timeout += jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, timeout)) {
			pr_err("qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int destroy_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

static int destroy_queues_cpsch(struct device_queue_manager *dqm,
				bool preempt_static_queues, bool lock)
{
	int retval;
	enum kfd_preempt_type_filter preempt_type;
	struct kfd_process_device *pdd;

	retval = 0;

	if (lock)
		mutex_lock(&dqm->lock);
	if (!dqm->active_runlist)
		goto out;

	pr_debug("Before destroying queues, sdma queue count is: %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		destroy_sdma_queues(dqm, 0);
		destroy_sdma_queues(dqm, 1);
	}

	preempt_type = preempt_static_queues ?
			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES :
			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES;

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			preempt_type, 0, false, 0);
	if (retval)
		goto out;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* should be timed out */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval) {
		pdd = kfd_get_process_device_data(dqm->dev,
				kfd_get_process(current));
		pdd->reset_wavefronts = true;
		goto out;
	}
	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;

	if (lock)
		mutex_lock(&dqm->lock);

	retval = destroy_queues_cpsch(dqm, false, false);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queue preemption\n");
		goto out;
	}

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0) {
		retval = 0;
		goto out;
	}

	if (dqm->active_runlist) {
		retval = 0;
		goto out;
	}

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval) {
		pr_err("failed to execute runlist\n");
		goto out;
	}
	dqm->active_runlist = true;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

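/*
 * Unmap a user queue through the HW scheduler and release its MQD. Queues
 * belonging to a process under debug cannot be destroyed.
 */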
static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool preempt_all_queues;

	preempt_all_queues = false;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	if (qpd->is_debug) {
		/*
		 * error, currently we do not allow destroying a queue
		 * of a process that is being debugged
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count--;

	list_del(&q->list);
	if (q->properties.is_active)
		dqm->queue_count--;

	execute_queues_cpsch(dqm, false);

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);

	return 0;

failed:
failed_try_destroy_debugged_queue:

	mutex_unlock(&dqm->lock);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

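/*
 * Validate the requested APE1 alternate aperture, convert it to the
 * SH_MEM_APE1_BASE/LIMIT encoding and program the cache policy for the
 * process. A zero-sized aperture disables APE1 (base > limit).
 */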
static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval;

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->ops_asic_specific.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	dqm->dev = dev;
	switch (sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->ops.register_process = register_process_nocpsch;
		dqm->ops.unregister_process = unregister_process_nocpsch;
		dqm->ops.uninitialize = uninitialize_nocpsch;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->ops.register_process = register_process_nocpsch;
		dqm->ops.unregister_process = unregister_process_nocpsch;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize_nocpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", sched_policy);
		goto out_free;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->ops_asic_specific);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->ops_asic_specific);
		break;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}