/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bitops.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "../../radeon/cik_reg.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static bool is_mem_initialized;

static int init_memory(struct device_queue_manager *dqm);
static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);
static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);


static inline unsigned int get_pipes_num(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.compute_pipe_count;
}

static inline unsigned int get_first_pipe(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm);
	return dqm->dev->shared_resources.first_compute_pipe;
}

static inline unsigned int get_pipes_num_cpsch(void)
{
	return PIPE_PER_ME_CP_SCHEDULING;
}

static inline unsigned int
get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd)
{
	uint32_t nybble;

	nybble = (pdd->lds_base >> 60) & 0x0E;

	return nybble;

}

static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
{
	unsigned int shared_base;

	shared_base = (pdd->lds_base >> 16) & 0xFF;

	return shared_base;
}

static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble);
static void init_process_memory(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd)
{
	struct kfd_process_device *pdd;
	unsigned int temp;

	BUG_ON(!dqm || !qpd);

	pdd = qpd_to_pdd(qpd);

	/* check if sh_mem_config register already configured */
	if (qpd->sh_mem_config == 0) {
		qpd->sh_mem_config =
			ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
			DEFAULT_MTYPE(MTYPE_NONCACHED) |
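			/* the alternate (APE1) aperture defaults to noncached as well */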
			APE1_MTYPE(MTYPE_NONCACHED);
		qpd->sh_mem_ape1_limit = 0;
		qpd->sh_mem_ape1_base = 0;
	}

	if (qpd->pqm->process->is_32bit_user_mode) {
		temp = get_sh_mem_bases_32(pdd);
		qpd->sh_mem_bases = SHARED_BASE(temp);
		qpd->sh_mem_config |= PTR32;
	} else {
		temp = get_sh_mem_bases_nybble_64(pdd);
		qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
	}

	pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
		qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
}

static void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM);
	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);

	/* Kaveri kfd vmid's starts from vmid 8 */
	allocated_vmid = bit + KFD_VMID_START_OFFSET;
	pr_debug("kfd: vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	return 0;
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - KFD_VMID_START_OFFSET;

	set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				int *allocated_vmid)
{
	int retval;

	BUG_ON(!dqm || !q || !qpd || !allocated_vmid);

	pr_debug("kfd: In func %s\n", __func__);
	print_queue(q);

	mutex_lock(&dqm->lock);

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval != 0) {
			mutex_unlock(&dqm->lock);
			return retval;
		}
	}
	*allocated_vmid = qpd->vmid;
	q->properties.vmid = qpd->vmid;

	retval = create_compute_queue_nocpsch(dqm, q, qpd);

	if (retval != 0) {
		if (list_empty(&qpd->queues_list)) {
			deallocate_vmid(dqm, qpd, q);
			*allocated_vmid = 0;
		}
		mutex_unlock(&dqm->lock);
		return retval;
	}

	list_add(&q->list, &qpd->queues_list);
	dqm->queue_count++;

	mutex_unlock(&dqm->lock);
	return 0;
}

static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate; pipe < get_pipes_num(dqm);
			pipe = (pipe + 1) % get_pipes_num(dqm)) {
		if (dqm->allocated_queues[pipe] != 0) {
			bit = find_first_bit(
				(unsigned long *)&dqm->allocated_queues[pipe],
				QUEUES_PER_PIPE);

			clear_bit(bit,
				(unsigned long *)&dqm->allocated_queues[pipe]);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (set == false)
		return -EBUSY;

	pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
				__func__, q->pipe, q->queue);
	/* horizontal hqd allocation */
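	/* i.e. round-robin: resume the next allocation at the following pipe */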
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_num(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
	if (mqd == NULL)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval != 0)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0) {
		deallocate_hqd(dqm, q);
		return retval;
	}

	return 0;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !q->mqd || !qpd);

	retval = 0;

	pr_debug("kfd: In Func %s\n", __func__);

	mutex_lock(&dqm->lock);
	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
	if (mqd == NULL) {
		retval = -ENOMEM;
		goto out;
	}

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
				q->pipe, q->queue);

	if (retval != 0)
		goto out;

	deallocate_hqd(dqm, q);

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
	dqm->queue_count--;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !q->mqd);

	mutex_lock(&dqm->lock);
	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
	if (q->properties.is_active == true)
		dqm->queue_count++;
	else
		dqm->queue_count--;

	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = execute_queues_cpsch(dqm, false);

	mutex_unlock(&dqm->lock);
	return retval;
}

static struct mqd_manager *get_mqd_manager_nocpsch(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd;

	BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX);

	pr_debug("kfd: In func %s mqd type %d\n", __func__, type);

	mqd = dqm->mqds[type];
	if (!mqd) {
		mqd = mqd_manager_init(type, dqm->dev);
		if (mqd == NULL)
			pr_err("kfd: mqd manager is NULL");
		dqm->mqds[type] = mqd;
	}

	return mqd;
}

static int register_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;

	BUG_ON(!dqm || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	mutex_lock(&dqm->lock);
	list_add(&n->list, &dqm->queues);

	init_process_memory(dqm, qpd);
	dqm->processes_count++;

	mutex_unlock(&dqm->lock);

	return 0;
}

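/*
 * Drop the process from this DQM's process list. Returns 1 if the qpd was
 * never registered with this device queue manager.
 */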
static int unregister_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	BUG_ON(!dqm || !qpd);

	BUG_ON(!list_empty(&qpd->queues_list));

	pr_debug("kfd: In func %s\n", __func__);

	retval = 0;
	mutex_lock(&dqm->lock);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
					ATC_VMID_PASID_MAPPING_VALID;
	return kfd2kgd->set_pasid_vmid_mapping(dqm->dev->kgd, pasid_mapping,
						vmid);
}

static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
{
	/* In 64-bit mode, we can only control the top 3 bits of the LDS,
	 * scratch and GPUVM apertures.
	 * The hardware fills in the remaining 59 bits according to the
	 * following pattern:
	 * LDS:		X0000000'00000000 - X0000001'00000000 (4GB)
	 * Scratch:	X0000001'00000000 - X0000002'00000000 (4GB)
	 * GPUVM:	Y0010000'00000000 - Y0020000'00000000 (1TB)
	 *
	 * (where X/Y is the configurable nybble with the low-bit 0)
	 *
	 * LDS and scratch will have the same top nybble programmed in the
	 * top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
	 * GPUVM can have a different top nybble programmed in the
	 * top 3 bits of SH_MEM_BASES.SHARED_BASE.
	 * We don't bother to support different top nybbles
	 * for LDS/Scratch and GPUVM.
	 */

	BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
		top_address_nybble == 0);

	return PRIVATE_BASE(top_address_nybble << 12) |
			SHARED_BASE(top_address_nybble << 12);
}

static int init_memory(struct device_queue_manager *dqm)
{
	int i, retval;

	for (i = 8; i < 16; i++)
		set_pasid_vmid_mapping(dqm, 0, i);

	retval = kfd2kgd->init_memory(dqm->dev->kgd);
	if (retval == 0)
		is_mem_initialized = true;
	return retval;
}


static int init_pipelines(struct device_queue_manager *dqm,
			unsigned int pipes_num, unsigned int first_pipe)
{
	void *hpdptr;
	struct mqd_manager *mqd;
	unsigned int i, err, inx;
	uint64_t pipe_hpd_addr;

	BUG_ON(!dqm || !dqm->dev);

	pr_debug("kfd: In func %s\n", __func__);

	/*
	 * Allocate memory for the HPDs. This is hardware-owned per-pipe data.
	 * The driver never accesses this memory after zeroing it.
	 * It doesn't even have to be saved/restored on suspend/resume
	 * because it contains no data when there are no active queues.
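	 * (Each pipe receives CIK_HPD_EOP_BYTES of this buffer; see the
	 * per-pipe offsets in the loop below.)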
	 */

	err = kfd2kgd->allocate_mem(dqm->dev->kgd,
				CIK_HPD_EOP_BYTES * pipes_num,
				PAGE_SIZE,
				KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
				(struct kgd_mem **) &dqm->pipeline_mem);

	if (err) {
		pr_err("kfd: error allocate vidmem num pipes: %d\n",
			pipes_num);
		return -ENOMEM;
	}

	hpdptr = dqm->pipeline_mem->cpu_ptr;
	dqm->pipelines_addr = dqm->pipeline_mem->gpu_addr;

	memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num);

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
	if (mqd == NULL) {
		kfd2kgd->free_mem(dqm->dev->kgd,
				(struct kgd_mem *) dqm->pipeline_mem);
		return -ENOMEM;
	}

	for (i = 0; i < pipes_num; i++) {
		inx = i + first_pipe;
		pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
		pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
		/* = log2(bytes/4)-1 */
		kfd2kgd->init_pipeline(dqm->dev->kgd, i,
				CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
	}

	return 0;
}


static int init_scheduler(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In %s\n", __func__);

	retval = init_pipelines(dqm, get_pipes_num(dqm), KFD_DQM_FIRST_PIPE);
	if (retval != 0)
		return retval;

	retval = init_memory(dqm);

	return retval;
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_num(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues) {
		mutex_destroy(&dqm->lock);
		return -ENOMEM;
	}

	for (i = 0; i < get_pipes_num(dqm); i++)
		dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;

	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;

	init_scheduler(dqm);
	return 0;
}

static void uninitialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);

	BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd2kgd->free_mem(dqm->dev->kgd,
			(struct kgd_mem *) dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

static int set_sched_resources(struct device_queue_manager *dqm)
{
	struct scheduling_resources res;
	unsigned int queue_num, queue_mask;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s\n", __func__);

	queue_num = get_pipes_num_cpsch() * QUEUES_PER_PIPE;
	queue_mask = (1 << queue_num) - 1;
	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
	res.vmid_mask <<= KFD_VMID_START_OFFSET;
	res.queue_mask = queue_mask << (get_first_pipe(dqm) * QUEUES_PER_PIPE);
	res.gws_mask = res.oac_mask = res.gds_heap_base =
			res.gds_heap_size = 0;

	pr_debug("kfd: scheduling resources:\n"
			"	vmid mask: 0x%8X\n"
			"	queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

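/*
 * Initialization for the HW-scheduler (CP) path. The runlist stays inactive
 * until start_cpsch() brings up the packet manager.
 */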
static int initialize_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_num_cpsch());

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->active_runlist = false;
	retval = init_pipelines(dqm, get_pipes_num(dqm), 0);
	if (retval != 0)
		goto fail_init_pipelines;

	return 0;

fail_init_pipelines:
	mutex_destroy(&dqm->lock);
	return retval;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	int retval;

	BUG_ON(!dqm);

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval != 0)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval != 0)
		goto fail_set_sched_resources;

	pr_debug("kfd: allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd2kgd->allocate_mem(dqm->dev->kgd,
					sizeof(*dqm->fence_addr),
					32,
					KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
					(struct kgd_mem **) &dqm->fence_mem);

	if (retval != 0)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	list_for_each_entry(node, &dqm->queues, list)
		if (node->qpd->pqm->process && dqm->dev)
			kfd_bind_process_to_device(dqm->dev,
						node->qpd->pqm->process);

	execute_queues_cpsch(dqm, true);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	struct kfd_process_device *pdd;

	BUG_ON(!dqm);

	destroy_queues_cpsch(dqm, true);

	list_for_each_entry(node, &dqm->queues, list) {
		pdd = qpd_to_pdd(node->qpd);
		pdd->bound = false;
	}
	kfd2kgd->free_mem(dqm->dev->kgd,
			(struct kgd_mem *) dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);
	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, false);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq);

	pr_debug("kfd: In %s\n", __func__);

	mutex_lock(&dqm->lock);
	destroy_queues_cpsch(dqm, false);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, false);
	mutex_unlock(&dqm->lock);
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd, int *allocate_vmid)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	retval = 0;

	if (allocate_vmid)
		*allocate_vmid = 0;

	mutex_lock(&dqm->lock);

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

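	/*
	 * Initialize the MQD; the CP maps the queue to hardware when the
	 * updated runlist is submitted by execute_queues_cpsch() below.
	 */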
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0)
		goto out;

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm, false);
	}

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned long timeout)
{
	BUG_ON(!fence_addr);
	timeout += jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, timeout)) {
			pr_err("kfd: qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		cpu_relax();
	}

	return 0;
}

static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;

	BUG_ON(!dqm);

	retval = 0;

	if (lock)
		mutex_lock(&dqm->lock);
	if (dqm->active_runlist == false)
		goto out;
	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
	if (retval != 0)
		goto out;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* should be timed out */
	fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;

	BUG_ON(!dqm);

	if (lock)
		mutex_lock(&dqm->lock);

	retval = destroy_queues_cpsch(dqm, false);
	if (retval != 0) {
		pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption");
		goto out;
	}

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0) {
		retval = 0;
		goto out;
	}

	if (dqm->active_runlist) {
		retval = 0;
		goto out;
	}

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval != 0) {
		pr_err("kfd: failed to execute runlist");
		goto out;
	}
	dqm->active_runlist = true;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !qpd || !q);

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	list_del(&q->list);
	dqm->queue_count--;

	execute_queues_cpsch(dqm, false);

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	mutex_unlock(&dqm->lock);

	return 0;

failed:
	mutex_unlock(&dqm->lock);
	return retval;
}

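/*
 * set_cache_memory_policy() computes the default and alternate (APE1) MTYPEs
 * for the process's SH_MEM_CONFIG; the constants below constrain the
 * user-supplied APE1 aperture.
 */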
/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	uint32_t default_mtype;
	uint32_t ape1_mtype;

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base)
			goto out;

		if ((base & APE1_FIXED_BITS_MASK) != 0)
			goto out;

		if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
			goto out;

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	default_mtype = (default_policy == cache_policy_coherent) ?
			MTYPE_NONCACHED :
			MTYPE_CACHED;

	ape1_mtype = (alternate_policy == cache_policy_coherent) ?
			MTYPE_NONCACHED :
			MTYPE_CACHED;

	qpd->sh_mem_config = (qpd->sh_mem_config & PTR32)
			| ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
			| DEFAULT_MTYPE(default_mtype)
			| APE1_MTYPE(ape1_mtype);

	if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

	mutex_unlock(&dqm->lock);
	return true;

out:
	mutex_unlock(&dqm->lock);
	return false;
}

struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	BUG_ON(!dev);

	dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
	if (!dqm)
		return NULL;

	dqm->dev = dev;
	switch (sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->create_queue = create_queue_cpsch;
		dqm->initialize = initialize_cpsch;
		dqm->start = start_cpsch;
		dqm->stop = stop_cpsch;
		dqm->destroy_queue = destroy_queue_cpsch;
		dqm->update_queue = update_queue;
		dqm->get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->register_process = register_process_nocpsch;
		dqm->unregister_process = unregister_process_nocpsch;
		dqm->uninitialize = uninitialize_nocpsch;
		dqm->create_kernel_queue = create_kernel_queue_cpsch;
		dqm->destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->set_cache_memory_policy = set_cache_memory_policy;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->start = start_nocpsch;
		dqm->stop = stop_nocpsch;
		dqm->create_queue = create_queue_nocpsch;
		dqm->destroy_queue = destroy_queue_nocpsch;
		dqm->update_queue = update_queue;
		dqm->get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->register_process = register_process_nocpsch;
		dqm->unregister_process = unregister_process_nocpsch;
		dqm->initialize = initialize_nocpsch;
		dqm->uninitialize = uninitialize_nocpsch;
		dqm->set_cache_memory_policy = set_cache_memory_policy;
		break;
	default:
		BUG();
		break;
	}

	if (dqm->initialize(dqm) != 0) {
		kfree(dqm);
		return NULL;
	}

	return dqm;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm);

	dqm->uninitialize(dqm);
	kfree(dqm);
}