/*
 * Copyright(c) 2015, 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/topology.h>
#include <linux/cpumask.h>
#include <linux/module.h>

#include "hfi.h"
#include "affinity.h"
#include "sdma.h"
#include "trace.h"

struct hfi1_affinity_node_list node_affinity = {
	.list = LIST_HEAD_INIT(node_affinity.list),
	.lock = __SPIN_LOCK_UNLOCKED(&node_affinity.lock),
};

/* Name of IRQ types, indexed by enum irq_type */
static const char * const irq_type_names[] = {
	"SDMA",
	"RCVCTXT",
	"GENERAL",
	"OTHER",
};

/* Per NUMA node count of HFI devices */
static unsigned int *hfi1_per_node_cntr;

static inline void init_cpu_mask_set(struct cpu_mask_set *set)
{
	cpumask_clear(&set->mask);
	cpumask_clear(&set->used);
	set->gen = 0;
}

/* Initialize non-HT cpu cores mask */
void init_real_cpu_mask(void)
{
	int possible, curr_cpu, i, ht;

	cpumask_clear(&node_affinity.real_cpu_mask);

	/* Start with cpu online mask as the real cpu mask */
	cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask);

	/*
	 * Remove HT cores from the real cpu mask. Do this in two steps below.
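	 *
	 * For example (illustrative numbering only), with 8 online CPUs and
	 * two threads per core enumerated as CPUs 0-3 first and their HT
	 * siblings as CPUs 4-7, the two steps keep CPUs 0-3 as the "real"
	 * cores and clear CPUs 4-7 from the mask.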
	 */
	possible = cpumask_weight(&node_affinity.real_cpu_mask);
	ht = cpumask_weight(topology_sibling_cpumask(
				cpumask_first(&node_affinity.real_cpu_mask)));
	/*
	 * Step 1. Skip over the first N HT siblings and use them as the
	 * "real" cores. Assumes that HT cores are not enumerated in
	 * succession (except in the single core case).
	 */
	curr_cpu = cpumask_first(&node_affinity.real_cpu_mask);
	for (i = 0; i < possible / ht; i++)
		curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
	/*
	 * Step 2. Remove the remaining HT siblings. Use cpumask_next() to
	 * skip any gaps.
	 */
	for (; i < possible; i++) {
		cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask);
		curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
	}
}

int node_affinity_init(void)
{
	int node;
	struct pci_dev *dev = NULL;
	const struct pci_device_id *ids = hfi1_pci_tbl;

	cpumask_clear(&node_affinity.proc.used);
	cpumask_copy(&node_affinity.proc.mask, cpu_online_mask);

	node_affinity.proc.gen = 0;
	node_affinity.num_core_siblings =
		cpumask_weight(topology_sibling_cpumask(
			cpumask_first(&node_affinity.proc.mask)));
	node_affinity.num_online_nodes = num_online_nodes();
	node_affinity.num_online_cpus = num_online_cpus();

	/*
	 * The real cpu mask is part of the affinity struct but it has to be
	 * initialized early. It is needed to calculate the number of user
	 * contexts in set_up_context_variables().
	 */
	init_real_cpu_mask();

	hfi1_per_node_cntr = kcalloc(num_possible_nodes(),
				     sizeof(*hfi1_per_node_cntr), GFP_KERNEL);
	if (!hfi1_per_node_cntr)
		return -ENOMEM;

	while (ids->vendor) {
		dev = NULL;
		while ((dev = pci_get_device(ids->vendor, ids->device, dev))) {
			node = pcibus_to_node(dev->bus);
			if (node < 0)
				node = numa_node_id();

			hfi1_per_node_cntr[node]++;
		}
		ids++;
	}

	return 0;
}

void node_affinity_destroy(void)
{
	struct list_head *pos, *q;
	struct hfi1_affinity_node *entry;

	spin_lock(&node_affinity.lock);
	list_for_each_safe(pos, q, &node_affinity.list) {
		entry = list_entry(pos, struct hfi1_affinity_node, list);
		list_del(pos);
		kfree(entry);
	}
	spin_unlock(&node_affinity.lock);
	kfree(hfi1_per_node_cntr);
}

static struct hfi1_affinity_node *node_affinity_allocate(int node)
{
	struct hfi1_affinity_node *entry;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return NULL;
	entry->node = node;
	INIT_LIST_HEAD(&entry->list);

	return entry;
}

/*
 * Appends an entry to the list.
 * It *must* be called with node_affinity.lock held.
 */
static void node_affinity_add_tail(struct hfi1_affinity_node *entry)
{
	list_add_tail(&entry->list, &node_affinity.list);
}

/* Must be called with node_affinity.lock held */
static struct hfi1_affinity_node *node_affinity_lookup(int node)
{
	struct list_head *pos;
	struct hfi1_affinity_node *entry;

	list_for_each(pos, &node_affinity.list) {
		entry = list_entry(pos, struct hfi1_affinity_node, list);
		if (entry->node == node)
			return entry;
	}

	return NULL;
}

/*
 * Interrupt affinity.
 *
 * Non-receive interrupts get a default mask that starts as this node's
 * CPUs with HT siblings removed and with the CPU claimed by each
 * receive interrupt then cleared from it.
 *
 * Receive interrupts get node-relative CPUs starting at 1, wrapping
 * back to node-relative 1 as necessary.
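 *
 * For example (sizes are illustrative), on a node whose non-HT CPUs
 * are 0-13, with one device and 8 kernel receive contexts, CPU 0
 * serves the general/control context, CPUs 1-7 serve the remaining
 * kernel receive contexts, and CPUs 8-13 are left for SDMA engines.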
 *
 */
int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
{
	int node = pcibus_to_node(dd->pcidev->bus);
	struct hfi1_affinity_node *entry;
	const struct cpumask *local_mask;
	int curr_cpu, possible, i;

	if (node < 0)
		node = numa_node_id();
	dd->node = node;

	local_mask = cpumask_of_node(dd->node);
	if (cpumask_first(local_mask) >= nr_cpu_ids)
		local_mask = topology_core_cpumask(0);

	spin_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	spin_unlock(&node_affinity.lock);

	/*
	 * If this is the first time this NUMA node's affinity is used,
	 * create an entry in the global affinity structure and initialize it.
	 */
	if (!entry) {
		entry = node_affinity_allocate(node);
		if (!entry) {
			dd_dev_err(dd,
				   "Unable to allocate global affinity node\n");
			return -ENOMEM;
		}
		init_cpu_mask_set(&entry->def_intr);
		init_cpu_mask_set(&entry->rcv_intr);
		cpumask_clear(&entry->general_intr_mask);
		/* Use the "real" cpu mask of this node as the default */
		cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
			    local_mask);

		/* fill in the receive list */
		possible = cpumask_weight(&entry->def_intr.mask);
		curr_cpu = cpumask_first(&entry->def_intr.mask);

		if (possible == 1) {
			/* only one CPU, everyone will use it */
			cpumask_set_cpu(curr_cpu, &entry->rcv_intr.mask);
			cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
		} else {
			/*
			 * The general/control context will be the first CPU in
			 * the default list, so it is removed from the default
			 * list and added to the general interrupt list.
			 */
			cpumask_clear_cpu(curr_cpu, &entry->def_intr.mask);
			cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
			curr_cpu = cpumask_next(curr_cpu,
						&entry->def_intr.mask);

			/*
			 * Remove the remaining kernel receive queues from
			 * the default list and add them to the receive list.
			 */
			for (i = 0;
			     i < (dd->n_krcv_queues - 1) *
				  hfi1_per_node_cntr[dd->node];
			     i++) {
				cpumask_clear_cpu(curr_cpu,
						  &entry->def_intr.mask);
				cpumask_set_cpu(curr_cpu,
						&entry->rcv_intr.mask);
				curr_cpu = cpumask_next(curr_cpu,
							&entry->def_intr.mask);
				if (curr_cpu >= nr_cpu_ids)
					break;
			}

			/*
			 * If there ends up being 0 CPU cores leftover for SDMA
			 * engines, use the same CPU cores as general/control
			 * context.
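			 *
			 * This happens when the general/control CPU plus the
			 * kernel receive CPUs for every device on this node
			 * use up all of the node's non-HT CPUs.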
			 */
			if (cpumask_weight(&entry->def_intr.mask) == 0)
				cpumask_copy(&entry->def_intr.mask,
					     &entry->general_intr_mask);
		}

		spin_lock(&node_affinity.lock);
		node_affinity_add_tail(entry);
		spin_unlock(&node_affinity.lock);
	}

	return 0;
}

int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
{
	int ret;
	cpumask_var_t diff;
	struct hfi1_affinity_node *entry;
	struct cpu_mask_set *set = NULL;
	struct sdma_engine *sde = NULL;
	struct hfi1_ctxtdata *rcd = NULL;
	char extra[64];
	int cpu = -1;

	extra[0] = '\0';
	cpumask_clear(&msix->mask);

	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
	if (!ret)
		return -ENOMEM;

	spin_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	spin_unlock(&node_affinity.lock);

	switch (msix->type) {
	case IRQ_SDMA:
		sde = (struct sdma_engine *)msix->arg;
		scnprintf(extra, 64, "engine %u", sde->this_idx);
		set = &entry->def_intr;
		break;
	case IRQ_GENERAL:
		cpu = cpumask_first(&entry->general_intr_mask);
		break;
	case IRQ_RCVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		if (rcd->ctxt == HFI1_CTRL_CTXT)
			cpu = cpumask_first(&entry->general_intr_mask);
		else
			set = &entry->rcv_intr;
		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
		break;
	default:
		dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
		/* don't leak the cpumask allocated above */
		free_cpumask_var(diff);
		return -EINVAL;
	}

	/*
	 * The general and control contexts are placed on a particular
	 * CPU, which is set above. Skip accounting for it. Everything else
	 * finds its CPU here.
	 */
	if (cpu == -1 && set) {
		spin_lock(&node_affinity.lock);
		if (cpumask_equal(&set->mask, &set->used)) {
			/*
			 * We've used up all the CPUs, bump up the generation
			 * and reset the 'used' map
			 */
			set->gen++;
			cpumask_clear(&set->used);
		}
		cpumask_andnot(diff, &set->mask, &set->used);
		cpu = cpumask_first(diff);
		cpumask_set_cpu(cpu, &set->used);
		spin_unlock(&node_affinity.lock);
	}

	switch (msix->type) {
	case IRQ_SDMA:
		sde->cpu = cpu;
		break;
	case IRQ_GENERAL:
	case IRQ_RCVCTXT:
	case IRQ_OTHER:
		break;
	}

	cpumask_set_cpu(cpu, &msix->mask);
	dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n",
		    msix->msix.vector, irq_type_names[msix->type],
		    extra, cpu);
	irq_set_affinity_hint(msix->msix.vector, &msix->mask);

	free_cpumask_var(diff);
	return 0;
}

void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
			   struct hfi1_msix_entry *msix)
{
	struct cpu_mask_set *set = NULL;
	struct hfi1_ctxtdata *rcd;
	struct hfi1_affinity_node *entry;

	spin_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	spin_unlock(&node_affinity.lock);

	switch (msix->type) {
	case IRQ_SDMA:
		set = &entry->def_intr;
		break;
	case IRQ_GENERAL:
		/* Don't do accounting for general contexts */
		break;
	case IRQ_RCVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		/* Don't do accounting for control contexts */
		if (rcd->ctxt != HFI1_CTRL_CTXT)
			set = &entry->rcv_intr;
		break;
	default:
		return;
	}

	if (set) {
		spin_lock(&node_affinity.lock);
		cpumask_andnot(&set->used, &set->used, &msix->mask);
		if (cpumask_empty(&set->used) && set->gen) {
			set->gen--;
			cpumask_copy(&set->used, &set->mask);
		}
		spin_unlock(&node_affinity.lock);
	}

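	/* Passing NULL removes the affinity hint for this vector */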
	irq_set_affinity_hint(msix->msix.vector, NULL);
	cpumask_clear(&msix->mask);
}

/* This should be called with node_affinity.lock held */
static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask,
				struct hfi1_affinity_node_list *affinity)
{
	int possible, curr_cpu, i;
	uint num_cores_per_socket = node_affinity.num_online_cpus /
					affinity->num_core_siblings /
						node_affinity.num_online_nodes;

	cpumask_copy(hw_thread_mask, &affinity->proc.mask);
	if (affinity->num_core_siblings > 0) {
		/* Removing other siblings not needed for now */
		possible = cpumask_weight(hw_thread_mask);
		curr_cpu = cpumask_first(hw_thread_mask);
		for (i = 0;
		     i < num_cores_per_socket * node_affinity.num_online_nodes;
		     i++)
			curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);

		for (; i < possible; i++) {
			cpumask_clear_cpu(curr_cpu, hw_thread_mask);
			curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
		}

		/* Identifying correct HW threads within physical cores */
		cpumask_shift_left(hw_thread_mask, hw_thread_mask,
				   num_cores_per_socket *
				   node_affinity.num_online_nodes *
				   hw_thread_no);
	}
}

int hfi1_get_proc_affinity(int node)
{
	int cpu = -1, ret, i;
	struct hfi1_affinity_node *entry;
	cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
	const struct cpumask *node_mask,
		*proc_mask = tsk_cpus_allowed(current);
	struct hfi1_affinity_node_list *affinity = &node_affinity;
	struct cpu_mask_set *set = &affinity->proc;

	/*
	 * Check whether process/context affinity has already
	 * been set.
	 */
	if (cpumask_weight(proc_mask) == 1) {
		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
			  current->pid, current->comm,
			  cpumask_pr_args(proc_mask));
		/*
		 * Mark the pre-set CPU as used. This is atomic so we don't
		 * need the lock.
		 */
		cpu = cpumask_first(proc_mask);
		cpumask_set_cpu(cpu, &set->used);
		goto done;
	} else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
			  current->pid, current->comm,
			  cpumask_pr_args(proc_mask));
		goto done;
	}

	/*
	 * The process does not have a preset CPU affinity, so find one to
	 * recommend using the following algorithm:
	 *
	 * For each user process that is opening a context on HFI Y:
	 *  a) If all cores are filled, reinitialize the bitmask
	 *  b) Fill real cores first, then HT cores (first set of HT
	 *     cores on all physical cores, then second set of HT cores,
	 *     and so on) in the following order:
	 *
	 *     1. Same NUMA node as HFI Y and not running an IRQ
	 *        handler
	 *     2. Same NUMA node as HFI Y and running an IRQ handler
	 *     3. Different NUMA node to HFI Y and not running an IRQ
	 *        handler
	 *     4. Different NUMA node to HFI Y and running an IRQ
	 *        handler
	 *  c) Mark core as filled in the bitmask. As user processes are
	 *     done, clear cores from the bitmask.
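	 *
	 * In the code below, hw_thread_mask selects which set of HW threads
	 * to fill, node_mask and intrs_mask express the NUMA and IRQ
	 * preferences, and set->used is the "filled" bitmask.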
	 */

	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
	if (!ret)
		goto done;
	ret = zalloc_cpumask_var(&hw_thread_mask, GFP_KERNEL);
	if (!ret)
		goto free_diff;
	ret = zalloc_cpumask_var(&available_mask, GFP_KERNEL);
	if (!ret)
		goto free_hw_thread_mask;
	ret = zalloc_cpumask_var(&intrs_mask, GFP_KERNEL);
	if (!ret)
		goto free_available_mask;

	spin_lock(&affinity->lock);
	/*
	 * If we've used all available HW threads, clear the mask and start
	 * overloading.
	 */
	if (cpumask_equal(&set->mask, &set->used)) {
		set->gen++;
		cpumask_clear(&set->used);
	}

	/*
	 * If the NUMA node has CPUs used by interrupt handlers, include them
	 * in the interrupt handler mask.
	 */
	entry = node_affinity_lookup(node);
	if (entry) {
		cpumask_copy(intrs_mask, (entry->def_intr.gen ?
					  &entry->def_intr.mask :
					  &entry->def_intr.used));
		cpumask_or(intrs_mask, intrs_mask, (entry->rcv_intr.gen ?
						    &entry->rcv_intr.mask :
						    &entry->rcv_intr.used));
		cpumask_or(intrs_mask, intrs_mask, &entry->general_intr_mask);
	}
	hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl",
		  cpumask_pr_args(intrs_mask));

	cpumask_copy(hw_thread_mask, &set->mask);

	/*
	 * If HT cores are enabled, identify which HW threads within the
	 * physical cores should be used.
	 */
	if (affinity->num_core_siblings > 0) {
		for (i = 0; i < affinity->num_core_siblings; i++) {
			find_hw_thread_mask(i, hw_thread_mask, affinity);

			/*
			 * If there's at least one available core for this HW
			 * thread number, stop looking for a core.
			 *
			 * diff will always be non-empty at least once in this
			 * loop as the used mask gets reset when
			 * (set->mask == set->used) before this loop.
			 */
			cpumask_andnot(diff, hw_thread_mask, &set->used);
			if (!cpumask_empty(diff))
				break;
		}
	}
	hfi1_cdbg(PROC, "Same available HW thread on all physical CPUs: %*pbl",
		  cpumask_pr_args(hw_thread_mask));

	node_mask = cpumask_of_node(node);
	hfi1_cdbg(PROC, "Device on NUMA %u, CPUs %*pbl", node,
		  cpumask_pr_args(node_mask));

	/* Get cpumask of available CPUs on preferred NUMA */
	cpumask_and(available_mask, hw_thread_mask, node_mask);
	cpumask_andnot(available_mask, available_mask, &set->used);
	hfi1_cdbg(PROC, "Available CPUs on NUMA %u: %*pbl", node,
		  cpumask_pr_args(available_mask));

	/*
	 * At first, we don't want to place processes on the same
	 * CPUs as interrupt handlers. Then, CPUs running interrupt
	 * handlers are used.
	 *
	 * 1) If diff is not empty, then there are CPUs available that are
	 *    not running interrupt handlers, so diff gets copied
	 *    over to available_mask.
	 * 2) If diff is empty, then all CPUs not running interrupt
	 *    handlers are taken, so available_mask contains all
	 *    available CPUs running interrupt handlers.
	 * 3) If available_mask is empty, then all CPUs on the
	 *    preferred NUMA node are taken, so other NUMA nodes are
	 *    used for process assignments using the same method as
	 *    the preferred NUMA node.
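	 *
	 * The same "avoid IRQ CPUs first" preference is applied again below
	 * when falling back to the other NUMA nodes.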
	 */
	cpumask_andnot(diff, available_mask, intrs_mask);
	if (!cpumask_empty(diff))
		cpumask_copy(available_mask, diff);

	/* If we don't have CPUs on the preferred node, use other NUMA nodes */
	if (cpumask_empty(available_mask)) {
		cpumask_andnot(available_mask, hw_thread_mask, &set->used);
		/* Excluding preferred NUMA cores */
		cpumask_andnot(available_mask, available_mask, node_mask);
		hfi1_cdbg(PROC,
			  "Preferred NUMA node cores are taken, cores available in other NUMA nodes: %*pbl",
			  cpumask_pr_args(available_mask));

		/*
		 * At first, we don't want to place processes on the same
		 * CPUs as interrupt handlers.
		 */
		cpumask_andnot(diff, available_mask, intrs_mask);
		if (!cpumask_empty(diff))
			cpumask_copy(available_mask, diff);
	}
	hfi1_cdbg(PROC, "Possible CPUs for process: %*pbl",
		  cpumask_pr_args(available_mask));

	cpu = cpumask_first(available_mask);
	if (cpu >= nr_cpu_ids) /* empty */
		cpu = -1;
	else
		cpumask_set_cpu(cpu, &set->used);
	spin_unlock(&affinity->lock);
	hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu);

	free_cpumask_var(intrs_mask);
free_available_mask:
	free_cpumask_var(available_mask);
free_hw_thread_mask:
	free_cpumask_var(hw_thread_mask);
free_diff:
	free_cpumask_var(diff);
done:
	return cpu;
}

void hfi1_put_proc_affinity(int cpu)
{
	struct hfi1_affinity_node_list *affinity = &node_affinity;
	struct cpu_mask_set *set = &affinity->proc;

	if (cpu < 0)
		return;
	spin_lock(&affinity->lock);
	cpumask_clear_cpu(cpu, &set->used);
	hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
	if (cpumask_empty(&set->used) && set->gen) {
		set->gen--;
		cpumask_copy(&set->used, &set->mask);
	}
	spin_unlock(&affinity->lock);
}

/* Prevents concurrent reads and writes of the sdma_affinity attribute */
static DEFINE_MUTEX(sdma_affinity_mutex);

int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
			   size_t count)
{
	struct hfi1_affinity_node *entry;
	cpumask_var_t mask;
	int ret, i;

	spin_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	spin_unlock(&node_affinity.lock);

	if (!entry)
		return -EINVAL;

	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
	if (!ret)
		return -ENOMEM;

	ret = cpulist_parse(buf, mask);
	if (ret)
		goto out;

	if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) {
		dd_dev_warn(dd, "Invalid CPU mask\n");
		ret = -EINVAL;
		goto out;
	}

	mutex_lock(&sdma_affinity_mutex);
	/* reset the SDMA interrupt affinity details */
	init_cpu_mask_set(&entry->def_intr);
	cpumask_copy(&entry->def_intr.mask, mask);
	/*
	 * Reassign the affinity for each SDMA interrupt.
	 */
	for (i = 0; i < dd->num_msix_entries; i++) {
		struct hfi1_msix_entry *msix;

		msix = &dd->msix_entries[i];
		if (msix->type != IRQ_SDMA)
			continue;

		ret = hfi1_get_irq_affinity(dd, msix);

		if (ret)
			break;
	}
	mutex_unlock(&sdma_affinity_mutex);
out:
	free_cpumask_var(mask);
	return ret ? ret : strnlen(buf, PAGE_SIZE);
}

int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf)
{
	struct hfi1_affinity_node *entry;

	spin_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	spin_unlock(&node_affinity.lock);

	if (!entry)
		return -EINVAL;

	mutex_lock(&sdma_affinity_mutex);
	cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask);
	mutex_unlock(&sdma_affinity_mutex);
	return strnlen(buf, PAGE_SIZE);
}