1 /* 2 * Copyright(c) 2015-2018 Intel Corporation. 3 * 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 * redistributing this file, you may do so under either license. 6 * 7 * GPL LICENSE SUMMARY 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of version 2 of the GNU General Public License as 11 * published by the Free Software Foundation. 12 * 13 * This program is distributed in the hope that it will be useful, but 14 * WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * General Public License for more details. 17 * 18 * BSD LICENSE 19 * 20 * Redistribution and use in source and binary forms, with or without 21 * modification, are permitted provided that the following conditions 22 * are met: 23 * 24 * - Redistributions of source code must retain the above copyright 25 * notice, this list of conditions and the following disclaimer. 26 * - Redistributions in binary form must reproduce the above copyright 27 * notice, this list of conditions and the following disclaimer in 28 * the documentation and/or other materials provided with the 29 * distribution. 30 * - Neither the name of Intel Corporation nor the names of its 31 * contributors may be used to endorse or promote products derived 32 * from this software without specific prior written permission. 33 * 34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 * 46 */ 47 48 #include <linux/delay.h> 49 #include "hfi.h" 50 #include "qp.h" 51 #include "trace.h" 52 53 #define SC(name) SEND_CTXT_##name 54 /* 55 * Send Context functions 56 */ 57 static void sc_wait_for_packet_egress(struct send_context *sc, int pause); 58 59 /* 60 * Set the CM reset bit and wait for it to clear. Use the provided 61 * sendctrl register. This routine has no locking. 
62 */ 63 void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl) 64 { 65 write_csr(dd, SEND_CTRL, sendctrl | SEND_CTRL_CM_RESET_SMASK); 66 while (1) { 67 udelay(1); 68 sendctrl = read_csr(dd, SEND_CTRL); 69 if ((sendctrl & SEND_CTRL_CM_RESET_SMASK) == 0) 70 break; 71 } 72 } 73 74 /* defined in header release 48 and higher */ 75 #ifndef SEND_CTRL_UNSUPPORTED_VL_SHIFT 76 #define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3 77 #define SEND_CTRL_UNSUPPORTED_VL_MASK 0xffull 78 #define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \ 79 << SEND_CTRL_UNSUPPORTED_VL_SHIFT) 80 #endif 81 82 /* global control of PIO send */ 83 void pio_send_control(struct hfi1_devdata *dd, int op) 84 { 85 u64 reg, mask; 86 unsigned long flags; 87 int write = 1; /* write sendctrl back */ 88 int flush = 0; /* re-read sendctrl to make sure it is flushed */ 89 int i; 90 91 spin_lock_irqsave(&dd->sendctrl_lock, flags); 92 93 reg = read_csr(dd, SEND_CTRL); 94 switch (op) { 95 case PSC_GLOBAL_ENABLE: 96 reg |= SEND_CTRL_SEND_ENABLE_SMASK; 97 /* Fall through */ 98 case PSC_DATA_VL_ENABLE: 99 mask = 0; 100 for (i = 0; i < ARRAY_SIZE(dd->vld); i++) 101 if (!dd->vld[i].mtu) 102 mask |= BIT_ULL(i); 103 /* Disallow sending on VLs not enabled */ 104 mask = (mask & SEND_CTRL_UNSUPPORTED_VL_MASK) << 105 SEND_CTRL_UNSUPPORTED_VL_SHIFT; 106 reg = (reg & ~SEND_CTRL_UNSUPPORTED_VL_SMASK) | mask; 107 break; 108 case PSC_GLOBAL_DISABLE: 109 reg &= ~SEND_CTRL_SEND_ENABLE_SMASK; 110 break; 111 case PSC_GLOBAL_VLARB_ENABLE: 112 reg |= SEND_CTRL_VL_ARBITER_ENABLE_SMASK; 113 break; 114 case PSC_GLOBAL_VLARB_DISABLE: 115 reg &= ~SEND_CTRL_VL_ARBITER_ENABLE_SMASK; 116 break; 117 case PSC_CM_RESET: 118 __cm_reset(dd, reg); 119 write = 0; /* CSR already written (and flushed) */ 120 break; 121 case PSC_DATA_VL_DISABLE: 122 reg |= SEND_CTRL_UNSUPPORTED_VL_SMASK; 123 flush = 1; 124 break; 125 default: 126 dd_dev_err(dd, "%s: invalid control %d\n", __func__, op); 127 break; 128 } 129 130 if (write) { 131 write_csr(dd, SEND_CTRL, reg); 132 if (flush) 133 (void)read_csr(dd, SEND_CTRL); /* flush write */ 134 } 135 136 spin_unlock_irqrestore(&dd->sendctrl_lock, flags); 137 } 138 139 /* number of send context memory pools */ 140 #define NUM_SC_POOLS 2 141 142 /* Send Context Size (SCS) wildcards */ 143 #define SCS_POOL_0 -1 144 #define SCS_POOL_1 -2 145 146 /* Send Context Count (SCC) wildcards */ 147 #define SCC_PER_VL -1 148 #define SCC_PER_CPU -2 149 #define SCC_PER_KRCVQ -3 150 151 /* Send Context Size (SCS) constants */ 152 #define SCS_ACK_CREDITS 32 153 #define SCS_VL15_CREDITS 102 /* 3 pkts of 2048B data + 128B header */ 154 155 #define PIO_THRESHOLD_CEILING 4096 156 157 #define PIO_WAIT_BATCH_SIZE 5 158 159 /* default send context sizes */ 160 static struct sc_config_sizes sc_config_sizes[SC_MAX] = { 161 [SC_KERNEL] = { .size = SCS_POOL_0, /* even divide, pool 0 */ 162 .count = SCC_PER_VL }, /* one per NUMA */ 163 [SC_ACK] = { .size = SCS_ACK_CREDITS, 164 .count = SCC_PER_KRCVQ }, 165 [SC_USER] = { .size = SCS_POOL_0, /* even divide, pool 0 */ 166 .count = SCC_PER_CPU }, /* one per CPU */ 167 [SC_VL15] = { .size = SCS_VL15_CREDITS, 168 .count = 1 }, 169 170 }; 171 172 /* send context memory pool configuration */ 173 struct mem_pool_config { 174 int centipercent; /* % of memory, in 100ths of 1% */ 175 int absolute_blocks; /* absolute block count */ 176 }; 177 178 /* default memory pool configuration: 100% in pool 0 */ 179 static struct mem_pool_config sc_mem_pool_config[NUM_SC_POOLS] = { 180 /* centi%, abs blocks */ 181 { 10000, -1 }, /* pool 0 */ 
182 { 0, -1 }, /* pool 1 */ 183 }; 184 185 /* memory pool information, used when calculating final sizes */ 186 struct mem_pool_info { 187 int centipercent; /* 188 * 100th of 1% of memory to use, -1 if blocks 189 * already set 190 */ 191 int count; /* count of contexts in the pool */ 192 int blocks; /* block size of the pool */ 193 int size; /* context size, in blocks */ 194 }; 195 196 /* 197 * Convert a pool wildcard to a valid pool index. The wildcards 198 * start at -1 and increase negatively. Map them as: 199 * -1 => 0 200 * -2 => 1 201 * etc. 202 * 203 * Return -1 on non-wildcard input, otherwise convert to a pool number. 204 */ 205 static int wildcard_to_pool(int wc) 206 { 207 if (wc >= 0) 208 return -1; /* non-wildcard */ 209 return -wc - 1; 210 } 211 212 static const char *sc_type_names[SC_MAX] = { 213 "kernel", 214 "ack", 215 "user", 216 "vl15" 217 }; 218 219 static const char *sc_type_name(int index) 220 { 221 if (index < 0 || index >= SC_MAX) 222 return "unknown"; 223 return sc_type_names[index]; 224 } 225 226 /* 227 * Read the send context memory pool configuration and send context 228 * size configuration. Replace any wildcards and come up with final 229 * counts and sizes for the send context types. 230 */ 231 int init_sc_pools_and_sizes(struct hfi1_devdata *dd) 232 { 233 struct mem_pool_info mem_pool_info[NUM_SC_POOLS] = { { 0 } }; 234 int total_blocks = (chip_pio_mem_size(dd) / PIO_BLOCK_SIZE) - 1; 235 int total_contexts = 0; 236 int fixed_blocks; 237 int pool_blocks; 238 int used_blocks; 239 int cp_total; /* centipercent total */ 240 int ab_total; /* absolute block total */ 241 int extra; 242 int i; 243 244 /* 245 * When SDMA is enabled, kernel context pio packet size is capped by 246 * "piothreshold". Reduce pio buffer allocation for kernel context by 247 * setting it to a fixed size. The allocation allows 3-deep buffering 248 * of the largest pio packets plus up to 128 bytes header, sufficient 249 * to maintain verbs performance. 250 * 251 * When SDMA is disabled, keep the default pooling allocation. 252 */ 253 if (HFI1_CAP_IS_KSET(SDMA)) { 254 u16 max_pkt_size = (piothreshold < PIO_THRESHOLD_CEILING) ? 255 piothreshold : PIO_THRESHOLD_CEILING; 256 sc_config_sizes[SC_KERNEL].size = 257 3 * (max_pkt_size + 128) / PIO_BLOCK_SIZE; 258 } 259 260 /* 261 * Step 0: 262 * - copy the centipercents/absolute sizes from the pool config 263 * - sanity check these values 264 * - add up centipercents, then later check for full value 265 * - add up absolute blocks, then later check for over-commit 266 */ 267 cp_total = 0; 268 ab_total = 0; 269 for (i = 0; i < NUM_SC_POOLS; i++) { 270 int cp = sc_mem_pool_config[i].centipercent; 271 int ab = sc_mem_pool_config[i].absolute_blocks; 272 273 /* 274 * A negative value is "unused" or "invalid". 
Both *can* 275 * be valid, but centipercent wins, so check that first 276 */ 277 if (cp >= 0) { /* centipercent valid */ 278 cp_total += cp; 279 } else if (ab >= 0) { /* absolute blocks valid */ 280 ab_total += ab; 281 } else { /* neither valid */ 282 dd_dev_err( 283 dd, 284 "Send context memory pool %d: both the block count and centipercent are invalid\n", 285 i); 286 return -EINVAL; 287 } 288 289 mem_pool_info[i].centipercent = cp; 290 mem_pool_info[i].blocks = ab; 291 } 292 293 /* do not use both % and absolute blocks for different pools */ 294 if (cp_total != 0 && ab_total != 0) { 295 dd_dev_err( 296 dd, 297 "All send context memory pools must be described as either centipercent or blocks, no mixing between pools\n"); 298 return -EINVAL; 299 } 300 301 /* if any percentages are present, they must add up to 100% x 100 */ 302 if (cp_total != 0 && cp_total != 10000) { 303 dd_dev_err( 304 dd, 305 "Send context memory pool centipercent is %d, expecting 10000\n", 306 cp_total); 307 return -EINVAL; 308 } 309 310 /* the absolute pool total cannot be more than the mem total */ 311 if (ab_total > total_blocks) { 312 dd_dev_err( 313 dd, 314 "Send context memory pool absolute block count %d is larger than the memory size %d\n", 315 ab_total, total_blocks); 316 return -EINVAL; 317 } 318 319 /* 320 * Step 2: 321 * - copy from the context size config 322 * - replace context type wildcard counts with real values 323 * - add up non-memory pool block sizes 324 * - add up memory pool user counts 325 */ 326 fixed_blocks = 0; 327 for (i = 0; i < SC_MAX; i++) { 328 int count = sc_config_sizes[i].count; 329 int size = sc_config_sizes[i].size; 330 int pool; 331 332 /* 333 * Sanity check count: Either a positive value or 334 * one of the expected wildcards is valid. The positive 335 * value is checked later when we compare against total 336 * memory available. 337 */ 338 if (i == SC_ACK) { 339 count = dd->n_krcv_queues; 340 } else if (i == SC_KERNEL) { 341 count = INIT_SC_PER_VL * num_vls; 342 } else if (count == SCC_PER_CPU) { 343 count = dd->num_rcv_contexts - dd->n_krcv_queues; 344 } else if (count < 0) { 345 dd_dev_err( 346 dd, 347 "%s send context invalid count wildcard %d\n", 348 sc_type_name(i), count); 349 return -EINVAL; 350 } 351 if (total_contexts + count > chip_send_contexts(dd)) 352 count = chip_send_contexts(dd) - total_contexts; 353 354 total_contexts += count; 355 356 /* 357 * Sanity check pool: The conversion will return a pool 358 * number or -1 if a fixed (non-negative) value. The fixed 359 * value is checked later when we compare against 360 * total memory available. 
361 */ 362 pool = wildcard_to_pool(size); 363 if (pool == -1) { /* non-wildcard */ 364 fixed_blocks += size * count; 365 } else if (pool < NUM_SC_POOLS) { /* valid wildcard */ 366 mem_pool_info[pool].count += count; 367 } else { /* invalid wildcard */ 368 dd_dev_err( 369 dd, 370 "%s send context invalid pool wildcard %d\n", 371 sc_type_name(i), size); 372 return -EINVAL; 373 } 374 375 dd->sc_sizes[i].count = count; 376 dd->sc_sizes[i].size = size; 377 } 378 if (fixed_blocks > total_blocks) { 379 dd_dev_err( 380 dd, 381 "Send context fixed block count, %u, larger than total block count %u\n", 382 fixed_blocks, total_blocks); 383 return -EINVAL; 384 } 385 386 /* step 3: calculate the blocks in the pools, and pool context sizes */ 387 pool_blocks = total_blocks - fixed_blocks; 388 if (ab_total > pool_blocks) { 389 dd_dev_err( 390 dd, 391 "Send context fixed pool sizes, %u, larger than pool block count %u\n", 392 ab_total, pool_blocks); 393 return -EINVAL; 394 } 395 /* subtract off the fixed pool blocks */ 396 pool_blocks -= ab_total; 397 398 for (i = 0; i < NUM_SC_POOLS; i++) { 399 struct mem_pool_info *pi = &mem_pool_info[i]; 400 401 /* % beats absolute blocks */ 402 if (pi->centipercent >= 0) 403 pi->blocks = (pool_blocks * pi->centipercent) / 10000; 404 405 if (pi->blocks == 0 && pi->count != 0) { 406 dd_dev_err( 407 dd, 408 "Send context memory pool %d has %u contexts, but no blocks\n", 409 i, pi->count); 410 return -EINVAL; 411 } 412 if (pi->count == 0) { 413 /* warn about wasted blocks */ 414 if (pi->blocks != 0) 415 dd_dev_err( 416 dd, 417 "Send context memory pool %d has %u blocks, but zero contexts\n", 418 i, pi->blocks); 419 pi->size = 0; 420 } else { 421 pi->size = pi->blocks / pi->count; 422 } 423 } 424 425 /* step 4: fill in the context type sizes from the pool sizes */ 426 used_blocks = 0; 427 for (i = 0; i < SC_MAX; i++) { 428 if (dd->sc_sizes[i].size < 0) { 429 unsigned pool = wildcard_to_pool(dd->sc_sizes[i].size); 430 431 WARN_ON_ONCE(pool >= NUM_SC_POOLS); 432 dd->sc_sizes[i].size = mem_pool_info[pool].size; 433 } 434 /* make sure we are not larger than what is allowed by the HW */ 435 #define PIO_MAX_BLOCKS 1024 436 if (dd->sc_sizes[i].size > PIO_MAX_BLOCKS) 437 dd->sc_sizes[i].size = PIO_MAX_BLOCKS; 438 439 /* calculate our total usage */ 440 used_blocks += dd->sc_sizes[i].size * dd->sc_sizes[i].count; 441 } 442 extra = total_blocks - used_blocks; 443 if (extra != 0) 444 dd_dev_info(dd, "unused send context blocks: %d\n", extra); 445 446 return total_contexts; 447 } 448 449 int init_send_contexts(struct hfi1_devdata *dd) 450 { 451 u16 base; 452 int ret, i, j, context; 453 454 ret = init_credit_return(dd); 455 if (ret) 456 return ret; 457 458 dd->hw_to_sw = kmalloc_array(TXE_NUM_CONTEXTS, sizeof(u8), 459 GFP_KERNEL); 460 dd->send_contexts = kcalloc(dd->num_send_contexts, 461 sizeof(struct send_context_info), 462 GFP_KERNEL); 463 if (!dd->send_contexts || !dd->hw_to_sw) { 464 kfree(dd->hw_to_sw); 465 kfree(dd->send_contexts); 466 free_credit_return(dd); 467 return -ENOMEM; 468 } 469 470 /* hardware context map starts with invalid send context indices */ 471 for (i = 0; i < TXE_NUM_CONTEXTS; i++) 472 dd->hw_to_sw[i] = INVALID_SCI; 473 474 /* 475 * All send contexts have their credit sizes. Allocate credits 476 * for each context one after another from the global space. 
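 *
 * Illustrative example (made-up sizes, not the configured defaults):
 * with a first context type of size 10 blocks and count 2, followed by
 * a type of size 32 and count 1, the loop below assigns base/credits
 * of 1/10, 11/10, and 21/32, packing the global PIO space with no
 * gaps.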
477 */ 478 context = 0; 479 base = 1; 480 for (i = 0; i < SC_MAX; i++) { 481 struct sc_config_sizes *scs = &dd->sc_sizes[i]; 482 483 for (j = 0; j < scs->count; j++) { 484 struct send_context_info *sci = 485 &dd->send_contexts[context]; 486 sci->type = i; 487 sci->base = base; 488 sci->credits = scs->size; 489 490 context++; 491 base += scs->size; 492 } 493 } 494 495 return 0; 496 } 497 498 /* 499 * Allocate a software index and hardware context of the given type. 500 * 501 * Must be called with dd->sc_lock held. 502 */ 503 static int sc_hw_alloc(struct hfi1_devdata *dd, int type, u32 *sw_index, 504 u32 *hw_context) 505 { 506 struct send_context_info *sci; 507 u32 index; 508 u32 context; 509 510 for (index = 0, sci = &dd->send_contexts[0]; 511 index < dd->num_send_contexts; index++, sci++) { 512 if (sci->type == type && sci->allocated == 0) { 513 sci->allocated = 1; 514 /* use a 1:1 mapping, but make them non-equal */ 515 context = chip_send_contexts(dd) - index - 1; 516 dd->hw_to_sw[context] = index; 517 *sw_index = index; 518 *hw_context = context; 519 return 0; /* success */ 520 } 521 } 522 dd_dev_err(dd, "Unable to locate a free type %d send context\n", type); 523 return -ENOSPC; 524 } 525 526 /* 527 * Free the send context given by its software index. 528 * 529 * Must be called with dd->sc_lock held. 530 */ 531 static void sc_hw_free(struct hfi1_devdata *dd, u32 sw_index, u32 hw_context) 532 { 533 struct send_context_info *sci; 534 535 sci = &dd->send_contexts[sw_index]; 536 if (!sci->allocated) { 537 dd_dev_err(dd, "%s: sw_index %u not allocated? hw_context %u\n", 538 __func__, sw_index, hw_context); 539 } 540 sci->allocated = 0; 541 dd->hw_to_sw[hw_context] = INVALID_SCI; 542 } 543 544 /* return the base context of a context in a group */ 545 static inline u32 group_context(u32 context, u32 group) 546 { 547 return (context >> group) << group; 548 } 549 550 /* return the size of a group */ 551 static inline u32 group_size(u32 group) 552 { 553 return 1 << group; 554 } 555 556 /* 557 * Obtain the credit return addresses, kernel virtual and bus, for the 558 * given sc. 559 * 560 * To understand this routine: 561 * o va and dma are arrays of struct credit_return. One for each physical 562 * send context, per NUMA. 563 * o Each send context always looks in its relative location in a struct 564 * credit_return for its credit return. 565 * o Each send context in a group must have its return address CSR programmed 566 * with the same value. Use the address of the first send context in the 567 * group. 568 */ 569 static void cr_group_addresses(struct send_context *sc, dma_addr_t *dma) 570 { 571 u32 gc = group_context(sc->hw_context, sc->group); 572 u32 index = sc->hw_context & 0x7; 573 574 sc->hw_free = &sc->dd->cr_base[sc->node].va[gc].cr[index]; 575 *dma = (unsigned long) 576 &((struct credit_return *)sc->dd->cr_base[sc->node].dma)[gc]; 577 } 578 579 /* 580 * Work queue function triggered in error interrupt routine for 581 * kernel contexts. 582 */ 583 static void sc_halted(struct work_struct *work) 584 { 585 struct send_context *sc; 586 587 sc = container_of(work, struct send_context, halt_work); 588 sc_restart(sc); 589 } 590 591 /* 592 * Calculate PIO block threshold for this send context using the given MTU. 593 * Trigger a return when one MTU plus optional header of credits remain. 594 * 595 * Parameter mtu is in bytes. 596 * Parameter hdrqentsize is in DWORDs. 597 * 598 * Return value is what to write into the CSR: trigger return when 599 * unreturned credits pass this count. 
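 *
 * Worked example (illustrative values, assuming the usual 64-byte PIO
 * block size): mtu = 8192 bytes and hdrqentsize = 32 dwords give
 * 8192 + 128 = 8320 bytes, i.e. release_credits = 130 blocks.  A
 * context with 160 credits then uses a threshold of 30; a context
 * with only 100 credits clamps the threshold to 1.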
600 */ 601 u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize) 602 { 603 u32 release_credits; 604 u32 threshold; 605 606 /* add in the header size, then divide by the PIO block size */ 607 mtu += hdrqentsize << 2; 608 release_credits = DIV_ROUND_UP(mtu, PIO_BLOCK_SIZE); 609 610 /* check against this context's credits */ 611 if (sc->credits <= release_credits) 612 threshold = 1; 613 else 614 threshold = sc->credits - release_credits; 615 616 return threshold; 617 } 618 619 /* 620 * Calculate credit threshold in terms of percent of the allocated credits. 621 * Trigger when unreturned credits equal or exceed the percentage of the whole. 622 * 623 * Return value is what to write into the CSR: trigger return when 624 * unreturned credits pass this count. 625 */ 626 u32 sc_percent_to_threshold(struct send_context *sc, u32 percent) 627 { 628 return (sc->credits * percent) / 100; 629 } 630 631 /* 632 * Set the credit return threshold. 633 */ 634 void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold) 635 { 636 unsigned long flags; 637 u32 old_threshold; 638 int force_return = 0; 639 640 spin_lock_irqsave(&sc->credit_ctrl_lock, flags); 641 642 old_threshold = (sc->credit_ctrl >> 643 SC(CREDIT_CTRL_THRESHOLD_SHIFT)) 644 & SC(CREDIT_CTRL_THRESHOLD_MASK); 645 646 if (new_threshold != old_threshold) { 647 sc->credit_ctrl = 648 (sc->credit_ctrl 649 & ~SC(CREDIT_CTRL_THRESHOLD_SMASK)) 650 | ((new_threshold 651 & SC(CREDIT_CTRL_THRESHOLD_MASK)) 652 << SC(CREDIT_CTRL_THRESHOLD_SHIFT)); 653 write_kctxt_csr(sc->dd, sc->hw_context, 654 SC(CREDIT_CTRL), sc->credit_ctrl); 655 656 /* force a credit return on change to avoid a possible stall */ 657 force_return = 1; 658 } 659 660 spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags); 661 662 if (force_return) 663 sc_return_credits(sc); 664 } 665 666 /* 667 * set_pio_integrity 668 * 669 * Set the CHECK_ENABLE register for the send context 'sc'. 670 */ 671 void set_pio_integrity(struct send_context *sc) 672 { 673 struct hfi1_devdata *dd = sc->dd; 674 u32 hw_context = sc->hw_context; 675 int type = sc->type; 676 677 write_kctxt_csr(dd, hw_context, 678 SC(CHECK_ENABLE), 679 hfi1_pkt_default_send_ctxt_mask(dd, type)); 680 } 681 682 static u32 get_buffers_allocated(struct send_context *sc) 683 { 684 int cpu; 685 u32 ret = 0; 686 687 for_each_possible_cpu(cpu) 688 ret += *per_cpu_ptr(sc->buffers_allocated, cpu); 689 return ret; 690 } 691 692 static void reset_buffers_allocated(struct send_context *sc) 693 { 694 int cpu; 695 696 for_each_possible_cpu(cpu) 697 (*per_cpu_ptr(sc->buffers_allocated, cpu)) = 0; 698 } 699 700 /* 701 * Allocate a NUMA relative send context structure of the given type along 702 * with a HW context. 
703 */ 704 struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, 705 uint hdrqentsize, int numa) 706 { 707 struct send_context_info *sci; 708 struct send_context *sc = NULL; 709 dma_addr_t dma; 710 unsigned long flags; 711 u64 reg; 712 u32 thresh; 713 u32 sw_index; 714 u32 hw_context; 715 int ret; 716 u8 opval, opmask; 717 718 /* do not allocate while frozen */ 719 if (dd->flags & HFI1_FROZEN) 720 return NULL; 721 722 sc = kzalloc_node(sizeof(*sc), GFP_KERNEL, numa); 723 if (!sc) 724 return NULL; 725 726 sc->buffers_allocated = alloc_percpu(u32); 727 if (!sc->buffers_allocated) { 728 kfree(sc); 729 dd_dev_err(dd, 730 "Cannot allocate buffers_allocated per cpu counters\n" 731 ); 732 return NULL; 733 } 734 735 spin_lock_irqsave(&dd->sc_lock, flags); 736 ret = sc_hw_alloc(dd, type, &sw_index, &hw_context); 737 if (ret) { 738 spin_unlock_irqrestore(&dd->sc_lock, flags); 739 free_percpu(sc->buffers_allocated); 740 kfree(sc); 741 return NULL; 742 } 743 744 sci = &dd->send_contexts[sw_index]; 745 sci->sc = sc; 746 747 sc->dd = dd; 748 sc->node = numa; 749 sc->type = type; 750 spin_lock_init(&sc->alloc_lock); 751 spin_lock_init(&sc->release_lock); 752 spin_lock_init(&sc->credit_ctrl_lock); 753 INIT_LIST_HEAD(&sc->piowait); 754 INIT_WORK(&sc->halt_work, sc_halted); 755 init_waitqueue_head(&sc->halt_wait); 756 757 /* grouping is always single context for now */ 758 sc->group = 0; 759 760 sc->sw_index = sw_index; 761 sc->hw_context = hw_context; 762 cr_group_addresses(sc, &dma); 763 sc->credits = sci->credits; 764 sc->size = sc->credits * PIO_BLOCK_SIZE; 765 766 /* PIO Send Memory Address details */ 767 #define PIO_ADDR_CONTEXT_MASK 0xfful 768 #define PIO_ADDR_CONTEXT_SHIFT 16 769 sc->base_addr = dd->piobase + ((hw_context & PIO_ADDR_CONTEXT_MASK) 770 << PIO_ADDR_CONTEXT_SHIFT); 771 772 /* set base and credits */ 773 reg = ((sci->credits & SC(CTRL_CTXT_DEPTH_MASK)) 774 << SC(CTRL_CTXT_DEPTH_SHIFT)) 775 | ((sci->base & SC(CTRL_CTXT_BASE_MASK)) 776 << SC(CTRL_CTXT_BASE_SHIFT)); 777 write_kctxt_csr(dd, hw_context, SC(CTRL), reg); 778 779 set_pio_integrity(sc); 780 781 /* unmask all errors */ 782 write_kctxt_csr(dd, hw_context, SC(ERR_MASK), (u64)-1); 783 784 /* set the default partition key */ 785 write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY), 786 (SC(CHECK_PARTITION_KEY_VALUE_MASK) & 787 DEFAULT_PKEY) << 788 SC(CHECK_PARTITION_KEY_VALUE_SHIFT)); 789 790 /* per context type checks */ 791 if (type == SC_USER) { 792 opval = USER_OPCODE_CHECK_VAL; 793 opmask = USER_OPCODE_CHECK_MASK; 794 } else { 795 opval = OPCODE_CHECK_VAL_DISABLED; 796 opmask = OPCODE_CHECK_MASK_DISABLED; 797 } 798 799 /* set the send context check opcode mask and value */ 800 write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE), 801 ((u64)opmask << SC(CHECK_OPCODE_MASK_SHIFT)) | 802 ((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT))); 803 804 /* set up credit return */ 805 reg = dma & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK); 806 write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), reg); 807 808 /* 809 * Calculate the initial credit return threshold. 810 * 811 * For Ack contexts, set a threshold for half the credits. 812 * For User contexts use the given percentage. This has been 813 * sanitized on driver start-up. 814 * For Kernel contexts, use the default MTU plus a header 815 * or half the credits, whichever is smaller. This should 816 * work for both the 3-deep buffering allocation and the 817 * pooling allocation. 
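 *
 * Illustrative outcomes: an Ack context with 32 credits gets a
 * threshold of 16; a User context follows the (already sanitized)
 * user_credit_return_threshold percentage; a Kernel context takes the
 * smaller of the 50% value and the MTU-based value from
 * sc_mtu_to_threshold().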
818 */ 819 if (type == SC_ACK) { 820 thresh = sc_percent_to_threshold(sc, 50); 821 } else if (type == SC_USER) { 822 thresh = sc_percent_to_threshold(sc, 823 user_credit_return_threshold); 824 } else { /* kernel */ 825 thresh = min(sc_percent_to_threshold(sc, 50), 826 sc_mtu_to_threshold(sc, hfi1_max_mtu, 827 hdrqentsize)); 828 } 829 reg = thresh << SC(CREDIT_CTRL_THRESHOLD_SHIFT); 830 /* add in early return */ 831 if (type == SC_USER && HFI1_CAP_IS_USET(EARLY_CREDIT_RETURN)) 832 reg |= SC(CREDIT_CTRL_EARLY_RETURN_SMASK); 833 else if (HFI1_CAP_IS_KSET(EARLY_CREDIT_RETURN)) /* kernel, ack */ 834 reg |= SC(CREDIT_CTRL_EARLY_RETURN_SMASK); 835 836 /* set up write-through credit_ctrl */ 837 sc->credit_ctrl = reg; 838 write_kctxt_csr(dd, hw_context, SC(CREDIT_CTRL), reg); 839 840 /* User send contexts should not allow sending on VL15 */ 841 if (type == SC_USER) { 842 reg = 1ULL << 15; 843 write_kctxt_csr(dd, hw_context, SC(CHECK_VL), reg); 844 } 845 846 spin_unlock_irqrestore(&dd->sc_lock, flags); 847 848 /* 849 * Allocate shadow ring to track outstanding PIO buffers _after_ 850 * unlocking. We don't know the size until the lock is held and 851 * we can't allocate while the lock is held. No one is using 852 * the context yet, so allocate it now. 853 * 854 * User contexts do not get a shadow ring. 855 */ 856 if (type != SC_USER) { 857 /* 858 * Size the shadow ring 1 larger than the number of credits 859 * so head == tail can mean empty. 860 */ 861 sc->sr_size = sci->credits + 1; 862 sc->sr = kcalloc_node(sc->sr_size, 863 sizeof(union pio_shadow_ring), 864 GFP_KERNEL, numa); 865 if (!sc->sr) { 866 sc_free(sc); 867 return NULL; 868 } 869 } 870 871 hfi1_cdbg(PIO, 872 "Send context %u(%u) %s group %u credits %u credit_ctrl 0x%llx threshold %u\n", 873 sw_index, 874 hw_context, 875 sc_type_name(type), 876 sc->group, 877 sc->credits, 878 sc->credit_ctrl, 879 thresh); 880 881 return sc; 882 } 883 884 /* free a per-NUMA send context structure */ 885 void sc_free(struct send_context *sc) 886 { 887 struct hfi1_devdata *dd; 888 unsigned long flags; 889 u32 sw_index; 890 u32 hw_context; 891 892 if (!sc) 893 return; 894 895 sc->flags |= SCF_IN_FREE; /* ensure no restarts */ 896 dd = sc->dd; 897 if (!list_empty(&sc->piowait)) 898 dd_dev_err(dd, "piowait list not empty!\n"); 899 sw_index = sc->sw_index; 900 hw_context = sc->hw_context; 901 sc_disable(sc); /* make sure the HW is disabled */ 902 flush_work(&sc->halt_work); 903 904 spin_lock_irqsave(&dd->sc_lock, flags); 905 dd->send_contexts[sw_index].sc = NULL; 906 907 /* clear/disable all registers set in sc_alloc */ 908 write_kctxt_csr(dd, hw_context, SC(CTRL), 0); 909 write_kctxt_csr(dd, hw_context, SC(CHECK_ENABLE), 0); 910 write_kctxt_csr(dd, hw_context, SC(ERR_MASK), 0); 911 write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY), 0); 912 write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE), 0); 913 write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), 0); 914 write_kctxt_csr(dd, hw_context, SC(CREDIT_CTRL), 0); 915 916 /* release the index and context for re-use */ 917 sc_hw_free(dd, sw_index, hw_context); 918 spin_unlock_irqrestore(&dd->sc_lock, flags); 919 920 kfree(sc->sr); 921 free_percpu(sc->buffers_allocated); 922 kfree(sc); 923 } 924 925 /* disable the context */ 926 void sc_disable(struct send_context *sc) 927 { 928 u64 reg; 929 struct pio_buf *pbuf; 930 931 if (!sc) 932 return; 933 934 /* do all steps, even if already disabled */ 935 spin_lock_irq(&sc->alloc_lock); 936 reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL)); 937 reg &= 
~SC(CTRL_CTXT_ENABLE_SMASK); 938 sc->flags &= ~SCF_ENABLED; 939 sc_wait_for_packet_egress(sc, 1); 940 write_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL), reg); 941 942 /* 943 * Flush any waiters. Once the context is disabled, 944 * credit return interrupts are stopped (although there 945 * could be one in-process when the context is disabled). 946 * Wait one microsecond for any lingering interrupts, then 947 * proceed with the flush. 948 */ 949 udelay(1); 950 spin_lock(&sc->release_lock); 951 if (sc->sr) { /* this context has a shadow ring */ 952 while (sc->sr_tail != sc->sr_head) { 953 pbuf = &sc->sr[sc->sr_tail].pbuf; 954 if (pbuf->cb) 955 (*pbuf->cb)(pbuf->arg, PRC_SC_DISABLE); 956 sc->sr_tail++; 957 if (sc->sr_tail >= sc->sr_size) 958 sc->sr_tail = 0; 959 } 960 } 961 spin_unlock(&sc->release_lock); 962 spin_unlock_irq(&sc->alloc_lock); 963 } 964 965 /* return SendEgressCtxtStatus.PacketOccupancy */ 966 static u64 packet_occupancy(u64 reg) 967 { 968 return (reg & 969 SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK) 970 >> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT; 971 } 972 973 /* is egress halted on the context? */ 974 static bool egress_halted(u64 reg) 975 { 976 return !!(reg & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK); 977 } 978 979 /* is the send context halted? */ 980 static bool is_sc_halted(struct hfi1_devdata *dd, u32 hw_context) 981 { 982 return !!(read_kctxt_csr(dd, hw_context, SC(STATUS)) & 983 SC(STATUS_CTXT_HALTED_SMASK)); 984 } 985 986 /** 987 * sc_wait_for_packet_egress 988 * @sc: valid send context 989 * @pause: wait for credit return 990 * 991 * Wait for packet egress, optionally pause for credit return 992 * 993 * Egress halt and Context halt are not necessarily the same thing, so 994 * check for both. 995 * 996 * NOTE: The context halt bit may not be set immediately. Because of this, 997 * it is necessary to check the SW SFC_HALTED bit (set in the IRQ) and the HW 998 * context bit to determine if the context is halted. 999 */ 1000 static void sc_wait_for_packet_egress(struct send_context *sc, int pause) 1001 { 1002 struct hfi1_devdata *dd = sc->dd; 1003 u64 reg = 0; 1004 u64 reg_prev; 1005 u32 loop = 0; 1006 1007 while (1) { 1008 reg_prev = reg; 1009 reg = read_csr(dd, sc->hw_context * 8 + 1010 SEND_EGRESS_CTXT_STATUS); 1011 /* done if any halt bits, SW or HW are set */ 1012 if (sc->flags & SCF_HALTED || 1013 is_sc_halted(dd, sc->hw_context) || egress_halted(reg)) 1014 break; 1015 reg = packet_occupancy(reg); 1016 if (reg == 0) 1017 break; 1018 /* counter is reset if occupancy count changes */ 1019 if (reg != reg_prev) 1020 loop = 0; 1021 if (loop > 50000) { 1022 /* timed out - bounce the link */ 1023 dd_dev_err(dd, 1024 "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n", 1025 __func__, sc->sw_index, 1026 sc->hw_context, (u32)reg); 1027 queue_work(dd->pport->link_wq, 1028 &dd->pport->link_bounce_work); 1029 break; 1030 } 1031 loop++; 1032 udelay(1); 1033 } 1034 1035 if (pause) 1036 /* Add additional delay to ensure chip returns all credits */ 1037 pause_for_credit_return(dd); 1038 } 1039 1040 void sc_wait(struct hfi1_devdata *dd) 1041 { 1042 int i; 1043 1044 for (i = 0; i < dd->num_send_contexts; i++) { 1045 struct send_context *sc = dd->send_contexts[i].sc; 1046 1047 if (!sc) 1048 continue; 1049 sc_wait_for_packet_egress(sc, 0); 1050 } 1051 } 1052 1053 /* 1054 * Restart a context after it has been halted due to error. 
1055 * 1056 * If the first step fails - wait for the halt to be asserted, return early. 1057 * Otherwise complain about timeouts but keep going. 1058 * 1059 * It is expected that allocations (enabled flag bit) have been shut off 1060 * already (only applies to kernel contexts). 1061 */ 1062 int sc_restart(struct send_context *sc) 1063 { 1064 struct hfi1_devdata *dd = sc->dd; 1065 u64 reg; 1066 u32 loop; 1067 int count; 1068 1069 /* bounce off if not halted, or being free'd */ 1070 if (!(sc->flags & SCF_HALTED) || (sc->flags & SCF_IN_FREE)) 1071 return -EINVAL; 1072 1073 dd_dev_info(dd, "restarting send context %u(%u)\n", sc->sw_index, 1074 sc->hw_context); 1075 1076 /* 1077 * Step 1: Wait for the context to actually halt. 1078 * 1079 * The error interrupt is asynchronous to actually setting halt 1080 * on the context. 1081 */ 1082 loop = 0; 1083 while (1) { 1084 reg = read_kctxt_csr(dd, sc->hw_context, SC(STATUS)); 1085 if (reg & SC(STATUS_CTXT_HALTED_SMASK)) 1086 break; 1087 if (loop > 100) { 1088 dd_dev_err(dd, "%s: context %u(%u) not halting, skipping\n", 1089 __func__, sc->sw_index, sc->hw_context); 1090 return -ETIME; 1091 } 1092 loop++; 1093 udelay(1); 1094 } 1095 1096 /* 1097 * Step 2: Ensure no users are still trying to write to PIO. 1098 * 1099 * For kernel contexts, we have already turned off buffer allocation. 1100 * Now wait for the buffer count to go to zero. 1101 * 1102 * For user contexts, the user handling code has cut off write access 1103 * to the context's PIO pages before calling this routine and will 1104 * restore write access after this routine returns. 1105 */ 1106 if (sc->type != SC_USER) { 1107 /* kernel context */ 1108 loop = 0; 1109 while (1) { 1110 count = get_buffers_allocated(sc); 1111 if (count == 0) 1112 break; 1113 if (loop > 100) { 1114 dd_dev_err(dd, 1115 "%s: context %u(%u) timeout waiting for PIO buffers to zero, remaining %d\n", 1116 __func__, sc->sw_index, 1117 sc->hw_context, count); 1118 } 1119 loop++; 1120 udelay(1); 1121 } 1122 } 1123 1124 /* 1125 * Step 3: Wait for all packets to egress. 1126 * This is done while disabling the send context 1127 * 1128 * Step 4: Disable the context 1129 * 1130 * This is a superset of the halt. After the disable, the 1131 * errors can be cleared. 1132 */ 1133 sc_disable(sc); 1134 1135 /* 1136 * Step 5: Enable the context 1137 * 1138 * This enable will clear the halted flag and per-send context 1139 * error flags. 1140 */ 1141 return sc_enable(sc); 1142 } 1143 1144 /* 1145 * PIO freeze processing. To be called after the TXE block is fully frozen. 1146 * Go through all frozen send contexts and disable them. The contexts are 1147 * already stopped by the freeze. 1148 */ 1149 void pio_freeze(struct hfi1_devdata *dd) 1150 { 1151 struct send_context *sc; 1152 int i; 1153 1154 for (i = 0; i < dd->num_send_contexts; i++) { 1155 sc = dd->send_contexts[i].sc; 1156 /* 1157 * Don't disable unallocated, unfrozen, or user send contexts. 1158 * User send contexts will be disabled when the process 1159 * calls into the driver to reset its context. 1160 */ 1161 if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER) 1162 continue; 1163 1164 /* only need to disable, the context is already stopped */ 1165 sc_disable(sc); 1166 } 1167 } 1168 1169 /* 1170 * Unfreeze PIO for kernel send contexts. The precondition for calling this 1171 * is that all PIO send contexts have been disabled and the SPC freeze has 1172 * been cleared. Now perform the last step and re-enable each kernel context. 
 * User (PSM) processing will occur when PSM calls into the kernel to
 * acknowledge the freeze.
 */
void pio_kernel_unfreeze(struct hfi1_devdata *dd)
{
	struct send_context *sc;
	int i;

	for (i = 0; i < dd->num_send_contexts; i++) {
		sc = dd->send_contexts[i].sc;
		if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER)
			continue;
		if (sc->flags & SCF_LINK_DOWN)
			continue;

		sc_enable(sc);	/* will clear the sc frozen flag */
	}
}

/**
 * pio_kernel_linkup() - Re-enable send contexts after linkup event
 * @dd: valid device data
 *
 * When the link goes down, the freeze path is taken. However, a link down
 * event is different from a freeze because, if the send context is simply
 * re-enabled, whoever is sending data will start sending again, which will
 * hang any QP that is sending data.
 *
 * The freeze path now looks at the type of event that occurs and takes this
 * path for a link down event.
 */
void pio_kernel_linkup(struct hfi1_devdata *dd)
{
	struct send_context *sc;
	int i;

	for (i = 0; i < dd->num_send_contexts; i++) {
		sc = dd->send_contexts[i].sc;
		if (!sc || !(sc->flags & SCF_LINK_DOWN) || sc->type == SC_USER)
			continue;

		sc_enable(sc);	/* will clear the sc link down flag */
	}
}

/*
 * Wait for the SendPioInitCtxt.PioInitInProgress bit to clear.
 * Returns:
 *	-ETIMEDOUT - if we wait too long
 *	-EIO	   - if there was an error
 */
static int pio_init_wait_progress(struct hfi1_devdata *dd)
{
	u64 reg;
	int max, count = 0;

	/* max is the longest possible HW init time / delay */
	max = (dd->icode == ICODE_FPGA_EMULATION) ? 120 : 5;
	while (1) {
		reg = read_csr(dd, SEND_PIO_INIT_CTXT);
		if (!(reg & SEND_PIO_INIT_CTXT_PIO_INIT_IN_PROGRESS_SMASK))
			break;
		if (count >= max)
			return -ETIMEDOUT;
		udelay(5);
		count++;
	}

	return reg & SEND_PIO_INIT_CTXT_PIO_INIT_ERR_SMASK ? -EIO : 0;
}

/*
 * Reset all of the send contexts to their power-on state.  Used
 * only during manual init - no lock against sc_enable needed.
 */
void pio_reset_all(struct hfi1_devdata *dd)
{
	int ret;

	/* make sure the init engine is not busy */
	ret = pio_init_wait_progress(dd);
	/* ignore any timeout */
	if (ret == -EIO) {
		/* clear the error */
		write_csr(dd, SEND_PIO_ERR_CLEAR,
			  SEND_PIO_ERR_CLEAR_PIO_INIT_SM_IN_ERR_SMASK);
	}

	/* reset init all */
	write_csr(dd, SEND_PIO_INIT_CTXT,
		  SEND_PIO_INIT_CTXT_PIO_ALL_CTXT_INIT_SMASK);
	udelay(2);
	ret = pio_init_wait_progress(dd);
	if (ret < 0) {
		dd_dev_err(dd,
			   "PIO send context init %s while initializing all PIO blocks\n",
			   ret == -ETIMEDOUT ? "is stuck" : "had an error");
	}
}

/* enable the context */
int sc_enable(struct send_context *sc)
{
	u64 sc_ctrl, reg, pio;
	struct hfi1_devdata *dd;
	unsigned long flags;
	int ret = 0;

	if (!sc)
		return -EINVAL;
	dd = sc->dd;

	/*
	 * Obtain the allocator lock to guard against any allocation
	 * attempts (which should not happen prior to context being
	 * enabled). On the release/disable side we don't need to
	 * worry about locking since the releaser will not do anything
	 * if the context accounting values have not changed.
1291 */ 1292 spin_lock_irqsave(&sc->alloc_lock, flags); 1293 sc_ctrl = read_kctxt_csr(dd, sc->hw_context, SC(CTRL)); 1294 if ((sc_ctrl & SC(CTRL_CTXT_ENABLE_SMASK))) 1295 goto unlock; /* already enabled */ 1296 1297 /* IMPORTANT: only clear free and fill if transitioning 0 -> 1 */ 1298 1299 *sc->hw_free = 0; 1300 sc->free = 0; 1301 sc->alloc_free = 0; 1302 sc->fill = 0; 1303 sc->fill_wrap = 0; 1304 sc->sr_head = 0; 1305 sc->sr_tail = 0; 1306 sc->flags = 0; 1307 /* the alloc lock insures no fast path allocation */ 1308 reset_buffers_allocated(sc); 1309 1310 /* 1311 * Clear all per-context errors. Some of these will be set when 1312 * we are re-enabling after a context halt. Now that the context 1313 * is disabled, the halt will not clear until after the PIO init 1314 * engine runs below. 1315 */ 1316 reg = read_kctxt_csr(dd, sc->hw_context, SC(ERR_STATUS)); 1317 if (reg) 1318 write_kctxt_csr(dd, sc->hw_context, SC(ERR_CLEAR), reg); 1319 1320 /* 1321 * The HW PIO initialization engine can handle only one init 1322 * request at a time. Serialize access to each device's engine. 1323 */ 1324 spin_lock(&dd->sc_init_lock); 1325 /* 1326 * Since access to this code block is serialized and 1327 * each access waits for the initialization to complete 1328 * before releasing the lock, the PIO initialization engine 1329 * should not be in use, so we don't have to wait for the 1330 * InProgress bit to go down. 1331 */ 1332 pio = ((sc->hw_context & SEND_PIO_INIT_CTXT_PIO_CTXT_NUM_MASK) << 1333 SEND_PIO_INIT_CTXT_PIO_CTXT_NUM_SHIFT) | 1334 SEND_PIO_INIT_CTXT_PIO_SINGLE_CTXT_INIT_SMASK; 1335 write_csr(dd, SEND_PIO_INIT_CTXT, pio); 1336 /* 1337 * Wait until the engine is done. Give the chip the required time 1338 * so, hopefully, we read the register just once. 1339 */ 1340 udelay(2); 1341 ret = pio_init_wait_progress(dd); 1342 spin_unlock(&dd->sc_init_lock); 1343 if (ret) { 1344 dd_dev_err(dd, 1345 "sctxt%u(%u): Context not enabled due to init failure %d\n", 1346 sc->sw_index, sc->hw_context, ret); 1347 goto unlock; 1348 } 1349 1350 /* 1351 * All is well. Enable the context. 1352 */ 1353 sc_ctrl |= SC(CTRL_CTXT_ENABLE_SMASK); 1354 write_kctxt_csr(dd, sc->hw_context, SC(CTRL), sc_ctrl); 1355 /* 1356 * Read SendCtxtCtrl to force the write out and prevent a timing 1357 * hazard where a PIO write may reach the context before the enable. 1358 */ 1359 read_kctxt_csr(dd, sc->hw_context, SC(CTRL)); 1360 sc->flags |= SCF_ENABLED; 1361 1362 unlock: 1363 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1364 1365 return ret; 1366 } 1367 1368 /* force a credit return on the context */ 1369 void sc_return_credits(struct send_context *sc) 1370 { 1371 if (!sc) 1372 return; 1373 1374 /* a 0->1 transition schedules a credit return */ 1375 write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE), 1376 SC(CREDIT_FORCE_FORCE_RETURN_SMASK)); 1377 /* 1378 * Ensure that the write is flushed and the credit return is 1379 * scheduled. We care more about the 0 -> 1 transition. 
1380 */ 1381 read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE)); 1382 /* set back to 0 for next time */ 1383 write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE), 0); 1384 } 1385 1386 /* allow all in-flight packets to drain on the context */ 1387 void sc_flush(struct send_context *sc) 1388 { 1389 if (!sc) 1390 return; 1391 1392 sc_wait_for_packet_egress(sc, 1); 1393 } 1394 1395 /* drop all packets on the context, no waiting until they are sent */ 1396 void sc_drop(struct send_context *sc) 1397 { 1398 if (!sc) 1399 return; 1400 1401 dd_dev_info(sc->dd, "%s: context %u(%u) - not implemented\n", 1402 __func__, sc->sw_index, sc->hw_context); 1403 } 1404 1405 /* 1406 * Start the software reaction to a context halt or SPC freeze: 1407 * - mark the context as halted or frozen 1408 * - stop buffer allocations 1409 * 1410 * Called from the error interrupt. Other work is deferred until 1411 * out of the interrupt. 1412 */ 1413 void sc_stop(struct send_context *sc, int flag) 1414 { 1415 unsigned long flags; 1416 1417 /* stop buffer allocations */ 1418 spin_lock_irqsave(&sc->alloc_lock, flags); 1419 /* mark the context */ 1420 sc->flags |= flag; 1421 sc->flags &= ~SCF_ENABLED; 1422 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1423 wake_up(&sc->halt_wait); 1424 } 1425 1426 #define BLOCK_DWORDS (PIO_BLOCK_SIZE / sizeof(u32)) 1427 #define dwords_to_blocks(x) DIV_ROUND_UP(x, BLOCK_DWORDS) 1428 1429 /* 1430 * The send context buffer "allocator". 1431 * 1432 * @sc: the PIO send context we are allocating from 1433 * @len: length of whole packet - including PBC - in dwords 1434 * @cb: optional callback to call when the buffer is finished sending 1435 * @arg: argument for cb 1436 * 1437 * Return a pointer to a PIO buffer if successful, NULL if not enough room. 1438 */ 1439 struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len, 1440 pio_release_cb cb, void *arg) 1441 { 1442 struct pio_buf *pbuf = NULL; 1443 unsigned long flags; 1444 unsigned long avail; 1445 unsigned long blocks = dwords_to_blocks(dw_len); 1446 u32 fill_wrap; 1447 int trycount = 0; 1448 u32 head, next; 1449 1450 spin_lock_irqsave(&sc->alloc_lock, flags); 1451 if (!(sc->flags & SCF_ENABLED)) { 1452 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1453 goto done; 1454 } 1455 1456 retry: 1457 avail = (unsigned long)sc->credits - (sc->fill - sc->alloc_free); 1458 if (blocks > avail) { 1459 /* not enough room */ 1460 if (unlikely(trycount)) { /* already tried to get more room */ 1461 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1462 goto done; 1463 } 1464 /* copy from receiver cache line and recalculate */ 1465 sc->alloc_free = READ_ONCE(sc->free); 1466 avail = 1467 (unsigned long)sc->credits - 1468 (sc->fill - sc->alloc_free); 1469 if (blocks > avail) { 1470 /* still no room, actively update */ 1471 sc_release_update(sc); 1472 sc->alloc_free = READ_ONCE(sc->free); 1473 trycount++; 1474 goto retry; 1475 } 1476 } 1477 1478 /* there is enough room */ 1479 1480 preempt_disable(); 1481 this_cpu_inc(*sc->buffers_allocated); 1482 1483 /* read this once */ 1484 head = sc->sr_head; 1485 1486 /* "allocate" the buffer */ 1487 sc->fill += blocks; 1488 fill_wrap = sc->fill_wrap; 1489 sc->fill_wrap += blocks; 1490 if (sc->fill_wrap >= sc->credits) 1491 sc->fill_wrap = sc->fill_wrap - sc->credits; 1492 1493 /* 1494 * Fill the parts that the releaser looks at before moving the head. 1495 * The only necessary piece is the sent_at field. 
The credits 1496 * we have just allocated cannot have been returned yet, so the 1497 * cb and arg will not be looked at for a "while". Put them 1498 * on this side of the memory barrier anyway. 1499 */ 1500 pbuf = &sc->sr[head].pbuf; 1501 pbuf->sent_at = sc->fill; 1502 pbuf->cb = cb; 1503 pbuf->arg = arg; 1504 pbuf->sc = sc; /* could be filled in at sc->sr init time */ 1505 /* make sure this is in memory before updating the head */ 1506 1507 /* calculate next head index, do not store */ 1508 next = head + 1; 1509 if (next >= sc->sr_size) 1510 next = 0; 1511 /* 1512 * update the head - must be last! - the releaser can look at fields 1513 * in pbuf once we move the head 1514 */ 1515 smp_wmb(); 1516 sc->sr_head = next; 1517 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1518 1519 /* finish filling in the buffer outside the lock */ 1520 pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE; 1521 pbuf->end = sc->base_addr + sc->size; 1522 pbuf->qw_written = 0; 1523 pbuf->carry_bytes = 0; 1524 pbuf->carry.val64 = 0; 1525 done: 1526 return pbuf; 1527 } 1528 1529 /* 1530 * There are at least two entities that can turn on credit return 1531 * interrupts and they can overlap. Avoid problems by implementing 1532 * a count scheme that is enforced by a lock. The lock is needed because 1533 * the count and CSR write must be paired. 1534 */ 1535 1536 /* 1537 * Start credit return interrupts. This is managed by a count. If already 1538 * on, just increment the count. 1539 */ 1540 void sc_add_credit_return_intr(struct send_context *sc) 1541 { 1542 unsigned long flags; 1543 1544 /* lock must surround both the count change and the CSR update */ 1545 spin_lock_irqsave(&sc->credit_ctrl_lock, flags); 1546 if (sc->credit_intr_count == 0) { 1547 sc->credit_ctrl |= SC(CREDIT_CTRL_CREDIT_INTR_SMASK); 1548 write_kctxt_csr(sc->dd, sc->hw_context, 1549 SC(CREDIT_CTRL), sc->credit_ctrl); 1550 } 1551 sc->credit_intr_count++; 1552 spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags); 1553 } 1554 1555 /* 1556 * Stop credit return interrupts. This is managed by a count. Decrement the 1557 * count, if the last user, then turn the credit interrupts off. 1558 */ 1559 void sc_del_credit_return_intr(struct send_context *sc) 1560 { 1561 unsigned long flags; 1562 1563 WARN_ON(sc->credit_intr_count == 0); 1564 1565 /* lock must surround both the count change and the CSR update */ 1566 spin_lock_irqsave(&sc->credit_ctrl_lock, flags); 1567 sc->credit_intr_count--; 1568 if (sc->credit_intr_count == 0) { 1569 sc->credit_ctrl &= ~SC(CREDIT_CTRL_CREDIT_INTR_SMASK); 1570 write_kctxt_csr(sc->dd, sc->hw_context, 1571 SC(CREDIT_CTRL), sc->credit_ctrl); 1572 } 1573 spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags); 1574 } 1575 1576 /* 1577 * The caller must be careful when calling this. All needint calls 1578 * must be paired with !needint. 1579 */ 1580 void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint) 1581 { 1582 if (needint) 1583 sc_add_credit_return_intr(sc); 1584 else 1585 sc_del_credit_return_intr(sc); 1586 trace_hfi1_wantpiointr(sc, needint, sc->credit_ctrl); 1587 if (needint) { 1588 mmiowb(); 1589 sc_return_credits(sc); 1590 } 1591 } 1592 1593 /** 1594 * sc_piobufavail - callback when a PIO buffer is available 1595 * @sc: the send context 1596 * 1597 * This is called from the interrupt handler when a PIO buffer is 1598 * available after hfi1_verbs_send() returned an error that no buffers were 1599 * available. Disable the interrupt if there are no more QPs waiting. 
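 *
 * At most PIO_WAIT_BATCH_SIZE QPs are pulled off the wait list per
 * call; if waiters remain after the batch, the credit return interrupt
 * is re-armed so the rest are handled on a later callback.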
 */
static void sc_piobufavail(struct send_context *sc)
{
	struct hfi1_devdata *dd = sc->dd;
	struct hfi1_ibdev *dev = &dd->verbs_dev;
	struct list_head *list;
	struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE];
	struct rvt_qp *qp;
	struct hfi1_qp_priv *priv;
	unsigned long flags;
	uint i, n = 0, max_idx = 0;
	u8 max_starved_cnt = 0;

	if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
	    dd->send_contexts[sc->sw_index].type != SC_VL15)
		return;
	list = &sc->piowait;
	/*
	 * Note: checking that the piowait list is empty and clearing
	 * the buffer available interrupt needs to be atomic or we
	 * could end up with QPs on the wait list with the interrupt
	 * disabled.
	 */
	write_seqlock_irqsave(&dev->iowait_lock, flags);
	while (!list_empty(list)) {
		struct iowait *wait;

		if (n == ARRAY_SIZE(qps))
			break;
		wait = list_first_entry(list, struct iowait, list);
		qp = iowait_to_qp(wait);
		priv = qp->priv;
		list_del_init(&priv->s_iowait.list);
		priv->s_iowait.lock = NULL;
		iowait_starve_find_max(wait, &max_starved_cnt, n, &max_idx);
		/* refcount held until actual wake up */
		qps[n++] = qp;
	}
	/*
	 * If there had been waiters and there are more,
	 * ensure that we redo the force to avoid a potential hang.
	 */
	if (n) {
		hfi1_sc_wantpiobuf_intr(sc, 0);
		if (!list_empty(list))
			hfi1_sc_wantpiobuf_intr(sc, 1);
	}
	write_sequnlock_irqrestore(&dev->iowait_lock, flags);

	/* Wake up the most starved one first */
	if (n)
		hfi1_qp_wakeup(qps[max_idx],
			       RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
	for (i = 0; i < n; i++)
		if (i != max_idx)
			hfi1_qp_wakeup(qps[i],
				       RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
}

/* translate a send credit update to a bit code of reasons */
static inline int fill_code(u64 hw_free)
{
	int code = 0;

	if (hw_free & CR_STATUS_SMASK)
		code |= PRC_STATUS_ERR;
	if (hw_free & CR_CREDIT_RETURN_DUE_TO_PBC_SMASK)
		code |= PRC_PBC;
	if (hw_free & CR_CREDIT_RETURN_DUE_TO_THRESHOLD_SMASK)
		code |= PRC_THRESHOLD;
	if (hw_free & CR_CREDIT_RETURN_DUE_TO_ERR_SMASK)
		code |= PRC_FILL_ERR;
	if (hw_free & CR_CREDIT_RETURN_DUE_TO_FORCE_SMASK)
		code |= PRC_SC_DISABLE;
	return code;
}

/* use the jiffies compare to get the wrap right */
#define sent_before(a, b) time_before(a, b)	/* a < b */

/*
 * The send context buffer "releaser".
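 *
 * The credit-return free counter is a wrapping field (CR_COUNTER_MASK
 * wide), so the update below computes the delta modulo that width.
 * Worked example with an illustrative 8-bit mask: if the hardware
 * counter reads 5 and (sc->free & CR_COUNTER_MASK) is 250, then
 * extra = (5 - 250) & 0xff = 11, and sc->free advances by 11 across
 * the wrap.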
1682 */ 1683 void sc_release_update(struct send_context *sc) 1684 { 1685 struct pio_buf *pbuf; 1686 u64 hw_free; 1687 u32 head, tail; 1688 unsigned long old_free; 1689 unsigned long free; 1690 unsigned long extra; 1691 unsigned long flags; 1692 int code; 1693 1694 if (!sc) 1695 return; 1696 1697 spin_lock_irqsave(&sc->release_lock, flags); 1698 /* update free */ 1699 hw_free = le64_to_cpu(*sc->hw_free); /* volatile read */ 1700 old_free = sc->free; 1701 extra = (((hw_free & CR_COUNTER_SMASK) >> CR_COUNTER_SHIFT) 1702 - (old_free & CR_COUNTER_MASK)) 1703 & CR_COUNTER_MASK; 1704 free = old_free + extra; 1705 trace_hfi1_piofree(sc, extra); 1706 1707 /* call sent buffer callbacks */ 1708 code = -1; /* code not yet set */ 1709 head = READ_ONCE(sc->sr_head); /* snapshot the head */ 1710 tail = sc->sr_tail; 1711 while (head != tail) { 1712 pbuf = &sc->sr[tail].pbuf; 1713 1714 if (sent_before(free, pbuf->sent_at)) { 1715 /* not sent yet */ 1716 break; 1717 } 1718 if (pbuf->cb) { 1719 if (code < 0) /* fill in code on first user */ 1720 code = fill_code(hw_free); 1721 (*pbuf->cb)(pbuf->arg, code); 1722 } 1723 1724 tail++; 1725 if (tail >= sc->sr_size) 1726 tail = 0; 1727 } 1728 sc->sr_tail = tail; 1729 /* make sure tail is updated before free */ 1730 smp_wmb(); 1731 sc->free = free; 1732 spin_unlock_irqrestore(&sc->release_lock, flags); 1733 sc_piobufavail(sc); 1734 } 1735 1736 /* 1737 * Send context group releaser. Argument is the send context that caused 1738 * the interrupt. Called from the send context interrupt handler. 1739 * 1740 * Call release on all contexts in the group. 1741 * 1742 * This routine takes the sc_lock without an irqsave because it is only 1743 * called from an interrupt handler. Adjust if that changes. 1744 */ 1745 void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context) 1746 { 1747 struct send_context *sc; 1748 u32 sw_index; 1749 u32 gc, gc_end; 1750 1751 spin_lock(&dd->sc_lock); 1752 sw_index = dd->hw_to_sw[hw_context]; 1753 if (unlikely(sw_index >= dd->num_send_contexts)) { 1754 dd_dev_err(dd, "%s: invalid hw (%u) to sw (%u) mapping\n", 1755 __func__, hw_context, sw_index); 1756 goto done; 1757 } 1758 sc = dd->send_contexts[sw_index].sc; 1759 if (unlikely(!sc)) 1760 goto done; 1761 1762 gc = group_context(hw_context, sc->group); 1763 gc_end = gc + group_size(sc->group); 1764 for (; gc < gc_end; gc++) { 1765 sw_index = dd->hw_to_sw[gc]; 1766 if (unlikely(sw_index >= dd->num_send_contexts)) { 1767 dd_dev_err(dd, 1768 "%s: invalid hw (%u) to sw (%u) mapping\n", 1769 __func__, hw_context, sw_index); 1770 continue; 1771 } 1772 sc_release_update(dd->send_contexts[sw_index].sc); 1773 } 1774 done: 1775 spin_unlock(&dd->sc_lock); 1776 } 1777 1778 /* 1779 * pio_select_send_context_vl() - select send context 1780 * @dd: devdata 1781 * @selector: a spreading factor 1782 * @vl: this vl 1783 * 1784 * This function returns a send context based on the selector and a vl. 
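 *
 * The per-VL map element holds a power-of-two sized array of kernel
 * send contexts, so 'e->ksc[selector & e->mask]' spreads consecutive
 * selector values round-robin across that VL's contexts.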
 * The mapping fields are protected by RCU.
 */
struct send_context *pio_select_send_context_vl(struct hfi1_devdata *dd,
						u32 selector, u8 vl)
{
	struct pio_vl_map *m;
	struct pio_map_elem *e;
	struct send_context *rval;

	/*
	 * NOTE This should only happen if SC->VL changed after the initial
	 * checks on the QP/AH
	 * Default will return VL0's send context below
	 */
	if (unlikely(vl >= num_vls)) {
		rval = NULL;
		goto done;
	}

	rcu_read_lock();
	m = rcu_dereference(dd->pio_map);
	if (unlikely(!m)) {
		rcu_read_unlock();
		return dd->vld[0].sc;
	}
	e = m->map[vl & m->mask];
	rval = e->ksc[selector & e->mask];
	rcu_read_unlock();

done:
	rval = !rval ? dd->vld[0].sc : rval;
	return rval;
}

/*
 * pio_select_send_context_sc() - select send context
 * @dd: devdata
 * @selector: a spreading factor
 * @sc5: the 5 bit sc
 *
 * This function returns a send context based on the selector and an sc.
 */
struct send_context *pio_select_send_context_sc(struct hfi1_devdata *dd,
						u32 selector, u8 sc5)
{
	u8 vl = sc_to_vlt(dd, sc5);

	return pio_select_send_context_vl(dd, selector, vl);
}

/*
 * Free the indicated map struct
 */
static void pio_map_free(struct pio_vl_map *m)
{
	int i;

	for (i = 0; m && i < m->actual_vls; i++)
		kfree(m->map[i]);
	kfree(m);
}

/*
 * Handle RCU callback
 */
static void pio_map_rcu_callback(struct rcu_head *list)
{
	struct pio_vl_map *m = container_of(list, struct pio_vl_map, list);

	pio_map_free(m);
}

/*
 * Set credit return threshold for the kernel send context
 */
static void set_threshold(struct hfi1_devdata *dd, int scontext, int i)
{
	u32 thres;

	thres = min(sc_percent_to_threshold(dd->kernel_send_context[scontext],
					    50),
		    sc_mtu_to_threshold(dd->kernel_send_context[scontext],
					dd->vld[i].mtu,
					dd->rcd[0]->rcvhdrqentsize));
	sc_set_cr_threshold(dd->kernel_send_context[scontext], thres);
}

/*
 * pio_map_init - called when #vls change
 * @dd: hfi1_devdata
 * @port: port number
 * @num_vls: number of vls
 * @vl_scontexts: per vl send context mapping (optional)
 *
 * This routine changes the mapping based on the number of vls.
 *
 * vl_scontexts is used to specify a non-uniform vl/send context
 * loading. NULL implies auto computing the loading and giving each
 * VL a uniform distribution of send contexts per VL.
 *
 * The auto algorithm computes the sc_per_vl and the number of extra
 * send contexts. Any extra send contexts are added from the last VL
 * on down.
 *
 * rcu locking is used here to control access to the mapping fields.
 *
 * If either the num_vls or num_send_contexts are non-power of 2, the
 * array sizes in the struct pio_vl_map and the struct pio_map_elem are
 * rounded up to the next highest power of 2 and the first entry is
 * reused in a round robin fashion.
 *
 * If an error occurs, the map change is not done and the existing
 * mapping is left unchanged.
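 *
 * Auto-distribution example (illustrative counts): with 16 kernel send
 * contexts and 5 VLs, sc_per_vl = 3 and extra = 1, so VL4 gets 4
 * contexts and VLs 0-3 get 3 each; extras are handed out from the
 * last VL downward.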
/*
 * pio_map_init - called when #vls change
 * @dd: hfi1_devdata
 * @port: port number
 * @num_vls: number of vls
 * @vl_scontexts: per vl send context mapping (optional)
 *
 * This routine changes the mapping based on the number of vls.
 *
 * vl_scontexts is used to specify a non-uniform vl/send context
 * loading. NULL implies auto computing the loading and giving each
 * VL a uniform distribution of send contexts.
 *
 * The auto algorithm computes sc_per_vl and the number of extra
 * send contexts.  Any extra send contexts are added from the last VL
 * on down.
 *
 * rcu locking is used here to control access to the mapping fields.
 *
 * If either num_vls or num_send_contexts is not a power of 2, the
 * array sizes in the struct pio_vl_map and the struct pio_map_elem are
 * rounded up to the next highest power of 2 and the first entry is
 * reused in a round robin fashion.
 *
 * If an error occurs, the map change is not done and the mapping is not
 * changed.
 */
int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
{
	int i, j;
	int extra, sc_per_vl;
	int scontext = 1;
	int num_kernel_send_contexts = 0;
	u8 lvl_scontexts[OPA_MAX_VLS];
	struct pio_vl_map *oldmap, *newmap;

	if (!vl_scontexts) {
		for (i = 0; i < dd->num_send_contexts; i++)
			if (dd->send_contexts[i].type == SC_KERNEL)
				num_kernel_send_contexts++;
		/* truncate divide */
		sc_per_vl = num_kernel_send_contexts / num_vls;
		/* extras */
		extra = num_kernel_send_contexts % num_vls;
		vl_scontexts = lvl_scontexts;
		/* add extras from last vl down */
		for (i = num_vls - 1; i >= 0; i--, extra--)
			vl_scontexts[i] = sc_per_vl + (extra > 0 ? 1 : 0);
	}
	/* build new map */
	newmap = kzalloc(sizeof(*newmap) +
			 roundup_pow_of_two(num_vls) *
			 sizeof(struct pio_map_elem *),
			 GFP_KERNEL);
	if (!newmap)
		goto bail;
	newmap->actual_vls = num_vls;
	newmap->vls = roundup_pow_of_two(num_vls);
	newmap->mask = (1 << ilog2(newmap->vls)) - 1;
	for (i = 0; i < newmap->vls; i++) {
		/* save for wrap around */
		int first_scontext = scontext;

		if (i < newmap->actual_vls) {
			int sz = roundup_pow_of_two(vl_scontexts[i]);

			/* only allocate once */
			newmap->map[i] = kzalloc(sizeof(*newmap->map[i]) +
						 sz * sizeof(struct send_context *),
						 GFP_KERNEL);
			if (!newmap->map[i])
				goto bail;
			newmap->map[i]->mask = (1 << ilog2(sz)) - 1;
			/*
			 * assign send contexts and
			 * adjust credit return threshold
			 */
			for (j = 0; j < sz; j++) {
				if (dd->kernel_send_context[scontext]) {
					newmap->map[i]->ksc[j] =
					dd->kernel_send_context[scontext];
					set_threshold(dd, scontext, i);
				}
				if (++scontext >= first_scontext +
						  vl_scontexts[i])
					/* wrap back to first send context */
					scontext = first_scontext;
			}
		} else {
			/* just re-use entry without allocating */
			newmap->map[i] = newmap->map[i % num_vls];
		}
		scontext = first_scontext + vl_scontexts[i];
	}
	/* newmap in hand, save old map */
	spin_lock_irq(&dd->pio_map_lock);
	oldmap = rcu_dereference_protected(dd->pio_map,
					   lockdep_is_held(&dd->pio_map_lock));

	/* publish newmap */
	rcu_assign_pointer(dd->pio_map, newmap);

	spin_unlock_irq(&dd->pio_map_lock);
	/* success, free any old map after grace period */
	if (oldmap)
		call_rcu(&oldmap->list, pio_map_rcu_callback);
	return 0;
bail:
	/* free any partial allocation */
	pio_map_free(newmap);
	return -ENOMEM;
}
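
/*
 * Illustrative sketch, not driver code: the automatic distribution computed
 * by pio_map_init() above when vl_scontexts is NULL.  With a hypothetical
 * 8 kernel send contexts spread over 3 VLs, sc_per_vl is 2 and extra is 2;
 * the extras are handed out from the last VL down, giving {2, 3, 3}.  The
 * counts are assumptions used only to show the arithmetic.
 */
#if 0	/* example only, not built */
static void example_distribute(u8 *vl_scontexts, int num_kernel, int num_vls)
{
	int sc_per_vl = num_kernel / num_vls;	/* 8 / 3 = 2 */
	int extra = num_kernel % num_vls;	/* 8 % 3 = 2 */
	int i;

	/* hand out the remainder starting from the last VL */
	for (i = num_vls - 1; i >= 0; i--, extra--)
		vl_scontexts[i] = sc_per_vl + (extra > 0 ? 1 : 0);
}
#endif
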
void free_pio_map(struct hfi1_devdata *dd)
{
	/* Free PIO map if allocated */
	if (rcu_access_pointer(dd->pio_map)) {
		spin_lock_irq(&dd->pio_map_lock);
		pio_map_free(rcu_access_pointer(dd->pio_map));
		RCU_INIT_POINTER(dd->pio_map, NULL);
		spin_unlock_irq(&dd->pio_map_lock);
		synchronize_rcu();
	}
	kfree(dd->kernel_send_context);
	dd->kernel_send_context = NULL;
}

int init_pervl_scs(struct hfi1_devdata *dd)
{
	int i;
	u64 mask, all_vl_mask = (u64)0x80ff; /* VLs 0-7, 15 */
	u64 data_vls_mask = (u64)0x00ff; /* VLs 0-7 */
	u32 ctxt;
	struct hfi1_pportdata *ppd = dd->pport;

	dd->vld[15].sc = sc_alloc(dd, SC_VL15,
				  dd->rcd[0]->rcvhdrqentsize, dd->node);
	if (!dd->vld[15].sc)
		return -ENOMEM;

	hfi1_init_ctxt(dd->vld[15].sc);
	dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048);

	dd->kernel_send_context = kcalloc_node(dd->num_send_contexts,
					       sizeof(struct send_context *),
					       GFP_KERNEL, dd->node);
	if (!dd->kernel_send_context)
		goto freesc15;

	dd->kernel_send_context[0] = dd->vld[15].sc;

	for (i = 0; i < num_vls; i++) {
		/*
		 * Since this function does not deal with a specific
		 * receive context, but we need the RcvHdrQ entry size,
		 * use the size from rcd[0]. It is guaranteed to be
		 * valid at this point and will remain the same for all
		 * receive contexts.
		 */
		dd->vld[i].sc = sc_alloc(dd, SC_KERNEL,
					 dd->rcd[0]->rcvhdrqentsize, dd->node);
		if (!dd->vld[i].sc)
			goto nomem;
		dd->kernel_send_context[i + 1] = dd->vld[i].sc;
		hfi1_init_ctxt(dd->vld[i].sc);
		/* non-VL15 contexts start with the max MTU */
		dd->vld[i].mtu = hfi1_max_mtu;
	}
	for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) {
		dd->kernel_send_context[i + 1] =
		sc_alloc(dd, SC_KERNEL, dd->rcd[0]->rcvhdrqentsize, dd->node);
		if (!dd->kernel_send_context[i + 1])
			goto nomem;
		hfi1_init_ctxt(dd->kernel_send_context[i + 1]);
	}

	sc_enable(dd->vld[15].sc);
	ctxt = dd->vld[15].sc->hw_context;
	mask = all_vl_mask & ~(1LL << 15);
	write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
	dd_dev_info(dd,
		    "Using send context %u(%u) for VL15\n",
		    dd->vld[15].sc->sw_index, ctxt);

	for (i = 0; i < num_vls; i++) {
		sc_enable(dd->vld[i].sc);
		ctxt = dd->vld[i].sc->hw_context;
		mask = all_vl_mask & ~(data_vls_mask);
		write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
	}
	for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) {
		sc_enable(dd->kernel_send_context[i + 1]);
		ctxt = dd->kernel_send_context[i + 1]->hw_context;
		mask = all_vl_mask & ~(data_vls_mask);
		write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
	}

	if (pio_map_init(dd, ppd->port - 1, num_vls, NULL))
		goto nomem;
	return 0;

nomem:
	for (i = 0; i < num_vls; i++) {
		sc_free(dd->vld[i].sc);
		dd->vld[i].sc = NULL;
	}

	for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++)
		sc_free(dd->kernel_send_context[i + 1]);

	kfree(dd->kernel_send_context);
	dd->kernel_send_context = NULL;

freesc15:
	sc_free(dd->vld[15].sc);
	return -ENOMEM;
}
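
/*
 * Illustrative sketch, not driver code: the kernel_send_context[] layout set
 * up by init_pervl_scs() above.  Slot 0 holds the VL15 context, slots
 * 1..num_vls hold the per-data-VL contexts (also reachable as dd->vld[i].sc),
 * and the remaining slots up to INIT_SC_PER_VL * num_vls hold the extra
 * kernel contexts.  pio_map_init() starts its walk at index 1, so the VL15
 * context is never placed in the data-VL map.  The helper name below is
 * hypothetical.
 */
#if 0	/* example only, not built */
static unsigned int example_num_kernel_scs(unsigned int num_vls)
{
	/* slot 0 (VL15) plus INIT_SC_PER_VL contexts per data VL */
	return 1 + INIT_SC_PER_VL * num_vls;
}
#endif
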
int init_credit_return(struct hfi1_devdata *dd)
{
	int ret;
	int i;

	dd->cr_base = kcalloc(
		node_affinity.num_possible_nodes,
		sizeof(struct credit_return_base),
		GFP_KERNEL);
	if (!dd->cr_base) {
		ret = -ENOMEM;
		goto done;
	}
	for_each_node_with_cpus(i) {
		int bytes = TXE_NUM_CONTEXTS * sizeof(struct credit_return);

		set_dev_node(&dd->pcidev->dev, i);
		dd->cr_base[i].va = dma_zalloc_coherent(
					&dd->pcidev->dev,
					bytes,
					&dd->cr_base[i].dma,
					GFP_KERNEL);
		if (!dd->cr_base[i].va) {
			set_dev_node(&dd->pcidev->dev, dd->node);
			dd_dev_err(dd,
				   "Unable to allocate credit return DMA range for NUMA %d\n",
				   i);
			ret = -ENOMEM;
			goto done;
		}
	}
	set_dev_node(&dd->pcidev->dev, dd->node);

	ret = 0;
done:
	return ret;
}

void free_credit_return(struct hfi1_devdata *dd)
{
	int i;

	if (!dd->cr_base)
		return;
	for (i = 0; i < node_affinity.num_possible_nodes; i++) {
		if (dd->cr_base[i].va) {
			dma_free_coherent(&dd->pcidev->dev,
					  TXE_NUM_CONTEXTS *
					  sizeof(struct credit_return),
					  dd->cr_base[i].va,
					  dd->cr_base[i].dma);
		}
	}
	kfree(dd->cr_base);
	dd->cr_base = NULL;
}