1 // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause 2 /* 3 * Copyright(c) 2015-2018 Intel Corporation. 4 */ 5 6 #include <linux/delay.h> 7 #include "hfi.h" 8 #include "qp.h" 9 #include "trace.h" 10 11 #define SC(name) SEND_CTXT_##name 12 /* 13 * Send Context functions 14 */ 15 static void sc_wait_for_packet_egress(struct send_context *sc, int pause); 16 17 /* 18 * Set the CM reset bit and wait for it to clear. Use the provided 19 * sendctrl register. This routine has no locking. 20 */ 21 void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl) 22 { 23 write_csr(dd, SEND_CTRL, sendctrl | SEND_CTRL_CM_RESET_SMASK); 24 while (1) { 25 udelay(1); 26 sendctrl = read_csr(dd, SEND_CTRL); 27 if ((sendctrl & SEND_CTRL_CM_RESET_SMASK) == 0) 28 break; 29 } 30 } 31 32 /* global control of PIO send */ 33 void pio_send_control(struct hfi1_devdata *dd, int op) 34 { 35 u64 reg, mask; 36 unsigned long flags; 37 int write = 1; /* write sendctrl back */ 38 int flush = 0; /* re-read sendctrl to make sure it is flushed */ 39 int i; 40 41 spin_lock_irqsave(&dd->sendctrl_lock, flags); 42 43 reg = read_csr(dd, SEND_CTRL); 44 switch (op) { 45 case PSC_GLOBAL_ENABLE: 46 reg |= SEND_CTRL_SEND_ENABLE_SMASK; 47 fallthrough; 48 case PSC_DATA_VL_ENABLE: 49 mask = 0; 50 for (i = 0; i < ARRAY_SIZE(dd->vld); i++) 51 if (!dd->vld[i].mtu) 52 mask |= BIT_ULL(i); 53 /* Disallow sending on VLs not enabled */ 54 mask = (mask & SEND_CTRL_UNSUPPORTED_VL_MASK) << 55 SEND_CTRL_UNSUPPORTED_VL_SHIFT; 56 reg = (reg & ~SEND_CTRL_UNSUPPORTED_VL_SMASK) | mask; 57 break; 58 case PSC_GLOBAL_DISABLE: 59 reg &= ~SEND_CTRL_SEND_ENABLE_SMASK; 60 break; 61 case PSC_GLOBAL_VLARB_ENABLE: 62 reg |= SEND_CTRL_VL_ARBITER_ENABLE_SMASK; 63 break; 64 case PSC_GLOBAL_VLARB_DISABLE: 65 reg &= ~SEND_CTRL_VL_ARBITER_ENABLE_SMASK; 66 break; 67 case PSC_CM_RESET: 68 __cm_reset(dd, reg); 69 write = 0; /* CSR already written (and flushed) */ 70 break; 71 case PSC_DATA_VL_DISABLE: 72 reg |= SEND_CTRL_UNSUPPORTED_VL_SMASK; 73 flush = 1; 74 
break; 75 default: 76 dd_dev_err(dd, "%s: invalid control %d\n", __func__, op); 77 break; 78 } 79 80 if (write) { 81 write_csr(dd, SEND_CTRL, reg); 82 if (flush) 83 (void)read_csr(dd, SEND_CTRL); /* flush write */ 84 } 85 86 spin_unlock_irqrestore(&dd->sendctrl_lock, flags); 87 } 88 89 /* number of send context memory pools */ 90 #define NUM_SC_POOLS 2 91 92 /* Send Context Size (SCS) wildcards */ 93 #define SCS_POOL_0 -1 94 #define SCS_POOL_1 -2 95 96 /* Send Context Count (SCC) wildcards */ 97 #define SCC_PER_VL -1 98 #define SCC_PER_CPU -2 99 #define SCC_PER_KRCVQ -3 100 101 /* Send Context Size (SCS) constants */ 102 #define SCS_ACK_CREDITS 32 103 #define SCS_VL15_CREDITS 102 /* 3 pkts of 2048B data + 128B header */ 104 105 #define PIO_THRESHOLD_CEILING 4096 106 107 #define PIO_WAIT_BATCH_SIZE 5 108 109 /* default send context sizes */ 110 static struct sc_config_sizes sc_config_sizes[SC_MAX] = { 111 [SC_KERNEL] = { .size = SCS_POOL_0, /* even divide, pool 0 */ 112 .count = SCC_PER_VL }, /* one per NUMA */ 113 [SC_ACK] = { .size = SCS_ACK_CREDITS, 114 .count = SCC_PER_KRCVQ }, 115 [SC_USER] = { .size = SCS_POOL_0, /* even divide, pool 0 */ 116 .count = SCC_PER_CPU }, /* one per CPU */ 117 [SC_VL15] = { .size = SCS_VL15_CREDITS, 118 .count = 1 }, 119 120 }; 121 122 /* send context memory pool configuration */ 123 struct mem_pool_config { 124 int centipercent; /* % of memory, in 100ths of 1% */ 125 int absolute_blocks; /* absolute block count */ 126 }; 127 128 /* default memory pool configuration: 100% in pool 0 */ 129 static struct mem_pool_config sc_mem_pool_config[NUM_SC_POOLS] = { 130 /* centi%, abs blocks */ 131 { 10000, -1 }, /* pool 0 */ 132 { 0, -1 }, /* pool 1 */ 133 }; 134 135 /* memory pool information, used when calculating final sizes */ 136 struct mem_pool_info { 137 int centipercent; /* 138 * 100th of 1% of memory to use, -1 if blocks 139 * already set 140 */ 141 int count; /* count of contexts in the pool */ 142 int blocks; /* block size of the 
pool */ 143 int size; /* context size, in blocks */ 144 }; 145 146 /* 147 * Convert a pool wildcard to a valid pool index. The wildcards 148 * start at -1 and increase negatively. Map them as: 149 * -1 => 0 150 * -2 => 1 151 * etc. 152 * 153 * Return -1 on non-wildcard input, otherwise convert to a pool number. 154 */ 155 static int wildcard_to_pool(int wc) 156 { 157 if (wc >= 0) 158 return -1; /* non-wildcard */ 159 return -wc - 1; 160 } 161 162 static const char *sc_type_names[SC_MAX] = { 163 "kernel", 164 "ack", 165 "user", 166 "vl15" 167 }; 168 169 static const char *sc_type_name(int index) 170 { 171 if (index < 0 || index >= SC_MAX) 172 return "unknown"; 173 return sc_type_names[index]; 174 } 175 176 /* 177 * Read the send context memory pool configuration and send context 178 * size configuration. Replace any wildcards and come up with final 179 * counts and sizes for the send context types. 180 */ 181 int init_sc_pools_and_sizes(struct hfi1_devdata *dd) 182 { 183 struct mem_pool_info mem_pool_info[NUM_SC_POOLS] = { { 0 } }; 184 int total_blocks = (chip_pio_mem_size(dd) / PIO_BLOCK_SIZE) - 1; 185 int total_contexts = 0; 186 int fixed_blocks; 187 int pool_blocks; 188 int used_blocks; 189 int cp_total; /* centipercent total */ 190 int ab_total; /* absolute block total */ 191 int extra; 192 int i; 193 194 /* 195 * When SDMA is enabled, kernel context pio packet size is capped by 196 * "piothreshold". Reduce pio buffer allocation for kernel context by 197 * setting it to a fixed size. The allocation allows 3-deep buffering 198 * of the largest pio packets plus up to 128 bytes header, sufficient 199 * to maintain verbs performance. 200 * 201 * When SDMA is disabled, keep the default pooling allocation. 202 */ 203 if (HFI1_CAP_IS_KSET(SDMA)) { 204 u16 max_pkt_size = (piothreshold < PIO_THRESHOLD_CEILING) ? 
205 piothreshold : PIO_THRESHOLD_CEILING; 206 sc_config_sizes[SC_KERNEL].size = 207 3 * (max_pkt_size + 128) / PIO_BLOCK_SIZE; 208 } 209 210 /* 211 * Step 0: 212 * - copy the centipercents/absolute sizes from the pool config 213 * - sanity check these values 214 * - add up centipercents, then later check for full value 215 * - add up absolute blocks, then later check for over-commit 216 */ 217 cp_total = 0; 218 ab_total = 0; 219 for (i = 0; i < NUM_SC_POOLS; i++) { 220 int cp = sc_mem_pool_config[i].centipercent; 221 int ab = sc_mem_pool_config[i].absolute_blocks; 222 223 /* 224 * A negative value is "unused" or "invalid". Both *can* 225 * be valid, but centipercent wins, so check that first 226 */ 227 if (cp >= 0) { /* centipercent valid */ 228 cp_total += cp; 229 } else if (ab >= 0) { /* absolute blocks valid */ 230 ab_total += ab; 231 } else { /* neither valid */ 232 dd_dev_err( 233 dd, 234 "Send context memory pool %d: both the block count and centipercent are invalid\n", 235 i); 236 return -EINVAL; 237 } 238 239 mem_pool_info[i].centipercent = cp; 240 mem_pool_info[i].blocks = ab; 241 } 242 243 /* do not use both % and absolute blocks for different pools */ 244 if (cp_total != 0 && ab_total != 0) { 245 dd_dev_err( 246 dd, 247 "All send context memory pools must be described as either centipercent or blocks, no mixing between pools\n"); 248 return -EINVAL; 249 } 250 251 /* if any percentages are present, they must add up to 100% x 100 */ 252 if (cp_total != 0 && cp_total != 10000) { 253 dd_dev_err( 254 dd, 255 "Send context memory pool centipercent is %d, expecting 10000\n", 256 cp_total); 257 return -EINVAL; 258 } 259 260 /* the absolute pool total cannot be more than the mem total */ 261 if (ab_total > total_blocks) { 262 dd_dev_err( 263 dd, 264 "Send context memory pool absolute block count %d is larger than the memory size %d\n", 265 ab_total, total_blocks); 266 return -EINVAL; 267 } 268 269 /* 270 * Step 2: 271 * - copy from the context size config 272 * - 
replace context type wildcard counts with real values 273 * - add up non-memory pool block sizes 274 * - add up memory pool user counts 275 */ 276 fixed_blocks = 0; 277 for (i = 0; i < SC_MAX; i++) { 278 int count = sc_config_sizes[i].count; 279 int size = sc_config_sizes[i].size; 280 int pool; 281 282 /* 283 * Sanity check count: Either a positive value or 284 * one of the expected wildcards is valid. The positive 285 * value is checked later when we compare against total 286 * memory available. 287 */ 288 if (i == SC_ACK) { 289 count = dd->n_krcv_queues; 290 } else if (i == SC_KERNEL) { 291 count = INIT_SC_PER_VL * num_vls; 292 } else if (count == SCC_PER_CPU) { 293 count = dd->num_rcv_contexts - dd->n_krcv_queues; 294 } else if (count < 0) { 295 dd_dev_err( 296 dd, 297 "%s send context invalid count wildcard %d\n", 298 sc_type_name(i), count); 299 return -EINVAL; 300 } 301 if (total_contexts + count > chip_send_contexts(dd)) 302 count = chip_send_contexts(dd) - total_contexts; 303 304 total_contexts += count; 305 306 /* 307 * Sanity check pool: The conversion will return a pool 308 * number or -1 if a fixed (non-negative) value. The fixed 309 * value is checked later when we compare against 310 * total memory available. 
311 */ 312 pool = wildcard_to_pool(size); 313 if (pool == -1) { /* non-wildcard */ 314 fixed_blocks += size * count; 315 } else if (pool < NUM_SC_POOLS) { /* valid wildcard */ 316 mem_pool_info[pool].count += count; 317 } else { /* invalid wildcard */ 318 dd_dev_err( 319 dd, 320 "%s send context invalid pool wildcard %d\n", 321 sc_type_name(i), size); 322 return -EINVAL; 323 } 324 325 dd->sc_sizes[i].count = count; 326 dd->sc_sizes[i].size = size; 327 } 328 if (fixed_blocks > total_blocks) { 329 dd_dev_err( 330 dd, 331 "Send context fixed block count, %u, larger than total block count %u\n", 332 fixed_blocks, total_blocks); 333 return -EINVAL; 334 } 335 336 /* step 3: calculate the blocks in the pools, and pool context sizes */ 337 pool_blocks = total_blocks - fixed_blocks; 338 if (ab_total > pool_blocks) { 339 dd_dev_err( 340 dd, 341 "Send context fixed pool sizes, %u, larger than pool block count %u\n", 342 ab_total, pool_blocks); 343 return -EINVAL; 344 } 345 /* subtract off the fixed pool blocks */ 346 pool_blocks -= ab_total; 347 348 for (i = 0; i < NUM_SC_POOLS; i++) { 349 struct mem_pool_info *pi = &mem_pool_info[i]; 350 351 /* % beats absolute blocks */ 352 if (pi->centipercent >= 0) 353 pi->blocks = (pool_blocks * pi->centipercent) / 10000; 354 355 if (pi->blocks == 0 && pi->count != 0) { 356 dd_dev_err( 357 dd, 358 "Send context memory pool %d has %u contexts, but no blocks\n", 359 i, pi->count); 360 return -EINVAL; 361 } 362 if (pi->count == 0) { 363 /* warn about wasted blocks */ 364 if (pi->blocks != 0) 365 dd_dev_err( 366 dd, 367 "Send context memory pool %d has %u blocks, but zero contexts\n", 368 i, pi->blocks); 369 pi->size = 0; 370 } else { 371 pi->size = pi->blocks / pi->count; 372 } 373 } 374 375 /* step 4: fill in the context type sizes from the pool sizes */ 376 used_blocks = 0; 377 for (i = 0; i < SC_MAX; i++) { 378 if (dd->sc_sizes[i].size < 0) { 379 unsigned pool = wildcard_to_pool(dd->sc_sizes[i].size); 380 381 WARN_ON_ONCE(pool >= 
NUM_SC_POOLS); 382 dd->sc_sizes[i].size = mem_pool_info[pool].size; 383 } 384 /* make sure we are not larger than what is allowed by the HW */ 385 #define PIO_MAX_BLOCKS 1024 386 if (dd->sc_sizes[i].size > PIO_MAX_BLOCKS) 387 dd->sc_sizes[i].size = PIO_MAX_BLOCKS; 388 389 /* calculate our total usage */ 390 used_blocks += dd->sc_sizes[i].size * dd->sc_sizes[i].count; 391 } 392 extra = total_blocks - used_blocks; 393 if (extra != 0) 394 dd_dev_info(dd, "unused send context blocks: %d\n", extra); 395 396 return total_contexts; 397 } 398 399 int init_send_contexts(struct hfi1_devdata *dd) 400 { 401 u16 base; 402 int ret, i, j, context; 403 404 ret = init_credit_return(dd); 405 if (ret) 406 return ret; 407 408 dd->hw_to_sw = kmalloc_array(TXE_NUM_CONTEXTS, sizeof(u8), 409 GFP_KERNEL); 410 dd->send_contexts = kcalloc(dd->num_send_contexts, 411 sizeof(struct send_context_info), 412 GFP_KERNEL); 413 if (!dd->send_contexts || !dd->hw_to_sw) { 414 kfree(dd->hw_to_sw); 415 kfree(dd->send_contexts); 416 free_credit_return(dd); 417 return -ENOMEM; 418 } 419 420 /* hardware context map starts with invalid send context indices */ 421 for (i = 0; i < TXE_NUM_CONTEXTS; i++) 422 dd->hw_to_sw[i] = INVALID_SCI; 423 424 /* 425 * All send contexts have their credit sizes. Allocate credits 426 * for each context one after another from the global space. 427 */ 428 context = 0; 429 base = 1; 430 for (i = 0; i < SC_MAX; i++) { 431 struct sc_config_sizes *scs = &dd->sc_sizes[i]; 432 433 for (j = 0; j < scs->count; j++) { 434 struct send_context_info *sci = 435 &dd->send_contexts[context]; 436 sci->type = i; 437 sci->base = base; 438 sci->credits = scs->size; 439 440 context++; 441 base += scs->size; 442 } 443 } 444 445 return 0; 446 } 447 448 /* 449 * Allocate a software index and hardware context of the given type. 450 * 451 * Must be called with dd->sc_lock held. 
452 */ 453 static int sc_hw_alloc(struct hfi1_devdata *dd, int type, u32 *sw_index, 454 u32 *hw_context) 455 { 456 struct send_context_info *sci; 457 u32 index; 458 u32 context; 459 460 for (index = 0, sci = &dd->send_contexts[0]; 461 index < dd->num_send_contexts; index++, sci++) { 462 if (sci->type == type && sci->allocated == 0) { 463 sci->allocated = 1; 464 /* use a 1:1 mapping, but make them non-equal */ 465 context = chip_send_contexts(dd) - index - 1; 466 dd->hw_to_sw[context] = index; 467 *sw_index = index; 468 *hw_context = context; 469 return 0; /* success */ 470 } 471 } 472 dd_dev_err(dd, "Unable to locate a free type %d send context\n", type); 473 return -ENOSPC; 474 } 475 476 /* 477 * Free the send context given by its software index. 478 * 479 * Must be called with dd->sc_lock held. 480 */ 481 static void sc_hw_free(struct hfi1_devdata *dd, u32 sw_index, u32 hw_context) 482 { 483 struct send_context_info *sci; 484 485 sci = &dd->send_contexts[sw_index]; 486 if (!sci->allocated) { 487 dd_dev_err(dd, "%s: sw_index %u not allocated? hw_context %u\n", 488 __func__, sw_index, hw_context); 489 } 490 sci->allocated = 0; 491 dd->hw_to_sw[hw_context] = INVALID_SCI; 492 } 493 494 /* return the base context of a context in a group */ 495 static inline u32 group_context(u32 context, u32 group) 496 { 497 return (context >> group) << group; 498 } 499 500 /* return the size of a group */ 501 static inline u32 group_size(u32 group) 502 { 503 return 1 << group; 504 } 505 506 /* 507 * Obtain the credit return addresses, kernel virtual and bus, for the 508 * given sc. 509 * 510 * To understand this routine: 511 * o va and dma are arrays of struct credit_return. One for each physical 512 * send context, per NUMA. 513 * o Each send context always looks in its relative location in a struct 514 * credit_return for its credit return. 515 * o Each send context in a group must have its return address CSR programmed 516 * with the same value. 
Use the address of the first send context in the 517 * group. 518 */ 519 static void cr_group_addresses(struct send_context *sc, dma_addr_t *dma) 520 { 521 u32 gc = group_context(sc->hw_context, sc->group); 522 u32 index = sc->hw_context & 0x7; 523 524 sc->hw_free = &sc->dd->cr_base[sc->node].va[gc].cr[index]; 525 *dma = (unsigned long) 526 &((struct credit_return *)sc->dd->cr_base[sc->node].dma)[gc]; 527 } 528 529 /* 530 * Work queue function triggered in error interrupt routine for 531 * kernel contexts. 532 */ 533 static void sc_halted(struct work_struct *work) 534 { 535 struct send_context *sc; 536 537 sc = container_of(work, struct send_context, halt_work); 538 sc_restart(sc); 539 } 540 541 /* 542 * Calculate PIO block threshold for this send context using the given MTU. 543 * Trigger a return when one MTU plus optional header of credits remain. 544 * 545 * Parameter mtu is in bytes. 546 * Parameter hdrqentsize is in DWORDs. 547 * 548 * Return value is what to write into the CSR: trigger return when 549 * unreturned credits pass this count. 550 */ 551 u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize) 552 { 553 u32 release_credits; 554 u32 threshold; 555 556 /* add in the header size, then divide by the PIO block size */ 557 mtu += hdrqentsize << 2; 558 release_credits = DIV_ROUND_UP(mtu, PIO_BLOCK_SIZE); 559 560 /* check against this context's credits */ 561 if (sc->credits <= release_credits) 562 threshold = 1; 563 else 564 threshold = sc->credits - release_credits; 565 566 return threshold; 567 } 568 569 /* 570 * Calculate credit threshold in terms of percent of the allocated credits. 571 * Trigger when unreturned credits equal or exceed the percentage of the whole. 572 * 573 * Return value is what to write into the CSR: trigger return when 574 * unreturned credits pass this count. 
575 */ 576 u32 sc_percent_to_threshold(struct send_context *sc, u32 percent) 577 { 578 return (sc->credits * percent) / 100; 579 } 580 581 /* 582 * Set the credit return threshold. 583 */ 584 void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold) 585 { 586 unsigned long flags; 587 u32 old_threshold; 588 int force_return = 0; 589 590 spin_lock_irqsave(&sc->credit_ctrl_lock, flags); 591 592 old_threshold = (sc->credit_ctrl >> 593 SC(CREDIT_CTRL_THRESHOLD_SHIFT)) 594 & SC(CREDIT_CTRL_THRESHOLD_MASK); 595 596 if (new_threshold != old_threshold) { 597 sc->credit_ctrl = 598 (sc->credit_ctrl 599 & ~SC(CREDIT_CTRL_THRESHOLD_SMASK)) 600 | ((new_threshold 601 & SC(CREDIT_CTRL_THRESHOLD_MASK)) 602 << SC(CREDIT_CTRL_THRESHOLD_SHIFT)); 603 write_kctxt_csr(sc->dd, sc->hw_context, 604 SC(CREDIT_CTRL), sc->credit_ctrl); 605 606 /* force a credit return on change to avoid a possible stall */ 607 force_return = 1; 608 } 609 610 spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags); 611 612 if (force_return) 613 sc_return_credits(sc); 614 } 615 616 /* 617 * set_pio_integrity 618 * 619 * Set the CHECK_ENABLE register for the send context 'sc'. 620 */ 621 void set_pio_integrity(struct send_context *sc) 622 { 623 struct hfi1_devdata *dd = sc->dd; 624 u32 hw_context = sc->hw_context; 625 int type = sc->type; 626 627 write_kctxt_csr(dd, hw_context, 628 SC(CHECK_ENABLE), 629 hfi1_pkt_default_send_ctxt_mask(dd, type)); 630 } 631 632 static u32 get_buffers_allocated(struct send_context *sc) 633 { 634 int cpu; 635 u32 ret = 0; 636 637 for_each_possible_cpu(cpu) 638 ret += *per_cpu_ptr(sc->buffers_allocated, cpu); 639 return ret; 640 } 641 642 static void reset_buffers_allocated(struct send_context *sc) 643 { 644 int cpu; 645 646 for_each_possible_cpu(cpu) 647 (*per_cpu_ptr(sc->buffers_allocated, cpu)) = 0; 648 } 649 650 /* 651 * Allocate a NUMA relative send context structure of the given type along 652 * with a HW context. 
653 */ 654 struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, 655 uint hdrqentsize, int numa) 656 { 657 struct send_context_info *sci; 658 struct send_context *sc = NULL; 659 dma_addr_t dma; 660 unsigned long flags; 661 u64 reg; 662 u32 thresh; 663 u32 sw_index; 664 u32 hw_context; 665 int ret; 666 u8 opval, opmask; 667 668 /* do not allocate while frozen */ 669 if (dd->flags & HFI1_FROZEN) 670 return NULL; 671 672 sc = kzalloc_node(sizeof(*sc), GFP_KERNEL, numa); 673 if (!sc) 674 return NULL; 675 676 sc->buffers_allocated = alloc_percpu(u32); 677 if (!sc->buffers_allocated) { 678 kfree(sc); 679 dd_dev_err(dd, 680 "Cannot allocate buffers_allocated per cpu counters\n" 681 ); 682 return NULL; 683 } 684 685 spin_lock_irqsave(&dd->sc_lock, flags); 686 ret = sc_hw_alloc(dd, type, &sw_index, &hw_context); 687 if (ret) { 688 spin_unlock_irqrestore(&dd->sc_lock, flags); 689 free_percpu(sc->buffers_allocated); 690 kfree(sc); 691 return NULL; 692 } 693 694 sci = &dd->send_contexts[sw_index]; 695 sci->sc = sc; 696 697 sc->dd = dd; 698 sc->node = numa; 699 sc->type = type; 700 spin_lock_init(&sc->alloc_lock); 701 spin_lock_init(&sc->release_lock); 702 spin_lock_init(&sc->credit_ctrl_lock); 703 seqlock_init(&sc->waitlock); 704 INIT_LIST_HEAD(&sc->piowait); 705 INIT_WORK(&sc->halt_work, sc_halted); 706 init_waitqueue_head(&sc->halt_wait); 707 708 /* grouping is always single context for now */ 709 sc->group = 0; 710 711 sc->sw_index = sw_index; 712 sc->hw_context = hw_context; 713 cr_group_addresses(sc, &dma); 714 sc->credits = sci->credits; 715 sc->size = sc->credits * PIO_BLOCK_SIZE; 716 717 /* PIO Send Memory Address details */ 718 #define PIO_ADDR_CONTEXT_MASK 0xfful 719 #define PIO_ADDR_CONTEXT_SHIFT 16 720 sc->base_addr = dd->piobase + ((hw_context & PIO_ADDR_CONTEXT_MASK) 721 << PIO_ADDR_CONTEXT_SHIFT); 722 723 /* set base and credits */ 724 reg = ((sci->credits & SC(CTRL_CTXT_DEPTH_MASK)) 725 << SC(CTRL_CTXT_DEPTH_SHIFT)) 726 | ((sci->base & 
SC(CTRL_CTXT_BASE_MASK)) 727 << SC(CTRL_CTXT_BASE_SHIFT)); 728 write_kctxt_csr(dd, hw_context, SC(CTRL), reg); 729 730 set_pio_integrity(sc); 731 732 /* unmask all errors */ 733 write_kctxt_csr(dd, hw_context, SC(ERR_MASK), (u64)-1); 734 735 /* set the default partition key */ 736 write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY), 737 (SC(CHECK_PARTITION_KEY_VALUE_MASK) & 738 DEFAULT_PKEY) << 739 SC(CHECK_PARTITION_KEY_VALUE_SHIFT)); 740 741 /* per context type checks */ 742 if (type == SC_USER) { 743 opval = USER_OPCODE_CHECK_VAL; 744 opmask = USER_OPCODE_CHECK_MASK; 745 } else { 746 opval = OPCODE_CHECK_VAL_DISABLED; 747 opmask = OPCODE_CHECK_MASK_DISABLED; 748 } 749 750 /* set the send context check opcode mask and value */ 751 write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE), 752 ((u64)opmask << SC(CHECK_OPCODE_MASK_SHIFT)) | 753 ((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT))); 754 755 /* set up credit return */ 756 reg = dma & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK); 757 write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), reg); 758 759 /* 760 * Calculate the initial credit return threshold. 761 * 762 * For Ack contexts, set a threshold for half the credits. 763 * For User contexts use the given percentage. This has been 764 * sanitized on driver start-up. 765 * For Kernel contexts, use the default MTU plus a header 766 * or half the credits, whichever is smaller. This should 767 * work for both the 3-deep buffering allocation and the 768 * pooling allocation. 
769 */ 770 if (type == SC_ACK) { 771 thresh = sc_percent_to_threshold(sc, 50); 772 } else if (type == SC_USER) { 773 thresh = sc_percent_to_threshold(sc, 774 user_credit_return_threshold); 775 } else { /* kernel */ 776 thresh = min(sc_percent_to_threshold(sc, 50), 777 sc_mtu_to_threshold(sc, hfi1_max_mtu, 778 hdrqentsize)); 779 } 780 reg = thresh << SC(CREDIT_CTRL_THRESHOLD_SHIFT); 781 /* add in early return */ 782 if (type == SC_USER && HFI1_CAP_IS_USET(EARLY_CREDIT_RETURN)) 783 reg |= SC(CREDIT_CTRL_EARLY_RETURN_SMASK); 784 else if (HFI1_CAP_IS_KSET(EARLY_CREDIT_RETURN)) /* kernel, ack */ 785 reg |= SC(CREDIT_CTRL_EARLY_RETURN_SMASK); 786 787 /* set up write-through credit_ctrl */ 788 sc->credit_ctrl = reg; 789 write_kctxt_csr(dd, hw_context, SC(CREDIT_CTRL), reg); 790 791 /* User send contexts should not allow sending on VL15 */ 792 if (type == SC_USER) { 793 reg = 1ULL << 15; 794 write_kctxt_csr(dd, hw_context, SC(CHECK_VL), reg); 795 } 796 797 spin_unlock_irqrestore(&dd->sc_lock, flags); 798 799 /* 800 * Allocate shadow ring to track outstanding PIO buffers _after_ 801 * unlocking. We don't know the size until the lock is held and 802 * we can't allocate while the lock is held. No one is using 803 * the context yet, so allocate it now. 804 * 805 * User contexts do not get a shadow ring. 806 */ 807 if (type != SC_USER) { 808 /* 809 * Size the shadow ring 1 larger than the number of credits 810 * so head == tail can mean empty. 
811 */ 812 sc->sr_size = sci->credits + 1; 813 sc->sr = kcalloc_node(sc->sr_size, 814 sizeof(union pio_shadow_ring), 815 GFP_KERNEL, numa); 816 if (!sc->sr) { 817 sc_free(sc); 818 return NULL; 819 } 820 } 821 822 hfi1_cdbg(PIO, 823 "Send context %u(%u) %s group %u credits %u credit_ctrl 0x%llx threshold %u\n", 824 sw_index, 825 hw_context, 826 sc_type_name(type), 827 sc->group, 828 sc->credits, 829 sc->credit_ctrl, 830 thresh); 831 832 return sc; 833 } 834 835 /* free a per-NUMA send context structure */ 836 void sc_free(struct send_context *sc) 837 { 838 struct hfi1_devdata *dd; 839 unsigned long flags; 840 u32 sw_index; 841 u32 hw_context; 842 843 if (!sc) 844 return; 845 846 sc->flags |= SCF_IN_FREE; /* ensure no restarts */ 847 dd = sc->dd; 848 if (!list_empty(&sc->piowait)) 849 dd_dev_err(dd, "piowait list not empty!\n"); 850 sw_index = sc->sw_index; 851 hw_context = sc->hw_context; 852 sc_disable(sc); /* make sure the HW is disabled */ 853 flush_work(&sc->halt_work); 854 855 spin_lock_irqsave(&dd->sc_lock, flags); 856 dd->send_contexts[sw_index].sc = NULL; 857 858 /* clear/disable all registers set in sc_alloc */ 859 write_kctxt_csr(dd, hw_context, SC(CTRL), 0); 860 write_kctxt_csr(dd, hw_context, SC(CHECK_ENABLE), 0); 861 write_kctxt_csr(dd, hw_context, SC(ERR_MASK), 0); 862 write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY), 0); 863 write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE), 0); 864 write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), 0); 865 write_kctxt_csr(dd, hw_context, SC(CREDIT_CTRL), 0); 866 867 /* release the index and context for re-use */ 868 sc_hw_free(dd, sw_index, hw_context); 869 spin_unlock_irqrestore(&dd->sc_lock, flags); 870 871 kfree(sc->sr); 872 free_percpu(sc->buffers_allocated); 873 kfree(sc); 874 } 875 876 /* disable the context */ 877 void sc_disable(struct send_context *sc) 878 { 879 u64 reg; 880 struct pio_buf *pbuf; 881 882 if (!sc) 883 return; 884 885 /* do all steps, even if already disabled */ 886 
spin_lock_irq(&sc->alloc_lock); 887 reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL)); 888 reg &= ~SC(CTRL_CTXT_ENABLE_SMASK); 889 sc->flags &= ~SCF_ENABLED; 890 sc_wait_for_packet_egress(sc, 1); 891 write_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL), reg); 892 893 /* 894 * Flush any waiters. Once the context is disabled, 895 * credit return interrupts are stopped (although there 896 * could be one in-process when the context is disabled). 897 * Wait one microsecond for any lingering interrupts, then 898 * proceed with the flush. 899 */ 900 udelay(1); 901 spin_lock(&sc->release_lock); 902 if (sc->sr) { /* this context has a shadow ring */ 903 while (sc->sr_tail != sc->sr_head) { 904 pbuf = &sc->sr[sc->sr_tail].pbuf; 905 if (pbuf->cb) 906 (*pbuf->cb)(pbuf->arg, PRC_SC_DISABLE); 907 sc->sr_tail++; 908 if (sc->sr_tail >= sc->sr_size) 909 sc->sr_tail = 0; 910 } 911 } 912 spin_unlock(&sc->release_lock); 913 914 write_seqlock(&sc->waitlock); 915 while (!list_empty(&sc->piowait)) { 916 struct iowait *wait; 917 struct rvt_qp *qp; 918 struct hfi1_qp_priv *priv; 919 920 wait = list_first_entry(&sc->piowait, struct iowait, list); 921 qp = iowait_to_qp(wait); 922 priv = qp->priv; 923 list_del_init(&priv->s_iowait.list); 924 priv->s_iowait.lock = NULL; 925 hfi1_qp_wakeup(qp, RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); 926 } 927 write_sequnlock(&sc->waitlock); 928 929 spin_unlock_irq(&sc->alloc_lock); 930 } 931 932 /* return SendEgressCtxtStatus.PacketOccupancy */ 933 static u64 packet_occupancy(u64 reg) 934 { 935 return (reg & 936 SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK) 937 >> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT; 938 } 939 940 /* is egress halted on the context? */ 941 static bool egress_halted(u64 reg) 942 { 943 return !!(reg & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK); 944 } 945 946 /* is the send context halted? 
*/ 947 static bool is_sc_halted(struct hfi1_devdata *dd, u32 hw_context) 948 { 949 return !!(read_kctxt_csr(dd, hw_context, SC(STATUS)) & 950 SC(STATUS_CTXT_HALTED_SMASK)); 951 } 952 953 /** 954 * sc_wait_for_packet_egress - wait for packet 955 * @sc: valid send context 956 * @pause: wait for credit return 957 * 958 * Wait for packet egress, optionally pause for credit return 959 * 960 * Egress halt and Context halt are not necessarily the same thing, so 961 * check for both. 962 * 963 * NOTE: The context halt bit may not be set immediately. Because of this, 964 * it is necessary to check the SW SFC_HALTED bit (set in the IRQ) and the HW 965 * context bit to determine if the context is halted. 966 */ 967 static void sc_wait_for_packet_egress(struct send_context *sc, int pause) 968 { 969 struct hfi1_devdata *dd = sc->dd; 970 u64 reg = 0; 971 u64 reg_prev; 972 u32 loop = 0; 973 974 while (1) { 975 reg_prev = reg; 976 reg = read_csr(dd, sc->hw_context * 8 + 977 SEND_EGRESS_CTXT_STATUS); 978 /* done if any halt bits, SW or HW are set */ 979 if (sc->flags & SCF_HALTED || 980 is_sc_halted(dd, sc->hw_context) || egress_halted(reg)) 981 break; 982 reg = packet_occupancy(reg); 983 if (reg == 0) 984 break; 985 /* counter is reset if occupancy count changes */ 986 if (reg != reg_prev) 987 loop = 0; 988 if (loop > 50000) { 989 /* timed out - bounce the link */ 990 dd_dev_err(dd, 991 "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n", 992 __func__, sc->sw_index, 993 sc->hw_context, (u32)reg); 994 queue_work(dd->pport->link_wq, 995 &dd->pport->link_bounce_work); 996 break; 997 } 998 loop++; 999 udelay(1); 1000 } 1001 1002 if (pause) 1003 /* Add additional delay to ensure chip returns all credits */ 1004 pause_for_credit_return(dd); 1005 } 1006 1007 void sc_wait(struct hfi1_devdata *dd) 1008 { 1009 int i; 1010 1011 for (i = 0; i < dd->num_send_contexts; i++) { 1012 struct send_context *sc = dd->send_contexts[i].sc; 1013 1014 if (!sc) 
1015 continue; 1016 sc_wait_for_packet_egress(sc, 0); 1017 } 1018 } 1019 1020 /* 1021 * Restart a context after it has been halted due to error. 1022 * 1023 * If the first step fails - wait for the halt to be asserted, return early. 1024 * Otherwise complain about timeouts but keep going. 1025 * 1026 * It is expected that allocations (enabled flag bit) have been shut off 1027 * already (only applies to kernel contexts). 1028 */ 1029 int sc_restart(struct send_context *sc) 1030 { 1031 struct hfi1_devdata *dd = sc->dd; 1032 u64 reg; 1033 u32 loop; 1034 int count; 1035 1036 /* bounce off if not halted, or being free'd */ 1037 if (!(sc->flags & SCF_HALTED) || (sc->flags & SCF_IN_FREE)) 1038 return -EINVAL; 1039 1040 dd_dev_info(dd, "restarting send context %u(%u)\n", sc->sw_index, 1041 sc->hw_context); 1042 1043 /* 1044 * Step 1: Wait for the context to actually halt. 1045 * 1046 * The error interrupt is asynchronous to actually setting halt 1047 * on the context. 1048 */ 1049 loop = 0; 1050 while (1) { 1051 reg = read_kctxt_csr(dd, sc->hw_context, SC(STATUS)); 1052 if (reg & SC(STATUS_CTXT_HALTED_SMASK)) 1053 break; 1054 if (loop > 100) { 1055 dd_dev_err(dd, "%s: context %u(%u) not halting, skipping\n", 1056 __func__, sc->sw_index, sc->hw_context); 1057 return -ETIME; 1058 } 1059 loop++; 1060 udelay(1); 1061 } 1062 1063 /* 1064 * Step 2: Ensure no users are still trying to write to PIO. 1065 * 1066 * For kernel contexts, we have already turned off buffer allocation. 1067 * Now wait for the buffer count to go to zero. 1068 * 1069 * For user contexts, the user handling code has cut off write access 1070 * to the context's PIO pages before calling this routine and will 1071 * restore write access after this routine returns. 
1072 */ 1073 if (sc->type != SC_USER) { 1074 /* kernel context */ 1075 loop = 0; 1076 while (1) { 1077 count = get_buffers_allocated(sc); 1078 if (count == 0) 1079 break; 1080 if (loop > 100) { 1081 dd_dev_err(dd, 1082 "%s: context %u(%u) timeout waiting for PIO buffers to zero, remaining %d\n", 1083 __func__, sc->sw_index, 1084 sc->hw_context, count); 1085 } 1086 loop++; 1087 udelay(1); 1088 } 1089 } 1090 1091 /* 1092 * Step 3: Wait for all packets to egress. 1093 * This is done while disabling the send context 1094 * 1095 * Step 4: Disable the context 1096 * 1097 * This is a superset of the halt. After the disable, the 1098 * errors can be cleared. 1099 */ 1100 sc_disable(sc); 1101 1102 /* 1103 * Step 5: Enable the context 1104 * 1105 * This enable will clear the halted flag and per-send context 1106 * error flags. 1107 */ 1108 return sc_enable(sc); 1109 } 1110 1111 /* 1112 * PIO freeze processing. To be called after the TXE block is fully frozen. 1113 * Go through all frozen send contexts and disable them. The contexts are 1114 * already stopped by the freeze. 1115 */ 1116 void pio_freeze(struct hfi1_devdata *dd) 1117 { 1118 struct send_context *sc; 1119 int i; 1120 1121 for (i = 0; i < dd->num_send_contexts; i++) { 1122 sc = dd->send_contexts[i].sc; 1123 /* 1124 * Don't disable unallocated, unfrozen, or user send contexts. 1125 * User send contexts will be disabled when the process 1126 * calls into the driver to reset its context. 1127 */ 1128 if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER) 1129 continue; 1130 1131 /* only need to disable, the context is already stopped */ 1132 sc_disable(sc); 1133 } 1134 } 1135 1136 /* 1137 * Unfreeze PIO for kernel send contexts. The precondition for calling this 1138 * is that all PIO send contexts have been disabled and the SPC freeze has 1139 * been cleared. Now perform the last step and re-enable each kernel context. 
* User (PSM) processing will occur when PSM calls into the kernel to
 * acknowledge the freeze.
 */
void pio_kernel_unfreeze(struct hfi1_devdata *dd)
{
	struct send_context *sc;
	int i;

	for (i = 0; i < dd->num_send_contexts; i++) {
		sc = dd->send_contexts[i].sc;
		/* skip unallocated, unfrozen, and user send contexts */
		if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER)
			continue;
		/* link-down contexts are re-enabled by pio_kernel_linkup() */
		if (sc->flags & SCF_LINK_DOWN)
			continue;

		sc_enable(sc);	/* will clear the sc frozen flag */
	}
}

/**
 * pio_kernel_linkup() - Re-enable send contexts after linkup event
 * @dd: valid device data
 *
 * When the link goes down, the freeze path is taken. However, a link down
 * event is different from a freeze because if the send context is re-enabled
 * whoever is sending data will start sending data again, which will hang
 * any QP that is sending data.
 *
 * The freeze path now looks at the type of event that occurs and takes this
 * path for link down event.
 */
void pio_kernel_linkup(struct hfi1_devdata *dd)
{
	struct send_context *sc;
	int i;

	for (i = 0; i < dd->num_send_contexts; i++) {
		sc = dd->send_contexts[i].sc;
		/* only allocated kernel contexts marked link-down are touched */
		if (!sc || !(sc->flags & SCF_LINK_DOWN) || sc->type == SC_USER)
			continue;

		sc_enable(sc);	/* will clear the sc link down flag */
	}
}

/*
 * Wait for the SendPioInitCtxt.PioInitInProgress bit to clear.
 *
 * The register is polled every 5us, for at most 120 polls on FPGA
 * emulation and 5 polls otherwise.
 *
 * Returns:
 *	0          - the in-progress bit cleared without the error bit set
 *	-ETIMEDOUT - if we wait too long
 *	-EIO       - if there was an error
 */
static int pio_init_wait_progress(struct hfi1_devdata *dd)
{
	u64 reg;
	int max, count = 0;

	/* max is the longest possible HW init time / delay */
	max = (dd->icode == ICODE_FPGA_EMULATION) ? 120 : 5;
	while (1) {
		reg = read_csr(dd, SEND_PIO_INIT_CTXT);
		if (!(reg & SEND_PIO_INIT_CTXT_PIO_INIT_IN_PROGRESS_SMASK))
			break;
		if (count >= max)
			return -ETIMEDOUT;
		udelay(5);
		count++;
	}

	/* reg still holds the value read when in-progress was seen clear */
	return reg & SEND_PIO_INIT_CTXT_PIO_INIT_ERR_SMASK ? -EIO : 0;
}

/*
 * Reset all of the send contexts to their power-on state. Used
 * only during manual init - no lock against sc_enable needed.
 *
 * A failure of the "init all" request is logged but not returned.
 */
void pio_reset_all(struct hfi1_devdata *dd)
{
	int ret;

	/* make sure the init engine is not busy */
	ret = pio_init_wait_progress(dd);
	/* ignore any timeout */
	if (ret == -EIO) {
		/* clear the error */
		write_csr(dd, SEND_PIO_ERR_CLEAR,
			  SEND_PIO_ERR_CLEAR_PIO_INIT_SM_IN_ERR_SMASK);
	}

	/* reset init all */
	write_csr(dd, SEND_PIO_INIT_CTXT,
		  SEND_PIO_INIT_CTXT_PIO_ALL_CTXT_INIT_SMASK);
	udelay(2);
	ret = pio_init_wait_progress(dd);
	if (ret < 0) {
		dd_dev_err(dd,
			   "PIO send context init %s while initializing all PIO blocks\n",
			   ret == -ETIMEDOUT ? "is stuck" : "had an error");
	}
}

/*
 * Enable the context.
 *
 * Resets the software ring/credit accounting and all flags, clears any
 * pending per-context errors, runs the hardware PIO init engine for this
 * context and finally sets the enable bit.
 *
 * Return: -EINVAL if @sc is NULL, 0 if the context was already enabled or
 * has now been enabled, otherwise the error from pio_init_wait_progress().
 * Note that on an init failure the software state has already been reset
 * and the flags cleared, but SCF_ENABLED is not set.
 */
int sc_enable(struct send_context *sc)
{
	u64 sc_ctrl, reg, pio;
	struct hfi1_devdata *dd;
	unsigned long flags;
	int ret = 0;

	if (!sc)
		return -EINVAL;
	dd = sc->dd;

	/*
	 * Obtain the allocator lock to guard against any allocation
	 * attempts (which should not happen prior to context being
	 * enabled). On the release/disable side we don't need to
	 * worry about locking since the releaser will not do anything
	 * if the context accounting values have not changed.
	 */
	spin_lock_irqsave(&sc->alloc_lock, flags);
	sc_ctrl = read_kctxt_csr(dd, sc->hw_context, SC(CTRL));
	if ((sc_ctrl & SC(CTRL_CTXT_ENABLE_SMASK)))
		goto unlock; /* already enabled */

	/* IMPORTANT: only clear free and fill if transitioning 0 -> 1 */

	*sc->hw_free = 0;
	sc->free = 0;
	sc->alloc_free = 0;
	sc->fill = 0;
	sc->fill_wrap = 0;
	sc->sr_head = 0;
	sc->sr_tail = 0;
	/* drops every flag at once: halted, frozen, link down, ... */
	sc->flags = 0;
	/* the alloc lock ensures no fast path allocation */
	reset_buffers_allocated(sc);

	/*
	 * Clear all per-context errors. Some of these will be set when
	 * we are re-enabling after a context halt. Now that the context
	 * is disabled, the halt will not clear until after the PIO init
	 * engine runs below.
	 */
	reg = read_kctxt_csr(dd, sc->hw_context, SC(ERR_STATUS));
	if (reg)
		write_kctxt_csr(dd, sc->hw_context, SC(ERR_CLEAR), reg);

	/*
	 * The HW PIO initialization engine can handle only one init
	 * request at a time. Serialize access to each device's engine.
	 */
	spin_lock(&dd->sc_init_lock);
	/*
	 * Since access to this code block is serialized and
	 * each access waits for the initialization to complete
	 * before releasing the lock, the PIO initialization engine
	 * should not be in use, so we don't have to wait for the
	 * InProgress bit to go down.
	 */
	pio = ((sc->hw_context & SEND_PIO_INIT_CTXT_PIO_CTXT_NUM_MASK) <<
	       SEND_PIO_INIT_CTXT_PIO_CTXT_NUM_SHIFT) |
		SEND_PIO_INIT_CTXT_PIO_SINGLE_CTXT_INIT_SMASK;
	write_csr(dd, SEND_PIO_INIT_CTXT, pio);
	/*
	 * Wait until the engine is done. Give the chip the required time
	 * so, hopefully, we read the register just once.
	 */
	udelay(2);
	ret = pio_init_wait_progress(dd);
	spin_unlock(&dd->sc_init_lock);
	if (ret) {
		dd_dev_err(dd,
			   "sctxt%u(%u): Context not enabled due to init failure %d\n",
			   sc->sw_index, sc->hw_context, ret);
		goto unlock;
	}

	/*
	 * All is well. Enable the context.
	 */
	sc_ctrl |= SC(CTRL_CTXT_ENABLE_SMASK);
	write_kctxt_csr(dd, sc->hw_context, SC(CTRL), sc_ctrl);
	/*
	 * Read SendCtxtCtrl to force the write out and prevent a timing
	 * hazard where a PIO write may reach the context before the enable.
	 */
	read_kctxt_csr(dd, sc->hw_context, SC(CTRL));
	sc->flags |= SCF_ENABLED;

unlock:
	spin_unlock_irqrestore(&sc->alloc_lock, flags);

	return ret;
}

/* force a credit return on the context; a NULL context is ignored */
void sc_return_credits(struct send_context *sc)
{
	if (!sc)
		return;

	/* a 0->1 transition schedules a credit return */
	write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE),
			SC(CREDIT_FORCE_FORCE_RETURN_SMASK));
	/*
	 * Ensure that the write is flushed and the credit return is
	 * scheduled. We care more about the 0 -> 1 transition.
	 */
	read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE));
	/* set back to 0 for next time */
	write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE), 0);
}

/* allow all in-flight packets to drain on the context */
void sc_flush(struct send_context *sc)
{
	if (!sc)
		return;

	sc_wait_for_packet_egress(sc, 1);
}

/*
 * drop all packets on the context, no waiting until they are sent
 *
 * Currently only logs that the operation is not implemented.
 */
void sc_drop(struct send_context *sc)
{
	if (!sc)
		return;

	dd_dev_info(sc->dd, "%s: context %u(%u) - not implemented\n",
		    __func__, sc->sw_index, sc->hw_context);
}

/*
 * Start the software reaction to a context halt or SPC freeze:
 *	- mark the context as halted or frozen
 *	- stop buffer allocations
 *
 * @flag is OR-ed into sc->flags and SCF_ENABLED is cleared, both under
 * the allocator lock; waiters on sc->halt_wait are then woken.
 *
 * Called from the error interrupt. Other work is deferred until
 * out of the interrupt.
 */
void sc_stop(struct send_context *sc, int flag)
{
	unsigned long flags;

	/* stop buffer allocations */
	spin_lock_irqsave(&sc->alloc_lock, flags);
	/* mark the context */
	sc->flags |= flag;
	sc->flags &= ~SCF_ENABLED;
	spin_unlock_irqrestore(&sc->alloc_lock, flags);
	wake_up(&sc->halt_wait);
}

/* number of 32-bit words in one PIO block, and dwords -> blocks (round up) */
#define BLOCK_DWORDS (PIO_BLOCK_SIZE / sizeof(u32))
#define dwords_to_blocks(x) DIV_ROUND_UP(x, BLOCK_DWORDS)

/*
 * The send context buffer "allocator".
 *
 * @sc: the PIO send context we are allocating from
 * @dw_len: length of whole packet - including PBC - in dwords
 * @cb: optional callback to call when the buffer is finished sending
 * @arg: argument for cb
 *
 * Return a pointer to a PIO buffer, NULL if not enough room,
 * ERR_PTR(-ECOMM) when the context is not enabled (e.g. the link is down).
*/
struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
				pio_release_cb cb, void *arg)
{
	struct pio_buf *pbuf = NULL;
	unsigned long flags;
	unsigned long avail;
	unsigned long blocks = dwords_to_blocks(dw_len);
	u32 fill_wrap;
	int trycount = 0;
	u32 head, next;

	spin_lock_irqsave(&sc->alloc_lock, flags);
	if (!(sc->flags & SCF_ENABLED)) {
		spin_unlock_irqrestore(&sc->alloc_lock, flags);
		return ERR_PTR(-ECOMM);
	}

retry:
	/* credits not currently in use, based on the cached free count */
	avail = (unsigned long)sc->credits - (sc->fill - sc->alloc_free);
	if (blocks > avail) {
		/* not enough room */
		if (unlikely(trycount))	{ /* already tried to get more room */
			spin_unlock_irqrestore(&sc->alloc_lock, flags);
			/* pbuf is still NULL here */
			goto done;
		}
		/* copy from receiver cache line and recalculate */
		sc->alloc_free = READ_ONCE(sc->free);
		avail =
			(unsigned long)sc->credits -
			(sc->fill - sc->alloc_free);
		if (blocks > avail) {
			/* still no room, actively update */
			sc_release_update(sc);
			sc->alloc_free = READ_ONCE(sc->free);
			trycount++;
			goto retry;
		}
	}

	/* there is enough room */

	/*
	 * NOTE(review): preemption is disabled here and not re-enabled
	 * anywhere in this function; presumably the code that finishes
	 * the PIO copy re-enables it - confirm against the callers.
	 */
	preempt_disable();
	this_cpu_inc(*sc->buffers_allocated);

	/* read this once */
	head = sc->sr_head;

	/* "allocate" the buffer */
	sc->fill += blocks;
	/* remember the pre-allocation offset: it is where this buffer starts */
	fill_wrap = sc->fill_wrap;
	sc->fill_wrap += blocks;
	if (sc->fill_wrap >= sc->credits)
		sc->fill_wrap = sc->fill_wrap - sc->credits;

	/*
	 * Fill the parts that the releaser looks at before moving the head.
	 * The only necessary piece is the sent_at field. The credits
	 * we have just allocated cannot have been returned yet, so the
	 * cb and arg will not be looked at for a "while". Put them
	 * on this side of the memory barrier anyway.
	 */
	pbuf = &sc->sr[head].pbuf;
	pbuf->sent_at = sc->fill;
	pbuf->cb = cb;
	pbuf->arg = arg;
	pbuf->sc = sc;	/* could be filled in at sc->sr init time */
	/* make sure this is in memory before updating the head */

	/* calculate next head index, do not store */
	next = head + 1;
	if (next >= sc->sr_size)
		next = 0;
	/*
	 * update the head - must be last! - the releaser can look at fields
	 * in pbuf once we move the head
	 */
	smp_wmb();
	sc->sr_head = next;
	spin_unlock_irqrestore(&sc->alloc_lock, flags);

	/* finish filling in the buffer outside the lock */
	pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE;
	pbuf->end = sc->base_addr + sc->size;
	pbuf->qw_written = 0;
	pbuf->carry_bytes = 0;
	pbuf->carry.val64 = 0;
done:
	return pbuf;
}

/*
 * There are at least two entities that can turn on credit return
 * interrupts and they can overlap. Avoid problems by implementing
 * a count scheme that is enforced by a lock. The lock is needed because
 * the count and CSR write must be paired.
 */

/*
 * Start credit return interrupts. This is managed by a count. If already
 * on, just increment the count.
 */
void sc_add_credit_return_intr(struct send_context *sc)
{
	unsigned long flags;

	/* lock must surround both the count change and the CSR update */
	spin_lock_irqsave(&sc->credit_ctrl_lock, flags);
	/* only the first user actually touches the hardware */
	if (sc->credit_intr_count == 0) {
		sc->credit_ctrl |= SC(CREDIT_CTRL_CREDIT_INTR_SMASK);
		write_kctxt_csr(sc->dd, sc->hw_context,
				SC(CREDIT_CTRL), sc->credit_ctrl);
	}
	sc->credit_intr_count++;
	spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags);
}

/*
 * Stop credit return interrupts. This is managed by a count. Decrement the
 * count, if the last user, then turn the credit interrupts off.
1526 */ 1527 void sc_del_credit_return_intr(struct send_context *sc) 1528 { 1529 unsigned long flags; 1530 1531 WARN_ON(sc->credit_intr_count == 0); 1532 1533 /* lock must surround both the count change and the CSR update */ 1534 spin_lock_irqsave(&sc->credit_ctrl_lock, flags); 1535 sc->credit_intr_count--; 1536 if (sc->credit_intr_count == 0) { 1537 sc->credit_ctrl &= ~SC(CREDIT_CTRL_CREDIT_INTR_SMASK); 1538 write_kctxt_csr(sc->dd, sc->hw_context, 1539 SC(CREDIT_CTRL), sc->credit_ctrl); 1540 } 1541 spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags); 1542 } 1543 1544 /* 1545 * The caller must be careful when calling this. All needint calls 1546 * must be paired with !needint. 1547 */ 1548 void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint) 1549 { 1550 if (needint) 1551 sc_add_credit_return_intr(sc); 1552 else 1553 sc_del_credit_return_intr(sc); 1554 trace_hfi1_wantpiointr(sc, needint, sc->credit_ctrl); 1555 if (needint) 1556 sc_return_credits(sc); 1557 } 1558 1559 /** 1560 * sc_piobufavail - callback when a PIO buffer is available 1561 * @sc: the send context 1562 * 1563 * This is called from the interrupt handler when a PIO buffer is 1564 * available after hfi1_verbs_send() returned an error that no buffers were 1565 * available. Disable the interrupt if there are no more QPs waiting. 
1566 */ 1567 static void sc_piobufavail(struct send_context *sc) 1568 { 1569 struct hfi1_devdata *dd = sc->dd; 1570 struct list_head *list; 1571 struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE]; 1572 struct rvt_qp *qp; 1573 struct hfi1_qp_priv *priv; 1574 unsigned long flags; 1575 uint i, n = 0, top_idx = 0; 1576 1577 if (dd->send_contexts[sc->sw_index].type != SC_KERNEL && 1578 dd->send_contexts[sc->sw_index].type != SC_VL15) 1579 return; 1580 list = &sc->piowait; 1581 /* 1582 * Note: checking that the piowait list is empty and clearing 1583 * the buffer available interrupt needs to be atomic or we 1584 * could end up with QPs on the wait list with the interrupt 1585 * disabled. 1586 */ 1587 write_seqlock_irqsave(&sc->waitlock, flags); 1588 while (!list_empty(list)) { 1589 struct iowait *wait; 1590 1591 if (n == ARRAY_SIZE(qps)) 1592 break; 1593 wait = list_first_entry(list, struct iowait, list); 1594 iowait_get_priority(wait); 1595 qp = iowait_to_qp(wait); 1596 priv = qp->priv; 1597 list_del_init(&priv->s_iowait.list); 1598 priv->s_iowait.lock = NULL; 1599 if (n) { 1600 priv = qps[top_idx]->priv; 1601 top_idx = iowait_priority_update_top(wait, 1602 &priv->s_iowait, 1603 n, top_idx); 1604 } 1605 1606 /* refcount held until actual wake up */ 1607 qps[n++] = qp; 1608 } 1609 /* 1610 * If there had been waiters and there are more 1611 * insure that we redo the force to avoid a potential hang. 
1612 */ 1613 if (n) { 1614 hfi1_sc_wantpiobuf_intr(sc, 0); 1615 if (!list_empty(list)) 1616 hfi1_sc_wantpiobuf_intr(sc, 1); 1617 } 1618 write_sequnlock_irqrestore(&sc->waitlock, flags); 1619 1620 /* Wake up the top-priority one first */ 1621 if (n) 1622 hfi1_qp_wakeup(qps[top_idx], 1623 RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); 1624 for (i = 0; i < n; i++) 1625 if (i != top_idx) 1626 hfi1_qp_wakeup(qps[i], 1627 RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); 1628 } 1629 1630 /* translate a send credit update to a bit code of reasons */ 1631 static inline int fill_code(u64 hw_free) 1632 { 1633 int code = 0; 1634 1635 if (hw_free & CR_STATUS_SMASK) 1636 code |= PRC_STATUS_ERR; 1637 if (hw_free & CR_CREDIT_RETURN_DUE_TO_PBC_SMASK) 1638 code |= PRC_PBC; 1639 if (hw_free & CR_CREDIT_RETURN_DUE_TO_THRESHOLD_SMASK) 1640 code |= PRC_THRESHOLD; 1641 if (hw_free & CR_CREDIT_RETURN_DUE_TO_ERR_SMASK) 1642 code |= PRC_FILL_ERR; 1643 if (hw_free & CR_CREDIT_RETURN_DUE_TO_FORCE_SMASK) 1644 code |= PRC_SC_DISABLE; 1645 return code; 1646 } 1647 1648 /* use the jiffies compare to get the wrap right */ 1649 #define sent_before(a, b) time_before(a, b) /* a < b */ 1650 1651 /* 1652 * The send context buffer "releaser". 
 *
 * Folds the hardware-returned credit counter into the software free
 * count, runs the completion callback of every buffer whose credits have
 * been returned, advances the ring tail, and then wakes any QPs waiting
 * for PIO buffers.  A NULL @sc is ignored.
 */
void sc_release_update(struct send_context *sc)
{
	struct pio_buf *pbuf;
	u64 hw_free;
	u32 head, tail;
	unsigned long old_free;
	unsigned long free;
	unsigned long extra;
	unsigned long flags;
	int code;

	if (!sc)
		return;

	spin_lock_irqsave(&sc->release_lock, flags);
	/* update free */
	hw_free = le64_to_cpu(*sc->hw_free);		/* volatile read */
	old_free = sc->free;
	/*
	 * Newly returned credits: the difference between the hardware
	 * counter and the low bits of the software count, masked so that
	 * a wrap of the counter field is handled.
	 */
	extra = (((hw_free & CR_COUNTER_SMASK) >> CR_COUNTER_SHIFT)
			- (old_free & CR_COUNTER_MASK))
				& CR_COUNTER_MASK;
	free = old_free + extra;
	trace_hfi1_piofree(sc, extra);

	/* call sent buffer callbacks */
	code = -1;				/* code not yet set */
	head = READ_ONCE(sc->sr_head);	/* snapshot the head */
	tail = sc->sr_tail;
	while (head != tail) {
		pbuf = &sc->sr[tail].pbuf;

		if (sent_before(free, pbuf->sent_at)) {
			/* not sent yet */
			break;
		}
		if (pbuf->cb) {
			if (code < 0) /* fill in code on first user */
				code = fill_code(hw_free);
			(*pbuf->cb)(pbuf->arg, code);
		}

		/* advance the tail, wrapping at the end of the ring */
		tail++;
		if (tail >= sc->sr_size)
			tail = 0;
	}
	sc->sr_tail = tail;
	/* make sure tail is updated before free */
	smp_wmb();
	sc->free = free;
	spin_unlock_irqrestore(&sc->release_lock, flags);
	/* credits may now be available: wake waiters outside the lock */
	sc_piobufavail(sc);
}

/*
 * Send context group releaser. Argument is the send context that caused
 * the interrupt. Called from the send context interrupt handler.
 *
 * Call release on all contexts in the group.
 *
 * This routine takes the sc_lock without an irqsave because it is only
 * called from an interrupt handler. Adjust if that changes.
1715 */ 1716 void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context) 1717 { 1718 struct send_context *sc; 1719 u32 sw_index; 1720 u32 gc, gc_end; 1721 1722 spin_lock(&dd->sc_lock); 1723 sw_index = dd->hw_to_sw[hw_context]; 1724 if (unlikely(sw_index >= dd->num_send_contexts)) { 1725 dd_dev_err(dd, "%s: invalid hw (%u) to sw (%u) mapping\n", 1726 __func__, hw_context, sw_index); 1727 goto done; 1728 } 1729 sc = dd->send_contexts[sw_index].sc; 1730 if (unlikely(!sc)) 1731 goto done; 1732 1733 gc = group_context(hw_context, sc->group); 1734 gc_end = gc + group_size(sc->group); 1735 for (; gc < gc_end; gc++) { 1736 sw_index = dd->hw_to_sw[gc]; 1737 if (unlikely(sw_index >= dd->num_send_contexts)) { 1738 dd_dev_err(dd, 1739 "%s: invalid hw (%u) to sw (%u) mapping\n", 1740 __func__, hw_context, sw_index); 1741 continue; 1742 } 1743 sc_release_update(dd->send_contexts[sw_index].sc); 1744 } 1745 done: 1746 spin_unlock(&dd->sc_lock); 1747 } 1748 1749 /* 1750 * pio_select_send_context_vl() - select send context 1751 * @dd: devdata 1752 * @selector: a spreading factor 1753 * @vl: this vl 1754 * 1755 * This function returns a send context based on the selector and a vl. 1756 * The mapping fields are protected by RCU 1757 */ 1758 struct send_context *pio_select_send_context_vl(struct hfi1_devdata *dd, 1759 u32 selector, u8 vl) 1760 { 1761 struct pio_vl_map *m; 1762 struct pio_map_elem *e; 1763 struct send_context *rval; 1764 1765 /* 1766 * NOTE This should only happen if SC->VL changed after the initial 1767 * checks on the QP/AH 1768 * Default will return VL0's send context below 1769 */ 1770 if (unlikely(vl >= num_vls)) { 1771 rval = NULL; 1772 goto done; 1773 } 1774 1775 rcu_read_lock(); 1776 m = rcu_dereference(dd->pio_map); 1777 if (unlikely(!m)) { 1778 rcu_read_unlock(); 1779 return dd->vld[0].sc; 1780 } 1781 e = m->map[vl & m->mask]; 1782 rval = e->ksc[selector & e->mask]; 1783 rcu_read_unlock(); 1784 1785 done: 1786 rval = !rval ? 
dd->vld[0].sc : rval; 1787 return rval; 1788 } 1789 1790 /* 1791 * pio_select_send_context_sc() - select send context 1792 * @dd: devdata 1793 * @selector: a spreading factor 1794 * @sc5: the 5 bit sc 1795 * 1796 * This function returns an send context based on the selector and an sc 1797 */ 1798 struct send_context *pio_select_send_context_sc(struct hfi1_devdata *dd, 1799 u32 selector, u8 sc5) 1800 { 1801 u8 vl = sc_to_vlt(dd, sc5); 1802 1803 return pio_select_send_context_vl(dd, selector, vl); 1804 } 1805 1806 /* 1807 * Free the indicated map struct 1808 */ 1809 static void pio_map_free(struct pio_vl_map *m) 1810 { 1811 int i; 1812 1813 for (i = 0; m && i < m->actual_vls; i++) 1814 kfree(m->map[i]); 1815 kfree(m); 1816 } 1817 1818 /* 1819 * Handle RCU callback 1820 */ 1821 static void pio_map_rcu_callback(struct rcu_head *list) 1822 { 1823 struct pio_vl_map *m = container_of(list, struct pio_vl_map, list); 1824 1825 pio_map_free(m); 1826 } 1827 1828 /* 1829 * Set credit return threshold for the kernel send context 1830 */ 1831 static void set_threshold(struct hfi1_devdata *dd, int scontext, int i) 1832 { 1833 u32 thres; 1834 1835 thres = min(sc_percent_to_threshold(dd->kernel_send_context[scontext], 1836 50), 1837 sc_mtu_to_threshold(dd->kernel_send_context[scontext], 1838 dd->vld[i].mtu, 1839 dd->rcd[0]->rcvhdrqentsize)); 1840 sc_set_cr_threshold(dd->kernel_send_context[scontext], thres); 1841 } 1842 1843 /* 1844 * pio_map_init - called when #vls change 1845 * @dd: hfi1_devdata 1846 * @port: port number 1847 * @num_vls: number of vls 1848 * @vl_scontexts: per vl send context mapping (optional) 1849 * 1850 * This routine changes the mapping based on the number of vls. 1851 * 1852 * vl_scontexts is used to specify a non-uniform vl/send context 1853 * loading. NULL implies auto computing the loading and giving each 1854 * VL an uniform distribution of send contexts per VL. 
1855 * 1856 * The auto algorithm computers the sc_per_vl and the number of extra 1857 * send contexts. Any extra send contexts are added from the last VL 1858 * on down 1859 * 1860 * rcu locking is used here to control access to the mapping fields. 1861 * 1862 * If either the num_vls or num_send_contexts are non-power of 2, the 1863 * array sizes in the struct pio_vl_map and the struct pio_map_elem are 1864 * rounded up to the next highest power of 2 and the first entry is 1865 * reused in a round robin fashion. 1866 * 1867 * If an error occurs the map change is not done and the mapping is not 1868 * chaged. 1869 * 1870 */ 1871 int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts) 1872 { 1873 int i, j; 1874 int extra, sc_per_vl; 1875 int scontext = 1; 1876 int num_kernel_send_contexts = 0; 1877 u8 lvl_scontexts[OPA_MAX_VLS]; 1878 struct pio_vl_map *oldmap, *newmap; 1879 1880 if (!vl_scontexts) { 1881 for (i = 0; i < dd->num_send_contexts; i++) 1882 if (dd->send_contexts[i].type == SC_KERNEL) 1883 num_kernel_send_contexts++; 1884 /* truncate divide */ 1885 sc_per_vl = num_kernel_send_contexts / num_vls; 1886 /* extras */ 1887 extra = num_kernel_send_contexts % num_vls; 1888 vl_scontexts = lvl_scontexts; 1889 /* add extras from last vl down */ 1890 for (i = num_vls - 1; i >= 0; i--, extra--) 1891 vl_scontexts[i] = sc_per_vl + (extra > 0 ? 
1 : 0); 1892 } 1893 /* build new map */ 1894 newmap = kzalloc(sizeof(*newmap) + 1895 roundup_pow_of_two(num_vls) * 1896 sizeof(struct pio_map_elem *), 1897 GFP_KERNEL); 1898 if (!newmap) 1899 goto bail; 1900 newmap->actual_vls = num_vls; 1901 newmap->vls = roundup_pow_of_two(num_vls); 1902 newmap->mask = (1 << ilog2(newmap->vls)) - 1; 1903 for (i = 0; i < newmap->vls; i++) { 1904 /* save for wrap around */ 1905 int first_scontext = scontext; 1906 1907 if (i < newmap->actual_vls) { 1908 int sz = roundup_pow_of_two(vl_scontexts[i]); 1909 1910 /* only allocate once */ 1911 newmap->map[i] = kzalloc(sizeof(*newmap->map[i]) + 1912 sz * sizeof(struct 1913 send_context *), 1914 GFP_KERNEL); 1915 if (!newmap->map[i]) 1916 goto bail; 1917 newmap->map[i]->mask = (1 << ilog2(sz)) - 1; 1918 /* 1919 * assign send contexts and 1920 * adjust credit return threshold 1921 */ 1922 for (j = 0; j < sz; j++) { 1923 if (dd->kernel_send_context[scontext]) { 1924 newmap->map[i]->ksc[j] = 1925 dd->kernel_send_context[scontext]; 1926 set_threshold(dd, scontext, i); 1927 } 1928 if (++scontext >= first_scontext + 1929 vl_scontexts[i]) 1930 /* wrap back to first send context */ 1931 scontext = first_scontext; 1932 } 1933 } else { 1934 /* just re-use entry without allocating */ 1935 newmap->map[i] = newmap->map[i % num_vls]; 1936 } 1937 scontext = first_scontext + vl_scontexts[i]; 1938 } 1939 /* newmap in hand, save old map */ 1940 spin_lock_irq(&dd->pio_map_lock); 1941 oldmap = rcu_dereference_protected(dd->pio_map, 1942 lockdep_is_held(&dd->pio_map_lock)); 1943 1944 /* publish newmap */ 1945 rcu_assign_pointer(dd->pio_map, newmap); 1946 1947 spin_unlock_irq(&dd->pio_map_lock); 1948 /* success, free any old map after grace period */ 1949 if (oldmap) 1950 call_rcu(&oldmap->list, pio_map_rcu_callback); 1951 return 0; 1952 bail: 1953 /* free any partial allocation */ 1954 pio_map_free(newmap); 1955 return -ENOMEM; 1956 } 1957 1958 void free_pio_map(struct hfi1_devdata *dd) 1959 { 1960 /* Free PIO 
map if allocated */ 1961 if (rcu_access_pointer(dd->pio_map)) { 1962 spin_lock_irq(&dd->pio_map_lock); 1963 pio_map_free(rcu_access_pointer(dd->pio_map)); 1964 RCU_INIT_POINTER(dd->pio_map, NULL); 1965 spin_unlock_irq(&dd->pio_map_lock); 1966 synchronize_rcu(); 1967 } 1968 kfree(dd->kernel_send_context); 1969 dd->kernel_send_context = NULL; 1970 } 1971 1972 int init_pervl_scs(struct hfi1_devdata *dd) 1973 { 1974 int i; 1975 u64 mask, all_vl_mask = (u64)0x80ff; /* VLs 0-7, 15 */ 1976 u64 data_vls_mask = (u64)0x00ff; /* VLs 0-7 */ 1977 u32 ctxt; 1978 struct hfi1_pportdata *ppd = dd->pport; 1979 1980 dd->vld[15].sc = sc_alloc(dd, SC_VL15, 1981 dd->rcd[0]->rcvhdrqentsize, dd->node); 1982 if (!dd->vld[15].sc) 1983 return -ENOMEM; 1984 1985 hfi1_init_ctxt(dd->vld[15].sc); 1986 dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048); 1987 1988 dd->kernel_send_context = kcalloc_node(dd->num_send_contexts, 1989 sizeof(struct send_context *), 1990 GFP_KERNEL, dd->node); 1991 if (!dd->kernel_send_context) 1992 goto freesc15; 1993 1994 dd->kernel_send_context[0] = dd->vld[15].sc; 1995 1996 for (i = 0; i < num_vls; i++) { 1997 /* 1998 * Since this function does not deal with a specific 1999 * receive context but we need the RcvHdrQ entry size, 2000 * use the size from rcd[0]. It is guaranteed to be 2001 * valid at this point and will remain the same for all 2002 * receive contexts. 
2003 */ 2004 dd->vld[i].sc = sc_alloc(dd, SC_KERNEL, 2005 dd->rcd[0]->rcvhdrqentsize, dd->node); 2006 if (!dd->vld[i].sc) 2007 goto nomem; 2008 dd->kernel_send_context[i + 1] = dd->vld[i].sc; 2009 hfi1_init_ctxt(dd->vld[i].sc); 2010 /* non VL15 start with the max MTU */ 2011 dd->vld[i].mtu = hfi1_max_mtu; 2012 } 2013 for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) { 2014 dd->kernel_send_context[i + 1] = 2015 sc_alloc(dd, SC_KERNEL, dd->rcd[0]->rcvhdrqentsize, dd->node); 2016 if (!dd->kernel_send_context[i + 1]) 2017 goto nomem; 2018 hfi1_init_ctxt(dd->kernel_send_context[i + 1]); 2019 } 2020 2021 sc_enable(dd->vld[15].sc); 2022 ctxt = dd->vld[15].sc->hw_context; 2023 mask = all_vl_mask & ~(1LL << 15); 2024 write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask); 2025 dd_dev_info(dd, 2026 "Using send context %u(%u) for VL15\n", 2027 dd->vld[15].sc->sw_index, ctxt); 2028 2029 for (i = 0; i < num_vls; i++) { 2030 sc_enable(dd->vld[i].sc); 2031 ctxt = dd->vld[i].sc->hw_context; 2032 mask = all_vl_mask & ~(data_vls_mask); 2033 write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask); 2034 } 2035 for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) { 2036 sc_enable(dd->kernel_send_context[i + 1]); 2037 ctxt = dd->kernel_send_context[i + 1]->hw_context; 2038 mask = all_vl_mask & ~(data_vls_mask); 2039 write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask); 2040 } 2041 2042 if (pio_map_init(dd, ppd->port - 1, num_vls, NULL)) 2043 goto nomem; 2044 return 0; 2045 2046 nomem: 2047 for (i = 0; i < num_vls; i++) { 2048 sc_free(dd->vld[i].sc); 2049 dd->vld[i].sc = NULL; 2050 } 2051 2052 for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) 2053 sc_free(dd->kernel_send_context[i + 1]); 2054 2055 kfree(dd->kernel_send_context); 2056 dd->kernel_send_context = NULL; 2057 2058 freesc15: 2059 sc_free(dd->vld[15].sc); 2060 return -ENOMEM; 2061 } 2062 2063 int init_credit_return(struct hfi1_devdata *dd) 2064 { 2065 int ret; 2066 int i; 2067 2068 dd->cr_base = kcalloc( 2069 node_affinity.num_possible_nodes, 2070 
sizeof(struct credit_return_base), 2071 GFP_KERNEL); 2072 if (!dd->cr_base) { 2073 ret = -ENOMEM; 2074 goto done; 2075 } 2076 for_each_node_with_cpus(i) { 2077 int bytes = TXE_NUM_CONTEXTS * sizeof(struct credit_return); 2078 2079 set_dev_node(&dd->pcidev->dev, i); 2080 dd->cr_base[i].va = dma_alloc_coherent(&dd->pcidev->dev, 2081 bytes, 2082 &dd->cr_base[i].dma, 2083 GFP_KERNEL); 2084 if (!dd->cr_base[i].va) { 2085 set_dev_node(&dd->pcidev->dev, dd->node); 2086 dd_dev_err(dd, 2087 "Unable to allocate credit return DMA range for NUMA %d\n", 2088 i); 2089 ret = -ENOMEM; 2090 goto done; 2091 } 2092 } 2093 set_dev_node(&dd->pcidev->dev, dd->node); 2094 2095 ret = 0; 2096 done: 2097 return ret; 2098 } 2099 2100 void free_credit_return(struct hfi1_devdata *dd) 2101 { 2102 int i; 2103 2104 if (!dd->cr_base) 2105 return; 2106 for (i = 0; i < node_affinity.num_possible_nodes; i++) { 2107 if (dd->cr_base[i].va) { 2108 dma_free_coherent(&dd->pcidev->dev, 2109 TXE_NUM_CONTEXTS * 2110 sizeof(struct credit_return), 2111 dd->cr_base[i].va, 2112 dd->cr_base[i].dma); 2113 } 2114 } 2115 kfree(dd->cr_base); 2116 dd->cr_base = NULL; 2117 } 2118 2119 void seqfile_dump_sci(struct seq_file *s, u32 i, 2120 struct send_context_info *sci) 2121 { 2122 struct send_context *sc = sci->sc; 2123 u64 reg; 2124 2125 seq_printf(s, "SCI %u: type %u base %u credits %u\n", 2126 i, sci->type, sci->base, sci->credits); 2127 seq_printf(s, " flags 0x%x sw_inx %u hw_ctxt %u grp %u\n", 2128 sc->flags, sc->sw_index, sc->hw_context, sc->group); 2129 seq_printf(s, " sr_size %u credits %u sr_head %u sr_tail %u\n", 2130 sc->sr_size, sc->credits, sc->sr_head, sc->sr_tail); 2131 seq_printf(s, " fill %lu free %lu fill_wrap %u alloc_free %lu\n", 2132 sc->fill, sc->free, sc->fill_wrap, sc->alloc_free); 2133 seq_printf(s, " credit_intr_count %u credit_ctrl 0x%llx\n", 2134 sc->credit_intr_count, sc->credit_ctrl); 2135 reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_STATUS)); 2136 seq_printf(s, " *hw_free %llu 
CurrentFree %llu LastReturned %llu\n", 2137 (le64_to_cpu(*sc->hw_free) & CR_COUNTER_SMASK) >> 2138 CR_COUNTER_SHIFT, 2139 (reg >> SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT)) & 2140 SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK), 2141 reg & SC(CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK)); 2142 } 2143