1 // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause 2 /* 3 * Copyright(c) 2015-2018 Intel Corporation. 4 */ 5 6 #include <linux/delay.h> 7 #include "hfi.h" 8 #include "qp.h" 9 #include "trace.h" 10 11 #define SC(name) SEND_CTXT_##name 12 /* 13 * Send Context functions 14 */ 15 static void sc_wait_for_packet_egress(struct send_context *sc, int pause); 16 17 /* 18 * Set the CM reset bit and wait for it to clear. Use the provided 19 * sendctrl register. This routine has no locking. 20 */ 21 void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl) 22 { 23 write_csr(dd, SEND_CTRL, sendctrl | SEND_CTRL_CM_RESET_SMASK); 24 while (1) { 25 udelay(1); 26 sendctrl = read_csr(dd, SEND_CTRL); 27 if ((sendctrl & SEND_CTRL_CM_RESET_SMASK) == 0) 28 break; 29 } 30 } 31 32 /* global control of PIO send */ 33 void pio_send_control(struct hfi1_devdata *dd, int op) 34 { 35 u64 reg, mask; 36 unsigned long flags; 37 int write = 1; /* write sendctrl back */ 38 int flush = 0; /* re-read sendctrl to make sure it is flushed */ 39 int i; 40 41 spin_lock_irqsave(&dd->sendctrl_lock, flags); 42 43 reg = read_csr(dd, SEND_CTRL); 44 switch (op) { 45 case PSC_GLOBAL_ENABLE: 46 reg |= SEND_CTRL_SEND_ENABLE_SMASK; 47 fallthrough; 48 case PSC_DATA_VL_ENABLE: 49 mask = 0; 50 for (i = 0; i < ARRAY_SIZE(dd->vld); i++) 51 if (!dd->vld[i].mtu) 52 mask |= BIT_ULL(i); 53 /* Disallow sending on VLs not enabled */ 54 mask = (mask & SEND_CTRL_UNSUPPORTED_VL_MASK) << 55 SEND_CTRL_UNSUPPORTED_VL_SHIFT; 56 reg = (reg & ~SEND_CTRL_UNSUPPORTED_VL_SMASK) | mask; 57 break; 58 case PSC_GLOBAL_DISABLE: 59 reg &= ~SEND_CTRL_SEND_ENABLE_SMASK; 60 break; 61 case PSC_GLOBAL_VLARB_ENABLE: 62 reg |= SEND_CTRL_VL_ARBITER_ENABLE_SMASK; 63 break; 64 case PSC_GLOBAL_VLARB_DISABLE: 65 reg &= ~SEND_CTRL_VL_ARBITER_ENABLE_SMASK; 66 break; 67 case PSC_CM_RESET: 68 __cm_reset(dd, reg); 69 write = 0; /* CSR already written (and flushed) */ 70 break; 71 case PSC_DATA_VL_DISABLE: 72 reg |= SEND_CTRL_UNSUPPORTED_VL_SMASK; 73 flush = 1; 74 
break; 75 default: 76 dd_dev_err(dd, "%s: invalid control %d\n", __func__, op); 77 break; 78 } 79 80 if (write) { 81 write_csr(dd, SEND_CTRL, reg); 82 if (flush) 83 (void)read_csr(dd, SEND_CTRL); /* flush write */ 84 } 85 86 spin_unlock_irqrestore(&dd->sendctrl_lock, flags); 87 } 88 89 /* number of send context memory pools */ 90 #define NUM_SC_POOLS 2 91 92 /* Send Context Size (SCS) wildcards */ 93 #define SCS_POOL_0 -1 94 #define SCS_POOL_1 -2 95 96 /* Send Context Count (SCC) wildcards */ 97 #define SCC_PER_VL -1 98 #define SCC_PER_CPU -2 99 #define SCC_PER_KRCVQ -3 100 101 /* Send Context Size (SCS) constants */ 102 #define SCS_ACK_CREDITS 32 103 #define SCS_VL15_CREDITS 102 /* 3 pkts of 2048B data + 128B header */ 104 105 #define PIO_THRESHOLD_CEILING 4096 106 107 #define PIO_WAIT_BATCH_SIZE 5 108 109 /* default send context sizes */ 110 static struct sc_config_sizes sc_config_sizes[SC_MAX] = { 111 [SC_KERNEL] = { .size = SCS_POOL_0, /* even divide, pool 0 */ 112 .count = SCC_PER_VL }, /* one per NUMA */ 113 [SC_ACK] = { .size = SCS_ACK_CREDITS, 114 .count = SCC_PER_KRCVQ }, 115 [SC_USER] = { .size = SCS_POOL_0, /* even divide, pool 0 */ 116 .count = SCC_PER_CPU }, /* one per CPU */ 117 [SC_VL15] = { .size = SCS_VL15_CREDITS, 118 .count = 1 }, 119 120 }; 121 122 /* send context memory pool configuration */ 123 struct mem_pool_config { 124 int centipercent; /* % of memory, in 100ths of 1% */ 125 int absolute_blocks; /* absolute block count */ 126 }; 127 128 /* default memory pool configuration: 100% in pool 0 */ 129 static struct mem_pool_config sc_mem_pool_config[NUM_SC_POOLS] = { 130 /* centi%, abs blocks */ 131 { 10000, -1 }, /* pool 0 */ 132 { 0, -1 }, /* pool 1 */ 133 }; 134 135 /* memory pool information, used when calculating final sizes */ 136 struct mem_pool_info { 137 int centipercent; /* 138 * 100th of 1% of memory to use, -1 if blocks 139 * already set 140 */ 141 int count; /* count of contexts in the pool */ 142 int blocks; /* block size of the 
pool */ 143 int size; /* context size, in blocks */ 144 }; 145 146 /* 147 * Convert a pool wildcard to a valid pool index. The wildcards 148 * start at -1 and increase negatively. Map them as: 149 * -1 => 0 150 * -2 => 1 151 * etc. 152 * 153 * Return -1 on non-wildcard input, otherwise convert to a pool number. 154 */ 155 static int wildcard_to_pool(int wc) 156 { 157 if (wc >= 0) 158 return -1; /* non-wildcard */ 159 return -wc - 1; 160 } 161 162 static const char *sc_type_names[SC_MAX] = { 163 "kernel", 164 "ack", 165 "user", 166 "vl15" 167 }; 168 169 static const char *sc_type_name(int index) 170 { 171 if (index < 0 || index >= SC_MAX) 172 return "unknown"; 173 return sc_type_names[index]; 174 } 175 176 /* 177 * Read the send context memory pool configuration and send context 178 * size configuration. Replace any wildcards and come up with final 179 * counts and sizes for the send context types. 180 */ 181 int init_sc_pools_and_sizes(struct hfi1_devdata *dd) 182 { 183 struct mem_pool_info mem_pool_info[NUM_SC_POOLS] = { { 0 } }; 184 int total_blocks = (chip_pio_mem_size(dd) / PIO_BLOCK_SIZE) - 1; 185 int total_contexts = 0; 186 int fixed_blocks; 187 int pool_blocks; 188 int used_blocks; 189 int cp_total; /* centipercent total */ 190 int ab_total; /* absolute block total */ 191 int extra; 192 int i; 193 194 /* 195 * When SDMA is enabled, kernel context pio packet size is capped by 196 * "piothreshold". Reduce pio buffer allocation for kernel context by 197 * setting it to a fixed size. The allocation allows 3-deep buffering 198 * of the largest pio packets plus up to 128 bytes header, sufficient 199 * to maintain verbs performance. 200 * 201 * When SDMA is disabled, keep the default pooling allocation. 202 */ 203 if (HFI1_CAP_IS_KSET(SDMA)) { 204 u16 max_pkt_size = (piothreshold < PIO_THRESHOLD_CEILING) ? 
205 piothreshold : PIO_THRESHOLD_CEILING; 206 sc_config_sizes[SC_KERNEL].size = 207 3 * (max_pkt_size + 128) / PIO_BLOCK_SIZE; 208 } 209 210 /* 211 * Step 0: 212 * - copy the centipercents/absolute sizes from the pool config 213 * - sanity check these values 214 * - add up centipercents, then later check for full value 215 * - add up absolute blocks, then later check for over-commit 216 */ 217 cp_total = 0; 218 ab_total = 0; 219 for (i = 0; i < NUM_SC_POOLS; i++) { 220 int cp = sc_mem_pool_config[i].centipercent; 221 int ab = sc_mem_pool_config[i].absolute_blocks; 222 223 /* 224 * A negative value is "unused" or "invalid". Both *can* 225 * be valid, but centipercent wins, so check that first 226 */ 227 if (cp >= 0) { /* centipercent valid */ 228 cp_total += cp; 229 } else if (ab >= 0) { /* absolute blocks valid */ 230 ab_total += ab; 231 } else { /* neither valid */ 232 dd_dev_err( 233 dd, 234 "Send context memory pool %d: both the block count and centipercent are invalid\n", 235 i); 236 return -EINVAL; 237 } 238 239 mem_pool_info[i].centipercent = cp; 240 mem_pool_info[i].blocks = ab; 241 } 242 243 /* do not use both % and absolute blocks for different pools */ 244 if (cp_total != 0 && ab_total != 0) { 245 dd_dev_err( 246 dd, 247 "All send context memory pools must be described as either centipercent or blocks, no mixing between pools\n"); 248 return -EINVAL; 249 } 250 251 /* if any percentages are present, they must add up to 100% x 100 */ 252 if (cp_total != 0 && cp_total != 10000) { 253 dd_dev_err( 254 dd, 255 "Send context memory pool centipercent is %d, expecting 10000\n", 256 cp_total); 257 return -EINVAL; 258 } 259 260 /* the absolute pool total cannot be more than the mem total */ 261 if (ab_total > total_blocks) { 262 dd_dev_err( 263 dd, 264 "Send context memory pool absolute block count %d is larger than the memory size %d\n", 265 ab_total, total_blocks); 266 return -EINVAL; 267 } 268 269 /* 270 * Step 2: 271 * - copy from the context size config 272 * - 
replace context type wildcard counts with real values 273 * - add up non-memory pool block sizes 274 * - add up memory pool user counts 275 */ 276 fixed_blocks = 0; 277 for (i = 0; i < SC_MAX; i++) { 278 int count = sc_config_sizes[i].count; 279 int size = sc_config_sizes[i].size; 280 int pool; 281 282 /* 283 * Sanity check count: Either a positive value or 284 * one of the expected wildcards is valid. The positive 285 * value is checked later when we compare against total 286 * memory available. 287 */ 288 if (i == SC_ACK) { 289 count = dd->n_krcv_queues; 290 } else if (i == SC_KERNEL) { 291 count = INIT_SC_PER_VL * num_vls; 292 } else if (count == SCC_PER_CPU) { 293 count = dd->num_rcv_contexts - dd->n_krcv_queues; 294 } else if (count < 0) { 295 dd_dev_err( 296 dd, 297 "%s send context invalid count wildcard %d\n", 298 sc_type_name(i), count); 299 return -EINVAL; 300 } 301 if (total_contexts + count > chip_send_contexts(dd)) 302 count = chip_send_contexts(dd) - total_contexts; 303 304 total_contexts += count; 305 306 /* 307 * Sanity check pool: The conversion will return a pool 308 * number or -1 if a fixed (non-negative) value. The fixed 309 * value is checked later when we compare against 310 * total memory available. 
311 */ 312 pool = wildcard_to_pool(size); 313 if (pool == -1) { /* non-wildcard */ 314 fixed_blocks += size * count; 315 } else if (pool < NUM_SC_POOLS) { /* valid wildcard */ 316 mem_pool_info[pool].count += count; 317 } else { /* invalid wildcard */ 318 dd_dev_err( 319 dd, 320 "%s send context invalid pool wildcard %d\n", 321 sc_type_name(i), size); 322 return -EINVAL; 323 } 324 325 dd->sc_sizes[i].count = count; 326 dd->sc_sizes[i].size = size; 327 } 328 if (fixed_blocks > total_blocks) { 329 dd_dev_err( 330 dd, 331 "Send context fixed block count, %u, larger than total block count %u\n", 332 fixed_blocks, total_blocks); 333 return -EINVAL; 334 } 335 336 /* step 3: calculate the blocks in the pools, and pool context sizes */ 337 pool_blocks = total_blocks - fixed_blocks; 338 if (ab_total > pool_blocks) { 339 dd_dev_err( 340 dd, 341 "Send context fixed pool sizes, %u, larger than pool block count %u\n", 342 ab_total, pool_blocks); 343 return -EINVAL; 344 } 345 /* subtract off the fixed pool blocks */ 346 pool_blocks -= ab_total; 347 348 for (i = 0; i < NUM_SC_POOLS; i++) { 349 struct mem_pool_info *pi = &mem_pool_info[i]; 350 351 /* % beats absolute blocks */ 352 if (pi->centipercent >= 0) 353 pi->blocks = (pool_blocks * pi->centipercent) / 10000; 354 355 if (pi->blocks == 0 && pi->count != 0) { 356 dd_dev_err( 357 dd, 358 "Send context memory pool %d has %u contexts, but no blocks\n", 359 i, pi->count); 360 return -EINVAL; 361 } 362 if (pi->count == 0) { 363 /* warn about wasted blocks */ 364 if (pi->blocks != 0) 365 dd_dev_err( 366 dd, 367 "Send context memory pool %d has %u blocks, but zero contexts\n", 368 i, pi->blocks); 369 pi->size = 0; 370 } else { 371 pi->size = pi->blocks / pi->count; 372 } 373 } 374 375 /* step 4: fill in the context type sizes from the pool sizes */ 376 used_blocks = 0; 377 for (i = 0; i < SC_MAX; i++) { 378 if (dd->sc_sizes[i].size < 0) { 379 unsigned pool = wildcard_to_pool(dd->sc_sizes[i].size); 380 381 WARN_ON_ONCE(pool >= 
NUM_SC_POOLS); 382 dd->sc_sizes[i].size = mem_pool_info[pool].size; 383 } 384 /* make sure we are not larger than what is allowed by the HW */ 385 #define PIO_MAX_BLOCKS 1024 386 if (dd->sc_sizes[i].size > PIO_MAX_BLOCKS) 387 dd->sc_sizes[i].size = PIO_MAX_BLOCKS; 388 389 /* calculate our total usage */ 390 used_blocks += dd->sc_sizes[i].size * dd->sc_sizes[i].count; 391 } 392 extra = total_blocks - used_blocks; 393 if (extra != 0) 394 dd_dev_info(dd, "unused send context blocks: %d\n", extra); 395 396 return total_contexts; 397 } 398 399 int init_send_contexts(struct hfi1_devdata *dd) 400 { 401 u16 base; 402 int ret, i, j, context; 403 404 ret = init_credit_return(dd); 405 if (ret) 406 return ret; 407 408 dd->hw_to_sw = kmalloc_array(TXE_NUM_CONTEXTS, sizeof(u8), 409 GFP_KERNEL); 410 dd->send_contexts = kcalloc(dd->num_send_contexts, 411 sizeof(struct send_context_info), 412 GFP_KERNEL); 413 if (!dd->send_contexts || !dd->hw_to_sw) { 414 kfree(dd->hw_to_sw); 415 kfree(dd->send_contexts); 416 free_credit_return(dd); 417 return -ENOMEM; 418 } 419 420 /* hardware context map starts with invalid send context indices */ 421 for (i = 0; i < TXE_NUM_CONTEXTS; i++) 422 dd->hw_to_sw[i] = INVALID_SCI; 423 424 /* 425 * All send contexts have their credit sizes. Allocate credits 426 * for each context one after another from the global space. 427 */ 428 context = 0; 429 base = 1; 430 for (i = 0; i < SC_MAX; i++) { 431 struct sc_config_sizes *scs = &dd->sc_sizes[i]; 432 433 for (j = 0; j < scs->count; j++) { 434 struct send_context_info *sci = 435 &dd->send_contexts[context]; 436 sci->type = i; 437 sci->base = base; 438 sci->credits = scs->size; 439 440 context++; 441 base += scs->size; 442 } 443 } 444 445 return 0; 446 } 447 448 /* 449 * Allocate a software index and hardware context of the given type. 450 * 451 * Must be called with dd->sc_lock held. 
452 */ 453 static int sc_hw_alloc(struct hfi1_devdata *dd, int type, u32 *sw_index, 454 u32 *hw_context) 455 { 456 struct send_context_info *sci; 457 u32 index; 458 u32 context; 459 460 for (index = 0, sci = &dd->send_contexts[0]; 461 index < dd->num_send_contexts; index++, sci++) { 462 if (sci->type == type && sci->allocated == 0) { 463 sci->allocated = 1; 464 /* use a 1:1 mapping, but make them non-equal */ 465 context = chip_send_contexts(dd) - index - 1; 466 dd->hw_to_sw[context] = index; 467 *sw_index = index; 468 *hw_context = context; 469 return 0; /* success */ 470 } 471 } 472 dd_dev_err(dd, "Unable to locate a free type %d send context\n", type); 473 return -ENOSPC; 474 } 475 476 /* 477 * Free the send context given by its software index. 478 * 479 * Must be called with dd->sc_lock held. 480 */ 481 static void sc_hw_free(struct hfi1_devdata *dd, u32 sw_index, u32 hw_context) 482 { 483 struct send_context_info *sci; 484 485 sci = &dd->send_contexts[sw_index]; 486 if (!sci->allocated) { 487 dd_dev_err(dd, "%s: sw_index %u not allocated? hw_context %u\n", 488 __func__, sw_index, hw_context); 489 } 490 sci->allocated = 0; 491 dd->hw_to_sw[hw_context] = INVALID_SCI; 492 } 493 494 /* return the base context of a context in a group */ 495 static inline u32 group_context(u32 context, u32 group) 496 { 497 return (context >> group) << group; 498 } 499 500 /* return the size of a group */ 501 static inline u32 group_size(u32 group) 502 { 503 return 1 << group; 504 } 505 506 /* 507 * Obtain the credit return addresses, kernel virtual and bus, for the 508 * given sc. 509 * 510 * To understand this routine: 511 * o va and dma are arrays of struct credit_return. One for each physical 512 * send context, per NUMA. 513 * o Each send context always looks in its relative location in a struct 514 * credit_return for its credit return. 515 * o Each send context in a group must have its return address CSR programmed 516 * with the same value. 
Use the address of the first send context in the 517 * group. 518 */ 519 static void cr_group_addresses(struct send_context *sc, dma_addr_t *dma) 520 { 521 u32 gc = group_context(sc->hw_context, sc->group); 522 u32 index = sc->hw_context & 0x7; 523 524 sc->hw_free = &sc->dd->cr_base[sc->node].va[gc].cr[index]; 525 *dma = (unsigned long) 526 &((struct credit_return *)sc->dd->cr_base[sc->node].dma)[gc]; 527 } 528 529 /* 530 * Work queue function triggered in error interrupt routine for 531 * kernel contexts. 532 */ 533 static void sc_halted(struct work_struct *work) 534 { 535 struct send_context *sc; 536 537 sc = container_of(work, struct send_context, halt_work); 538 sc_restart(sc); 539 } 540 541 /* 542 * Calculate PIO block threshold for this send context using the given MTU. 543 * Trigger a return when one MTU plus optional header of credits remain. 544 * 545 * Parameter mtu is in bytes. 546 * Parameter hdrqentsize is in DWORDs. 547 * 548 * Return value is what to write into the CSR: trigger return when 549 * unreturned credits pass this count. 550 */ 551 u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize) 552 { 553 u32 release_credits; 554 u32 threshold; 555 556 /* add in the header size, then divide by the PIO block size */ 557 mtu += hdrqentsize << 2; 558 release_credits = DIV_ROUND_UP(mtu, PIO_BLOCK_SIZE); 559 560 /* check against this context's credits */ 561 if (sc->credits <= release_credits) 562 threshold = 1; 563 else 564 threshold = sc->credits - release_credits; 565 566 return threshold; 567 } 568 569 /* 570 * Calculate credit threshold in terms of percent of the allocated credits. 571 * Trigger when unreturned credits equal or exceed the percentage of the whole. 572 * 573 * Return value is what to write into the CSR: trigger return when 574 * unreturned credits pass this count. 
575 */ 576 u32 sc_percent_to_threshold(struct send_context *sc, u32 percent) 577 { 578 return (sc->credits * percent) / 100; 579 } 580 581 /* 582 * Set the credit return threshold. 583 */ 584 void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold) 585 { 586 unsigned long flags; 587 u32 old_threshold; 588 int force_return = 0; 589 590 spin_lock_irqsave(&sc->credit_ctrl_lock, flags); 591 592 old_threshold = (sc->credit_ctrl >> 593 SC(CREDIT_CTRL_THRESHOLD_SHIFT)) 594 & SC(CREDIT_CTRL_THRESHOLD_MASK); 595 596 if (new_threshold != old_threshold) { 597 sc->credit_ctrl = 598 (sc->credit_ctrl 599 & ~SC(CREDIT_CTRL_THRESHOLD_SMASK)) 600 | ((new_threshold 601 & SC(CREDIT_CTRL_THRESHOLD_MASK)) 602 << SC(CREDIT_CTRL_THRESHOLD_SHIFT)); 603 write_kctxt_csr(sc->dd, sc->hw_context, 604 SC(CREDIT_CTRL), sc->credit_ctrl); 605 606 /* force a credit return on change to avoid a possible stall */ 607 force_return = 1; 608 } 609 610 spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags); 611 612 if (force_return) 613 sc_return_credits(sc); 614 } 615 616 /* 617 * set_pio_integrity 618 * 619 * Set the CHECK_ENABLE register for the send context 'sc'. 620 */ 621 void set_pio_integrity(struct send_context *sc) 622 { 623 struct hfi1_devdata *dd = sc->dd; 624 u32 hw_context = sc->hw_context; 625 int type = sc->type; 626 627 write_kctxt_csr(dd, hw_context, 628 SC(CHECK_ENABLE), 629 hfi1_pkt_default_send_ctxt_mask(dd, type)); 630 } 631 632 static u32 get_buffers_allocated(struct send_context *sc) 633 { 634 int cpu; 635 u32 ret = 0; 636 637 for_each_possible_cpu(cpu) 638 ret += *per_cpu_ptr(sc->buffers_allocated, cpu); 639 return ret; 640 } 641 642 static void reset_buffers_allocated(struct send_context *sc) 643 { 644 int cpu; 645 646 for_each_possible_cpu(cpu) 647 (*per_cpu_ptr(sc->buffers_allocated, cpu)) = 0; 648 } 649 650 /* 651 * Allocate a NUMA relative send context structure of the given type along 652 * with a HW context. 
*/
struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
			      uint hdrqentsize, int numa)
{
	struct send_context_info *sci;
	struct send_context *sc = NULL;
	dma_addr_t dma;
	unsigned long flags;
	u64 reg;
	u32 thresh;
	u32 sw_index;
	u32 hw_context;
	int ret;
	u8 opval, opmask;

	/*
	 * Returns the new send context, or NULL when the device is frozen,
	 * an allocation fails, or no hardware context of this type is free.
	 */

	/* do not allocate while frozen */
	if (dd->flags & HFI1_FROZEN)
		return NULL;

	sc = kzalloc_node(sizeof(*sc), GFP_KERNEL, numa);
	if (!sc)
		return NULL;

	sc->buffers_allocated = alloc_percpu(u32);
	if (!sc->buffers_allocated) {
		kfree(sc);
		dd_dev_err(dd,
			   "Cannot allocate buffers_allocated per cpu counters\n"
			  );
		return NULL;
	}

	/* everything up to the unlock below runs with dd->sc_lock held */
	spin_lock_irqsave(&dd->sc_lock, flags);
	ret = sc_hw_alloc(dd, type, &sw_index, &hw_context);
	if (ret) {
		spin_unlock_irqrestore(&dd->sc_lock, flags);
		free_percpu(sc->buffers_allocated);
		kfree(sc);
		return NULL;
	}

	sci = &dd->send_contexts[sw_index];
	sci->sc = sc;

	sc->dd = dd;
	sc->node = numa;
	sc->type = type;
	spin_lock_init(&sc->alloc_lock);
	spin_lock_init(&sc->release_lock);
	spin_lock_init(&sc->credit_ctrl_lock);
	seqlock_init(&sc->waitlock);
	INIT_LIST_HEAD(&sc->piowait);
	INIT_WORK(&sc->halt_work, sc_halted);
	init_waitqueue_head(&sc->halt_wait);

	/* grouping is always single context for now */
	sc->group = 0;

	sc->sw_index = sw_index;
	sc->hw_context = hw_context;
	/* sets sc->hw_free and returns the bus address in dma */
	cr_group_addresses(sc, &dma);
	sc->credits = sci->credits;
	sc->size = sc->credits * PIO_BLOCK_SIZE;

/* PIO Send Memory Address details */
#define PIO_ADDR_CONTEXT_MASK 0xfful
#define PIO_ADDR_CONTEXT_SHIFT 16
	sc->base_addr = dd->piobase + ((hw_context & PIO_ADDR_CONTEXT_MASK)
					<< PIO_ADDR_CONTEXT_SHIFT);

	/* set base and credits */
	reg = ((sci->credits & SC(CTRL_CTXT_DEPTH_MASK))
					<< SC(CTRL_CTXT_DEPTH_SHIFT))
		| ((sci->base & SC(CTRL_CTXT_BASE_MASK))
					<< SC(CTRL_CTXT_BASE_SHIFT));
	write_kctxt_csr(dd, hw_context, SC(CTRL), reg);

	set_pio_integrity(sc);

	/* unmask all errors */
	write_kctxt_csr(dd, hw_context, SC(ERR_MASK), (u64)-1);

	/* set the default partition key */
	write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY),
			(SC(CHECK_PARTITION_KEY_VALUE_MASK) &
			 DEFAULT_PKEY) <<
			SC(CHECK_PARTITION_KEY_VALUE_SHIFT));

	/* per context type checks */
	if (type == SC_USER) {
		opval = USER_OPCODE_CHECK_VAL;
		opmask = USER_OPCODE_CHECK_MASK;
	} else {
		opval = OPCODE_CHECK_VAL_DISABLED;
		opmask = OPCODE_CHECK_MASK_DISABLED;
	}

	/* set the send context check opcode mask and value */
	write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE),
			((u64)opmask << SC(CHECK_OPCODE_MASK_SHIFT)) |
			((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT)));

	/* set up credit return */
	reg = dma & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK);
	write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), reg);

	/*
	 * Calculate the initial credit return threshold.
	 *
	 * For Ack contexts, set a threshold for half the credits.
	 * For User contexts use the given percentage.  This has been
	 * sanitized on driver start-up.
	 * For Kernel contexts, use the default MTU plus a header
	 * or half the credits, whichever is smaller. This should
	 * work for both the 3-deep buffering allocation and the
	 * pooling allocation.
	 */
	if (type == SC_ACK) {
		thresh = sc_percent_to_threshold(sc, 50);
	} else if (type == SC_USER) {
		thresh = sc_percent_to_threshold(sc,
						 user_credit_return_threshold);
	} else { /* kernel */
		thresh = min(sc_percent_to_threshold(sc, 50),
			     sc_mtu_to_threshold(sc, hfi1_max_mtu,
						 hdrqentsize));
	}
	reg = thresh << SC(CREDIT_CTRL_THRESHOLD_SHIFT);
	/* add in early return */
	if (type == SC_USER && HFI1_CAP_IS_USET(EARLY_CREDIT_RETURN))
		reg |= SC(CREDIT_CTRL_EARLY_RETURN_SMASK);
	else if (HFI1_CAP_IS_KSET(EARLY_CREDIT_RETURN)) /* kernel, ack */
		reg |= SC(CREDIT_CTRL_EARLY_RETURN_SMASK);

	/* set up write-through credit_ctrl */
	sc->credit_ctrl = reg;
	write_kctxt_csr(dd, hw_context, SC(CREDIT_CTRL), reg);

	/* User send contexts should not allow sending on VL15 */
	if (type == SC_USER) {
		reg = 1ULL << 15;
		write_kctxt_csr(dd, hw_context, SC(CHECK_VL), reg);
	}

	spin_unlock_irqrestore(&dd->sc_lock, flags);

	/*
	 * Allocate shadow ring to track outstanding PIO buffers _after_
	 * unlocking.  We don't know the size until the lock is held and
	 * we can't allocate while the lock is held.  No one is using
	 * the context yet, so allocate it now.
	 *
	 * User contexts do not get a shadow ring.
	 */
	if (type != SC_USER) {
		/*
		 * Size the shadow ring 1 larger than the number of credits
		 * so head == tail can mean empty.
		 */
		sc->sr_size = sci->credits + 1;
		sc->sr = kcalloc_node(sc->sr_size,
				      sizeof(union pio_shadow_ring),
				      GFP_KERNEL, numa);
		if (!sc->sr) {
			/* sc_free() undoes the CSR setup and HW allocation */
			sc_free(sc);
			return NULL;
		}
	}

	hfi1_cdbg(PIO,
		  "Send context %u(%u) %s group %u credits %u credit_ctrl 0x%llx threshold %u",
		  sw_index,
		  hw_context,
		  sc_type_name(type),
		  sc->group,
		  sc->credits,
		  sc->credit_ctrl,
		  thresh);

	return sc;
}

/*
 * Free a per-NUMA send context structure.
 *
 * Disables the context, waits for any pending halt work, clears the CSRs
 * that sc_alloc() programmed, releases the software index and hardware
 * context, then frees the shadow ring, the per-CPU counters and the
 * structure itself.  A NULL sc is ignored.
 */
void sc_free(struct send_context *sc)
{
	struct hfi1_devdata *dd;
	unsigned long flags;
	u32 sw_index;
	u32 hw_context;

	if (!sc)
		return;

	sc->flags |= SCF_IN_FREE;	/* ensure no restarts */
	dd = sc->dd;
	if (!list_empty(&sc->piowait))
		dd_dev_err(dd, "piowait list not empty!\n");
	sw_index = sc->sw_index;
	hw_context = sc->hw_context;
	sc_disable(sc);	/* make sure the HW is disabled */
	flush_work(&sc->halt_work);

	spin_lock_irqsave(&dd->sc_lock, flags);
	dd->send_contexts[sw_index].sc = NULL;

	/* clear/disable all registers set in sc_alloc */
	write_kctxt_csr(dd, hw_context, SC(CTRL), 0);
	write_kctxt_csr(dd, hw_context, SC(CHECK_ENABLE), 0);
	write_kctxt_csr(dd, hw_context, SC(ERR_MASK), 0);
	write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY), 0);
	write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE), 0);
	write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), 0);
	write_kctxt_csr(dd, hw_context, SC(CREDIT_CTRL), 0);

	/* release the index and context for re-use */
	sc_hw_free(dd, sw_index, hw_context);
	spin_unlock_irqrestore(&dd->sc_lock, flags);

	kfree(sc->sr);
	free_percpu(sc->buffers_allocated);
	kfree(sc);
}

/*
 * Disable the context.
 *
 * With sc->alloc_lock held: clear the enable bit and SCF_ENABLED, wait
 * for packets to egress, write the control CSR, complete every entry
 * still in the shadow ring with PRC_SC_DISABLE, and wake every QP queued
 * on the piowait list.  A NULL sc is ignored.
 */
void sc_disable(struct send_context *sc)
{
	u64 reg;
	struct pio_buf *pbuf;
	LIST_HEAD(wake_list);

	if (!sc)
		return;

	/* do all steps, even if already disabled */
	spin_lock_irq(&sc->alloc_lock);
	reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL));
	reg &= ~SC(CTRL_CTXT_ENABLE_SMASK);
	sc->flags &= ~SCF_ENABLED;
	/* the CSR is only written after the egress wait has finished */
	sc_wait_for_packet_egress(sc, 1);
	write_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL), reg);

	/*
	 * Flush any waiters.  Once the context is disabled,
	 * credit return interrupts are stopped (although there
	 * could be one in-process when the context is disabled).
	 * Wait one microsecond for any lingering interrupts, then
	 * proceed with the flush.
	 */
	udelay(1);
	spin_lock(&sc->release_lock);
	if (sc->sr) {	/* this context has a shadow ring */
		/* walk tail up to head, wrapping at sr_size */
		while (sc->sr_tail != sc->sr_head) {
			pbuf = &sc->sr[sc->sr_tail].pbuf;
			if (pbuf->cb)
				(*pbuf->cb)(pbuf->arg, PRC_SC_DISABLE);
			sc->sr_tail++;
			if (sc->sr_tail >= sc->sr_size)
				sc->sr_tail = 0;
		}
	}
	spin_unlock(&sc->release_lock);

	/* move all waiters to a private list while holding waitlock */
	write_seqlock(&sc->waitlock);
	list_splice_init(&sc->piowait, &wake_list);
	write_sequnlock(&sc->waitlock);
	while (!list_empty(&wake_list)) {
		struct iowait *wait;
		struct rvt_qp *qp;
		struct hfi1_qp_priv *priv;

		wait = list_first_entry(&wake_list, struct iowait, list);
		qp = iowait_to_qp(wait);
		priv = qp->priv;
		list_del_init(&priv->s_iowait.list);
		priv->s_iowait.lock = NULL;
		hfi1_qp_wakeup(qp, RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
	}

	spin_unlock_irq(&sc->alloc_lock);
}

/* return SendEgressCtxtStatus.PacketOccupancy */
static u64 packet_occupancy(u64 reg)
{
	return (reg &
		SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK)
		>> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT;
}

/* is egress halted on the context? */
static bool egress_halted(u64 reg)
{
	return !!(reg & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK);
}

/* is the send context halted?
*/ 949 static bool is_sc_halted(struct hfi1_devdata *dd, u32 hw_context) 950 { 951 return !!(read_kctxt_csr(dd, hw_context, SC(STATUS)) & 952 SC(STATUS_CTXT_HALTED_SMASK)); 953 } 954 955 /** 956 * sc_wait_for_packet_egress - wait for packet 957 * @sc: valid send context 958 * @pause: wait for credit return 959 * 960 * Wait for packet egress, optionally pause for credit return 961 * 962 * Egress halt and Context halt are not necessarily the same thing, so 963 * check for both. 964 * 965 * NOTE: The context halt bit may not be set immediately. Because of this, 966 * it is necessary to check the SW SFC_HALTED bit (set in the IRQ) and the HW 967 * context bit to determine if the context is halted. 968 */ 969 static void sc_wait_for_packet_egress(struct send_context *sc, int pause) 970 { 971 struct hfi1_devdata *dd = sc->dd; 972 u64 reg = 0; 973 u64 reg_prev; 974 u32 loop = 0; 975 976 while (1) { 977 reg_prev = reg; 978 reg = read_csr(dd, sc->hw_context * 8 + 979 SEND_EGRESS_CTXT_STATUS); 980 /* done if any halt bits, SW or HW are set */ 981 if (sc->flags & SCF_HALTED || 982 is_sc_halted(dd, sc->hw_context) || egress_halted(reg)) 983 break; 984 reg = packet_occupancy(reg); 985 if (reg == 0) 986 break; 987 /* counter is reset if occupancy count changes */ 988 if (reg != reg_prev) 989 loop = 0; 990 if (loop > 50000) { 991 /* timed out - bounce the link */ 992 dd_dev_err(dd, 993 "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n", 994 __func__, sc->sw_index, 995 sc->hw_context, (u32)reg); 996 queue_work(dd->pport->link_wq, 997 &dd->pport->link_bounce_work); 998 break; 999 } 1000 loop++; 1001 udelay(1); 1002 } 1003 1004 if (pause) 1005 /* Add additional delay to ensure chip returns all credits */ 1006 pause_for_credit_return(dd); 1007 } 1008 1009 void sc_wait(struct hfi1_devdata *dd) 1010 { 1011 int i; 1012 1013 for (i = 0; i < dd->num_send_contexts; i++) { 1014 struct send_context *sc = dd->send_contexts[i].sc; 1015 1016 if (!sc) 
1017 continue; 1018 sc_wait_for_packet_egress(sc, 0); 1019 } 1020 } 1021 1022 /* 1023 * Restart a context after it has been halted due to error. 1024 * 1025 * If the first step fails - wait for the halt to be asserted, return early. 1026 * Otherwise complain about timeouts but keep going. 1027 * 1028 * It is expected that allocations (enabled flag bit) have been shut off 1029 * already (only applies to kernel contexts). 1030 */ 1031 int sc_restart(struct send_context *sc) 1032 { 1033 struct hfi1_devdata *dd = sc->dd; 1034 u64 reg; 1035 u32 loop; 1036 int count; 1037 1038 /* bounce off if not halted, or being free'd */ 1039 if (!(sc->flags & SCF_HALTED) || (sc->flags & SCF_IN_FREE)) 1040 return -EINVAL; 1041 1042 dd_dev_info(dd, "restarting send context %u(%u)\n", sc->sw_index, 1043 sc->hw_context); 1044 1045 /* 1046 * Step 1: Wait for the context to actually halt. 1047 * 1048 * The error interrupt is asynchronous to actually setting halt 1049 * on the context. 1050 */ 1051 loop = 0; 1052 while (1) { 1053 reg = read_kctxt_csr(dd, sc->hw_context, SC(STATUS)); 1054 if (reg & SC(STATUS_CTXT_HALTED_SMASK)) 1055 break; 1056 if (loop > 100) { 1057 dd_dev_err(dd, "%s: context %u(%u) not halting, skipping\n", 1058 __func__, sc->sw_index, sc->hw_context); 1059 return -ETIME; 1060 } 1061 loop++; 1062 udelay(1); 1063 } 1064 1065 /* 1066 * Step 2: Ensure no users are still trying to write to PIO. 1067 * 1068 * For kernel contexts, we have already turned off buffer allocation. 1069 * Now wait for the buffer count to go to zero. 1070 * 1071 * For user contexts, the user handling code has cut off write access 1072 * to the context's PIO pages before calling this routine and will 1073 * restore write access after this routine returns. 
1074 */ 1075 if (sc->type != SC_USER) { 1076 /* kernel context */ 1077 loop = 0; 1078 while (1) { 1079 count = get_buffers_allocated(sc); 1080 if (count == 0) 1081 break; 1082 if (loop > 100) { 1083 dd_dev_err(dd, 1084 "%s: context %u(%u) timeout waiting for PIO buffers to zero, remaining %d\n", 1085 __func__, sc->sw_index, 1086 sc->hw_context, count); 1087 } 1088 loop++; 1089 udelay(1); 1090 } 1091 } 1092 1093 /* 1094 * Step 3: Wait for all packets to egress. 1095 * This is done while disabling the send context 1096 * 1097 * Step 4: Disable the context 1098 * 1099 * This is a superset of the halt. After the disable, the 1100 * errors can be cleared. 1101 */ 1102 sc_disable(sc); 1103 1104 /* 1105 * Step 5: Enable the context 1106 * 1107 * This enable will clear the halted flag and per-send context 1108 * error flags. 1109 */ 1110 return sc_enable(sc); 1111 } 1112 1113 /* 1114 * PIO freeze processing. To be called after the TXE block is fully frozen. 1115 * Go through all frozen send contexts and disable them. The contexts are 1116 * already stopped by the freeze. 1117 */ 1118 void pio_freeze(struct hfi1_devdata *dd) 1119 { 1120 struct send_context *sc; 1121 int i; 1122 1123 for (i = 0; i < dd->num_send_contexts; i++) { 1124 sc = dd->send_contexts[i].sc; 1125 /* 1126 * Don't disable unallocated, unfrozen, or user send contexts. 1127 * User send contexts will be disabled when the process 1128 * calls into the driver to reset its context. 1129 */ 1130 if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER) 1131 continue; 1132 1133 /* only need to disable, the context is already stopped */ 1134 sc_disable(sc); 1135 } 1136 } 1137 1138 /* 1139 * Unfreeze PIO for kernel send contexts. The precondition for calling this 1140 * is that all PIO send contexts have been disabled and the SPC freeze has 1141 * been cleared. Now perform the last step and re-enable each kernel context. 
1142 * User (PSM) processing will occur when PSM calls into the kernel to 1143 * acknowledge the freeze. 1144 */ 1145 void pio_kernel_unfreeze(struct hfi1_devdata *dd) 1146 { 1147 struct send_context *sc; 1148 int i; 1149 1150 for (i = 0; i < dd->num_send_contexts; i++) { 1151 sc = dd->send_contexts[i].sc; 1152 if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER) 1153 continue; 1154 if (sc->flags & SCF_LINK_DOWN) 1155 continue; 1156 1157 sc_enable(sc); /* will clear the sc frozen flag */ 1158 } 1159 } 1160 1161 /** 1162 * pio_kernel_linkup() - Re-enable send contexts after linkup event 1163 * @dd: valid devive data 1164 * 1165 * When the link goes down, the freeze path is taken. However, a link down 1166 * event is different from a freeze because if the send context is re-enabled 1167 * whowever is sending data will start sending data again, which will hang 1168 * any QP that is sending data. 1169 * 1170 * The freeze path now looks at the type of event that occurs and takes this 1171 * path for link down event. 1172 */ 1173 void pio_kernel_linkup(struct hfi1_devdata *dd) 1174 { 1175 struct send_context *sc; 1176 int i; 1177 1178 for (i = 0; i < dd->num_send_contexts; i++) { 1179 sc = dd->send_contexts[i].sc; 1180 if (!sc || !(sc->flags & SCF_LINK_DOWN) || sc->type == SC_USER) 1181 continue; 1182 1183 sc_enable(sc); /* will clear the sc link down flag */ 1184 } 1185 } 1186 1187 /* 1188 * Wait for the SendPioInitCtxt.PioInitInProgress bit to clear. 1189 * Returns: 1190 * -ETIMEDOUT - if we wait too long 1191 * -EIO - if there was an error 1192 */ 1193 static int pio_init_wait_progress(struct hfi1_devdata *dd) 1194 { 1195 u64 reg; 1196 int max, count = 0; 1197 1198 /* max is the longest possible HW init time / delay */ 1199 max = (dd->icode == ICODE_FPGA_EMULATION) ? 
120 : 5; 1200 while (1) { 1201 reg = read_csr(dd, SEND_PIO_INIT_CTXT); 1202 if (!(reg & SEND_PIO_INIT_CTXT_PIO_INIT_IN_PROGRESS_SMASK)) 1203 break; 1204 if (count >= max) 1205 return -ETIMEDOUT; 1206 udelay(5); 1207 count++; 1208 } 1209 1210 return reg & SEND_PIO_INIT_CTXT_PIO_INIT_ERR_SMASK ? -EIO : 0; 1211 } 1212 1213 /* 1214 * Reset all of the send contexts to their power-on state. Used 1215 * only during manual init - no lock against sc_enable needed. 1216 */ 1217 void pio_reset_all(struct hfi1_devdata *dd) 1218 { 1219 int ret; 1220 1221 /* make sure the init engine is not busy */ 1222 ret = pio_init_wait_progress(dd); 1223 /* ignore any timeout */ 1224 if (ret == -EIO) { 1225 /* clear the error */ 1226 write_csr(dd, SEND_PIO_ERR_CLEAR, 1227 SEND_PIO_ERR_CLEAR_PIO_INIT_SM_IN_ERR_SMASK); 1228 } 1229 1230 /* reset init all */ 1231 write_csr(dd, SEND_PIO_INIT_CTXT, 1232 SEND_PIO_INIT_CTXT_PIO_ALL_CTXT_INIT_SMASK); 1233 udelay(2); 1234 ret = pio_init_wait_progress(dd); 1235 if (ret < 0) { 1236 dd_dev_err(dd, 1237 "PIO send context init %s while initializing all PIO blocks\n", 1238 ret == -ETIMEDOUT ? "is stuck" : "had an error"); 1239 } 1240 } 1241 1242 /* enable the context */ 1243 int sc_enable(struct send_context *sc) 1244 { 1245 u64 sc_ctrl, reg, pio; 1246 struct hfi1_devdata *dd; 1247 unsigned long flags; 1248 int ret = 0; 1249 1250 if (!sc) 1251 return -EINVAL; 1252 dd = sc->dd; 1253 1254 /* 1255 * Obtain the allocator lock to guard against any allocation 1256 * attempts (which should not happen prior to context being 1257 * enabled). On the release/disable side we don't need to 1258 * worry about locking since the releaser will not do anything 1259 * if the context accounting values have not changed. 
1260 */ 1261 spin_lock_irqsave(&sc->alloc_lock, flags); 1262 sc_ctrl = read_kctxt_csr(dd, sc->hw_context, SC(CTRL)); 1263 if ((sc_ctrl & SC(CTRL_CTXT_ENABLE_SMASK))) 1264 goto unlock; /* already enabled */ 1265 1266 /* IMPORTANT: only clear free and fill if transitioning 0 -> 1 */ 1267 1268 *sc->hw_free = 0; 1269 sc->free = 0; 1270 sc->alloc_free = 0; 1271 sc->fill = 0; 1272 sc->fill_wrap = 0; 1273 sc->sr_head = 0; 1274 sc->sr_tail = 0; 1275 sc->flags = 0; 1276 /* the alloc lock insures no fast path allocation */ 1277 reset_buffers_allocated(sc); 1278 1279 /* 1280 * Clear all per-context errors. Some of these will be set when 1281 * we are re-enabling after a context halt. Now that the context 1282 * is disabled, the halt will not clear until after the PIO init 1283 * engine runs below. 1284 */ 1285 reg = read_kctxt_csr(dd, sc->hw_context, SC(ERR_STATUS)); 1286 if (reg) 1287 write_kctxt_csr(dd, sc->hw_context, SC(ERR_CLEAR), reg); 1288 1289 /* 1290 * The HW PIO initialization engine can handle only one init 1291 * request at a time. Serialize access to each device's engine. 1292 */ 1293 spin_lock(&dd->sc_init_lock); 1294 /* 1295 * Since access to this code block is serialized and 1296 * each access waits for the initialization to complete 1297 * before releasing the lock, the PIO initialization engine 1298 * should not be in use, so we don't have to wait for the 1299 * InProgress bit to go down. 1300 */ 1301 pio = ((sc->hw_context & SEND_PIO_INIT_CTXT_PIO_CTXT_NUM_MASK) << 1302 SEND_PIO_INIT_CTXT_PIO_CTXT_NUM_SHIFT) | 1303 SEND_PIO_INIT_CTXT_PIO_SINGLE_CTXT_INIT_SMASK; 1304 write_csr(dd, SEND_PIO_INIT_CTXT, pio); 1305 /* 1306 * Wait until the engine is done. Give the chip the required time 1307 * so, hopefully, we read the register just once. 
1308 */ 1309 udelay(2); 1310 ret = pio_init_wait_progress(dd); 1311 spin_unlock(&dd->sc_init_lock); 1312 if (ret) { 1313 dd_dev_err(dd, 1314 "sctxt%u(%u): Context not enabled due to init failure %d\n", 1315 sc->sw_index, sc->hw_context, ret); 1316 goto unlock; 1317 } 1318 1319 /* 1320 * All is well. Enable the context. 1321 */ 1322 sc_ctrl |= SC(CTRL_CTXT_ENABLE_SMASK); 1323 write_kctxt_csr(dd, sc->hw_context, SC(CTRL), sc_ctrl); 1324 /* 1325 * Read SendCtxtCtrl to force the write out and prevent a timing 1326 * hazard where a PIO write may reach the context before the enable. 1327 */ 1328 read_kctxt_csr(dd, sc->hw_context, SC(CTRL)); 1329 sc->flags |= SCF_ENABLED; 1330 1331 unlock: 1332 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1333 1334 return ret; 1335 } 1336 1337 /* force a credit return on the context */ 1338 void sc_return_credits(struct send_context *sc) 1339 { 1340 if (!sc) 1341 return; 1342 1343 /* a 0->1 transition schedules a credit return */ 1344 write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE), 1345 SC(CREDIT_FORCE_FORCE_RETURN_SMASK)); 1346 /* 1347 * Ensure that the write is flushed and the credit return is 1348 * scheduled. We care more about the 0 -> 1 transition. 
1349 */ 1350 read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE)); 1351 /* set back to 0 for next time */ 1352 write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE), 0); 1353 } 1354 1355 /* allow all in-flight packets to drain on the context */ 1356 void sc_flush(struct send_context *sc) 1357 { 1358 if (!sc) 1359 return; 1360 1361 sc_wait_for_packet_egress(sc, 1); 1362 } 1363 1364 /* drop all packets on the context, no waiting until they are sent */ 1365 void sc_drop(struct send_context *sc) 1366 { 1367 if (!sc) 1368 return; 1369 1370 dd_dev_info(sc->dd, "%s: context %u(%u) - not implemented\n", 1371 __func__, sc->sw_index, sc->hw_context); 1372 } 1373 1374 /* 1375 * Start the software reaction to a context halt or SPC freeze: 1376 * - mark the context as halted or frozen 1377 * - stop buffer allocations 1378 * 1379 * Called from the error interrupt. Other work is deferred until 1380 * out of the interrupt. 1381 */ 1382 void sc_stop(struct send_context *sc, int flag) 1383 { 1384 unsigned long flags; 1385 1386 /* stop buffer allocations */ 1387 spin_lock_irqsave(&sc->alloc_lock, flags); 1388 /* mark the context */ 1389 sc->flags |= flag; 1390 sc->flags &= ~SCF_ENABLED; 1391 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1392 wake_up(&sc->halt_wait); 1393 } 1394 1395 #define BLOCK_DWORDS (PIO_BLOCK_SIZE / sizeof(u32)) 1396 #define dwords_to_blocks(x) DIV_ROUND_UP(x, BLOCK_DWORDS) 1397 1398 /* 1399 * The send context buffer "allocator". 1400 * 1401 * @sc: the PIO send context we are allocating from 1402 * @len: length of whole packet - including PBC - in dwords 1403 * @cb: optional callback to call when the buffer is finished sending 1404 * @arg: argument for cb 1405 * 1406 * Return a pointer to a PIO buffer, NULL if not enough room, -ECOMM 1407 * when link is down. 
1408 */ 1409 struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len, 1410 pio_release_cb cb, void *arg) 1411 { 1412 struct pio_buf *pbuf = NULL; 1413 unsigned long flags; 1414 unsigned long avail; 1415 unsigned long blocks = dwords_to_blocks(dw_len); 1416 u32 fill_wrap; 1417 int trycount = 0; 1418 u32 head, next; 1419 1420 spin_lock_irqsave(&sc->alloc_lock, flags); 1421 if (!(sc->flags & SCF_ENABLED)) { 1422 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1423 return ERR_PTR(-ECOMM); 1424 } 1425 1426 retry: 1427 avail = (unsigned long)sc->credits - (sc->fill - sc->alloc_free); 1428 if (blocks > avail) { 1429 /* not enough room */ 1430 if (unlikely(trycount)) { /* already tried to get more room */ 1431 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1432 goto done; 1433 } 1434 /* copy from receiver cache line and recalculate */ 1435 sc->alloc_free = READ_ONCE(sc->free); 1436 avail = 1437 (unsigned long)sc->credits - 1438 (sc->fill - sc->alloc_free); 1439 if (blocks > avail) { 1440 /* still no room, actively update */ 1441 sc_release_update(sc); 1442 sc->alloc_free = READ_ONCE(sc->free); 1443 trycount++; 1444 goto retry; 1445 } 1446 } 1447 1448 /* there is enough room */ 1449 1450 preempt_disable(); 1451 this_cpu_inc(*sc->buffers_allocated); 1452 1453 /* read this once */ 1454 head = sc->sr_head; 1455 1456 /* "allocate" the buffer */ 1457 sc->fill += blocks; 1458 fill_wrap = sc->fill_wrap; 1459 sc->fill_wrap += blocks; 1460 if (sc->fill_wrap >= sc->credits) 1461 sc->fill_wrap = sc->fill_wrap - sc->credits; 1462 1463 /* 1464 * Fill the parts that the releaser looks at before moving the head. 1465 * The only necessary piece is the sent_at field. The credits 1466 * we have just allocated cannot have been returned yet, so the 1467 * cb and arg will not be looked at for a "while". Put them 1468 * on this side of the memory barrier anyway. 
1469 */ 1470 pbuf = &sc->sr[head].pbuf; 1471 pbuf->sent_at = sc->fill; 1472 pbuf->cb = cb; 1473 pbuf->arg = arg; 1474 pbuf->sc = sc; /* could be filled in at sc->sr init time */ 1475 /* make sure this is in memory before updating the head */ 1476 1477 /* calculate next head index, do not store */ 1478 next = head + 1; 1479 if (next >= sc->sr_size) 1480 next = 0; 1481 /* 1482 * update the head - must be last! - the releaser can look at fields 1483 * in pbuf once we move the head 1484 */ 1485 smp_wmb(); 1486 sc->sr_head = next; 1487 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1488 1489 /* finish filling in the buffer outside the lock */ 1490 pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE; 1491 pbuf->end = sc->base_addr + sc->size; 1492 pbuf->qw_written = 0; 1493 pbuf->carry_bytes = 0; 1494 pbuf->carry.val64 = 0; 1495 done: 1496 return pbuf; 1497 } 1498 1499 /* 1500 * There are at least two entities that can turn on credit return 1501 * interrupts and they can overlap. Avoid problems by implementing 1502 * a count scheme that is enforced by a lock. The lock is needed because 1503 * the count and CSR write must be paired. 1504 */ 1505 1506 /* 1507 * Start credit return interrupts. This is managed by a count. If already 1508 * on, just increment the count. 1509 */ 1510 void sc_add_credit_return_intr(struct send_context *sc) 1511 { 1512 unsigned long flags; 1513 1514 /* lock must surround both the count change and the CSR update */ 1515 spin_lock_irqsave(&sc->credit_ctrl_lock, flags); 1516 if (sc->credit_intr_count == 0) { 1517 sc->credit_ctrl |= SC(CREDIT_CTRL_CREDIT_INTR_SMASK); 1518 write_kctxt_csr(sc->dd, sc->hw_context, 1519 SC(CREDIT_CTRL), sc->credit_ctrl); 1520 } 1521 sc->credit_intr_count++; 1522 spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags); 1523 } 1524 1525 /* 1526 * Stop credit return interrupts. This is managed by a count. Decrement the 1527 * count, if the last user, then turn the credit interrupts off. 
1528 */ 1529 void sc_del_credit_return_intr(struct send_context *sc) 1530 { 1531 unsigned long flags; 1532 1533 WARN_ON(sc->credit_intr_count == 0); 1534 1535 /* lock must surround both the count change and the CSR update */ 1536 spin_lock_irqsave(&sc->credit_ctrl_lock, flags); 1537 sc->credit_intr_count--; 1538 if (sc->credit_intr_count == 0) { 1539 sc->credit_ctrl &= ~SC(CREDIT_CTRL_CREDIT_INTR_SMASK); 1540 write_kctxt_csr(sc->dd, sc->hw_context, 1541 SC(CREDIT_CTRL), sc->credit_ctrl); 1542 } 1543 spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags); 1544 } 1545 1546 /* 1547 * The caller must be careful when calling this. All needint calls 1548 * must be paired with !needint. 1549 */ 1550 void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint) 1551 { 1552 if (needint) 1553 sc_add_credit_return_intr(sc); 1554 else 1555 sc_del_credit_return_intr(sc); 1556 trace_hfi1_wantpiointr(sc, needint, sc->credit_ctrl); 1557 if (needint) 1558 sc_return_credits(sc); 1559 } 1560 1561 /** 1562 * sc_piobufavail - callback when a PIO buffer is available 1563 * @sc: the send context 1564 * 1565 * This is called from the interrupt handler when a PIO buffer is 1566 * available after hfi1_verbs_send() returned an error that no buffers were 1567 * available. Disable the interrupt if there are no more QPs waiting. 
1568 */ 1569 static void sc_piobufavail(struct send_context *sc) 1570 { 1571 struct hfi1_devdata *dd = sc->dd; 1572 struct list_head *list; 1573 struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE]; 1574 struct rvt_qp *qp; 1575 struct hfi1_qp_priv *priv; 1576 unsigned long flags; 1577 uint i, n = 0, top_idx = 0; 1578 1579 if (dd->send_contexts[sc->sw_index].type != SC_KERNEL && 1580 dd->send_contexts[sc->sw_index].type != SC_VL15) 1581 return; 1582 list = &sc->piowait; 1583 /* 1584 * Note: checking that the piowait list is empty and clearing 1585 * the buffer available interrupt needs to be atomic or we 1586 * could end up with QPs on the wait list with the interrupt 1587 * disabled. 1588 */ 1589 write_seqlock_irqsave(&sc->waitlock, flags); 1590 while (!list_empty(list)) { 1591 struct iowait *wait; 1592 1593 if (n == ARRAY_SIZE(qps)) 1594 break; 1595 wait = list_first_entry(list, struct iowait, list); 1596 iowait_get_priority(wait); 1597 qp = iowait_to_qp(wait); 1598 priv = qp->priv; 1599 list_del_init(&priv->s_iowait.list); 1600 priv->s_iowait.lock = NULL; 1601 if (n) { 1602 priv = qps[top_idx]->priv; 1603 top_idx = iowait_priority_update_top(wait, 1604 &priv->s_iowait, 1605 n, top_idx); 1606 } 1607 1608 /* refcount held until actual wake up */ 1609 qps[n++] = qp; 1610 } 1611 /* 1612 * If there had been waiters and there are more 1613 * insure that we redo the force to avoid a potential hang. 
1614 */ 1615 if (n) { 1616 hfi1_sc_wantpiobuf_intr(sc, 0); 1617 if (!list_empty(list)) 1618 hfi1_sc_wantpiobuf_intr(sc, 1); 1619 } 1620 write_sequnlock_irqrestore(&sc->waitlock, flags); 1621 1622 /* Wake up the top-priority one first */ 1623 if (n) 1624 hfi1_qp_wakeup(qps[top_idx], 1625 RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); 1626 for (i = 0; i < n; i++) 1627 if (i != top_idx) 1628 hfi1_qp_wakeup(qps[i], 1629 RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); 1630 } 1631 1632 /* translate a send credit update to a bit code of reasons */ 1633 static inline int fill_code(u64 hw_free) 1634 { 1635 int code = 0; 1636 1637 if (hw_free & CR_STATUS_SMASK) 1638 code |= PRC_STATUS_ERR; 1639 if (hw_free & CR_CREDIT_RETURN_DUE_TO_PBC_SMASK) 1640 code |= PRC_PBC; 1641 if (hw_free & CR_CREDIT_RETURN_DUE_TO_THRESHOLD_SMASK) 1642 code |= PRC_THRESHOLD; 1643 if (hw_free & CR_CREDIT_RETURN_DUE_TO_ERR_SMASK) 1644 code |= PRC_FILL_ERR; 1645 if (hw_free & CR_CREDIT_RETURN_DUE_TO_FORCE_SMASK) 1646 code |= PRC_SC_DISABLE; 1647 return code; 1648 } 1649 1650 /* use the jiffies compare to get the wrap right */ 1651 #define sent_before(a, b) time_before(a, b) /* a < b */ 1652 1653 /* 1654 * The send context buffer "releaser". 
1655 */ 1656 void sc_release_update(struct send_context *sc) 1657 { 1658 struct pio_buf *pbuf; 1659 u64 hw_free; 1660 u32 head, tail; 1661 unsigned long old_free; 1662 unsigned long free; 1663 unsigned long extra; 1664 unsigned long flags; 1665 int code; 1666 1667 if (!sc) 1668 return; 1669 1670 spin_lock_irqsave(&sc->release_lock, flags); 1671 /* update free */ 1672 hw_free = le64_to_cpu(*sc->hw_free); /* volatile read */ 1673 old_free = sc->free; 1674 extra = (((hw_free & CR_COUNTER_SMASK) >> CR_COUNTER_SHIFT) 1675 - (old_free & CR_COUNTER_MASK)) 1676 & CR_COUNTER_MASK; 1677 free = old_free + extra; 1678 trace_hfi1_piofree(sc, extra); 1679 1680 /* call sent buffer callbacks */ 1681 code = -1; /* code not yet set */ 1682 head = READ_ONCE(sc->sr_head); /* snapshot the head */ 1683 tail = sc->sr_tail; 1684 while (head != tail) { 1685 pbuf = &sc->sr[tail].pbuf; 1686 1687 if (sent_before(free, pbuf->sent_at)) { 1688 /* not sent yet */ 1689 break; 1690 } 1691 if (pbuf->cb) { 1692 if (code < 0) /* fill in code on first user */ 1693 code = fill_code(hw_free); 1694 (*pbuf->cb)(pbuf->arg, code); 1695 } 1696 1697 tail++; 1698 if (tail >= sc->sr_size) 1699 tail = 0; 1700 } 1701 sc->sr_tail = tail; 1702 /* make sure tail is updated before free */ 1703 smp_wmb(); 1704 sc->free = free; 1705 spin_unlock_irqrestore(&sc->release_lock, flags); 1706 sc_piobufavail(sc); 1707 } 1708 1709 /* 1710 * Send context group releaser. Argument is the send context that caused 1711 * the interrupt. Called from the send context interrupt handler. 1712 * 1713 * Call release on all contexts in the group. 1714 * 1715 * This routine takes the sc_lock without an irqsave because it is only 1716 * called from an interrupt handler. Adjust if that changes. 
1717 */ 1718 void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context) 1719 { 1720 struct send_context *sc; 1721 u32 sw_index; 1722 u32 gc, gc_end; 1723 1724 spin_lock(&dd->sc_lock); 1725 sw_index = dd->hw_to_sw[hw_context]; 1726 if (unlikely(sw_index >= dd->num_send_contexts)) { 1727 dd_dev_err(dd, "%s: invalid hw (%u) to sw (%u) mapping\n", 1728 __func__, hw_context, sw_index); 1729 goto done; 1730 } 1731 sc = dd->send_contexts[sw_index].sc; 1732 if (unlikely(!sc)) 1733 goto done; 1734 1735 gc = group_context(hw_context, sc->group); 1736 gc_end = gc + group_size(sc->group); 1737 for (; gc < gc_end; gc++) { 1738 sw_index = dd->hw_to_sw[gc]; 1739 if (unlikely(sw_index >= dd->num_send_contexts)) { 1740 dd_dev_err(dd, 1741 "%s: invalid hw (%u) to sw (%u) mapping\n", 1742 __func__, hw_context, sw_index); 1743 continue; 1744 } 1745 sc_release_update(dd->send_contexts[sw_index].sc); 1746 } 1747 done: 1748 spin_unlock(&dd->sc_lock); 1749 } 1750 1751 /* 1752 * pio_select_send_context_vl() - select send context 1753 * @dd: devdata 1754 * @selector: a spreading factor 1755 * @vl: this vl 1756 * 1757 * This function returns a send context based on the selector and a vl. 1758 * The mapping fields are protected by RCU 1759 */ 1760 struct send_context *pio_select_send_context_vl(struct hfi1_devdata *dd, 1761 u32 selector, u8 vl) 1762 { 1763 struct pio_vl_map *m; 1764 struct pio_map_elem *e; 1765 struct send_context *rval; 1766 1767 /* 1768 * NOTE This should only happen if SC->VL changed after the initial 1769 * checks on the QP/AH 1770 * Default will return VL0's send context below 1771 */ 1772 if (unlikely(vl >= num_vls)) { 1773 rval = NULL; 1774 goto done; 1775 } 1776 1777 rcu_read_lock(); 1778 m = rcu_dereference(dd->pio_map); 1779 if (unlikely(!m)) { 1780 rcu_read_unlock(); 1781 return dd->vld[0].sc; 1782 } 1783 e = m->map[vl & m->mask]; 1784 rval = e->ksc[selector & e->mask]; 1785 rcu_read_unlock(); 1786 1787 done: 1788 rval = !rval ? 
dd->vld[0].sc : rval; 1789 return rval; 1790 } 1791 1792 /* 1793 * pio_select_send_context_sc() - select send context 1794 * @dd: devdata 1795 * @selector: a spreading factor 1796 * @sc5: the 5 bit sc 1797 * 1798 * This function returns an send context based on the selector and an sc 1799 */ 1800 struct send_context *pio_select_send_context_sc(struct hfi1_devdata *dd, 1801 u32 selector, u8 sc5) 1802 { 1803 u8 vl = sc_to_vlt(dd, sc5); 1804 1805 return pio_select_send_context_vl(dd, selector, vl); 1806 } 1807 1808 /* 1809 * Free the indicated map struct 1810 */ 1811 static void pio_map_free(struct pio_vl_map *m) 1812 { 1813 int i; 1814 1815 for (i = 0; m && i < m->actual_vls; i++) 1816 kfree(m->map[i]); 1817 kfree(m); 1818 } 1819 1820 /* 1821 * Handle RCU callback 1822 */ 1823 static void pio_map_rcu_callback(struct rcu_head *list) 1824 { 1825 struct pio_vl_map *m = container_of(list, struct pio_vl_map, list); 1826 1827 pio_map_free(m); 1828 } 1829 1830 /* 1831 * Set credit return threshold for the kernel send context 1832 */ 1833 static void set_threshold(struct hfi1_devdata *dd, int scontext, int i) 1834 { 1835 u32 thres; 1836 1837 thres = min(sc_percent_to_threshold(dd->kernel_send_context[scontext], 1838 50), 1839 sc_mtu_to_threshold(dd->kernel_send_context[scontext], 1840 dd->vld[i].mtu, 1841 dd->rcd[0]->rcvhdrqentsize)); 1842 sc_set_cr_threshold(dd->kernel_send_context[scontext], thres); 1843 } 1844 1845 /* 1846 * pio_map_init - called when #vls change 1847 * @dd: hfi1_devdata 1848 * @port: port number 1849 * @num_vls: number of vls 1850 * @vl_scontexts: per vl send context mapping (optional) 1851 * 1852 * This routine changes the mapping based on the number of vls. 1853 * 1854 * vl_scontexts is used to specify a non-uniform vl/send context 1855 * loading. NULL implies auto computing the loading and giving each 1856 * VL an uniform distribution of send contexts per VL. 
1857 * 1858 * The auto algorithm computers the sc_per_vl and the number of extra 1859 * send contexts. Any extra send contexts are added from the last VL 1860 * on down 1861 * 1862 * rcu locking is used here to control access to the mapping fields. 1863 * 1864 * If either the num_vls or num_send_contexts are non-power of 2, the 1865 * array sizes in the struct pio_vl_map and the struct pio_map_elem are 1866 * rounded up to the next highest power of 2 and the first entry is 1867 * reused in a round robin fashion. 1868 * 1869 * If an error occurs the map change is not done and the mapping is not 1870 * chaged. 1871 * 1872 */ 1873 int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts) 1874 { 1875 int i, j; 1876 int extra, sc_per_vl; 1877 int scontext = 1; 1878 int num_kernel_send_contexts = 0; 1879 u8 lvl_scontexts[OPA_MAX_VLS]; 1880 struct pio_vl_map *oldmap, *newmap; 1881 1882 if (!vl_scontexts) { 1883 for (i = 0; i < dd->num_send_contexts; i++) 1884 if (dd->send_contexts[i].type == SC_KERNEL) 1885 num_kernel_send_contexts++; 1886 /* truncate divide */ 1887 sc_per_vl = num_kernel_send_contexts / num_vls; 1888 /* extras */ 1889 extra = num_kernel_send_contexts % num_vls; 1890 vl_scontexts = lvl_scontexts; 1891 /* add extras from last vl down */ 1892 for (i = num_vls - 1; i >= 0; i--, extra--) 1893 vl_scontexts[i] = sc_per_vl + (extra > 0 ? 
1 : 0); 1894 } 1895 /* build new map */ 1896 newmap = kzalloc(struct_size(newmap, map, roundup_pow_of_two(num_vls)), 1897 GFP_KERNEL); 1898 if (!newmap) 1899 goto bail; 1900 newmap->actual_vls = num_vls; 1901 newmap->vls = roundup_pow_of_two(num_vls); 1902 newmap->mask = (1 << ilog2(newmap->vls)) - 1; 1903 for (i = 0; i < newmap->vls; i++) { 1904 /* save for wrap around */ 1905 int first_scontext = scontext; 1906 1907 if (i < newmap->actual_vls) { 1908 int sz = roundup_pow_of_two(vl_scontexts[i]); 1909 1910 /* only allocate once */ 1911 newmap->map[i] = kzalloc(struct_size(newmap->map[i], 1912 ksc, sz), 1913 GFP_KERNEL); 1914 if (!newmap->map[i]) 1915 goto bail; 1916 newmap->map[i]->mask = (1 << ilog2(sz)) - 1; 1917 /* 1918 * assign send contexts and 1919 * adjust credit return threshold 1920 */ 1921 for (j = 0; j < sz; j++) { 1922 if (dd->kernel_send_context[scontext]) { 1923 newmap->map[i]->ksc[j] = 1924 dd->kernel_send_context[scontext]; 1925 set_threshold(dd, scontext, i); 1926 } 1927 if (++scontext >= first_scontext + 1928 vl_scontexts[i]) 1929 /* wrap back to first send context */ 1930 scontext = first_scontext; 1931 } 1932 } else { 1933 /* just re-use entry without allocating */ 1934 newmap->map[i] = newmap->map[i % num_vls]; 1935 } 1936 scontext = first_scontext + vl_scontexts[i]; 1937 } 1938 /* newmap in hand, save old map */ 1939 spin_lock_irq(&dd->pio_map_lock); 1940 oldmap = rcu_dereference_protected(dd->pio_map, 1941 lockdep_is_held(&dd->pio_map_lock)); 1942 1943 /* publish newmap */ 1944 rcu_assign_pointer(dd->pio_map, newmap); 1945 1946 spin_unlock_irq(&dd->pio_map_lock); 1947 /* success, free any old map after grace period */ 1948 if (oldmap) 1949 call_rcu(&oldmap->list, pio_map_rcu_callback); 1950 return 0; 1951 bail: 1952 /* free any partial allocation */ 1953 pio_map_free(newmap); 1954 return -ENOMEM; 1955 } 1956 1957 void free_pio_map(struct hfi1_devdata *dd) 1958 { 1959 /* Free PIO map if allocated */ 1960 if (rcu_access_pointer(dd->pio_map)) { 
1961 spin_lock_irq(&dd->pio_map_lock); 1962 pio_map_free(rcu_access_pointer(dd->pio_map)); 1963 RCU_INIT_POINTER(dd->pio_map, NULL); 1964 spin_unlock_irq(&dd->pio_map_lock); 1965 synchronize_rcu(); 1966 } 1967 kfree(dd->kernel_send_context); 1968 dd->kernel_send_context = NULL; 1969 } 1970 1971 int init_pervl_scs(struct hfi1_devdata *dd) 1972 { 1973 int i; 1974 u64 mask, all_vl_mask = (u64)0x80ff; /* VLs 0-7, 15 */ 1975 u64 data_vls_mask = (u64)0x00ff; /* VLs 0-7 */ 1976 u32 ctxt; 1977 struct hfi1_pportdata *ppd = dd->pport; 1978 1979 dd->vld[15].sc = sc_alloc(dd, SC_VL15, 1980 dd->rcd[0]->rcvhdrqentsize, dd->node); 1981 if (!dd->vld[15].sc) 1982 return -ENOMEM; 1983 1984 hfi1_init_ctxt(dd->vld[15].sc); 1985 dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048); 1986 1987 dd->kernel_send_context = kcalloc_node(dd->num_send_contexts, 1988 sizeof(struct send_context *), 1989 GFP_KERNEL, dd->node); 1990 if (!dd->kernel_send_context) 1991 goto freesc15; 1992 1993 dd->kernel_send_context[0] = dd->vld[15].sc; 1994 1995 for (i = 0; i < num_vls; i++) { 1996 /* 1997 * Since this function does not deal with a specific 1998 * receive context but we need the RcvHdrQ entry size, 1999 * use the size from rcd[0]. It is guaranteed to be 2000 * valid at this point and will remain the same for all 2001 * receive contexts. 
2002 */ 2003 dd->vld[i].sc = sc_alloc(dd, SC_KERNEL, 2004 dd->rcd[0]->rcvhdrqentsize, dd->node); 2005 if (!dd->vld[i].sc) 2006 goto nomem; 2007 dd->kernel_send_context[i + 1] = dd->vld[i].sc; 2008 hfi1_init_ctxt(dd->vld[i].sc); 2009 /* non VL15 start with the max MTU */ 2010 dd->vld[i].mtu = hfi1_max_mtu; 2011 } 2012 for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) { 2013 dd->kernel_send_context[i + 1] = 2014 sc_alloc(dd, SC_KERNEL, dd->rcd[0]->rcvhdrqentsize, dd->node); 2015 if (!dd->kernel_send_context[i + 1]) 2016 goto nomem; 2017 hfi1_init_ctxt(dd->kernel_send_context[i + 1]); 2018 } 2019 2020 sc_enable(dd->vld[15].sc); 2021 ctxt = dd->vld[15].sc->hw_context; 2022 mask = all_vl_mask & ~(1LL << 15); 2023 write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask); 2024 dd_dev_info(dd, 2025 "Using send context %u(%u) for VL15\n", 2026 dd->vld[15].sc->sw_index, ctxt); 2027 2028 for (i = 0; i < num_vls; i++) { 2029 sc_enable(dd->vld[i].sc); 2030 ctxt = dd->vld[i].sc->hw_context; 2031 mask = all_vl_mask & ~(data_vls_mask); 2032 write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask); 2033 } 2034 for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) { 2035 sc_enable(dd->kernel_send_context[i + 1]); 2036 ctxt = dd->kernel_send_context[i + 1]->hw_context; 2037 mask = all_vl_mask & ~(data_vls_mask); 2038 write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask); 2039 } 2040 2041 if (pio_map_init(dd, ppd->port - 1, num_vls, NULL)) 2042 goto nomem; 2043 return 0; 2044 2045 nomem: 2046 for (i = 0; i < num_vls; i++) { 2047 sc_free(dd->vld[i].sc); 2048 dd->vld[i].sc = NULL; 2049 } 2050 2051 for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) 2052 sc_free(dd->kernel_send_context[i + 1]); 2053 2054 kfree(dd->kernel_send_context); 2055 dd->kernel_send_context = NULL; 2056 2057 freesc15: 2058 sc_free(dd->vld[15].sc); 2059 return -ENOMEM; 2060 } 2061 2062 int init_credit_return(struct hfi1_devdata *dd) 2063 { 2064 int ret; 2065 int i; 2066 2067 dd->cr_base = kcalloc( 2068 node_affinity.num_possible_nodes, 2069 
sizeof(struct credit_return_base), 2070 GFP_KERNEL); 2071 if (!dd->cr_base) { 2072 ret = -ENOMEM; 2073 goto done; 2074 } 2075 for_each_node_with_cpus(i) { 2076 int bytes = TXE_NUM_CONTEXTS * sizeof(struct credit_return); 2077 2078 set_dev_node(&dd->pcidev->dev, i); 2079 dd->cr_base[i].va = dma_alloc_coherent(&dd->pcidev->dev, 2080 bytes, 2081 &dd->cr_base[i].dma, 2082 GFP_KERNEL); 2083 if (!dd->cr_base[i].va) { 2084 set_dev_node(&dd->pcidev->dev, dd->node); 2085 dd_dev_err(dd, 2086 "Unable to allocate credit return DMA range for NUMA %d\n", 2087 i); 2088 ret = -ENOMEM; 2089 goto done; 2090 } 2091 } 2092 set_dev_node(&dd->pcidev->dev, dd->node); 2093 2094 ret = 0; 2095 done: 2096 return ret; 2097 } 2098 2099 void free_credit_return(struct hfi1_devdata *dd) 2100 { 2101 int i; 2102 2103 if (!dd->cr_base) 2104 return; 2105 for (i = 0; i < node_affinity.num_possible_nodes; i++) { 2106 if (dd->cr_base[i].va) { 2107 dma_free_coherent(&dd->pcidev->dev, 2108 TXE_NUM_CONTEXTS * 2109 sizeof(struct credit_return), 2110 dd->cr_base[i].va, 2111 dd->cr_base[i].dma); 2112 } 2113 } 2114 kfree(dd->cr_base); 2115 dd->cr_base = NULL; 2116 } 2117 2118 void seqfile_dump_sci(struct seq_file *s, u32 i, 2119 struct send_context_info *sci) 2120 { 2121 struct send_context *sc = sci->sc; 2122 u64 reg; 2123 2124 seq_printf(s, "SCI %u: type %u base %u credits %u\n", 2125 i, sci->type, sci->base, sci->credits); 2126 seq_printf(s, " flags 0x%x sw_inx %u hw_ctxt %u grp %u\n", 2127 sc->flags, sc->sw_index, sc->hw_context, sc->group); 2128 seq_printf(s, " sr_size %u credits %u sr_head %u sr_tail %u\n", 2129 sc->sr_size, sc->credits, sc->sr_head, sc->sr_tail); 2130 seq_printf(s, " fill %lu free %lu fill_wrap %u alloc_free %lu\n", 2131 sc->fill, sc->free, sc->fill_wrap, sc->alloc_free); 2132 seq_printf(s, " credit_intr_count %u credit_ctrl 0x%llx\n", 2133 sc->credit_intr_count, sc->credit_ctrl); 2134 reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_STATUS)); 2135 seq_printf(s, " *hw_free %llu 
CurrentFree %llu LastReturned %llu\n", 2136 (le64_to_cpu(*sc->hw_free) & CR_COUNTER_SMASK) >> 2137 CR_COUNTER_SHIFT, 2138 (reg >> SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT)) & 2139 SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK), 2140 reg & SC(CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK)); 2141 } 2142