/*
 * Copyright(c) 2015-2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/prefetch.h>
#include <rdma/ib_verbs.h>

#include "hfi.h"
#include "trace.h"
#include "qp.h"
#include "sdma.h"
#include "debugfs.h"
#include "vnic.h"

#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
/*
 * The size has to be longer than this string, so we can append
 * board/chip information to it in the initialization code.
 */
const char ib_hfi1_version[] = HFI1_DRIVER_VERSION "\n";

DEFINE_SPINLOCK(hfi1_devs_lock);
LIST_HEAD(hfi1_dev_list);
DEFINE_MUTEX(hfi1_mutex);	/* general driver use */

unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU;
module_param_named(max_mtu, hfi1_max_mtu, uint, S_IRUGO);
MODULE_PARM_DESC(max_mtu, "Set max MTU bytes, default is " __stringify(
		 HFI1_DEFAULT_MAX_MTU));

unsigned int hfi1_cu = 1;
module_param_named(cu, hfi1_cu, uint, S_IRUGO);
MODULE_PARM_DESC(cu, "Credit return units");

unsigned long hfi1_cap_mask = HFI1_CAP_MASK_DEFAULT;
static int hfi1_caps_set(const char *val, const struct kernel_param *kp);
static int hfi1_caps_get(char *buffer, const struct kernel_param *kp);
static const struct kernel_param_ops cap_ops = {
	.set = hfi1_caps_set,
	.get = hfi1_caps_get
};
module_param_cb(cap_mask, &cap_ops, &hfi1_cap_mask, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(cap_mask, "Bit mask of enabled/disabled HW features");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("Intel Omni-Path Architecture driver");

/*
 * MAX_PKT_RECV is the max # of packets processed per receive interrupt.
 */
#define MAX_PKT_RECV 64
/*
 * MAX_PKT_RECV_THREAD is the max # of packets processed before
 * the qp_wait_list queue is flushed.
 */
#define MAX_PKT_RECV_THREAD (MAX_PKT_RECV * 4)
#define EGR_HEAD_UPDATE_THRESHOLD 16

struct hfi1_ib_stats hfi1_stats;

static int hfi1_caps_set(const char *val, const struct kernel_param *kp)
{
	int ret = 0;
	unsigned long *cap_mask_ptr = (unsigned long *)kp->arg,
		cap_mask = *cap_mask_ptr, value, diff,
		write_mask = ((HFI1_CAP_WRITABLE_MASK << HFI1_CAP_USER_SHIFT) |
			      HFI1_CAP_WRITABLE_MASK);

	ret = kstrtoul(val, 0, &value);
	if (ret) {
		pr_warn("Invalid module parameter value for 'cap_mask'\n");
		goto done;
	}
	/* Get the changed bits (except the locked bit) */
	diff = value ^ (cap_mask & ~HFI1_CAP_LOCKED_SMASK);

	/* Remove any bits that are not allowed to change after driver load */
	if (HFI1_CAP_LOCKED() && (diff & ~write_mask)) {
		pr_warn("Ignoring non-writable capability bits %#lx\n",
			diff & ~write_mask);
		diff &= write_mask;
	}

	/* Mask off any reserved bits */
	diff &= ~HFI1_CAP_RESERVED_MASK;
	/* Clear any previously set and changing bits */
	cap_mask &= ~diff;
	/* Update the bits with the new capability */
	cap_mask |= (value & diff);
	/* Check for any kernel/user restrictions */
	diff = (cap_mask & (HFI1_CAP_MUST_HAVE_KERN << HFI1_CAP_USER_SHIFT)) ^
		((cap_mask & HFI1_CAP_MUST_HAVE_KERN) << HFI1_CAP_USER_SHIFT);
	cap_mask &= ~diff;
	/* Set the bitmask to the final set */
	*cap_mask_ptr = cap_mask;
done:
	return ret;
}

static int hfi1_caps_get(char *buffer, const struct kernel_param *kp)
{
	unsigned long cap_mask = *(unsigned long *)kp->arg;

	cap_mask &= ~HFI1_CAP_LOCKED_SMASK;
	cap_mask |= ((cap_mask & HFI1_CAP_K2U) << HFI1_CAP_USER_SHIFT);

	return scnprintf(buffer, PAGE_SIZE, "0x%lx", cap_mask);
}
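/*
 * Note on the two cap_mask handlers above: once HFI1_CAP_LOCKED() is set,
 * hfi1_caps_set() restricts changes to HFI1_CAP_WRITABLE_MASK (in both the
 * kernel half and the user half shifted by HFI1_CAP_USER_SHIFT), always
 * masks reserved bits, and clears any user-half MUST_HAVE_KERN bit whose
 * kernel-half counterpart is not set.  hfi1_caps_get() reports the mask
 * with the LOCKED bit cleared and the K2U bits mirrored into the user half.
 */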
struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)
{
	struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
	struct hfi1_devdata *dd = container_of(ibdev,
					       struct hfi1_devdata, verbs_dev);
	return dd->pcidev;
}

/*
 * Return count of units with at least one port ACTIVE.
 */
int hfi1_count_active_units(void)
{
	struct hfi1_devdata *dd;
	struct hfi1_pportdata *ppd;
	unsigned long flags;
	int pidx, nunits_active = 0;

	spin_lock_irqsave(&hfi1_devs_lock, flags);
	list_for_each_entry(dd, &hfi1_dev_list, list) {
		if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase1)
			continue;
		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
			ppd = dd->pport + pidx;
			if (ppd->lid && ppd->linkup) {
				nunits_active++;
				break;
			}
		}
	}
	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
	return nunits_active;
}

/*
 * Get address of eager buffer from its index (allocated in chunks, not
 * contiguous).
 */
static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf,
			       u8 *update)
{
	u32 idx = rhf_egr_index(rhf), offset = rhf_egr_buf_offset(rhf);

	*update |= !(idx & (rcd->egrbufs.threshold - 1)) && !offset;
	return (void *)(((u64)(rcd->egrbufs.rcvtids[idx].addr)) +
			(offset * RCV_BUF_BLOCK_SIZE));
}

static inline void *hfi1_get_header(struct hfi1_devdata *dd,
				    __le32 *rhf_addr)
{
	u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));

	return (void *)(rhf_addr - dd->rhf_offset + offset);
}

static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd,
						   __le32 *rhf_addr)
{
	return (struct ib_header *)hfi1_get_header(dd, rhf_addr);
}

static inline struct hfi1_16b_header
	       *hfi1_get_16B_header(struct hfi1_devdata *dd,
				    __le32 *rhf_addr)
{
	return (struct hfi1_16b_header *)hfi1_get_header(dd, rhf_addr);
}
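/*
 * Note on the helpers above: get_egrbuf() turns the RHF eager index/offset
 * pair into a virtual address (the index selects a chunk in
 * rcd->egrbufs.rcvtids[], the offset advances in RCV_BUF_BLOCK_SIZE units)
 * and flags *update when the index is a multiple of egrbufs.threshold and
 * the offset is zero.  hfi1_get_header() locates the packet header within
 * the current receive header queue entry using the RHF's hdrq offset.
 */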
/*
 * Validate and encode a given RcvArray Buffer size.
 * The function will check whether the given size falls within
 * allowed size ranges for the respective type and, optionally,
 * return the proper encoding.
 */
int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
{
	if (unlikely(!PAGE_ALIGNED(size)))
		return 0;
	if (unlikely(size < MIN_EAGER_BUFFER))
		return 0;
	if (size > (type == PT_EAGER ? MAX_EAGER_BUFFER : MAX_EXPECTED_BUFFER))
		return 0;
	if (encoded)
		*encoded = ilog2(size / PAGE_SIZE) + 1;
	return 1;
}
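/*
 * Worked example (assuming 4 KiB pages): for a 64 KiB eager buffer,
 * size / PAGE_SIZE = 16 and ilog2(16) = 4, so *encoded is set to 5.
 * Sizes that are not page aligned, smaller than MIN_EAGER_BUFFER, or
 * larger than the per-type maximum are rejected with a return of 0.
 */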
static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
		       struct hfi1_packet *packet)
{
	struct ib_header *rhdr = packet->hdr;
	u32 rte = rhf_rcv_type_err(packet->rhf);
	u32 mlid_base;
	struct hfi1_ibport *ibp = rcd_to_iport(rcd);
	struct hfi1_devdata *dd = ppd->dd;
	struct hfi1_ibdev *verbs_dev = &dd->verbs_dev;
	struct rvt_dev_info *rdi = &verbs_dev->rdi;

	if ((packet->rhf & RHF_DC_ERR) &&
	    hfi1_dbg_fault_suppress_err(verbs_dev))
		return;

	if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
		return;

	if (packet->etype == RHF_RCV_TYPE_BYPASS) {
		goto drop;
	} else {
		u8 lnh = ib_get_lnh(rhdr);

		mlid_base = be16_to_cpu(IB_MULTICAST_LID_BASE);
		if (lnh == HFI1_LRH_BTH) {
			packet->ohdr = &rhdr->u.oth;
		} else if (lnh == HFI1_LRH_GRH) {
			packet->ohdr = &rhdr->u.l.oth;
			packet->grh = &rhdr->u.l.grh;
		} else {
			goto drop;
		}
	}

	if (packet->rhf & RHF_TID_ERR) {
		/* For TIDERR and RC QPs preemptively schedule a NAK */
		u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
		u32 dlid = ib_get_dlid(rhdr);
		u32 qp_num;

		/* Sanity check packet */
		if (tlen < 24)
			goto drop;

		/* Check for GRH */
		if (packet->grh) {
			u32 vtf;
			struct ib_grh *grh = packet->grh;

			if (grh->next_hdr != IB_GRH_NEXT_HDR)
				goto drop;
			vtf = be32_to_cpu(grh->version_tclass_flow);
			if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
				goto drop;
		}

		/* Get the destination QP number. */
		qp_num = ib_bth_get_qpn(packet->ohdr);
		if (dlid < mlid_base) {
			struct rvt_qp *qp;
			unsigned long flags;

			rcu_read_lock();
			qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
			if (!qp) {
				rcu_read_unlock();
				goto drop;
			}

			/*
			 * Handle only RC QPs - for other QP types drop error
			 * packet.
			 */
			spin_lock_irqsave(&qp->r_lock, flags);

			/* Check for valid receive state. */
			if (!(ib_rvt_state_ops[qp->state] &
			      RVT_PROCESS_RECV_OK)) {
				ibp->rvp.n_pkt_drops++;
			}

			switch (qp->ibqp.qp_type) {
			case IB_QPT_RC:
				hfi1_rc_hdrerr(rcd, packet, qp);
				break;
			default:
				/* For now don't handle any other QP types */
				break;
			}

			spin_unlock_irqrestore(&qp->r_lock, flags);
			rcu_read_unlock();
		} /* Unicast QP */
	} /* Valid packet with TIDErr */

	/* handle "RcvTypeErr" flags */
	switch (rte) {
	case RHF_RTE_ERROR_OP_CODE_ERR:
	{
		void *ebuf = NULL;
		u8 opcode;

		if (rhf_use_egr_bfr(packet->rhf))
			ebuf = packet->ebuf;

		if (!ebuf)
			goto drop; /* this should never happen */

		opcode = ib_bth_get_opcode(packet->ohdr);
		if (opcode == IB_OPCODE_CNP) {
			/*
			 * Only in pre-B0 h/w is the CNP_OPCODE handled
			 * via this code path.
			 */
			struct rvt_qp *qp = NULL;
			u32 lqpn, rqpn;
			u16 rlid;
			u8 svc_type, sl, sc5;

			sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf);
			sl = ibp->sc_to_sl[sc5];

			lqpn = ib_bth_get_qpn(packet->ohdr);
			rcu_read_lock();
			qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn);
			if (!qp) {
				rcu_read_unlock();
				goto drop;
			}

			switch (qp->ibqp.qp_type) {
			case IB_QPT_UD:
				rlid = 0;
				rqpn = 0;
				svc_type = IB_CC_SVCTYPE_UD;
				break;
			case IB_QPT_UC:
				rlid = ib_get_slid(rhdr);
				rqpn = qp->remote_qpn;
				svc_type = IB_CC_SVCTYPE_UC;
				break;
			default:
				rcu_read_unlock();
				goto drop;
			}

			process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
			rcu_read_unlock();
		}

		packet->rhf &= ~RHF_RCV_TYPE_ERR_SMASK;
		break;
	}
	default:
		break;
	}

drop:
	return;
}

static inline void init_packet(struct hfi1_ctxtdata *rcd,
			       struct hfi1_packet *packet)
{
	packet->rsize = rcd->rcvhdrqentsize; /* words */
	packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */
	packet->rcd = rcd;
	packet->updegr = 0;
	packet->etail = -1;
	packet->rhf_addr = get_rhf_addr(rcd);
	packet->rhf = rhf_to_cpu(packet->rhf_addr);
	packet->rhqoff = rcd->head;
	packet->numpkt = 0;
}

/* We support only two types - 9B and 16B for now */
static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = {
	[HFI1_PKT_TYPE_9B] = &return_cnp,
	[HFI1_PKT_TYPE_16B] = &return_cnp_16B
};

void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
			       bool do_cnp)
{
	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct ib_other_headers *ohdr = pkt->ohdr;
	struct ib_grh *grh = pkt->grh;
	u32 rqpn = 0, bth1;
	u16 pkey, rlid, dlid = ib_get_dlid(pkt->hdr);
	u8 hdr_type, sc, svc_type;
	bool is_mcast = false;

	if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
		is_mcast = hfi1_is_16B_mcast(dlid);
		pkey = hfi1_16B_get_pkey(pkt->hdr);
		sc = hfi1_16B_get_sc(pkt->hdr);
		hdr_type = HFI1_PKT_TYPE_16B;
	} else {
		is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
			   (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
		pkey = ib_bth_get_pkey(ohdr);
		sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf);
		hdr_type = HFI1_PKT_TYPE_9B;
	}

	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
	case IB_QPT_GSI:
	case IB_QPT_UD:
		rlid = ib_get_slid(pkt->hdr);
		rqpn = ib_get_sqpn(pkt->ohdr);
		svc_type = IB_CC_SVCTYPE_UD;
		break;
	case IB_QPT_UC:
		rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
		rqpn = qp->remote_qpn;
		svc_type = IB_CC_SVCTYPE_UC;
		break;
	case IB_QPT_RC:
		rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
		rqpn = qp->remote_qpn;
		svc_type = IB_CC_SVCTYPE_RC;
		break;
	default:
		return;
	}

	bth1 = be32_to_cpu(ohdr->bth[1]);
	/* Call appropriate CNP handler */
	if (do_cnp && (bth1 & IB_FECN_SMASK))
		hfi1_handle_cnp_tbl[hdr_type](ibp, qp, rqpn, pkey,
					      dlid, rlid, sc, grh);

	if (!is_mcast && (bth1 & IB_BECN_SMASK)) {
		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
		u32 lqpn = bth1 & RVT_QPN_MASK;
		u8 sl = ibp->sc_to_sl[sc];

		process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
	}
}
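/*
 * ps_mdata below is a private snapshot of the receive header queue state
 * (head, tail and expected RHF sequence) used by the prescan loop and the
 * debugfs dump so they can walk the queue without disturbing the live
 * context indices.
 */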
struct ps_mdata {
	struct hfi1_ctxtdata *rcd;
	u32 rsize;
	u32 maxcnt;
	u32 ps_head;
	u32 ps_tail;
	u32 ps_seq;
};

static inline void init_ps_mdata(struct ps_mdata *mdata,
				 struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;

	mdata->rcd = rcd;
	mdata->rsize = packet->rsize;
	mdata->maxcnt = packet->maxcnt;
	mdata->ps_head = packet->rhqoff;

	if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
		mdata->ps_tail = get_rcvhdrtail(rcd);
		if (rcd->ctxt == HFI1_CTRL_CTXT)
			mdata->ps_seq = rcd->seq_cnt;
		else
			mdata->ps_seq = 0; /* not used with DMA_RTAIL */
	} else {
		mdata->ps_tail = 0; /* used only with DMA_RTAIL */
		mdata->ps_seq = rcd->seq_cnt;
	}
}

static inline int ps_done(struct ps_mdata *mdata, u64 rhf,
			  struct hfi1_ctxtdata *rcd)
{
	if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
		return mdata->ps_head == mdata->ps_tail;
	return mdata->ps_seq != rhf_rcv_seq(rhf);
}

static inline int ps_skip(struct ps_mdata *mdata, u64 rhf,
			  struct hfi1_ctxtdata *rcd)
{
	/*
	 * Control context can potentially receive an invalid rhf.
	 * Drop such packets.
	 */
	if ((rcd->ctxt == HFI1_CTRL_CTXT) && (mdata->ps_head != mdata->ps_tail))
		return mdata->ps_seq != rhf_rcv_seq(rhf);

	return 0;
}

static inline void update_ps_mdata(struct ps_mdata *mdata,
				   struct hfi1_ctxtdata *rcd)
{
	mdata->ps_head += mdata->rsize;
	if (mdata->ps_head >= mdata->maxcnt)
		mdata->ps_head = 0;

	/* Control context must do seq counting */
	if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
	    (rcd->ctxt == HFI1_CTRL_CTXT)) {
		if (++mdata->ps_seq > 13)
			mdata->ps_seq = 1;
	}
}
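/*
 * update_ps_mdata() above mirrors the hardware RHF sequence counter, which
 * runs 1..13 and then wraps back to 1; ps_done() and the no-DMA-RTAIL
 * interrupt handlers below detect end-of-queue when the sequence in the
 * next RHF no longer matches the expected value.
 */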
/*
 * prescan_rxq - search through the receive queue looking for packets
 * containing Explicit Congestion Notifications (FECNs, or BECNs).
 * When an ECN is found, process the Congestion Notification, and toggle
 * it off.
 * This is declared as a macro to allow quick checking of the port to avoid
 * the overhead of a function call if not enabled.
 */
#define prescan_rxq(rcd, packet) \
	do { \
		if (rcd->ppd->cc_prescan) \
			__prescan_rxq(packet); \
	} while (0)
static void __prescan_rxq(struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct ps_mdata mdata;

	init_ps_mdata(&mdata, packet);

	while (1) {
		struct hfi1_devdata *dd = rcd->dd;
		struct hfi1_ibport *ibp = rcd_to_iport(rcd);
		__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
				   dd->rhf_offset;
		struct rvt_qp *qp;
		struct ib_header *hdr;
		struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
		u64 rhf = rhf_to_cpu(rhf_addr);
		u32 etype = rhf_rcv_type(rhf), qpn, bth1;
		int is_ecn = 0;
		u8 lnh;

		if (ps_done(&mdata, rhf, rcd))
			break;

		if (ps_skip(&mdata, rhf, rcd))
			goto next;

		if (etype != RHF_RCV_TYPE_IB)
			goto next;

		packet->hdr = hfi1_get_msgheader(dd, rhf_addr);
		hdr = packet->hdr;
		lnh = ib_get_lnh(hdr);

		if (lnh == HFI1_LRH_BTH) {
			packet->ohdr = &hdr->u.oth;
			packet->grh = NULL;
		} else if (lnh == HFI1_LRH_GRH) {
			packet->ohdr = &hdr->u.l.oth;
			packet->grh = &hdr->u.l.grh;
		} else {
			goto next; /* just in case */
		}

		bth1 = be32_to_cpu(packet->ohdr->bth[1]);
		is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK));

		if (!is_ecn)
			goto next;

		qpn = bth1 & RVT_QPN_MASK;
		rcu_read_lock();
		qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn);

		if (!qp) {
			rcu_read_unlock();
			goto next;
		}

		process_ecn(qp, packet, true);
		rcu_read_unlock();

		/* turn off BECN, FECN */
		bth1 &= ~(IB_FECN_SMASK | IB_BECN_SMASK);
		packet->ohdr->bth[1] = cpu_to_be32(bth1);
next:
		update_ps_mdata(&mdata, rcd);
	}
}
static void process_rcv_qp_work(struct hfi1_packet *packet)
{
	struct rvt_qp *qp, *nqp;
	struct hfi1_ctxtdata *rcd = packet->rcd;

	/*
	 * Iterate over all QPs waiting to respond.
	 * The list won't change since the IRQ is only run on one CPU.
	 */
	list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
		list_del_init(&qp->rspwait);
		if (qp->r_flags & RVT_R_RSP_NAK) {
			qp->r_flags &= ~RVT_R_RSP_NAK;
			packet->qp = qp;
			hfi1_send_rc_ack(packet, 0);
		}
		if (qp->r_flags & RVT_R_RSP_SEND) {
			unsigned long flags;

			qp->r_flags &= ~RVT_R_RSP_SEND;
			spin_lock_irqsave(&qp->s_lock, flags);
			if (ib_rvt_state_ops[qp->state] &
					RVT_PROCESS_OR_FLUSH_SEND)
				hfi1_schedule_send(qp);
			spin_unlock_irqrestore(&qp->s_lock, flags);
		}
		rvt_put_qp(qp);
	}
}

static noinline int max_packet_exceeded(struct hfi1_packet *packet, int thread)
{
	if (thread) {
		if ((packet->numpkt & (MAX_PKT_RECV_THREAD - 1)) == 0)
			/* allow deferred processing */
			process_rcv_qp_work(packet);
		cond_resched();
		return RCV_PKT_OK;
	} else {
		this_cpu_inc(*packet->rcd->dd->rcv_limit);
		return RCV_PKT_LIMIT;
	}
}

static inline int check_max_packet(struct hfi1_packet *packet, int thread)
{
	int ret = RCV_PKT_OK;

	if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0))
		ret = max_packet_exceeded(packet, thread);
	return ret;
}
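/*
 * Packet budget example: with MAX_PKT_RECV = 64 and MAX_PKT_RECV_THREAD =
 * 256, check_max_packet() trips every 64 packets.  In hard-IRQ context
 * that returns RCV_PKT_LIMIT (bumping rcv_limit and ending the receive
 * loop); in the threaded handler it yields with cond_resched() and, every
 * 256 packets, also flushes the qp_wait_list via process_rcv_qp_work().
 */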
static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
{
	int ret;

	/* Set up for the next packet */
	packet->rhqoff += packet->rsize;
	if (packet->rhqoff >= packet->maxcnt)
		packet->rhqoff = 0;

	packet->numpkt++;
	ret = check_max_packet(packet, thread);

	packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
			   packet->rcd->dd->rhf_offset;
	packet->rhf = rhf_to_cpu(packet->rhf_addr);

	return ret;
}

static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
{
	int ret;

	packet->etype = rhf_rcv_type(packet->rhf);

	/* total length */
	packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */
	/* retrieve eager buffer details */
	packet->ebuf = NULL;
	if (rhf_use_egr_bfr(packet->rhf)) {
		packet->etail = rhf_egr_index(packet->rhf);
		packet->ebuf = get_egrbuf(packet->rcd, packet->rhf,
					  &packet->updegr);
		/*
		 * Prefetch the contents of the eager buffer.  It is
		 * OK to send a negative length to prefetch_range().
		 * The +2 is the size of the RHF.
		 */
		prefetch_range(packet->ebuf,
			       packet->tlen - ((packet->rcd->rcvhdrqentsize -
					       (rhf_hdrq_offset(packet->rhf)
						+ 2)) * 4));
	}

	/*
	 * Call a type specific handler for the packet.  We
	 * should be able to trust that etype won't be beyond
	 * the range of valid indexes.  If so something is really
	 * wrong and we can probably just let things come
	 * crashing down.  There is no need to eat another
	 * comparison in this performance critical code.
	 */
	packet->rcd->dd->rhf_rcv_function_map[packet->etype](packet);
	packet->numpkt++;

	/* Set up for the next packet */
	packet->rhqoff += packet->rsize;
	if (packet->rhqoff >= packet->maxcnt)
		packet->rhqoff = 0;

	ret = check_max_packet(packet, thread);

	packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
			   packet->rcd->dd->rhf_offset;
	packet->rhf = rhf_to_cpu(packet->rhf_addr);

	return ret;
}

static inline void process_rcv_update(int last, struct hfi1_packet *packet)
{
	/*
	 * Update head regs etc., every 16 packets, if not last pkt,
	 * to help prevent rcvhdrq overflows, when many packets
	 * are processed and queue is nearly full.
	 * Don't request an interrupt for intermediate updates.
	 */
	if (!last && !(packet->numpkt & 0xf)) {
		update_usrhead(packet->rcd, packet->rhqoff, packet->updegr,
			       packet->etail, 0, 0);
		packet->updegr = 0;
	}
	packet->grh = NULL;
}

static inline void finish_packet(struct hfi1_packet *packet)
{
	/*
	 * Nothing we need to free for the packet.
	 *
	 * The only thing we need to do is a final update and call for an
	 * interrupt
	 */
	update_usrhead(packet->rcd, packet->rcd->head, packet->updegr,
		       packet->etail, rcv_intr_dynamic, packet->numpkt);
}

/*
 * Handle receive interrupts when using the no dma rtail option.
 */
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
{
	u32 seq;
	int last = RCV_PKT_OK;
	struct hfi1_packet packet;

	init_packet(rcd, &packet);
	seq = rhf_rcv_seq(packet.rhf);
	if (seq != rcd->seq_cnt) {
		last = RCV_PKT_DONE;
		goto bail;
	}

	prescan_rxq(rcd, &packet);

	while (last == RCV_PKT_OK) {
		last = process_rcv_packet(&packet, thread);
		seq = rhf_rcv_seq(packet.rhf);
		if (++rcd->seq_cnt > 13)
			rcd->seq_cnt = 1;
		if (seq != rcd->seq_cnt)
			last = RCV_PKT_DONE;
		process_rcv_update(last, &packet);
	}
	process_rcv_qp_work(&packet);
	rcd->head = packet.rhqoff;
bail:
	finish_packet(&packet);
	return last;
}

int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
{
	u32 hdrqtail;
	int last = RCV_PKT_OK;
	struct hfi1_packet packet;

	init_packet(rcd, &packet);
	hdrqtail = get_rcvhdrtail(rcd);
	if (packet.rhqoff == hdrqtail) {
		last = RCV_PKT_DONE;
		goto bail;
	}
	smp_rmb();  /* prevent speculative reads of dma'ed hdrq */

	prescan_rxq(rcd, &packet);

	while (last == RCV_PKT_OK) {
		last = process_rcv_packet(&packet, thread);
		if (packet.rhqoff == hdrqtail)
			last = RCV_PKT_DONE;
		process_rcv_update(last, &packet);
	}
	process_rcv_qp_work(&packet);
	rcd->head = packet.rhqoff;
bail:
	finish_packet(&packet);
	return last;
}
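/*
 * The two handlers above are the per-context fast paths installed through
 * rcd->do_interrupt by set_nodma_rtail()/set_dma_rtail() below;
 * handle_receive_interrupt() further down is the slow path that decides
 * which of the two to install once it observes whether the context uses
 * DMA_RTAIL.
 */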
static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt)
{
	struct hfi1_ctxtdata *rcd;
	u16 i;

	/*
	 * For dynamically allocated kernel contexts (like vnic) switch
	 * interrupt handler only for that context. Otherwise, switch
	 * interrupt handler for all statically allocated kernel contexts.
	 */
	if (ctxt >= dd->first_dyn_alloc_ctxt) {
		rcd = hfi1_rcd_get_by_index_safe(dd, ctxt);
		if (rcd) {
			rcd->do_interrupt =
				&handle_receive_interrupt_nodma_rtail;
			hfi1_rcd_put(rcd);
		}
		return;
	}

	for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) {
		rcd = hfi1_rcd_get_by_index(dd, i);
		if (rcd)
			rcd->do_interrupt =
				&handle_receive_interrupt_nodma_rtail;
		hfi1_rcd_put(rcd);
	}
}

static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt)
{
	struct hfi1_ctxtdata *rcd;
	u16 i;

	/*
	 * For dynamically allocated kernel contexts (like vnic) switch
	 * interrupt handler only for that context. Otherwise, switch
	 * interrupt handler for all statically allocated kernel contexts.
	 */
	if (ctxt >= dd->first_dyn_alloc_ctxt) {
		rcd = hfi1_rcd_get_by_index_safe(dd, ctxt);
		if (rcd) {
			rcd->do_interrupt =
				&handle_receive_interrupt_dma_rtail;
			hfi1_rcd_put(rcd);
		}
		return;
	}

	for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) {
		rcd = hfi1_rcd_get_by_index(dd, i);
		if (rcd)
			rcd->do_interrupt =
				&handle_receive_interrupt_dma_rtail;
		hfi1_rcd_put(rcd);
	}
}

void set_all_slowpath(struct hfi1_devdata *dd)
{
	struct hfi1_ctxtdata *rcd;
	u16 i;

	/* HFI1_CTRL_CTXT must always use the slow path interrupt handler */
	for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
		rcd = hfi1_rcd_get_by_index(dd, i);
		if (!rcd)
			continue;
		if (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
			rcd->do_interrupt = &handle_receive_interrupt;

		hfi1_rcd_put(rcd);
	}
}

static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
				      struct hfi1_packet *packet,
				      struct hfi1_devdata *dd)
{
	struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
	u8 etype = rhf_rcv_type(packet->rhf);
	u8 sc = SC15_PACKET;

	if (etype == RHF_RCV_TYPE_IB) {
		struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
							   packet->rhf_addr);
		sc = hfi1_9B_get_sc5(hdr, packet->rhf);
	} else if (etype == RHF_RCV_TYPE_BYPASS) {
		struct hfi1_16b_header *hdr = hfi1_get_16B_header(
						packet->rcd->dd,
						packet->rhf_addr);
		sc = hfi1_16B_get_sc(hdr);
	}
	if (sc != SC15_PACKET) {
		int hwstate = driver_lstate(rcd->ppd);

		if (hwstate != IB_PORT_ACTIVE) {
			dd_dev_info(dd,
				    "Unexpected link state %s\n",
				    opa_lstate_name(hwstate));
			return 0;
		}

		queue_work(rcd->ppd->link_wq, lsaw);
		return 1;
	}
	return 0;
}
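/*
 * set_armed_to_active() above: a non-SC15 packet that arrives while the
 * host link state is still ARMED, but the hardware already reports the
 * port ACTIVE, queues linkstate_active_work so receive_interrupt_work()
 * below can move the host state to ACTIVE before the packet is processed.
 */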
/*
 * handle_receive_interrupt - receive a packet
 * @rcd: the context
 *
 * Called from interrupt handler for errors or receive interrupt.
 * This is the slow path interrupt handler.
 */
int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
{
	struct hfi1_devdata *dd = rcd->dd;
	u32 hdrqtail;
	int needset, last = RCV_PKT_OK;
	struct hfi1_packet packet;
	int skip_pkt = 0;

	/* Control context will always use the slow path interrupt handler */
	needset = (rcd->ctxt == HFI1_CTRL_CTXT) ? 0 : 1;

	init_packet(rcd, &packet);

	if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
		u32 seq = rhf_rcv_seq(packet.rhf);

		if (seq != rcd->seq_cnt) {
			last = RCV_PKT_DONE;
			goto bail;
		}
		hdrqtail = 0;
	} else {
		hdrqtail = get_rcvhdrtail(rcd);
		if (packet.rhqoff == hdrqtail) {
			last = RCV_PKT_DONE;
			goto bail;
		}
		smp_rmb();  /* prevent speculative reads of dma'ed hdrq */

		/*
		 * Control context can potentially receive an invalid
		 * rhf. Drop such packets.
		 */
		if (rcd->ctxt == HFI1_CTRL_CTXT) {
			u32 seq = rhf_rcv_seq(packet.rhf);

			if (seq != rcd->seq_cnt)
				skip_pkt = 1;
		}
	}

	prescan_rxq(rcd, &packet);

	while (last == RCV_PKT_OK) {
		if (unlikely(dd->do_drop &&
			     atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
			     DROP_PACKET_ON)) {
			dd->do_drop = 0;

			/* On to the next packet */
			packet.rhqoff += packet.rsize;
			packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
					  packet.rhqoff +
					  dd->rhf_offset;
			packet.rhf = rhf_to_cpu(packet.rhf_addr);

		} else if (skip_pkt) {
			last = skip_rcv_packet(&packet, thread);
			skip_pkt = 0;
		} else {
			/* Auto activate link on non-SC15 packet receive */
			if (unlikely(rcd->ppd->host_link_state ==
				     HLS_UP_ARMED) &&
			    set_armed_to_active(rcd, &packet, dd))
				goto bail;
			last = process_rcv_packet(&packet, thread);
		}

		if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
			u32 seq = rhf_rcv_seq(packet.rhf);

			if (++rcd->seq_cnt > 13)
				rcd->seq_cnt = 1;
			if (seq != rcd->seq_cnt)
				last = RCV_PKT_DONE;
			if (needset) {
				dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
				set_nodma_rtail(dd, rcd->ctxt);
				needset = 0;
			}
		} else {
			if (packet.rhqoff == hdrqtail)
				last = RCV_PKT_DONE;
			/*
			 * Control context can potentially receive an invalid
			 * rhf. Drop such packets.
			 */
			if (rcd->ctxt == HFI1_CTRL_CTXT) {
				u32 seq = rhf_rcv_seq(packet.rhf);

				if (++rcd->seq_cnt > 13)
					rcd->seq_cnt = 1;
				if (!last && (seq != rcd->seq_cnt))
					skip_pkt = 1;
			}

			if (needset) {
				dd_dev_info(dd,
					    "Switching to DMA_RTAIL\n");
				set_dma_rtail(dd, rcd->ctxt);
				needset = 0;
			}
		}

		process_rcv_update(last, &packet);
	}

	process_rcv_qp_work(&packet);
	rcd->head = packet.rhqoff;

bail:
	/*
	 * Always write head at end, and setup rcv interrupt, even
	 * if no packets were processed.
	 */
	finish_packet(&packet);
	return last;
}

/*
 * We may discover in the interrupt that the hardware link state has
 * changed from ARMED to ACTIVE (due to the arrival of a non-SC15 packet),
 * and we need to update the driver's notion of the link state.  We cannot
 * run set_link_state from interrupt context, so we queue this function on
 * a workqueue.
 *
 * We delay the regular interrupt processing until after the state changes
 * so that the link will be in the correct state by the time any application
 * we wake up attempts to send a reply to any message it received.
 * (Subsequent receive interrupts may possibly force the wakeup before we
 * update the link state.)
 *
 * The rcd is freed in hfi1_free_ctxtdata after hfi1_postinit_cleanup invokes
 * dd->f_cleanup(dd) to disable the interrupt handler and flush workqueues,
 * so we're safe from use-after-free of the rcd.
 */
void receive_interrupt_work(struct work_struct *work)
{
	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
						  linkstate_active_work);
	struct hfi1_devdata *dd = ppd->dd;
	struct hfi1_ctxtdata *rcd;
	u16 i;

	/* Received non-SC15 packet implies neighbor_normal */
	ppd->neighbor_normal = 1;
	set_link_state(ppd, HLS_UP_ACTIVE);

	/*
	 * Interrupt all statically allocated kernel contexts that could
	 * have had an interrupt during auto activation.
	 */
	for (i = HFI1_CTRL_CTXT; i < dd->first_dyn_alloc_ctxt; i++) {
		rcd = hfi1_rcd_get_by_index(dd, i);
		if (rcd)
			force_recv_intr(rcd);
		hfi1_rcd_put(rcd);
	}
}

/*
 * Convert a given MTU size to the on-wire MAD packet enumeration.
 * Return 'default_if_bad' if the size is invalid.
 */
int mtu_to_enum(u32 mtu, int default_if_bad)
{
	switch (mtu) {
	case     0: return OPA_MTU_0;
	case   256: return OPA_MTU_256;
	case   512: return OPA_MTU_512;
	case  1024: return OPA_MTU_1024;
	case  2048: return OPA_MTU_2048;
	case  4096: return OPA_MTU_4096;
	case  8192: return OPA_MTU_8192;
	case 10240: return OPA_MTU_10240;
	}
	return default_if_bad;
}

u16 enum_to_mtu(int mtu)
{
	switch (mtu) {
	case OPA_MTU_0:     return 0;
	case OPA_MTU_256:   return 256;
	case OPA_MTU_512:   return 512;
	case OPA_MTU_1024:  return 1024;
	case OPA_MTU_2048:  return 2048;
	case OPA_MTU_4096:  return 4096;
	case OPA_MTU_8192:  return 8192;
	case OPA_MTU_10240: return 10240;
	default: return 0xffff;
	}
}
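/*
 * Example: mtu_to_enum(4096, OPA_MTU_2048) returns OPA_MTU_4096, while an
 * unlisted size such as 3000 falls back to the supplied default;
 * enum_to_mtu(OPA_MTU_10240) returns 10240 and an unknown enum maps to
 * 0xffff.
 */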
/*
 * set_mtu - set the MTU
 * @ppd: the per port data
 *
 * We can handle "any" incoming size; the issue here is whether we
 * need to restrict our outgoing size.  We do not deal with what happens
 * to programs that are already running when the size changes.
 */
int set_mtu(struct hfi1_pportdata *ppd)
{
	struct hfi1_devdata *dd = ppd->dd;
	int i, drain, ret = 0, is_up = 0;

	ppd->ibmtu = 0;
	for (i = 0; i < ppd->vls_supported; i++)
		if (ppd->ibmtu < dd->vld[i].mtu)
			ppd->ibmtu = dd->vld[i].mtu;
	ppd->ibmaxlen = ppd->ibmtu + lrh_max_header_bytes(ppd->dd);

	mutex_lock(&ppd->hls_lock);
	if (ppd->host_link_state == HLS_UP_INIT ||
	    ppd->host_link_state == HLS_UP_ARMED ||
	    ppd->host_link_state == HLS_UP_ACTIVE)
		is_up = 1;

	drain = !is_ax(dd) && is_up;

	if (drain)
		/*
		 * MTU is specified per-VL. To ensure that no packet gets
		 * stuck (due, e.g., to the MTU for the packet's VL being
		 * reduced), empty the per-VL FIFOs before adjusting MTU.
		 */
		ret = stop_drain_data_vls(dd);

	if (ret) {
		dd_dev_err(dd, "%s: cannot stop/drain VLs - refusing to change per-VL MTUs\n",
			   __func__);
		goto err;
	}

	hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_MTU, 0);

	if (drain)
		open_fill_data_vls(dd); /* reopen all VLs */

err:
	mutex_unlock(&ppd->hls_lock);

	return ret;
}

int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc)
{
	struct hfi1_devdata *dd = ppd->dd;

	ppd->lid = lid;
	ppd->lmc = lmc;
	hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LIDLMC, 0);

	dd_dev_info(dd, "port %u: got a lid: 0x%x\n", ppd->port, lid);

	return 0;
}

void shutdown_led_override(struct hfi1_pportdata *ppd)
{
	struct hfi1_devdata *dd = ppd->dd;

	/*
	 * This pairs with the memory barrier in hfi1_start_led_override to
	 * ensure that we read the correct state of LED beaconing represented
	 * by led_override_timer_active
	 */
	smp_rmb();
	if (atomic_read(&ppd->led_override_timer_active)) {
		del_timer_sync(&ppd->led_override_timer);
		atomic_set(&ppd->led_override_timer_active, 0);
		/* Ensure the atomic_set is visible to all CPUs */
		smp_wmb();
	}

	/* Hand control of the LED to the DC for normal operation */
	write_csr(dd, DCC_CFG_LED_CNTRL, 0);
}

static void run_led_override(struct timer_list *t)
{
	struct hfi1_pportdata *ppd = from_timer(ppd, t, led_override_timer);
	struct hfi1_devdata *dd = ppd->dd;
	unsigned long timeout;
	int phase_idx;

	if (!(dd->flags & HFI1_INITTED))
		return;

	phase_idx = ppd->led_override_phase & 1;

	setextled(dd, phase_idx);

	timeout = ppd->led_override_vals[phase_idx];

	/* Set up for next phase */
	ppd->led_override_phase = !ppd->led_override_phase;

	mod_timer(&ppd->led_override_timer, jiffies + timeout);
}

/*
 * To have the LED blink in a particular pattern, provide timeon and timeoff
 * in milliseconds.
 * To turn off custom blinking and return to normal operation, use
 * shutdown_led_override()
 */
void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
			     unsigned int timeoff)
{
	if (!(ppd->dd->flags & HFI1_INITTED))
		return;

	/* Convert to jiffies for direct use in timer */
	ppd->led_override_vals[0] = msecs_to_jiffies(timeoff);
	ppd->led_override_vals[1] = msecs_to_jiffies(timeon);

	/* Arbitrarily start from LED on phase */
	ppd->led_override_phase = 1;

	/*
	 * If the timer has not already been started, do so. Use a "quick"
	 * timeout so the handler will be called soon to look at our request.
	 */
	if (!timer_pending(&ppd->led_override_timer)) {
		timer_setup(&ppd->led_override_timer, run_led_override, 0);
		ppd->led_override_timer.expires = jiffies + 1;
		add_timer(&ppd->led_override_timer);
		atomic_set(&ppd->led_override_timer_active, 1);
		/* Ensure the atomic_set is visible to all CPUs */
		smp_wmb();
	}
}
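/*
 * Usage example: hfi1_start_led_override(ppd, 500, 500) blinks the LED
 * with a 500 ms on / 500 ms off pattern (1 Hz) until
 * shutdown_led_override() hands the LED back to the DC.
 */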
/**
 * hfi1_reset_device - reset the chip if possible
 * @unit: the device to reset
 *
 * Whether or not reset is successful, we attempt to re-initialize the chip
 * (that is, much like a driver unload/reload).  We clear the INITTED flag
 * so that the various entry points will fail until we reinitialize.  For
 * now, we only allow this if no user contexts are open that use chip
 * resources.
 */
int hfi1_reset_device(int unit)
{
	int ret;
	struct hfi1_devdata *dd = hfi1_lookup(unit);
	struct hfi1_pportdata *ppd;
	int pidx;

	if (!dd) {
		ret = -ENODEV;
		goto bail;
	}

	dd_dev_info(dd, "Reset on unit %u requested\n", unit);

	if (!dd->kregbase1 || !(dd->flags & HFI1_PRESENT)) {
		dd_dev_info(dd,
			    "Invalid unit number %u or not initialized or not present\n",
			    unit);
		ret = -ENXIO;
		goto bail;
	}

	/* If there are any user/vnic contexts, we cannot reset */
	mutex_lock(&hfi1_mutex);
	if (dd->rcd)
		if (hfi1_stats.sps_ctxts) {
			mutex_unlock(&hfi1_mutex);
			ret = -EBUSY;
			goto bail;
		}
	mutex_unlock(&hfi1_mutex);

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;

		shutdown_led_override(ppd);
	}
	if (dd->flags & HFI1_HAS_SEND_DMA)
		sdma_exit(dd);

	hfi1_reset_cpu_counters(dd);

	ret = hfi1_init(dd, 1);

	if (ret)
		dd_dev_err(dd,
			   "Reinitialize unit %u after reset failed with %d\n",
			   unit, ret);
	else
		dd_dev_info(dd, "Reinitialized unit %u after resetting\n",
			    unit);

bail:
	return ret;
}

static inline void hfi1_setup_ib_header(struct hfi1_packet *packet)
{
	packet->hdr = (struct hfi1_ib_message_header *)
			hfi1_get_msgheader(packet->rcd->dd,
					   packet->rhf_addr);
	packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
}

static int hfi1_bypass_ingress_pkt_check(struct hfi1_packet *packet)
{
	struct hfi1_pportdata *ppd = packet->rcd->ppd;

	/* slid and dlid cannot be 0 */
	if ((!packet->slid) || (!packet->dlid))
		return -EINVAL;

	/* Compare port lid with incoming packet dlid */
	if ((!(hfi1_is_16B_mcast(packet->dlid))) &&
	    (packet->dlid !=
		opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))) {
		if (packet->dlid != ppd->lid)
			return -EINVAL;
	}

	/* No multicast packets with SC15 */
	if ((hfi1_is_16B_mcast(packet->dlid)) && (packet->sc == 0xF))
		return -EINVAL;

	/* Packets with permissive DLID always on SC15 */
	if ((packet->dlid == opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE),
					 16B)) &&
	    (packet->sc != 0xF))
		return -EINVAL;

	return 0;
}

static int hfi1_setup_9B_packet(struct hfi1_packet *packet)
{
	struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
	struct ib_header *hdr;
	u8 lnh;

	hfi1_setup_ib_header(packet);
	hdr = packet->hdr;

	lnh = ib_get_lnh(hdr);
	if (lnh == HFI1_LRH_BTH) {
		packet->ohdr = &hdr->u.oth;
		packet->grh = NULL;
	} else if (lnh == HFI1_LRH_GRH) {
		u32 vtf;

		packet->ohdr = &hdr->u.l.oth;
		packet->grh = &hdr->u.l.grh;
		if (packet->grh->next_hdr != IB_GRH_NEXT_HDR)
			goto drop;
		vtf = be32_to_cpu(packet->grh->version_tclass_flow);
		if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
			goto drop;
	} else {
		goto drop;
	}

	/* Query commonly used fields from packet header */
	packet->payload = packet->ebuf;
	packet->opcode = ib_bth_get_opcode(packet->ohdr);
	packet->slid = ib_get_slid(hdr);
	packet->dlid = ib_get_dlid(hdr);
	if (unlikely((packet->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
		     (packet->dlid != be16_to_cpu(IB_LID_PERMISSIVE))))
		packet->dlid += opa_get_mcast_base(OPA_MCAST_NR) -
			be16_to_cpu(IB_MULTICAST_LID_BASE);
	packet->sl = ib_get_sl(hdr);
	packet->sc = hfi1_9B_get_sc5(hdr, packet->rhf);
	packet->pad = ib_bth_get_pad(packet->ohdr);
	packet->extra_byte = 0;
	packet->pkey = ib_bth_get_pkey(packet->ohdr);
	packet->migrated = ib_bth_is_migration(packet->ohdr);

	return 0;
drop:
	ibp->rvp.n_pkt_drops++;
	return -EINVAL;
}

static int hfi1_setup_bypass_packet(struct hfi1_packet *packet)
{
	/*
	 * Bypass packets have a different header/payload split
	 * compared to an IB packet.
	 * Current split is set such that 16 bytes of the actual
	 * header is in the header buffer and the remaining is in
	 * the eager buffer. We chose 16 since hfi1 driver only
	 * supports 16B bypass packets and we will be able to
	 * receive the entire LRH with such a split.
	 */

	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct hfi1_pportdata *ppd = rcd->ppd;
	struct hfi1_ibport *ibp = &ppd->ibport_data;
	u8 l4;
	u8 grh_len;

	packet->hdr = (struct hfi1_16b_header *)
			hfi1_get_16B_header(packet->rcd->dd,
					    packet->rhf_addr);
	packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;

	l4 = hfi1_16B_get_l4(packet->hdr);
	if (l4 == OPA_16B_L4_IB_LOCAL) {
		grh_len = 0;
		packet->ohdr = packet->ebuf;
		packet->grh = NULL;
	} else if (l4 == OPA_16B_L4_IB_GLOBAL) {
		u32 vtf;

		grh_len = sizeof(struct ib_grh);
		packet->ohdr = packet->ebuf + grh_len;
		packet->grh = packet->ebuf;
		if (packet->grh->next_hdr != IB_GRH_NEXT_HDR)
			goto drop;
		vtf = be32_to_cpu(packet->grh->version_tclass_flow);
		if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
			goto drop;
	} else {
		goto drop;
	}

	/* Query commonly used fields from packet header */
	packet->opcode = ib_bth_get_opcode(packet->ohdr);
	/* hdr_len_by_opcode already has an IB LRH factored in */
	packet->hlen = hdr_len_by_opcode[packet->opcode] +
		(LRH_16B_BYTES - LRH_9B_BYTES) + grh_len;
	packet->payload = packet->ebuf + packet->hlen - LRH_16B_BYTES;
	packet->slid = hfi1_16B_get_slid(packet->hdr);
	packet->dlid = hfi1_16B_get_dlid(packet->hdr);
	if (unlikely(hfi1_is_16B_mcast(packet->dlid)))
		packet->dlid += opa_get_mcast_base(OPA_MCAST_NR) -
				opa_get_lid(opa_get_mcast_base(OPA_MCAST_NR),
					    16B);
	packet->sc = hfi1_16B_get_sc(packet->hdr);
	packet->sl = ibp->sc_to_sl[packet->sc];
	packet->pad = hfi1_16B_bth_get_pad(packet->ohdr);
	packet->extra_byte = SIZE_OF_LT;
	packet->pkey = hfi1_16B_get_pkey(packet->hdr);
	packet->migrated = opa_bth_is_migration(packet->ohdr);

	if (hfi1_bypass_ingress_pkt_check(packet))
		goto drop;

	return 0;
drop:
	hfi1_cdbg(PKT, "%s: packet dropped\n", __func__);
	ibp->rvp.n_pkt_drops++;
	return -EINVAL;
}
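/*
 * Both setup routines above remap on-the-wire multicast DLIDs into the
 * driver's OPA multicast range (based at opa_get_mcast_base(OPA_MCAST_NR)),
 * so that later lookups deal with a single LID space regardless of the 9B
 * or 16B packet format.
 */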
"dc " : "", 1543 packet->rhf & RHF_TID_ERR ? "tid " : "", 1544 packet->rhf & RHF_LEN_ERR ? "len " : "", 1545 packet->rhf & RHF_ECC_ERR ? "ecc " : "", 1546 packet->rhf & RHF_VCRC_ERR ? "vcrc " : "", 1547 packet->rhf & RHF_ICRC_ERR ? "icrc " : "", 1548 rte); 1549 } 1550 1551 /* 1552 * The following functions are called by the interrupt handler. They are type 1553 * specific handlers for each packet type. 1554 */ 1555 int process_receive_ib(struct hfi1_packet *packet) 1556 { 1557 if (unlikely(hfi1_dbg_fault_packet(packet))) 1558 return RHF_RCV_CONTINUE; 1559 1560 if (hfi1_setup_9B_packet(packet)) 1561 return RHF_RCV_CONTINUE; 1562 1563 trace_hfi1_rcvhdr(packet); 1564 1565 if (unlikely(rhf_err_flags(packet->rhf))) { 1566 handle_eflags(packet); 1567 return RHF_RCV_CONTINUE; 1568 } 1569 1570 hfi1_ib_rcv(packet); 1571 return RHF_RCV_CONTINUE; 1572 } 1573 1574 static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet) 1575 { 1576 /* Packet received in VNIC context via RSM */ 1577 if (packet->rcd->is_vnic) 1578 return true; 1579 1580 if ((hfi1_16B_get_l2(packet->ebuf) == OPA_16B_L2_TYPE) && 1581 (hfi1_16B_get_l4(packet->ebuf) == OPA_16B_L4_ETHR)) 1582 return true; 1583 1584 return false; 1585 } 1586 1587 int process_receive_bypass(struct hfi1_packet *packet) 1588 { 1589 struct hfi1_devdata *dd = packet->rcd->dd; 1590 1591 if (hfi1_is_vnic_packet(packet)) { 1592 hfi1_vnic_bypass_rcv(packet); 1593 return RHF_RCV_CONTINUE; 1594 } 1595 1596 if (hfi1_setup_bypass_packet(packet)) 1597 return RHF_RCV_CONTINUE; 1598 1599 trace_hfi1_rcvhdr(packet); 1600 1601 if (unlikely(rhf_err_flags(packet->rhf))) { 1602 handle_eflags(packet); 1603 return RHF_RCV_CONTINUE; 1604 } 1605 1606 if (hfi1_16B_get_l2(packet->hdr) == 0x2) { 1607 hfi1_16B_rcv(packet); 1608 } else { 1609 dd_dev_err(dd, 1610 "Bypass packets other than 16B are not supported in normal operation. Dropping\n"); 1611 incr_cntr64(&dd->sw_rcv_bypass_packet_errors); 1612 if (!(dd->err_info_rcvport.status_and_code & 1613 OPA_EI_STATUS_SMASK)) { 1614 u64 *flits = packet->ebuf; 1615 1616 if (flits && !(packet->rhf & RHF_LEN_ERR)) { 1617 dd->err_info_rcvport.packet_flit1 = flits[0]; 1618 dd->err_info_rcvport.packet_flit2 = 1619 packet->tlen > sizeof(flits[0]) ? 1620 flits[1] : 0; 1621 } 1622 dd->err_info_rcvport.status_and_code |= 1623 (OPA_EI_STATUS_SMASK | BAD_L2_ERR); 1624 } 1625 } 1626 return RHF_RCV_CONTINUE; 1627 } 1628 1629 int process_receive_error(struct hfi1_packet *packet) 1630 { 1631 /* KHdrHCRCErr -- KDETH packet with a bad HCRC */ 1632 if (unlikely( 1633 hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) && 1634 rhf_rcv_type_err(packet->rhf) == 3)) 1635 return RHF_RCV_CONTINUE; 1636 1637 hfi1_setup_ib_header(packet); 1638 handle_eflags(packet); 1639 1640 if (unlikely(rhf_err_flags(packet->rhf))) 1641 dd_dev_err(packet->rcd->dd, 1642 "Unhandled error packet received. Dropping.\n"); 1643 1644 return RHF_RCV_CONTINUE; 1645 } 1646 1647 int kdeth_process_expected(struct hfi1_packet *packet) 1648 { 1649 if (unlikely(hfi1_dbg_fault_packet(packet))) 1650 return RHF_RCV_CONTINUE; 1651 1652 hfi1_setup_ib_header(packet); 1653 if (unlikely(rhf_err_flags(packet->rhf))) 1654 handle_eflags(packet); 1655 1656 dd_dev_err(packet->rcd->dd, 1657 "Unhandled expected packet received. 
Dropping.\n"); 1658 return RHF_RCV_CONTINUE; 1659 } 1660 1661 int kdeth_process_eager(struct hfi1_packet *packet) 1662 { 1663 hfi1_setup_ib_header(packet); 1664 if (unlikely(rhf_err_flags(packet->rhf))) 1665 handle_eflags(packet); 1666 if (unlikely(hfi1_dbg_fault_packet(packet))) 1667 return RHF_RCV_CONTINUE; 1668 1669 dd_dev_err(packet->rcd->dd, 1670 "Unhandled eager packet received. Dropping.\n"); 1671 return RHF_RCV_CONTINUE; 1672 } 1673 1674 int process_receive_invalid(struct hfi1_packet *packet) 1675 { 1676 dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n", 1677 rhf_rcv_type(packet->rhf)); 1678 return RHF_RCV_CONTINUE; 1679 } 1680 1681 void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd) 1682 { 1683 struct hfi1_packet packet; 1684 struct ps_mdata mdata; 1685 1686 seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s head %llu tail %llu\n", 1687 rcd->ctxt, rcd->rcvhdrq_cnt, rcd->rcvhdrqentsize, 1688 HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ? 1689 "dma_rtail" : "nodma_rtail", 1690 read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD) & 1691 RCV_HDR_HEAD_HEAD_MASK, 1692 read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL)); 1693 1694 init_packet(rcd, &packet); 1695 init_ps_mdata(&mdata, &packet); 1696 1697 while (1) { 1698 struct hfi1_devdata *dd = rcd->dd; 1699 __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + 1700 dd->rhf_offset; 1701 struct ib_header *hdr; 1702 u64 rhf = rhf_to_cpu(rhf_addr); 1703 u32 etype = rhf_rcv_type(rhf), qpn; 1704 u8 opcode; 1705 u32 psn; 1706 u8 lnh; 1707 1708 if (ps_done(&mdata, rhf, rcd)) 1709 break; 1710 1711 if (ps_skip(&mdata, rhf, rcd)) 1712 goto next; 1713 1714 if (etype > RHF_RCV_TYPE_IB) 1715 goto next; 1716 1717 packet.hdr = hfi1_get_msgheader(dd, rhf_addr); 1718 hdr = packet.hdr; 1719 1720 lnh = be16_to_cpu(hdr->lrh[0]) & 3; 1721 1722 if (lnh == HFI1_LRH_BTH) 1723 packet.ohdr = &hdr->u.oth; 1724 else if (lnh == HFI1_LRH_GRH) 1725 packet.ohdr = &hdr->u.l.oth; 1726 else 1727 goto next; /* just in case */ 1728 1729 opcode = (be32_to_cpu(packet.ohdr->bth[0]) >> 24); 1730 qpn = be32_to_cpu(packet.ohdr->bth[1]) & RVT_QPN_MASK; 1731 psn = mask_psn(be32_to_cpu(packet.ohdr->bth[2])); 1732 1733 seq_printf(s, "\tEnt %u: opcode 0x%x, qpn 0x%x, psn 0x%x\n", 1734 mdata.ps_head, opcode, qpn, psn); 1735 next: 1736 update_ps_mdata(&mdata, rcd); 1737 } 1738 } 1739