/*
 * Copyright(c) 2015, 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/poll.h>
#include <linux/cdev.h>
#include <linux/vmalloc.h>
#include <linux/io.h>

#include <rdma/ib.h>

#include "hfi.h"
#include "pio.h"
#include "device.h"
#include "common.h"
#include "trace.h"
#include "user_sdma.h"
#include "user_exp_rcv.h"
#include "eprom.h"
#include "aspm.h"
#include "mmu_rb.h"

#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt

#define SEND_CTXT_HALT_TIMEOUT 1000 /* msecs */

/*
 * File operation functions
 */
static int hfi1_file_open(struct inode *, struct file *);
static int hfi1_file_close(struct inode *, struct file *);
static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *);
static unsigned int hfi1_poll(struct file *, struct poll_table_struct *);
static int hfi1_file_mmap(struct file *, struct vm_area_struct *);

static u64 kvirt_to_phys(void *);
static int assign_ctxt(struct file *, struct hfi1_user_info *);
static int init_subctxts(struct hfi1_ctxtdata *, const struct hfi1_user_info *);
static int user_init(struct file *);
static int get_ctxt_info(struct file *, void __user *, __u32);
static int get_base_info(struct file *, void __user *, __u32);
static int setup_ctxt(struct file *);
static int setup_subctxt(struct hfi1_ctxtdata *);
static int get_user_context(struct file *, struct hfi1_user_info *, int);
static int find_shared_ctxt(struct file *, const struct hfi1_user_info *);
static int allocate_ctxt(struct file *, struct hfi1_devdata *,
			 struct hfi1_user_info *);
static unsigned int poll_urgent(struct file *, struct poll_table_struct *);
static unsigned int poll_next(struct file *, struct poll_table_struct *);
static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long);
static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
static int vma_fault(struct vm_area_struct *, struct vm_fault *);
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg);

static const struct file_operations hfi1_file_ops = {
	.owner = THIS_MODULE,
	.write_iter = hfi1_write_iter,
	.open = hfi1_file_open,
	.release = hfi1_file_close,
	.unlocked_ioctl = hfi1_file_ioctl,
	.poll = hfi1_poll,
	.mmap = hfi1_file_mmap,
	.llseek = noop_llseek,
};

static struct vm_operations_struct vm_ops = {
	.fault = vma_fault,
};

/*
 * Types of memories mapped into user processes' space
 */
enum mmap_types {
	PIO_BUFS = 1,
	PIO_BUFS_SOP,
	PIO_CRED,
	RCV_HDRQ,
	RCV_EGRBUF,
	UREGS,
	EVENTS,
	STATUS,
	RTAIL,
	SUBCTXT_UREGS,
	SUBCTXT_RCV_HDRQ,
	SUBCTXT_EGRBUF,
	SDMA_COMP
};

/*
 * Masks and offsets defining the mmap tokens
 */
#define HFI1_MMAP_OFFSET_MASK   0xfffULL
#define HFI1_MMAP_OFFSET_SHIFT  0
#define HFI1_MMAP_SUBCTXT_MASK  0xfULL
#define HFI1_MMAP_SUBCTXT_SHIFT 12
#define HFI1_MMAP_CTXT_MASK     0xffULL
#define HFI1_MMAP_CTXT_SHIFT    16
#define HFI1_MMAP_TYPE_MASK     0xfULL
#define HFI1_MMAP_TYPE_SHIFT    24
#define HFI1_MMAP_MAGIC_MASK    0xffffffffULL
#define HFI1_MMAP_MAGIC_SHIFT   32

#define HFI1_MMAP_MAGIC         0xdabbad00

#define HFI1_MMAP_TOKEN_SET(field, val)	\
	(((val) & HFI1_MMAP_##field##_MASK) << HFI1_MMAP_##field##_SHIFT)
#define HFI1_MMAP_TOKEN_GET(field, token) \
	(((token) >> HFI1_MMAP_##field##_SHIFT) & HFI1_MMAP_##field##_MASK)
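/*
 * For reference, the masks and shifts above pack a 64-bit mmap token as
 * follows (bits 28-31 are unused):
 *
 *  63           32 31  28 27  24 23      16 15     12 11           0
 * +---------------+------+------+----------+---------+--------------+
 * |     magic     |      | type |   ctxt   | subctxt |    offset    |
 * +---------------+------+------+----------+---------+--------------+
 */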
#define HFI1_MMAP_TOKEN(type, ctxt, subctxt, addr) \
	(HFI1_MMAP_TOKEN_SET(MAGIC, HFI1_MMAP_MAGIC) | \
	HFI1_MMAP_TOKEN_SET(TYPE, type) | \
	HFI1_MMAP_TOKEN_SET(CTXT, ctxt) | \
	HFI1_MMAP_TOKEN_SET(SUBCTXT, subctxt) | \
	HFI1_MMAP_TOKEN_SET(OFFSET, (offset_in_page(addr))))

#define dbg(fmt, ...)				\
	pr_info(fmt, ##__VA_ARGS__)

static inline int is_valid_mmap(u64 token)
{
	return (HFI1_MMAP_TOKEN_GET(MAGIC, token) == HFI1_MMAP_MAGIC);
}

static int hfi1_file_open(struct inode *inode, struct file *fp)
{
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);

	/* Just take a ref now. Not all opens result in a context assign */
	kobject_get(&dd->kobj);

	/* The real work is performed later in assign_ctxt() */
	fp->private_data = kzalloc(sizeof(struct hfi1_filedata), GFP_KERNEL);
	if (fp->private_data) /* no cpu affinity by default */
		((struct hfi1_filedata *)fp->private_data)->rec_cpu_num = -1;
	return fp->private_data ? 0 : -ENOMEM;
}

static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_user_info uinfo;
	struct hfi1_tid_info tinfo;
	int ret = 0;
	unsigned long addr;
	int uval = 0;
	unsigned long ul_uval = 0;
	u16 uval16 = 0;

	hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd);
	if (cmd != HFI1_IOCTL_ASSIGN_CTXT &&
	    cmd != HFI1_IOCTL_GET_VERS &&
	    !uctxt)
		return -EINVAL;

	switch (cmd) {
	case HFI1_IOCTL_ASSIGN_CTXT:
		if (uctxt)
			return -EINVAL;

		if (copy_from_user(&uinfo,
				   (struct hfi1_user_info __user *)arg,
				   sizeof(uinfo)))
			return -EFAULT;

		ret = assign_ctxt(fp, &uinfo);
		if (ret < 0)
			return ret;
		ret = setup_ctxt(fp);
		if (ret)
			return ret;
		ret = user_init(fp);
		break;
	case HFI1_IOCTL_CTXT_INFO:
		ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg,
				    sizeof(struct hfi1_ctxt_info));
		break;
	case HFI1_IOCTL_USER_INFO:
		ret = get_base_info(fp, (void __user *)(unsigned long)arg,
				    sizeof(struct hfi1_base_info));
		break;
	case HFI1_IOCTL_CREDIT_UPD:
		if (uctxt && uctxt->sc)
			sc_return_credits(uctxt->sc);
		break;

	case HFI1_IOCTL_TID_UPDATE:
		if (copy_from_user(&tinfo,
				   (struct hfi1_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

		ret = hfi1_user_exp_rcv_setup(fp, &tinfo);
		if (!ret) {
			/*
			 * Copy the number of tidlist entries we used
			 * and the length of the buffer we registered.
			 * These fields are adjacent in the structure so
			 * we can copy them at the same time.
			 */
			addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
			if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
					 sizeof(tinfo.tidcnt) +
					 sizeof(tinfo.length)))
				ret = -EFAULT;
		}
		break;

	case HFI1_IOCTL_TID_FREE:
		if (copy_from_user(&tinfo,
				   (struct hfi1_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

		ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
		if (ret)
			break;
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			ret = -EFAULT;
		break;

	case HFI1_IOCTL_TID_INVAL_READ:
		if (copy_from_user(&tinfo,
				   (struct hfi1_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

		ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
		if (ret)
			break;
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			ret = -EFAULT;
		break;

	case HFI1_IOCTL_RECV_CTRL:
		ret = get_user(uval, (int __user *)arg);
		if (ret != 0)
			return -EFAULT;
		ret = manage_rcvq(uctxt, fd->subctxt, uval);
		break;

	case HFI1_IOCTL_POLL_TYPE:
		ret = get_user(uval, (int __user *)arg);
		if (ret != 0)
			return -EFAULT;
		uctxt->poll_type = (typeof(uctxt->poll_type))uval;
		break;

	case HFI1_IOCTL_ACK_EVENT:
		ret = get_user(ul_uval, (unsigned long __user *)arg);
		if (ret != 0)
			return -EFAULT;
		ret = user_event_ack(uctxt, fd->subctxt, ul_uval);
		break;

	case HFI1_IOCTL_SET_PKEY:
		ret = get_user(uval16, (u16 __user *)arg);
		if (ret != 0)
			return -EFAULT;
		if (HFI1_CAP_IS_USET(PKEY_CHECK))
			ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16);
		else
			return -EPERM;
		break;

	case HFI1_IOCTL_CTXT_RESET: {
		struct send_context *sc;
		struct hfi1_devdata *dd;

		if (!uctxt || !uctxt->dd || !uctxt->sc)
			return -EINVAL;

		/*
		 * There is no protection here. User level has to
		 * guarantee that no one will be writing to the send
		 * context while it is being re-initialized.
		 * If user level breaks that guarantee, it will break
		 * its own context and no one else's.
		 */
		dd = uctxt->dd;
		sc = uctxt->sc;
		/*
		 * Wait until the interrupt handler has marked the
		 * context as halted or frozen. Report error if we time
		 * out.
		 */
		wait_event_interruptible_timeout(
			sc->halt_wait, (sc->flags & SCF_HALTED),
			msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
		if (!(sc->flags & SCF_HALTED))
			return -ENOLCK;

		/*
		 * If the send context was halted due to a Freeze,
		 * wait until the device has been "unfrozen" before
		 * resetting the context.
		 */
		if (sc->flags & SCF_FROZEN) {
			wait_event_interruptible_timeout(
				dd->event_queue,
				!(ACCESS_ONCE(dd->flags) & HFI1_FROZEN),
				msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
			if (dd->flags & HFI1_FROZEN)
				return -ENOLCK;

			if (dd->flags & HFI1_FORCED_FREEZE)
				/*
				 * Don't allow context reset if we are in a
				 * forced freeze
				 */
				return -ENODEV;

			sc_disable(sc);
			ret = sc_enable(sc);
			hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB,
				     uctxt->ctxt);
		} else {
			ret = sc_restart(sc);
		}
		if (!ret)
			sc_return_credits(sc);
		break;
	}

	case HFI1_IOCTL_GET_VERS:
		uval = HFI1_USER_SWVERSION;
		if (put_user(uval, (int __user *)arg))
			return -EFAULT;
		break;

	default:
		return -EINVAL;
	}

	return ret;
}

static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
	struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
	struct hfi1_user_sdma_pkt_q *pq = fd->pq;
	struct hfi1_user_sdma_comp_q *cq = fd->cq;
	int ret = 0, done = 0, reqs = 0;
	unsigned long dim = from->nr_segs;

	if (!cq || !pq) {
		ret = -EIO;
		goto done;
	}

	if (!iter_is_iovec(from) || !dim) {
		ret = -EINVAL;
		goto done;
	}

	hfi1_cdbg(SDMA, "SDMA request from %u:%u (%lu)",
		  fd->uctxt->ctxt, fd->subctxt, dim);

	if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) {
		ret = -ENOSPC;
		goto done;
	}

	while (dim) {
		unsigned long count = 0;

		ret = hfi1_user_sdma_process_request(
			kiocb->ki_filp, (struct iovec *)(from->iov + done),
			dim, &count);
		if (ret)
			goto done;
		dim -= count;
		done += count;
		reqs++;
	}
done:
	return ret ? ret : reqs;
}

static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd;
	unsigned long flags, pfn;
	u64 token = vma->vm_pgoff << PAGE_SHIFT,
		memaddr = 0;
	u8 subctxt, mapio = 0, vmf = 0, type;
	ssize_t memlen = 0;
	int ret = 0;
	u16 ctxt;

	if (!is_valid_mmap(token) || !uctxt ||
	    !(vma->vm_flags & VM_SHARED)) {
		ret = -EINVAL;
		goto done;
	}
	dd = uctxt->dd;
	ctxt = HFI1_MMAP_TOKEN_GET(CTXT, token);
	subctxt = HFI1_MMAP_TOKEN_GET(SUBCTXT, token);
	type = HFI1_MMAP_TOKEN_GET(TYPE, token);
	if (ctxt != uctxt->ctxt || subctxt != fd->subctxt) {
		ret = -EINVAL;
		goto done;
	}

	flags = vma->vm_flags;

	switch (type) {
	case PIO_BUFS:
	case PIO_BUFS_SOP:
		memaddr = ((dd->physaddr + TXE_PIO_SEND) +
				/* chip pio base */
			   (uctxt->sc->hw_context * BIT(16))) +
				/* 64K PIO space / ctxt */
			(type == PIO_BUFS_SOP ?
				(TXE_PIO_SIZE / 2) : 0); /* sop? */
		/*
		 * Map only the amount allocated to the context, not the
		 * entire available context's PIO space.
		 */
		memlen = PAGE_ALIGN(uctxt->sc->credits * PIO_BLOCK_SIZE);
		flags &= ~VM_MAYREAD;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
		mapio = 1;
		break;
	case PIO_CRED:
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		/*
		 * The credit return location for this context could be on the
		 * second or third page allocated for credit returns (if number
		 * of enabled contexts > 64 and 128 respectively).
		 */
		memaddr = dd->cr_base[uctxt->numa_id].pa +
			(((u64)uctxt->sc->hw_free -
			  (u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK);
		memlen = PAGE_SIZE;
		flags &= ~VM_MAYWRITE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		/*
		 * The driver has already allocated memory for credit
		 * returns and programmed it into the chip. Has that
		 * memory been flagged as non-cached?
		 */
		/* vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); */
		mapio = 1;
		break;
	case RCV_HDRQ:
		memaddr = uctxt->rcvhdrq_phys;
		memlen = uctxt->rcvhdrq_size;
		break;
	case RCV_EGRBUF: {
		unsigned long addr;
		int i;
		/*
		 * The RcvEgr buffer needs to be handled differently
		 * as multiple non-contiguous pages need to be mapped
		 * into the user process.
		 */
		memlen = uctxt->egrbufs.size;
		if ((vma->vm_end - vma->vm_start) != memlen) {
			dd_dev_err(dd, "Eager buffer map size invalid (%lu != %lu)\n",
				   (vma->vm_end - vma->vm_start), memlen);
			ret = -EINVAL;
			goto done;
		}
		if (vma->vm_flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		vma->vm_flags &= ~VM_MAYWRITE;
		addr = vma->vm_start;
		for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) {
			ret = remap_pfn_range(
				vma, addr,
				uctxt->egrbufs.buffers[i].phys >> PAGE_SHIFT,
				uctxt->egrbufs.buffers[i].len,
				vma->vm_page_prot);
			if (ret < 0)
				goto done;
			addr += uctxt->egrbufs.buffers[i].len;
		}
		ret = 0;
		goto done;
	}
	case UREGS:
		/*
		 * Map only the page that contains this context's user
		 * registers.
		 */
		memaddr = (unsigned long)
			(dd->physaddr + RXE_PER_CONTEXT_USER)
			+ (uctxt->ctxt * RXE_PER_CONTEXT_SIZE);
		/*
		 * TidFlow table is on the same page as the rest of the
		 * user registers.
		 */
		memlen = PAGE_SIZE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
		mapio = 1;
		break;
	case EVENTS:
		/*
		 * Use the page where this context's flags are. User level
		 * knows where its own bitmap is within the page.
		 */
		memaddr = (unsigned long)(dd->events +
					  ((uctxt->ctxt - dd->first_user_ctxt) *
					   HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
		memlen = PAGE_SIZE;
		/*
		 * v3.7 removes VM_RESERVED but the effect is kept by
		 * using VM_IO.
		 */
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case STATUS:
		memaddr = kvirt_to_phys((void *)dd->status);
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		break;
	case RTAIL:
		if (!HFI1_CAP_IS_USET(DMA_RTAIL)) {
			/*
			 * If the memory allocation failed, the context alloc
			 * also would have failed, so we would never get here
			 */
			ret = -EINVAL;
			goto done;
		}
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		memaddr = uctxt->rcvhdrqtailaddr_phys;
		memlen = PAGE_SIZE;
		flags &= ~VM_MAYWRITE;
		break;
	case SUBCTXT_UREGS:
		memaddr = (u64)uctxt->subctxt_uregbase;
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_RCV_HDRQ:
		memaddr = (u64)uctxt->subctxt_rcvhdr_base;
		memlen = uctxt->rcvhdrq_size * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_EGRBUF:
		memaddr = (u64)uctxt->subctxt_rcvegrbuf;
		memlen = uctxt->egrbufs.size * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		flags &= ~VM_MAYWRITE;
		vmf = 1;
		break;
	case SDMA_COMP: {
		struct hfi1_user_sdma_comp_q *cq = fd->cq;

		if (!cq) {
			ret = -EFAULT;
			goto done;
		}
		memaddr = (u64)cq->comps;
		memlen = PAGE_ALIGN(sizeof(*cq->comps) * cq->nentries);
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}

	if ((vma->vm_end - vma->vm_start) != memlen) {
		hfi1_cdbg(PROC, "%u:%u Memory size mismatch %lu:%lu",
			  uctxt->ctxt, fd->subctxt,
			  (vma->vm_end - vma->vm_start), memlen);
		ret = -EINVAL;
		goto done;
	}

	vma->vm_flags = flags;
	hfi1_cdbg(PROC,
		  "%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n",
		  ctxt, subctxt, type, mapio, vmf, memaddr, memlen,
		  vma->vm_end - vma->vm_start, vma->vm_flags);
	pfn = (unsigned long)(memaddr >> PAGE_SHIFT);
	if (vmf) {
		vma->vm_pgoff = pfn;
		vma->vm_ops = &vm_ops;
		ret = 0;
	} else if (mapio) {
		ret = io_remap_pfn_range(vma, vma->vm_start, pfn, memlen,
					 vma->vm_page_prot);
	} else {
		ret = remap_pfn_range(vma, vma->vm_start, pfn, memlen,
				      vma->vm_page_prot);
	}
done:
	return ret;
}

/*
 * Local (non-chip) user memory is not mapped right away but as it is
 * accessed by the user-level code.
 */
static int vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page;

	page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
	if (!page)
		return VM_FAULT_SIGBUS;

	get_page(page);
	vmf->page = page;

	return 0;
}

static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt)
{
	struct hfi1_ctxtdata *uctxt;
	unsigned pollflag;

	uctxt = ((struct hfi1_filedata *)fp->private_data)->uctxt;
	if (!uctxt)
		pollflag = POLLERR;
	else if (uctxt->poll_type == HFI1_POLL_TYPE_URGENT)
		pollflag = poll_urgent(fp, pt);
	else if (uctxt->poll_type == HFI1_POLL_TYPE_ANYRCV)
		pollflag = poll_next(fp, pt);
	else /* invalid */
		pollflag = POLLERR;

	return pollflag;
}

static int hfi1_file_close(struct inode *inode, struct file *fp)
{
	struct hfi1_filedata *fdata = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);
	unsigned long flags, *ev;

	fp->private_data = NULL;

	if (!uctxt)
		goto done;

	hfi1_cdbg(PROC, "freeing ctxt %u:%u", uctxt->ctxt, fdata->subctxt);
	mutex_lock(&hfi1_mutex);

	flush_wc();
	/* drain user sdma queue */
	hfi1_user_sdma_free_queues(fdata);

	/* release the cpu */
	hfi1_put_proc_affinity(dd, fdata->rec_cpu_num);

	/*
	 * Clear any left over, unhandled events so the next process that
	 * gets this context doesn't get confused.
	 */
	ev = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) *
			   HFI1_MAX_SHARED_CTXTS) + fdata->subctxt;
	*ev = 0;

	if (--uctxt->cnt) {
		uctxt->active_slaves &= ~(1 << fdata->subctxt);
		uctxt->subpid[fdata->subctxt] = 0;
		mutex_unlock(&hfi1_mutex);
		goto done;
	}

	spin_lock_irqsave(&dd->uctxt_lock, flags);
	/*
	 * Disable receive context and interrupt available, reset all
	 * RcvCtxtCtrl bits to default values.
	 */
	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
		     HFI1_RCVCTRL_TIDFLOW_DIS |
		     HFI1_RCVCTRL_INTRAVAIL_DIS |
		     HFI1_RCVCTRL_TAILUPD_DIS |
		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
	/* Clear the context's J_KEY */
	hfi1_clear_ctxt_jkey(dd, uctxt->ctxt);
	/*
	 * Reset context integrity checks to default.
	 * (writes to CSRs probably belong in chip.c)
	 */
	write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
			hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type));
	sc_disable(uctxt->sc);
	uctxt->pid = 0;
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	dd->rcd[uctxt->ctxt] = NULL;

	hfi1_user_exp_rcv_free(fdata);
	hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);

	uctxt->rcvwait_to = 0;
	uctxt->piowait_to = 0;
	uctxt->rcvnowait = 0;
	uctxt->pionowait = 0;
	uctxt->event_flags = 0;

	hfi1_stats.sps_ctxts--;
	if (++dd->freectxts == dd->num_user_contexts)
		aspm_enable_all(dd);
	mutex_unlock(&hfi1_mutex);
	hfi1_free_ctxtdata(dd, uctxt);
done:
	kobject_put(&dd->kobj);
	kfree(fdata);
	return 0;
}

/*
 * Convert kernel *virtual* addresses to physical addresses.
 * This is used for vmalloc'ed addresses.
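 * (A vmalloc'ed buffer is virtually contiguous but not necessarily
 * physically contiguous, so this resolves only the single page that
 * contains the given address.)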
 */
static u64 kvirt_to_phys(void *addr)
{
	struct page *page;
	u64 paddr = 0;

	page = vmalloc_to_page(addr);
	if (page)
		paddr = page_to_pfn(page) << PAGE_SHIFT;

	return paddr;
}

static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
{
	int i_minor, ret = 0;
	unsigned int swmajor, swminor;

	swmajor = uinfo->userversion >> 16;
	if (swmajor != HFI1_USER_SWMAJOR) {
		ret = -ENODEV;
		goto done;
	}

	swminor = uinfo->userversion & 0xffff;

	mutex_lock(&hfi1_mutex);
	/* First, let's check whether we need to set up a shared context. */
	if (uinfo->subctxt_cnt) {
		struct hfi1_filedata *fd = fp->private_data;

		ret = find_shared_ctxt(fp, uinfo);
		if (ret < 0)
			goto done_unlock;
		if (ret)
			fd->rec_cpu_num = hfi1_get_proc_affinity(
				fd->uctxt->dd, fd->uctxt->numa_id);
	}

	/*
	 * We execute the following block if we couldn't find a
	 * shared context or if context sharing is not required.
	 */
	if (!ret) {
		i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
		ret = get_user_context(fp, uinfo, i_minor);
	}
done_unlock:
	mutex_unlock(&hfi1_mutex);
done:
	return ret;
}

static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo,
			    int devno)
{
	struct hfi1_devdata *dd = NULL;
	int devmax, npresent, nup;

	devmax = hfi1_count_units(&npresent, &nup);
	if (!npresent)
		return -ENXIO;

	if (!nup)
		return -ENETDOWN;

	dd = hfi1_lookup(devno);
	if (!dd)
		return -ENODEV;
	else if (!dd->freectxts)
		return -EBUSY;

	return allocate_ctxt(fp, dd, uinfo);
}

static int find_shared_ctxt(struct file *fp,
			    const struct hfi1_user_info *uinfo)
{
	int devmax, ndev, i;
	int ret = 0;
	struct hfi1_filedata *fd = fp->private_data;

	devmax = hfi1_count_units(NULL, NULL);

	for (ndev = 0; ndev < devmax; ndev++) {
		struct hfi1_devdata *dd = hfi1_lookup(ndev);

		if (!(dd && (dd->flags & HFI1_PRESENT) && dd->kregbase))
			continue;
		for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) {
			struct hfi1_ctxtdata *uctxt = dd->rcd[i];

			/* Skip ctxts which are not yet open */
			if (!uctxt || !uctxt->cnt)
				continue;
			/* Skip ctxt if it doesn't match the requested one */
			if (memcmp(uctxt->uuid, uinfo->uuid,
				   sizeof(uctxt->uuid)) ||
			    uctxt->jkey != generate_jkey(current_uid()) ||
			    uctxt->subctxt_id != uinfo->subctxt_id ||
			    uctxt->subctxt_cnt != uinfo->subctxt_cnt)
				continue;

			/* Verify the sharing process matches the master */
			if (uctxt->userversion != uinfo->userversion ||
			    uctxt->cnt >= uctxt->subctxt_cnt) {
				ret = -EINVAL;
				goto done;
			}
			fd->uctxt = uctxt;
			fd->subctxt = uctxt->cnt++;
			uctxt->subpid[fd->subctxt] = current->pid;
			uctxt->active_slaves |= 1 << fd->subctxt;
			ret = 1;
			goto done;
		}
	}

done:
	return ret;
}

static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
			 struct hfi1_user_info *uinfo)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt;
	unsigned ctxt;
	int ret, numa;

	if (dd->flags & HFI1_FROZEN) {
		/*
		 * Pick an error that is unique from all other errors
		 * that are returned so the user process knows that
		 * it tried to allocate while the SPC was frozen. It
		 * should be able to retry with success in a short
		 * while.
		 */
		return -EIO;
	}

	for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts; ctxt++)
		if (!dd->rcd[ctxt])
			break;

	if (ctxt == dd->num_rcv_contexts)
		return -EBUSY;

	fd->rec_cpu_num = hfi1_get_proc_affinity(dd, -1);
	if (fd->rec_cpu_num != -1)
		numa = cpu_to_node(fd->rec_cpu_num);
	else
		numa = numa_node_id();
	uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, numa);
	if (!uctxt) {
		dd_dev_err(dd,
			   "Unable to allocate ctxtdata memory, failing open\n");
		return -ENOMEM;
	}
	hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)",
		  uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num,
		  uctxt->numa_id);

	/*
	 * Allocate and enable a PIO send context.
	 */
	uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize,
			     uctxt->dd->node);
	if (!uctxt->sc)
		return -ENOMEM;

	hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index,
		  uctxt->sc->hw_context);
	ret = sc_enable(uctxt->sc);
	if (ret)
		return ret;
	/*
	 * Setup shared context resources if the user-level has requested
	 * shared contexts and this is the 'master' process.
	 * This has to be done here so the rest of the sub-contexts find the
	 * proper master.
	 */
	if (uinfo->subctxt_cnt && !fd->subctxt) {
		ret = init_subctxts(uctxt, uinfo);
		/*
		 * On error, we don't need to disable and de-allocate the
		 * send context because it will be done during file close
		 */
		if (ret)
			return ret;
	}
	uctxt->userversion = uinfo->userversion;
	uctxt->pid = current->pid;
	uctxt->flags = HFI1_CAP_UGET(MASK);
	init_waitqueue_head(&uctxt->wait);
	strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
	memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid));
	uctxt->jkey = generate_jkey(current_uid());
	INIT_LIST_HEAD(&uctxt->sdma_queues);
	spin_lock_init(&uctxt->sdma_qlock);
	hfi1_stats.sps_ctxts++;
	/*
	 * Disable ASPM when there are open user/PSM contexts to avoid
	 * issues with ASPM L1 exit latency
	 */
	if (dd->freectxts-- == dd->num_user_contexts)
		aspm_disable_all(dd);
	fd->uctxt = uctxt;

	return 0;
}

static int init_subctxts(struct hfi1_ctxtdata *uctxt,
			 const struct hfi1_user_info *uinfo)
{
	unsigned num_subctxts;

	num_subctxts = uinfo->subctxt_cnt;
	if (num_subctxts > HFI1_MAX_SHARED_CTXTS)
		return -EINVAL;

	uctxt->subctxt_cnt = uinfo->subctxt_cnt;
	uctxt->subctxt_id = uinfo->subctxt_id;
	uctxt->active_slaves = 1;
	uctxt->redirect_seq_cnt = 1;
	set_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);

	return 0;
}

static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
{
	int ret = 0;
	unsigned num_subctxts = uctxt->subctxt_cnt;

	uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE);
	if (!uctxt->subctxt_uregbase) {
		ret = -ENOMEM;
		goto bail;
	}
	/* We can take the size of the RcvHdr Queue from the master */
	uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size *
						  num_subctxts);
	if (!uctxt->subctxt_rcvhdr_base) {
		ret = -ENOMEM;
		goto bail_ureg;
	}

	uctxt->subctxt_rcvegrbuf = vmalloc_user(uctxt->egrbufs.size *
						num_subctxts);
	if (!uctxt->subctxt_rcvegrbuf) {
		ret = -ENOMEM;
		goto bail_rhdr;
	}
	goto bail;
bail_rhdr:
	vfree(uctxt->subctxt_rcvhdr_base);
bail_ureg:
	vfree(uctxt->subctxt_uregbase);
	uctxt->subctxt_uregbase = NULL;
bail:
	return ret;
}

static int user_init(struct file *fp)
{
	unsigned int rcvctrl_ops = 0;
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;

	/* make sure that the context has already been setup */
	if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
		return -EFAULT;

	/* initialize poll variables... */
	uctxt->urgent = 0;
	uctxt->urgent_poll = 0;

	/*
	 * Now enable the ctxt for receive.
	 * For chips that are set to DMA the tail register to memory
	 * when it changes (and when the update bit transitions from
	 * 0 to 1), we turn it off and then back on.
	 * This will (very briefly) affect any other open ctxts, but the
	 * duration is very short, and therefore isn't an issue. We
	 * explicitly set the in-memory tail copy to 0 beforehand, so we
	 * don't have to wait to be sure the DMA update has happened
	 * (chip resets head/tail to 0 on transition to enable).
	 */
	if (uctxt->rcvhdrtail_kvaddr)
		clear_rcvhdrtail(uctxt);

	/* Setup J_KEY before enabling the context */
	hfi1_set_ctxt_jkey(uctxt->dd, uctxt->ctxt, uctxt->jkey);

	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, HDRSUPP))
		rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
	/*
	 * Ignore the bit in the flags for now until proper
	 * support for multiple packet per rcv array entry is
	 * added.
	 */
	if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
	/*
	 * The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
	 * We can't rely on the correct value to be set from prior
	 * uses of the chip or ctxt. Therefore, add the rcvctrl op
	 * for both cases.
	 */
	if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
	else
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);

	/* Notify any waiting slaves */
	if (uctxt->subctxt_cnt) {
		clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
		wake_up(&uctxt->wait);
	}

	return 0;
}

static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
{
	struct hfi1_ctxt_info cinfo;
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	int ret = 0;

	memset(&cinfo, 0, sizeof(cinfo));
	ret = hfi1_get_base_kinfo(uctxt, &cinfo);
	if (ret < 0)
		goto done;
	cinfo.num_active = hfi1_count_active_units();
	cinfo.unit = uctxt->dd->unit;
	cinfo.ctxt = uctxt->ctxt;
	cinfo.subctxt = fd->subctxt;
	cinfo.rcvtids = roundup(uctxt->egrbufs.alloced,
				uctxt->dd->rcv_entries.group_size) +
		uctxt->expected_count;
	cinfo.credits = uctxt->sc->credits;
	cinfo.numa_node = uctxt->numa_id;
	cinfo.rec_cpu = fd->rec_cpu_num;
	cinfo.send_ctxt = uctxt->sc->hw_context;

	cinfo.egrtids = uctxt->egrbufs.alloced;
	cinfo.rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
	cinfo.rcvhdrq_entsize = uctxt->rcvhdrqentsize << 2;
	cinfo.sdma_ring_size = fd->cq->nentries;
	cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;

	trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo);
	if (copy_to_user(ubase, &cinfo, sizeof(cinfo)))
		ret = -EFAULT;
done:
	return ret;
}

static int setup_ctxt(struct file *fp)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	int ret = 0;

	/*
	 * Context should be set up only once, including allocation and
	 * programming of eager buffers. This is done if context sharing
	 * is not requested or by the master process.
	 */
	if (!uctxt->subctxt_cnt || !fd->subctxt) {
		ret = hfi1_init_ctxt(uctxt->sc);
		if (ret)
			goto done;

		/* Now allocate the RcvHdr queue and eager buffers. */
		ret = hfi1_create_rcvhdrq(dd, uctxt);
		if (ret)
			goto done;
		ret = hfi1_setup_eagerbufs(uctxt);
		if (ret)
			goto done;
		if (uctxt->subctxt_cnt && !fd->subctxt) {
			ret = setup_subctxt(uctxt);
			if (ret)
				goto done;
		}
	} else {
		ret = wait_event_interruptible(uctxt->wait, !test_bit(
					       HFI1_CTXT_MASTER_UNINIT,
					       &uctxt->event_flags));
		if (ret)
			goto done;
	}

	ret = hfi1_user_sdma_alloc_queues(uctxt, fp);
	if (ret)
		goto done;
	/*
	 * Expected receive has to be setup for all processes (including
	 * shared contexts). However, it has to be done after the master
	 * context has been fully configured as it depends on the
	 * eager/expected split of the RcvArray entries.
	 * Setting it up here ensures that the subcontexts will be waiting
	 * (due to the above wait_event_interruptible()) until the master
	 * is set up.
	 */
	ret = hfi1_user_exp_rcv_init(fp);
	if (ret)
		goto done;

	set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags);
done:
	return ret;
}

static int get_base_info(struct file *fp, void __user *ubase, __u32 len)
{
	struct hfi1_base_info binfo;
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	ssize_t sz;
	unsigned offset;
	int ret = 0;

	trace_hfi1_uctxtdata(uctxt->dd, uctxt);

	memset(&binfo, 0, sizeof(binfo));
	binfo.hw_version = dd->revision;
	binfo.sw_version = HFI1_KERN_SWVERSION;
	binfo.bthqp = kdeth_qp;
	binfo.jkey = uctxt->jkey;
	/*
	 * If more than 64 contexts are enabled the allocated credit
	 * return will span two or three contiguous pages. Since we only
	 * map the page containing the context's credit return address,
	 * we need to calculate the offset in the proper page.
	 */
	offset = ((u64)uctxt->sc->hw_free -
		  (u64)dd->cr_base[uctxt->numa_id].va) % PAGE_SIZE;
	binfo.sc_credits_addr = HFI1_MMAP_TOKEN(PIO_CRED, uctxt->ctxt,
						fd->subctxt, offset);
	binfo.pio_bufbase = HFI1_MMAP_TOKEN(PIO_BUFS, uctxt->ctxt,
					    fd->subctxt,
					    uctxt->sc->base_addr);
	binfo.pio_bufbase_sop = HFI1_MMAP_TOKEN(PIO_BUFS_SOP,
						uctxt->ctxt,
						fd->subctxt,
						uctxt->sc->base_addr);
	binfo.rcvhdr_bufbase = HFI1_MMAP_TOKEN(RCV_HDRQ, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->rcvhdrq);
	binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->egrbufs.rcvtids[0].phys);
	binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt,
						  fd->subctxt, 0);
	/*
	 * user regs are at
	 * (RXE_PER_CONTEXT_USER + (ctxt * RXE_PER_CONTEXT_SIZE))
	 */
	binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt,
					     fd->subctxt, 0);
	offset = offset_in_page((((uctxt->ctxt - dd->first_user_ctxt) *
		    HFI1_MAX_SHARED_CTXTS) + fd->subctxt) *
		  sizeof(*dd->events));
	binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt,
					       fd->subctxt,
					       offset);
	binfo.status_bufbase = HFI1_MMAP_TOKEN(STATUS, uctxt->ctxt,
					       fd->subctxt,
					       dd->status);
	if (HFI1_CAP_IS_USET(DMA_RTAIL))
		binfo.rcvhdrtail_base = HFI1_MMAP_TOKEN(RTAIL, uctxt->ctxt,
							fd->subctxt, 0);
	if (uctxt->subctxt_cnt) {
		binfo.subctxt_uregbase = HFI1_MMAP_TOKEN(SUBCTXT_UREGS,
							 uctxt->ctxt,
							 fd->subctxt, 0);
		binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ,
							  uctxt->ctxt,
							  fd->subctxt, 0);
		binfo.subctxt_rcvegrbuf = HFI1_MMAP_TOKEN(SUBCTXT_EGRBUF,
							  uctxt->ctxt,
							  fd->subctxt, 0);
	}
	sz = (len < sizeof(binfo)) ?
			len : sizeof(binfo);
	if (copy_to_user(ubase, &binfo, sz))
		ret = -EFAULT;
	return ret;
}

static unsigned int poll_urgent(struct file *fp,
				struct poll_table_struct *pt)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (uctxt->urgent != uctxt->urgent_poll) {
		pollflag = POLLIN | POLLRDNORM;
		uctxt->urgent_poll = uctxt->urgent;
	} else {
		pollflag = 0;
		set_bit(HFI1_CTXT_WAITING_URG, &uctxt->event_flags);
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

static unsigned int poll_next(struct file *fp,
			      struct poll_table_struct *pt)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (hdrqempty(uctxt)) {
		set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
		hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt->ctxt);
		pollflag = 0;
	} else {
		pollflag = POLLIN | POLLRDNORM;
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

/*
 * Find all user contexts in use, and set the specified bit in their
 * event mask.
 * See also find_ctxt() for a similar use, that is specific to send buffers.
 */
int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
{
	struct hfi1_ctxtdata *uctxt;
	struct hfi1_devdata *dd = ppd->dd;
	unsigned ctxt;
	int ret = 0;
	unsigned long flags;

	if (!dd->events) {
		ret = -EINVAL;
		goto done;
	}

	spin_lock_irqsave(&dd->uctxt_lock, flags);
	for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts;
	     ctxt++) {
		uctxt = dd->rcd[ctxt];
		if (uctxt) {
			unsigned long *evs = dd->events +
				(uctxt->ctxt - dd->first_user_ctxt) *
				HFI1_MAX_SHARED_CTXTS;
			int i;
			/*
			 * subctxt_cnt is 0 if not shared, so do base
			 * separately, first, then remaining subctxt, if any
			 */
			set_bit(evtbit, evs);
			for (i = 1; i < uctxt->subctxt_cnt; i++)
				set_bit(evtbit, evs + i);
		}
	}
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);
done:
	return ret;
}

/**
 * manage_rcvq - manage a context's receive queue
 * @uctxt: the context
 * @subctxt: the sub-context
 * @start_stop: action to carry out
 *
 * start_stop == 0 disables receive on the context, for use in queue
 * overflow conditions.  start_stop == 1 re-enables, to be used to
 * re-init the software copy of the head register
 */
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
		       int start_stop)
{
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned int rcvctrl_op;

	if (subctxt)
		goto bail;
	/* atomically clear receive enable ctxt. */
	if (start_stop) {
		/*
		 * On enable, force in-memory copy of the tail register to
		 * 0, so that protocol code doesn't have to worry about
		 * whether or not the chip has yet updated the in-memory
		 * copy on return from the system call. The chip always
		 * resets its tail register back to 0 on a
		 * transition from disabled to enabled.
		 */
		if (uctxt->rcvhdrtail_kvaddr)
			clear_rcvhdrtail(uctxt);
		rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
	} else {
		rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
	}
	hfi1_rcvctrl(dd, rcvctrl_op, uctxt->ctxt);
	/* always; new head should be equal to new tail; see above */
bail:
	return 0;
}

/*
 * clear the event notifier events for this context.
 * User process then performs actions appropriate to bit having been
 * set, if desired, and checks again in future.
 */
static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
			  unsigned long events)
{
	int i;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned long *evs;

	if (!dd->events)
		return 0;

	evs = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) *
			    HFI1_MAX_SHARED_CTXTS) + subctxt;

	for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) {
		if (!test_bit(i, &events))
			continue;
		clear_bit(i, evs);
	}
	return 0;
}

static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
			 u16 pkey)
{
	int ret = -ENOENT, i, intable = 0;
	struct hfi1_pportdata *ppd = uctxt->ppd;
	struct hfi1_devdata *dd = uctxt->dd;

	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY) {
		ret = -EINVAL;
		goto done;
	}

	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++)
		if (pkey == ppd->pkeys[i]) {
			intable = 1;
			break;
		}

	if (intable)
		ret = hfi1_set_ctxt_pkey(dd, uctxt->ctxt, pkey);
done:
	return ret;
}

static void user_remove(struct hfi1_devdata *dd)
{
	hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device);
}

static int user_add(struct hfi1_devdata *dd)
{
	char name[10];
	int ret;

	snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
	ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
			     &dd->user_cdev, &dd->user_device,
			     true, &dd->kobj);
	if (ret)
		user_remove(dd);

	return ret;
}

/*
 * Create per-unit files in /dev
 */
int hfi1_device_create(struct hfi1_devdata *dd)
{
	return user_add(dd);
}

/*
 * Remove per-unit files in /dev
 * void, core kernel returns no errors for this stuff
 */
void hfi1_device_remove(struct hfi1_devdata *dd)
{
	user_remove(dd);
}