/*
 * Copyright(c) 2015, 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/poll.h>
#include <linux/cdev.h>
#include <linux/vmalloc.h>
#include <linux/io.h>

#include <rdma/ib.h>

#include "hfi.h"
#include "pio.h"
#include "device.h"
#include "common.h"
#include "trace.h"
#include "user_sdma.h"
#include "user_exp_rcv.h"
#include "aspm.h"
#include "mmu_rb.h"

#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt

#define SEND_CTXT_HALT_TIMEOUT 1000 /* msecs */

/*
 * File operation functions
 */
static int hfi1_file_open(struct inode *, struct file *);
static int hfi1_file_close(struct inode *, struct file *);
static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *);
static unsigned int hfi1_poll(struct file *, struct poll_table_struct *);
static int hfi1_file_mmap(struct file *, struct vm_area_struct *);

static u64 kvirt_to_phys(void *);
static int assign_ctxt(struct file *, struct hfi1_user_info *);
static int init_subctxts(struct hfi1_ctxtdata *, const struct hfi1_user_info *);
static int user_init(struct file *);
static int get_ctxt_info(struct file *, void __user *, __u32);
static int get_base_info(struct file *, void __user *, __u32);
static int setup_ctxt(struct file *);
static int setup_subctxt(struct hfi1_ctxtdata *);
static int get_user_context(struct file *, struct hfi1_user_info *, int);
static int find_shared_ctxt(struct file *, const struct hfi1_user_info *);
static int allocate_ctxt(struct file *, struct hfi1_devdata *,
			 struct hfi1_user_info *);
static unsigned int poll_urgent(struct file *, struct poll_table_struct *);
static unsigned int poll_next(struct file *, struct poll_table_struct *);
static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long);
static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
static int vma_fault(struct vm_fault *);
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg);

static const struct file_operations hfi1_file_ops = {
	.owner = THIS_MODULE,
	.write_iter = hfi1_write_iter,
	.open = hfi1_file_open,
	.release = hfi1_file_close,
	.unlocked_ioctl = hfi1_file_ioctl,
	.poll = hfi1_poll,
	.mmap = hfi1_file_mmap,
	.llseek = noop_llseek,
};

static struct vm_operations_struct vm_ops = {
	.fault = vma_fault,
};

/*
 * Types of memories mapped into user processes' space
 */
enum mmap_types {
	PIO_BUFS = 1,
	PIO_BUFS_SOP,
	PIO_CRED,
	RCV_HDRQ,
	RCV_EGRBUF,
	UREGS,
	EVENTS,
	STATUS,
	RTAIL,
	SUBCTXT_UREGS,
	SUBCTXT_RCV_HDRQ,
	SUBCTXT_EGRBUF,
	SDMA_COMP
};

/*
 * Masks and offsets defining the mmap tokens
 */
#define HFI1_MMAP_OFFSET_MASK   0xfffULL
#define HFI1_MMAP_OFFSET_SHIFT  0
#define HFI1_MMAP_SUBCTXT_MASK  0xfULL
#define HFI1_MMAP_SUBCTXT_SHIFT 12
#define HFI1_MMAP_CTXT_MASK     0xffULL
#define HFI1_MMAP_CTXT_SHIFT    16
#define HFI1_MMAP_TYPE_MASK     0xfULL
#define HFI1_MMAP_TYPE_SHIFT    24
#define HFI1_MMAP_MAGIC_MASK    0xffffffffULL
#define HFI1_MMAP_MAGIC_SHIFT   32

#define HFI1_MMAP_MAGIC         0xdabbad00

#define HFI1_MMAP_TOKEN_SET(field, val)	\
	(((val) & HFI1_MMAP_##field##_MASK) << HFI1_MMAP_##field##_SHIFT)
#define HFI1_MMAP_TOKEN_GET(field, token) \
	(((token) >> HFI1_MMAP_##field##_SHIFT) & HFI1_MMAP_##field##_MASK)
#define HFI1_MMAP_TOKEN(type, ctxt, subctxt, addr)   \
	(HFI1_MMAP_TOKEN_SET(MAGIC, HFI1_MMAP_MAGIC) | \
	HFI1_MMAP_TOKEN_SET(TYPE, type) | \
	HFI1_MMAP_TOKEN_SET(CTXT, ctxt) | \
	HFI1_MMAP_TOKEN_SET(SUBCTXT, subctxt) | \
	HFI1_MMAP_TOKEN_SET(OFFSET, (offset_in_page(addr))))

#define dbg(fmt, ...)				\
	pr_info(fmt, ##__VA_ARGS__)

static inline int is_valid_mmap(u64 token)
{
	return (HFI1_MMAP_TOKEN_GET(MAGIC, token) == HFI1_MMAP_MAGIC);
}

static int hfi1_file_open(struct inode *inode, struct file *fp)
{
	struct hfi1_filedata *fd;
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);

	if (!atomic_inc_not_zero(&dd->user_refcount))
		return -ENXIO;

	/* Just take a ref now. Not all opens result in a context assign */
	kobject_get(&dd->kobj);

	/* The real work is performed later in assign_ctxt() */

	fd = kzalloc(sizeof(*fd), GFP_KERNEL);

	if (fd) {
		fd->rec_cpu_num = -1; /* no cpu affinity by default */
		fd->mm = current->mm;
		atomic_inc(&fd->mm->mm_count);
		fp->private_data = fd;
	} else {
		fp->private_data = NULL;

		if (atomic_dec_and_test(&dd->user_refcount))
			complete(&dd->user_comp);

		return -ENOMEM;
	}

	return 0;
}

static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_user_info uinfo;
	struct hfi1_tid_info tinfo;
	int ret = 0;
	unsigned long addr;
	int uval = 0;
	unsigned long ul_uval = 0;
	u16 uval16 = 0;

	hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd);
	if (cmd != HFI1_IOCTL_ASSIGN_CTXT &&
	    cmd != HFI1_IOCTL_GET_VERS &&
	    !uctxt)
		return -EINVAL;

	switch (cmd) {
	case HFI1_IOCTL_ASSIGN_CTXT:
		if (uctxt)
			return -EINVAL;

		if (copy_from_user(&uinfo,
				   (struct hfi1_user_info __user *)arg,
				   sizeof(uinfo)))
			return -EFAULT;

		ret = assign_ctxt(fp, &uinfo);
		if (ret < 0)
			return ret;
		ret = setup_ctxt(fp);
		if (ret)
			return ret;
		ret = user_init(fp);
		break;
	case HFI1_IOCTL_CTXT_INFO:
		ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg,
				    sizeof(struct hfi1_ctxt_info));
		break;
	case HFI1_IOCTL_USER_INFO:
		ret = get_base_info(fp, (void __user *)(unsigned long)arg,
				    sizeof(struct hfi1_base_info));
		break;
	case HFI1_IOCTL_CREDIT_UPD:
		if (uctxt)
			sc_return_credits(uctxt->sc);
		break;

	case HFI1_IOCTL_TID_UPDATE:
		if (copy_from_user(&tinfo,
				   (struct hfi1_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

		ret = hfi1_user_exp_rcv_setup(fp, &tinfo);
		if (!ret) {
			/*
			 * Copy the number of tidlist entries we used
			 * and the length of the buffer we registered.
			 * These fields are adjacent in the structure so
			 * we can copy them at the same time.
			 */
			addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
			if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
					 sizeof(tinfo.tidcnt) +
					 sizeof(tinfo.length)))
				ret = -EFAULT;
		}
		break;

	case HFI1_IOCTL_TID_FREE:
		if (copy_from_user(&tinfo,
				   (struct hfi1_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

		ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
		if (ret)
			break;
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			ret = -EFAULT;
		break;

	case HFI1_IOCTL_TID_INVAL_READ:
		if (copy_from_user(&tinfo,
				   (struct hfi1_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

		ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
		if (ret)
			break;
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			ret = -EFAULT;
		break;

	case HFI1_IOCTL_RECV_CTRL:
		ret = get_user(uval, (int __user *)arg);
		if (ret != 0)
			return -EFAULT;
		ret = manage_rcvq(uctxt, fd->subctxt, uval);
		break;

	case HFI1_IOCTL_POLL_TYPE:
		ret = get_user(uval, (int __user *)arg);
		if (ret != 0)
			return -EFAULT;
		uctxt->poll_type = (typeof(uctxt->poll_type))uval;
		break;

	case HFI1_IOCTL_ACK_EVENT:
		ret = get_user(ul_uval, (unsigned long __user *)arg);
		if (ret != 0)
			return -EFAULT;
		ret = user_event_ack(uctxt, fd->subctxt, ul_uval);
		break;

	case HFI1_IOCTL_SET_PKEY:
		ret = get_user(uval16, (u16 __user *)arg);
		if (ret != 0)
			return -EFAULT;
		if (HFI1_CAP_IS_USET(PKEY_CHECK))
			ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16);
		else
			return -EPERM;
		break;

	case HFI1_IOCTL_CTXT_RESET: {
		struct send_context *sc;
		struct hfi1_devdata *dd;

		if (!uctxt || !uctxt->dd || !uctxt->sc)
			return -EINVAL;

		/*
		 * There is no protection here. User level has to
		 * guarantee that no one will be writing to the send
		 * context while it is being re-initialized.
		 * If user level breaks that guarantee, it will break
		 * its own context and no one else's.
		 */
		dd = uctxt->dd;
		sc = uctxt->sc;
		/*
		 * Wait until the interrupt handler has marked the
		 * context as halted or frozen. Report an error if we
		 * time out.
		 */
		wait_event_interruptible_timeout(
			sc->halt_wait, (sc->flags & SCF_HALTED),
			msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
		if (!(sc->flags & SCF_HALTED))
			return -ENOLCK;

		/*
		 * If the send context was halted due to a Freeze,
		 * wait until the device has been "unfrozen" before
		 * resetting the context.
		 */
		if (sc->flags & SCF_FROZEN) {
			wait_event_interruptible_timeout(
				dd->event_queue,
				!(ACCESS_ONCE(dd->flags) & HFI1_FROZEN),
				msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
			if (dd->flags & HFI1_FROZEN)
				return -ENOLCK;

			if (dd->flags & HFI1_FORCED_FREEZE)
				/*
				 * Don't allow context reset if we are in
				 * a forced freeze
				 */
				return -ENODEV;

			sc_disable(sc);
			ret = sc_enable(sc);
			hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB,
				     uctxt->ctxt);
		} else {
			ret = sc_restart(sc);
		}
		if (!ret)
			sc_return_credits(sc);
		break;
	}

	case HFI1_IOCTL_GET_VERS:
		uval = HFI1_USER_SWVERSION;
		if (put_user(uval, (int __user *)arg))
			return -EFAULT;
		break;

	default:
		return -EINVAL;
	}

	return ret;
}

static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
	struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
	struct hfi1_user_sdma_pkt_q *pq = fd->pq;
	struct hfi1_user_sdma_comp_q *cq = fd->cq;
	int done = 0, reqs = 0;
	unsigned long dim = from->nr_segs;

	if (!cq || !pq)
		return -EIO;

	if (!iter_is_iovec(from) || !dim)
		return -EINVAL;

	hfi1_cdbg(SDMA, "SDMA request from %u:%u (%lu)",
		  fd->uctxt->ctxt, fd->subctxt, dim);

	if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
		return -ENOSPC;

	while (dim) {
		int ret;
		unsigned long count = 0;

		ret = hfi1_user_sdma_process_request(
			kiocb->ki_filp, (struct iovec *)(from->iov + done),
			dim, &count);
		if (ret) {
			reqs = ret;
			break;
		}
		dim -= count;
		done += count;
		reqs++;
	}

	return reqs;
}

static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd;
	unsigned long flags;
	u64 token = vma->vm_pgoff << PAGE_SHIFT,
		memaddr = 0;
	void *memvirt = NULL;
	u8 subctxt, mapio = 0, vmf = 0, type;
	ssize_t memlen = 0;
	int ret = 0;
	u16 ctxt;

	if (!is_valid_mmap(token) || !uctxt ||
	    !(vma->vm_flags & VM_SHARED)) {
		ret = -EINVAL;
		goto done;
	}
	dd = uctxt->dd;
	ctxt = HFI1_MMAP_TOKEN_GET(CTXT, token);
	subctxt = HFI1_MMAP_TOKEN_GET(SUBCTXT, token);
	type = HFI1_MMAP_TOKEN_GET(TYPE, token);
	if (ctxt != uctxt->ctxt || subctxt != fd->subctxt) {
		ret = -EINVAL;
		goto done;
	}

	flags = vma->vm_flags;

	switch (type) {
	case PIO_BUFS:
	case PIO_BUFS_SOP:
		memaddr = ((dd->physaddr + TXE_PIO_SEND) +
				/* chip pio base */
			   (uctxt->sc->hw_context * BIT(16))) +
				/* 64K PIO space / ctxt */
			   (type == PIO_BUFS_SOP ?
				(TXE_PIO_SIZE / 2) : 0); /* sop? */
		/*
		 * Map only the amount allocated to the context, not the
		 * entire available context's PIO space.
		 */
		memlen = PAGE_ALIGN(uctxt->sc->credits * PIO_BLOCK_SIZE);
		flags &= ~VM_MAYREAD;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
		mapio = 1;
		break;
	case PIO_CRED:
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		/*
		 * The credit return location for this context could be on the
		 * second or third page allocated for credit returns (if number
		 * of enabled contexts > 64 and 128 respectively).
		 */
		memvirt = dd->cr_base[uctxt->numa_id].va;
		memaddr = virt_to_phys(memvirt) +
			(((u64)uctxt->sc->hw_free -
			  (u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK);
		memlen = PAGE_SIZE;
		flags &= ~VM_MAYWRITE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		/*
		 * The driver has already allocated memory for credit
		 * returns and programmed it into the chip. Has that
		 * memory been flagged as non-cached?
		 */
		/* vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); */
		mapio = 1;
		break;
	case RCV_HDRQ:
		memlen = uctxt->rcvhdrq_size;
		memvirt = uctxt->rcvhdrq;
		break;
	case RCV_EGRBUF: {
		unsigned long addr;
		int i;
		/*
		 * The RcvEgr buffer needs to be handled differently
		 * as multiple non-contiguous pages need to be mapped
		 * into the user process.
		 */
		memlen = uctxt->egrbufs.size;
		if ((vma->vm_end - vma->vm_start) != memlen) {
			dd_dev_err(dd, "Eager buffer map size invalid (%lu != %lu)\n",
				   (vma->vm_end - vma->vm_start), memlen);
			ret = -EINVAL;
			goto done;
		}
		if (vma->vm_flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		vma->vm_flags &= ~VM_MAYWRITE;
		addr = vma->vm_start;
		for (i = 0; i < uctxt->egrbufs.numbufs; i++) {
			memlen = uctxt->egrbufs.buffers[i].len;
			memvirt = uctxt->egrbufs.buffers[i].addr;
			ret = remap_pfn_range(
				vma, addr,
				/*
				 * virt_to_pfn() does the same, but
				 * it's not available on x86_64
				 * when CONFIG_MMU is enabled.
				 */
				PFN_DOWN(__pa(memvirt)),
				memlen,
				vma->vm_page_prot);
			if (ret < 0)
				goto done;
			addr += memlen;
		}
		ret = 0;
		goto done;
	}
	case UREGS:
		/*
		 * Map only the page that contains this context's user
		 * registers.
		 */
		memaddr = (unsigned long)
			(dd->physaddr + RXE_PER_CONTEXT_USER)
			+ (uctxt->ctxt * RXE_PER_CONTEXT_SIZE);
		/*
		 * TidFlow table is on the same page as the rest of the
		 * user registers.
		 */
		memlen = PAGE_SIZE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
		mapio = 1;
		break;
	case EVENTS:
		/*
		 * Use the page where this context's flags are. User level
		 * knows where its own bitmap is within the page.
		 */
		memaddr = (unsigned long)(dd->events +
					  ((uctxt->ctxt - dd->first_user_ctxt) *
					   HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
		memlen = PAGE_SIZE;
		/*
		 * v3.7 removes VM_RESERVED but the effect is kept by
		 * using VM_IO.
		 */
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case STATUS:
		memaddr = kvirt_to_phys((void *)dd->status);
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		break;
	case RTAIL:
		if (!HFI1_CAP_IS_USET(DMA_RTAIL)) {
			/*
			 * If the memory allocation failed, the context alloc
			 * also would have failed, so we would never get here
			 */
			ret = -EINVAL;
			goto done;
		}
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		memlen = PAGE_SIZE;
		memvirt = (void *)uctxt->rcvhdrtail_kvaddr;
		flags &= ~VM_MAYWRITE;
		break;
	case SUBCTXT_UREGS:
		memaddr = (u64)uctxt->subctxt_uregbase;
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_RCV_HDRQ:
		memaddr = (u64)uctxt->subctxt_rcvhdr_base;
		memlen = uctxt->rcvhdrq_size * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_EGRBUF:
		memaddr = (u64)uctxt->subctxt_rcvegrbuf;
		memlen = uctxt->egrbufs.size * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		flags &= ~VM_MAYWRITE;
		vmf = 1;
		break;
	case SDMA_COMP: {
		struct hfi1_user_sdma_comp_q *cq = fd->cq;

		if (!cq) {
			ret = -EFAULT;
			goto done;
		}
		memaddr = (u64)cq->comps;
		memlen = PAGE_ALIGN(sizeof(*cq->comps) * cq->nentries);
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}

	if ((vma->vm_end - vma->vm_start) != memlen) {
		hfi1_cdbg(PROC, "%u:%u Memory size mismatch %lu:%lu",
			  uctxt->ctxt, fd->subctxt,
			  (vma->vm_end - vma->vm_start), memlen);
		ret = -EINVAL;
		goto done;
	}

	vma->vm_flags = flags;
	hfi1_cdbg(PROC,
		  "%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n",
		  ctxt, subctxt, type, mapio, vmf, memaddr, memlen,
		  vma->vm_end - vma->vm_start, vma->vm_flags);
	if (vmf) {
		vma->vm_pgoff = PFN_DOWN(memaddr);
		vma->vm_ops = &vm_ops;
		ret = 0;
	} else if (mapio) {
		ret = io_remap_pfn_range(vma, vma->vm_start,
					 PFN_DOWN(memaddr),
					 memlen,
					 vma->vm_page_prot);
	} else if (memvirt) {
		ret = remap_pfn_range(vma, vma->vm_start,
				      PFN_DOWN(__pa(memvirt)),
				      memlen,
				      vma->vm_page_prot);
	} else {
		ret = remap_pfn_range(vma, vma->vm_start,
				      PFN_DOWN(memaddr),
				      memlen,
				      vma->vm_page_prot);
	}
done:
	return ret;
}

/*
 * Local (non-chip) user memory is not mapped right away but as it is
 * accessed by the user-level code.
 */
static int vma_fault(struct vm_fault *vmf)
{
	struct page *page;

	page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
	if (!page)
		return VM_FAULT_SIGBUS;

	get_page(page);
	vmf->page = page;

	return 0;
}

static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt)
{
	struct hfi1_ctxtdata *uctxt;
	unsigned pollflag;

	uctxt = ((struct hfi1_filedata *)fp->private_data)->uctxt;
	if (!uctxt)
		pollflag = POLLERR;
	else if (uctxt->poll_type == HFI1_POLL_TYPE_URGENT)
		pollflag = poll_urgent(fp, pt);
	else if (uctxt->poll_type == HFI1_POLL_TYPE_ANYRCV)
		pollflag = poll_next(fp, pt);
	else /* invalid */
		pollflag = POLLERR;

	return pollflag;
}

static int hfi1_file_close(struct inode *inode, struct file *fp)
{
	struct hfi1_filedata *fdata = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);
	unsigned long flags, *ev;

	fp->private_data = NULL;

	if (!uctxt)
		goto done;

	hfi1_cdbg(PROC, "freeing ctxt %u:%u", uctxt->ctxt, fdata->subctxt);
	mutex_lock(&hfi1_mutex);

	flush_wc();
	/* drain user sdma queue */
	hfi1_user_sdma_free_queues(fdata);

	/* release the cpu */
	hfi1_put_proc_affinity(fdata->rec_cpu_num);

	/*
	 * Clear any left over, unhandled events so the next process that
	 * gets this context doesn't get confused.
	 */
	ev = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) *
			   HFI1_MAX_SHARED_CTXTS) + fdata->subctxt;
	*ev = 0;

	if (--uctxt->cnt) {
		uctxt->active_slaves &= ~(1 << fdata->subctxt);
		mutex_unlock(&hfi1_mutex);
		goto done;
	}

	spin_lock_irqsave(&dd->uctxt_lock, flags);
	/*
	 * Disable receive context and interrupt available, reset all
	 * RcvCtxtCtrl bits to default values.
	 */
	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
		     HFI1_RCVCTRL_TIDFLOW_DIS |
		     HFI1_RCVCTRL_INTRAVAIL_DIS |
		     HFI1_RCVCTRL_TAILUPD_DIS |
		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
	/* Clear the context's J_KEY */
	hfi1_clear_ctxt_jkey(dd, uctxt->ctxt);
	/*
	 * Reset context integrity checks to default.
	 * (writes to CSRs probably belong in chip.c)
	 */
	write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
			hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type));
	sc_disable(uctxt->sc);
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	dd->rcd[uctxt->ctxt] = NULL;

	hfi1_user_exp_rcv_free(fdata);
	hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);

	uctxt->rcvwait_to = 0;
	uctxt->piowait_to = 0;
	uctxt->rcvnowait = 0;
	uctxt->pionowait = 0;
	uctxt->event_flags = 0;

	hfi1_stats.sps_ctxts--;
	if (++dd->freectxts == dd->num_user_contexts)
		aspm_enable_all(dd);
	mutex_unlock(&hfi1_mutex);
	hfi1_free_ctxtdata(dd, uctxt);
done:
	mmdrop(fdata->mm);
	kobject_put(&dd->kobj);

	if (atomic_dec_and_test(&dd->user_refcount))
		complete(&dd->user_comp);

	kfree(fdata);
	return 0;
}

/*
 * Convert kernel *virtual* addresses to physical addresses.
 * This is used for vmalloc'ed addresses.
 */
static u64 kvirt_to_phys(void *addr)
{
	struct page *page;
	u64 paddr = 0;

	page = vmalloc_to_page(addr);
	if (page)
		paddr = page_to_pfn(page) << PAGE_SHIFT;

	return paddr;
}

static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
{
	int i_minor, ret = 0;
	unsigned int swmajor, swminor;

	swmajor = uinfo->userversion >> 16;
	if (swmajor != HFI1_USER_SWMAJOR) {
		ret = -ENODEV;
		goto done;
	}

	swminor = uinfo->userversion & 0xffff;

	mutex_lock(&hfi1_mutex);
	/* First, let's check whether we need to set up a shared context. */
	if (uinfo->subctxt_cnt) {
		struct hfi1_filedata *fd = fp->private_data;

		ret = find_shared_ctxt(fp, uinfo);
		if (ret < 0)
			goto done_unlock;
		if (ret) {
			fd->rec_cpu_num =
				hfi1_get_proc_affinity(fd->uctxt->numa_id);
		}
	}

	/*
	 * We execute the following block if we couldn't find a
	 * shared context or if context sharing is not required.
	 */
	if (!ret) {
		i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
		ret = get_user_context(fp, uinfo, i_minor);
	}
done_unlock:
	mutex_unlock(&hfi1_mutex);
done:
	return ret;
}

static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo,
			    int devno)
{
	struct hfi1_devdata *dd = NULL;
	int devmax, npresent, nup;

	devmax = hfi1_count_units(&npresent, &nup);
	if (!npresent)
		return -ENXIO;

	if (!nup)
		return -ENETDOWN;

	dd = hfi1_lookup(devno);
	if (!dd)
		return -ENODEV;
	else if (!dd->freectxts)
		return -EBUSY;

	return allocate_ctxt(fp, dd, uinfo);
}

static int find_shared_ctxt(struct file *fp,
			    const struct hfi1_user_info *uinfo)
{
	int devmax, ndev, i;
	int ret = 0;
	struct hfi1_filedata *fd = fp->private_data;

	devmax = hfi1_count_units(NULL, NULL);

	for (ndev = 0; ndev < devmax; ndev++) {
		struct hfi1_devdata *dd = hfi1_lookup(ndev);

		if (!(dd && (dd->flags & HFI1_PRESENT) && dd->kregbase))
			continue;
		for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) {
			struct hfi1_ctxtdata *uctxt = dd->rcd[i];

			/* Skip ctxts which are not yet open */
			if (!uctxt || !uctxt->cnt)
				continue;
			/* Skip ctxt if it doesn't match the requested one */
			if (memcmp(uctxt->uuid, uinfo->uuid,
				   sizeof(uctxt->uuid)) ||
			    uctxt->jkey != generate_jkey(current_uid()) ||
			    uctxt->subctxt_id != uinfo->subctxt_id ||
			    uctxt->subctxt_cnt != uinfo->subctxt_cnt)
				continue;

			/* Verify the sharing process matches the master */
			if (uctxt->userversion != uinfo->userversion ||
			    uctxt->cnt >= uctxt->subctxt_cnt) {
				ret = -EINVAL;
				goto done;
			}
			fd->uctxt = uctxt;
			fd->subctxt = uctxt->cnt++;
			uctxt->active_slaves |= 1 << fd->subctxt;
			ret = 1;
			goto done;
		}
	}

done:
	return ret;
}

static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
			 struct hfi1_user_info *uinfo)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt;
	unsigned ctxt;
	int ret, numa;

	if (dd->flags & HFI1_FROZEN) {
		/*
		 * Pick an error that is unique from all other errors
		 * that are returned so the user process knows that
		 * it tried to allocate while the SPC was frozen. It
		 * should be able to retry with success in a short
		 * while.
		 */
		return -EIO;
	}

	for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts; ctxt++)
		if (!dd->rcd[ctxt])
			break;

	if (ctxt == dd->num_rcv_contexts)
		return -EBUSY;

	/*
	 * If we don't have a NUMA node requested, preference is towards
	 * device NUMA node.
	 */
	fd->rec_cpu_num = hfi1_get_proc_affinity(dd->node);
	if (fd->rec_cpu_num != -1)
		numa = cpu_to_node(fd->rec_cpu_num);
	else
		numa = numa_node_id();
	uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, numa);
	if (!uctxt) {
		dd_dev_err(dd,
			   "Unable to allocate ctxtdata memory, failing open\n");
		return -ENOMEM;
	}
	hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)",
		  uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num,
		  uctxt->numa_id);

	/*
	 * Allocate and enable a PIO send context.
	 */
	uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize,
			     uctxt->dd->node);
	if (!uctxt->sc) {
		ret = -ENOMEM;
		goto ctxdata_free;
	}
	hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index,
		  uctxt->sc->hw_context);
	ret = sc_enable(uctxt->sc);
	if (ret)
		goto ctxdata_free;

	/*
	 * Set up shared context resources if the user level has requested
	 * shared contexts and this is the 'master' process.
	 * This has to be done here so the rest of the sub-contexts find the
	 * proper master.
	 */
	if (uinfo->subctxt_cnt && !fd->subctxt) {
		ret = init_subctxts(uctxt, uinfo);
		/*
		 * On error, we don't need to disable and de-allocate the
		 * send context because it will be done during file close
		 */
		if (ret)
			goto ctxdata_free;
	}
	uctxt->userversion = uinfo->userversion;
	uctxt->flags = hfi1_cap_mask; /* save current flag state */
	init_waitqueue_head(&uctxt->wait);
	strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
	memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid));
	uctxt->jkey = generate_jkey(current_uid());
	INIT_LIST_HEAD(&uctxt->sdma_queues);
	spin_lock_init(&uctxt->sdma_qlock);
	hfi1_stats.sps_ctxts++;
	/*
	 * Disable ASPM when there are open user/PSM contexts to avoid
	 * issues with ASPM L1 exit latency
	 */
	if (dd->freectxts-- == dd->num_user_contexts)
		aspm_disable_all(dd);
	fd->uctxt = uctxt;

	return 0;

ctxdata_free:
	dd->rcd[ctxt] = NULL;
	hfi1_free_ctxtdata(dd, uctxt);
	return ret;
}

static int init_subctxts(struct hfi1_ctxtdata *uctxt,
			 const struct hfi1_user_info *uinfo)
{
	unsigned num_subctxts;

	num_subctxts = uinfo->subctxt_cnt;
	if (num_subctxts > HFI1_MAX_SHARED_CTXTS)
		return -EINVAL;

	uctxt->subctxt_cnt = uinfo->subctxt_cnt;
	uctxt->subctxt_id = uinfo->subctxt_id;
	uctxt->active_slaves = 1;
	uctxt->redirect_seq_cnt = 1;
	set_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);

	return 0;
}

static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
{
	int ret = 0;
	unsigned num_subctxts = uctxt->subctxt_cnt;

	uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE);
	if (!uctxt->subctxt_uregbase) {
		ret = -ENOMEM;
		goto bail;
	}
	/* We can take the size of the RcvHdr Queue from the master */
	uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size *
						  num_subctxts);
	if (!uctxt->subctxt_rcvhdr_base) {
		ret = -ENOMEM;
		goto bail_ureg;
	}

	uctxt->subctxt_rcvegrbuf = vmalloc_user(uctxt->egrbufs.size *
						num_subctxts);
	if (!uctxt->subctxt_rcvegrbuf) {
		ret = -ENOMEM;
		goto bail_rhdr;
	}
	goto bail;
bail_rhdr:
	vfree(uctxt->subctxt_rcvhdr_base);
bail_ureg:
	vfree(uctxt->subctxt_uregbase);
	uctxt->subctxt_uregbase = NULL;
bail:
	return ret;
}

static int user_init(struct file *fp)
{
	unsigned int rcvctrl_ops = 0;
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;

	/* make sure that the context has already been setup */
	if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
		return -EFAULT;

	/* initialize poll variables... */
	uctxt->urgent = 0;
	uctxt->urgent_poll = 0;

	/*
	 * Now enable the ctxt for receive.
	 * For chips that are set to DMA the tail register to memory
	 * when it changes (and when the update bit transitions from
	 * 0 to 1), we turn it off and then back on.
	 * This will (very briefly) affect any other open ctxts, but the
	 * duration is very short, and therefore isn't an issue. We
	 * explicitly set the in-memory tail copy to 0 beforehand, so we
	 * don't have to wait to be sure the DMA update has happened
	 * (chip resets head/tail to 0 on transition to enable).
	 */
	if (uctxt->rcvhdrtail_kvaddr)
		clear_rcvhdrtail(uctxt);

	/* Set up J_KEY before enabling the context */
	hfi1_set_ctxt_jkey(uctxt->dd, uctxt->ctxt, uctxt->jkey);

	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
		rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
	/*
	 * Ignore the bit in the flags for now until proper
	 * support for multiple packets per rcv array entry is
	 * added.
	 */
	if (!HFI1_CAP_UGET_MASK(uctxt->flags, MULTI_PKT_EGR))
		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_EGR_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
	/*
	 * The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
	 * We can't rely on the correct value to be set from prior
	 * uses of the chip or ctxt. Therefore, add the rcvctrl op
	 * for both cases.
	 */
	if (HFI1_CAP_UGET_MASK(uctxt->flags, DMA_RTAIL))
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
	else
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);

	/* Notify any waiting slaves */
	if (uctxt->subctxt_cnt) {
		clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
		wake_up(&uctxt->wait);
	}

	return 0;
}

static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
{
	struct hfi1_ctxt_info cinfo;
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	int ret = 0;

	memset(&cinfo, 0, sizeof(cinfo));
	cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) &
				HFI1_CAP_MISC_MASK) << HFI1_CAP_USER_SHIFT) |
			HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
			HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
	/* adjust flag if this fd is not able to cache */
	if (!fd->handler)
		cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */

	cinfo.num_active = hfi1_count_active_units();
	cinfo.unit = uctxt->dd->unit;
	cinfo.ctxt = uctxt->ctxt;
	cinfo.subctxt = fd->subctxt;
	cinfo.rcvtids = roundup(uctxt->egrbufs.alloced,
				uctxt->dd->rcv_entries.group_size) +
		uctxt->expected_count;
	cinfo.credits = uctxt->sc->credits;
	cinfo.numa_node = uctxt->numa_id;
	cinfo.rec_cpu = fd->rec_cpu_num;
	cinfo.send_ctxt = uctxt->sc->hw_context;

	cinfo.egrtids = uctxt->egrbufs.alloced;
	cinfo.rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
	cinfo.rcvhdrq_entsize = uctxt->rcvhdrqentsize << 2;
	cinfo.sdma_ring_size = fd->cq->nentries;
	cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;

	trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo);
	if (copy_to_user(ubase, &cinfo, sizeof(cinfo)))
		ret = -EFAULT;

	return ret;
}

static int setup_ctxt(struct file *fp)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	int ret = 0;

	/*
	 * Context should be set up only once, including allocation and
	 * programming of eager buffers. This is done if context sharing
	 * is not requested or by the master process.
	 */
	if (!uctxt->subctxt_cnt || !fd->subctxt) {
		ret = hfi1_init_ctxt(uctxt->sc);
		if (ret)
			goto done;

		/* Now allocate the RcvHdr queue and eager buffers. */
		ret = hfi1_create_rcvhdrq(dd, uctxt);
		if (ret)
			goto done;
		ret = hfi1_setup_eagerbufs(uctxt);
		if (ret)
			goto done;
		if (uctxt->subctxt_cnt && !fd->subctxt) {
			ret = setup_subctxt(uctxt);
			if (ret)
				goto done;
		}
	} else {
		ret = wait_event_interruptible(uctxt->wait, !test_bit(
					       HFI1_CTXT_MASTER_UNINIT,
					       &uctxt->event_flags));
		if (ret)
			goto done;
	}

	ret = hfi1_user_sdma_alloc_queues(uctxt, fp);
	if (ret)
		goto done;
	/*
	 * Expected receive has to be set up for all processes (including
	 * shared contexts). However, it has to be done after the master
	 * context has been fully configured as it depends on the
	 * eager/expected split of the RcvArray entries.
	 * Setting it up here ensures that the subcontexts will be waiting
	 * (due to the above wait_event_interruptible()) until the master
	 * is set up.
	 */
	ret = hfi1_user_exp_rcv_init(fp);
	if (ret)
		goto done;

	set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags);
done:
	return ret;
}

static int get_base_info(struct file *fp, void __user *ubase, __u32 len)
{
	struct hfi1_base_info binfo;
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	ssize_t sz;
	unsigned offset;
	int ret = 0;

	trace_hfi1_uctxtdata(uctxt->dd, uctxt);

	memset(&binfo, 0, sizeof(binfo));
	binfo.hw_version = dd->revision;
	binfo.sw_version = HFI1_KERN_SWVERSION;
	binfo.bthqp = kdeth_qp;
	binfo.jkey = uctxt->jkey;
	/*
	 * If more than 64 contexts are enabled the allocated credit
	 * return will span two or three contiguous pages. Since we only
	 * map the page containing the context's credit return address,
	 * we need to calculate the offset in the proper page.
	 */
	offset = ((u64)uctxt->sc->hw_free -
		  (u64)dd->cr_base[uctxt->numa_id].va) % PAGE_SIZE;
	binfo.sc_credits_addr = HFI1_MMAP_TOKEN(PIO_CRED, uctxt->ctxt,
						fd->subctxt, offset);
	binfo.pio_bufbase = HFI1_MMAP_TOKEN(PIO_BUFS, uctxt->ctxt,
					    fd->subctxt,
					    uctxt->sc->base_addr);
	binfo.pio_bufbase_sop = HFI1_MMAP_TOKEN(PIO_BUFS_SOP,
						uctxt->ctxt,
						fd->subctxt,
						uctxt->sc->base_addr);
	binfo.rcvhdr_bufbase = HFI1_MMAP_TOKEN(RCV_HDRQ, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->rcvhdrq);
	binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->egrbufs.rcvtids[0].dma);
	binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt,
						  fd->subctxt, 0);
	/*
	 * user regs are at
	 * (RXE_PER_CONTEXT_USER + (ctxt * RXE_PER_CONTEXT_SIZE))
	 */
	binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt,
					     fd->subctxt, 0);
	offset = offset_in_page((((uctxt->ctxt - dd->first_user_ctxt) *
				  HFI1_MAX_SHARED_CTXTS) + fd->subctxt) *
				sizeof(*dd->events));
	binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt,
					       fd->subctxt,
					       offset);
	binfo.status_bufbase = HFI1_MMAP_TOKEN(STATUS, uctxt->ctxt,
					       fd->subctxt,
					       dd->status);
	if (HFI1_CAP_IS_USET(DMA_RTAIL))
		binfo.rcvhdrtail_base = HFI1_MMAP_TOKEN(RTAIL, uctxt->ctxt,
							fd->subctxt, 0);
	if (uctxt->subctxt_cnt) {
		binfo.subctxt_uregbase = HFI1_MMAP_TOKEN(SUBCTXT_UREGS,
							 uctxt->ctxt,
							 fd->subctxt, 0);
		binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ,
							  uctxt->ctxt,
							  fd->subctxt, 0);
		binfo.subctxt_rcvegrbuf = HFI1_MMAP_TOKEN(SUBCTXT_EGRBUF,
							  uctxt->ctxt,
							  fd->subctxt, 0);
	}
	sz = (len < sizeof(binfo)) ?
		len : sizeof(binfo);
	if (copy_to_user(ubase, &binfo, sz))
		ret = -EFAULT;
	return ret;
}

static unsigned int poll_urgent(struct file *fp,
				struct poll_table_struct *pt)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (uctxt->urgent != uctxt->urgent_poll) {
		pollflag = POLLIN | POLLRDNORM;
		uctxt->urgent_poll = uctxt->urgent;
	} else {
		pollflag = 0;
		set_bit(HFI1_CTXT_WAITING_URG, &uctxt->event_flags);
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

static unsigned int poll_next(struct file *fp,
			      struct poll_table_struct *pt)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (hdrqempty(uctxt)) {
		set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
		hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt->ctxt);
		pollflag = 0;
	} else {
		pollflag = POLLIN | POLLRDNORM;
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

/*
 * Find all user contexts in use, and set the specified bit in their
 * event mask.
 * See also find_ctxt() for a similar use, that is specific to send buffers.
 */
int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
{
	struct hfi1_ctxtdata *uctxt;
	struct hfi1_devdata *dd = ppd->dd;
	unsigned ctxt;
	int ret = 0;
	unsigned long flags;

	if (!dd->events) {
		ret = -EINVAL;
		goto done;
	}

	spin_lock_irqsave(&dd->uctxt_lock, flags);
	for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts;
	     ctxt++) {
		uctxt = dd->rcd[ctxt];
		if (uctxt) {
			unsigned long *evs = dd->events +
				(uctxt->ctxt - dd->first_user_ctxt) *
				HFI1_MAX_SHARED_CTXTS;
			int i;
			/*
			 * subctxt_cnt is 0 if not shared, so do base
			 * separately, first, then remaining subctxt, if any
			 */
			set_bit(evtbit, evs);
			for (i = 1; i < uctxt->subctxt_cnt; i++)
				set_bit(evtbit, evs + i);
		}
	}
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);
done:
	return ret;
}

/**
 * manage_rcvq - manage a context's receive queue
 * @uctxt: the context
 * @subctxt: the sub-context
 * @start_stop: action to carry out
 *
 * start_stop == 0 disables receive on the context, for use in queue
 * overflow conditions. start_stop == 1 re-enables, to be used to
 * re-init the software copy of the head register
 */
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
		       int start_stop)
{
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned int rcvctrl_op;

	if (subctxt)
		goto bail;
	/* atomically clear receive enable ctxt. */
	if (start_stop) {
		/*
		 * On enable, force in-memory copy of the tail register to
		 * 0, so that protocol code doesn't have to worry about
		 * whether or not the chip has yet updated the in-memory
		 * copy or not on return from the system call. The chip
		 * always resets its tail register back to 0 on a
		 * transition from disabled to enabled.
		 */
		if (uctxt->rcvhdrtail_kvaddr)
			clear_rcvhdrtail(uctxt);
		rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
	} else {
		rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
	}
	hfi1_rcvctrl(dd, rcvctrl_op, uctxt->ctxt);
	/* always; new head should be equal to new tail; see above */
bail:
	return 0;
}

/*
 * clear the event notifier events for this context.
 * User process then performs actions appropriate to bit having been
 * set, if desired, and checks again in future.
 */
static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
			  unsigned long events)
{
	int i;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned long *evs;

	if (!dd->events)
		return 0;

	evs = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) *
			    HFI1_MAX_SHARED_CTXTS) + subctxt;

	for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) {
		if (!test_bit(i, &events))
			continue;
		clear_bit(i, evs);
	}
	return 0;
}

static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
			 u16 pkey)
{
	int ret = -ENOENT, i, intable = 0;
	struct hfi1_pportdata *ppd = uctxt->ppd;
	struct hfi1_devdata *dd = uctxt->dd;

	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY) {
		ret = -EINVAL;
		goto done;
	}

	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++)
		if (pkey == ppd->pkeys[i]) {
			intable = 1;
			break;
		}

	if (intable)
		ret = hfi1_set_ctxt_pkey(dd, uctxt->ctxt, pkey);
done:
	return ret;
}

static void user_remove(struct hfi1_devdata *dd)
{
	hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device);
}

static int user_add(struct hfi1_devdata *dd)
{
	char name[10];
	int ret;

	snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
	ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
			     &dd->user_cdev, &dd->user_device,
			     true, &dd->kobj);
	if (ret)
		user_remove(dd);

	return ret;
}

/*
 * Create per-unit files in /dev
 */
int hfi1_device_create(struct hfi1_devdata *dd)
{
	return user_add(dd);
}

/*
 * Remove per-unit files in /dev
 * void, core kernel returns no errors for this stuff
 */
void hfi1_device_remove(struct hfi1_devdata *dd)
{
	user_remove(dd);
}