/*
 * Copyright(c) 2015-2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/poll.h>
#include <linux/cdev.h>
#include <linux/vmalloc.h>
#include <linux/io.h>
#include <linux/sched/mm.h>
#include <linux/bitmap.h>

#include <rdma/ib.h>

#include "hfi.h"
#include "pio.h"
#include "device.h"
#include "common.h"
#include "trace.h"
#include "mmu_rb.h"
#include "user_sdma.h"
#include "user_exp_rcv.h"
#include "aspm.h"

#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt

#define SEND_CTXT_HALT_TIMEOUT 1000 /* msecs */

/*
 * File operation functions
 */
static int hfi1_file_open(struct inode *inode, struct file *fp);
static int hfi1_file_close(struct inode *inode, struct file *fp);
static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from);
static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt);
static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma);

static u64 kvirt_to_phys(void *addr);
static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo);
static void init_subctxts(struct hfi1_ctxtdata *uctxt,
			  const struct hfi1_user_info *uinfo);
static int init_user_ctxt(struct hfi1_filedata *fd,
			  struct hfi1_ctxtdata *uctxt);
static void user_init(struct hfi1_ctxtdata *uctxt);
static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len);
static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len);
static int setup_base_ctxt(struct hfi1_filedata *fd,
			   struct hfi1_ctxtdata *uctxt);
static int setup_subctxt(struct hfi1_ctxtdata *uctxt);

static int find_sub_ctxt(struct hfi1_filedata *fd,
			 const struct hfi1_user_info *uinfo);
static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
			 struct hfi1_user_info *uinfo,
			 struct hfi1_ctxtdata **cd);
static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt);
static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt);
static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt);
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
			  unsigned long events);
static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey);
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
		       int start_stop);
static int vma_fault(struct vm_fault *vmf);
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg);

static const struct file_operations hfi1_file_ops = {
	.owner = THIS_MODULE,
	.write_iter = hfi1_write_iter,
	.open = hfi1_file_open,
	.release = hfi1_file_close,
	.unlocked_ioctl = hfi1_file_ioctl,
	.poll = hfi1_poll,
	.mmap = hfi1_file_mmap,
	.llseek = noop_llseek,
};

static const struct vm_operations_struct vm_ops = {
	.fault = vma_fault,
};

/*
 * Types of memories mapped into user processes' space
 */
enum mmap_types {
	PIO_BUFS = 1,
	PIO_BUFS_SOP,
	PIO_CRED,
	RCV_HDRQ,
	RCV_EGRBUF,
	UREGS,
	EVENTS,
	STATUS,
	RTAIL,
	SUBCTXT_UREGS,
	SUBCTXT_RCV_HDRQ,
	SUBCTXT_EGRBUF,
	SDMA_COMP
};

/*
 * Masks and offsets defining the mmap tokens
 */
#define HFI1_MMAP_OFFSET_MASK   0xfffULL
#define HFI1_MMAP_OFFSET_SHIFT  0
#define HFI1_MMAP_SUBCTXT_MASK  0xfULL
#define HFI1_MMAP_SUBCTXT_SHIFT 12
#define HFI1_MMAP_CTXT_MASK     0xffULL
#define HFI1_MMAP_CTXT_SHIFT    16
#define HFI1_MMAP_TYPE_MASK     0xfULL
#define HFI1_MMAP_TYPE_SHIFT    24
#define HFI1_MMAP_MAGIC_MASK    0xffffffffULL
#define HFI1_MMAP_MAGIC_SHIFT   32

#define HFI1_MMAP_MAGIC         0xdabbad00

#define HFI1_MMAP_TOKEN_SET(field, val)	\
	(((val) & HFI1_MMAP_##field##_MASK) << HFI1_MMAP_##field##_SHIFT)
#define HFI1_MMAP_TOKEN_GET(field, token) \
	(((token) >> HFI1_MMAP_##field##_SHIFT) & HFI1_MMAP_##field##_MASK)
#define HFI1_MMAP_TOKEN(type, ctxt, subctxt, addr)   \
	(HFI1_MMAP_TOKEN_SET(MAGIC, HFI1_MMAP_MAGIC) | \
	HFI1_MMAP_TOKEN_SET(TYPE, type) | \
	HFI1_MMAP_TOKEN_SET(CTXT, ctxt) | \
	HFI1_MMAP_TOKEN_SET(SUBCTXT, subctxt) | \
	HFI1_MMAP_TOKEN_SET(OFFSET, (offset_in_page(addr))))

#define dbg(fmt, ...)				\
	pr_info(fmt, ##__VA_ARGS__)

static inline int is_valid_mmap(u64 token)
{
	return (HFI1_MMAP_TOKEN_GET(MAGIC, token) == HFI1_MMAP_MAGIC);
}

static int hfi1_file_open(struct inode *inode, struct file *fp)
{
	struct hfi1_filedata *fd;
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);

	if (!((dd->flags & HFI1_PRESENT) && dd->kregbase1))
		return -EINVAL;

	if (!atomic_inc_not_zero(&dd->user_refcount))
		return -ENXIO;

	/* Just take a ref now. Not all opens result in a context assign */
	kobject_get(&dd->kobj);

	/* The real work is performed later in assign_ctxt() */

	fd = kzalloc(sizeof(*fd), GFP_KERNEL);

	if (fd) {
		fd->rec_cpu_num = -1; /* no cpu affinity by default */
		fd->mm = current->mm;
		mmgrab(fd->mm);
		fd->dd = dd;
		fp->private_data = fd;
	} else {
		fp->private_data = NULL;

		if (atomic_dec_and_test(&dd->user_refcount))
			complete(&dd->user_comp);

		return -ENOMEM;
	}

	return 0;
}

static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_user_info uinfo;
	struct hfi1_tid_info tinfo;
	int ret = 0;
	unsigned long addr;
	int uval = 0;
	unsigned long ul_uval = 0;
	u16 uval16 = 0;

	hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd);
	if (cmd != HFI1_IOCTL_ASSIGN_CTXT &&
	    cmd != HFI1_IOCTL_GET_VERS &&
	    !uctxt)
		return -EINVAL;

	switch (cmd) {
	case HFI1_IOCTL_ASSIGN_CTXT:
		if (uctxt)
			return -EINVAL;

		if (copy_from_user(&uinfo,
				   (struct hfi1_user_info __user *)arg,
				   sizeof(uinfo)))
			return -EFAULT;

		ret = assign_ctxt(fd, &uinfo);
		break;
	case HFI1_IOCTL_CTXT_INFO:
		ret = get_ctxt_info(fd, (void __user *)(unsigned long)arg,
				    sizeof(struct hfi1_ctxt_info));
		break;
	case HFI1_IOCTL_USER_INFO:
		ret = get_base_info(fd, (void __user *)(unsigned long)arg,
				    sizeof(struct hfi1_base_info));
		break;
	case HFI1_IOCTL_CREDIT_UPD:
		if (uctxt)
			sc_return_credits(uctxt->sc);
		break;

	case HFI1_IOCTL_TID_UPDATE:
		if (copy_from_user(&tinfo,
				   (struct hfi1_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

		ret = hfi1_user_exp_rcv_setup(fd, &tinfo);
		if (!ret) {
			/*
			 * Copy the number of tidlist entries we used
			 * and the length of the buffer we registered.
			 */
			addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
			if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
					 sizeof(tinfo.tidcnt)))
				return -EFAULT;

			addr = arg + offsetof(struct hfi1_tid_info, length);
			if (copy_to_user((void __user *)addr, &tinfo.length,
					 sizeof(tinfo.length)))
				ret = -EFAULT;
		}
		break;

	case HFI1_IOCTL_TID_FREE:
		if (copy_from_user(&tinfo,
				   (struct hfi1_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

		ret = hfi1_user_exp_rcv_clear(fd, &tinfo);
		if (ret)
			break;
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			ret = -EFAULT;
		break;

	case HFI1_IOCTL_TID_INVAL_READ:
		if (copy_from_user(&tinfo,
				   (struct hfi1_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

		ret = hfi1_user_exp_rcv_invalid(fd, &tinfo);
		if (ret)
			break;
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			ret = -EFAULT;
		break;

	case HFI1_IOCTL_RECV_CTRL:
		ret = get_user(uval, (int __user *)arg);
		if (ret != 0)
			return -EFAULT;
		ret = manage_rcvq(uctxt, fd->subctxt, uval);
		break;

	case HFI1_IOCTL_POLL_TYPE:
		ret = get_user(uval, (int __user *)arg);
		if (ret != 0)
			return -EFAULT;
		uctxt->poll_type = (typeof(uctxt->poll_type))uval;
		break;

	case HFI1_IOCTL_ACK_EVENT:
		ret = get_user(ul_uval, (unsigned long __user *)arg);
		if (ret != 0)
			return -EFAULT;
		ret = user_event_ack(uctxt, fd->subctxt, ul_uval);
		break;

	case HFI1_IOCTL_SET_PKEY:
		ret = get_user(uval16, (u16 __user *)arg);
		if (ret != 0)
			return -EFAULT;
		if (HFI1_CAP_IS_USET(PKEY_CHECK))
			ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16);
		else
			return -EPERM;
		break;

	case HFI1_IOCTL_CTXT_RESET: {
		struct send_context *sc;
		struct hfi1_devdata *dd;

		if (!uctxt || !uctxt->dd || !uctxt->sc)
			return -EINVAL;

		/*
		 * There is no protection here. User level has to
		 * guarantee that no one will be writing to the send
		 * context while it is being re-initialized.
		 * If user level breaks that guarantee, it will break
		 * its own context and no one else's.
		 */
		dd = uctxt->dd;
		sc = uctxt->sc;
		/*
		 * Wait until the interrupt handler has marked the
		 * context as halted or frozen. Report error if we time
		 * out.
		 */
		wait_event_interruptible_timeout(
			sc->halt_wait, (sc->flags & SCF_HALTED),
			msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
		if (!(sc->flags & SCF_HALTED))
			return -ENOLCK;

		/*
		 * If the send context was halted due to a Freeze,
		 * wait until the device has been "unfrozen" before
		 * resetting the context.
		 */
		if (sc->flags & SCF_FROZEN) {
			wait_event_interruptible_timeout(
				dd->event_queue,
				!(ACCESS_ONCE(dd->flags) & HFI1_FROZEN),
				msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
			if (dd->flags & HFI1_FROZEN)
				return -ENOLCK;

			if (dd->flags & HFI1_FORCED_FREEZE)
				/*
				 * Don't allow context reset if we are in
				 * a forced freeze.
				 */
				return -ENODEV;

			sc_disable(sc);
			ret = sc_enable(sc);
			hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt);
		} else {
			ret = sc_restart(sc);
		}
		if (!ret)
			sc_return_credits(sc);
		break;
	}

	case HFI1_IOCTL_GET_VERS:
		uval = HFI1_USER_SWVERSION;
		if (put_user(uval, (int __user *)arg))
			return -EFAULT;
		break;

	default:
		return -EINVAL;
	}

	return ret;
}

static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
	struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
	struct hfi1_user_sdma_pkt_q *pq = fd->pq;
	struct hfi1_user_sdma_comp_q *cq = fd->cq;
	int done = 0, reqs = 0;
	unsigned long dim = from->nr_segs;

	if (!cq || !pq)
		return -EIO;

	if (!iter_is_iovec(from) || !dim)
		return -EINVAL;

	trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim);

	if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
		return -ENOSPC;

	while (dim) {
		int ret;
		unsigned long count = 0;

		ret = hfi1_user_sdma_process_request(
			fd, (struct iovec *)(from->iov + done),
			dim, &count);
		if (ret) {
			reqs = ret;
			break;
		}
		dim -= count;
		done += count;
		reqs++;
	}

	return reqs;
}

static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd;
	unsigned long flags;
	u64 token = vma->vm_pgoff << PAGE_SHIFT,
		memaddr = 0;
	void *memvirt = NULL;
	u8 subctxt, mapio = 0, vmf = 0, type;
	ssize_t memlen = 0;
	int ret = 0;
	u16 ctxt;

	if (!is_valid_mmap(token) || !uctxt ||
	    !(vma->vm_flags & VM_SHARED)) {
		ret = -EINVAL;
		goto done;
	}
	dd = uctxt->dd;
	ctxt = HFI1_MMAP_TOKEN_GET(CTXT, token);
	subctxt = HFI1_MMAP_TOKEN_GET(SUBCTXT, token);
	type = HFI1_MMAP_TOKEN_GET(TYPE, token);
	if (ctxt != uctxt->ctxt || subctxt != fd->subctxt) {
		ret = -EINVAL;
		goto done;
	}

	flags = vma->vm_flags;

	switch (type) {
	case PIO_BUFS:
	case PIO_BUFS_SOP:
		memaddr = ((dd->physaddr + TXE_PIO_SEND) +
				/* chip pio base */
			   (uctxt->sc->hw_context * BIT(16))) +
				/* 64K PIO space / ctxt */
			(type == PIO_BUFS_SOP ?
				(TXE_PIO_SIZE / 2) : 0); /* sop? */
		/*
		 * Map only the amount allocated to the context, not the
		 * entire available context's PIO space.
		 */
		memlen = PAGE_ALIGN(uctxt->sc->credits * PIO_BLOCK_SIZE);
		flags &= ~VM_MAYREAD;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
		mapio = 1;
		break;
	case PIO_CRED:
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		/*
		 * The credit return location for this context could be on the
		 * second or third page allocated for credit returns (if number
		 * of enabled contexts > 64 and 128 respectively).
		 */
		memvirt = dd->cr_base[uctxt->numa_id].va;
		memaddr = virt_to_phys(memvirt) +
			(((u64)uctxt->sc->hw_free -
			  (u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK);
		memlen = PAGE_SIZE;
		flags &= ~VM_MAYWRITE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		/*
		 * The driver has already allocated memory for credit
		 * returns and programmed it into the chip. Has that
		 * memory been flagged as non-cached?
		 */
		/* vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); */
		mapio = 1;
		break;
	case RCV_HDRQ:
		memlen = uctxt->rcvhdrq_size;
		memvirt = uctxt->rcvhdrq;
		break;
	case RCV_EGRBUF: {
		unsigned long addr;
		int i;
		/*
		 * The RcvEgr buffer needs to be handled differently
		 * as multiple non-contiguous pages need to be mapped
		 * into the user process.
		 */
		memlen = uctxt->egrbufs.size;
		if ((vma->vm_end - vma->vm_start) != memlen) {
			dd_dev_err(dd, "Eager buffer map size invalid (%lu != %lu)\n",
				   (vma->vm_end - vma->vm_start), memlen);
			ret = -EINVAL;
			goto done;
		}
		if (vma->vm_flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		vma->vm_flags &= ~VM_MAYWRITE;
		addr = vma->vm_start;
		for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) {
			memlen = uctxt->egrbufs.buffers[i].len;
			memvirt = uctxt->egrbufs.buffers[i].addr;
			ret = remap_pfn_range(
				vma, addr,
				/*
				 * virt_to_pfn() does the same, but
				 * it's not available on x86_64
				 * when CONFIG_MMU is enabled.
				 */
				PFN_DOWN(__pa(memvirt)),
				memlen,
				vma->vm_page_prot);
			if (ret < 0)
				goto done;
			addr += memlen;
		}
		ret = 0;
		goto done;
	}
	case UREGS:
		/*
		 * Map only the page that contains this context's user
		 * registers.
		 */
		memaddr = (unsigned long)
			(dd->physaddr + RXE_PER_CONTEXT_USER)
			+ (uctxt->ctxt * RXE_PER_CONTEXT_SIZE);
		/*
		 * TidFlow table is on the same page as the rest of the
		 * user registers.
		 */
		memlen = PAGE_SIZE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
		mapio = 1;
		break;
	case EVENTS:
		/*
		 * Use the page where this context's flags are. User level
		 * knows where its own bitmap is within the page.
		 */
		memaddr = (unsigned long)(dd->events +
			((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
			 HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
		memlen = PAGE_SIZE;
		/*
		 * v3.7 removes VM_RESERVED but the effect is kept by
		 * using VM_IO.
		 */
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case STATUS:
		if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) {
			ret = -EPERM;
			goto done;
		}
		memaddr = kvirt_to_phys((void *)dd->status);
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		break;
	case RTAIL:
		if (!HFI1_CAP_IS_USET(DMA_RTAIL)) {
			/*
			 * If the memory allocation failed, the context alloc
			 * also would have failed, so we would never get here
			 */
			ret = -EINVAL;
			goto done;
		}
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		memlen = PAGE_SIZE;
		memvirt = (void *)uctxt->rcvhdrtail_kvaddr;
		flags &= ~VM_MAYWRITE;
		break;
	case SUBCTXT_UREGS:
		memaddr = (u64)uctxt->subctxt_uregbase;
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_RCV_HDRQ:
		memaddr = (u64)uctxt->subctxt_rcvhdr_base;
		memlen = uctxt->rcvhdrq_size * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_EGRBUF:
		memaddr = (u64)uctxt->subctxt_rcvegrbuf;
		memlen = uctxt->egrbufs.size * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		flags &= ~VM_MAYWRITE;
		vmf = 1;
		break;
	case SDMA_COMP: {
		struct hfi1_user_sdma_comp_q *cq = fd->cq;

		if (!cq) {
			ret = -EFAULT;
			goto done;
		}
		memaddr = (u64)cq->comps;
		memlen = PAGE_ALIGN(sizeof(*cq->comps) * cq->nentries);
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}

	if ((vma->vm_end - vma->vm_start) != memlen) {
		hfi1_cdbg(PROC, "%u:%u Memory size mismatch %lu:%lu",
			  uctxt->ctxt, fd->subctxt,
			  (vma->vm_end - vma->vm_start), memlen);
		ret = -EINVAL;
		goto done;
	}

	vma->vm_flags = flags;
	hfi1_cdbg(PROC,
		  "%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n",
		  ctxt, subctxt, type, mapio, vmf, memaddr, memlen,
		  vma->vm_end - vma->vm_start, vma->vm_flags);
	if (vmf) {
		vma->vm_pgoff = PFN_DOWN(memaddr);
		vma->vm_ops = &vm_ops;
		ret = 0;
	} else if (mapio) {
		ret = io_remap_pfn_range(vma, vma->vm_start,
					 PFN_DOWN(memaddr),
					 memlen,
					 vma->vm_page_prot);
	} else if (memvirt) {
		ret = remap_pfn_range(vma, vma->vm_start,
				      PFN_DOWN(__pa(memvirt)),
				      memlen,
				      vma->vm_page_prot);
	} else {
		ret = remap_pfn_range(vma, vma->vm_start,
				      PFN_DOWN(memaddr),
				      memlen,
				      vma->vm_page_prot);
	}
done:
	return ret;
}

/*
 * Local (non-chip) user memory is not mapped right away but as it is
 * accessed by the user-level code.
 */
static int vma_fault(struct vm_fault *vmf)
{
	struct page *page;

	page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
	if (!page)
		return VM_FAULT_SIGBUS;

	get_page(page);
	vmf->page = page;

	return 0;
}

static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt)
{
	struct hfi1_ctxtdata *uctxt;
	unsigned pollflag;

	uctxt = ((struct hfi1_filedata *)fp->private_data)->uctxt;
	if (!uctxt)
		pollflag = POLLERR;
	else if (uctxt->poll_type == HFI1_POLL_TYPE_URGENT)
		pollflag = poll_urgent(fp, pt);
	else if (uctxt->poll_type == HFI1_POLL_TYPE_ANYRCV)
		pollflag = poll_next(fp, pt);
	else /* invalid */
		pollflag = POLLERR;

	return pollflag;
}

static int hfi1_file_close(struct inode *inode, struct file *fp)
{
	struct hfi1_filedata *fdata = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);
	unsigned long flags, *ev;

	fp->private_data = NULL;

	if (!uctxt)
		goto done;

	hfi1_cdbg(PROC, "closing ctxt %u:%u", uctxt->ctxt, fdata->subctxt);

	flush_wc();
	/* drain user sdma queue */
	hfi1_user_sdma_free_queues(fdata, uctxt);

	/* release the cpu */
	hfi1_put_proc_affinity(fdata->rec_cpu_num);

	/* clean up rcv side */
	hfi1_user_exp_rcv_free(fdata);

	/*
	 * fdata->uctxt is used in the above cleanup.  It is not ready to be
	 * removed until here.
	 */
	fdata->uctxt = NULL;
	hfi1_rcd_put(uctxt);

	/*
	 * Clear any left over, unhandled events so the next process that
	 * gets this context doesn't get confused.
	 */
	ev = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
			   HFI1_MAX_SHARED_CTXTS) + fdata->subctxt;
	*ev = 0;

	spin_lock_irqsave(&dd->uctxt_lock, flags);
	__clear_bit(fdata->subctxt, uctxt->in_use_ctxts);
	if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
		spin_unlock_irqrestore(&dd->uctxt_lock, flags);
		goto done;
	}
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	/*
	 * Disable receive context and interrupt available, reset all
	 * RcvCtxtCtrl bits to default values.
	 */
	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
		     HFI1_RCVCTRL_TIDFLOW_DIS |
		     HFI1_RCVCTRL_INTRAVAIL_DIS |
		     HFI1_RCVCTRL_TAILUPD_DIS |
		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
	/* Clear the context's J_KEY */
	hfi1_clear_ctxt_jkey(dd, uctxt);
	/*
	 * If a send context is allocated, reset context integrity
	 * checks to default and disable the send context.
	 */
	if (uctxt->sc) {
		set_pio_integrity(uctxt->sc);
		sc_disable(uctxt->sc);
	}

	hfi1_free_ctxt_rcv_groups(uctxt);
	hfi1_clear_ctxt_pkey(dd, uctxt);

	uctxt->event_flags = 0;

	deallocate_ctxt(uctxt);
done:
	mmdrop(fdata->mm);
	kobject_put(&dd->kobj);

	if (atomic_dec_and_test(&dd->user_refcount))
		complete(&dd->user_comp);

	kfree(fdata);
	return 0;
}

/*
 * Convert kernel *virtual* addresses to physical addresses.
 * This is used for vmalloc'ed addresses.
 */
static u64 kvirt_to_phys(void *addr)
{
	struct page *page;
	u64 paddr = 0;

	page = vmalloc_to_page(addr);
	if (page)
		paddr = page_to_pfn(page) << PAGE_SHIFT;

	return paddr;
}

/**
 * complete_subctxt
 * @fd: valid filedata pointer
 *
 * Sub-context info can only be set up after the base context
 * has been completed.  This is indicated by the clearing of the
 * HFI1_CTXT_BASE_UNINIT bit.
 *
 * Wait for the bit to be cleared, and then complete the subcontext
 * initialization.
 *
 */
static int complete_subctxt(struct hfi1_filedata *fd)
{
	int ret;
	unsigned long flags;

	/*
	 * sub-context info can only be set up after the base context
	 * has been completed.
	 */
	ret = wait_event_interruptible(
		fd->uctxt->wait,
		!test_bit(HFI1_CTXT_BASE_UNINIT, &fd->uctxt->event_flags));

	if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags))
		ret = -ENOMEM;

	/* Finish the sub-context init */
	if (!ret) {
		fd->rec_cpu_num = hfi1_get_proc_affinity(fd->uctxt->numa_id);
		ret = init_user_ctxt(fd, fd->uctxt);
	}

	if (ret) {
		/* Release the sub-context slot before dropping the ref */
		spin_lock_irqsave(&fd->dd->uctxt_lock, flags);
		__clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
		spin_unlock_irqrestore(&fd->dd->uctxt_lock, flags);
		hfi1_rcd_put(fd->uctxt);
		fd->uctxt = NULL;
	}

	return ret;
}

static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
{
	int ret;
	unsigned int swmajor, swminor;
	struct hfi1_ctxtdata *uctxt = NULL;

	swmajor = uinfo->userversion >> 16;
	if (swmajor != HFI1_USER_SWMAJOR)
		return -ENODEV;

	if (uinfo->subctxt_cnt > HFI1_MAX_SHARED_CTXTS)
		return -EINVAL;

	swminor = uinfo->userversion & 0xffff;

	/*
	 * Acquire the mutex to protect against multiple creations of what
	 * could be a shared base context.
	 */
	mutex_lock(&hfi1_mutex);
	/*
	 * Get a sub context if available (fd->uctxt will be set).
	 * ret < 0 error, 0 no context, 1 sub-context found
	 */
	ret = find_sub_ctxt(fd, uinfo);

	/*
	 * Allocate a base context if context sharing is not required or a
	 * sub context wasn't found.
	 */
	if (!ret)
		ret = allocate_ctxt(fd, fd->dd, uinfo, &uctxt);

	mutex_unlock(&hfi1_mutex);

	/* Depending on the context type, finish the appropriate init */
	switch (ret) {
	case 0:
		ret = setup_base_ctxt(fd, uctxt);
		if (uctxt->subctxt_cnt) {
			/*
			 * Base context is done (successfully or not), notify
			 * anybody using a sub-context that is waiting for
			 * this completion.
			 */
			clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
			wake_up(&uctxt->wait);
		}
		break;
	case 1:
		ret = complete_subctxt(fd);
		break;
	default:
		break;
	}

	return ret;
}

/**
 * match_ctxt
 * @fd: valid filedata pointer
 * @uinfo: user info to compare base context with
 * @uctxt: context to compare uinfo to.
 *
 * Compare the given context with the given information to see if it
 * can be used for a sub context.
 */
static int match_ctxt(struct hfi1_filedata *fd,
		      const struct hfi1_user_info *uinfo,
		      struct hfi1_ctxtdata *uctxt)
{
	struct hfi1_devdata *dd = fd->dd;
	unsigned long flags;
	u16 subctxt;

	/* Skip dynamically allocated kernel contexts */
	if (uctxt->sc && (uctxt->sc->type == SC_KERNEL))
		return 0;

	/* Skip ctxt if it doesn't match the requested one */
	if (memcmp(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)) ||
	    uctxt->jkey != generate_jkey(current_uid()) ||
	    uctxt->subctxt_id != uinfo->subctxt_id ||
	    uctxt->subctxt_cnt != uinfo->subctxt_cnt)
		return 0;

	/* Verify the sharing process matches the base */
	if (uctxt->userversion != uinfo->userversion)
		return -EINVAL;

	/* Find an unused sub context */
	spin_lock_irqsave(&dd->uctxt_lock, flags);
	if (bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
		/* context is being closed, do not use */
		spin_unlock_irqrestore(&dd->uctxt_lock, flags);
		return 0;
	}

	subctxt = find_first_zero_bit(uctxt->in_use_ctxts,
				      HFI1_MAX_SHARED_CTXTS);
	if (subctxt >= uctxt->subctxt_cnt) {
		spin_unlock_irqrestore(&dd->uctxt_lock, flags);
		return -EBUSY;
	}

	fd->subctxt = subctxt;
	__set_bit(fd->subctxt, uctxt->in_use_ctxts);
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	fd->uctxt = uctxt;
	hfi1_rcd_get(uctxt);

	return 1;
}

/**
 * find_sub_ctxt
 * @fd: valid filedata pointer
 * @uinfo: matching info to use to find a possible context to share.
 *
 * The hfi1_mutex must be held when this function is called.  It is
 * necessary to ensure serialized creation of shared contexts.
 *
 * Return:
 *    0      No sub-context found
 *    1      Subcontext found and allocated
 *    errno  EINVAL (incorrect parameters)
 *           EBUSY (all sub contexts in use)
 */
static int find_sub_ctxt(struct hfi1_filedata *fd,
			 const struct hfi1_user_info *uinfo)
{
	struct hfi1_ctxtdata *uctxt;
	struct hfi1_devdata *dd = fd->dd;
	u16 i;
	int ret;

	if (!uinfo->subctxt_cnt)
		return 0;

	for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) {
		uctxt = hfi1_rcd_get_by_index(dd, i);
		if (uctxt) {
			ret = match_ctxt(fd, uinfo, uctxt);
			hfi1_rcd_put(uctxt);
			/* a non-zero return means we are done (match or error) */
			if (ret)
				return ret;
		}
	}

	return 0;
}

static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
			 struct hfi1_user_info *uinfo,
			 struct hfi1_ctxtdata **rcd)
{
	struct hfi1_ctxtdata *uctxt;
	int ret, numa;

	if (dd->flags & HFI1_FROZEN) {
		/*
		 * Pick an error that is unique from all other errors
		 * that are returned so the user process knows that
		 * it tried to allocate while the SPC was frozen.  It
		 * should be able to retry with success in a short
		 * while.
		 */
		return -EIO;
	}

	if (!dd->freectxts)
		return -EBUSY;

	/*
	 * If we don't have a NUMA node requested, preference is towards
	 * device NUMA node.
	 */
	fd->rec_cpu_num = hfi1_get_proc_affinity(dd->node);
	if (fd->rec_cpu_num != -1)
		numa = cpu_to_node(fd->rec_cpu_num);
	else
		numa = numa_node_id();
	ret = hfi1_create_ctxtdata(dd->pport, numa, &uctxt);
	if (ret < 0) {
		dd_dev_err(dd, "user ctxtdata allocation failed\n");
		return ret;
	}
	hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)",
		  uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num,
		  uctxt->numa_id);

	/*
	 * Allocate and enable a PIO send context.
	 */
	uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize, dd->node);
	if (!uctxt->sc) {
		ret = -ENOMEM;
		goto ctxdata_free;
	}
	hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index,
		  uctxt->sc->hw_context);
	ret = sc_enable(uctxt->sc);
	if (ret)
		goto ctxdata_free;

	/*
	 * Setup sub context information if the user-level has requested
	 * sub contexts.
	 * This has to be done here so the rest of the sub-contexts find the
	 * proper base context.
	 */
	if (uinfo->subctxt_cnt)
		init_subctxts(uctxt, uinfo);
	uctxt->userversion = uinfo->userversion;
	uctxt->flags = hfi1_cap_mask; /* save current flag state */
	init_waitqueue_head(&uctxt->wait);
	strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
	memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid));
	uctxt->jkey = generate_jkey(current_uid());
	hfi1_stats.sps_ctxts++;
	/*
	 * Disable ASPM when there are open user/PSM contexts to avoid
	 * issues with ASPM L1 exit latency
	 */
	if (dd->freectxts-- == dd->num_user_contexts)
		aspm_disable_all(dd);

	*rcd = uctxt;

	return 0;

ctxdata_free:
	hfi1_free_ctxt(uctxt);
	return ret;
}

static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt)
{
	mutex_lock(&hfi1_mutex);
	hfi1_stats.sps_ctxts--;
	if (++uctxt->dd->freectxts == uctxt->dd->num_user_contexts)
		aspm_enable_all(uctxt->dd);
	mutex_unlock(&hfi1_mutex);

	hfi1_free_ctxt(uctxt);
}

static void init_subctxts(struct hfi1_ctxtdata *uctxt,
			  const struct hfi1_user_info *uinfo)
{
	uctxt->subctxt_cnt = uinfo->subctxt_cnt;
	uctxt->subctxt_id = uinfo->subctxt_id;
	set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
}

static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
{
	int ret = 0;
	u16 num_subctxts = uctxt->subctxt_cnt;

	uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE);
	if (!uctxt->subctxt_uregbase)
		return -ENOMEM;

	/* We can take the size of the RcvHdr Queue from the master */
	uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size *
						  num_subctxts);
	if (!uctxt->subctxt_rcvhdr_base) {
		ret = -ENOMEM;
		goto bail_ureg;
	}

	uctxt->subctxt_rcvegrbuf = vmalloc_user(uctxt->egrbufs.size *
						num_subctxts);
	if (!uctxt->subctxt_rcvegrbuf) {
		ret = -ENOMEM;
		goto bail_rhdr;
	}

	return 0;

bail_rhdr:
	vfree(uctxt->subctxt_rcvhdr_base);
	uctxt->subctxt_rcvhdr_base = NULL;
bail_ureg:
	vfree(uctxt->subctxt_uregbase);
	uctxt->subctxt_uregbase = NULL;

	return ret;
}

static void user_init(struct hfi1_ctxtdata *uctxt)
{
	unsigned int rcvctrl_ops = 0;

	/* initialize poll variables... */
	uctxt->urgent = 0;
	uctxt->urgent_poll = 0;

	/*
	 * Now enable the ctxt for receive.
	 * For chips that are set to DMA the tail register to memory
	 * when it changes (and when the update bit transitions from
	 * 0 to 1), we turn it off and then back on.
	 * This will (very briefly) affect any other open ctxts, but the
	 * duration is very short, and therefore isn't an issue.  We
	 * explicitly set the in-memory tail copy to 0 beforehand, so we
	 * don't have to wait to be sure the DMA update has happened
	 * (chip resets head/tail to 0 on transition to enable).
	 */
	if (uctxt->rcvhdrtail_kvaddr)
		clear_rcvhdrtail(uctxt);

	/* Setup J_KEY before enabling the context */
	hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey);

	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
		rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
	/*
	 * Ignore the bit in the flags for now until proper
	 * support for multiple packet per rcv array entry is
	 * added.
	 */
	if (!HFI1_CAP_UGET_MASK(uctxt->flags, MULTI_PKT_EGR))
		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_EGR_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
	/*
	 * The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
	 * We can't rely on the correct value to be set from prior
	 * uses of the chip or ctxt. Therefore, add the rcvctrl op
	 * for both cases.
	 */
	if (HFI1_CAP_UGET_MASK(uctxt->flags, DMA_RTAIL))
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
	else
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
}

static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len)
{
	struct hfi1_ctxt_info cinfo;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	int ret = 0;

	memset(&cinfo, 0, sizeof(cinfo));
	cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) &
				HFI1_CAP_MISC_MASK) << HFI1_CAP_USER_SHIFT) |
			HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
			HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
	/* adjust flag if this fd is not able to cache */
	if (!fd->handler)
		cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */

	cinfo.num_active = hfi1_count_active_units();
	cinfo.unit = uctxt->dd->unit;
	cinfo.ctxt = uctxt->ctxt;
	cinfo.subctxt = fd->subctxt;
	cinfo.rcvtids = roundup(uctxt->egrbufs.alloced,
				uctxt->dd->rcv_entries.group_size) +
		uctxt->expected_count;
	cinfo.credits = uctxt->sc->credits;
	cinfo.numa_node = uctxt->numa_id;
	cinfo.rec_cpu = fd->rec_cpu_num;
	cinfo.send_ctxt = uctxt->sc->hw_context;

	cinfo.egrtids = uctxt->egrbufs.alloced;
	cinfo.rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
	cinfo.rcvhdrq_entsize = uctxt->rcvhdrqentsize << 2;
	cinfo.sdma_ring_size = fd->cq->nentries;
	cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;

	trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo);
	if (copy_to_user(ubase, &cinfo, sizeof(cinfo)))
		ret = -EFAULT;

	return ret;
}

static int init_user_ctxt(struct hfi1_filedata *fd,
			  struct hfi1_ctxtdata *uctxt)
{
	int ret;

	ret = hfi1_user_sdma_alloc_queues(uctxt, fd);
	if (ret)
		return ret;

	ret = hfi1_user_exp_rcv_init(fd, uctxt);
	if (ret)
		hfi1_user_sdma_free_queues(fd, uctxt);

	return ret;
}

static int setup_base_ctxt(struct hfi1_filedata *fd,
			   struct hfi1_ctxtdata *uctxt)
{
	struct hfi1_devdata *dd = uctxt->dd;
	int ret = 0;

	hfi1_init_ctxt(uctxt->sc);

	/* Now allocate the RcvHdr queue and eager buffers. */
	ret = hfi1_create_rcvhdrq(dd, uctxt);
	if (ret)
		return ret;

	ret = hfi1_setup_eagerbufs(uctxt);
	if (ret)
		goto setup_failed;

	/* If sub-contexts are enabled, do the appropriate setup */
	if (uctxt->subctxt_cnt)
		ret = setup_subctxt(uctxt);
	if (ret)
		goto setup_failed;

	ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
	if (ret)
		goto setup_failed;

	ret = init_user_ctxt(fd, uctxt);
	if (ret)
		goto setup_failed;

	user_init(uctxt);

	/* Now that the context is set up, the fd can get a reference. */
	fd->uctxt = uctxt;
	hfi1_rcd_get(uctxt);

	return 0;

setup_failed:
	/* Set the failed bit so sub-context init can do the right thing */
	set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);
	deallocate_ctxt(uctxt);

	return ret;
}

static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len)
{
	struct hfi1_base_info binfo;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	ssize_t sz;
	unsigned offset;
	int ret = 0;

	trace_hfi1_uctxtdata(uctxt->dd, uctxt, fd->subctxt);

	memset(&binfo, 0, sizeof(binfo));
	binfo.hw_version = dd->revision;
	binfo.sw_version = HFI1_KERN_SWVERSION;
	binfo.bthqp = kdeth_qp;
	binfo.jkey = uctxt->jkey;
	/*
	 * If more than 64 contexts are enabled the allocated credit
	 * return will span two or three contiguous pages. Since we only
	 * map the page containing the context's credit return address,
	 * we need to calculate the offset in the proper page.
	 */
	offset = ((u64)uctxt->sc->hw_free -
		  (u64)dd->cr_base[uctxt->numa_id].va) % PAGE_SIZE;
	binfo.sc_credits_addr = HFI1_MMAP_TOKEN(PIO_CRED, uctxt->ctxt,
						fd->subctxt, offset);
	binfo.pio_bufbase = HFI1_MMAP_TOKEN(PIO_BUFS, uctxt->ctxt,
					    fd->subctxt,
					    uctxt->sc->base_addr);
	binfo.pio_bufbase_sop = HFI1_MMAP_TOKEN(PIO_BUFS_SOP,
						uctxt->ctxt,
						fd->subctxt,
						uctxt->sc->base_addr);
	binfo.rcvhdr_bufbase = HFI1_MMAP_TOKEN(RCV_HDRQ, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->rcvhdrq);
	binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->egrbufs.rcvtids[0].dma);
	binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt,
						  fd->subctxt, 0);
	/*
	 * user regs are at
	 * (RXE_PER_CONTEXT_USER + (ctxt * RXE_PER_CONTEXT_SIZE))
	 */
	binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt,
					     fd->subctxt, 0);
	offset = offset_in_page((((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
		    HFI1_MAX_SHARED_CTXTS) + fd->subctxt) *
		  sizeof(*dd->events));
	binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt,
					       fd->subctxt,
					       offset);
	binfo.status_bufbase = HFI1_MMAP_TOKEN(STATUS, uctxt->ctxt,
					       fd->subctxt,
					       dd->status);
	if (HFI1_CAP_IS_USET(DMA_RTAIL))
		binfo.rcvhdrtail_base = HFI1_MMAP_TOKEN(RTAIL, uctxt->ctxt,
							fd->subctxt, 0);
	if (uctxt->subctxt_cnt) {
		binfo.subctxt_uregbase = HFI1_MMAP_TOKEN(SUBCTXT_UREGS,
							 uctxt->ctxt,
							 fd->subctxt, 0);
		binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ,
							  uctxt->ctxt,
							  fd->subctxt, 0);
		binfo.subctxt_rcvegrbuf = HFI1_MMAP_TOKEN(SUBCTXT_EGRBUF,
							  uctxt->ctxt,
							  fd->subctxt, 0);
	}
	sz = (len < sizeof(binfo)) ? len : sizeof(binfo);
	if (copy_to_user(ubase, &binfo, sz))
		ret = -EFAULT;
	return ret;
}

static unsigned int poll_urgent(struct file *fp,
				struct poll_table_struct *pt)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (uctxt->urgent != uctxt->urgent_poll) {
		pollflag = POLLIN | POLLRDNORM;
		uctxt->urgent_poll = uctxt->urgent;
	} else {
		pollflag = 0;
		set_bit(HFI1_CTXT_WAITING_URG, &uctxt->event_flags);
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

static unsigned int poll_next(struct file *fp,
			      struct poll_table_struct *pt)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (hdrqempty(uctxt)) {
		set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
		hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt);
		pollflag = 0;
	} else {
		pollflag = POLLIN | POLLRDNORM;
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

/*
 * Find all user contexts in use, and set the specified bit in their
 * event mask.
 * See also find_ctxt() for a similar use, that is specific to send buffers.
 */
int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
{
	struct hfi1_ctxtdata *uctxt;
	struct hfi1_devdata *dd = ppd->dd;
	u16 ctxt;

	if (!dd->events)
		return -EINVAL;

	for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts;
	     ctxt++) {
		uctxt = hfi1_rcd_get_by_index(dd, ctxt);
		if (uctxt) {
			unsigned long *evs = dd->events +
				(uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
				HFI1_MAX_SHARED_CTXTS;
			int i;
			/*
			 * subctxt_cnt is 0 if not shared, so do base
			 * separately, first, then remaining subctxt, if any
			 */
			set_bit(evtbit, evs);
			for (i = 1; i < uctxt->subctxt_cnt; i++)
				set_bit(evtbit, evs + i);
			hfi1_rcd_put(uctxt);
		}
	}

	return 0;
}

/**
 * manage_rcvq - manage a context's receive queue
 * @uctxt: the context
 * @subctxt: the sub-context
 * @start_stop: action to carry out
 *
 * start_stop == 0 disables receive on the context, for use in queue
 * overflow conditions.  start_stop == 1 re-enables, to be used to
 * re-init the software copy of the head register
 */
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
		       int start_stop)
{
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned int rcvctrl_op;

	if (subctxt)
		goto bail;
	/* atomically clear receive enable ctxt. */
	if (start_stop) {
		/*
		 * On enable, force in-memory copy of the tail register to
		 * 0, so that protocol code doesn't have to worry about
		 * whether or not the chip has yet updated the in-memory
		 * copy or not on return from the system call. The chip
		 * always resets its tail register back to 0 on a
		 * transition from disabled to enabled.
		 */
		if (uctxt->rcvhdrtail_kvaddr)
			clear_rcvhdrtail(uctxt);
		rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
	} else {
		rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
	}
	hfi1_rcvctrl(dd, rcvctrl_op, uctxt);
	/* always; new head should be equal to new tail; see above */
bail:
	return 0;
}

/*
 * Clear the event notifier events for this context.
 * User process then performs actions appropriate to bit having been
 * set, if desired, and checks again in future.
 */
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
			  unsigned long events)
{
	int i;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned long *evs;

	if (!dd->events)
		return 0;

	evs = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
			    HFI1_MAX_SHARED_CTXTS) + subctxt;

	for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) {
		if (!test_bit(i, &events))
			continue;
		clear_bit(i, evs);
	}
	return 0;
}

static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey)
{
	int ret = -ENOENT, i, intable = 0;
	struct hfi1_pportdata *ppd = uctxt->ppd;
	struct hfi1_devdata *dd = uctxt->dd;

	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY) {
		ret = -EINVAL;
		goto done;
	}

	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++)
		if (pkey == ppd->pkeys[i]) {
			intable = 1;
			break;
		}

	if (intable)
		ret = hfi1_set_ctxt_pkey(dd, uctxt, pkey);
done:
	return ret;
}

static void user_remove(struct hfi1_devdata *dd)
{
	hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device);
}

static int user_add(struct hfi1_devdata *dd)
{
	char name[10];
	int ret;

	snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
	ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
			     &dd->user_cdev, &dd->user_device,
			     true, &dd->kobj);
	if (ret)
		user_remove(dd);

	return ret;
}

/*
 * Create per-unit files in /dev
 */
int hfi1_device_create(struct hfi1_devdata *dd)
{
	return user_add(dd);
}

/*
 * Remove per-unit files in /dev
 * void, core kernel returns no errors for this stuff
 */
void hfi1_device_remove(struct hfi1_devdata *dd)
{
	user_remove(dd);
}