/*
 * Copyright(c) 2015-2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/poll.h>
#include <linux/cdev.h>
#include <linux/vmalloc.h>
#include <linux/io.h>
#include <linux/sched/mm.h>
#include <linux/bitmap.h>

#include <rdma/ib.h>

#include "hfi.h"
#include "pio.h"
#include "device.h"
#include "common.h"
#include "trace.h"
#include "mmu_rb.h"
#include "user_sdma.h"
#include "user_exp_rcv.h"
#include "aspm.h"

#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt

#define SEND_CTXT_HALT_TIMEOUT 1000 /* msecs */

/*
 * File operation functions
 */
static int hfi1_file_open(struct inode *inode, struct file *fp);
static int hfi1_file_close(struct inode *inode, struct file *fp);
static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from);
static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt);
static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma);

static u64 kvirt_to_phys(void *addr);
static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo);
static void init_subctxts(struct hfi1_ctxtdata *uctxt,
			  const struct hfi1_user_info *uinfo);
static int init_user_ctxt(struct hfi1_filedata *fd,
			  struct hfi1_ctxtdata *uctxt);
static void user_init(struct hfi1_ctxtdata *uctxt);
static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len);
static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len);
static int setup_base_ctxt(struct hfi1_filedata *fd,
			   struct hfi1_ctxtdata *uctxt);
static int setup_subctxt(struct hfi1_ctxtdata *uctxt);

static int find_sub_ctxt(struct hfi1_filedata *fd,
			 const struct hfi1_user_info *uinfo);
static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
			 struct hfi1_user_info *uinfo,
			 struct hfi1_ctxtdata **cd);
static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt);
static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt);
static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt);
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
			  unsigned long events);
static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey);
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
		       int start_stop);
static int vma_fault(struct vm_fault *vmf);
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg);

static const struct file_operations hfi1_file_ops = {
	.owner = THIS_MODULE,
	.write_iter = hfi1_write_iter,
	.open = hfi1_file_open,
	.release = hfi1_file_close,
	.unlocked_ioctl = hfi1_file_ioctl,
	.poll = hfi1_poll,
	.mmap = hfi1_file_mmap,
	.llseek = noop_llseek,
};

static const struct vm_operations_struct vm_ops = {
	.fault = vma_fault,
};

/*
 * Types of memories mapped into user processes' space
 */
enum mmap_types {
	PIO_BUFS = 1,
	PIO_BUFS_SOP,
	PIO_CRED,
	RCV_HDRQ,
	RCV_EGRBUF,
	UREGS,
	EVENTS,
	STATUS,
	RTAIL,
	SUBCTXT_UREGS,
	SUBCTXT_RCV_HDRQ,
	SUBCTXT_EGRBUF,
	SDMA_COMP
};

/*
 * Masks and offsets defining the mmap tokens
 */
#define HFI1_MMAP_OFFSET_MASK   0xfffULL
#define HFI1_MMAP_OFFSET_SHIFT  0
#define HFI1_MMAP_SUBCTXT_MASK  0xfULL
#define HFI1_MMAP_SUBCTXT_SHIFT 12
#define HFI1_MMAP_CTXT_MASK     0xffULL
#define HFI1_MMAP_CTXT_SHIFT    16
#define HFI1_MMAP_TYPE_MASK     0xfULL
#define HFI1_MMAP_TYPE_SHIFT    24
#define HFI1_MMAP_MAGIC_MASK    0xffffffffULL
#define HFI1_MMAP_MAGIC_SHIFT   32

#define HFI1_MMAP_MAGIC         0xdabbad00

#define HFI1_MMAP_TOKEN_SET(field, val)	\
	(((val) & HFI1_MMAP_##field##_MASK) << HFI1_MMAP_##field##_SHIFT)
#define HFI1_MMAP_TOKEN_GET(field, token) \
	(((token) >> HFI1_MMAP_##field##_SHIFT) & HFI1_MMAP_##field##_MASK)
#define HFI1_MMAP_TOKEN(type, ctxt, subctxt, addr)   \
	(HFI1_MMAP_TOKEN_SET(MAGIC, HFI1_MMAP_MAGIC) | \
	HFI1_MMAP_TOKEN_SET(TYPE, type) | \
	HFI1_MMAP_TOKEN_SET(CTXT, ctxt) | \
	HFI1_MMAP_TOKEN_SET(SUBCTXT, subctxt) | \
	HFI1_MMAP_TOKEN_SET(OFFSET, (offset_in_page(addr))))
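/*
 * Resulting token layout, low to high bits:
 *   [11:0]  sub-page offset of the object
 *   [15:12] sub-context index
 *   [23:16] context number
 *   [27:24] mmap type (enum mmap_types)
 *   [63:32] HFI1_MMAP_MAGIC
 * For example, the UREGS token for context 3, sub-context 0 is
 * 0xdabbad0006030000.
 */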
#define dbg(fmt, ...) \
	pr_info(fmt, ##__VA_ARGS__)

static inline int is_valid_mmap(u64 token)
{
	return (HFI1_MMAP_TOKEN_GET(MAGIC, token) == HFI1_MMAP_MAGIC);
}

static int hfi1_file_open(struct inode *inode, struct file *fp)
{
	struct hfi1_filedata *fd;
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);

	if (!((dd->flags & HFI1_PRESENT) && dd->kregbase1))
		return -EINVAL;

	if (!atomic_inc_not_zero(&dd->user_refcount))
		return -ENXIO;

	/* Just take a ref now. Not all opens result in a context assign */
	kobject_get(&dd->kobj);

	/* The real work is performed later in assign_ctxt() */

	fd = kzalloc(sizeof(*fd), GFP_KERNEL);

	if (fd) {
		fd->rec_cpu_num = -1; /* no cpu affinity by default */
		fd->mm = current->mm;
		mmgrab(fd->mm);
		fd->dd = dd;
		fp->private_data = fd;
	} else {
		fp->private_data = NULL;

		if (atomic_dec_and_test(&dd->user_refcount))
			complete(&dd->user_comp);

		return -ENOMEM;
	}

	return 0;
}

static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_user_info uinfo;
	struct hfi1_tid_info tinfo;
	int ret = 0;
	unsigned long addr;
	int uval = 0;
	unsigned long ul_uval = 0;
	u16 uval16 = 0;

	hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd);
	if (cmd != HFI1_IOCTL_ASSIGN_CTXT &&
	    cmd != HFI1_IOCTL_GET_VERS &&
	    !uctxt)
		return -EINVAL;

	switch (cmd) {
	case HFI1_IOCTL_ASSIGN_CTXT:
		if (uctxt)
			return -EINVAL;

		if (copy_from_user(&uinfo,
				   (struct hfi1_user_info __user *)arg,
				   sizeof(uinfo)))
			return -EFAULT;

		ret = assign_ctxt(fd, &uinfo);
		break;
	case HFI1_IOCTL_CTXT_INFO:
		ret = get_ctxt_info(fd, (void __user *)(unsigned long)arg,
				    sizeof(struct hfi1_ctxt_info));
		break;
	case HFI1_IOCTL_USER_INFO:
		ret = get_base_info(fd, (void __user *)(unsigned long)arg,
				    sizeof(struct hfi1_base_info));
		break;
	case HFI1_IOCTL_CREDIT_UPD:
		if (uctxt)
			sc_return_credits(uctxt->sc);
		break;

	case HFI1_IOCTL_TID_UPDATE:
		if (copy_from_user(&tinfo,
				   (struct hfi1_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

		ret = hfi1_user_exp_rcv_setup(fd, &tinfo);
		if (!ret) {
			/*
			 * Copy the number of tidlist entries we used
			 * and the length of the buffer we registered.
			 */
			addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
			if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
					 sizeof(tinfo.tidcnt)))
				return -EFAULT;

			addr = arg + offsetof(struct hfi1_tid_info, length);
			if (copy_to_user((void __user *)addr, &tinfo.length,
					 sizeof(tinfo.length)))
				ret = -EFAULT;
		}
		break;

	case HFI1_IOCTL_TID_FREE:
		if (copy_from_user(&tinfo,
				   (struct hfi1_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

		ret = hfi1_user_exp_rcv_clear(fd, &tinfo);
		if (ret)
			break;
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			ret = -EFAULT;
		break;

	case HFI1_IOCTL_TID_INVAL_READ:
		if (copy_from_user(&tinfo,
				   (struct hfi1_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

		ret = hfi1_user_exp_rcv_invalid(fd, &tinfo);
		if (ret)
			break;
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			ret = -EFAULT;
		break;

	case HFI1_IOCTL_RECV_CTRL:
		ret = get_user(uval, (int __user *)arg);
		if (ret != 0)
			return -EFAULT;
		ret = manage_rcvq(uctxt, fd->subctxt, uval);
		break;

	case HFI1_IOCTL_POLL_TYPE:
		ret = get_user(uval, (int __user *)arg);
		if (ret != 0)
			return -EFAULT;
		uctxt->poll_type = (typeof(uctxt->poll_type))uval;
		break;

	case HFI1_IOCTL_ACK_EVENT:
		ret = get_user(ul_uval, (unsigned long __user *)arg);
		if (ret != 0)
			return -EFAULT;
		ret = user_event_ack(uctxt, fd->subctxt, ul_uval);
		break;

	case HFI1_IOCTL_SET_PKEY:
		ret = get_user(uval16, (u16 __user *)arg);
		if (ret != 0)
			return -EFAULT;
		if (HFI1_CAP_IS_USET(PKEY_CHECK))
			ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16);
		else
			return -EPERM;
		break;

	case HFI1_IOCTL_CTXT_RESET: {
		struct send_context *sc;
		struct hfi1_devdata *dd;

		if (!uctxt || !uctxt->dd || !uctxt->sc)
			return -EINVAL;

		/*
		 * There is no protection here. User level has to
		 * guarantee that no one will be writing to the send
		 * context while it is being re-initialized.
		 * If user level breaks that guarantee, it will break
		 * its own context and no one else's.
		 */
		dd = uctxt->dd;
		sc = uctxt->sc;
		/*
		 * Wait until the interrupt handler has marked the
		 * context as halted or frozen. Report error if we time
		 * out.
		 */
		wait_event_interruptible_timeout(
			sc->halt_wait, (sc->flags & SCF_HALTED),
			msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
		if (!(sc->flags & SCF_HALTED))
			return -ENOLCK;

		/*
		 * If the send context was halted due to a Freeze,
		 * wait until the device has been "unfrozen" before
		 * resetting the context.
		 */
		if (sc->flags & SCF_FROZEN) {
			wait_event_interruptible_timeout(
				dd->event_queue,
				!(ACCESS_ONCE(dd->flags) & HFI1_FROZEN),
				msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
			if (dd->flags & HFI1_FROZEN)
				return -ENOLCK;

			if (dd->flags & HFI1_FORCED_FREEZE)
				/*
				 * Don't allow context reset if we are into
				 * forced freeze
				 */
				return -ENODEV;

			sc_disable(sc);
			ret = sc_enable(sc);
			hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt);
		} else {
			ret = sc_restart(sc);
		}
		if (!ret)
			sc_return_credits(sc);
		break;
	}

	case HFI1_IOCTL_GET_VERS:
		uval = HFI1_USER_SWVERSION;
		if (put_user(uval, (int __user *)arg))
			return -EFAULT;
		break;

	default:
		return -EINVAL;
	}

	return ret;
}
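/*
 * SDMA requests are submitted by writev()ing request descriptors on the
 * context file descriptor.  hfi1_user_sdma_process_request() consumes the
 * iovecs that make up one request per call; the return value is the number
 * of requests queued, or a negative errno if a request could not be queued.
 */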
static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
	struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
	struct hfi1_user_sdma_pkt_q *pq = fd->pq;
	struct hfi1_user_sdma_comp_q *cq = fd->cq;
	int done = 0, reqs = 0;
	unsigned long dim = from->nr_segs;

	if (!cq || !pq)
		return -EIO;

	if (!iter_is_iovec(from) || !dim)
		return -EINVAL;

	trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim);

	if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
		return -ENOSPC;

	while (dim) {
		int ret;
		unsigned long count = 0;

		ret = hfi1_user_sdma_process_request(
			fd, (struct iovec *)(from->iov + done),
			dim, &count);
		if (ret) {
			reqs = ret;
			break;
		}
		dim -= count;
		done += count;
		reqs++;
	}

	return reqs;
}

static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd;
	unsigned long flags;
	u64 token = vma->vm_pgoff << PAGE_SHIFT,
		memaddr = 0;
	void *memvirt = NULL;
	u8 subctxt, mapio = 0, vmf = 0, type;
	ssize_t memlen = 0;
	int ret = 0;
	u16 ctxt;

	if (!is_valid_mmap(token) || !uctxt ||
	    !(vma->vm_flags & VM_SHARED)) {
		ret = -EINVAL;
		goto done;
	}
	dd = uctxt->dd;
	ctxt = HFI1_MMAP_TOKEN_GET(CTXT, token);
	subctxt = HFI1_MMAP_TOKEN_GET(SUBCTXT, token);
	type = HFI1_MMAP_TOKEN_GET(TYPE, token);
	if (ctxt != uctxt->ctxt || subctxt != fd->subctxt) {
		ret = -EINVAL;
		goto done;
	}

	flags = vma->vm_flags;

	switch (type) {
	case PIO_BUFS:
	case PIO_BUFS_SOP:
		memaddr = ((dd->physaddr + TXE_PIO_SEND) +
				/* chip pio base */
			   (uctxt->sc->hw_context * BIT(16))) +
				/* 64K PIO space / ctxt */
			   (type == PIO_BUFS_SOP ?
				(TXE_PIO_SIZE / 2) : 0); /* sop? */
		/*
		 * Map only the amount allocated to the context, not the
		 * entire available context's PIO space.
		 */
		memlen = PAGE_ALIGN(uctxt->sc->credits * PIO_BLOCK_SIZE);
		flags &= ~VM_MAYREAD;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
		mapio = 1;
		break;
	case PIO_CRED:
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		/*
		 * The credit return location for this context could be on the
		 * second or third page allocated for credit returns (if number
		 * of enabled contexts > 64 and 128 respectively).
		 */
		memvirt = dd->cr_base[uctxt->numa_id].va;
		memaddr = virt_to_phys(memvirt) +
			(((u64)uctxt->sc->hw_free -
			  (u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK);
		memlen = PAGE_SIZE;
		flags &= ~VM_MAYWRITE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		/*
		 * The driver has already allocated memory for credit
		 * returns and programmed it into the chip. Has that
		 * memory been flagged as non-cached?
		 */
		/* vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); */
		mapio = 1;
		break;
	case RCV_HDRQ:
		memlen = uctxt->rcvhdrq_size;
		memvirt = uctxt->rcvhdrq;
		break;
	case RCV_EGRBUF: {
		unsigned long addr;
		int i;
		/*
		 * The RcvEgr buffers need to be handled differently,
		 * as multiple non-contiguous pages need to be mapped
		 * into the user process.
		 */
		memlen = uctxt->egrbufs.size;
		if ((vma->vm_end - vma->vm_start) != memlen) {
			dd_dev_err(dd, "Eager buffer map size invalid (%lu != %lu)\n",
				   (vma->vm_end - vma->vm_start), memlen);
			ret = -EINVAL;
			goto done;
		}
		if (vma->vm_flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		vma->vm_flags &= ~VM_MAYWRITE;
		addr = vma->vm_start;
		for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) {
			memlen = uctxt->egrbufs.buffers[i].len;
			memvirt = uctxt->egrbufs.buffers[i].addr;
			ret = remap_pfn_range(
				vma, addr,
				/*
				 * virt_to_pfn() does the same, but
				 * it's not available on x86_64
				 * when CONFIG_MMU is enabled.
				 */
				PFN_DOWN(__pa(memvirt)),
				memlen,
				vma->vm_page_prot);
			if (ret < 0)
				goto done;
			addr += memlen;
		}
		ret = 0;
		goto done;
	}
	case UREGS:
		/*
		 * Map only the page that contains this context's user
		 * registers.
		 */
		memaddr = (unsigned long)
			(dd->physaddr + RXE_PER_CONTEXT_USER)
			+ (uctxt->ctxt * RXE_PER_CONTEXT_SIZE);
		/*
		 * TidFlow table is on the same page as the rest of the
		 * user registers.
		 */
		memlen = PAGE_SIZE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
		mapio = 1;
		break;
	case EVENTS:
		/*
		 * Use the page where this context's flags are. User level
		 * knows where its own bitmap is within the page.
		 */
		memaddr = (unsigned long)(dd->events +
					  ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
					   HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
		memlen = PAGE_SIZE;
		/*
		 * v3.7 removes VM_RESERVED but the effect is kept by
		 * using VM_IO.
		 */
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case STATUS:
		if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) {
			ret = -EPERM;
			goto done;
		}
		memaddr = kvirt_to_phys((void *)dd->status);
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		break;
	case RTAIL:
		if (!HFI1_CAP_IS_USET(DMA_RTAIL)) {
			/*
			 * If the memory allocation failed, the context alloc
			 * also would have failed, so we would never get here
			 */
			ret = -EINVAL;
			goto done;
		}
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		memlen = PAGE_SIZE;
		memvirt = (void *)uctxt->rcvhdrtail_kvaddr;
		flags &= ~VM_MAYWRITE;
		break;
	case SUBCTXT_UREGS:
		memaddr = (u64)uctxt->subctxt_uregbase;
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_RCV_HDRQ:
		memaddr = (u64)uctxt->subctxt_rcvhdr_base;
		memlen = uctxt->rcvhdrq_size * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_EGRBUF:
		memaddr = (u64)uctxt->subctxt_rcvegrbuf;
		memlen = uctxt->egrbufs.size * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		flags &= ~VM_MAYWRITE;
		vmf = 1;
		break;
	case SDMA_COMP: {
		struct hfi1_user_sdma_comp_q *cq = fd->cq;

		if (!cq) {
			ret = -EFAULT;
			goto done;
		}
		memaddr = (u64)cq->comps;
		memlen = PAGE_ALIGN(sizeof(*cq->comps) * cq->nentries);
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}

	if ((vma->vm_end - vma->vm_start) != memlen) {
		hfi1_cdbg(PROC, "%u:%u Memory size mismatch %lu:%lu",
			  uctxt->ctxt, fd->subctxt,
			  (vma->vm_end - vma->vm_start), memlen);
		ret = -EINVAL;
		goto done;
	}

	vma->vm_flags = flags;
	hfi1_cdbg(PROC,
		  "%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n",
		  ctxt, subctxt, type, mapio, vmf, memaddr, memlen,
		  vma->vm_end - vma->vm_start, vma->vm_flags);
	if (vmf) {
		vma->vm_pgoff = PFN_DOWN(memaddr);
		vma->vm_ops = &vm_ops;
		ret = 0;
	} else if (mapio) {
		ret = io_remap_pfn_range(vma, vma->vm_start,
					 PFN_DOWN(memaddr),
					 memlen,
					 vma->vm_page_prot);
	} else if (memvirt) {
		ret = remap_pfn_range(vma, vma->vm_start,
				      PFN_DOWN(__pa(memvirt)),
				      memlen,
				      vma->vm_page_prot);
	} else {
		ret = remap_pfn_range(vma, vma->vm_start,
				      PFN_DOWN(memaddr),
				      memlen,
				      vma->vm_page_prot);
	}
done:
	return ret;
}

/*
 * Local (non-chip) user memory is not mapped right away but as it is
 * accessed by the user-level code.
 */
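/*
 * For the mappings that use the fault handler (vmf set above),
 * hfi1_file_mmap() stores the PFN of the backing kernel virtual buffer
 * in vm_pgoff, so the faulting page's kernel virtual address can be
 * recovered from vmf->pgoff here.
 */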
static int vma_fault(struct vm_fault *vmf)
{
	struct page *page;

	page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
	if (!page)
		return VM_FAULT_SIGBUS;

	get_page(page);
	vmf->page = page;

	return 0;
}

static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt)
{
	struct hfi1_ctxtdata *uctxt;
	unsigned pollflag;

	uctxt = ((struct hfi1_filedata *)fp->private_data)->uctxt;
	if (!uctxt)
		pollflag = POLLERR;
	else if (uctxt->poll_type == HFI1_POLL_TYPE_URGENT)
		pollflag = poll_urgent(fp, pt);
	else if (uctxt->poll_type == HFI1_POLL_TYPE_ANYRCV)
		pollflag = poll_next(fp, pt);
	else /* invalid */
		pollflag = POLLERR;

	return pollflag;
}

static int hfi1_file_close(struct inode *inode, struct file *fp)
{
	struct hfi1_filedata *fdata = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);
	unsigned long flags, *ev;

	fp->private_data = NULL;

	if (!uctxt)
		goto done;

	hfi1_cdbg(PROC, "closing ctxt %u:%u", uctxt->ctxt, fdata->subctxt);

	flush_wc();
	/* drain user sdma queue */
	hfi1_user_sdma_free_queues(fdata, uctxt);

	/* release the cpu */
	hfi1_put_proc_affinity(fdata->rec_cpu_num);

	/* clean up rcv side */
	hfi1_user_exp_rcv_free(fdata);

	/*
	 * fdata->uctxt is used in the above cleanup.  It is not ready to be
	 * removed until here.
	 */
	fdata->uctxt = NULL;
	hfi1_rcd_put(uctxt);

	/*
	 * Clear any left over, unhandled events so the next process that
	 * gets this context doesn't get confused.
	 */
	ev = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
			   HFI1_MAX_SHARED_CTXTS) + fdata->subctxt;
	*ev = 0;

	spin_lock_irqsave(&dd->uctxt_lock, flags);
	__clear_bit(fdata->subctxt, uctxt->in_use_ctxts);
	if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
		spin_unlock_irqrestore(&dd->uctxt_lock, flags);
		goto done;
	}
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	/*
	 * Disable receive context and interrupt available, reset all
	 * RcvCtxtCtrl bits to default values.
	 */
	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
		     HFI1_RCVCTRL_TIDFLOW_DIS |
		     HFI1_RCVCTRL_INTRAVAIL_DIS |
		     HFI1_RCVCTRL_TAILUPD_DIS |
		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
	/* Clear the context's J_KEY */
	hfi1_clear_ctxt_jkey(dd, uctxt);
	/*
	 * If a send context is allocated, reset context integrity
	 * checks to default and disable the send context.
	 */
	if (uctxt->sc) {
		set_pio_integrity(uctxt->sc);
		sc_disable(uctxt->sc);
	}

	hfi1_free_ctxt_rcv_groups(uctxt);
	hfi1_clear_ctxt_pkey(dd, uctxt);

	uctxt->event_flags = 0;

	deallocate_ctxt(uctxt);
done:
	mmdrop(fdata->mm);
	kobject_put(&dd->kobj);

	if (atomic_dec_and_test(&dd->user_refcount))
		complete(&dd->user_comp);

	kfree(fdata);
	return 0;
}

/*
 * Convert kernel *virtual* addresses to physical addresses.
 * This is used for vmalloc'ed addresses.
 */
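/*
 * Note that only the page-aligned physical address of the containing
 * page is returned; any sub-page offset in @addr is dropped.
 */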
static u64 kvirt_to_phys(void *addr)
{
	struct page *page;
	u64 paddr = 0;

	page = vmalloc_to_page(addr);
	if (page)
		paddr = page_to_pfn(page) << PAGE_SHIFT;

	return paddr;
}

/**
 * complete_subctxt
 * @fd: valid filedata pointer
 *
 * Sub-context info can only be set up after the base context
 * has been completed.  This is indicated by the clearing of the
 * HFI1_CTXT_BASE_UINIT bit.
 *
 * Wait for the bit to be cleared, and then complete the subcontext
 * initialization.
 *
 */
static int complete_subctxt(struct hfi1_filedata *fd)
{
	int ret;
	unsigned long flags;

	/*
	 * sub-context info can only be set up after the base context
	 * has been completed.
	 */
	ret = wait_event_interruptible(
		fd->uctxt->wait,
		!test_bit(HFI1_CTXT_BASE_UNINIT, &fd->uctxt->event_flags));

	if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags))
		ret = -ENOMEM;

	/* Finish the sub-context init */
	if (!ret) {
		fd->rec_cpu_num = hfi1_get_proc_affinity(fd->uctxt->numa_id);
		ret = init_user_ctxt(fd, fd->uctxt);
	}

	if (ret) {
		/*
		 * Release the sub-context slot before dropping the context
		 * reference; fd->uctxt must still be valid for __clear_bit().
		 */
		spin_lock_irqsave(&fd->dd->uctxt_lock, flags);
		__clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
		spin_unlock_irqrestore(&fd->dd->uctxt_lock, flags);
		hfi1_rcd_put(fd->uctxt);
		fd->uctxt = NULL;
	}

	return ret;
}

static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
{
	int ret;
	unsigned int swmajor, swminor;
	struct hfi1_ctxtdata *uctxt = NULL;

	swmajor = uinfo->userversion >> 16;
	if (swmajor != HFI1_USER_SWMAJOR)
		return -ENODEV;

	if (uinfo->subctxt_cnt > HFI1_MAX_SHARED_CTXTS)
		return -EINVAL;

	swminor = uinfo->userversion & 0xffff;

	/*
	 * Acquire the mutex to protect against multiple creations of what
	 * could be a shared base context.
	 */
	mutex_lock(&hfi1_mutex);
	/*
	 * Get a sub context if available (fd->uctxt will be set).
	 * ret < 0 error, 0 no context, 1 sub-context found
	 */
	ret = find_sub_ctxt(fd, uinfo);

	/*
	 * Allocate a base context if context sharing is not required or a
	 * sub context wasn't found.
	 */
	if (!ret)
		ret = allocate_ctxt(fd, fd->dd, uinfo, &uctxt);

	mutex_unlock(&hfi1_mutex);

	/* Depending on the context type, finish the appropriate init */
	switch (ret) {
	case 0:
		ret = setup_base_ctxt(fd, uctxt);
		if (ret)
			deallocate_ctxt(uctxt);
		break;
	case 1:
		ret = complete_subctxt(fd);
		break;
	default:
		break;
	}

	return ret;
}

/**
 * match_ctxt
 * @fd: valid filedata pointer
 * @uinfo: user info to compare base context with
 * @uctxt: context to compare uinfo to.
 *
 * Compare the given context with the given information to see if it
 * can be used for a sub context.
 */
static int match_ctxt(struct hfi1_filedata *fd,
		      const struct hfi1_user_info *uinfo,
		      struct hfi1_ctxtdata *uctxt)
{
	struct hfi1_devdata *dd = fd->dd;
	unsigned long flags;
	u16 subctxt;

	/* Skip dynamically allocated kernel contexts */
	if (uctxt->sc && (uctxt->sc->type == SC_KERNEL))
		return 0;

	/* Skip ctxt if it doesn't match the requested one */
	if (memcmp(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)) ||
	    uctxt->jkey != generate_jkey(current_uid()) ||
	    uctxt->subctxt_id != uinfo->subctxt_id ||
	    uctxt->subctxt_cnt != uinfo->subctxt_cnt)
		return 0;

	/* Verify the sharing process matches the base */
	if (uctxt->userversion != uinfo->userversion)
		return -EINVAL;

	/* Find an unused sub context */
	spin_lock_irqsave(&dd->uctxt_lock, flags);
	if (bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
		/* context is being closed, do not use */
		spin_unlock_irqrestore(&dd->uctxt_lock, flags);
		return 0;
	}

	subctxt = find_first_zero_bit(uctxt->in_use_ctxts,
				      HFI1_MAX_SHARED_CTXTS);
	if (subctxt >= uctxt->subctxt_cnt) {
		spin_unlock_irqrestore(&dd->uctxt_lock, flags);
		return -EBUSY;
	}

	fd->subctxt = subctxt;
	__set_bit(fd->subctxt, uctxt->in_use_ctxts);
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	fd->uctxt = uctxt;
	hfi1_rcd_get(uctxt);

	return 1;
}

/**
 * find_sub_ctxt
 * @fd: valid filedata pointer
 * @uinfo: matching info to use to find a possible context to share.
 *
 * The hfi1_mutex must be held when this function is called.  It is
 * necessary to ensure serialized creation of shared contexts.
 *
 * Return:
 *    0      No sub-context found
 *    1      Subcontext found and allocated
 *    errno  EINVAL (incorrect parameters)
 *           EBUSY (all sub contexts in use)
 */
static int find_sub_ctxt(struct hfi1_filedata *fd,
			 const struct hfi1_user_info *uinfo)
{
	struct hfi1_ctxtdata *uctxt;
	struct hfi1_devdata *dd = fd->dd;
	u16 i;
	int ret;

	if (!uinfo->subctxt_cnt)
		return 0;

	for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) {
		uctxt = hfi1_rcd_get_by_index(dd, i);
		if (uctxt) {
			ret = match_ctxt(fd, uinfo, uctxt);
			hfi1_rcd_put(uctxt);
			/* a non-zero value means the search is done */
			if (ret)
				return ret;
		}
	}

	return 0;
}

static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
			 struct hfi1_user_info *uinfo,
			 struct hfi1_ctxtdata **rcd)
{
	struct hfi1_ctxtdata *uctxt;
	int ret, numa;

	if (dd->flags & HFI1_FROZEN) {
		/*
		 * Pick an error that is unique from all other errors
		 * that are returned so the user process knows that
		 * it tried to allocate while the SPC was frozen.  It
		 * should be able to retry with success in a short
		 * while.
		 */
		return -EIO;
	}

	if (!dd->freectxts)
		return -EBUSY;

	/*
	 * If we don't have a NUMA node requested, preference is towards
	 * device NUMA node.
	 */
	fd->rec_cpu_num = hfi1_get_proc_affinity(dd->node);
	if (fd->rec_cpu_num != -1)
		numa = cpu_to_node(fd->rec_cpu_num);
	else
		numa = numa_node_id();
	ret = hfi1_create_ctxtdata(dd->pport, numa, &uctxt);
	if (ret < 0) {
		dd_dev_err(dd, "user ctxtdata allocation failed\n");
		return ret;
	}
	hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)",
		  uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num,
		  uctxt->numa_id);

	/*
	 * Allocate and enable a PIO send context.
	 */
	uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize, dd->node);
	if (!uctxt->sc) {
		ret = -ENOMEM;
		goto ctxdata_free;
	}
	hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index,
		  uctxt->sc->hw_context);
	ret = sc_enable(uctxt->sc);
	if (ret)
		goto ctxdata_free;

	/*
	 * Setup sub context information if the user-level has requested
	 * sub contexts.
	 * This has to be done here so the rest of the sub-contexts find the
	 * proper base context.
	 */
	if (uinfo->subctxt_cnt)
		init_subctxts(uctxt, uinfo);
	uctxt->userversion = uinfo->userversion;
	uctxt->flags = hfi1_cap_mask; /* save current flag state */
	init_waitqueue_head(&uctxt->wait);
	strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
	memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid));
	uctxt->jkey = generate_jkey(current_uid());
	hfi1_stats.sps_ctxts++;
	/*
	 * Disable ASPM when there are open user/PSM contexts to avoid
	 * issues with ASPM L1 exit latency
	 */
	if (dd->freectxts-- == dd->num_user_contexts)
		aspm_disable_all(dd);

	*rcd = uctxt;

	return 0;

ctxdata_free:
	hfi1_free_ctxt(uctxt);
	return ret;
}

static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt)
{
	mutex_lock(&hfi1_mutex);
	hfi1_stats.sps_ctxts--;
	if (++uctxt->dd->freectxts == uctxt->dd->num_user_contexts)
		aspm_enable_all(uctxt->dd);
	mutex_unlock(&hfi1_mutex);

	hfi1_free_ctxt(uctxt);
}

static void init_subctxts(struct hfi1_ctxtdata *uctxt,
			  const struct hfi1_user_info *uinfo)
{
	uctxt->subctxt_cnt = uinfo->subctxt_cnt;
	uctxt->subctxt_id = uinfo->subctxt_id;
	set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
}

static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
{
	int ret = 0;
	u16 num_subctxts = uctxt->subctxt_cnt;

	uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE);
	if (!uctxt->subctxt_uregbase)
		return -ENOMEM;

	/* We can take the size of the RcvHdr Queue from the master */
	uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size *
						  num_subctxts);
	if (!uctxt->subctxt_rcvhdr_base) {
		ret = -ENOMEM;
		goto bail_ureg;
	}

	uctxt->subctxt_rcvegrbuf = vmalloc_user(uctxt->egrbufs.size *
						num_subctxts);
	if (!uctxt->subctxt_rcvegrbuf) {
		ret = -ENOMEM;
		goto bail_rhdr;
	}

	return 0;

bail_rhdr:
	vfree(uctxt->subctxt_rcvhdr_base);
	uctxt->subctxt_rcvhdr_base = NULL;
bail_ureg:
	vfree(uctxt->subctxt_uregbase);
	uctxt->subctxt_uregbase = NULL;

	return ret;
}

static void user_init(struct hfi1_ctxtdata *uctxt)
{
	unsigned int rcvctrl_ops = 0;

	/* initialize poll variables... */
	uctxt->urgent = 0;
	uctxt->urgent_poll = 0;

	/*
	 * Now enable the ctxt for receive.
	 * For chips that are set to DMA the tail register to memory
	 * when they change (and when the update bit transitions from
	 * 0 to 1), we turn it off and then back on.
	 * This will (very briefly) affect any other open ctxts, but the
	 * duration is very short, and therefore isn't an issue.  We
	 * explicitly set the in-memory tail copy to 0 beforehand, so we
	 * don't have to wait to be sure the DMA update has happened
	 * (chip resets head/tail to 0 on transition to enable).
	 */
	if (uctxt->rcvhdrtail_kvaddr)
		clear_rcvhdrtail(uctxt);

	/* Setup J_KEY before enabling the context */
	hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey);

	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
		rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
	/*
	 * Ignore the bit in the flags for now until proper
	 * support for multiple packet per rcv array entry is
	 * added.
	 */
	if (!HFI1_CAP_UGET_MASK(uctxt->flags, MULTI_PKT_EGR))
		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_EGR_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
	/*
	 * The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
	 * We can't rely on the correct value to be set from prior
	 * uses of the chip or ctxt. Therefore, add the rcvctrl op
	 * for both cases.
	 */
	if (HFI1_CAP_UGET_MASK(uctxt->flags, DMA_RTAIL))
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
	else
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
}

static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len)
{
	struct hfi1_ctxt_info cinfo;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	int ret = 0;

	memset(&cinfo, 0, sizeof(cinfo));
	cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) &
				HFI1_CAP_MISC_MASK) << HFI1_CAP_USER_SHIFT) |
			HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
			HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
	/* adjust flag if this fd is not able to cache */
	if (!fd->handler)
		cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */

	cinfo.num_active = hfi1_count_active_units();
	cinfo.unit = uctxt->dd->unit;
	cinfo.ctxt = uctxt->ctxt;
	cinfo.subctxt = fd->subctxt;
	cinfo.rcvtids = roundup(uctxt->egrbufs.alloced,
				uctxt->dd->rcv_entries.group_size) +
		uctxt->expected_count;
	cinfo.credits = uctxt->sc->credits;
	cinfo.numa_node = uctxt->numa_id;
	cinfo.rec_cpu = fd->rec_cpu_num;
	cinfo.send_ctxt = uctxt->sc->hw_context;

	cinfo.egrtids = uctxt->egrbufs.alloced;
	cinfo.rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
	cinfo.rcvhdrq_entsize = uctxt->rcvhdrqentsize << 2;
	cinfo.sdma_ring_size = fd->cq->nentries;
	cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;

	trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo);
	if (copy_to_user(ubase, &cinfo, sizeof(cinfo)))
		ret = -EFAULT;

	return ret;
}
static int init_user_ctxt(struct hfi1_filedata *fd,
			  struct hfi1_ctxtdata *uctxt)
{
	int ret;

	ret = hfi1_user_sdma_alloc_queues(uctxt, fd);
	if (ret)
		return ret;

	ret = hfi1_user_exp_rcv_init(fd, uctxt);
	if (ret)
		hfi1_user_sdma_free_queues(fd, uctxt);

	return ret;
}

static int setup_base_ctxt(struct hfi1_filedata *fd,
			   struct hfi1_ctxtdata *uctxt)
{
	struct hfi1_devdata *dd = uctxt->dd;
	int ret = 0;

	hfi1_init_ctxt(uctxt->sc);

	/* Now allocate the RcvHdr queue and eager buffers. */
	ret = hfi1_create_rcvhdrq(dd, uctxt);
	if (ret)
		goto done;

	ret = hfi1_setup_eagerbufs(uctxt);
	if (ret)
		goto done;

	/* If sub-contexts are enabled, do the appropriate setup */
	if (uctxt->subctxt_cnt)
		ret = setup_subctxt(uctxt);
	if (ret)
		goto done;

	ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
	if (ret)
		goto done;

	ret = init_user_ctxt(fd, uctxt);
	if (ret)
		goto done;

	user_init(uctxt);

	/* Now that the context is set up, the fd can get a reference. */
	fd->uctxt = uctxt;
	hfi1_rcd_get(uctxt);

done:
	if (uctxt->subctxt_cnt) {
		/*
		 * On error, set the failed bit so sub-contexts will clean up
		 * correctly.
		 */
		if (ret)
			set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);

		/*
		 * Base context is done (successfully or not), notify anybody
		 * using a sub-context that is waiting for this completion.
		 */
		clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
		wake_up(&uctxt->wait);
	}

	return ret;
}
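/*
 * The *_bufbase values returned below are HFI1_MMAP_TOKEN cookies.
 * User space passes them back as the mmap() offset, and hfi1_file_mmap()
 * decodes them to select the region to map.
 */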
static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len)
{
	struct hfi1_base_info binfo;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	ssize_t sz;
	unsigned offset;
	int ret = 0;

	trace_hfi1_uctxtdata(uctxt->dd, uctxt, fd->subctxt);

	memset(&binfo, 0, sizeof(binfo));
	binfo.hw_version = dd->revision;
	binfo.sw_version = HFI1_KERN_SWVERSION;
	binfo.bthqp = kdeth_qp;
	binfo.jkey = uctxt->jkey;
	/*
	 * If more than 64 contexts are enabled the allocated credit
	 * return will span two or three contiguous pages. Since we only
	 * map the page containing the context's credit return address,
	 * we need to calculate the offset in the proper page.
	 */
	offset = ((u64)uctxt->sc->hw_free -
		  (u64)dd->cr_base[uctxt->numa_id].va) % PAGE_SIZE;
	binfo.sc_credits_addr = HFI1_MMAP_TOKEN(PIO_CRED, uctxt->ctxt,
						fd->subctxt, offset);
	binfo.pio_bufbase = HFI1_MMAP_TOKEN(PIO_BUFS, uctxt->ctxt,
					    fd->subctxt,
					    uctxt->sc->base_addr);
	binfo.pio_bufbase_sop = HFI1_MMAP_TOKEN(PIO_BUFS_SOP,
						uctxt->ctxt,
						fd->subctxt,
						uctxt->sc->base_addr);
	binfo.rcvhdr_bufbase = HFI1_MMAP_TOKEN(RCV_HDRQ, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->rcvhdrq);
	binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->egrbufs.rcvtids[0].dma);
	binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt,
						  fd->subctxt, 0);
	/*
	 * user regs are at
	 * (RXE_PER_CONTEXT_USER + (ctxt * RXE_PER_CONTEXT_SIZE))
	 */
	binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt,
					     fd->subctxt, 0);
	offset = offset_in_page((((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
				  HFI1_MAX_SHARED_CTXTS) + fd->subctxt) *
				sizeof(*dd->events));
	binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt,
					       fd->subctxt,
					       offset);
	binfo.status_bufbase = HFI1_MMAP_TOKEN(STATUS, uctxt->ctxt,
					       fd->subctxt,
					       dd->status);
	if (HFI1_CAP_IS_USET(DMA_RTAIL))
		binfo.rcvhdrtail_base = HFI1_MMAP_TOKEN(RTAIL, uctxt->ctxt,
							fd->subctxt, 0);
	if (uctxt->subctxt_cnt) {
		binfo.subctxt_uregbase = HFI1_MMAP_TOKEN(SUBCTXT_UREGS,
							 uctxt->ctxt,
							 fd->subctxt, 0);
		binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ,
							  uctxt->ctxt,
							  fd->subctxt, 0);
		binfo.subctxt_rcvegrbuf = HFI1_MMAP_TOKEN(SUBCTXT_EGRBUF,
							  uctxt->ctxt,
							  fd->subctxt, 0);
	}
	sz = (len < sizeof(binfo)) ? len : sizeof(binfo);
	if (copy_to_user(ubase, &binfo, sz))
		ret = -EFAULT;
	return ret;
}

static unsigned int poll_urgent(struct file *fp,
				struct poll_table_struct *pt)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (uctxt->urgent != uctxt->urgent_poll) {
		pollflag = POLLIN | POLLRDNORM;
		uctxt->urgent_poll = uctxt->urgent;
	} else {
		pollflag = 0;
		set_bit(HFI1_CTXT_WAITING_URG, &uctxt->event_flags);
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

static unsigned int poll_next(struct file *fp,
			      struct poll_table_struct *pt)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (hdrqempty(uctxt)) {
		set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
		hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt);
		pollflag = 0;
	} else {
		pollflag = POLLIN | POLLRDNORM;
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

/*
 * Find all user contexts in use, and set the specified bit in their
 * event mask.
 * See also find_ctxt() for a similar use, that is specific to send buffers.
 */
int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
{
	struct hfi1_ctxtdata *uctxt;
	struct hfi1_devdata *dd = ppd->dd;
	u16 ctxt;

	if (!dd->events)
		return -EINVAL;

	for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts;
	     ctxt++) {
		uctxt = hfi1_rcd_get_by_index(dd, ctxt);
		if (uctxt) {
			unsigned long *evs = dd->events +
				(uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
				HFI1_MAX_SHARED_CTXTS;
			int i;
			/*
			 * subctxt_cnt is 0 if not shared, so do base
			 * separately, first, then remaining subctxt, if any
			 */
			set_bit(evtbit, evs);
			for (i = 1; i < uctxt->subctxt_cnt; i++)
				set_bit(evtbit, evs + i);
			hfi1_rcd_put(uctxt);
		}
	}

	return 0;
}

/**
 * manage_rcvq - manage a context's receive queue
 * @uctxt: the context
 * @subctxt: the sub-context
 * @start_stop: action to carry out
 *
 * start_stop == 0 disables receive on the context, for use in queue
 * overflow conditions.  start_stop == 1 re-enables, to be used to
 * re-init the software copy of the head register.
 */
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
		       int start_stop)
{
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned int rcvctrl_op;

	if (subctxt)
		goto bail;
	/* atomically clear receive enable ctxt. */
	if (start_stop) {
		/*
		 * On enable, force in-memory copy of the tail register to
		 * 0, so that protocol code doesn't have to worry about
		 * whether or not the chip has yet updated the in-memory
		 * copy or not on return from the system call. The chip
		 * always resets its tail register back to 0 on a
		 * transition from disabled to enabled.
		 */
		if (uctxt->rcvhdrtail_kvaddr)
			clear_rcvhdrtail(uctxt);
		rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
	} else {
		rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
	}
	hfi1_rcvctrl(dd, rcvctrl_op, uctxt);
	/* always; new head should be equal to new tail; see above */
bail:
	return 0;
}

/*
 * Clear the event notifier events for this context.
 * The user process then performs actions appropriate to the bits having
 * been set, if desired, and checks again in the future.
 */
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
			  unsigned long events)
{
	int i;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned long *evs;

	if (!dd->events)
		return 0;

	evs = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
			    HFI1_MAX_SHARED_CTXTS) + subctxt;

	for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) {
		if (!test_bit(i, &events))
			continue;
		clear_bit(i, evs);
	}
	return 0;
}

static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey)
{
	int ret = -ENOENT, i, intable = 0;
	struct hfi1_pportdata *ppd = uctxt->ppd;
	struct hfi1_devdata *dd = uctxt->dd;

	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY) {
		ret = -EINVAL;
		goto done;
	}

	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++)
		if (pkey == ppd->pkeys[i]) {
			intable = 1;
			break;
		}

	if (intable)
		ret = hfi1_set_ctxt_pkey(dd, uctxt, pkey);
done:
	return ret;
}

static void user_remove(struct hfi1_devdata *dd)
{
	hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device);
}

static int user_add(struct hfi1_devdata *dd)
{
	char name[10];
	int ret;

	snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
	ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
			     &dd->user_cdev, &dd->user_device,
			     true, &dd->kobj);
	if (ret)
		user_remove(dd);

	return ret;
}

/*
 * Create per-unit files in /dev
 */
int hfi1_device_create(struct hfi1_devdata *dd)
{
	return user_add(dd);
}

/*
 * Remove per-unit files in /dev
 * void, core kernel returns no errors for this stuff
 */
void hfi1_device_remove(struct hfi1_devdata *dd)
{
	user_remove(dd);
}