/*
 * Copyright(c) 2015-2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/poll.h>
#include <linux/cdev.h>
#include <linux/vmalloc.h>
#include <linux/io.h>
#include <linux/sched/mm.h>
#include <linux/bitmap.h>

#include <rdma/ib.h>

#include "hfi.h"
#include "pio.h"
#include "device.h"
#include "common.h"
#include "trace.h"
#include "mmu_rb.h"
#include "user_sdma.h"
#include "user_exp_rcv.h"
#include "aspm.h"

#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt

#define SEND_CTXT_HALT_TIMEOUT 1000 /* msecs */

/*
 * File operation functions
 */
static int hfi1_file_open(struct inode *inode, struct file *fp);
static int hfi1_file_close(struct inode *inode, struct file *fp);
static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from);
static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt);
static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma);

static u64 kvirt_to_phys(void *addr);
static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len);
static void init_subctxts(struct hfi1_ctxtdata *uctxt,
			  const struct hfi1_user_info *uinfo);
static int init_user_ctxt(struct hfi1_filedata *fd,
			  struct hfi1_ctxtdata *uctxt);
static void user_init(struct hfi1_ctxtdata *uctxt);
static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len);
static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len);
static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg,
			      u32 len);
static int user_exp_rcv_clear(struct hfi1_filedata *fd, unsigned long arg,
			      u32 len);
static int user_exp_rcv_invalid(struct hfi1_filedata *fd, unsigned long arg,
				u32 len);
static int setup_base_ctxt(struct hfi1_filedata *fd,
			   struct hfi1_ctxtdata *uctxt);
static int setup_subctxt(struct hfi1_ctxtdata *uctxt);

static int find_sub_ctxt(struct hfi1_filedata *fd,
			 const struct hfi1_user_info *uinfo);
static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
			 struct hfi1_user_info *uinfo,
			 struct hfi1_ctxtdata **cd);
static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt);
static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt);
static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt);
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
			  unsigned long arg);
static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg);
static int ctxt_reset(struct hfi1_ctxtdata *uctxt);
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
		       unsigned long arg);
static int vma_fault(struct vm_fault *vmf);
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg);

static const struct file_operations hfi1_file_ops = {
	.owner = THIS_MODULE,
	.write_iter = hfi1_write_iter,
	.open = hfi1_file_open,
	.release = hfi1_file_close,
	.unlocked_ioctl = hfi1_file_ioctl,
	.poll = hfi1_poll,
	.mmap = hfi1_file_mmap,
	.llseek = noop_llseek,
};

static const struct vm_operations_struct vm_ops = {
	.fault = vma_fault,
};

/*
 * Types of memories mapped into user processes' space
 */
enum mmap_types {
	PIO_BUFS = 1,
	PIO_BUFS_SOP,
	PIO_CRED,
	RCV_HDRQ,
	RCV_EGRBUF,
	UREGS,
	EVENTS,
	STATUS,
	RTAIL,
	SUBCTXT_UREGS,
	SUBCTXT_RCV_HDRQ,
	SUBCTXT_EGRBUF,
	SDMA_COMP
};

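/*
 * Illustrative note (not driver-defined constants): per the masks and
 * shifts below, an mmap token packs the magic value into bits 63:32,
 * the memory type into bits 27:24, the context into bits 23:16, the
 * sub-context into bits 15:12, and the in-page offset of the target
 * address into bits 11:0.  For example, a UREGS (type 6) mapping for
 * context 5, sub-context 2 at page offset 0 encodes as
 * 0xdabbad0006052000.
 */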
/*
 * Masks and offsets defining the mmap tokens
 */
#define HFI1_MMAP_OFFSET_MASK   0xfffULL
#define HFI1_MMAP_OFFSET_SHIFT  0
#define HFI1_MMAP_SUBCTXT_MASK  0xfULL
#define HFI1_MMAP_SUBCTXT_SHIFT 12
#define HFI1_MMAP_CTXT_MASK     0xffULL
#define HFI1_MMAP_CTXT_SHIFT    16
#define HFI1_MMAP_TYPE_MASK     0xfULL
#define HFI1_MMAP_TYPE_SHIFT    24
#define HFI1_MMAP_MAGIC_MASK    0xffffffffULL
#define HFI1_MMAP_MAGIC_SHIFT   32

#define HFI1_MMAP_MAGIC         0xdabbad00

#define HFI1_MMAP_TOKEN_SET(field, val)	\
	(((val) & HFI1_MMAP_##field##_MASK) << HFI1_MMAP_##field##_SHIFT)
#define HFI1_MMAP_TOKEN_GET(field, token) \
	(((token) >> HFI1_MMAP_##field##_SHIFT) & HFI1_MMAP_##field##_MASK)
#define HFI1_MMAP_TOKEN(type, ctxt, subctxt, addr)   \
	(HFI1_MMAP_TOKEN_SET(MAGIC, HFI1_MMAP_MAGIC) | \
	HFI1_MMAP_TOKEN_SET(TYPE, type) | \
	HFI1_MMAP_TOKEN_SET(CTXT, ctxt) | \
	HFI1_MMAP_TOKEN_SET(SUBCTXT, subctxt) | \
	HFI1_MMAP_TOKEN_SET(OFFSET, (offset_in_page(addr))))

#define dbg(fmt, ...)				\
	pr_info(fmt, ##__VA_ARGS__)

static inline int is_valid_mmap(u64 token)
{
	return (HFI1_MMAP_TOKEN_GET(MAGIC, token) == HFI1_MMAP_MAGIC);
}

static int hfi1_file_open(struct inode *inode, struct file *fp)
{
	struct hfi1_filedata *fd;
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);

	if (!((dd->flags & HFI1_PRESENT) && dd->kregbase1))
		return -EINVAL;

	if (!atomic_inc_not_zero(&dd->user_refcount))
		return -ENXIO;

	/* Just take a ref now. Not all opens result in a context assign */
	kobject_get(&dd->kobj);

	/* The real work is performed later in assign_ctxt() */

	fd = kzalloc(sizeof(*fd), GFP_KERNEL);

	if (fd) {
		fd->rec_cpu_num = -1; /* no cpu affinity by default */
		fd->mm = current->mm;
		mmgrab(fd->mm);
		fd->dd = dd;
		fp->private_data = fd;
	} else {
		fp->private_data = NULL;

		if (atomic_dec_and_test(&dd->user_refcount))
			complete(&dd->user_comp);

		return -ENOMEM;
	}

	return 0;
}

static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	int ret = 0;
	int uval = 0;

	hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd);
	if (cmd != HFI1_IOCTL_ASSIGN_CTXT &&
	    cmd != HFI1_IOCTL_GET_VERS &&
	    !uctxt)
		return -EINVAL;

	switch (cmd) {
	case HFI1_IOCTL_ASSIGN_CTXT:
		ret = assign_ctxt(fd, arg, _IOC_SIZE(cmd));
		break;

	case HFI1_IOCTL_CTXT_INFO:
		ret = get_ctxt_info(fd, arg, _IOC_SIZE(cmd));
		break;

	case HFI1_IOCTL_USER_INFO:
		ret = get_base_info(fd, arg, _IOC_SIZE(cmd));
		break;

	case HFI1_IOCTL_CREDIT_UPD:
		if (uctxt)
			sc_return_credits(uctxt->sc);
		break;

	case HFI1_IOCTL_TID_UPDATE:
		ret = user_exp_rcv_setup(fd, arg, _IOC_SIZE(cmd));
		break;

	case HFI1_IOCTL_TID_FREE:
		ret = user_exp_rcv_clear(fd, arg, _IOC_SIZE(cmd));
		break;

	case HFI1_IOCTL_TID_INVAL_READ:
		ret = user_exp_rcv_invalid(fd, arg, _IOC_SIZE(cmd));
		break;

	case HFI1_IOCTL_RECV_CTRL:
		ret = manage_rcvq(uctxt, fd->subctxt, arg);
		break;

	case HFI1_IOCTL_POLL_TYPE:
		if (get_user(uval, (int __user *)arg))
			return -EFAULT;
		uctxt->poll_type = (typeof(uctxt->poll_type))uval;
		break;

	case HFI1_IOCTL_ACK_EVENT:
		ret = user_event_ack(uctxt, fd->subctxt, arg);
		break;

	case HFI1_IOCTL_SET_PKEY:
		ret = set_ctxt_pkey(uctxt, arg);
		break;

	case HFI1_IOCTL_CTXT_RESET:
		ret = ctxt_reset(uctxt);
		break;

	case HFI1_IOCTL_GET_VERS:
		uval = HFI1_USER_SWVERSION;
		if (put_user(uval, (int __user *)arg))
			return -EFAULT;
		break;

	default:
		return -EINVAL;
	}

	return ret;
}

static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
	struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
	struct hfi1_user_sdma_pkt_q *pq = fd->pq;
	struct hfi1_user_sdma_comp_q *cq = fd->cq;
	int done = 0, reqs = 0;
	unsigned long dim = from->nr_segs;

	if (!cq || !pq)
		return -EIO;

	if (!iter_is_iovec(from) || !dim)
		return -EINVAL;

	trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim);

	if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
		return -ENOSPC;

	while (dim) {
		int ret;
		unsigned long count = 0;

		ret = hfi1_user_sdma_process_request(
			fd, (struct iovec *)(from->iov + done),
			dim, &count);
		if (ret) {
			reqs = ret;
			break;
		}
		dim -= count;
		done += count;
		reqs++;
	}

	return reqs;
}

static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd;
	unsigned long flags;
	u64 token = vma->vm_pgoff << PAGE_SHIFT,
		memaddr = 0;
	void *memvirt = NULL;
	u8 subctxt, mapio = 0, vmf = 0, type;
	ssize_t memlen = 0;
	int ret = 0;
	u16 ctxt;

	if (!is_valid_mmap(token) || !uctxt ||
	    !(vma->vm_flags & VM_SHARED)) {
		ret = -EINVAL;
		goto done;
	}
	dd = uctxt->dd;
	ctxt = HFI1_MMAP_TOKEN_GET(CTXT, token);
	subctxt = HFI1_MMAP_TOKEN_GET(SUBCTXT, token);
	type = HFI1_MMAP_TOKEN_GET(TYPE, token);
	if (ctxt != uctxt->ctxt || subctxt != fd->subctxt) {
		ret = -EINVAL;
		goto done;
	}

	flags = vma->vm_flags;

	switch (type) {
	case PIO_BUFS:
	case PIO_BUFS_SOP:
		memaddr = ((dd->physaddr + TXE_PIO_SEND) +
				/* chip pio base */
			   (uctxt->sc->hw_context * BIT(16))) +
				/* 64K PIO space / ctxt */
			  (type == PIO_BUFS_SOP ?
				(TXE_PIO_SIZE / 2) : 0); /* sop? */
		/*
		 * Map only the amount allocated to the context, not the
		 * entire available context's PIO space.
		 */
		memlen = PAGE_ALIGN(uctxt->sc->credits * PIO_BLOCK_SIZE);
		flags &= ~VM_MAYREAD;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
		mapio = 1;
		break;
	case PIO_CRED:
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		/*
		 * The credit return location for this context could be on the
		 * second or third page allocated for credit returns (if number
		 * of enabled contexts > 64 and 128 respectively).
		 */
		memvirt = dd->cr_base[uctxt->numa_id].va;
		memaddr = virt_to_phys(memvirt) +
			(((u64)uctxt->sc->hw_free -
			  (u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK);
		memlen = PAGE_SIZE;
		flags &= ~VM_MAYWRITE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		/*
		 * The driver has already allocated memory for credit
		 * returns and programmed it into the chip. Has that
		 * memory been flagged as non-cached?
		 */
		/* vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); */
		mapio = 1;
		break;
	case RCV_HDRQ:
		memlen = uctxt->rcvhdrq_size;
		memvirt = uctxt->rcvhdrq;
		break;
	case RCV_EGRBUF: {
		unsigned long addr;
		int i;
		/*
		 * The RcvEgr buffer needs to be handled differently
		 * as multiple non-contiguous pages need to be mapped
		 * into the user process.
		 */
		memlen = uctxt->egrbufs.size;
		if ((vma->vm_end - vma->vm_start) != memlen) {
			dd_dev_err(dd, "Eager buffer map size invalid (%lu != %lu)\n",
				   (vma->vm_end - vma->vm_start), memlen);
			ret = -EINVAL;
			goto done;
		}
		if (vma->vm_flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		vma->vm_flags &= ~VM_MAYWRITE;
		addr = vma->vm_start;
		for (i = 0; i < uctxt->egrbufs.numbufs; i++) {
			memlen = uctxt->egrbufs.buffers[i].len;
			memvirt = uctxt->egrbufs.buffers[i].addr;
			ret = remap_pfn_range(
				vma, addr,
				/*
				 * virt_to_pfn() does the same, but
				 * it's not available on x86_64
				 * when CONFIG_MMU is enabled.
				 */
				PFN_DOWN(__pa(memvirt)),
				memlen,
				vma->vm_page_prot);
			if (ret < 0)
				goto done;
			addr += memlen;
		}
		ret = 0;
		goto done;
	}
	case UREGS:
		/*
		 * Map only the page that contains this context's user
		 * registers.
		 */
		memaddr = (unsigned long)
			(dd->physaddr + RXE_PER_CONTEXT_USER)
			+ (uctxt->ctxt * RXE_PER_CONTEXT_SIZE);
		/*
		 * TidFlow table is on the same page as the rest of the
		 * user registers.
		 */
		memlen = PAGE_SIZE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
		mapio = 1;
		break;
	case EVENTS:
		/*
		 * Use the page where this context's flags are. User level
		 * knows where its own bitmap is within the page.
		 */
		memaddr = (unsigned long)
			(dd->events + uctxt_offset(uctxt)) & PAGE_MASK;
		memlen = PAGE_SIZE;
		/*
		 * v3.7 removes VM_RESERVED but the effect is kept by
		 * using VM_IO.
		 */
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case STATUS:
		if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) {
			ret = -EPERM;
			goto done;
		}
		memaddr = kvirt_to_phys((void *)dd->status);
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		break;
	case RTAIL:
		if (!HFI1_CAP_IS_USET(DMA_RTAIL)) {
			/*
			 * If the memory allocation failed, the context alloc
			 * also would have failed, so we would never get here
			 */
			ret = -EINVAL;
			goto done;
		}
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		memlen = PAGE_SIZE;
		memvirt = (void *)uctxt->rcvhdrtail_kvaddr;
		flags &= ~VM_MAYWRITE;
		break;
	case SUBCTXT_UREGS:
		memaddr = (u64)uctxt->subctxt_uregbase;
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_RCV_HDRQ:
		memaddr = (u64)uctxt->subctxt_rcvhdr_base;
		memlen = uctxt->rcvhdrq_size * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_EGRBUF:
		memaddr = (u64)uctxt->subctxt_rcvegrbuf;
		memlen = uctxt->egrbufs.size * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		flags &= ~VM_MAYWRITE;
		vmf = 1;
		break;
	case SDMA_COMP: {
		struct hfi1_user_sdma_comp_q *cq = fd->cq;

		if (!cq) {
			ret = -EFAULT;
			goto done;
		}
		memaddr = (u64)cq->comps;
		memlen = PAGE_ALIGN(sizeof(*cq->comps) * cq->nentries);
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}

	if ((vma->vm_end - vma->vm_start) != memlen) {
		hfi1_cdbg(PROC, "%u:%u Memory size mismatch %lu:%lu",
			  uctxt->ctxt, fd->subctxt,
			  (vma->vm_end - vma->vm_start), memlen);
		ret = -EINVAL;
		goto done;
	}

	vma->vm_flags = flags;
	hfi1_cdbg(PROC,
		  "%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n",
		  ctxt, subctxt, type, mapio, vmf, memaddr, memlen,
		  vma->vm_end - vma->vm_start, vma->vm_flags);
	if (vmf) {
		vma->vm_pgoff = PFN_DOWN(memaddr);
		vma->vm_ops = &vm_ops;
		ret = 0;
	} else if (mapio) {
		ret = io_remap_pfn_range(vma, vma->vm_start,
					 PFN_DOWN(memaddr),
					 memlen,
					 vma->vm_page_prot);
	} else if (memvirt) {
		ret = remap_pfn_range(vma, vma->vm_start,
				      PFN_DOWN(__pa(memvirt)),
				      memlen,
				      vma->vm_page_prot);
	} else {
		ret = remap_pfn_range(vma, vma->vm_start,
				      PFN_DOWN(memaddr),
				      memlen,
				      vma->vm_page_prot);
	}
done:
	return ret;
}

/*
 * Local (non-chip) user memory is not mapped right away but as it is
 * accessed by the user-level code.
 */
static int vma_fault(struct vm_fault *vmf)
{
	struct page *page;

	page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
	if (!page)
		return VM_FAULT_SIGBUS;

	get_page(page);
	vmf->page = page;

	return 0;
}

static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt)
{
	struct hfi1_ctxtdata *uctxt;
	unsigned pollflag;

	uctxt = ((struct hfi1_filedata *)fp->private_data)->uctxt;
	if (!uctxt)
		pollflag = POLLERR;
	else if (uctxt->poll_type == HFI1_POLL_TYPE_URGENT)
		pollflag = poll_urgent(fp, pt);
	else if (uctxt->poll_type == HFI1_POLL_TYPE_ANYRCV)
		pollflag = poll_next(fp, pt);
	else /* invalid */
		pollflag = POLLERR;

	return pollflag;
}

static int hfi1_file_close(struct inode *inode, struct file *fp)
{
	struct hfi1_filedata *fdata = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);
	unsigned long flags, *ev;

	fp->private_data = NULL;

	if (!uctxt)
		goto done;

	hfi1_cdbg(PROC, "closing ctxt %u:%u", uctxt->ctxt, fdata->subctxt);

	flush_wc();
	/* drain user sdma queue */
	hfi1_user_sdma_free_queues(fdata, uctxt);

	/* release the cpu */
	hfi1_put_proc_affinity(fdata->rec_cpu_num);

	/* clean up rcv side */
	hfi1_user_exp_rcv_free(fdata);

	/*
	 * fdata->uctxt is used in the above cleanup.  It is not ready to be
	 * removed until here.
	 */
	fdata->uctxt = NULL;
	hfi1_rcd_put(uctxt);

	/*
	 * Clear any left over, unhandled events so the next process that
	 * gets this context doesn't get confused.
	 */
	ev = dd->events + uctxt_offset(uctxt) + fdata->subctxt;
	*ev = 0;

	spin_lock_irqsave(&dd->uctxt_lock, flags);
	__clear_bit(fdata->subctxt, uctxt->in_use_ctxts);
	if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
		spin_unlock_irqrestore(&dd->uctxt_lock, flags);
		goto done;
	}
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	/*
	 * Disable receive context and interrupt available, reset all
	 * RcvCtxtCtrl bits to default values.
	 */
	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
		     HFI1_RCVCTRL_TIDFLOW_DIS |
		     HFI1_RCVCTRL_INTRAVAIL_DIS |
		     HFI1_RCVCTRL_TAILUPD_DIS |
		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
	/* Clear the context's J_KEY */
	hfi1_clear_ctxt_jkey(dd, uctxt);
	/*
	 * If a send context is allocated, reset context integrity
	 * checks to default and disable the send context.
	 */
	if (uctxt->sc) {
		set_pio_integrity(uctxt->sc);
		sc_disable(uctxt->sc);
	}

	hfi1_free_ctxt_rcv_groups(uctxt);
	hfi1_clear_ctxt_pkey(dd, uctxt);

	uctxt->event_flags = 0;

	deallocate_ctxt(uctxt);
done:
	mmdrop(fdata->mm);
	kobject_put(&dd->kobj);

	if (atomic_dec_and_test(&dd->user_refcount))
		complete(&dd->user_comp);

	kfree(fdata);
	return 0;
}

/*
 * Convert kernel *virtual* addresses to physical addresses.
 * This is used for vmalloc'ed addresses.
 */
static u64 kvirt_to_phys(void *addr)
{
	struct page *page;
	u64 paddr = 0;

	page = vmalloc_to_page(addr);
	if (page)
		paddr = page_to_pfn(page) << PAGE_SHIFT;

	return paddr;
}

/**
 * complete_subctxt
 * @fd: valid filedata pointer
 *
 * Sub-context info can only be set up after the base context
 * has been completed.  This is indicated by the clearing of the
 * HFI1_CTXT_BASE_UNINIT bit.
 *
 * Wait for the bit to be cleared, and then complete the subcontext
 * initialization.
 *
 */
static int complete_subctxt(struct hfi1_filedata *fd)
{
	int ret;
	unsigned long flags;

	/*
	 * sub-context info can only be set up after the base context
	 * has been completed.
	 */
	ret = wait_event_interruptible(
		fd->uctxt->wait,
		!test_bit(HFI1_CTXT_BASE_UNINIT, &fd->uctxt->event_flags));

	if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags))
		ret = -ENOMEM;

	/* Finish the sub-context init */
	if (!ret) {
		fd->rec_cpu_num = hfi1_get_proc_affinity(fd->uctxt->numa_id);
		ret = init_user_ctxt(fd, fd->uctxt);
	}

	if (ret) {
		/* Release the sub-context before dropping our reference */
		spin_lock_irqsave(&fd->dd->uctxt_lock, flags);
		__clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
		spin_unlock_irqrestore(&fd->dd->uctxt_lock, flags);
		hfi1_rcd_put(fd->uctxt);
		fd->uctxt = NULL;
	}

	return ret;
}

static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len)
{
	int ret;
	unsigned int swmajor;
	struct hfi1_ctxtdata *uctxt = NULL;
	struct hfi1_user_info uinfo;

	if (fd->uctxt)
		return -EINVAL;

	if (sizeof(uinfo) != len)
		return -EINVAL;

	if (copy_from_user(&uinfo, (void __user *)arg, sizeof(uinfo)))
		return -EFAULT;

	swmajor = uinfo.userversion >> 16;
	if (swmajor != HFI1_USER_SWMAJOR)
		return -ENODEV;

	if (uinfo.subctxt_cnt > HFI1_MAX_SHARED_CTXTS)
		return -EINVAL;

	/*
	 * Acquire the mutex to protect against multiple creations of what
	 * could be a shared base context.
	 */
	mutex_lock(&hfi1_mutex);
	/*
	 * Get a sub context if available (fd->uctxt will be set).
	 * ret < 0 error, 0 no context, 1 sub-context found
	 */
	ret = find_sub_ctxt(fd, &uinfo);

	/*
	 * Allocate a base context if context sharing is not required or a
	 * sub context wasn't found.
	 */
	if (!ret)
		ret = allocate_ctxt(fd, fd->dd, &uinfo, &uctxt);

	mutex_unlock(&hfi1_mutex);

	/* Depending on the context type, finish the appropriate init */
	switch (ret) {
	case 0:
		ret = setup_base_ctxt(fd, uctxt);
		if (ret)
			deallocate_ctxt(uctxt);
		break;
	case 1:
		ret = complete_subctxt(fd);
		break;
	default:
		break;
	}

	return ret;
}

/**
 * match_ctxt
 * @fd: valid filedata pointer
 * @uinfo: user info to compare base context with
 * @uctxt: context to compare uinfo to.
 *
 * Compare the given context with the given information to see if it
 * can be used for a sub context.
 */
static int match_ctxt(struct hfi1_filedata *fd,
		      const struct hfi1_user_info *uinfo,
		      struct hfi1_ctxtdata *uctxt)
{
	struct hfi1_devdata *dd = fd->dd;
	unsigned long flags;
	u16 subctxt;

	/* Skip dynamically allocated kernel contexts */
	if (uctxt->sc && (uctxt->sc->type == SC_KERNEL))
		return 0;

	/* Skip ctxt if it doesn't match the requested one */
	if (memcmp(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)) ||
	    uctxt->jkey != generate_jkey(current_uid()) ||
	    uctxt->subctxt_id != uinfo->subctxt_id ||
	    uctxt->subctxt_cnt != uinfo->subctxt_cnt)
		return 0;

	/* Verify the sharing process matches the base */
	if (uctxt->userversion != uinfo->userversion)
		return -EINVAL;

	/* Find an unused sub context */
	spin_lock_irqsave(&dd->uctxt_lock, flags);
	if (bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
		/* context is being closed, do not use */
		spin_unlock_irqrestore(&dd->uctxt_lock, flags);
		return 0;
	}

	subctxt = find_first_zero_bit(uctxt->in_use_ctxts,
				      HFI1_MAX_SHARED_CTXTS);
	if (subctxt >= uctxt->subctxt_cnt) {
		spin_unlock_irqrestore(&dd->uctxt_lock, flags);
		return -EBUSY;
	}

	fd->subctxt = subctxt;
	__set_bit(fd->subctxt, uctxt->in_use_ctxts);
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	fd->uctxt = uctxt;
	hfi1_rcd_get(uctxt);

	return 1;
}

/**
 * find_sub_ctxt
 * @fd: valid filedata pointer
 * @uinfo: matching info to use to find a possible context to share.
 *
 * The hfi1_mutex must be held when this function is called.  It is
 * necessary to ensure serialized creation of shared contexts.
 *
 * Return:
 *    0      No sub-context found
 *    1      Subcontext found and allocated
 *    errno  EINVAL (incorrect parameters)
 *           EBUSY (all sub contexts in use)
 */
static int find_sub_ctxt(struct hfi1_filedata *fd,
			 const struct hfi1_user_info *uinfo)
{
	struct hfi1_ctxtdata *uctxt;
	struct hfi1_devdata *dd = fd->dd;
	u16 i;
	int ret;

	if (!uinfo->subctxt_cnt)
		return 0;

	for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) {
		uctxt = hfi1_rcd_get_by_index(dd, i);
		if (uctxt) {
			ret = match_ctxt(fd, uinfo, uctxt);
			hfi1_rcd_put(uctxt);
			/* a non-zero value means we are done */
			if (ret)
				return ret;
		}
	}

	return 0;
}

static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
			 struct hfi1_user_info *uinfo,
			 struct hfi1_ctxtdata **rcd)
{
	struct hfi1_ctxtdata *uctxt;
	int ret, numa;

	if (dd->flags & HFI1_FROZEN) {
		/*
		 * Pick an error that is unique from all other errors
		 * that are returned so the user process knows that
		 * it tried to allocate while the SPC was frozen.  It
		 * should be able to retry with success in a short
		 * while.
		 */
		return -EIO;
	}

	if (!dd->freectxts)
		return -EBUSY;

	/*
	 * If we don't have a NUMA node requested, preference is towards
	 * device NUMA node.
	 */
	fd->rec_cpu_num = hfi1_get_proc_affinity(dd->node);
	if (fd->rec_cpu_num != -1)
		numa = cpu_to_node(fd->rec_cpu_num);
	else
		numa = numa_node_id();
	ret = hfi1_create_ctxtdata(dd->pport, numa, &uctxt);
	if (ret < 0) {
		dd_dev_err(dd, "user ctxtdata allocation failed\n");
		return ret;
	}
	hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)",
		  uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num,
		  uctxt->numa_id);

	/*
	 * Allocate and enable a PIO send context.
	 */
	uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize, dd->node);
	if (!uctxt->sc) {
		ret = -ENOMEM;
		goto ctxdata_free;
	}
	hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index,
		  uctxt->sc->hw_context);
	ret = sc_enable(uctxt->sc);
	if (ret)
		goto ctxdata_free;

	/*
	 * Setup sub context information if the user-level has requested
	 * sub contexts.
	 * This has to be done here so the rest of the sub-contexts find the
	 * proper base context.
	 */
	if (uinfo->subctxt_cnt)
		init_subctxts(uctxt, uinfo);
	uctxt->userversion = uinfo->userversion;
	uctxt->flags = hfi1_cap_mask; /* save current flag state */
	init_waitqueue_head(&uctxt->wait);
	strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
	memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid));
	uctxt->jkey = generate_jkey(current_uid());
	hfi1_stats.sps_ctxts++;
	/*
	 * Disable ASPM when there are open user/PSM contexts to avoid
	 * issues with ASPM L1 exit latency
	 */
	if (dd->freectxts-- == dd->num_user_contexts)
		aspm_disable_all(dd);

	*rcd = uctxt;

	return 0;

ctxdata_free:
	hfi1_free_ctxt(uctxt);
	return ret;
}

static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt)
{
	mutex_lock(&hfi1_mutex);
	hfi1_stats.sps_ctxts--;
	if (++uctxt->dd->freectxts == uctxt->dd->num_user_contexts)
		aspm_enable_all(uctxt->dd);
	mutex_unlock(&hfi1_mutex);

	hfi1_free_ctxt(uctxt);
}

static void init_subctxts(struct hfi1_ctxtdata *uctxt,
			  const struct hfi1_user_info *uinfo)
{
	uctxt->subctxt_cnt = uinfo->subctxt_cnt;
	uctxt->subctxt_id = uinfo->subctxt_id;
	set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
}

static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
{
	int ret = 0;
	u16 num_subctxts = uctxt->subctxt_cnt;

	uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE);
	if (!uctxt->subctxt_uregbase)
		return -ENOMEM;

	/* We can take the size of the RcvHdr Queue from the master */
	uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size *
						  num_subctxts);
	if (!uctxt->subctxt_rcvhdr_base) {
		ret = -ENOMEM;
		goto bail_ureg;
	}

	uctxt->subctxt_rcvegrbuf = vmalloc_user(uctxt->egrbufs.size *
						num_subctxts);
	if (!uctxt->subctxt_rcvegrbuf) {
		ret = -ENOMEM;
		goto bail_rhdr;
	}

	return 0;

bail_rhdr:
	vfree(uctxt->subctxt_rcvhdr_base);
	uctxt->subctxt_rcvhdr_base = NULL;
bail_ureg:
	vfree(uctxt->subctxt_uregbase);
	uctxt->subctxt_uregbase = NULL;

	return ret;
}

static void user_init(struct hfi1_ctxtdata *uctxt)
{
	unsigned int rcvctrl_ops = 0;

	/* initialize poll variables... */
	uctxt->urgent = 0;
	uctxt->urgent_poll = 0;

	/*
	 * Now enable the ctxt for receive.
	 * For chips that are set to DMA the tail register to memory
	 * when they change (and when the update bit transitions from
	 * 0 to 1), we turn it off and then back on.
	 * This will (very briefly) affect any other open ctxts, but the
	 * duration is very short, and therefore isn't an issue.  We
	 * explicitly set the in-memory tail copy to 0 beforehand, so we
	 * don't have to wait to be sure the DMA update has happened
	 * (chip resets head/tail to 0 on transition to enable).
	 */
	if (uctxt->rcvhdrtail_kvaddr)
		clear_rcvhdrtail(uctxt);

	/* Setup J_KEY before enabling the context */
	hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey);

	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
		rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
	/*
	 * Ignore the bit in the flags for now until proper
	 * support for multiple packet per rcv array entry is
	 * added.
	 */
	if (!HFI1_CAP_UGET_MASK(uctxt->flags, MULTI_PKT_EGR))
		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_EGR_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
	/*
	 * The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
	 * We can't rely on the correct value to be set from prior
	 * uses of the chip or ctxt. Therefore, add the rcvctrl op
	 * for both cases.
	 */
	if (HFI1_CAP_UGET_MASK(uctxt->flags, DMA_RTAIL))
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
	else
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
}

static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
{
	struct hfi1_ctxt_info cinfo;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;

	if (sizeof(cinfo) != len)
		return -EINVAL;

	memset(&cinfo, 0, sizeof(cinfo));
	cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) &
				HFI1_CAP_MISC_MASK) << HFI1_CAP_USER_SHIFT) |
			HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
			HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
	/* adjust flag if this fd is not able to cache */
	if (!fd->handler)
		cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */

	cinfo.num_active = hfi1_count_active_units();
	cinfo.unit = uctxt->dd->unit;
	cinfo.ctxt = uctxt->ctxt;
	cinfo.subctxt = fd->subctxt;
	cinfo.rcvtids = roundup(uctxt->egrbufs.alloced,
				uctxt->dd->rcv_entries.group_size) +
		uctxt->expected_count;
	cinfo.credits = uctxt->sc->credits;
	cinfo.numa_node = uctxt->numa_id;
	cinfo.rec_cpu = fd->rec_cpu_num;
	cinfo.send_ctxt = uctxt->sc->hw_context;

	cinfo.egrtids = uctxt->egrbufs.alloced;
	cinfo.rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
	cinfo.rcvhdrq_entsize = uctxt->rcvhdrqentsize << 2;
	cinfo.sdma_ring_size = fd->cq->nentries;
	cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;

	trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo);
	if (copy_to_user((void __user *)arg, &cinfo, len))
		return -EFAULT;

	return 0;
}

static int init_user_ctxt(struct hfi1_filedata *fd,
			  struct hfi1_ctxtdata *uctxt)
{
	int ret;

	ret = hfi1_user_sdma_alloc_queues(uctxt, fd);
	if (ret)
		return ret;

	ret = hfi1_user_exp_rcv_init(fd, uctxt);
	if (ret)
		hfi1_user_sdma_free_queues(fd, uctxt);

	return ret;
}

static int setup_base_ctxt(struct hfi1_filedata *fd,
			   struct hfi1_ctxtdata *uctxt)
{
	struct hfi1_devdata *dd = uctxt->dd;
	int ret = 0;

	hfi1_init_ctxt(uctxt->sc);

	/* Now allocate the RcvHdr queue and eager buffers. */
	ret = hfi1_create_rcvhdrq(dd, uctxt);
	if (ret)
		goto done;

	ret = hfi1_setup_eagerbufs(uctxt);
	if (ret)
		goto done;

	/* If sub-contexts are enabled, do the appropriate setup */
	if (uctxt->subctxt_cnt)
		ret = setup_subctxt(uctxt);
	if (ret)
		goto done;

	ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
	if (ret)
		goto done;

	ret = init_user_ctxt(fd, uctxt);
	if (ret)
		goto done;

	user_init(uctxt);

	/* Now that the context is set up, the fd can get a reference. */
	fd->uctxt = uctxt;
	hfi1_rcd_get(uctxt);

done:
	if (uctxt->subctxt_cnt) {
		/*
		 * On error, set the failed bit so sub-contexts will clean up
		 * correctly.
		 */
		if (ret)
			set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);

		/*
		 * Base context is done (successfully or not), notify anybody
		 * using a sub-context that is waiting for this completion.
		 */
		clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
		wake_up(&uctxt->wait);
	}

	return ret;
}

static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
{
	struct hfi1_base_info binfo;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned offset;

	trace_hfi1_uctxtdata(uctxt->dd, uctxt, fd->subctxt);

	if (sizeof(binfo) != len)
		return -EINVAL;

	memset(&binfo, 0, sizeof(binfo));
	binfo.hw_version = dd->revision;
	binfo.sw_version = HFI1_KERN_SWVERSION;
	binfo.bthqp = kdeth_qp;
	binfo.jkey = uctxt->jkey;
	/*
	 * If more than 64 contexts are enabled the allocated credit
	 * return will span two or three contiguous pages. Since we only
	 * map the page containing the context's credit return address,
	 * we need to calculate the offset in the proper page.
	 */
	offset = ((u64)uctxt->sc->hw_free -
		  (u64)dd->cr_base[uctxt->numa_id].va) % PAGE_SIZE;
	binfo.sc_credits_addr = HFI1_MMAP_TOKEN(PIO_CRED, uctxt->ctxt,
						fd->subctxt, offset);
	binfo.pio_bufbase = HFI1_MMAP_TOKEN(PIO_BUFS, uctxt->ctxt,
					    fd->subctxt,
					    uctxt->sc->base_addr);
	binfo.pio_bufbase_sop = HFI1_MMAP_TOKEN(PIO_BUFS_SOP,
						uctxt->ctxt,
						fd->subctxt,
						uctxt->sc->base_addr);
	binfo.rcvhdr_bufbase = HFI1_MMAP_TOKEN(RCV_HDRQ, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->rcvhdrq);
	binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->egrbufs.rcvtids[0].dma);
	binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt,
						  fd->subctxt, 0);
	/*
	 * user regs are at
	 * (RXE_PER_CONTEXT_USER + (ctxt * RXE_PER_CONTEXT_SIZE))
	 */
	binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt,
					     fd->subctxt, 0);
	offset = offset_in_page((uctxt_offset(uctxt) + fd->subctxt) *
				sizeof(*dd->events));
	binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt,
					       fd->subctxt,
					       offset);
	binfo.status_bufbase = HFI1_MMAP_TOKEN(STATUS, uctxt->ctxt,
					       fd->subctxt,
					       dd->status);
	if (HFI1_CAP_IS_USET(DMA_RTAIL))
		binfo.rcvhdrtail_base = HFI1_MMAP_TOKEN(RTAIL, uctxt->ctxt,
							fd->subctxt, 0);
	if (uctxt->subctxt_cnt) {
		binfo.subctxt_uregbase = HFI1_MMAP_TOKEN(SUBCTXT_UREGS,
							 uctxt->ctxt,
							 fd->subctxt, 0);
		binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ,
							  uctxt->ctxt,
							  fd->subctxt, 0);
		binfo.subctxt_rcvegrbuf = HFI1_MMAP_TOKEN(SUBCTXT_EGRBUF,
							  uctxt->ctxt,
							  fd->subctxt, 0);
	}

	if (copy_to_user((void __user *)arg, &binfo, len))
		return -EFAULT;

	return 0;
}

/**
 * user_exp_rcv_setup - Set up the given tid rcv list
 * @fd: file data of the current driver instance
 * @arg: ioctl argument for user space information
 * @len: length of data structure associated with ioctl command
 *
 * Wrapper to validate ioctl information before doing _rcv_setup.
 *
 */
static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg,
			      u32 len)
{
	int ret;
	unsigned long addr;
	struct hfi1_tid_info tinfo;

	if (sizeof(tinfo) != len)
		return -EINVAL;

	if (copy_from_user(&tinfo, (void __user *)arg, (sizeof(tinfo))))
		return -EFAULT;

	ret = hfi1_user_exp_rcv_setup(fd, &tinfo);
	if (!ret) {
		/*
		 * Copy the number of tidlist entries we used
		 * and the length of the buffer we registered.
		 */
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			return -EFAULT;

		addr = arg + offsetof(struct hfi1_tid_info, length);
		if (copy_to_user((void __user *)addr, &tinfo.length,
				 sizeof(tinfo.length)))
			ret = -EFAULT;
	}

	return ret;
}

/**
 * user_exp_rcv_clear - Clear the given tid rcv list
 * @fd: file data of the current driver instance
 * @arg: ioctl argument for user space information
 * @len: length of data structure associated with ioctl command
 *
 * The hfi1_user_exp_rcv_clear() can be called from the error path.  Because
 * of this, we need to use this wrapper to copy the user space information
 * before doing the clear.
 */
static int user_exp_rcv_clear(struct hfi1_filedata *fd, unsigned long arg,
			      u32 len)
{
	int ret;
	unsigned long addr;
	struct hfi1_tid_info tinfo;

	if (sizeof(tinfo) != len)
		return -EINVAL;

	if (copy_from_user(&tinfo, (void __user *)arg, (sizeof(tinfo))))
		return -EFAULT;

	ret = hfi1_user_exp_rcv_clear(fd, &tinfo);
	if (!ret) {
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			return -EFAULT;
	}

	return ret;
}

/**
 * user_exp_rcv_invalid - Invalidate the given tid rcv list
 * @fd: file data of the current driver instance
 * @arg: ioctl argument for user space information
 * @len: length of data structure associated with ioctl command
 *
 * Wrapper to validate ioctl information before doing _rcv_invalid.
 *
 */
static int user_exp_rcv_invalid(struct hfi1_filedata *fd, unsigned long arg,
				u32 len)
{
	int ret;
	unsigned long addr;
	struct hfi1_tid_info tinfo;

	if (sizeof(tinfo) != len)
		return -EINVAL;

	if (!fd->invalid_tids)
		return -EINVAL;

	if (copy_from_user(&tinfo, (void __user *)arg, (sizeof(tinfo))))
		return -EFAULT;

	ret = hfi1_user_exp_rcv_invalid(fd, &tinfo);
	if (ret)
		return ret;

	addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
	if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
			 sizeof(tinfo.tidcnt)))
		ret = -EFAULT;

	return ret;
}

static unsigned int poll_urgent(struct file *fp,
				struct poll_table_struct *pt)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (uctxt->urgent != uctxt->urgent_poll) {
		pollflag = POLLIN | POLLRDNORM;
		uctxt->urgent_poll = uctxt->urgent;
	} else {
		pollflag = 0;
		set_bit(HFI1_CTXT_WAITING_URG, &uctxt->event_flags);
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

static unsigned int poll_next(struct file *fp,
			      struct poll_table_struct *pt)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (hdrqempty(uctxt)) {
		set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
		hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt);
		pollflag = 0;
	} else {
		pollflag = POLLIN | POLLRDNORM;
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

/*
 * Find all user contexts in use, and set the specified bit in their
 * event mask.
 * See also find_ctxt() for a similar use, that is specific to send buffers.
 */
int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
{
	struct hfi1_ctxtdata *uctxt;
	struct hfi1_devdata *dd = ppd->dd;
	u16 ctxt;

	if (!dd->events)
		return -EINVAL;

	for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts;
	     ctxt++) {
		uctxt = hfi1_rcd_get_by_index(dd, ctxt);
		if (uctxt) {
			unsigned long *evs;
			int i;
			/*
			 * subctxt_cnt is 0 if not shared, so do base
			 * separately, first, then remaining subctxt, if any
			 */
			evs = dd->events + uctxt_offset(uctxt);
			set_bit(evtbit, evs);
			for (i = 1; i < uctxt->subctxt_cnt; i++)
				set_bit(evtbit, evs + i);
			hfi1_rcd_put(uctxt);
		}
	}

	return 0;
}

/**
 * manage_rcvq - manage a context's receive queue
 * @uctxt: the context
 * @subctxt: the sub-context
 * @arg: start/stop action to carry out
 *
 * start_stop == 0 disables receive on the context, for use in queue
 * overflow conditions.  start_stop == 1 re-enables, to be used to
 * re-init the software copy of the head register
 */
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
		       unsigned long arg)
{
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned int rcvctrl_op;
	int start_stop;

	if (subctxt)
		return 0;

	if (get_user(start_stop, (int __user *)arg))
		return -EFAULT;

	/* atomically clear receive enable ctxt. */
	if (start_stop) {
		/*
		 * On enable, force in-memory copy of the tail register to
		 * 0, so that protocol code doesn't have to worry about
		 * whether or not the chip has yet updated the in-memory
		 * copy or not on return from the system call. The chip
		 * always resets its tail register back to 0 on a
		 * transition from disabled to enabled.
		 */
		if (uctxt->rcvhdrtail_kvaddr)
			clear_rcvhdrtail(uctxt);
		rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
	} else {
		rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
	}
	hfi1_rcvctrl(dd, rcvctrl_op, uctxt);
	/* always; new head should be equal to new tail; see above */

	return 0;
}

/*
 * clear the event notifier events for this context.
 * User process then performs actions appropriate to bit having been
 * set, if desired, and checks again in future.
 */
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
			  unsigned long arg)
{
	int i;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned long *evs;
	unsigned long events;

	if (!dd->events)
		return 0;

	if (get_user(events, (unsigned long __user *)arg))
		return -EFAULT;

	evs = dd->events + uctxt_offset(uctxt) + subctxt;

	for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) {
		if (!test_bit(i, &events))
			continue;
		clear_bit(i, evs);
	}
	return 0;
}

static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg)
{
	int i;
	struct hfi1_pportdata *ppd = uctxt->ppd;
	struct hfi1_devdata *dd = uctxt->dd;
	u16 pkey;

	if (!HFI1_CAP_IS_USET(PKEY_CHECK))
		return -EPERM;

	if (get_user(pkey, (u16 __user *)arg))
		return -EFAULT;

	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
		return -EINVAL;

	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++)
		if (pkey == ppd->pkeys[i])
			return hfi1_set_ctxt_pkey(dd, uctxt, pkey);

	return -ENOENT;
}

/**
 * ctxt_reset - Reset the user context
 * @uctxt: valid user context
 */
static int ctxt_reset(struct hfi1_ctxtdata *uctxt)
{
	struct send_context *sc;
	struct hfi1_devdata *dd;
	int ret = 0;

	if (!uctxt || !uctxt->dd || !uctxt->sc)
		return -EINVAL;

	/*
	 * There is no protection here. User level has to guarantee that
	 * no one will be writing to the send context while it is being
	 * re-initialized.  If user level breaks that guarantee, it will
	 * break its own context and no one else's.
	 */
	dd = uctxt->dd;
	sc = uctxt->sc;

	/*
	 * Wait until the interrupt handler has marked the context as
	 * halted or frozen. Report error if we time out.
	 */
	wait_event_interruptible_timeout(
		sc->halt_wait, (sc->flags & SCF_HALTED),
		msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
	if (!(sc->flags & SCF_HALTED))
		return -ENOLCK;

	/*
	 * If the send context was halted due to a Freeze, wait until the
	 * device has been "unfrozen" before resetting the context.
	 */
	if (sc->flags & SCF_FROZEN) {
		wait_event_interruptible_timeout(
			dd->event_queue,
			!(READ_ONCE(dd->flags) & HFI1_FROZEN),
			msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
		if (dd->flags & HFI1_FROZEN)
			return -ENOLCK;

		if (dd->flags & HFI1_FORCED_FREEZE)
			/*
			 * Don't allow context reset if we are into
			 * forced freeze
			 */
			return -ENODEV;

		sc_disable(sc);
		ret = sc_enable(sc);
		hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt);
	} else {
		ret = sc_restart(sc);
	}
	if (!ret)
		sc_return_credits(sc);

	return ret;
}

static void user_remove(struct hfi1_devdata *dd)
{
	hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device);
}

static int user_add(struct hfi1_devdata *dd)
{
	char name[10];
	int ret;

	snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
	ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
			     &dd->user_cdev, &dd->user_device,
			     true, &dd->kobj);
	if (ret)
		user_remove(dd);

	return ret;
}

/*
 * Create per-unit files in /dev
 */
int hfi1_device_create(struct hfi1_devdata *dd)
{
	return user_add(dd);
}

/*
 * Remove per-unit files in /dev
 * void, core kernel returns no errors for this stuff
 */
void hfi1_device_remove(struct hfi1_devdata *dd)
{
	user_remove(dd);
}