// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
/*
 * Copyright(c) 2020 Cornelis Networks, Inc.
 * Copyright(c) 2015-2020 Intel Corporation.
 */

#include <linux/poll.h>
#include <linux/cdev.h>
#include <linux/vmalloc.h>
#include <linux/io.h>
#include <linux/sched/mm.h>
#include <linux/bitmap.h>

#include <rdma/ib.h>

#include "hfi.h"
#include "pio.h"
#include "device.h"
#include "common.h"
#include "trace.h"
#include "mmu_rb.h"
#include "user_sdma.h"
#include "user_exp_rcv.h"
#include "aspm.h"

#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt

#define SEND_CTXT_HALT_TIMEOUT 1000 /* msecs */

/*
 * File operation functions
 */
static int hfi1_file_open(struct inode *inode, struct file *fp);
static int hfi1_file_close(struct inode *inode, struct file *fp);
static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from);
static __poll_t hfi1_poll(struct file *fp, struct poll_table_struct *pt);
static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma);

static u64 kvirt_to_phys(void *addr);
static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len);
static void init_subctxts(struct hfi1_ctxtdata *uctxt,
			  const struct hfi1_user_info *uinfo);
static int init_user_ctxt(struct hfi1_filedata *fd,
			  struct hfi1_ctxtdata *uctxt);
static void user_init(struct hfi1_ctxtdata *uctxt);
static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len);
static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len);
static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg,
			      u32 len);
static int user_exp_rcv_clear(struct hfi1_filedata *fd, unsigned long arg,
			      u32 len);
static int user_exp_rcv_invalid(struct hfi1_filedata *fd, unsigned long arg,
				u32 len);
static int setup_base_ctxt(struct hfi1_filedata *fd,
			   struct hfi1_ctxtdata *uctxt);
static int setup_subctxt(struct hfi1_ctxtdata *uctxt);

static int find_sub_ctxt(struct hfi1_filedata *fd,
			 const struct hfi1_user_info *uinfo);
static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
			 struct hfi1_user_info *uinfo,
			 struct hfi1_ctxtdata **cd);
static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt);
static __poll_t poll_urgent(struct file *fp, struct poll_table_struct *pt);
static __poll_t poll_next(struct file *fp, struct poll_table_struct *pt);
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
			  unsigned long arg);
static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg);
static int ctxt_reset(struct hfi1_ctxtdata *uctxt);
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
		       unsigned long arg);
static vm_fault_t vma_fault(struct vm_fault *vmf);
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg);

static const struct file_operations hfi1_file_ops = {
	.owner = THIS_MODULE,
	.write_iter = hfi1_write_iter,
	.open = hfi1_file_open,
	.release = hfi1_file_close,
	.unlocked_ioctl = hfi1_file_ioctl,
	.poll = hfi1_poll,
	.mmap = hfi1_file_mmap,
	.llseek = noop_llseek,
};

static const struct vm_operations_struct vm_ops = {
	.fault = vma_fault,
};

/*
 * Types of memories mapped into user processes' space
 */
enum mmap_types {
	PIO_BUFS = 1,
	PIO_BUFS_SOP,
	PIO_CRED,
	RCV_HDRQ,
	RCV_EGRBUF,
	UREGS,
	EVENTS,
	STATUS,
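	/* in-memory copy of the rcvhdr queue tail, present only with DMA_RTAIL */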
	RTAIL,
	SUBCTXT_UREGS,
	SUBCTXT_RCV_HDRQ,
	SUBCTXT_EGRBUF,
	SDMA_COMP
};

/*
 * Masks and offsets defining the mmap tokens
 */
#define HFI1_MMAP_OFFSET_MASK   0xfffULL
#define HFI1_MMAP_OFFSET_SHIFT  0
#define HFI1_MMAP_SUBCTXT_MASK  0xfULL
#define HFI1_MMAP_SUBCTXT_SHIFT 12
#define HFI1_MMAP_CTXT_MASK     0xffULL
#define HFI1_MMAP_CTXT_SHIFT    16
#define HFI1_MMAP_TYPE_MASK     0xfULL
#define HFI1_MMAP_TYPE_SHIFT    24
#define HFI1_MMAP_MAGIC_MASK    0xffffffffULL
#define HFI1_MMAP_MAGIC_SHIFT   32

#define HFI1_MMAP_MAGIC         0xdabbad00

#define HFI1_MMAP_TOKEN_SET(field, val)	\
	(((val) & HFI1_MMAP_##field##_MASK) << HFI1_MMAP_##field##_SHIFT)
#define HFI1_MMAP_TOKEN_GET(field, token) \
	(((token) >> HFI1_MMAP_##field##_SHIFT) & HFI1_MMAP_##field##_MASK)
#define HFI1_MMAP_TOKEN(type, ctxt, subctxt, addr)   \
	(HFI1_MMAP_TOKEN_SET(MAGIC, HFI1_MMAP_MAGIC) | \
	HFI1_MMAP_TOKEN_SET(TYPE, type) | \
	HFI1_MMAP_TOKEN_SET(CTXT, ctxt) | \
	HFI1_MMAP_TOKEN_SET(SUBCTXT, subctxt) | \
	HFI1_MMAP_TOKEN_SET(OFFSET, (offset_in_page(addr))))

#define dbg(fmt, ...)				\
	pr_info(fmt, ##__VA_ARGS__)

static inline int is_valid_mmap(u64 token)
{
	return (HFI1_MMAP_TOKEN_GET(MAGIC, token) == HFI1_MMAP_MAGIC);
}

static int hfi1_file_open(struct inode *inode, struct file *fp)
{
	struct hfi1_filedata *fd;
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);

	if (!((dd->flags & HFI1_PRESENT) && dd->kregbase1))
		return -EINVAL;

	if (!refcount_inc_not_zero(&dd->user_refcount))
		return -ENXIO;

	/* The real work is performed later in assign_ctxt() */

	fd = kzalloc(sizeof(*fd), GFP_KERNEL);

	if (!fd || init_srcu_struct(&fd->pq_srcu))
		goto nomem;
	spin_lock_init(&fd->pq_rcu_lock);
	spin_lock_init(&fd->tid_lock);
	spin_lock_init(&fd->invalid_lock);
	fd->rec_cpu_num = -1; /* no cpu affinity by default */
	fd->dd = dd;
	fp->private_data = fd;
	return 0;
nomem:
	kfree(fd);
	fp->private_data = NULL;
	if (refcount_dec_and_test(&dd->user_refcount))
		complete(&dd->user_comp);
	return -ENOMEM;
}

static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	int ret = 0;
	int uval = 0;

	hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd);
	if (cmd != HFI1_IOCTL_ASSIGN_CTXT &&
	    cmd != HFI1_IOCTL_GET_VERS &&
	    !uctxt)
		return -EINVAL;

	switch (cmd) {
	case HFI1_IOCTL_ASSIGN_CTXT:
		ret = assign_ctxt(fd, arg, _IOC_SIZE(cmd));
		break;

	case HFI1_IOCTL_CTXT_INFO:
		ret = get_ctxt_info(fd, arg, _IOC_SIZE(cmd));
		break;

	case HFI1_IOCTL_USER_INFO:
		ret = get_base_info(fd, arg, _IOC_SIZE(cmd));
		break;

	case HFI1_IOCTL_CREDIT_UPD:
		if (uctxt)
			sc_return_credits(uctxt->sc);
		break;

	case HFI1_IOCTL_TID_UPDATE:
		ret = user_exp_rcv_setup(fd, arg, _IOC_SIZE(cmd));
		break;

	case HFI1_IOCTL_TID_FREE:
		ret = user_exp_rcv_clear(fd, arg, _IOC_SIZE(cmd));
		break;

	case HFI1_IOCTL_TID_INVAL_READ:
		ret = user_exp_rcv_invalid(fd, arg, _IOC_SIZE(cmd));
		break;

	case HFI1_IOCTL_RECV_CTRL:
		ret = manage_rcvq(uctxt, fd->subctxt, arg);
		break;

	case HFI1_IOCTL_POLL_TYPE:
		if (get_user(uval, (int __user *)arg))
			return -EFAULT;
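		/* no range check here; hfi1_poll() treats unknown types as EPOLLERR */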
		uctxt->poll_type = (typeof(uctxt->poll_type))uval;
		break;

	case HFI1_IOCTL_ACK_EVENT:
		ret = user_event_ack(uctxt, fd->subctxt, arg);
		break;

	case HFI1_IOCTL_SET_PKEY:
		ret = set_ctxt_pkey(uctxt, arg);
		break;

	case HFI1_IOCTL_CTXT_RESET:
		ret = ctxt_reset(uctxt);
		break;

	case HFI1_IOCTL_GET_VERS:
		uval = HFI1_USER_SWVERSION;
		if (put_user(uval, (int __user *)arg))
			return -EFAULT;
		break;

	default:
		return -EINVAL;
	}

	return ret;
}

static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
	struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
	struct hfi1_user_sdma_pkt_q *pq;
	struct hfi1_user_sdma_comp_q *cq = fd->cq;
	int done = 0, reqs = 0;
	unsigned long dim = from->nr_segs;
	int idx;

	idx = srcu_read_lock(&fd->pq_srcu);
	pq = srcu_dereference(fd->pq, &fd->pq_srcu);
	if (!cq || !pq) {
		srcu_read_unlock(&fd->pq_srcu, idx);
		return -EIO;
	}

	if (!iter_is_iovec(from) || !dim) {
		srcu_read_unlock(&fd->pq_srcu, idx);
		return -EINVAL;
	}

	trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim);

	if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) {
		srcu_read_unlock(&fd->pq_srcu, idx);
		return -ENOSPC;
	}

	while (dim) {
		int ret;
		unsigned long count = 0;

		ret = hfi1_user_sdma_process_request(
			fd, (struct iovec *)(from->iov + done),
			dim, &count);
		if (ret) {
			reqs = ret;
			break;
		}
		dim -= count;
		done += count;
		reqs++;
	}

	srcu_read_unlock(&fd->pq_srcu, idx);
	return reqs;
}

static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd;
	unsigned long flags;
	u64 token = vma->vm_pgoff << PAGE_SHIFT,
		memaddr = 0;
	void *memvirt = NULL;
	u8 subctxt, mapio = 0, vmf = 0, type;
	ssize_t memlen = 0;
	int ret = 0;
	u16 ctxt;

	if (!is_valid_mmap(token) || !uctxt ||
	    !(vma->vm_flags & VM_SHARED)) {
		ret = -EINVAL;
		goto done;
	}
	dd = uctxt->dd;
	ctxt = HFI1_MMAP_TOKEN_GET(CTXT, token);
	subctxt = HFI1_MMAP_TOKEN_GET(SUBCTXT, token);
	type = HFI1_MMAP_TOKEN_GET(TYPE, token);
	if (ctxt != uctxt->ctxt || subctxt != fd->subctxt) {
		ret = -EINVAL;
		goto done;
	}

	flags = vma->vm_flags;

	switch (type) {
	case PIO_BUFS:
	case PIO_BUFS_SOP:
		memaddr = ((dd->physaddr + TXE_PIO_SEND) +
				/* chip pio base */
			   (uctxt->sc->hw_context * BIT(16))) +
				/* 64K PIO space / ctxt */
			(type == PIO_BUFS_SOP ?
				(TXE_PIO_SIZE / 2) : 0); /* sop? */
		/*
		 * Map only the amount allocated to the context, not the
		 * entire available context's PIO space.
		 */
		memlen = PAGE_ALIGN(uctxt->sc->credits * PIO_BLOCK_SIZE);
		flags &= ~VM_MAYREAD;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
		mapio = 1;
		break;
	case PIO_CRED:
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		/*
		 * The credit return location for this context could be on the
		 * second or third page allocated for credit returns (if number
		 * of enabled contexts > 64 and 128 respectively).
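		 * The PAGE_MASK arithmetic below selects that page; the offset
		 * of uctxt->sc->hw_free within it is reported to user space by
		 * get_base_info().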
		 */
		memvirt = dd->cr_base[uctxt->numa_id].va;
		memaddr = virt_to_phys(memvirt) +
			(((u64)uctxt->sc->hw_free -
			  (u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK);
		memlen = PAGE_SIZE;
		flags &= ~VM_MAYWRITE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		/*
		 * The driver has already allocated memory for credit
		 * returns and programmed it into the chip. Has that
		 * memory been flagged as non-cached?
		 */
		/* vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); */
		mapio = 1;
		break;
	case RCV_HDRQ:
		memlen = rcvhdrq_size(uctxt);
		memvirt = uctxt->rcvhdrq;
		break;
	case RCV_EGRBUF: {
		unsigned long addr;
		int i;
		/*
		 * The RcvEgr buffer needs to be handled differently
		 * as multiple non-contiguous pages need to be mapped
		 * into the user process.
		 */
		memlen = uctxt->egrbufs.size;
		if ((vma->vm_end - vma->vm_start) != memlen) {
			dd_dev_err(dd, "Eager buffer map size invalid (%lu != %lu)\n",
				   (vma->vm_end - vma->vm_start), memlen);
			ret = -EINVAL;
			goto done;
		}
		if (vma->vm_flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		vma->vm_flags &= ~VM_MAYWRITE;
		addr = vma->vm_start;
		for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) {
			memlen = uctxt->egrbufs.buffers[i].len;
			memvirt = uctxt->egrbufs.buffers[i].addr;
			ret = remap_pfn_range(
				vma, addr,
				/*
				 * virt_to_pfn() does the same, but
				 * it's not available on x86_64
				 * when CONFIG_MMU is enabled.
				 */
				PFN_DOWN(__pa(memvirt)),
				memlen,
				vma->vm_page_prot);
			if (ret < 0)
				goto done;
			addr += memlen;
		}
		ret = 0;
		goto done;
	}
	case UREGS:
		/*
		 * Map only the page that contains this context's user
		 * registers.
		 */
		memaddr = (unsigned long)
			(dd->physaddr + RXE_PER_CONTEXT_USER)
			+ (uctxt->ctxt * RXE_PER_CONTEXT_SIZE);
		/*
		 * TidFlow table is on the same page as the rest of the
		 * user registers.
		 */
		memlen = PAGE_SIZE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
		mapio = 1;
		break;
	case EVENTS:
		/*
		 * Use the page where this context's flags are. User level
		 * knows where its own bitmap is within the page.
		 */
		memaddr = (unsigned long)
			(dd->events + uctxt_offset(uctxt)) & PAGE_MASK;
		memlen = PAGE_SIZE;
		/*
		 * v3.7 removes VM_RESERVED but the effect is kept by
		 * using VM_IO.
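		 * This mapping is served by vma_fault(), so the events
		 * array must be vmalloc memory.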
		 */
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case STATUS:
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		memaddr = kvirt_to_phys((void *)dd->status);
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		break;
	case RTAIL:
		if (!HFI1_CAP_IS_USET(DMA_RTAIL)) {
			/*
			 * If the memory allocation failed, the context alloc
			 * also would have failed, so we would never get here
			 */
			ret = -EINVAL;
			goto done;
		}
		if ((flags & VM_WRITE) || !hfi1_rcvhdrtail_kvaddr(uctxt)) {
			ret = -EPERM;
			goto done;
		}
		memlen = PAGE_SIZE;
		memvirt = (void *)hfi1_rcvhdrtail_kvaddr(uctxt);
		flags &= ~VM_MAYWRITE;
		break;
	case SUBCTXT_UREGS:
		memaddr = (u64)uctxt->subctxt_uregbase;
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_RCV_HDRQ:
		memaddr = (u64)uctxt->subctxt_rcvhdr_base;
		memlen = rcvhdrq_size(uctxt) * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_EGRBUF:
		memaddr = (u64)uctxt->subctxt_rcvegrbuf;
		memlen = uctxt->egrbufs.size * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		flags &= ~VM_MAYWRITE;
		vmf = 1;
		break;
	case SDMA_COMP: {
		struct hfi1_user_sdma_comp_q *cq = fd->cq;

		if (!cq) {
			ret = -EFAULT;
			goto done;
		}
		memaddr = (u64)cq->comps;
		memlen = PAGE_ALIGN(sizeof(*cq->comps) * cq->nentries);
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}

	if ((vma->vm_end - vma->vm_start) != memlen) {
		hfi1_cdbg(PROC, "%u:%u Memory size mismatch %lu:%lu",
			  uctxt->ctxt, fd->subctxt,
			  (vma->vm_end - vma->vm_start), memlen);
		ret = -EINVAL;
		goto done;
	}

	vma->vm_flags = flags;
	hfi1_cdbg(PROC,
		  "%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n",
		  ctxt, subctxt, type, mapio, vmf, memaddr, memlen,
		  vma->vm_end - vma->vm_start, vma->vm_flags);
	if (vmf) {
		vma->vm_pgoff = PFN_DOWN(memaddr);
		vma->vm_ops = &vm_ops;
		ret = 0;
	} else if (mapio) {
		ret = io_remap_pfn_range(vma, vma->vm_start,
					 PFN_DOWN(memaddr),
					 memlen,
					 vma->vm_page_prot);
	} else if (memvirt) {
		ret = remap_pfn_range(vma, vma->vm_start,
				      PFN_DOWN(__pa(memvirt)),
				      memlen,
				      vma->vm_page_prot);
	} else {
		ret = remap_pfn_range(vma, vma->vm_start,
				      PFN_DOWN(memaddr),
				      memlen,
				      vma->vm_page_prot);
	}
done:
	return ret;
}

/*
 * Local (non-chip) user memory is not mapped right away but as it is
 * accessed by the user-level code.
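 * vma_fault() resolves each access to a page with vmalloc_to_page().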
 */
static vm_fault_t vma_fault(struct vm_fault *vmf)
{
	struct page *page;

	page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
	if (!page)
		return VM_FAULT_SIGBUS;

	get_page(page);
	vmf->page = page;

	return 0;
}

static __poll_t hfi1_poll(struct file *fp, struct poll_table_struct *pt)
{
	struct hfi1_ctxtdata *uctxt;
	__poll_t pollflag;

	uctxt = ((struct hfi1_filedata *)fp->private_data)->uctxt;
	if (!uctxt)
		pollflag = EPOLLERR;
	else if (uctxt->poll_type == HFI1_POLL_TYPE_URGENT)
		pollflag = poll_urgent(fp, pt);
	else if (uctxt->poll_type == HFI1_POLL_TYPE_ANYRCV)
		pollflag = poll_next(fp, pt);
	else /* invalid */
		pollflag = EPOLLERR;

	return pollflag;
}

static int hfi1_file_close(struct inode *inode, struct file *fp)
{
	struct hfi1_filedata *fdata = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);
	unsigned long flags, *ev;

	fp->private_data = NULL;

	if (!uctxt)
		goto done;

	hfi1_cdbg(PROC, "closing ctxt %u:%u", uctxt->ctxt, fdata->subctxt);

	flush_wc();
	/* drain user sdma queue */
	hfi1_user_sdma_free_queues(fdata, uctxt);

	/* release the cpu */
	hfi1_put_proc_affinity(fdata->rec_cpu_num);

	/* clean up rcv side */
	hfi1_user_exp_rcv_free(fdata);

	/*
	 * fdata->uctxt is used in the above cleanup.  It is not ready to be
	 * removed until here.
	 */
	fdata->uctxt = NULL;
	hfi1_rcd_put(uctxt);

	/*
	 * Clear any left over, unhandled events so the next process that
	 * gets this context doesn't get confused.
	 */
	ev = dd->events + uctxt_offset(uctxt) + fdata->subctxt;
	*ev = 0;

	spin_lock_irqsave(&dd->uctxt_lock, flags);
	__clear_bit(fdata->subctxt, uctxt->in_use_ctxts);
	if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
		spin_unlock_irqrestore(&dd->uctxt_lock, flags);
		goto done;
	}
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	/*
	 * Disable receive context and interrupt available, reset all
	 * RcvCtxtCtrl bits to default values.
	 */
	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
		     HFI1_RCVCTRL_TIDFLOW_DIS |
		     HFI1_RCVCTRL_INTRAVAIL_DIS |
		     HFI1_RCVCTRL_TAILUPD_DIS |
		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
		     HFI1_RCVCTRL_NO_EGR_DROP_DIS |
		     HFI1_RCVCTRL_URGENT_DIS, uctxt);
	/* Clear the context's J_KEY */
	hfi1_clear_ctxt_jkey(dd, uctxt);
	/*
	 * If a send context is allocated, reset context integrity
	 * checks to default and disable the send context.
	 */
	if (uctxt->sc) {
		sc_disable(uctxt->sc);
		set_pio_integrity(uctxt->sc);
	}

	hfi1_free_ctxt_rcv_groups(uctxt);
	hfi1_clear_ctxt_pkey(dd, uctxt);

	uctxt->event_flags = 0;

	deallocate_ctxt(uctxt);
done:

	if (refcount_dec_and_test(&dd->user_refcount))
		complete(&dd->user_comp);

	cleanup_srcu_struct(&fdata->pq_srcu);
	kfree(fdata);
	return 0;
}

/*
 * Convert kernel *virtual* addresses to physical addresses.
 * This is used for vmalloc'ed addresses.
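 * Returns 0 when the address cannot be resolved to a page.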
 */
static u64 kvirt_to_phys(void *addr)
{
	struct page *page;
	u64 paddr = 0;

	page = vmalloc_to_page(addr);
	if (page)
		paddr = page_to_pfn(page) << PAGE_SHIFT;

	return paddr;
}

/**
 * complete_subctxt - complete sub-context info
 * @fd: valid filedata pointer
 *
 * Sub-context info can only be set up after the base context
 * has been completed.  This is indicated by the clearing of the
 * HFI1_CTXT_BASE_UNINIT bit.
 *
 * Wait for the bit to be cleared, and then complete the subcontext
 * initialization.
 *
 */
static int complete_subctxt(struct hfi1_filedata *fd)
{
	int ret;
	unsigned long flags;

	/*
	 * sub-context info can only be set up after the base context
	 * has been completed.
	 */
	ret = wait_event_interruptible(
		fd->uctxt->wait,
		!test_bit(HFI1_CTXT_BASE_UNINIT, &fd->uctxt->event_flags));

	if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags))
		ret = -ENOMEM;

	/* Finish the sub-context init */
	if (!ret) {
		fd->rec_cpu_num = hfi1_get_proc_affinity(fd->uctxt->numa_id);
		ret = init_user_ctxt(fd, fd->uctxt);
	}

	if (ret) {
		spin_lock_irqsave(&fd->dd->uctxt_lock, flags);
		__clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
		spin_unlock_irqrestore(&fd->dd->uctxt_lock, flags);
		hfi1_rcd_put(fd->uctxt);
		fd->uctxt = NULL;
	}

	return ret;
}

static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len)
{
	int ret;
	unsigned int swmajor;
	struct hfi1_ctxtdata *uctxt = NULL;
	struct hfi1_user_info uinfo;

	if (fd->uctxt)
		return -EINVAL;

	if (sizeof(uinfo) != len)
		return -EINVAL;

	if (copy_from_user(&uinfo, (void __user *)arg, sizeof(uinfo)))
		return -EFAULT;

	swmajor = uinfo.userversion >> 16;
	if (swmajor != HFI1_USER_SWMAJOR)
		return -ENODEV;

	if (uinfo.subctxt_cnt > HFI1_MAX_SHARED_CTXTS)
		return -EINVAL;

	/*
	 * Acquire the mutex to protect against multiple creations of what
	 * could be a shared base context.
	 */
	mutex_lock(&hfi1_mutex);
	/*
	 * Get a sub context if available (fd->uctxt will be set).
	 * ret < 0 error, 0 no context, 1 sub-context found
	 */
	ret = find_sub_ctxt(fd, &uinfo);

	/*
	 * Allocate a base context if context sharing is not required or a
	 * sub context wasn't found.
	 */
	if (!ret)
		ret = allocate_ctxt(fd, fd->dd, &uinfo, &uctxt);

	mutex_unlock(&hfi1_mutex);

	/* Depending on the context type, finish the appropriate init */
	switch (ret) {
	case 0:
		ret = setup_base_ctxt(fd, uctxt);
		if (ret)
			deallocate_ctxt(uctxt);
		break;
	case 1:
		ret = complete_subctxt(fd);
		break;
	default:
		break;
	}

	return ret;
}

/**
 * match_ctxt - match context
 * @fd: valid filedata pointer
 * @uinfo: user info to compare base context with
 * @uctxt: context to compare uinfo to.
 *
 * Compare the given context with the given information to see if it
 * can be used for a sub context.
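 *
 * Return: 1 if a sub-context slot was claimed, 0 if this context cannot
 * be used, -EINVAL on a user version mismatch, or -EBUSY when all
 * sub-context slots are already in use.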
 */
static int match_ctxt(struct hfi1_filedata *fd,
		      const struct hfi1_user_info *uinfo,
		      struct hfi1_ctxtdata *uctxt)
{
	struct hfi1_devdata *dd = fd->dd;
	unsigned long flags;
	u16 subctxt;

	/* Skip dynamically allocated kernel contexts */
	if (uctxt->sc && (uctxt->sc->type == SC_KERNEL))
		return 0;

	/* Skip ctxt if it doesn't match the requested one */
	if (memcmp(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)) ||
	    uctxt->jkey != generate_jkey(current_uid()) ||
	    uctxt->subctxt_id != uinfo->subctxt_id ||
	    uctxt->subctxt_cnt != uinfo->subctxt_cnt)
		return 0;

	/* Verify the sharing process matches the base */
	if (uctxt->userversion != uinfo->userversion)
		return -EINVAL;

	/* Find an unused sub context */
	spin_lock_irqsave(&dd->uctxt_lock, flags);
	if (bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
		/* context is being closed, do not use */
		spin_unlock_irqrestore(&dd->uctxt_lock, flags);
		return 0;
	}

	subctxt = find_first_zero_bit(uctxt->in_use_ctxts,
				      HFI1_MAX_SHARED_CTXTS);
	if (subctxt >= uctxt->subctxt_cnt) {
		spin_unlock_irqrestore(&dd->uctxt_lock, flags);
		return -EBUSY;
	}

	fd->subctxt = subctxt;
	__set_bit(fd->subctxt, uctxt->in_use_ctxts);
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	fd->uctxt = uctxt;
	hfi1_rcd_get(uctxt);

	return 1;
}

/**
 * find_sub_ctxt - find sub-context
 * @fd: valid filedata pointer
 * @uinfo: matching info to use to find a possible context to share.
 *
 * The hfi1_mutex must be held when this function is called.  It is
 * necessary to ensure serialized creation of shared contexts.
 *
 * Return:
 *    0      No sub-context found
 *    1      Subcontext found and allocated
 *    errno  EINVAL (incorrect parameters)
 *           EBUSY (all sub contexts in use)
 */
static int find_sub_ctxt(struct hfi1_filedata *fd,
			 const struct hfi1_user_info *uinfo)
{
	struct hfi1_ctxtdata *uctxt;
	struct hfi1_devdata *dd = fd->dd;
	u16 i;
	int ret;

	if (!uinfo->subctxt_cnt)
		return 0;

	for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) {
		uctxt = hfi1_rcd_get_by_index(dd, i);
		if (uctxt) {
			ret = match_ctxt(fd, uinfo, uctxt);
			hfi1_rcd_put(uctxt);
			/* a non-zero value ends the search */
			if (ret)
				return ret;
		}
	}

	return 0;
}

static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
			 struct hfi1_user_info *uinfo,
			 struct hfi1_ctxtdata **rcd)
{
	struct hfi1_ctxtdata *uctxt;
	int ret, numa;

	if (dd->flags & HFI1_FROZEN) {
		/*
		 * Pick an error that is unique from all other errors
		 * that are returned so the user process knows that
		 * it tried to allocate while the SPC was frozen.  It
		 * should be able to retry with success in a short
		 * while.
		 */
		return -EIO;
	}

	if (!dd->freectxts)
		return -EBUSY;

	/*
	 * If we don't have a NUMA node requested, preference is towards
	 * device NUMA node.
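	 * hfi1_get_proc_affinity() may still return -1, in which case the
	 * current CPU's NUMA node is used for the allocation below.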
	 */
	fd->rec_cpu_num = hfi1_get_proc_affinity(dd->node);
	if (fd->rec_cpu_num != -1)
		numa = cpu_to_node(fd->rec_cpu_num);
	else
		numa = numa_node_id();
	ret = hfi1_create_ctxtdata(dd->pport, numa, &uctxt);
	if (ret < 0) {
		dd_dev_err(dd, "user ctxtdata allocation failed\n");
		return ret;
	}
	hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)",
		  uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num,
		  uctxt->numa_id);

	/*
	 * Allocate and enable a PIO send context.
	 */
	uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize, dd->node);
	if (!uctxt->sc) {
		ret = -ENOMEM;
		goto ctxdata_free;
	}
	hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index,
		  uctxt->sc->hw_context);
	ret = sc_enable(uctxt->sc);
	if (ret)
		goto ctxdata_free;

	/*
	 * Setup sub context information if the user-level has requested
	 * sub contexts.
	 * This has to be done here so the rest of the sub-contexts find the
	 * proper base context.
	 * NOTE: _set_bit() can be used here because the context creation is
	 * protected by the mutex (rather than the spin_lock), and will be the
	 * very first instance of this context.
	 */
	__set_bit(0, uctxt->in_use_ctxts);
	if (uinfo->subctxt_cnt)
		init_subctxts(uctxt, uinfo);
	uctxt->userversion = uinfo->userversion;
	uctxt->flags = hfi1_cap_mask; /* save current flag state */
	init_waitqueue_head(&uctxt->wait);
	strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
	memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid));
	uctxt->jkey = generate_jkey(current_uid());
	hfi1_stats.sps_ctxts++;
	/*
	 * Disable ASPM when there are open user/PSM contexts to avoid
	 * issues with ASPM L1 exit latency
	 */
	if (dd->freectxts-- == dd->num_user_contexts)
		aspm_disable_all(dd);

	*rcd = uctxt;

	return 0;

ctxdata_free:
	hfi1_free_ctxt(uctxt);
	return ret;
}

static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt)
{
	mutex_lock(&hfi1_mutex);
	hfi1_stats.sps_ctxts--;
	if (++uctxt->dd->freectxts == uctxt->dd->num_user_contexts)
		aspm_enable_all(uctxt->dd);
	mutex_unlock(&hfi1_mutex);

	hfi1_free_ctxt(uctxt);
}

static void init_subctxts(struct hfi1_ctxtdata *uctxt,
			  const struct hfi1_user_info *uinfo)
{
	uctxt->subctxt_cnt = uinfo->subctxt_cnt;
	uctxt->subctxt_id = uinfo->subctxt_id;
	set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
}

static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
{
	int ret = 0;
	u16 num_subctxts = uctxt->subctxt_cnt;

	uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE);
	if (!uctxt->subctxt_uregbase)
		return -ENOMEM;

	/* We can take the size of the RcvHdr Queue from the master */
	uctxt->subctxt_rcvhdr_base = vmalloc_user(rcvhdrq_size(uctxt) *
						  num_subctxts);
	if (!uctxt->subctxt_rcvhdr_base) {
		ret = -ENOMEM;
		goto bail_ureg;
	}

	uctxt->subctxt_rcvegrbuf = vmalloc_user(uctxt->egrbufs.size *
						num_subctxts);
	if (!uctxt->subctxt_rcvegrbuf) {
		ret = -ENOMEM;
		goto bail_rhdr;
	}

	return 0;

bail_rhdr:
	vfree(uctxt->subctxt_rcvhdr_base);
	uctxt->subctxt_rcvhdr_base = NULL;
bail_ureg:
	vfree(uctxt->subctxt_uregbase);
	uctxt->subctxt_uregbase = NULL;

	return ret;
}

static void user_init(struct hfi1_ctxtdata *uctxt)
{
	unsigned int rcvctrl_ops = 0;

	/* initialize poll variables... */
	uctxt->urgent = 0;
	uctxt->urgent_poll = 0;

	/*
	 * Now enable the ctxt for receive.
	 * For chips that are set to DMA the tail register to memory
	 * when they change (and when the update bit transitions from
	 * 0 to 1), we turn it off and then back on.
	 * This will (very briefly) affect any other open ctxts, but the
	 * duration is very short, and therefore isn't an issue.  We
	 * explicitly set the in-memory tail copy to 0 beforehand, so we
	 * don't have to wait to be sure the DMA update has happened
	 * (chip resets head/tail to 0 on transition to enable).
	 */
	if (hfi1_rcvhdrtail_kvaddr(uctxt))
		clear_rcvhdrtail(uctxt);

	/* Setup J_KEY before enabling the context */
	hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey);

	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
	rcvctrl_ops |= HFI1_RCVCTRL_URGENT_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
		rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
	/*
	 * Ignore the bit in the flags for now until proper
	 * support for multiple packets per rcv array entry is
	 * added.
	 */
	if (!HFI1_CAP_UGET_MASK(uctxt->flags, MULTI_PKT_EGR))
		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_EGR_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
	/*
	 * The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
	 * We can't rely on the correct value to be set from prior
	 * uses of the chip or ctxt. Therefore, add the rcvctrl op
	 * for both cases.
	 */
	if (HFI1_CAP_UGET_MASK(uctxt->flags, DMA_RTAIL))
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
	else
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
}

static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
{
	struct hfi1_ctxt_info cinfo;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;

	if (sizeof(cinfo) != len)
		return -EINVAL;

	memset(&cinfo, 0, sizeof(cinfo));
	cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) &
				HFI1_CAP_MISC_MASK) << HFI1_CAP_USER_SHIFT) |
			HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
			HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
	/* adjust flag if this fd is not able to cache */
	if (!fd->use_mn)
		cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */

	cinfo.num_active = hfi1_count_active_units();
	cinfo.unit = uctxt->dd->unit;
	cinfo.ctxt = uctxt->ctxt;
	cinfo.subctxt = fd->subctxt;
	cinfo.rcvtids = roundup(uctxt->egrbufs.alloced,
				uctxt->dd->rcv_entries.group_size) +
		uctxt->expected_count;
	cinfo.credits = uctxt->sc->credits;
	cinfo.numa_node = uctxt->numa_id;
	cinfo.rec_cpu = fd->rec_cpu_num;
	cinfo.send_ctxt = uctxt->sc->hw_context;

	cinfo.egrtids = uctxt->egrbufs.alloced;
	cinfo.rcvhdrq_cnt = get_hdrq_cnt(uctxt);
	cinfo.rcvhdrq_entsize = get_hdrqentsize(uctxt) << 2;
	cinfo.sdma_ring_size = fd->cq->nentries;
	cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;

	trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, &cinfo);
	if (copy_to_user((void __user *)arg, &cinfo, len))
		return -EFAULT;

	return 0;
}

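/*
 * Allocate the per-fd SDMA packet queue and expected-receive state for
 * this open; if the expected-receive init fails, the SDMA queues are
 * torn down again before returning.
 */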
static int init_user_ctxt(struct hfi1_filedata *fd,
			  struct hfi1_ctxtdata *uctxt)
{
	int ret;

	ret = hfi1_user_sdma_alloc_queues(uctxt, fd);
	if (ret)
		return ret;

	ret = hfi1_user_exp_rcv_init(fd, uctxt);
	if (ret)
		hfi1_user_sdma_free_queues(fd, uctxt);

	return ret;
}

static int setup_base_ctxt(struct hfi1_filedata *fd,
			   struct hfi1_ctxtdata *uctxt)
{
	struct hfi1_devdata *dd = uctxt->dd;
	int ret = 0;

	hfi1_init_ctxt(uctxt->sc);

	/* Now allocate the RcvHdr queue and eager buffers. */
	ret = hfi1_create_rcvhdrq(dd, uctxt);
	if (ret)
		goto done;

	ret = hfi1_setup_eagerbufs(uctxt);
	if (ret)
		goto done;

	/* If sub-contexts are enabled, do the appropriate setup */
	if (uctxt->subctxt_cnt)
		ret = setup_subctxt(uctxt);
	if (ret)
		goto done;

	ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
	if (ret)
		goto done;

	ret = init_user_ctxt(fd, uctxt);
	if (ret)
		goto done;

	user_init(uctxt);

	/* Now that the context is set up, the fd can get a reference. */
	fd->uctxt = uctxt;
	hfi1_rcd_get(uctxt);

done:
	if (uctxt->subctxt_cnt) {
		/*
		 * On error, set the failed bit so sub-contexts will clean up
		 * correctly.
		 */
		if (ret)
			set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);

		/*
		 * Base context is done (successfully or not), notify anybody
		 * using a sub-context that is waiting for this completion.
		 */
		clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
		wake_up(&uctxt->wait);
	}

	return ret;
}

static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
{
	struct hfi1_base_info binfo;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned offset;

	trace_hfi1_uctxtdata(uctxt->dd, uctxt, fd->subctxt);

	if (sizeof(binfo) != len)
		return -EINVAL;

	memset(&binfo, 0, sizeof(binfo));
	binfo.hw_version = dd->revision;
	binfo.sw_version = HFI1_KERN_SWVERSION;
	binfo.bthqp = RVT_KDETH_QP_PREFIX;
	binfo.jkey = uctxt->jkey;
	/*
	 * If more than 64 contexts are enabled the allocated credit
	 * return will span two or three contiguous pages. Since we only
	 * map the page containing the context's credit return address,
	 * we need to calculate the offset in the proper page.
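	 * (This is the same page-selection arithmetic used by the PIO_CRED
	 * mmap case above.)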
	 */
	offset = ((u64)uctxt->sc->hw_free -
		  (u64)dd->cr_base[uctxt->numa_id].va) % PAGE_SIZE;
	binfo.sc_credits_addr = HFI1_MMAP_TOKEN(PIO_CRED, uctxt->ctxt,
						fd->subctxt, offset);
	binfo.pio_bufbase = HFI1_MMAP_TOKEN(PIO_BUFS, uctxt->ctxt,
					    fd->subctxt,
					    uctxt->sc->base_addr);
	binfo.pio_bufbase_sop = HFI1_MMAP_TOKEN(PIO_BUFS_SOP,
						uctxt->ctxt,
						fd->subctxt,
						uctxt->sc->base_addr);
	binfo.rcvhdr_bufbase = HFI1_MMAP_TOKEN(RCV_HDRQ, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->rcvhdrq);
	binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->egrbufs.rcvtids[0].dma);
	binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt,
						  fd->subctxt, 0);
	/*
	 * user regs are at
	 * (RXE_PER_CONTEXT_USER + (ctxt * RXE_PER_CONTEXT_SIZE))
	 */
	binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt,
					     fd->subctxt, 0);
	offset = offset_in_page((uctxt_offset(uctxt) + fd->subctxt) *
				sizeof(*dd->events));
	binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt,
					       fd->subctxt,
					       offset);
	binfo.status_bufbase = HFI1_MMAP_TOKEN(STATUS, uctxt->ctxt,
					       fd->subctxt,
					       dd->status);
	if (HFI1_CAP_IS_USET(DMA_RTAIL))
		binfo.rcvhdrtail_base = HFI1_MMAP_TOKEN(RTAIL, uctxt->ctxt,
							fd->subctxt, 0);
	if (uctxt->subctxt_cnt) {
		binfo.subctxt_uregbase = HFI1_MMAP_TOKEN(SUBCTXT_UREGS,
							 uctxt->ctxt,
							 fd->subctxt, 0);
		binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ,
							  uctxt->ctxt,
							  fd->subctxt, 0);
		binfo.subctxt_rcvegrbuf = HFI1_MMAP_TOKEN(SUBCTXT_EGRBUF,
							  uctxt->ctxt,
							  fd->subctxt, 0);
	}

	if (copy_to_user((void __user *)arg, &binfo, len))
		return -EFAULT;

	return 0;
}

/**
 * user_exp_rcv_setup - Set up the given tid rcv list
 * @fd: file data of the current driver instance
 * @arg: ioctl argument for user space information
 * @len: length of data structure associated with ioctl command
 *
 * Wrapper to validate ioctl information before doing _rcv_setup.
 *
 */
static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg,
			      u32 len)
{
	int ret;
	unsigned long addr;
	struct hfi1_tid_info tinfo;

	if (sizeof(tinfo) != len)
		return -EINVAL;

	if (copy_from_user(&tinfo, (void __user *)arg, (sizeof(tinfo))))
		return -EFAULT;

	ret = hfi1_user_exp_rcv_setup(fd, &tinfo);
	if (!ret) {
		/*
		 * Copy the number of tidlist entries we used
		 * and the length of the buffer we registered.
		 */
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			return -EFAULT;

		addr = arg + offsetof(struct hfi1_tid_info, length);
		if (copy_to_user((void __user *)addr, &tinfo.length,
				 sizeof(tinfo.length)))
			ret = -EFAULT;
	}

	return ret;
}

/**
 * user_exp_rcv_clear - Clear the given tid rcv list
 * @fd: file data of the current driver instance
 * @arg: ioctl argument for user space information
 * @len: length of data structure associated with ioctl command
 *
 * The hfi1_user_exp_rcv_clear() can be called from the error path.  Because
 * of this, we need to use this wrapper to copy the user space information
 * before doing the clear.
 */
static int user_exp_rcv_clear(struct hfi1_filedata *fd, unsigned long arg,
			      u32 len)
{
	int ret;
	unsigned long addr;
	struct hfi1_tid_info tinfo;

	if (sizeof(tinfo) != len)
		return -EINVAL;

	if (copy_from_user(&tinfo, (void __user *)arg, (sizeof(tinfo))))
		return -EFAULT;

	ret = hfi1_user_exp_rcv_clear(fd, &tinfo);
	if (!ret) {
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			return -EFAULT;
	}

	return ret;
}

/**
 * user_exp_rcv_invalid - Invalidate the given tid rcv list
 * @fd: file data of the current driver instance
 * @arg: ioctl argument for user space information
 * @len: length of data structure associated with ioctl command
 *
 * Wrapper to validate ioctl information before doing _rcv_invalid.
 *
 */
static int user_exp_rcv_invalid(struct hfi1_filedata *fd, unsigned long arg,
				u32 len)
{
	int ret;
	unsigned long addr;
	struct hfi1_tid_info tinfo;

	if (sizeof(tinfo) != len)
		return -EINVAL;

	if (!fd->invalid_tids)
		return -EINVAL;

	if (copy_from_user(&tinfo, (void __user *)arg, (sizeof(tinfo))))
		return -EFAULT;

	ret = hfi1_user_exp_rcv_invalid(fd, &tinfo);
	if (ret)
		return ret;

	addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
	if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
			 sizeof(tinfo.tidcnt)))
		ret = -EFAULT;

	return ret;
}

static __poll_t poll_urgent(struct file *fp,
			    struct poll_table_struct *pt)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	__poll_t pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (uctxt->urgent != uctxt->urgent_poll) {
		pollflag = EPOLLIN | EPOLLRDNORM;
		uctxt->urgent_poll = uctxt->urgent;
	} else {
		pollflag = 0;
		set_bit(HFI1_CTXT_WAITING_URG, &uctxt->event_flags);
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

static __poll_t poll_next(struct file *fp,
			  struct poll_table_struct *pt)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	__poll_t pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (hdrqempty(uctxt)) {
		set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
		hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt);
		pollflag = 0;
	} else {
		pollflag = EPOLLIN | EPOLLRDNORM;
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

/*
 * Find all user contexts in use, and set the specified bit in their
 * event mask.
 * See also find_ctxt() for a similar use, that is specific to send buffers.
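 *
 * Returns -EINVAL if the events array has not been allocated, otherwise 0.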
 */
int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
{
	struct hfi1_ctxtdata *uctxt;
	struct hfi1_devdata *dd = ppd->dd;
	u16 ctxt;

	if (!dd->events)
		return -EINVAL;

	for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts;
	     ctxt++) {
		uctxt = hfi1_rcd_get_by_index(dd, ctxt);
		if (uctxt) {
			unsigned long *evs;
			int i;
			/*
			 * subctxt_cnt is 0 if not shared, so do base
			 * separately, first, then remaining subctxt, if any
			 */
			evs = dd->events + uctxt_offset(uctxt);
			set_bit(evtbit, evs);
			for (i = 1; i < uctxt->subctxt_cnt; i++)
				set_bit(evtbit, evs + i);
			hfi1_rcd_put(uctxt);
		}
	}

	return 0;
}

/**
 * manage_rcvq - manage a context's receive queue
 * @uctxt: the context
 * @subctxt: the sub-context
 * @arg: start/stop action to carry out
 *
 * start_stop == 0 disables receive on the context, for use in queue
 * overflow conditions.  start_stop == 1 re-enables, to be used to
 * re-init the software copy of the head register
 */
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
		       unsigned long arg)
{
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned int rcvctrl_op;
	int start_stop;

	if (subctxt)
		return 0;

	if (get_user(start_stop, (int __user *)arg))
		return -EFAULT;

	/* atomically clear receive enable ctxt. */
	if (start_stop) {
		/*
		 * On enable, force in-memory copy of the tail register to
		 * 0, so that protocol code doesn't have to worry about
		 * whether or not the chip has yet updated the in-memory
		 * copy or not on return from the system call. The chip
		 * always resets its tail register back to 0 on a
		 * transition from disabled to enabled.
		 */
		if (hfi1_rcvhdrtail_kvaddr(uctxt))
			clear_rcvhdrtail(uctxt);
		rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
	} else {
		rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
	}
	hfi1_rcvctrl(dd, rcvctrl_op, uctxt);
	/* always; new head should be equal to new tail; see above */

	return 0;
}

/*
 * clear the event notifier events for this context.
 * User process then performs actions appropriate to bit having been
 * set, if desired, and checks again in future.
 */
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
			  unsigned long arg)
{
	int i;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned long *evs;
	unsigned long events;

	if (!dd->events)
		return 0;

	if (get_user(events, (unsigned long __user *)arg))
		return -EFAULT;

	evs = dd->events + uctxt_offset(uctxt) + subctxt;

	for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) {
		if (!test_bit(i, &events))
			continue;
		clear_bit(i, evs);
	}
	return 0;
}

static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg)
{
	int i;
	struct hfi1_pportdata *ppd = uctxt->ppd;
	struct hfi1_devdata *dd = uctxt->dd;
	u16 pkey;

	if (!HFI1_CAP_IS_USET(PKEY_CHECK))
		return -EPERM;

	if (get_user(pkey, (u16 __user *)arg))
		return -EFAULT;

	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
		return -EINVAL;

	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++)
		if (pkey == ppd->pkeys[i])
			return hfi1_set_ctxt_pkey(dd, uctxt, pkey);

	return -ENOENT;
}

/**
 * ctxt_reset - Reset the user context
 * @uctxt: valid user context
 */
static int ctxt_reset(struct hfi1_ctxtdata *uctxt)
{
	struct send_context *sc;
	struct hfi1_devdata *dd;
	int ret = 0;

	if (!uctxt || !uctxt->dd || !uctxt->sc)
		return -EINVAL;

	/*
	 * There is no protection here. User level has to guarantee that
	 * no one will be writing to the send context while it is being
	 * re-initialized.  If user level breaks that guarantee, it will
	 * break its own context and no one else's.
	 */
	dd = uctxt->dd;
	sc = uctxt->sc;

	/*
	 * Wait until the interrupt handler has marked the context as
	 * halted or frozen. Report error if we time out.
	 */
	wait_event_interruptible_timeout(
		sc->halt_wait, (sc->flags & SCF_HALTED),
		msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
	if (!(sc->flags & SCF_HALTED))
		return -ENOLCK;

	/*
	 * If the send context was halted due to a Freeze, wait until the
	 * device has been "unfrozen" before resetting the context.
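	 * If the device is in a forced freeze, the reset is refused (-ENODEV).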
	 */
	if (sc->flags & SCF_FROZEN) {
		wait_event_interruptible_timeout(
			dd->event_queue,
			!(READ_ONCE(dd->flags) & HFI1_FROZEN),
			msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
		if (dd->flags & HFI1_FROZEN)
			return -ENOLCK;

		if (dd->flags & HFI1_FORCED_FREEZE)
			/*
			 * Don't allow context reset if we are in a
			 * forced freeze
			 */
			return -ENODEV;

		sc_disable(sc);
		ret = sc_enable(sc);
		hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt);
	} else {
		ret = sc_restart(sc);
	}
	if (!ret)
		sc_return_credits(sc);

	return ret;
}

static void user_remove(struct hfi1_devdata *dd)
{

	hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device);
}

static int user_add(struct hfi1_devdata *dd)
{
	char name[10];
	int ret;

	snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
	ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
			     &dd->user_cdev, &dd->user_device,
			     true, &dd->verbs_dev.rdi.ibdev.dev.kobj);
	if (ret)
		user_remove(dd);

	return ret;
}

/*
 * Create per-unit files in /dev
 */
int hfi1_device_create(struct hfi1_devdata *dd)
{
	return user_add(dd);
}

/*
 * Remove per-unit files in /dev
 * void, core kernel returns no errors for this stuff
 */
void hfi1_device_remove(struct hfi1_devdata *dd)
{
	user_remove(dd);
}