// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
/*
 * Copyright(c) 2020 Cornelis Networks, Inc.
 * Copyright(c) 2015-2020 Intel Corporation.
 */

#include <linux/poll.h>
#include <linux/cdev.h>
#include <linux/vmalloc.h>
#include <linux/io.h>
#include <linux/sched/mm.h>
#include <linux/bitmap.h>

#include <rdma/ib.h>

#include "hfi.h"
#include "pio.h"
#include "device.h"
#include "common.h"
#include "trace.h"
#include "mmu_rb.h"
#include "user_sdma.h"
#include "user_exp_rcv.h"
#include "aspm.h"

#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt

#define SEND_CTXT_HALT_TIMEOUT 1000 /* msecs */

/*
 * File operation functions
 */
static int hfi1_file_open(struct inode *inode, struct file *fp);
static int hfi1_file_close(struct inode *inode, struct file *fp);
static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from);
static __poll_t hfi1_poll(struct file *fp, struct poll_table_struct *pt);
static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma);

static u64 kvirt_to_phys(void *addr);
static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len);
static void init_subctxts(struct hfi1_ctxtdata *uctxt,
			  const struct hfi1_user_info *uinfo);
static int init_user_ctxt(struct hfi1_filedata *fd,
			  struct hfi1_ctxtdata *uctxt);
static void user_init(struct hfi1_ctxtdata *uctxt);
static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len);
static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len);
static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg,
			      u32 len);
static int user_exp_rcv_clear(struct hfi1_filedata *fd, unsigned long arg,
			      u32 len);
static int user_exp_rcv_invalid(struct hfi1_filedata *fd, unsigned long arg,
				u32 len);
static int setup_base_ctxt(struct hfi1_filedata *fd,
			   struct hfi1_ctxtdata *uctxt);
static int setup_subctxt(struct hfi1_ctxtdata *uctxt);

static int find_sub_ctxt(struct hfi1_filedata *fd,
			 const struct hfi1_user_info *uinfo);
static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
			 struct hfi1_user_info *uinfo,
			 struct hfi1_ctxtdata **cd);
static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt);
static __poll_t poll_urgent(struct file *fp, struct poll_table_struct *pt);
static __poll_t poll_next(struct file *fp, struct poll_table_struct *pt);
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
			  unsigned long arg);
static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg);
static int ctxt_reset(struct hfi1_ctxtdata *uctxt);
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
		       unsigned long arg);
static vm_fault_t vma_fault(struct vm_fault *vmf);
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg);

static const struct file_operations hfi1_file_ops = {
	.owner = THIS_MODULE,
	.write_iter = hfi1_write_iter,
	.open = hfi1_file_open,
	.release = hfi1_file_close,
	.unlocked_ioctl = hfi1_file_ioctl,
	.poll = hfi1_poll,
	.mmap = hfi1_file_mmap,
	.llseek = noop_llseek,
};

static const struct vm_operations_struct vm_ops = {
	.fault = vma_fault,
};

/*
 * Types of memories mapped into user processes' space
 */
enum mmap_types {
	PIO_BUFS = 1,
	PIO_BUFS_SOP,
	PIO_CRED,
	RCV_HDRQ,
	RCV_EGRBUF,
	UREGS,
	EVENTS,
	STATUS,
	RTAIL,
	SUBCTXT_UREGS,
	SUBCTXT_RCV_HDRQ,
	SUBCTXT_EGRBUF,
	SDMA_COMP
};

/*
 * Masks and offsets defining the mmap tokens
 */
#define HFI1_MMAP_OFFSET_MASK   0xfffULL
#define HFI1_MMAP_OFFSET_SHIFT  0
#define HFI1_MMAP_SUBCTXT_MASK  0xfULL
#define HFI1_MMAP_SUBCTXT_SHIFT 12
#define HFI1_MMAP_CTXT_MASK     0xffULL
#define HFI1_MMAP_CTXT_SHIFT    16
#define HFI1_MMAP_TYPE_MASK     0xfULL
#define HFI1_MMAP_TYPE_SHIFT    24
#define HFI1_MMAP_MAGIC_MASK    0xffffffffULL
#define HFI1_MMAP_MAGIC_SHIFT   32

#define HFI1_MMAP_MAGIC         0xdabbad00

#define HFI1_MMAP_TOKEN_SET(field, val)	\
	(((val) & HFI1_MMAP_##field##_MASK) << HFI1_MMAP_##field##_SHIFT)
#define HFI1_MMAP_TOKEN_GET(field, token) \
	(((token) >> HFI1_MMAP_##field##_SHIFT) & HFI1_MMAP_##field##_MASK)
#define HFI1_MMAP_TOKEN(type, ctxt, subctxt, addr)   \
	(HFI1_MMAP_TOKEN_SET(MAGIC, HFI1_MMAP_MAGIC) | \
	HFI1_MMAP_TOKEN_SET(TYPE, type) | \
	HFI1_MMAP_TOKEN_SET(CTXT, ctxt) | \
	HFI1_MMAP_TOKEN_SET(SUBCTXT, subctxt) | \
	HFI1_MMAP_TOKEN_SET(OFFSET, (offset_in_page(addr))))

#define dbg(fmt, ...)				\
	pr_info(fmt, ##__VA_ARGS__)

static inline int is_valid_mmap(u64 token)
{
	return (HFI1_MMAP_TOKEN_GET(MAGIC, token) == HFI1_MMAP_MAGIC);
}

static int hfi1_file_open(struct inode *inode, struct file *fp)
{
	struct hfi1_filedata *fd;
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);

	if (!((dd->flags & HFI1_PRESENT) && dd->kregbase1))
		return -EINVAL;

	if (!refcount_inc_not_zero(&dd->user_refcount))
		return -ENXIO;

	/* The real work is performed later in assign_ctxt() */

	fd = kzalloc(sizeof(*fd), GFP_KERNEL);

	if (!fd || init_srcu_struct(&fd->pq_srcu))
		goto nomem;
	spin_lock_init(&fd->pq_rcu_lock);
	spin_lock_init(&fd->tid_lock);
	spin_lock_init(&fd->invalid_lock);
	fd->rec_cpu_num = -1; /* no cpu affinity by default */
	fd->dd = dd;
	fp->private_data = fd;
	return 0;
nomem:
	kfree(fd);
	fp->private_data = NULL;
	if (refcount_dec_and_test(&dd->user_refcount))
		complete(&dd->user_comp);
	return -ENOMEM;
}

static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	int ret = 0;
	int uval = 0;

	hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd);
	if (cmd != HFI1_IOCTL_ASSIGN_CTXT &&
	    cmd != HFI1_IOCTL_GET_VERS &&
	    !uctxt)
		return -EINVAL;

	switch (cmd) {
	case HFI1_IOCTL_ASSIGN_CTXT:
		ret = assign_ctxt(fd, arg, _IOC_SIZE(cmd));
		break;

	case HFI1_IOCTL_CTXT_INFO:
		ret = get_ctxt_info(fd, arg, _IOC_SIZE(cmd));
		break;

	case HFI1_IOCTL_USER_INFO:
		ret = get_base_info(fd, arg, _IOC_SIZE(cmd));
		break;

	case HFI1_IOCTL_CREDIT_UPD:
		if (uctxt)
			sc_return_credits(uctxt->sc);
		break;

	case HFI1_IOCTL_TID_UPDATE:
		ret = user_exp_rcv_setup(fd, arg, _IOC_SIZE(cmd));
		break;

	case HFI1_IOCTL_TID_FREE:
		ret = user_exp_rcv_clear(fd, arg, _IOC_SIZE(cmd));
		break;

	case HFI1_IOCTL_TID_INVAL_READ:
		ret = user_exp_rcv_invalid(fd, arg, _IOC_SIZE(cmd));
		break;

	case HFI1_IOCTL_RECV_CTRL:
		ret = manage_rcvq(uctxt, fd->subctxt, arg);
		break;

	case HFI1_IOCTL_POLL_TYPE:
		if (get_user(uval, (int __user *)arg))
			return -EFAULT;
		uctxt->poll_type = (typeof(uctxt->poll_type))uval;
		break;

	case HFI1_IOCTL_ACK_EVENT:
		ret = user_event_ack(uctxt, fd->subctxt, arg);
		break;

	case HFI1_IOCTL_SET_PKEY:
		ret = set_ctxt_pkey(uctxt, arg);
		break;

	case HFI1_IOCTL_CTXT_RESET:
		ret = ctxt_reset(uctxt);
		break;

	case HFI1_IOCTL_GET_VERS:
		uval = HFI1_USER_SWVERSION;
		if (put_user(uval, (int __user *)arg))
			return -EFAULT;
		break;

	default:
		return -EINVAL;
	}

	return ret;
}

static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
	struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
	struct hfi1_user_sdma_pkt_q *pq;
	struct hfi1_user_sdma_comp_q *cq = fd->cq;
	int done = 0, reqs = 0;
	unsigned long dim = from->nr_segs;
	int idx;

	if (!HFI1_CAP_IS_KSET(SDMA))
		return -EINVAL;
	idx = srcu_read_lock(&fd->pq_srcu);
	pq = srcu_dereference(fd->pq, &fd->pq_srcu);
	if (!cq || !pq) {
		srcu_read_unlock(&fd->pq_srcu, idx);
		return -EIO;
	}

	if (!iter_is_iovec(from) || !dim) {
		srcu_read_unlock(&fd->pq_srcu, idx);
		return -EINVAL;
	}

	trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim);

	if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) {
		srcu_read_unlock(&fd->pq_srcu, idx);
		return -ENOSPC;
	}

	while (dim) {
		int ret;
		unsigned long count = 0;

		ret = hfi1_user_sdma_process_request(
			fd, (struct iovec *)(from->iov + done),
			dim, &count);
		if (ret) {
			reqs = ret;
			break;
		}
		dim -= count;
		done += count;
		reqs++;
	}

	srcu_read_unlock(&fd->pq_srcu, idx);
	return reqs;
}

static inline void mmap_cdbg(u16 ctxt, u8 subctxt, u8 type, u8 mapio, u8 vmf,
			     u64 memaddr, void *memvirt, dma_addr_t memdma,
			     ssize_t memlen, struct vm_area_struct *vma)
{
	hfi1_cdbg(PROC,
		  "%u:%u type:%u io/vf/dma:%d/%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx",
		  ctxt, subctxt, type, mapio, vmf, !!memdma,
		  memaddr ?: (u64)memvirt, memlen,
		  vma->vm_end - vma->vm_start, vma->vm_flags);
}

static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd;
	unsigned long flags;
	u64 token = vma->vm_pgoff << PAGE_SHIFT,
		memaddr = 0;
	void *memvirt = NULL;
	dma_addr_t memdma = 0;
	u8 subctxt, mapio = 0, vmf = 0, type;
	ssize_t memlen = 0;
	int ret = 0;
	u16 ctxt;

	if (!is_valid_mmap(token) || !uctxt ||
	    !(vma->vm_flags & VM_SHARED)) {
		ret = -EINVAL;
		goto done;
	}
	dd = uctxt->dd;
	ctxt = HFI1_MMAP_TOKEN_GET(CTXT, token);
	subctxt = HFI1_MMAP_TOKEN_GET(SUBCTXT, token);
	type = HFI1_MMAP_TOKEN_GET(TYPE, token);
	if (ctxt != uctxt->ctxt || subctxt != fd->subctxt) {
		ret = -EINVAL;
		goto done;
	}

	/*
	 * vm_pgoff is used as a buffer selector cookie. Always mmap from
	 * the beginning.
	 */
	vma->vm_pgoff = 0;
	flags = vma->vm_flags;

	switch (type) {
	case PIO_BUFS:
	case PIO_BUFS_SOP:
		memaddr = ((dd->physaddr + TXE_PIO_SEND) +
				/* chip pio base */
			   (uctxt->sc->hw_context * BIT(16))) +
				/* 64K PIO space / ctxt */
			  (type == PIO_BUFS_SOP ?
				(TXE_PIO_SIZE / 2) : 0); /* sop? */
		/*
		 * Map only the amount allocated to the context, not the
		 * entire available context's PIO space.
		 */
		memlen = PAGE_ALIGN(uctxt->sc->credits * PIO_BLOCK_SIZE);
		flags &= ~VM_MAYREAD;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
		mapio = 1;
		break;
	case PIO_CRED: {
		u64 cr_page_offset;

		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		/*
		 * The credit return location for this context could be on the
		 * second or third page allocated for credit returns (if number
		 * of enabled contexts > 64 and 128 respectively).
		 */
		cr_page_offset = ((u64)uctxt->sc->hw_free -
				  (u64)dd->cr_base[uctxt->numa_id].va) &
				 PAGE_MASK;
		memvirt = dd->cr_base[uctxt->numa_id].va + cr_page_offset;
		memdma = dd->cr_base[uctxt->numa_id].dma + cr_page_offset;
		memlen = PAGE_SIZE;
		flags &= ~VM_MAYWRITE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		/*
		 * The driver has already allocated memory for credit
		 * returns and programmed it into the chip. Has that
		 * memory been flagged as non-cached?
		 */
		/* vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); */
		break;
	}
	case RCV_HDRQ:
		memlen = rcvhdrq_size(uctxt);
		memvirt = uctxt->rcvhdrq;
		memdma = uctxt->rcvhdrq_dma;
		break;
	case RCV_EGRBUF: {
		unsigned long vm_start_save;
		unsigned long vm_end_save;
		int i;
		/*
		 * The RcvEgr buffers need to be handled differently
		 * as multiple non-contiguous pages need to be mapped
		 * into the user process.
		 */
		memlen = uctxt->egrbufs.size;
		if ((vma->vm_end - vma->vm_start) != memlen) {
			dd_dev_err(dd, "Eager buffer map size invalid (%lu != %lu)\n",
				   (vma->vm_end - vma->vm_start), memlen);
			ret = -EINVAL;
			goto done;
		}
		if (vma->vm_flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		vm_flags_clear(vma, VM_MAYWRITE);
		/*
		 * Mmap multiple separate allocations into a single vma. From
		 * here, dma_mmap_coherent() calls dma_direct_mmap(), which
		 * requires the mmap to exactly fill the vma starting at
		 * vma_start. Adjust the vma start and end for each eager
		 * buffer segment mapped. Restore the originals when done.
		 */
		vm_start_save = vma->vm_start;
		vm_end_save = vma->vm_end;
		vma->vm_end = vma->vm_start;
		for (i = 0; i < uctxt->egrbufs.numbufs; i++) {
			memlen = uctxt->egrbufs.buffers[i].len;
			memvirt = uctxt->egrbufs.buffers[i].addr;
			memdma = uctxt->egrbufs.buffers[i].dma;
			vma->vm_end += memlen;
			mmap_cdbg(ctxt, subctxt, type, mapio, vmf, memaddr,
				  memvirt, memdma, memlen, vma);
			ret = dma_mmap_coherent(&dd->pcidev->dev, vma,
						memvirt, memdma, memlen);
			if (ret < 0) {
				vma->vm_start = vm_start_save;
				vma->vm_end = vm_end_save;
				goto done;
			}
			vma->vm_start += memlen;
		}
		vma->vm_start = vm_start_save;
		vma->vm_end = vm_end_save;
		ret = 0;
		goto done;
	}
	case UREGS:
		/*
		 * Map only the page that contains this context's user
		 * registers.
		 */
		memaddr = (unsigned long)
			(dd->physaddr + RXE_PER_CONTEXT_USER)
			+ (uctxt->ctxt * RXE_PER_CONTEXT_SIZE);
		/*
		 * TidFlow table is on the same page as the rest of the
		 * user registers.
		 */
		memlen = PAGE_SIZE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
		mapio = 1;
		break;
	case EVENTS:
		/*
		 * Use the page where this context's flags are. User level
		 * knows where its own bitmap is within the page.
		 */
		memaddr = (unsigned long)
			(dd->events + uctxt_offset(uctxt)) & PAGE_MASK;
		memlen = PAGE_SIZE;
		/*
		 * v3.7 removes VM_RESERVED but the effect is kept by
		 * using VM_IO.
		 */
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case STATUS:
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		memaddr = kvirt_to_phys((void *)dd->status);
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		break;
	case RTAIL:
		if (!HFI1_CAP_IS_USET(DMA_RTAIL)) {
			/*
			 * If the memory allocation failed, the context alloc
			 * also would have failed, so we would never get here
			 */
			ret = -EINVAL;
			goto done;
		}
		if ((flags & VM_WRITE) || !hfi1_rcvhdrtail_kvaddr(uctxt)) {
			ret = -EPERM;
			goto done;
		}
		memlen = PAGE_SIZE;
		memvirt = (void *)hfi1_rcvhdrtail_kvaddr(uctxt);
		memdma = uctxt->rcvhdrqtailaddr_dma;
		flags &= ~VM_MAYWRITE;
		break;
	case SUBCTXT_UREGS:
		memaddr = (u64)uctxt->subctxt_uregbase;
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_RCV_HDRQ:
		memaddr = (u64)uctxt->subctxt_rcvhdr_base;
		memlen = rcvhdrq_size(uctxt) * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_EGRBUF:
		memaddr = (u64)uctxt->subctxt_rcvegrbuf;
		memlen = uctxt->egrbufs.size * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		flags &= ~VM_MAYWRITE;
		vmf = 1;
		break;
	case SDMA_COMP: {
		struct hfi1_user_sdma_comp_q *cq = fd->cq;

		if (!cq) {
			ret = -EFAULT;
			goto done;
		}
		memaddr = (u64)cq->comps;
		memlen = PAGE_ALIGN(sizeof(*cq->comps) * cq->nentries);
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}

	if ((vma->vm_end - vma->vm_start) != memlen) {
		hfi1_cdbg(PROC, "%u:%u Memory size mismatch %lu:%lu",
			  uctxt->ctxt, fd->subctxt,
			  (vma->vm_end - vma->vm_start), memlen);
		ret = -EINVAL;
		goto done;
	}

	vm_flags_reset(vma, flags);
	mmap_cdbg(ctxt, subctxt, type, mapio, vmf, memaddr, memvirt, memdma,
		  memlen, vma);
	if (vmf) {
		vma->vm_pgoff = PFN_DOWN(memaddr);
		vma->vm_ops = &vm_ops;
		ret = 0;
	} else if (memdma) {
		ret = dma_mmap_coherent(&dd->pcidev->dev, vma,
					memvirt, memdma, memlen);
	} else if (mapio) {
		ret = io_remap_pfn_range(vma, vma->vm_start,
					 PFN_DOWN(memaddr),
					 memlen,
					 vma->vm_page_prot);
	} else if (memvirt) {
		ret = remap_pfn_range(vma, vma->vm_start,
				      PFN_DOWN(__pa(memvirt)),
				      memlen,
				      vma->vm_page_prot);
	} else {
		ret = remap_pfn_range(vma, vma->vm_start,
				      PFN_DOWN(memaddr),
				      memlen,
				      vma->vm_page_prot);
	}
done:
	return ret;
}

/*
 * Local (non-chip) user memory is not mapped right away but as it is
 * accessed by the user-level code.
 */
static vm_fault_t vma_fault(struct vm_fault *vmf)
{
	struct page *page;

	page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
	if (!page)
		return VM_FAULT_SIGBUS;

	get_page(page);
	vmf->page = page;

	return 0;
}

static __poll_t hfi1_poll(struct file *fp, struct poll_table_struct *pt)
{
	struct hfi1_ctxtdata *uctxt;
	__poll_t pollflag;

	uctxt = ((struct hfi1_filedata *)fp->private_data)->uctxt;
	if (!uctxt)
		pollflag = EPOLLERR;
	else if (uctxt->poll_type == HFI1_POLL_TYPE_URGENT)
		pollflag = poll_urgent(fp, pt);
	else if (uctxt->poll_type == HFI1_POLL_TYPE_ANYRCV)
		pollflag = poll_next(fp, pt);
	else /* invalid */
		pollflag = EPOLLERR;

	return pollflag;
}

static int hfi1_file_close(struct inode *inode, struct file *fp)
{
	struct hfi1_filedata *fdata = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);
	unsigned long flags, *ev;

	fp->private_data = NULL;

	if (!uctxt)
		goto done;

	hfi1_cdbg(PROC, "closing ctxt %u:%u", uctxt->ctxt, fdata->subctxt);

	flush_wc();
	/* drain user sdma queue */
	hfi1_user_sdma_free_queues(fdata, uctxt);

	/* release the cpu */
	hfi1_put_proc_affinity(fdata->rec_cpu_num);

	/* clean up rcv side */
	hfi1_user_exp_rcv_free(fdata);

	/*
	 * fdata->uctxt is used in the above cleanup. It is not ready to be
	 * removed until here.
	 */
	fdata->uctxt = NULL;
	hfi1_rcd_put(uctxt);

	/*
	 * Clear any left over, unhandled events so the next process that
	 * gets this context doesn't get confused.
	 */
	ev = dd->events + uctxt_offset(uctxt) + fdata->subctxt;
	*ev = 0;

	spin_lock_irqsave(&dd->uctxt_lock, flags);
	__clear_bit(fdata->subctxt, uctxt->in_use_ctxts);
	if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
		spin_unlock_irqrestore(&dd->uctxt_lock, flags);
		goto done;
	}
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	/*
	 * Disable receive context and interrupt available, reset all
	 * RcvCtxtCtrl bits to default values.
	 */
	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
		     HFI1_RCVCTRL_TIDFLOW_DIS |
		     HFI1_RCVCTRL_INTRAVAIL_DIS |
		     HFI1_RCVCTRL_TAILUPD_DIS |
		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
		     HFI1_RCVCTRL_NO_EGR_DROP_DIS |
		     HFI1_RCVCTRL_URGENT_DIS, uctxt);
	/* Clear the context's J_KEY */
	hfi1_clear_ctxt_jkey(dd, uctxt);
	/*
	 * If a send context is allocated, reset context integrity
	 * checks to default and disable the send context.
	 */
	if (uctxt->sc) {
		sc_disable(uctxt->sc);
		set_pio_integrity(uctxt->sc);
	}

	hfi1_free_ctxt_rcv_groups(uctxt);
	hfi1_clear_ctxt_pkey(dd, uctxt);

	uctxt->event_flags = 0;

	deallocate_ctxt(uctxt);
done:

	if (refcount_dec_and_test(&dd->user_refcount))
		complete(&dd->user_comp);

	cleanup_srcu_struct(&fdata->pq_srcu);
	kfree(fdata);
	return 0;
}

/*
 * Convert kernel *virtual* addresses to physical addresses.
 * This is used for vmalloc'ed addresses.
 */
static u64 kvirt_to_phys(void *addr)
{
	struct page *page;
	u64 paddr = 0;

	page = vmalloc_to_page(addr);
	if (page)
		paddr = page_to_pfn(page) << PAGE_SHIFT;

	return paddr;
}

/**
 * complete_subctxt - complete sub-context info
 * @fd: valid filedata pointer
 *
 * Sub-context info can only be set up after the base context
 * has been completed. This is indicated by the clearing of the
 * HFI1_CTXT_BASE_UNINIT bit.
 *
 * Wait for the bit to be cleared, and then complete the subcontext
 * initialization.
 *
 */
static int complete_subctxt(struct hfi1_filedata *fd)
{
	int ret;
	unsigned long flags;

	/*
	 * sub-context info can only be set up after the base context
	 * has been completed.
	 */
	ret = wait_event_interruptible(
		fd->uctxt->wait,
		!test_bit(HFI1_CTXT_BASE_UNINIT, &fd->uctxt->event_flags));

	if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags))
		ret = -ENOMEM;

	/* Finish the sub-context init */
	if (!ret) {
		fd->rec_cpu_num = hfi1_get_proc_affinity(fd->uctxt->numa_id);
		ret = init_user_ctxt(fd, fd->uctxt);
	}

	if (ret) {
		spin_lock_irqsave(&fd->dd->uctxt_lock, flags);
		__clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
		spin_unlock_irqrestore(&fd->dd->uctxt_lock, flags);
		hfi1_rcd_put(fd->uctxt);
		fd->uctxt = NULL;
	}

	return ret;
}

static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len)
{
	int ret;
	unsigned int swmajor;
	struct hfi1_ctxtdata *uctxt = NULL;
	struct hfi1_user_info uinfo;

	if (fd->uctxt)
		return -EINVAL;

	if (sizeof(uinfo) != len)
		return -EINVAL;

	if (copy_from_user(&uinfo, (void __user *)arg, sizeof(uinfo)))
		return -EFAULT;

	swmajor = uinfo.userversion >> 16;
	if (swmajor != HFI1_USER_SWMAJOR)
		return -ENODEV;

	if (uinfo.subctxt_cnt > HFI1_MAX_SHARED_CTXTS)
		return -EINVAL;

	/*
	 * Acquire the mutex to protect against multiple creations of what
	 * could be a shared base context.
	 */
	mutex_lock(&hfi1_mutex);
	/*
	 * Get a sub context if available (fd->uctxt will be set).
	 * ret < 0 error, 0 no context, 1 sub-context found
	 */
	ret = find_sub_ctxt(fd, &uinfo);

	/*
	 * Allocate a base context if context sharing is not required or a
	 * sub context wasn't found.
	 */
	if (!ret)
		ret = allocate_ctxt(fd, fd->dd, &uinfo, &uctxt);

	mutex_unlock(&hfi1_mutex);

	/* Depending on the context type, finish the appropriate init */
	switch (ret) {
	case 0:
		ret = setup_base_ctxt(fd, uctxt);
		if (ret)
			deallocate_ctxt(uctxt);
		break;
	case 1:
		ret = complete_subctxt(fd);
		break;
	default:
		break;
	}

	return ret;
}

/**
 * match_ctxt - match context
 * @fd: valid filedata pointer
 * @uinfo: user info to compare base context with
 * @uctxt: context to compare uinfo to.
 *
 * Compare the given context with the given information to see if it
 * can be used for a sub context.
 */
static int match_ctxt(struct hfi1_filedata *fd,
		      const struct hfi1_user_info *uinfo,
		      struct hfi1_ctxtdata *uctxt)
{
	struct hfi1_devdata *dd = fd->dd;
	unsigned long flags;
	u16 subctxt;

	/* Skip dynamically allocated kernel contexts */
	if (uctxt->sc && (uctxt->sc->type == SC_KERNEL))
		return 0;

	/* Skip ctxt if it doesn't match the requested one */
	if (memcmp(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)) ||
	    uctxt->jkey != generate_jkey(current_uid()) ||
	    uctxt->subctxt_id != uinfo->subctxt_id ||
	    uctxt->subctxt_cnt != uinfo->subctxt_cnt)
		return 0;

	/* Verify the sharing process matches the base */
	if (uctxt->userversion != uinfo->userversion)
		return -EINVAL;

	/* Find an unused sub context */
	spin_lock_irqsave(&dd->uctxt_lock, flags);
	if (bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
		/* context is being closed, do not use */
		spin_unlock_irqrestore(&dd->uctxt_lock, flags);
		return 0;
	}

	subctxt = find_first_zero_bit(uctxt->in_use_ctxts,
				      HFI1_MAX_SHARED_CTXTS);
	if (subctxt >= uctxt->subctxt_cnt) {
		spin_unlock_irqrestore(&dd->uctxt_lock, flags);
		return -EBUSY;
	}

	fd->subctxt = subctxt;
	__set_bit(fd->subctxt, uctxt->in_use_ctxts);
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	fd->uctxt = uctxt;
	hfi1_rcd_get(uctxt);

	return 1;
}

/**
 * find_sub_ctxt - find sub-context
 * @fd: valid filedata pointer
 * @uinfo: matching info to use to find a possible context to share.
 *
 * The hfi1_mutex must be held when this function is called. It is
 * necessary to ensure serialized creation of shared contexts.
 *
 * Return:
 *    0      No sub-context found
 *    1      Subcontext found and allocated
 *    errno  EINVAL (incorrect parameters)
 *           EBUSY (all sub contexts in use)
 */
static int find_sub_ctxt(struct hfi1_filedata *fd,
			 const struct hfi1_user_info *uinfo)
{
	struct hfi1_ctxtdata *uctxt;
	struct hfi1_devdata *dd = fd->dd;
	u16 i;
	int ret;

	if (!uinfo->subctxt_cnt)
		return 0;

	for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) {
		uctxt = hfi1_rcd_get_by_index(dd, i);
		if (uctxt) {
			ret = match_ctxt(fd, uinfo, uctxt);
			hfi1_rcd_put(uctxt);
			/* value of != 0 will return */
			if (ret)
				return ret;
		}
	}

	return 0;
}

static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
			 struct hfi1_user_info *uinfo,
			 struct hfi1_ctxtdata **rcd)
{
	struct hfi1_ctxtdata *uctxt;
	int ret, numa;

	if (dd->flags & HFI1_FROZEN) {
		/*
		 * Pick an error that is unique from all other errors
		 * that are returned so the user process knows that
		 * it tried to allocate while the SPC was frozen. It
		 * should be able to retry with success in a short
		 * while.
		 */
		return -EIO;
	}

	if (!dd->freectxts)
		return -EBUSY;

	/*
	 * If we don't have a NUMA node requested, preference is towards
	 * device NUMA node.
	 */
	fd->rec_cpu_num = hfi1_get_proc_affinity(dd->node);
	if (fd->rec_cpu_num != -1)
		numa = cpu_to_node(fd->rec_cpu_num);
	else
		numa = numa_node_id();
	ret = hfi1_create_ctxtdata(dd->pport, numa, &uctxt);
	if (ret < 0) {
		dd_dev_err(dd, "user ctxtdata allocation failed\n");
		return ret;
	}
	hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)",
		  uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num,
		  uctxt->numa_id);

	/*
	 * Allocate and enable a PIO send context.
	 */
	uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize, dd->node);
	if (!uctxt->sc) {
		ret = -ENOMEM;
		goto ctxdata_free;
	}
	hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index,
		  uctxt->sc->hw_context);
	ret = sc_enable(uctxt->sc);
	if (ret)
		goto ctxdata_free;

	/*
	 * Setup sub context information if the user-level has requested
	 * sub contexts.
	 * This has to be done here so the rest of the sub-contexts find the
	 * proper base context.
	 * NOTE: _set_bit() can be used here because the context creation is
	 * protected by the mutex (rather than the spin_lock), and will be the
	 * very first instance of this context.
	 */
	__set_bit(0, uctxt->in_use_ctxts);
	if (uinfo->subctxt_cnt)
		init_subctxts(uctxt, uinfo);
	uctxt->userversion = uinfo->userversion;
	uctxt->flags = hfi1_cap_mask; /* save current flag state */
	init_waitqueue_head(&uctxt->wait);
	strscpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
	memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid));
	uctxt->jkey = generate_jkey(current_uid());
	hfi1_stats.sps_ctxts++;
	/*
	 * Disable ASPM when there are open user/PSM contexts to avoid
	 * issues with ASPM L1 exit latency
	 */
	if (dd->freectxts-- == dd->num_user_contexts)
		aspm_disable_all(dd);

	*rcd = uctxt;

	return 0;

ctxdata_free:
	hfi1_free_ctxt(uctxt);
	return ret;
}

static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt)
{
	mutex_lock(&hfi1_mutex);
	hfi1_stats.sps_ctxts--;
	if (++uctxt->dd->freectxts == uctxt->dd->num_user_contexts)
		aspm_enable_all(uctxt->dd);
	mutex_unlock(&hfi1_mutex);

	hfi1_free_ctxt(uctxt);
}

static void init_subctxts(struct hfi1_ctxtdata *uctxt,
			  const struct hfi1_user_info *uinfo)
{
	uctxt->subctxt_cnt = uinfo->subctxt_cnt;
	uctxt->subctxt_id = uinfo->subctxt_id;
	set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
}

static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
{
	int ret = 0;
	u16 num_subctxts = uctxt->subctxt_cnt;

	uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE);
	if (!uctxt->subctxt_uregbase)
		return -ENOMEM;

	/* We can take the size of the RcvHdr Queue from the master */
	uctxt->subctxt_rcvhdr_base = vmalloc_user(rcvhdrq_size(uctxt) *
						  num_subctxts);
	if (!uctxt->subctxt_rcvhdr_base) {
		ret = -ENOMEM;
		goto bail_ureg;
	}

	uctxt->subctxt_rcvegrbuf = vmalloc_user(uctxt->egrbufs.size *
						num_subctxts);
	if (!uctxt->subctxt_rcvegrbuf) {
		ret = -ENOMEM;
		goto bail_rhdr;
	}

	return 0;

bail_rhdr:
	vfree(uctxt->subctxt_rcvhdr_base);
	uctxt->subctxt_rcvhdr_base = NULL;
bail_ureg:
	vfree(uctxt->subctxt_uregbase);
	uctxt->subctxt_uregbase = NULL;

	return ret;
}

static void user_init(struct hfi1_ctxtdata *uctxt)
{
	unsigned int rcvctrl_ops = 0;

	/* initialize poll variables... */
	uctxt->urgent = 0;
	uctxt->urgent_poll = 0;

	/*
	 * Now enable the ctxt for receive.
	 * For chips that are set to DMA the tail register to memory
	 * when they change (and when the update bit transitions from
	 * 0 to 1), we turn it off and then back on.
	 * This will (very briefly) affect any other open ctxts, but the
	 * duration is very short, and therefore isn't an issue. We
	 * explicitly set the in-memory tail copy to 0 beforehand, so we
	 * don't have to wait to be sure the DMA update has happened
	 * (chip resets head/tail to 0 on transition to enable).
	 */
	if (hfi1_rcvhdrtail_kvaddr(uctxt))
		clear_rcvhdrtail(uctxt);

	/* Setup J_KEY before enabling the context */
	hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey);

	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
	rcvctrl_ops |= HFI1_RCVCTRL_URGENT_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
		rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
	/*
	 * Ignore the bit in the flags for now until proper
	 * support for multiple packet per rcv array entry is
	 * added.
	 */
	if (!HFI1_CAP_UGET_MASK(uctxt->flags, MULTI_PKT_EGR))
		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_EGR_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
	/*
	 * The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
	 * We can't rely on the correct value to be set from prior
	 * uses of the chip or ctxt. Therefore, add the rcvctrl op
	 * for both cases.
	 */
	if (HFI1_CAP_UGET_MASK(uctxt->flags, DMA_RTAIL))
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
	else
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
}

static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
{
	struct hfi1_ctxt_info cinfo;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;

	if (sizeof(cinfo) != len)
		return -EINVAL;

	memset(&cinfo, 0, sizeof(cinfo));
	cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) &
				HFI1_CAP_MISC_MASK) << HFI1_CAP_USER_SHIFT) |
			HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
			HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
	/* adjust flag if this fd is not able to cache */
	if (!fd->use_mn)
		cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */

	cinfo.num_active = hfi1_count_active_units();
	cinfo.unit = uctxt->dd->unit;
	cinfo.ctxt = uctxt->ctxt;
	cinfo.subctxt = fd->subctxt;
	cinfo.rcvtids = roundup(uctxt->egrbufs.alloced,
				uctxt->dd->rcv_entries.group_size) +
		uctxt->expected_count;
	cinfo.credits = uctxt->sc->credits;
	cinfo.numa_node = uctxt->numa_id;
	cinfo.rec_cpu = fd->rec_cpu_num;
	cinfo.send_ctxt = uctxt->sc->hw_context;

	cinfo.egrtids = uctxt->egrbufs.alloced;
	cinfo.rcvhdrq_cnt = get_hdrq_cnt(uctxt);
	cinfo.rcvhdrq_entsize = get_hdrqentsize(uctxt) << 2;
	cinfo.sdma_ring_size = fd->cq->nentries;
	cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;

	trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, &cinfo);
	if (copy_to_user((void __user *)arg, &cinfo, len))
		return -EFAULT;

	return 0;
}

static int init_user_ctxt(struct hfi1_filedata *fd,
			  struct hfi1_ctxtdata *uctxt)
{
	int ret;

	ret = hfi1_user_sdma_alloc_queues(uctxt, fd);
	if (ret)
		return ret;

	ret = hfi1_user_exp_rcv_init(fd, uctxt);
	if (ret)
		hfi1_user_sdma_free_queues(fd, uctxt);

	return ret;
}

static int setup_base_ctxt(struct hfi1_filedata *fd,
			   struct hfi1_ctxtdata *uctxt)
{
	struct hfi1_devdata *dd = uctxt->dd;
	int ret = 0;

	hfi1_init_ctxt(uctxt->sc);

	/* Now allocate the RcvHdr queue and eager buffers. */
	ret = hfi1_create_rcvhdrq(dd, uctxt);
	if (ret)
		goto done;

	ret = hfi1_setup_eagerbufs(uctxt);
	if (ret)
		goto done;

	/* If sub-contexts are enabled, do the appropriate setup */
	if (uctxt->subctxt_cnt)
		ret = setup_subctxt(uctxt);
	if (ret)
		goto done;

	ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
	if (ret)
		goto done;

	ret = init_user_ctxt(fd, uctxt);
	if (ret) {
		hfi1_free_ctxt_rcv_groups(uctxt);
		goto done;
	}

	user_init(uctxt);

	/* Now that the context is set up, the fd can get a reference. */
	fd->uctxt = uctxt;
	hfi1_rcd_get(uctxt);

done:
	if (uctxt->subctxt_cnt) {
		/*
		 * On error, set the failed bit so sub-contexts will clean up
		 * correctly.
		 */
		if (ret)
			set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);

		/*
		 * Base context is done (successfully or not), notify anybody
		 * using a sub-context that is waiting for this completion.
		 */
		clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
		wake_up(&uctxt->wait);
	}

	return ret;
}

static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
{
	struct hfi1_base_info binfo;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned offset;

	trace_hfi1_uctxtdata(uctxt->dd, uctxt, fd->subctxt);

	if (sizeof(binfo) != len)
		return -EINVAL;

	memset(&binfo, 0, sizeof(binfo));
	binfo.hw_version = dd->revision;
	binfo.sw_version = HFI1_USER_SWVERSION;
	binfo.bthqp = RVT_KDETH_QP_PREFIX;
	binfo.jkey = uctxt->jkey;
	/*
	 * If more than 64 contexts are enabled the allocated credit
	 * return will span two or three contiguous pages. Since we only
	 * map the page containing the context's credit return address,
	 * we need to calculate the offset in the proper page.
	 */
	offset = ((u64)uctxt->sc->hw_free -
		  (u64)dd->cr_base[uctxt->numa_id].va) % PAGE_SIZE;
	binfo.sc_credits_addr = HFI1_MMAP_TOKEN(PIO_CRED, uctxt->ctxt,
						fd->subctxt, offset);
	binfo.pio_bufbase = HFI1_MMAP_TOKEN(PIO_BUFS, uctxt->ctxt,
					    fd->subctxt,
					    uctxt->sc->base_addr);
	binfo.pio_bufbase_sop = HFI1_MMAP_TOKEN(PIO_BUFS_SOP,
						uctxt->ctxt,
						fd->subctxt,
						uctxt->sc->base_addr);
	binfo.rcvhdr_bufbase = HFI1_MMAP_TOKEN(RCV_HDRQ, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->rcvhdrq);
	binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->egrbufs.rcvtids[0].dma);
	binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt,
						  fd->subctxt, 0);
	/*
	 * user regs are at
	 * (RXE_PER_CONTEXT_USER + (ctxt * RXE_PER_CONTEXT_SIZE))
	 */
	binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt,
					     fd->subctxt, 0);
	offset = offset_in_page((uctxt_offset(uctxt) + fd->subctxt) *
				sizeof(*dd->events));
	binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt,
					       fd->subctxt,
					       offset);
	binfo.status_bufbase = HFI1_MMAP_TOKEN(STATUS, uctxt->ctxt,
					       fd->subctxt,
					       dd->status);
	if (HFI1_CAP_IS_USET(DMA_RTAIL))
		binfo.rcvhdrtail_base = HFI1_MMAP_TOKEN(RTAIL, uctxt->ctxt,
							fd->subctxt, 0);
	if (uctxt->subctxt_cnt) {
		binfo.subctxt_uregbase = HFI1_MMAP_TOKEN(SUBCTXT_UREGS,
							 uctxt->ctxt,
							 fd->subctxt, 0);
		binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ,
							  uctxt->ctxt,
							  fd->subctxt, 0);
		binfo.subctxt_rcvegrbuf = HFI1_MMAP_TOKEN(SUBCTXT_EGRBUF,
							  uctxt->ctxt,
							  fd->subctxt, 0);
	}

	if (copy_to_user((void __user *)arg, &binfo, len))
		return -EFAULT;

	return 0;
}

/**
 * user_exp_rcv_setup - Set up the given tid rcv list
 * @fd: file data of the current driver instance
 * @arg: ioctl argument for user space information
 * @len: length of data structure associated with ioctl command
 *
 * Wrapper to validate ioctl information before doing _rcv_setup.
 *
 */
static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg,
			      u32 len)
{
	int ret;
	unsigned long addr;
	struct hfi1_tid_info tinfo;

	if (sizeof(tinfo) != len)
		return -EINVAL;

	if (copy_from_user(&tinfo, (void __user *)arg, (sizeof(tinfo))))
		return -EFAULT;

	ret = hfi1_user_exp_rcv_setup(fd, &tinfo);
	if (!ret) {
		/*
		 * Copy the number of tidlist entries we used
		 * and the length of the buffer we registered.
		 */
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			ret = -EFAULT;

		addr = arg + offsetof(struct hfi1_tid_info, length);
		if (!ret && copy_to_user((void __user *)addr, &tinfo.length,
					 sizeof(tinfo.length)))
			ret = -EFAULT;

		if (ret)
			hfi1_user_exp_rcv_invalid(fd, &tinfo);
	}

	return ret;
}

/**
 * user_exp_rcv_clear - Clear the given tid rcv list
 * @fd: file data of the current driver instance
 * @arg: ioctl argument for user space information
 * @len: length of data structure associated with ioctl command
 *
 * The hfi1_user_exp_rcv_clear() can be called from the error path. Because
 * of this, we need to use this wrapper to copy the user space information
 * before doing the clear.
 */
static int user_exp_rcv_clear(struct hfi1_filedata *fd, unsigned long arg,
			      u32 len)
{
	int ret;
	unsigned long addr;
	struct hfi1_tid_info tinfo;

	if (sizeof(tinfo) != len)
		return -EINVAL;

	if (copy_from_user(&tinfo, (void __user *)arg, (sizeof(tinfo))))
		return -EFAULT;

	ret = hfi1_user_exp_rcv_clear(fd, &tinfo);
	if (!ret) {
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			return -EFAULT;
	}

	return ret;
}

/**
 * user_exp_rcv_invalid - Invalidate the given tid rcv list
 * @fd: file data of the current driver instance
 * @arg: ioctl argument for user space information
 * @len: length of data structure associated with ioctl command
 *
 * Wrapper to validate ioctl information before doing _rcv_invalid.
 *
 */
static int user_exp_rcv_invalid(struct hfi1_filedata *fd, unsigned long arg,
				u32 len)
{
	int ret;
	unsigned long addr;
	struct hfi1_tid_info tinfo;

	if (sizeof(tinfo) != len)
		return -EINVAL;

	if (!fd->invalid_tids)
		return -EINVAL;

	if (copy_from_user(&tinfo, (void __user *)arg, (sizeof(tinfo))))
		return -EFAULT;

	ret = hfi1_user_exp_rcv_invalid(fd, &tinfo);
	if (ret)
		return ret;

	addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
	if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
			 sizeof(tinfo.tidcnt)))
		ret = -EFAULT;

	return ret;
}

static __poll_t poll_urgent(struct file *fp,
			    struct poll_table_struct *pt)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	__poll_t pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (uctxt->urgent != uctxt->urgent_poll) {
		pollflag = EPOLLIN | EPOLLRDNORM;
		uctxt->urgent_poll = uctxt->urgent;
	} else {
		pollflag = 0;
		set_bit(HFI1_CTXT_WAITING_URG, &uctxt->event_flags);
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

static __poll_t poll_next(struct file *fp,
			  struct poll_table_struct *pt)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	__poll_t pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (hdrqempty(uctxt)) {
		set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
		hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt);
		pollflag = 0;
	} else {
		pollflag = EPOLLIN | EPOLLRDNORM;
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

/*
 * Find all user contexts in use, and set the specified bit in their
 * event mask.
 * See also find_ctxt() for a similar use, that is specific to send buffers.
 */
int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
{
	struct hfi1_ctxtdata *uctxt;
	struct hfi1_devdata *dd = ppd->dd;
	u16 ctxt;

	if (!dd->events)
		return -EINVAL;

	for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts;
	     ctxt++) {
		uctxt = hfi1_rcd_get_by_index(dd, ctxt);
		if (uctxt) {
			unsigned long *evs;
			int i;
			/*
			 * subctxt_cnt is 0 if not shared, so do base
			 * separately, first, then remaining subctxt, if any
			 */
			evs = dd->events + uctxt_offset(uctxt);
			set_bit(evtbit, evs);
			for (i = 1; i < uctxt->subctxt_cnt; i++)
				set_bit(evtbit, evs + i);
			hfi1_rcd_put(uctxt);
		}
	}

	return 0;
}

/**
 * manage_rcvq - manage a context's receive queue
 * @uctxt: the context
 * @subctxt: the sub-context
 * @arg: start/stop action to carry out
 *
 * start_stop == 0 disables receive on the context, for use in queue
 * overflow conditions. start_stop == 1 re-enables, to be used to
 * re-init the software copy of the head register
 */
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
		       unsigned long arg)
{
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned int rcvctrl_op;
	int start_stop;

	if (subctxt)
		return 0;

	if (get_user(start_stop, (int __user *)arg))
		return -EFAULT;

	/* atomically clear receive enable ctxt. */
	if (start_stop) {
		/*
		 * On enable, force in-memory copy of the tail register to
		 * 0, so that protocol code doesn't have to worry about
		 * whether or not the chip has yet updated the in-memory
		 * copy or not on return from the system call. The chip
		 * always resets its tail register back to 0 on a
		 * transition from disabled to enabled.
		 */
		if (hfi1_rcvhdrtail_kvaddr(uctxt))
			clear_rcvhdrtail(uctxt);
		rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
	} else {
		rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
	}
	hfi1_rcvctrl(dd, rcvctrl_op, uctxt);
	/* always; new head should be equal to new tail; see above */

	return 0;
}

/*
 * clear the event notifier events for this context.
 * User process then performs actions appropriate to bit having been
 * set, if desired, and checks again in future.
 */
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
			  unsigned long arg)
{
	int i;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned long *evs;
	unsigned long events;

	if (!dd->events)
		return 0;

	if (get_user(events, (unsigned long __user *)arg))
		return -EFAULT;

	evs = dd->events + uctxt_offset(uctxt) + subctxt;

	for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) {
		if (!test_bit(i, &events))
			continue;
		clear_bit(i, evs);
	}
	return 0;
}

static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg)
{
	int i;
	struct hfi1_pportdata *ppd = uctxt->ppd;
	struct hfi1_devdata *dd = uctxt->dd;
	u16 pkey;

	if (!HFI1_CAP_IS_USET(PKEY_CHECK))
		return -EPERM;

	if (get_user(pkey, (u16 __user *)arg))
		return -EFAULT;

	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
		return -EINVAL;

	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++)
		if (pkey == ppd->pkeys[i])
			return hfi1_set_ctxt_pkey(dd, uctxt, pkey);

	return -ENOENT;
}

/**
 * ctxt_reset - Reset the user context
 * @uctxt: valid user context
 */
static int ctxt_reset(struct hfi1_ctxtdata *uctxt)
{
	struct send_context *sc;
	struct hfi1_devdata *dd;
	int ret = 0;

	if (!uctxt || !uctxt->dd || !uctxt->sc)
		return -EINVAL;

	/*
	 * There is no protection here. User level has to guarantee that
	 * no one will be writing to the send context while it is being
	 * re-initialized. If user level breaks that guarantee, it will
	 * break its own context and no one else's.
	 */
	dd = uctxt->dd;
	sc = uctxt->sc;

	/*
	 * Wait until the interrupt handler has marked the context as
	 * halted or frozen. Report error if we time out.
	 */
	wait_event_interruptible_timeout(
		sc->halt_wait, (sc->flags & SCF_HALTED),
		msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
	if (!(sc->flags & SCF_HALTED))
		return -ENOLCK;

	/*
	 * If the send context was halted due to a Freeze, wait until the
	 * device has been "unfrozen" before resetting the context.
	 */
	if (sc->flags & SCF_FROZEN) {
		wait_event_interruptible_timeout(
			dd->event_queue,
			!(READ_ONCE(dd->flags) & HFI1_FROZEN),
			msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
		if (dd->flags & HFI1_FROZEN)
			return -ENOLCK;

		if (dd->flags & HFI1_FORCED_FREEZE)
			/*
			 * Don't allow context reset if we are in a
			 * forced freeze
			 */
			return -ENODEV;

		sc_disable(sc);
		ret = sc_enable(sc);
		hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt);
	} else {
		ret = sc_restart(sc);
	}
	if (!ret)
		sc_return_credits(sc);

	return ret;
}

static void user_remove(struct hfi1_devdata *dd)
{
	hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device);
}

static int user_add(struct hfi1_devdata *dd)
{
	char name[10];
	int ret;

	snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
	ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
			     &dd->user_cdev, &dd->user_device,
			     true, &dd->verbs_dev.rdi.ibdev.dev.kobj);
	if (ret)
		user_remove(dd);

	return ret;
}

/*
 * Create per-unit files in /dev
 */
int hfi1_device_create(struct hfi1_devdata *dd)
{
	return user_add(dd);
}

/*
 * Remove per-unit files in /dev
 * void, core kernel returns no errors for this stuff
 */
void hfi1_device_remove(struct hfi1_devdata *dd)
{
	user_remove(dd);
}
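
/*
 * Illustrative note (not part of the driver build): a minimal sketch of how
 * a user process is expected to consume the interface implemented above.
 * It assumes the uapi definitions from the hfi1 user header (e.g.
 * <rdma/hfi/hfi1_user.h>) and a device node such as /dev/hfi1_0 created by
 * hfi1_device_create(); the exact structure layouts, header path, and map
 * lengths are defined by that uapi, not by this sketch.
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *	#include <rdma/hfi/hfi1_user.h>
 *
 *	int hfi1_open_example(void)
 *	{
 *		struct hfi1_user_info uinfo = {
 *			.userversion = HFI1_USER_SWVERSION,
 *			.subctxt_cnt = 0,	// no context sharing
 *		};
 *		struct hfi1_base_info binfo;
 *		void *uregs;
 *		int fd = open("/dev/hfi1_0", O_RDWR);
 *
 *		if (fd < 0)
 *			return -1;
 *		// Bind this fd to a receive context (assign_ctxt() above).
 *		if (ioctl(fd, HFI1_IOCTL_ASSIGN_CTXT, &uinfo) < 0)
 *			return -1;
 *		// Fetch the mmap tokens built by get_base_info() above.
 *		if (ioctl(fd, HFI1_IOCTL_USER_INFO, &binfo) < 0)
 *			return -1;
 *		// Each token is passed back as the mmap offset and decoded
 *		// by hfi1_file_mmap(); UREGS maps exactly one page and the
 *		// mapping must be MAP_SHARED.
 *		uregs = mmap(NULL, sysconf(_SC_PAGESIZE),
 *			     PROT_READ | PROT_WRITE, MAP_SHARED,
 *			     fd, binfo.user_regbase);
 *		return uregs == MAP_FAILED ? -1 : 0;
 *	}
 */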