/*
 * Copyright 2014 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <misc/cxl.h>
#include <linux/msi.h>
#include <linux/module.h>
#include <linux/mount.h>

#include "cxl.h"

/*
 * Since we want to track memory mappings to be able to force-unmap
 * when the AFU is no longer reachable, we need an inode. For devices
 * opened through the cxl user API, this is not a problem, but a
 * userland process can also get a cxl fd through the cxl_get_fd()
 * API, which is used by the cxlflash driver.
 *
 * Therefore we implement our own simple pseudo-filesystem and inode
 * allocator. We don't use the anonymous inode, as we need the
 * meta-data associated with it (address_space) and it is shared by
 * other drivers/processes, so it could lead to cxl unmapping VMAs
 * from random processes.
 */

#define CXL_PSEUDO_FS_MAGIC	0x1697697f

static int cxl_fs_cnt;
static struct vfsmount *cxl_vfs_mount;

static const struct dentry_operations cxl_fs_dops = {
	.d_dname	= simple_dname,
};

static struct dentry *cxl_fs_mount(struct file_system_type *fs_type, int flags,
				const char *dev_name, void *data)
{
	return mount_pseudo(fs_type, "cxl:", NULL, &cxl_fs_dops,
			CXL_PSEUDO_FS_MAGIC);
}

static struct file_system_type cxl_fs_type = {
	.name		= "cxl",
	.owner		= THIS_MODULE,
	.mount		= cxl_fs_mount,
	.kill_sb	= kill_anon_super,
};

void cxl_release_mapping(struct cxl_context *ctx)
{
	if (ctx->kernelapi && ctx->mapping)
		simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt);
}

static struct file *cxl_getfile(const char *name,
				const struct file_operations *fops,
				void *priv, int flags)
{
	struct qstr this;
	struct path path;
	struct file *file;
	struct inode *inode = NULL;
	int rc;

	/* strongly inspired by anon_inode_getfile() */

	if (fops->owner && !try_module_get(fops->owner))
		return ERR_PTR(-ENOENT);

	rc = simple_pin_fs(&cxl_fs_type, &cxl_vfs_mount, &cxl_fs_cnt);
	if (rc < 0) {
		pr_err("Cannot mount cxl pseudo filesystem: %d\n", rc);
		file = ERR_PTR(rc);
		goto err_module;
	}

	inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb);
	if (IS_ERR(inode)) {
		file = ERR_CAST(inode);
		goto err_fs;
	}

	file = ERR_PTR(-ENOMEM);
	this.name = name;
	this.len = strlen(name);
	this.hash = 0;
	path.dentry = d_alloc_pseudo(cxl_vfs_mount->mnt_sb, &this);
	if (!path.dentry)
		goto err_inode;

	path.mnt = mntget(cxl_vfs_mount);
	d_instantiate(path.dentry, inode);

	file = alloc_file(&path, OPEN_FMODE(flags), fops);
	if (IS_ERR(file))
		goto err_dput;
	file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
	file->private_data = priv;

	return file;

err_dput:
	path_put(&path);
err_inode:
	iput(inode);
err_fs:
	simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt);
err_module:
	module_put(fops->owner);
	return file;
}
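/*
 * Allocate and initialise a kernel API context on the AFU that the given PCI
 * function belongs to. The context is created as a slave context; start it
 * with cxl_start_context() and free it with cxl_release_context(). Returns
 * an ERR_PTR() on failure.
 */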
struct cxl_context *cxl_dev_context_init(struct pci_dev *dev)
{
	struct cxl_afu *afu;
	struct cxl_context *ctx;
	int rc;

	afu = cxl_pci_to_afu(dev);
	if (IS_ERR(afu))
		return ERR_CAST(afu);

	ctx = cxl_context_alloc();
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	ctx->kernelapi = true;

	/* Make it a slave context. We can promote it later? */
	rc = cxl_context_init(ctx, afu, false);
	if (rc)
		goto err_ctx;

	return ctx;

err_ctx:
	kfree(ctx);
	return ERR_PTR(rc);
}
EXPORT_SYMBOL_GPL(cxl_dev_context_init);

struct cxl_context *cxl_get_context(struct pci_dev *dev)
{
	return dev->dev.archdata.cxl_ctx;
}
EXPORT_SYMBOL_GPL(cxl_get_context);

int cxl_release_context(struct cxl_context *ctx)
{
	if (ctx->status >= STARTED)
		return -EBUSY;

	cxl_context_free(ctx);

	return 0;
}
EXPORT_SYMBOL_GPL(cxl_release_context);

static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num)
{
	__u16 range;
	int r;

	for (r = 0; r < CXL_IRQ_RANGES; r++) {
		range = ctx->irqs.range[r];
		if (num < range)
			return ctx->irqs.offset[r] + num;
		num -= range;
	}
	return 0;
}

int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq)
{
	if (*ctx == NULL || *afu_irq == 0) {
		*afu_irq = 1;
		*ctx = cxl_get_context(pdev);
	} else {
		(*afu_irq)++;
		if (*afu_irq > cxl_get_max_irqs_per_process(pdev)) {
			*ctx = list_next_entry(*ctx, extra_irq_contexts);
			*afu_irq = 1;
		}
	}
	return cxl_find_afu_irq(*ctx, *afu_irq);
}
/* Exported via cxl_base */

int cxl_set_priv(struct cxl_context *ctx, void *priv)
{
	if (!ctx)
		return -EINVAL;

	ctx->priv = priv;

	return 0;
}
EXPORT_SYMBOL_GPL(cxl_set_priv);

void *cxl_get_priv(struct cxl_context *ctx)
{
	if (!ctx)
		return ERR_PTR(-EINVAL);

	return ctx->priv;
}
EXPORT_SYMBOL_GPL(cxl_get_priv);

int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
{
	int res;
	irq_hw_number_t hwirq;

	if (num == 0)
		num = ctx->afu->pp_irqs;
	res = afu_allocate_irqs(ctx, num);
	if (res)
		return res;

	if (!cpu_has_feature(CPU_FTR_HVMODE)) {
		/* In a guest, the PSL interrupt is not multiplexed. It was
		 * allocated above, and we need to set its handler
		 */
		hwirq = cxl_find_afu_irq(ctx, 0);
		if (hwirq)
			cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl");
	}

	if (ctx->status == STARTED) {
		if (cxl_ops->update_ivtes)
			cxl_ops->update_ivtes(ctx);
		else
			WARN(1, "BUG: cxl_allocate_afu_irqs must be called prior to starting the context on this platform\n");
	}

	return res;
}
EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs);

void cxl_free_afu_irqs(struct cxl_context *ctx)
{
	irq_hw_number_t hwirq;
	unsigned int virq;

	if (!cpu_has_feature(CPU_FTR_HVMODE)) {
		hwirq = cxl_find_afu_irq(ctx, 0);
		if (hwirq) {
			virq = irq_find_mapping(NULL, hwirq);
			if (virq)
				cxl_unmap_irq(virq, ctx);
		}
	}
	afu_irq_name_free(ctx);
	cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
}
EXPORT_SYMBOL_GPL(cxl_free_afu_irqs);
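/*
 * Typical use of the context and interrupt helpers in this file by a kernel
 * AFU driver might look roughly like the sketch below. my_irq_handler,
 * my_cookie and MY_WED are placeholders (not part of this file) and error
 * handling is omitted:
 *
 *	struct cxl_context *ctx = cxl_dev_context_init(pdev);
 *
 *	cxl_allocate_afu_irqs(ctx, 0);		// 0 means afu->pp_irqs
 *	cxl_map_afu_irq(ctx, 1, my_irq_handler, my_cookie, "my_afu");
 *	cxl_start_context(ctx, MY_WED, NULL);	// NULL task: kernel context
 *	...
 *	cxl_stop_context(ctx);
 *	cxl_unmap_afu_irq(ctx, 1, my_cookie);
 *	cxl_free_afu_irqs(ctx);
 *	cxl_release_context(ctx);
 */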
int cxl_map_afu_irq(struct cxl_context *ctx, int num,
		    irq_handler_t handler, void *cookie, char *name)
{
	irq_hw_number_t hwirq;

	/*
	 * Find the interrupt we are to register.
	 */
	hwirq = cxl_find_afu_irq(ctx, num);
	if (!hwirq)
		return -ENOENT;

	return cxl_map_irq(ctx->afu->adapter, hwirq, handler, cookie, name);
}
EXPORT_SYMBOL_GPL(cxl_map_afu_irq);

void cxl_unmap_afu_irq(struct cxl_context *ctx, int num, void *cookie)
{
	irq_hw_number_t hwirq;
	unsigned int virq;

	hwirq = cxl_find_afu_irq(ctx, num);
	if (!hwirq)
		return;

	virq = irq_find_mapping(NULL, hwirq);
	if (virq)
		cxl_unmap_irq(virq, cookie);
}
EXPORT_SYMBOL_GPL(cxl_unmap_afu_irq);

/*
 * Start a context.
 * The code here is similar to afu_ioctl_start_work().
 */
int cxl_start_context(struct cxl_context *ctx, u64 wed,
		      struct task_struct *task)
{
	int rc = 0;
	bool kernel = true;

	pr_devel("%s: pe: %i\n", __func__, ctx->pe);

	mutex_lock(&ctx->status_mutex);
	if (ctx->status == STARTED)
		goto out; /* already started */

	/*
	 * Increment the mapped context count for the adapter. This also
	 * checks whether the adapter_context_lock is taken.
	 */
	rc = cxl_adapter_context_get(ctx->afu->adapter);
	if (rc)
		goto out;

	if (task) {
		ctx->pid = get_task_pid(task, PIDTYPE_PID);
		ctx->glpid = get_task_pid(task->group_leader, PIDTYPE_PID);
		kernel = false;
		ctx->real_mode = false;
	}

	cxl_ctx_get();

	if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
		put_pid(ctx->glpid);
		put_pid(ctx->pid);
		ctx->glpid = ctx->pid = NULL;
		cxl_adapter_context_put(ctx->afu->adapter);
		cxl_ctx_put();
		goto out;
	}

	ctx->status = STARTED;
out:
	mutex_unlock(&ctx->status_mutex);
	return rc;
}
EXPORT_SYMBOL_GPL(cxl_start_context);
int cxl_process_element(struct cxl_context *ctx)
{
	return ctx->external_pe;
}
EXPORT_SYMBOL_GPL(cxl_process_element);

/* Stop a context. Returns 0 on success, otherwise a negative errno. */
int cxl_stop_context(struct cxl_context *ctx)
{
	return __detach_context(ctx);
}
EXPORT_SYMBOL_GPL(cxl_stop_context);

void cxl_set_master(struct cxl_context *ctx)
{
	ctx->master = true;
}
EXPORT_SYMBOL_GPL(cxl_set_master);

int cxl_set_translation_mode(struct cxl_context *ctx, bool real_mode)
{
	if (ctx->status == STARTED) {
		/*
		 * We could potentially update the PE and issue an update LLCMD
		 * to support this, but it doesn't seem to have a good use case
		 * since it's trivial to just create a second kernel context
		 * with different translation modes, so until someone convinces
		 * me otherwise:
		 */
		return -EBUSY;
	}

	ctx->real_mode = real_mode;
	return 0;
}
EXPORT_SYMBOL_GPL(cxl_set_translation_mode);

/* Wrappers around the afu_* file ops, which are exported. */
int cxl_fd_open(struct inode *inode, struct file *file)
{
	return afu_open(inode, file);
}
EXPORT_SYMBOL_GPL(cxl_fd_open);
int cxl_fd_release(struct inode *inode, struct file *file)
{
	return afu_release(inode, file);
}
EXPORT_SYMBOL_GPL(cxl_fd_release);
long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	return afu_ioctl(file, cmd, arg);
}
EXPORT_SYMBOL_GPL(cxl_fd_ioctl);
int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm)
{
	return afu_mmap(file, vm);
}
EXPORT_SYMBOL_GPL(cxl_fd_mmap);
unsigned int cxl_fd_poll(struct file *file, struct poll_table_struct *poll)
{
	return afu_poll(file, poll);
}
EXPORT_SYMBOL_GPL(cxl_fd_poll);
ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count,
		    loff_t *off)
{
	return afu_read(file, buf, count, off);
}
EXPORT_SYMBOL_GPL(cxl_fd_read);
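/*
 * An AFU driver (cxlflash is the in-tree user mentioned at the top of this
 * file) can build its own file_operations that override a few entry points
 * and reuse the cxl_fd_* wrappers above for the rest, then hand it to
 * cxl_get_fd() below. The entries cxl_get_fd() knows about (open, poll, read,
 * release, unlocked_ioctl, compat_ioctl, mmap) are filled in from the default
 * afu_fops when left NULL. A rough sketch, where my_fops and my_ioctl are
 * placeholders and not part of this file:
 *
 *	static struct file_operations my_fops = {
 *		.owner		= THIS_MODULE,
 *		.unlocked_ioctl	= my_ioctl,	// driver-specific ioctls
 *	};
 *
 *	file = cxl_get_fd(ctx, &my_fops, &fd);
 */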
#define PATCH_FOPS(NAME) if (!fops->NAME) fops->NAME = afu_fops.NAME

/* Get a struct file and fd for a context and attach the ops */
struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops,
			int *fd)
{
	struct file *file;
	int rc, flags, fdtmp;
	char *name = NULL;

	/* only allow one per context */
	if (ctx->mapping)
		return ERR_PTR(-EEXIST);

	flags = O_RDWR | O_CLOEXEC;

	/* This code is similar to anon_inode_getfd() */
	rc = get_unused_fd_flags(flags);
	if (rc < 0)
		return ERR_PTR(rc);
	fdtmp = rc;

	/*
	 * Patch the file ops. Needs to be careful that this is re-entrant safe.
	 */
	if (fops) {
		PATCH_FOPS(open);
		PATCH_FOPS(poll);
		PATCH_FOPS(read);
		PATCH_FOPS(release);
		PATCH_FOPS(unlocked_ioctl);
		PATCH_FOPS(compat_ioctl);
		PATCH_FOPS(mmap);
	} else /* use default ops */
		fops = (struct file_operations *)&afu_fops;

	name = kasprintf(GFP_KERNEL, "cxl:%d", ctx->pe);
	file = cxl_getfile(name, fops, ctx, flags);
	kfree(name);
	if (IS_ERR(file))
		goto err_fd;

	cxl_context_set_mapping(ctx, file->f_mapping);
	*fd = fdtmp;
	return file;

err_fd:
	put_unused_fd(fdtmp);
	return NULL;
}
EXPORT_SYMBOL_GPL(cxl_get_fd);

struct cxl_context *cxl_fops_get_context(struct file *file)
{
	return file->private_data;
}
EXPORT_SYMBOL_GPL(cxl_fops_get_context);

void cxl_set_driver_ops(struct cxl_context *ctx,
			struct cxl_afu_driver_ops *ops)
{
	WARN_ON(!ops->fetch_event || !ops->event_delivered);
	atomic_set(&ctx->afu_driver_events, 0);
	ctx->afu_driver_ops = ops;
}
EXPORT_SYMBOL_GPL(cxl_set_driver_ops);

void cxl_context_events_pending(struct cxl_context *ctx,
				unsigned int new_events)
{
	atomic_add(new_events, &ctx->afu_driver_events);
	wake_up_all(&ctx->wq);
}
EXPORT_SYMBOL_GPL(cxl_context_events_pending);

int cxl_start_work(struct cxl_context *ctx,
		   struct cxl_ioctl_start_work *work)
{
	int rc;

	/* code taken from afu_ioctl_start_work */
	if (!(work->flags & CXL_START_WORK_NUM_IRQS))
		work->num_interrupts = ctx->afu->pp_irqs;
	else if ((work->num_interrupts < ctx->afu->pp_irqs) ||
		 (work->num_interrupts > ctx->afu->irqs_max)) {
		return -EINVAL;
	}

	rc = afu_register_irqs(ctx, work->num_interrupts);
	if (rc)
		return rc;

	rc = cxl_start_context(ctx, work->work_element_descriptor, current);
	if (rc < 0) {
		afu_release_irqs(ctx, ctx);
		return rc;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(cxl_start_work);

void __iomem *cxl_psa_map(struct cxl_context *ctx)
{
	if (ctx->status != STARTED)
		return NULL;

	pr_devel("%s: psn_phys:%llx size:%llx\n",
		 __func__, ctx->psn_phys, ctx->psn_size);
	return ioremap(ctx->psn_phys, ctx->psn_size);
}
EXPORT_SYMBOL_GPL(cxl_psa_map);

void cxl_psa_unmap(void __iomem *addr)
{
	iounmap(addr);
}
EXPORT_SYMBOL_GPL(cxl_psa_unmap);

int cxl_afu_reset(struct cxl_context *ctx)
{
	struct cxl_afu *afu = ctx->afu;
	int rc;

	rc = cxl_ops->afu_reset(afu);
	if (rc)
		return rc;

	return cxl_ops->afu_check_and_enable(afu);
}
EXPORT_SYMBOL_GPL(cxl_afu_reset);

void cxl_perst_reloads_same_image(struct cxl_afu *afu,
				  bool perst_reloads_same_image)
{
	afu->adapter->perst_same_image = perst_reloads_same_image;
}
EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image);

ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count)
{
	struct cxl_afu *afu = cxl_pci_to_afu(dev);

	if (IS_ERR(afu))
		return -ENODEV;

	return cxl_ops->read_adapter_vpd(afu->adapter, buf, count);
}
EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd);
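/*
 * Cap the number of AFU interrupts a single context on this AFU will use.
 * adapter->user_irqs is lowered to the same value so that the limit cannot
 * be raised again via sysfs. This is used together with
 * _cxl_cx4_setup_msi_irqs() below to keep AFU interrupt numbers within the
 * range the hardware supports.
 */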
int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs)
{
	struct cxl_afu *afu = cxl_pci_to_afu(dev);

	if (IS_ERR(afu))
		return -ENODEV;

	if (irqs > afu->adapter->user_irqs)
		return -EINVAL;

	/* Limit user_irqs to prevent the user increasing this via sysfs */
	afu->adapter->user_irqs = irqs;
	afu->irqs_max = irqs;

	return 0;
}
EXPORT_SYMBOL_GPL(cxl_set_max_irqs_per_process);

int cxl_get_max_irqs_per_process(struct pci_dev *dev)
{
	struct cxl_afu *afu = cxl_pci_to_afu(dev);

	if (IS_ERR(afu))
		return -ENODEV;

	return afu->irqs_max;
}
EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process);

/*
 * This is a special interrupt allocation routine called from the PHB's MSI
 * setup function. When capi interrupts are allocated in this manner they must
 * still be associated with a running context, but since the MSI APIs have no
 * way to specify this we use the default context associated with the device.
 *
 * The Mellanox CX4 has a hardware limitation that restricts the maximum AFU
 * interrupt number, so in order to overcome this their driver informs us of
 * the restriction by setting the maximum interrupts per context, and we
 * allocate additional contexts as necessary so that we can keep the AFU
 * interrupt number within the supported range.
 */
int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
	struct cxl_context *ctx, *new_ctx, *default_ctx;
	int remaining;
	int rc;

	ctx = default_ctx = cxl_get_context(pdev);
	if (WARN_ON(!default_ctx))
		return -ENODEV;

	remaining = nvec;
	while (remaining > 0) {
		rc = cxl_allocate_afu_irqs(ctx, min(remaining, ctx->afu->irqs_max));
		if (rc) {
			pr_warn("%s: Failed to find enough free MSIs\n", pci_name(pdev));
			return rc;
		}
		remaining -= ctx->afu->irqs_max;

		if (ctx != default_ctx && default_ctx->status == STARTED) {
			WARN_ON(cxl_start_context(ctx,
				be64_to_cpu(default_ctx->elem->common.wed),
				NULL));
		}

		if (remaining > 0) {
			new_ctx = cxl_dev_context_init(pdev);
			if (IS_ERR(new_ctx)) {
				pr_warn("%s: Failed to allocate enough contexts for MSIs\n", pci_name(pdev));
				return -ENOSPC;
			}
			list_add(&new_ctx->extra_irq_contexts, &ctx->extra_irq_contexts);
			ctx = new_ctx;
		}
	}

	return 0;
}
/* Exported via cxl_base */

void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev)
{
	struct cxl_context *ctx, *pos, *tmp;

	ctx = cxl_get_context(pdev);
	if (WARN_ON(!ctx))
		return;

	cxl_free_afu_irqs(ctx);
	list_for_each_entry_safe(pos, tmp, &ctx->extra_irq_contexts, extra_irq_contexts) {
		cxl_stop_context(pos);
		cxl_free_afu_irqs(pos);
		list_del(&pos->extra_irq_contexts);
		cxl_release_context(pos);
	}
}
/* Exported via cxl_base */
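/*
 * Note that _cxl_next_msi_hwirq(), _cxl_cx4_setup_msi_irqs() and
 * _cxl_cx4_teardown_msi_irqs() carry no EXPORT_SYMBOL; as the "Exported via
 * cxl_base" markers say, the PHB MSI code reaches them through cxl_base.
 *
 * A caller walking every AFU hwirq set up for a device could iterate roughly
 * as in this sketch (nvec and the surrounding setup are placeholders):
 *
 *	struct cxl_context *ctx = NULL;
 *	int i, afu_irq = 0, hwirq;
 *
 *	for (i = 0; i < nvec; i++) {
 *		hwirq = _cxl_next_msi_hwirq(pdev, &ctx, &afu_irq);
 *		// wire up the MSI for this hwirq ...
 *	}
 */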