1 /* 2 * The Virtio 9p transport driver 3 * 4 * This is a block based transport driver based on the lguest block driver 5 * code. 6 * 7 * Copyright (C) 2007, 2008 Eric Van Hensbergen, IBM Corporation 8 * 9 * Based on virtio console driver 10 * Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation 11 * 12 * This program is free software; you can redistribute it and/or modify 13 * it under the terms of the GNU General Public License version 2 14 * as published by the Free Software Foundation. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 * GNU General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; if not, write to: 23 * Free Software Foundation 24 * 51 Franklin Street, Fifth Floor 25 * Boston, MA 02111-1301 USA 26 * 27 */ 28 29 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 30 31 #include <linux/in.h> 32 #include <linux/module.h> 33 #include <linux/net.h> 34 #include <linux/ipv6.h> 35 #include <linux/errno.h> 36 #include <linux/kernel.h> 37 #include <linux/un.h> 38 #include <linux/uaccess.h> 39 #include <linux/inet.h> 40 #include <linux/idr.h> 41 #include <linux/file.h> 42 #include <linux/slab.h> 43 #include <net/9p/9p.h> 44 #include <linux/parser.h> 45 #include <net/9p/client.h> 46 #include <net/9p/transport.h> 47 #include <linux/scatterlist.h> 48 #include <linux/swap.h> 49 #include <linux/virtio.h> 50 #include <linux/virtio_9p.h> 51 #include "trans_common.h" 52 53 #define VIRTQUEUE_NUM 128 54 55 /* a single mutex to manage channel initialization and attachment */ 56 static DEFINE_MUTEX(virtio_9p_lock); 57 static DECLARE_WAIT_QUEUE_HEAD(vp_wq); 58 static atomic_t vp_pinned = ATOMIC_INIT(0); 59 60 /** 61 * struct virtio_chan - per-instance transport information 62 * @initialized: whether the channel is initialized 63 * @inuse: whether the channel is in use 64 * @lock: protects multiple elements within this structure 65 * @client: client instance 66 * @vdev: virtio dev associated with this channel 67 * @vq: virtio queue associated with this channel 68 * @sg: scatter gather list which is used to pack a request (protected?) 69 * 70 * We keep all per-channel information in a structure. 71 * This structure is allocated within the devices dev->mem space. 72 * A pointer to the structure will get put in the transport private. 73 * 74 */ 75 76 struct virtio_chan { 77 bool inuse; 78 79 spinlock_t lock; 80 81 struct p9_client *client; 82 struct virtio_device *vdev; 83 struct virtqueue *vq; 84 int ring_bufs_avail; 85 wait_queue_head_t *vc_wq; 86 /* This is global limit. Since we don't have a global structure, 87 * will be placing it in each channel. 88 */ 89 int p9_max_pages; 90 /* Scatterlist: can be too big for stack. */ 91 struct scatterlist sg[VIRTQUEUE_NUM]; 92 93 int tag_len; 94 /* 95 * tag name to identify a mount Non-null terminated 96 */ 97 char *tag; 98 99 struct list_head chan_list; 100 }; 101 102 static struct list_head virtio_chan_list; 103 104 /* How many bytes left in this page. */ 105 static unsigned int rest_of_page(void *data) 106 { 107 return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE); 108 } 109 110 /** 111 * p9_virtio_close - reclaim resources of a channel 112 * @client: client instance 113 * 114 * This reclaims a channel by freeing its resources and 115 * reseting its inuse flag. 116 * 117 */ 118 119 static void p9_virtio_close(struct p9_client *client) 120 { 121 struct virtio_chan *chan = client->trans; 122 123 mutex_lock(&virtio_9p_lock); 124 if (chan) 125 chan->inuse = false; 126 mutex_unlock(&virtio_9p_lock); 127 } 128 129 /** 130 * req_done - callback which signals activity from the server 131 * @vq: virtio queue activity was received on 132 * 133 * This notifies us that the server has triggered some activity 134 * on the virtio channel - most likely a response to request we 135 * sent. Figure out which requests now have responses and wake up 136 * those threads. 137 * 138 * Bugs: could do with some additional sanity checking, but appears to work. 139 * 140 */ 141 142 static void req_done(struct virtqueue *vq) 143 { 144 struct virtio_chan *chan = vq->vdev->priv; 145 struct p9_fcall *rc; 146 unsigned int len; 147 struct p9_req_t *req; 148 unsigned long flags; 149 150 p9_debug(P9_DEBUG_TRANS, ": request done\n"); 151 152 while (1) { 153 spin_lock_irqsave(&chan->lock, flags); 154 rc = virtqueue_get_buf(chan->vq, &len); 155 if (rc == NULL) { 156 spin_unlock_irqrestore(&chan->lock, flags); 157 break; 158 } 159 chan->ring_bufs_avail = 1; 160 spin_unlock_irqrestore(&chan->lock, flags); 161 /* Wakeup if anyone waiting for VirtIO ring space. */ 162 wake_up(chan->vc_wq); 163 p9_debug(P9_DEBUG_TRANS, ": rc %p\n", rc); 164 p9_debug(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); 165 req = p9_tag_lookup(chan->client, rc->tag); 166 req->status = REQ_STATUS_RCVD; 167 p9_client_cb(chan->client, req); 168 } 169 } 170 171 /** 172 * pack_sg_list - pack a scatter gather list from a linear buffer 173 * @sg: scatter/gather list to pack into 174 * @start: which segment of the sg_list to start at 175 * @limit: maximum segment to pack data to 176 * @data: data to pack into scatter/gather list 177 * @count: amount of data to pack into the scatter/gather list 178 * 179 * sg_lists have multiple segments of various sizes. This will pack 180 * arbitrary data into an existing scatter gather list, segmenting the 181 * data as necessary within constraints. 182 * 183 */ 184 185 static int pack_sg_list(struct scatterlist *sg, int start, 186 int limit, char *data, int count) 187 { 188 int s; 189 int index = start; 190 191 while (count) { 192 s = rest_of_page(data); 193 if (s > count) 194 s = count; 195 BUG_ON(index > limit); 196 sg_set_buf(&sg[index++], data, s); 197 count -= s; 198 data += s; 199 } 200 201 return index-start; 202 } 203 204 /* We don't currently allow canceling of virtio requests */ 205 static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) 206 { 207 return 1; 208 } 209 210 /** 211 * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer, 212 * this takes a list of pages. 213 * @sg: scatter/gather list to pack into 214 * @start: which segment of the sg_list to start at 215 * @pdata: a list of pages to add into sg. 216 * @nr_pages: number of pages to pack into the scatter/gather list 217 * @data: data to pack into scatter/gather list 218 * @count: amount of data to pack into the scatter/gather list 219 */ 220 static int 221 pack_sg_list_p(struct scatterlist *sg, int start, int limit, 222 struct page **pdata, int nr_pages, char *data, int count) 223 { 224 int i = 0, s; 225 int data_off; 226 int index = start; 227 228 BUG_ON(nr_pages > (limit - start)); 229 /* 230 * if the first page doesn't start at 231 * page boundary find the offset 232 */ 233 data_off = offset_in_page(data); 234 while (nr_pages) { 235 s = rest_of_page(data); 236 if (s > count) 237 s = count; 238 sg_set_page(&sg[index++], pdata[i++], s, data_off); 239 data_off = 0; 240 data += s; 241 count -= s; 242 nr_pages--; 243 } 244 return index - start; 245 } 246 247 /** 248 * p9_virtio_request - issue a request 249 * @client: client instance issuing the request 250 * @req: request to be issued 251 * 252 */ 253 254 static int 255 p9_virtio_request(struct p9_client *client, struct p9_req_t *req) 256 { 257 int err; 258 int in, out; 259 unsigned long flags; 260 struct virtio_chan *chan = client->trans; 261 262 p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n"); 263 264 req->status = REQ_STATUS_SENT; 265 req_retry: 266 spin_lock_irqsave(&chan->lock, flags); 267 268 /* Handle out VirtIO ring buffers */ 269 out = pack_sg_list(chan->sg, 0, 270 VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); 271 272 in = pack_sg_list(chan->sg, out, 273 VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity); 274 275 err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc, 276 GFP_ATOMIC); 277 if (err < 0) { 278 if (err == -ENOSPC) { 279 chan->ring_bufs_avail = 0; 280 spin_unlock_irqrestore(&chan->lock, flags); 281 err = wait_event_interruptible(*chan->vc_wq, 282 chan->ring_bufs_avail); 283 if (err == -ERESTARTSYS) 284 return err; 285 286 p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n"); 287 goto req_retry; 288 } else { 289 spin_unlock_irqrestore(&chan->lock, flags); 290 p9_debug(P9_DEBUG_TRANS, 291 "virtio rpc add_buf returned failure\n"); 292 return -EIO; 293 } 294 } 295 virtqueue_kick(chan->vq); 296 spin_unlock_irqrestore(&chan->lock, flags); 297 298 p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); 299 return 0; 300 } 301 302 static int p9_get_mapped_pages(struct virtio_chan *chan, 303 struct page **pages, char *data, 304 int nr_pages, int write, int kern_buf) 305 { 306 int err; 307 if (!kern_buf) { 308 /* 309 * We allow only p9_max_pages pinned. We wait for the 310 * Other zc request to finish here 311 */ 312 if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { 313 err = wait_event_interruptible(vp_wq, 314 (atomic_read(&vp_pinned) < chan->p9_max_pages)); 315 if (err == -ERESTARTSYS) 316 return err; 317 } 318 err = p9_payload_gup(data, &nr_pages, pages, write); 319 if (err < 0) 320 return err; 321 atomic_add(nr_pages, &vp_pinned); 322 } else { 323 /* kernel buffer, no need to pin pages */ 324 int s, index = 0; 325 int count = nr_pages; 326 while (nr_pages) { 327 s = rest_of_page(data); 328 pages[index++] = virt_to_page(data); 329 data += s; 330 nr_pages--; 331 } 332 nr_pages = count; 333 } 334 return nr_pages; 335 } 336 337 /** 338 * p9_virtio_zc_request - issue a zero copy request 339 * @client: client instance issuing the request 340 * @req: request to be issued 341 * @uidata: user bffer that should be ued for zero copy read 342 * @uodata: user buffer that shoud be user for zero copy write 343 * @inlen: read buffer size 344 * @olen: write buffer size 345 * @hdrlen: reader header size, This is the size of response protocol data 346 * 347 */ 348 static int 349 p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, 350 char *uidata, char *uodata, int inlen, 351 int outlen, int in_hdr_len, int kern_buf) 352 { 353 int in, out, err; 354 unsigned long flags; 355 int in_nr_pages = 0, out_nr_pages = 0; 356 struct page **in_pages = NULL, **out_pages = NULL; 357 struct virtio_chan *chan = client->trans; 358 359 p9_debug(P9_DEBUG_TRANS, "virtio request\n"); 360 361 if (uodata) { 362 out_nr_pages = p9_nr_pages(uodata, outlen); 363 out_pages = kmalloc(sizeof(struct page *) * out_nr_pages, 364 GFP_NOFS); 365 if (!out_pages) { 366 err = -ENOMEM; 367 goto err_out; 368 } 369 out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata, 370 out_nr_pages, 0, kern_buf); 371 if (out_nr_pages < 0) { 372 err = out_nr_pages; 373 kfree(out_pages); 374 out_pages = NULL; 375 goto err_out; 376 } 377 } 378 if (uidata) { 379 in_nr_pages = p9_nr_pages(uidata, inlen); 380 in_pages = kmalloc(sizeof(struct page *) * in_nr_pages, 381 GFP_NOFS); 382 if (!in_pages) { 383 err = -ENOMEM; 384 goto err_out; 385 } 386 in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata, 387 in_nr_pages, 1, kern_buf); 388 if (in_nr_pages < 0) { 389 err = in_nr_pages; 390 kfree(in_pages); 391 in_pages = NULL; 392 goto err_out; 393 } 394 } 395 req->status = REQ_STATUS_SENT; 396 req_retry_pinned: 397 spin_lock_irqsave(&chan->lock, flags); 398 /* out data */ 399 out = pack_sg_list(chan->sg, 0, 400 VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); 401 402 if (out_pages) 403 out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, 404 out_pages, out_nr_pages, uodata, outlen); 405 /* 406 * Take care of in data 407 * For example TREAD have 11. 408 * 11 is the read/write header = PDU Header(7) + IO Size (4). 409 * Arrange in such a way that server places header in the 410 * alloced memory and payload onto the user buffer. 411 */ 412 in = pack_sg_list(chan->sg, out, 413 VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len); 414 if (in_pages) 415 in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM, 416 in_pages, in_nr_pages, uidata, inlen); 417 418 err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc, 419 GFP_ATOMIC); 420 if (err < 0) { 421 if (err == -ENOSPC) { 422 chan->ring_bufs_avail = 0; 423 spin_unlock_irqrestore(&chan->lock, flags); 424 err = wait_event_interruptible(*chan->vc_wq, 425 chan->ring_bufs_avail); 426 if (err == -ERESTARTSYS) 427 goto err_out; 428 429 p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n"); 430 goto req_retry_pinned; 431 } else { 432 spin_unlock_irqrestore(&chan->lock, flags); 433 p9_debug(P9_DEBUG_TRANS, 434 "virtio rpc add_buf returned failure\n"); 435 err = -EIO; 436 goto err_out; 437 } 438 } 439 virtqueue_kick(chan->vq); 440 spin_unlock_irqrestore(&chan->lock, flags); 441 p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); 442 err = wait_event_interruptible(*req->wq, 443 req->status >= REQ_STATUS_RCVD); 444 /* 445 * Non kernel buffers are pinned, unpin them 446 */ 447 err_out: 448 if (!kern_buf) { 449 if (in_pages) { 450 p9_release_pages(in_pages, in_nr_pages); 451 atomic_sub(in_nr_pages, &vp_pinned); 452 } 453 if (out_pages) { 454 p9_release_pages(out_pages, out_nr_pages); 455 atomic_sub(out_nr_pages, &vp_pinned); 456 } 457 /* wakeup anybody waiting for slots to pin pages */ 458 wake_up(&vp_wq); 459 } 460 kfree(in_pages); 461 kfree(out_pages); 462 return err; 463 } 464 465 static ssize_t p9_mount_tag_show(struct device *dev, 466 struct device_attribute *attr, char *buf) 467 { 468 struct virtio_chan *chan; 469 struct virtio_device *vdev; 470 471 vdev = dev_to_virtio(dev); 472 chan = vdev->priv; 473 474 return snprintf(buf, chan->tag_len + 1, "%s", chan->tag); 475 } 476 477 static DEVICE_ATTR(mount_tag, 0444, p9_mount_tag_show, NULL); 478 479 /** 480 * p9_virtio_probe - probe for existence of 9P virtio channels 481 * @vdev: virtio device to probe 482 * 483 * This probes for existing virtio channels. 484 * 485 */ 486 487 static int p9_virtio_probe(struct virtio_device *vdev) 488 { 489 __u16 tag_len; 490 char *tag; 491 int err; 492 struct virtio_chan *chan; 493 494 chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL); 495 if (!chan) { 496 pr_err("Failed to allocate virtio 9P channel\n"); 497 err = -ENOMEM; 498 goto fail; 499 } 500 501 chan->vdev = vdev; 502 503 /* We expect one virtqueue, for requests. */ 504 chan->vq = virtio_find_single_vq(vdev, req_done, "requests"); 505 if (IS_ERR(chan->vq)) { 506 err = PTR_ERR(chan->vq); 507 goto out_free_vq; 508 } 509 chan->vq->vdev->priv = chan; 510 spin_lock_init(&chan->lock); 511 512 sg_init_table(chan->sg, VIRTQUEUE_NUM); 513 514 chan->inuse = false; 515 if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) { 516 vdev->config->get(vdev, 517 offsetof(struct virtio_9p_config, tag_len), 518 &tag_len, sizeof(tag_len)); 519 } else { 520 err = -EINVAL; 521 goto out_free_vq; 522 } 523 tag = kmalloc(tag_len, GFP_KERNEL); 524 if (!tag) { 525 err = -ENOMEM; 526 goto out_free_vq; 527 } 528 vdev->config->get(vdev, offsetof(struct virtio_9p_config, tag), 529 tag, tag_len); 530 chan->tag = tag; 531 chan->tag_len = tag_len; 532 err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); 533 if (err) { 534 goto out_free_tag; 535 } 536 chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL); 537 if (!chan->vc_wq) { 538 err = -ENOMEM; 539 goto out_free_tag; 540 } 541 init_waitqueue_head(chan->vc_wq); 542 chan->ring_bufs_avail = 1; 543 /* Ceiling limit to avoid denial of service attacks */ 544 chan->p9_max_pages = nr_free_buffer_pages()/4; 545 546 mutex_lock(&virtio_9p_lock); 547 list_add_tail(&chan->chan_list, &virtio_chan_list); 548 mutex_unlock(&virtio_9p_lock); 549 return 0; 550 551 out_free_tag: 552 kfree(tag); 553 out_free_vq: 554 vdev->config->del_vqs(vdev); 555 kfree(chan); 556 fail: 557 return err; 558 } 559 560 561 /** 562 * p9_virtio_create - allocate a new virtio channel 563 * @client: client instance invoking this transport 564 * @devname: string identifying the channel to connect to (unused) 565 * @args: args passed from sys_mount() for per-transport options (unused) 566 * 567 * This sets up a transport channel for 9p communication. Right now 568 * we only match the first available channel, but eventually we couldlook up 569 * alternate channels by matching devname versus a virtio_config entry. 570 * We use a simple reference count mechanism to ensure that only a single 571 * mount has a channel open at a time. 572 * 573 */ 574 575 static int 576 p9_virtio_create(struct p9_client *client, const char *devname, char *args) 577 { 578 struct virtio_chan *chan; 579 int ret = -ENOENT; 580 int found = 0; 581 582 mutex_lock(&virtio_9p_lock); 583 list_for_each_entry(chan, &virtio_chan_list, chan_list) { 584 if (!strncmp(devname, chan->tag, chan->tag_len) && 585 strlen(devname) == chan->tag_len) { 586 if (!chan->inuse) { 587 chan->inuse = true; 588 found = 1; 589 break; 590 } 591 ret = -EBUSY; 592 } 593 } 594 mutex_unlock(&virtio_9p_lock); 595 596 if (!found) { 597 pr_err("no channels available\n"); 598 return ret; 599 } 600 601 client->trans = (void *)chan; 602 client->status = Connected; 603 chan->client = client; 604 605 return 0; 606 } 607 608 /** 609 * p9_virtio_remove - clean up resources associated with a virtio device 610 * @vdev: virtio device to remove 611 * 612 */ 613 614 static void p9_virtio_remove(struct virtio_device *vdev) 615 { 616 struct virtio_chan *chan = vdev->priv; 617 618 if (chan->inuse) 619 p9_virtio_close(chan->client); 620 vdev->config->del_vqs(vdev); 621 622 mutex_lock(&virtio_9p_lock); 623 list_del(&chan->chan_list); 624 mutex_unlock(&virtio_9p_lock); 625 sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); 626 kfree(chan->tag); 627 kfree(chan->vc_wq); 628 kfree(chan); 629 630 } 631 632 static struct virtio_device_id id_table[] = { 633 { VIRTIO_ID_9P, VIRTIO_DEV_ANY_ID }, 634 { 0 }, 635 }; 636 637 static unsigned int features[] = { 638 VIRTIO_9P_MOUNT_TAG, 639 }; 640 641 /* The standard "struct lguest_driver": */ 642 static struct virtio_driver p9_virtio_drv = { 643 .feature_table = features, 644 .feature_table_size = ARRAY_SIZE(features), 645 .driver.name = KBUILD_MODNAME, 646 .driver.owner = THIS_MODULE, 647 .id_table = id_table, 648 .probe = p9_virtio_probe, 649 .remove = p9_virtio_remove, 650 }; 651 652 static struct p9_trans_module p9_virtio_trans = { 653 .name = "virtio", 654 .create = p9_virtio_create, 655 .close = p9_virtio_close, 656 .request = p9_virtio_request, 657 .zc_request = p9_virtio_zc_request, 658 .cancel = p9_virtio_cancel, 659 /* 660 * We leave one entry for input and one entry for response 661 * headers. We also skip one more entry to accomodate, address 662 * that are not at page boundary, that can result in an extra 663 * page in zero copy. 664 */ 665 .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), 666 .def = 0, 667 .owner = THIS_MODULE, 668 }; 669 670 /* The standard init function */ 671 static int __init p9_virtio_init(void) 672 { 673 INIT_LIST_HEAD(&virtio_chan_list); 674 675 v9fs_register_trans(&p9_virtio_trans); 676 return register_virtio_driver(&p9_virtio_drv); 677 } 678 679 static void __exit p9_virtio_cleanup(void) 680 { 681 unregister_virtio_driver(&p9_virtio_drv); 682 v9fs_unregister_trans(&p9_virtio_trans); 683 } 684 685 module_init(p9_virtio_init); 686 module_exit(p9_virtio_cleanup); 687 688 MODULE_DEVICE_TABLE(virtio, id_table); 689 MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); 690 MODULE_DESCRIPTION("Virtio 9p Transport"); 691 MODULE_LICENSE("GPL"); 692