1 /* 2 * The Virtio 9p transport driver 3 * 4 * This is a block based transport driver based on the lguest block driver 5 * code. 6 * 7 * Copyright (C) 2007, 2008 Eric Van Hensbergen, IBM Corporation 8 * 9 * Based on virtio console driver 10 * Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation 11 * 12 * This program is free software; you can redistribute it and/or modify 13 * it under the terms of the GNU General Public License version 2 14 * as published by the Free Software Foundation. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 * GNU General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; if not, write to: 23 * Free Software Foundation 24 * 51 Franklin Street, Fifth Floor 25 * Boston, MA 02111-1301 USA 26 * 27 */ 28 29 #include <linux/in.h> 30 #include <linux/module.h> 31 #include <linux/net.h> 32 #include <linux/ipv6.h> 33 #include <linux/errno.h> 34 #include <linux/kernel.h> 35 #include <linux/un.h> 36 #include <linux/uaccess.h> 37 #include <linux/inet.h> 38 #include <linux/idr.h> 39 #include <linux/file.h> 40 #include <linux/slab.h> 41 #include <net/9p/9p.h> 42 #include <linux/parser.h> 43 #include <net/9p/client.h> 44 #include <net/9p/transport.h> 45 #include <linux/scatterlist.h> 46 #include <linux/swap.h> 47 #include <linux/virtio.h> 48 #include <linux/virtio_9p.h> 49 #include "trans_common.h" 50 51 #define VIRTQUEUE_NUM 128 52 53 /* a single mutex to manage channel initialization and attachment */ 54 static DEFINE_MUTEX(virtio_9p_lock); 55 static DECLARE_WAIT_QUEUE_HEAD(vp_wq); 56 static atomic_t vp_pinned = ATOMIC_INIT(0); 57 58 /** 59 * struct virtio_chan - per-instance transport information 60 * @initialized: whether the channel is initialized 61 * @inuse: whether the channel is in use 62 * @lock: protects multiple elements within this structure 63 * @client: client instance 64 * @vdev: virtio dev associated with this channel 65 * @vq: virtio queue associated with this channel 66 * @sg: scatter gather list which is used to pack a request (protected?) 67 * 68 * We keep all per-channel information in a structure. 69 * This structure is allocated within the devices dev->mem space. 70 * A pointer to the structure will get put in the transport private. 71 * 72 */ 73 74 struct virtio_chan { 75 bool inuse; 76 77 spinlock_t lock; 78 79 struct p9_client *client; 80 struct virtio_device *vdev; 81 struct virtqueue *vq; 82 int ring_bufs_avail; 83 wait_queue_head_t *vc_wq; 84 /* This is global limit. Since we don't have a global structure, 85 * will be placing it in each channel. 86 */ 87 int p9_max_pages; 88 /* Scatterlist: can be too big for stack. */ 89 struct scatterlist sg[VIRTQUEUE_NUM]; 90 91 int tag_len; 92 /* 93 * tag name to identify a mount Non-null terminated 94 */ 95 char *tag; 96 97 struct list_head chan_list; 98 }; 99 100 static struct list_head virtio_chan_list; 101 102 /* How many bytes left in this page. */ 103 static unsigned int rest_of_page(void *data) 104 { 105 return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE); 106 } 107 108 /** 109 * p9_virtio_close - reclaim resources of a channel 110 * @client: client instance 111 * 112 * This reclaims a channel by freeing its resources and 113 * reseting its inuse flag. 114 * 115 */ 116 117 static void p9_virtio_close(struct p9_client *client) 118 { 119 struct virtio_chan *chan = client->trans; 120 121 mutex_lock(&virtio_9p_lock); 122 if (chan) 123 chan->inuse = false; 124 mutex_unlock(&virtio_9p_lock); 125 } 126 127 /** 128 * req_done - callback which signals activity from the server 129 * @vq: virtio queue activity was received on 130 * 131 * This notifies us that the server has triggered some activity 132 * on the virtio channel - most likely a response to request we 133 * sent. Figure out which requests now have responses and wake up 134 * those threads. 135 * 136 * Bugs: could do with some additional sanity checking, but appears to work. 137 * 138 */ 139 140 static void req_done(struct virtqueue *vq) 141 { 142 struct virtio_chan *chan = vq->vdev->priv; 143 struct p9_fcall *rc; 144 unsigned int len; 145 struct p9_req_t *req; 146 unsigned long flags; 147 148 P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n"); 149 150 while (1) { 151 spin_lock_irqsave(&chan->lock, flags); 152 rc = virtqueue_get_buf(chan->vq, &len); 153 154 if (rc == NULL) { 155 spin_unlock_irqrestore(&chan->lock, flags); 156 break; 157 } 158 159 chan->ring_bufs_avail = 1; 160 spin_unlock_irqrestore(&chan->lock, flags); 161 /* Wakeup if anyone waiting for VirtIO ring space. */ 162 wake_up(chan->vc_wq); 163 P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); 164 P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); 165 req = p9_tag_lookup(chan->client, rc->tag); 166 if (req->tc->private) { 167 struct trans_rpage_info *rp = req->tc->private; 168 int p = rp->rp_nr_pages; 169 /*Release pages */ 170 p9_release_req_pages(rp); 171 atomic_sub(p, &vp_pinned); 172 wake_up(&vp_wq); 173 if (rp->rp_alloc) 174 kfree(rp); 175 req->tc->private = NULL; 176 } 177 req->status = REQ_STATUS_RCVD; 178 p9_client_cb(chan->client, req); 179 } 180 } 181 182 /** 183 * pack_sg_list - pack a scatter gather list from a linear buffer 184 * @sg: scatter/gather list to pack into 185 * @start: which segment of the sg_list to start at 186 * @limit: maximum segment to pack data to 187 * @data: data to pack into scatter/gather list 188 * @count: amount of data to pack into the scatter/gather list 189 * 190 * sg_lists have multiple segments of various sizes. This will pack 191 * arbitrary data into an existing scatter gather list, segmenting the 192 * data as necessary within constraints. 193 * 194 */ 195 196 static int 197 pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, 198 int count) 199 { 200 int s; 201 int index = start; 202 203 while (count) { 204 s = rest_of_page(data); 205 if (s > count) 206 s = count; 207 sg_set_buf(&sg[index++], data, s); 208 count -= s; 209 data += s; 210 BUG_ON(index > limit); 211 } 212 213 return index-start; 214 } 215 216 /* We don't currently allow canceling of virtio requests */ 217 static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) 218 { 219 return 1; 220 } 221 222 /** 223 * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer, 224 * this takes a list of pages. 225 * @sg: scatter/gather list to pack into 226 * @start: which segment of the sg_list to start at 227 * @pdata_off: Offset into the first page 228 * @**pdata: a list of pages to add into sg. 229 * @count: amount of data to pack into the scatter/gather list 230 */ 231 static int 232 pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, 233 struct page **pdata, int count) 234 { 235 int s; 236 int i = 0; 237 int index = start; 238 239 if (pdata_off) { 240 s = min((int)(PAGE_SIZE - pdata_off), count); 241 sg_set_page(&sg[index++], pdata[i++], s, pdata_off); 242 count -= s; 243 } 244 245 while (count) { 246 BUG_ON(index > limit); 247 s = min((int)PAGE_SIZE, count); 248 sg_set_page(&sg[index++], pdata[i++], s, 0); 249 count -= s; 250 } 251 return index-start; 252 } 253 254 /** 255 * p9_virtio_request - issue a request 256 * @client: client instance issuing the request 257 * @req: request to be issued 258 * 259 */ 260 261 static int 262 p9_virtio_request(struct p9_client *client, struct p9_req_t *req) 263 { 264 int in, out, inp, outp; 265 struct virtio_chan *chan = client->trans; 266 char *rdata = (char *)req->rc+sizeof(struct p9_fcall); 267 unsigned long flags; 268 size_t pdata_off = 0; 269 struct trans_rpage_info *rpinfo = NULL; 270 int err, pdata_len = 0; 271 272 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); 273 274 req->status = REQ_STATUS_SENT; 275 276 if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) { 277 int nr_pages = p9_nr_pages(req); 278 int rpinfo_size = sizeof(struct trans_rpage_info) + 279 sizeof(struct page *) * nr_pages; 280 281 if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { 282 err = wait_event_interruptible(vp_wq, 283 atomic_read(&vp_pinned) < chan->p9_max_pages); 284 if (err == -ERESTARTSYS) 285 return err; 286 P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n"); 287 } 288 289 if (rpinfo_size <= (req->tc->capacity - req->tc->size)) { 290 /* We can use sdata */ 291 req->tc->private = req->tc->sdata + req->tc->size; 292 rpinfo = (struct trans_rpage_info *)req->tc->private; 293 rpinfo->rp_alloc = 0; 294 } else { 295 req->tc->private = kmalloc(rpinfo_size, GFP_NOFS); 296 if (!req->tc->private) { 297 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " 298 "private kmalloc returned NULL"); 299 return -ENOMEM; 300 } 301 rpinfo = (struct trans_rpage_info *)req->tc->private; 302 rpinfo->rp_alloc = 1; 303 } 304 305 err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages, 306 req->tc->id == P9_TREAD ? 1 : 0); 307 if (err < 0) { 308 if (rpinfo->rp_alloc) 309 kfree(rpinfo); 310 return err; 311 } else { 312 atomic_add(rpinfo->rp_nr_pages, &vp_pinned); 313 } 314 } 315 316 req_retry_pinned: 317 spin_lock_irqsave(&chan->lock, flags); 318 319 /* Handle out VirtIO ring buffers */ 320 out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, 321 req->tc->size); 322 323 if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) { 324 /* We have additional write payload buffer to take care */ 325 if (req->tc->pubuf && P9_IS_USER_CONTEXT) { 326 outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, 327 pdata_off, rpinfo->rp_data, pdata_len); 328 } else { 329 char *pbuf; 330 if (req->tc->pubuf) 331 pbuf = (__force char *) req->tc->pubuf; 332 else 333 pbuf = req->tc->pkbuf; 334 outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf, 335 req->tc->pbuf_size); 336 } 337 out += outp; 338 } 339 340 /* Handle in VirtIO ring buffers */ 341 if (req->tc->pbuf_size && 342 ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) { 343 /* 344 * Take care of additional Read payload. 345 * 11 is the read/write header = PDU Header(7) + IO Size (4). 346 * Arrange in such a way that server places header in the 347 * alloced memory and payload onto the user buffer. 348 */ 349 inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11); 350 /* 351 * Running executables in the filesystem may result in 352 * a read request with kernel buffer as opposed to user buffer. 353 */ 354 if (req->tc->pubuf && P9_IS_USER_CONTEXT) { 355 in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM, 356 pdata_off, rpinfo->rp_data, pdata_len); 357 } else { 358 char *pbuf; 359 if (req->tc->pubuf) 360 pbuf = (__force char *) req->tc->pubuf; 361 else 362 pbuf = req->tc->pkbuf; 363 364 in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM, 365 pbuf, req->tc->pbuf_size); 366 } 367 in += inp; 368 } else { 369 in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 370 req->rc->capacity); 371 } 372 373 err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); 374 if (err < 0) { 375 if (err == -ENOSPC) { 376 chan->ring_bufs_avail = 0; 377 spin_unlock_irqrestore(&chan->lock, flags); 378 err = wait_event_interruptible(*chan->vc_wq, 379 chan->ring_bufs_avail); 380 if (err == -ERESTARTSYS) 381 return err; 382 383 P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); 384 goto req_retry_pinned; 385 } else { 386 spin_unlock_irqrestore(&chan->lock, flags); 387 P9_DPRINTK(P9_DEBUG_TRANS, 388 "9p debug: " 389 "virtio rpc add_buf returned failure"); 390 if (rpinfo && rpinfo->rp_alloc) 391 kfree(rpinfo); 392 return -EIO; 393 } 394 } 395 396 virtqueue_kick(chan->vq); 397 spin_unlock_irqrestore(&chan->lock, flags); 398 399 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); 400 return 0; 401 } 402 403 static ssize_t p9_mount_tag_show(struct device *dev, 404 struct device_attribute *attr, char *buf) 405 { 406 struct virtio_chan *chan; 407 struct virtio_device *vdev; 408 409 vdev = dev_to_virtio(dev); 410 chan = vdev->priv; 411 412 return snprintf(buf, chan->tag_len + 1, "%s", chan->tag); 413 } 414 415 static DEVICE_ATTR(mount_tag, 0444, p9_mount_tag_show, NULL); 416 417 /** 418 * p9_virtio_probe - probe for existence of 9P virtio channels 419 * @vdev: virtio device to probe 420 * 421 * This probes for existing virtio channels. 422 * 423 */ 424 425 static int p9_virtio_probe(struct virtio_device *vdev) 426 { 427 __u16 tag_len; 428 char *tag; 429 int err; 430 struct virtio_chan *chan; 431 432 chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL); 433 if (!chan) { 434 printk(KERN_ERR "9p: Failed to allocate virtio 9P channel\n"); 435 err = -ENOMEM; 436 goto fail; 437 } 438 439 chan->vdev = vdev; 440 441 /* We expect one virtqueue, for requests. */ 442 chan->vq = virtio_find_single_vq(vdev, req_done, "requests"); 443 if (IS_ERR(chan->vq)) { 444 err = PTR_ERR(chan->vq); 445 goto out_free_vq; 446 } 447 chan->vq->vdev->priv = chan; 448 spin_lock_init(&chan->lock); 449 450 sg_init_table(chan->sg, VIRTQUEUE_NUM); 451 452 chan->inuse = false; 453 if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) { 454 vdev->config->get(vdev, 455 offsetof(struct virtio_9p_config, tag_len), 456 &tag_len, sizeof(tag_len)); 457 } else { 458 err = -EINVAL; 459 goto out_free_vq; 460 } 461 tag = kmalloc(tag_len, GFP_KERNEL); 462 if (!tag) { 463 err = -ENOMEM; 464 goto out_free_vq; 465 } 466 vdev->config->get(vdev, offsetof(struct virtio_9p_config, tag), 467 tag, tag_len); 468 chan->tag = tag; 469 chan->tag_len = tag_len; 470 err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); 471 if (err) { 472 goto out_free_tag; 473 } 474 chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL); 475 if (!chan->vc_wq) { 476 err = -ENOMEM; 477 goto out_free_tag; 478 } 479 init_waitqueue_head(chan->vc_wq); 480 chan->ring_bufs_avail = 1; 481 /* Ceiling limit to avoid denial of service attacks */ 482 chan->p9_max_pages = nr_free_buffer_pages()/4; 483 484 mutex_lock(&virtio_9p_lock); 485 list_add_tail(&chan->chan_list, &virtio_chan_list); 486 mutex_unlock(&virtio_9p_lock); 487 return 0; 488 489 out_free_tag: 490 kfree(tag); 491 out_free_vq: 492 vdev->config->del_vqs(vdev); 493 kfree(chan); 494 fail: 495 return err; 496 } 497 498 499 /** 500 * p9_virtio_create - allocate a new virtio channel 501 * @client: client instance invoking this transport 502 * @devname: string identifying the channel to connect to (unused) 503 * @args: args passed from sys_mount() for per-transport options (unused) 504 * 505 * This sets up a transport channel for 9p communication. Right now 506 * we only match the first available channel, but eventually we couldlook up 507 * alternate channels by matching devname versus a virtio_config entry. 508 * We use a simple reference count mechanism to ensure that only a single 509 * mount has a channel open at a time. 510 * 511 */ 512 513 static int 514 p9_virtio_create(struct p9_client *client, const char *devname, char *args) 515 { 516 struct virtio_chan *chan; 517 int ret = -ENOENT; 518 int found = 0; 519 520 mutex_lock(&virtio_9p_lock); 521 list_for_each_entry(chan, &virtio_chan_list, chan_list) { 522 if (!strncmp(devname, chan->tag, chan->tag_len) && 523 strlen(devname) == chan->tag_len) { 524 if (!chan->inuse) { 525 chan->inuse = true; 526 found = 1; 527 break; 528 } 529 ret = -EBUSY; 530 } 531 } 532 mutex_unlock(&virtio_9p_lock); 533 534 if (!found) { 535 printk(KERN_ERR "9p: no channels available\n"); 536 return ret; 537 } 538 539 client->trans = (void *)chan; 540 client->status = Connected; 541 chan->client = client; 542 543 return 0; 544 } 545 546 /** 547 * p9_virtio_remove - clean up resources associated with a virtio device 548 * @vdev: virtio device to remove 549 * 550 */ 551 552 static void p9_virtio_remove(struct virtio_device *vdev) 553 { 554 struct virtio_chan *chan = vdev->priv; 555 556 BUG_ON(chan->inuse); 557 vdev->config->del_vqs(vdev); 558 559 mutex_lock(&virtio_9p_lock); 560 list_del(&chan->chan_list); 561 mutex_unlock(&virtio_9p_lock); 562 sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); 563 kfree(chan->tag); 564 kfree(chan->vc_wq); 565 kfree(chan); 566 567 } 568 569 static struct virtio_device_id id_table[] = { 570 { VIRTIO_ID_9P, VIRTIO_DEV_ANY_ID }, 571 { 0 }, 572 }; 573 574 static unsigned int features[] = { 575 VIRTIO_9P_MOUNT_TAG, 576 }; 577 578 /* The standard "struct lguest_driver": */ 579 static struct virtio_driver p9_virtio_drv = { 580 .feature_table = features, 581 .feature_table_size = ARRAY_SIZE(features), 582 .driver.name = KBUILD_MODNAME, 583 .driver.owner = THIS_MODULE, 584 .id_table = id_table, 585 .probe = p9_virtio_probe, 586 .remove = p9_virtio_remove, 587 }; 588 589 static struct p9_trans_module p9_virtio_trans = { 590 .name = "virtio", 591 .create = p9_virtio_create, 592 .close = p9_virtio_close, 593 .request = p9_virtio_request, 594 .cancel = p9_virtio_cancel, 595 .maxsize = PAGE_SIZE*VIRTQUEUE_NUM, 596 .pref = P9_TRANS_PREF_PAYLOAD_SEP, 597 .def = 0, 598 .owner = THIS_MODULE, 599 }; 600 601 /* The standard init function */ 602 static int __init p9_virtio_init(void) 603 { 604 INIT_LIST_HEAD(&virtio_chan_list); 605 606 v9fs_register_trans(&p9_virtio_trans); 607 return register_virtio_driver(&p9_virtio_drv); 608 } 609 610 static void __exit p9_virtio_cleanup(void) 611 { 612 unregister_virtio_driver(&p9_virtio_drv); 613 v9fs_unregister_trans(&p9_virtio_trans); 614 } 615 616 module_init(p9_virtio_init); 617 module_exit(p9_virtio_cleanup); 618 619 MODULE_DEVICE_TABLE(virtio, id_table); 620 MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); 621 MODULE_DESCRIPTION("Virtio 9p Transport"); 622 MODULE_LICENSE("GPL"); 623