1 /*  Xenbus code for blkif backend
2     Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
3     Copyright (C) 2005 XenSource Ltd
4 
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9 
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15 */
16 
17 #include <stdarg.h>
18 #include <linux/module.h>
19 #include <linux/kthread.h>
20 #include <xen/events.h>
21 #include <xen/grant_table.h>
22 #include "common.h"
23 
/*
 * Per-device backend state, attached to the xenbus_device via drvdata.
 * Holds the blkif this backend drives, the xenstore watch on the
 * "physical-device" node, and the physical major/minor plus access mode
 * supplied by the hotplug scripts.
 */
struct backend_info {
	struct xenbus_device	*dev;
	struct xen_blkif	*blkif;
	struct xenbus_watch	backend_watch;	/* fires on "physical-device" */
	unsigned		major;		/* 0 until backend_changed() runs */
	unsigned		minor;
	char			*mode;		/* xenstore "mode" node; kfree'd on remove */
};
32 
/* Slab cache backing struct xen_blkif allocations. */
static struct kmem_cache *xen_blkif_cachep;
/* Forward declarations for the connect path and the xenstore watch callback. */
static void connect(struct backend_info *);
static int connect_ring(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char **,
			    unsigned int);
38 
39 struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
40 {
41 	return be->dev;
42 }
43 
/*
 * Build a kernel-thread name of the form "blkback.<domid>.<devname>" into
 * buf, which must be at least TASK_COMM_LEN bytes.  The device name comes
 * from the backend's xenstore "dev" node with any leading "/dev/" stripped.
 * Returns 0 on success or a negative errno from xenbus_read().
 */
static int blkback_name(struct xen_blkif *blkif, char *buf)
{
	char *devpath, *devname;
	struct xenbus_device *dev = blkif->be->dev;

	devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
	if (IS_ERR(devpath))
		return PTR_ERR(devpath);

	devname = strstr(devpath, "/dev/");
	if (devname != NULL)
		devname += strlen("/dev/");
	else
		devname  = devpath;

	/* snprintf() silently truncates if the resulting name is too long. */
	snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname);
	kfree(devpath);

	return 0;
}
64 
/*
 * Try to bring the interface fully up: publish device details in xenstore
 * (via connect()), flush/invalidate the backing device's page cache, and
 * start the per-device xen_blkif_schedule() kthread.  Safe to call at any
 * point; it bails out silently unless both the event channel and the
 * backing bdev are in place and we are not already Connected.
 */
static void xen_update_blkif_status(struct xen_blkif *blkif)
{
	int err;
	char name[TASK_COMM_LEN];

	/* Not ready to connect? */
	if (!blkif->irq || !blkif->vbd.bdev)
		return;

	/* Already connected? */
	if (blkif->be->dev->state == XenbusStateConnected)
		return;

	/* Attempt to connect: exit if we fail to. */
	connect(blkif->be);
	if (blkif->be->dev->state != XenbusStateConnected)
		return;

	err = blkback_name(blkif, name);
	if (err) {
		xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
		return;
	}

	/*
	 * Push out any dirty pages and drop cached ones so the guest sees
	 * a coherent view of the device from the first request onwards.
	 */
	err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
	if (err) {
		xenbus_dev_error(blkif->be->dev, err, "block flush");
		return;
	}
	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);

	blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, "%s", name);
	if (IS_ERR(blkif->xenblkd)) {
		err = PTR_ERR(blkif->xenblkd);
		blkif->xenblkd = NULL;
		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
		return;
	}
}
104 
/*
 * Allocate and initialize a xen_blkif for the given frontend domain,
 * including the pool of XEN_BLKIF_REQS pending_req structures with their
 * per-request segment and indirect-page arrays.  Returns the new blkif or
 * ERR_PTR(-ENOMEM); on failure everything partially allocated is freed.
 */
static struct xen_blkif *xen_blkif_alloc(domid_t domid)
{
	struct xen_blkif *blkif;
	struct pending_req *req, *n;
	int i, j;

	BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);

	blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
	if (!blkif)
		return ERR_PTR(-ENOMEM);

	blkif->domid = domid;
	spin_lock_init(&blkif->blk_ring_lock);
	/* Initial reference; dropped/reacquired in xen_blkif_disconnect(). */
	atomic_set(&blkif->refcnt, 1);
	init_waitqueue_head(&blkif->wq);
	init_completion(&blkif->drain_complete);
	atomic_set(&blkif->drain, 0);
	blkif->st_print = jiffies;
	init_waitqueue_head(&blkif->waiting_to_free);
	blkif->persistent_gnts.rb_node = NULL;
	spin_lock_init(&blkif->free_pages_lock);
	INIT_LIST_HEAD(&blkif->free_pages);
	blkif->free_pages_num = 0;
	atomic_set(&blkif->persistent_gnt_in_use, 0);

	INIT_LIST_HEAD(&blkif->pending_free);

	for (i = 0; i < XEN_BLKIF_REQS; i++) {
		req = kzalloc(sizeof(*req), GFP_KERNEL);
		if (!req)
			goto fail;
		/*
		 * Add to the free list immediately so the fail path can
		 * find (and free) a partially initialized request too.
		 */
		list_add_tail(&req->free_list,
		              &blkif->pending_free);
		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
			req->segments[j] = kzalloc(sizeof(*req->segments[0]),
			                           GFP_KERNEL);
			if (!req->segments[j])
				goto fail;
		}
		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
			req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
			                                 GFP_KERNEL);
			if (!req->indirect_pages[j])
				goto fail;
		}
	}
	spin_lock_init(&blkif->pending_free_lock);
	init_waitqueue_head(&blkif->pending_free_wq);
	init_waitqueue_head(&blkif->shutdown_wq);

	return blkif;

fail:
	/* kzalloc'd arrays: a NULL entry means nothing after it was allocated. */
	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
		list_del(&req->free_list);
		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
			if (!req->segments[j])
				break;
			kfree(req->segments[j]);
		}
		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
			if (!req->indirect_pages[j])
				break;
			kfree(req->indirect_pages[j]);
		}
		kfree(req);
	}

	kmem_cache_free(xen_blkif_cachep, blkif);

	return ERR_PTR(-ENOMEM);
}
178 
/*
 * Map the frontend's shared ring page (grant reference shared_page),
 * initialize the back ring in whichever ABI the frontend negotiated, and
 * bind the interdomain event channel to xen_blkif_be_int().  Idempotent:
 * returns 0 immediately if already connected.  On event-channel failure
 * the ring mapping is undone before returning the error.
 */
static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
			 unsigned int evtchn)
{
	int err;

	/* Already connected through? */
	if (blkif->irq)
		return 0;

	err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring);
	if (err < 0)
		return err;

	/* blk_protocol was set by connect_ring() before we are called. */
	switch (blkif->blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
	{
		struct blkif_sring *sring;
		sring = (struct blkif_sring *)blkif->blk_ring;
		BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
		break;
	}
	case BLKIF_PROTOCOL_X86_32:
	{
		struct blkif_x86_32_sring *sring_x86_32;
		sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
		BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
		break;
	}
	case BLKIF_PROTOCOL_X86_64:
	{
		struct blkif_x86_64_sring *sring_x86_64;
		sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
		BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
		break;
	}
	default:
		BUG();
	}

	err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
						    xen_blkif_be_int, 0,
						    "blkif-backend", blkif);
	if (err < 0) {
		xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
		blkif->blk_rings.common.sring = NULL;
		return err;
	}
	blkif->irq = err;

	return 0;
}
230 
/*
 * Tear down the connection to the frontend: stop the service thread,
 * wait for all outstanding references to drain, then unbind the event
 * channel and unmap the shared ring.  Idempotent, and may block.
 */
static void xen_blkif_disconnect(struct xen_blkif *blkif)
{
	if (blkif->xenblkd) {
		kthread_stop(blkif->xenblkd);
		wake_up(&blkif->shutdown_wq);
		blkif->xenblkd = NULL;
	}

	/*
	 * Temporarily drop our own (initial) reference so refcnt can reach
	 * zero, wait until every in-flight user has gone, then take it
	 * back so xen_blkif_free() still sees the expected count of 1.
	 */
	atomic_dec(&blkif->refcnt);
	wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
	atomic_inc(&blkif->refcnt);

	if (blkif->irq) {
		unbind_from_irqhandler(blkif->irq, blkif);
		blkif->irq = 0;
	}

	if (blkif->blk_rings.common.sring) {
		xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
		blkif->blk_rings.common.sring = NULL;
	}
}
253 
/*
 * Release the blkif and its pending_req pool.  Must only be called after
 * xen_blkif_disconnect(), when the caller holds the final reference.
 */
static void xen_blkif_free(struct xen_blkif *blkif)
{
	struct pending_req *req, *n;
	int i = 0, j;

	/* The caller must hold the last reference. */
	if (!atomic_dec_and_test(&blkif->refcnt))
		BUG();

	/* Check that there is no request in use */
	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
		list_del(&req->free_list);

		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
			kfree(req->segments[j]);

		for (j = 0; j < MAX_INDIRECT_PAGES; j++)
			kfree(req->indirect_pages[j]);

		kfree(req);
		i++;
	}

	/* Every request should have been returned to the free pool by now. */
	WARN_ON(i != XEN_BLKIF_REQS);

	kmem_cache_free(xen_blkif_cachep, blkif);
}
280 
/*
 * One-time module init: create the slab cache used by xen_blkif_alloc().
 * Returns 0 or -ENOMEM.
 */
int __init xen_blkif_interface_init(void)
{
	xen_blkif_cachep = kmem_cache_create("blkif_cache",
					     sizeof(struct xen_blkif),
					     0, 0, NULL);
	if (!xen_blkif_cachep)
		return -ENOMEM;

	return 0;
}
291 
292 /*
293  *  sysfs interface for VBD I/O requests
294  */
295 
/*
 * Generate a read-only sysfs "show" function plus its DEVICE_ATTR.
 * The expansion provides a local 'be' (the backend_info from drvdata)
 * that the caller's 'args' expression may reference.
 */
#define VBD_SHOW(name, format, args...)					\
	static ssize_t show_##name(struct device *_dev,			\
				   struct device_attribute *attr,	\
				   char *buf)				\
	{								\
		struct xenbus_device *dev = to_xenbus_device(_dev);	\
		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
									\
		return sprintf(buf, format, ##args);			\
	}								\
	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
307 
/* Per-VBD I/O statistics, exported under sysfs "statistics/". */
VBD_SHOW(oo_req,  "%llu\n", be->blkif->st_oo_req);
VBD_SHOW(rd_req,  "%llu\n", be->blkif->st_rd_req);
VBD_SHOW(wr_req,  "%llu\n", be->blkif->st_wr_req);
VBD_SHOW(f_req,  "%llu\n", be->blkif->st_f_req);
VBD_SHOW(ds_req,  "%llu\n", be->blkif->st_ds_req);
VBD_SHOW(rd_sect, "%llu\n", be->blkif->st_rd_sect);
VBD_SHOW(wr_sect, "%llu\n", be->blkif->st_wr_sect);
315 
/* Attribute group gathering the statistics files above. */
static struct attribute *xen_vbdstat_attrs[] = {
	&dev_attr_oo_req.attr,
	&dev_attr_rd_req.attr,
	&dev_attr_wr_req.attr,
	&dev_attr_f_req.attr,
	&dev_attr_ds_req.attr,
	&dev_attr_rd_sect.attr,
	&dev_attr_wr_sect.attr,
	NULL
};

static struct attribute_group xen_vbdstat_group = {
	.name = "statistics",
	.attrs = xen_vbdstat_attrs,
};

/* Top-level sysfs nodes: physical device number and access mode. */
VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
VBD_SHOW(mode, "%s\n", be->mode);
334 
335 static int xenvbd_sysfs_addif(struct xenbus_device *dev)
336 {
337 	int error;
338 
339 	error = device_create_file(&dev->dev, &dev_attr_physical_device);
340 	if (error)
341 		goto fail1;
342 
343 	error = device_create_file(&dev->dev, &dev_attr_mode);
344 	if (error)
345 		goto fail2;
346 
347 	error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group);
348 	if (error)
349 		goto fail3;
350 
351 	return 0;
352 
353 fail3:	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
354 fail2:	device_remove_file(&dev->dev, &dev_attr_mode);
355 fail1:	device_remove_file(&dev->dev, &dev_attr_physical_device);
356 	return error;
357 }
358 
359 static void xenvbd_sysfs_delif(struct xenbus_device *dev)
360 {
361 	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
362 	device_remove_file(&dev->dev, &dev_attr_mode);
363 	device_remove_file(&dev->dev, &dev_attr_physical_device);
364 }
365 
366 
367 static void xen_vbd_free(struct xen_vbd *vbd)
368 {
369 	if (vbd->bdev)
370 		blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
371 	vbd->bdev = NULL;
372 }
373 
/*
 * Open the physical block device (major:minor) that backs this VBD and
 * populate blkif->vbd: size, CD-ROM/removable flags, and whether the
 * queue supports cache flush and secure discard.  Returns 0 or -ENOENT
 * if the device cannot be opened or has no disk.
 */
static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
			  unsigned major, unsigned minor, int readonly,
			  int cdrom)
{
	struct xen_vbd *vbd;
	struct block_device *bdev;
	struct request_queue *q;

	vbd = &blkif->vbd;
	vbd->handle   = handle;
	vbd->readonly = readonly;
	vbd->type     = 0;

	vbd->pdevice  = MKDEV(major, minor);

	/* Read-only VBDs are opened read-only so the kernel enforces it. */
	bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
				 FMODE_READ : FMODE_WRITE, NULL);

	if (IS_ERR(bdev)) {
		DPRINTK("xen_vbd_create: device %08x could not be opened.\n",
			vbd->pdevice);
		return -ENOENT;
	}

	vbd->bdev = bdev;
	if (vbd->bdev->bd_disk == NULL) {
		DPRINTK("xen_vbd_create: device %08x doesn't exist.\n",
			vbd->pdevice);
		xen_vbd_free(vbd);
		return -ENOENT;
	}
	vbd->size = vbd_sz(vbd);

	if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
		vbd->type |= VDISK_CDROM;
	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
		vbd->type |= VDISK_REMOVABLE;

	/* Non-zero flush_flags means the queue honours flush requests. */
	q = bdev_get_queue(bdev);
	if (q && q->flush_flags)
		vbd->flush_support = true;

	if (q && blk_queue_secdiscard(q))
		vbd->discard_secure = true;

	DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
		handle, blkif->domid);
	return 0;
}
/*
 * xenbus "remove" hook: undo everything xen_blkbk_probe() and
 * backend_changed() set up — sysfs nodes, the xenstore watch, the blkif
 * and its backing vbd — then free the backend_info itself.  Also called
 * from the probe failure path, so every teardown step is conditional.
 */
static int xen_blkbk_remove(struct xenbus_device *dev)
{
	struct backend_info *be = dev_get_drvdata(&dev->dev);

	DPRINTK("");

	/* Sysfs entries only exist once a physical device was assigned. */
	if (be->major || be->minor)
		xenvbd_sysfs_delif(dev);

	if (be->backend_watch.node) {
		unregister_xenbus_watch(&be->backend_watch);
		kfree(be->backend_watch.node);
		be->backend_watch.node = NULL;
	}

	if (be->blkif) {
		xen_blkif_disconnect(be->blkif);
		xen_vbd_free(&be->blkif->vbd);
		xen_blkif_free(be->blkif);
		be->blkif = NULL;
	}

	kfree(be->mode);
	kfree(be);
	dev_set_drvdata(&dev->dev, NULL);
	return 0;
}
450 
451 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
452 			      struct backend_info *be, int state)
453 {
454 	struct xenbus_device *dev = be->dev;
455 	int err;
456 
457 	err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
458 			    "%d", state);
459 	if (err)
460 		dev_warn(&dev->dev, "writing feature-flush-cache (%d)", err);
461 
462 	return err;
463 }
464 
/*
 * Advertise discard support in xenstore.  If the backing queue supports
 * discard, publish granularity/alignment (mandatory) and "discard-secure"
 * (optional), then write "feature-discard" with the resulting state.
 * Any write failure aborts further writes for this feature but is only
 * warned about, not propagated.
 */
static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
{
	struct xenbus_device *dev = be->dev;
	struct xen_blkif *blkif = be->blkif;
	int err;
	int state = 0;
	struct block_device *bdev = be->blkif->vbd.bdev;
	struct request_queue *q = bdev_get_queue(bdev);

	if (blk_queue_discard(q)) {
		err = xenbus_printf(xbt, dev->nodename,
			"discard-granularity", "%u",
			q->limits.discard_granularity);
		if (err) {
			dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
			return;
		}
		err = xenbus_printf(xbt, dev->nodename,
			"discard-alignment", "%u",
			q->limits.discard_alignment);
		if (err) {
			dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
			return;
		}
		state = 1;
		/* Optional. */
		err = xenbus_printf(xbt, dev->nodename,
				    "discard-secure", "%d",
				    blkif->vbd.discard_secure);
		if (err) {
			dev_warn(&dev->dev, "writing discard-secure (%d)", err);
			return;
		}
	}
	/* state stays 0 when the queue does not support discard. */
	err = xenbus_printf(xbt, dev->nodename, "feature-discard",
			    "%d", state);
	if (err)
		dev_warn(&dev->dev, "writing feature-discard (%d)", err);
}
504 int xen_blkbk_barrier(struct xenbus_transaction xbt,
505 		      struct backend_info *be, int state)
506 {
507 	struct xenbus_device *dev = be->dev;
508 	int err;
509 
510 	err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
511 			    "%d", state);
512 	if (err)
513 		dev_warn(&dev->dev, "writing feature-barrier (%d)", err);
514 
515 	return err;
516 }
517 
518 /*
519  * Entry point to this code when a new device is created.  Allocate the basic
520  * structures, and watch the store waiting for the hotplug scripts to tell us
521  * the device's physical major and minor numbers.  Switch to InitWait.
522  */
static int xen_blkbk_probe(struct xenbus_device *dev,
			   const struct xenbus_device_id *id)
{
	int err;
	struct backend_info *be = kzalloc(sizeof(struct backend_info),
					  GFP_KERNEL);
	if (!be) {
		xenbus_dev_fatal(dev, -ENOMEM,
				 "allocating backend structure");
		return -ENOMEM;
	}
	be->dev = dev;
	/* Store be early so xen_blkbk_remove() can clean up on failure. */
	dev_set_drvdata(&dev->dev, be);

	be->blkif = xen_blkif_alloc(dev->otherend_id);
	if (IS_ERR(be->blkif)) {
		err = PTR_ERR(be->blkif);
		be->blkif = NULL;
		xenbus_dev_fatal(dev, err, "creating block interface");
		goto fail;
	}

	/* setup back pointer */
	be->blkif->be = be;

	/*
	 * Watch "physical-device"; the watch fires once immediately and
	 * again when the hotplug scripts write the node (backend_changed).
	 */
	err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed,
				   "%s/%s", dev->nodename, "physical-device");
	if (err)
		goto fail;

	err = xenbus_switch_state(dev, XenbusStateInitWait);
	if (err)
		goto fail;

	return 0;

fail:
	DPRINTK("failed");
	/* Frees be and everything attached to it. */
	xen_blkbk_remove(dev);
	return err;
}
564 
565 
566 /*
567  * Callback received when the hotplug scripts have placed the physical-device
568  * node.  Read it and the mode node, and create a vbd.  If the frontend is
569  * ready, connect.
570  */
static void backend_changed(struct xenbus_watch *watch,
			    const char **vec, unsigned int len)
{
	int err;
	unsigned major;
	unsigned minor;
	struct backend_info *be
		= container_of(watch, struct backend_info, backend_watch);
	struct xenbus_device *dev = be->dev;
	int cdrom = 0;
	unsigned long handle;
	char *device_type;

	DPRINTK("");

	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
			   &major, &minor);
	if (XENBUS_EXIST_ERR(err)) {
		/*
		 * Since this watch will fire once immediately after it is
		 * registered, we expect this.  Ignore it, and wait for the
		 * hotplug scripts.
		 */
		return;
	}
	if (err != 2) {
		xenbus_dev_fatal(dev, err, "reading physical-device");
		return;
	}

	/* A physical device was already assigned; re-assignment is refused. */
	if (be->major | be->minor) {
		if (be->major != major || be->minor != minor)
			pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n",
				be->major, be->minor, major, minor);
		return;
	}

	be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
	if (IS_ERR(be->mode)) {
		err = PTR_ERR(be->mode);
		be->mode = NULL;
		xenbus_dev_fatal(dev, err, "reading mode");
		return;
	}

	/* "device-type" lives on the frontend side and is optional. */
	device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
	if (!IS_ERR(device_type)) {
		cdrom = strcmp(device_type, "cdrom") == 0;
		kfree(device_type);
	}

	/* Front end dir is a number, which is used as the handle. */
	err = kstrtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
	if (err)
		return;

	be->major = major;
	be->minor = minor;

	/* mode containing 'w' means writable; anything else is read-only. */
	err = xen_vbd_create(be->blkif, handle, major, minor,
			     !strchr(be->mode, 'w'), cdrom);

	if (err)
		xenbus_dev_fatal(dev, err, "creating vbd structure");
	else {
		err = xenvbd_sysfs_addif(dev);
		if (err) {
			xen_vbd_free(&be->blkif->vbd);
			xenbus_dev_fatal(dev, err, "creating sysfs entries");
		}
	}

	if (err) {
		/* Roll back so a later watch event may retry from scratch. */
		kfree(be->mode);
		be->mode = NULL;
		be->major = 0;
		be->minor = 0;
	} else {
		/* We're potentially connected now */
		xen_update_blkif_status(be->blkif);
	}
}
653 
654 
655 /*
656  * Callback received when the frontend's state changes.
657  */
static void frontend_changed(struct xenbus_device *dev,
			     enum xenbus_state frontend_state)
{
	struct backend_info *be = dev_get_drvdata(&dev->dev);
	int err;

	DPRINTK("%s", xenbus_strstate(frontend_state));

	switch (frontend_state) {
	case XenbusStateInitialising:
		/* A frontend coming back after Closed means a reconnect. */
		if (dev->state == XenbusStateClosed) {
			pr_info(DRV_PFX "%s: prepare for reconnect\n",
				dev->nodename);
			xenbus_switch_state(dev, XenbusStateInitWait);
		}
		break;

	case XenbusStateInitialised:
	case XenbusStateConnected:
		/*
		 * Ensure we connect even when two watches fire in
		 * close succession and we miss the intermediate value
		 * of frontend_state.
		 */
		if (dev->state == XenbusStateConnected)
			break;

		/*
		 * Enforce precondition before potential leak point.
		 * xen_blkif_disconnect() is idempotent.
		 */
		xen_blkif_disconnect(be->blkif);

		err = connect_ring(be);
		if (err)
			break;
		xen_update_blkif_status(be->blkif);
		break;

	case XenbusStateClosing:
		xenbus_switch_state(dev, XenbusStateClosing);
		break;

	case XenbusStateClosed:
		xen_blkif_disconnect(be->blkif);
		xenbus_switch_state(dev, XenbusStateClosed);
		if (xenbus_dev_is_online(dev))
			break;
		/* fall through if not online */
	case XenbusStateUnknown:
		/* implies xen_blkif_disconnect() via xen_blkbk_remove() */
		device_unregister(&dev->dev);
		break;

	default:
		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
				 frontend_state);
		break;
	}
}
718 
719 
720 /* ** Connection ** */
721 
722 
723 /*
724  * Write the physical details regarding the block device to the store, and
725  * switch to Connected state.
726  */
static void connect(struct backend_info *be)
{
	struct xenbus_transaction xbt;
	int err;
	struct xenbus_device *dev = be->dev;

	DPRINTK("%s", dev->otherend);

	/* Supply the information about the device the frontend needs */
again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		return;
	}

	/* If we can't advertise it is OK. */
	xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);

	xen_blkbk_discard(xbt, be);

	/* Barriers are advertised based on the same flush capability. */
	xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);

	err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1);
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
				 dev->nodename);
		goto abort;
	}
	/* Optional: a failure here only loses indirect-segment support. */
	err = xenbus_printf(xbt, dev->nodename, "feature-max-indirect-segments", "%u",
			    MAX_INDIRECT_SEGMENTS);
	if (err)
		dev_warn(&dev->dev, "writing %s/feature-max-indirect-segments (%d)",
			 dev->nodename, err);

	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
			    (unsigned long long)vbd_sz(&be->blkif->vbd));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/sectors",
				 dev->nodename);
		goto abort;
	}

	/* FIXME: use a typename instead */
	err = xenbus_printf(xbt, dev->nodename, "info", "%u",
			    be->blkif->vbd.type |
			    (be->blkif->vbd.readonly ? VDISK_READONLY : 0));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/info",
				 dev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
			    (unsigned long)
			    bdev_logical_block_size(be->blkif->vbd.bdev));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/sector-size",
				 dev->nodename);
		goto abort;
	}
	/* Optional: logged as an error but does not abort the transaction. */
	err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
			    bdev_physical_block_size(be->blkif->vbd.bdev));
	if (err)
		xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
				 dev->nodename);

	/* -EAGAIN means the transaction raced another writer: retry. */
	err = xenbus_transaction_end(xbt, 0);
	if (err == -EAGAIN)
		goto again;
	if (err)
		xenbus_dev_fatal(dev, err, "ending transaction");

	err = xenbus_switch_state(dev, XenbusStateConnected);
	if (err)
		xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
				 dev->nodename);

	return;
 abort:
	xenbus_transaction_end(xbt, 1);
}
808 
809 
810 static int connect_ring(struct backend_info *be)
811 {
812 	struct xenbus_device *dev = be->dev;
813 	unsigned long ring_ref;
814 	unsigned int evtchn;
815 	unsigned int pers_grants;
816 	char protocol[64] = "";
817 	int err;
818 
819 	DPRINTK("%s", dev->otherend);
820 
821 	err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
822 			    &ring_ref, "event-channel", "%u", &evtchn, NULL);
823 	if (err) {
824 		xenbus_dev_fatal(dev, err,
825 				 "reading %s/ring-ref and event-channel",
826 				 dev->otherend);
827 		return err;
828 	}
829 
830 	be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
831 	err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
832 			    "%63s", protocol, NULL);
833 	if (err)
834 		strcpy(protocol, "unspecified, assuming native");
835 	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
836 		be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
837 	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
838 		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
839 	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
840 		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
841 	else {
842 		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
843 		return -1;
844 	}
845 	err = xenbus_gather(XBT_NIL, dev->otherend,
846 			    "feature-persistent", "%u",
847 			    &pers_grants, NULL);
848 	if (err)
849 		pers_grants = 0;
850 
851 	be->blkif->vbd.feature_gnt_persistent = pers_grants;
852 	be->blkif->vbd.overflow_max_grants = 0;
853 
854 	pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
855 		ring_ref, evtchn, be->blkif->blk_protocol, protocol,
856 		pers_grants ? "persistent grants" : "");
857 
858 	/* Map the shared frame, irq etc. */
859 	err = xen_blkif_map(be->blkif, ring_ref, evtchn);
860 	if (err) {
861 		xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
862 				 ring_ref, evtchn);
863 		return err;
864 	}
865 
866 	return 0;
867 }
868 
869 
870 /* ** Driver Registration ** */
871 
872 
/* Devices of type "vbd" in xenstore are handled by this driver. */
static const struct xenbus_device_id xen_blkbk_ids[] = {
	{ "vbd" },
	{ "" }
};


/* Driver definition wiring probe/remove and frontend state changes. */
static DEFINE_XENBUS_DRIVER(xen_blkbk, ,
	.probe = xen_blkbk_probe,
	.remove = xen_blkbk_remove,
	.otherend_changed = frontend_changed
);
884 
885 
886 int xen_blkif_xenbus_init(void)
887 {
888 	return xenbus_register_backend(&xen_blkbk_driver);
889 }
890