xref: /openbmc/linux/drivers/block/rbd.c (revision 86b00e0d)
/*
   rbd.c -- Export ceph rados objects as a Linux block device


   based on drivers/block/osdblk.c:

   Copyright 2009 Red Hat, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.



   For usage instructions, please refer to:

                 Documentation/ABI/testing/sysfs-bus-rbd

 */
29602adf40SYehuda Sadeh 
30602adf40SYehuda Sadeh #include <linux/ceph/libceph.h>
31602adf40SYehuda Sadeh #include <linux/ceph/osd_client.h>
32602adf40SYehuda Sadeh #include <linux/ceph/mon_client.h>
33602adf40SYehuda Sadeh #include <linux/ceph/decode.h>
3459c2be1eSYehuda Sadeh #include <linux/parser.h>
35602adf40SYehuda Sadeh 
36602adf40SYehuda Sadeh #include <linux/kernel.h>
37602adf40SYehuda Sadeh #include <linux/device.h>
38602adf40SYehuda Sadeh #include <linux/module.h>
39602adf40SYehuda Sadeh #include <linux/fs.h>
40602adf40SYehuda Sadeh #include <linux/blkdev.h>
41602adf40SYehuda Sadeh 
42602adf40SYehuda Sadeh #include "rbd_types.h"
43602adf40SYehuda Sadeh 
#define RBD_DEBUG	/* Activate rbd_assert() calls */

/*
 * Block I/O is counted in sectors.  Linux interprets the sector in
 * several contexts (blk, bio, genhd), but the default size is always
 * 512 bytes.  These names are only slightly more descriptive than the
 * raw numbers they stand for.
 */
#define	SECTOR_SHIFT	9
#define	SECTOR_SIZE	(1ULL << SECTOR_SHIFT)

/* It might be useful to have this defined elsewhere too */

#define	U64_MAX	((u64) (~0ULL))

/* Short and long driver names */
#define RBD_DRV_NAME "rbd"
#define RBD_DRV_NAME_LONG "rbd (rados block device)"

#define RBD_MINORS_PER_MAJOR	256		/* max minors per blkdev */

/*
 * Snapshot device names carry a fixed prefix, so the longest usable
 * snapshot name is NAME_MAX less the prefix length (the "- 1" drops
 * the NUL that sizeof counts for the string literal).
 */
#define RBD_SNAP_DEV_NAME_PREFIX	"snap_"
#define RBD_MAX_SNAP_NAME_LEN	\
			(NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))

#define RBD_MAX_SNAP_COUNT	510	/* allows max snapc to fit in 4KB */
#define RBD_MAX_OPT_LEN		1024

/* Snapshot name that selects the image head rather than a snapshot */
#define RBD_SNAP_HEAD_NAME	"-"

#define RBD_IMAGE_ID_LEN_MAX	64
#define RBD_OBJ_PREFIX_LEN_MAX	64

/* Feature bits */

#define RBD_FEATURE_LAYERING      1

/* Features supported by this (client software) implementation. */

#define RBD_FEATURES_ALL          (0)

/*
 * An RBD device is named "rbd#": the "rbd" part is RBD_DRV_NAME
 * above and # is a unique integer identifier.
 * MAX_INT_FORMAT_WIDTH is used in ensuring DEV_NAME_LEN is big
 * enough to hold all possible device names.
 */
#define DEV_NAME_LEN		32
#define MAX_INT_FORMAT_WIDTH	((5 * sizeof (int)) / 2 + 1)

#define RBD_READ_ONLY_DEFAULT		false
9459c2be1eSYehuda Sadeh 
95602adf40SYehuda Sadeh /*
96602adf40SYehuda Sadeh  * block device image metadata (in-memory version)
97602adf40SYehuda Sadeh  */
98602adf40SYehuda Sadeh struct rbd_image_header {
99f84344f3SAlex Elder 	/* These four fields never change for a given rbd image */
100849b4260SAlex Elder 	char *object_prefix;
10134b13184SAlex Elder 	u64 features;
102602adf40SYehuda Sadeh 	__u8 obj_order;
103602adf40SYehuda Sadeh 	__u8 crypt_type;
104602adf40SYehuda Sadeh 	__u8 comp_type;
105602adf40SYehuda Sadeh 
106f84344f3SAlex Elder 	/* The remaining fields need to be updated occasionally */
107f84344f3SAlex Elder 	u64 image_size;
108f84344f3SAlex Elder 	struct ceph_snap_context *snapc;
109602adf40SYehuda Sadeh 	char *snap_names;
110602adf40SYehuda Sadeh 	u64 *snap_sizes;
11159c2be1eSYehuda Sadeh 
11259c2be1eSYehuda Sadeh 	u64 obj_version;
11359c2be1eSYehuda Sadeh };
11459c2be1eSYehuda Sadeh 
1150d7dbfceSAlex Elder /*
1160d7dbfceSAlex Elder  * An rbd image specification.
1170d7dbfceSAlex Elder  *
1180d7dbfceSAlex Elder  * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely
1190d7dbfceSAlex Elder  * identify an image.
1200d7dbfceSAlex Elder  */
1210d7dbfceSAlex Elder struct rbd_spec {
1220d7dbfceSAlex Elder 	u64		pool_id;
1230d7dbfceSAlex Elder 	char		*pool_name;
1240d7dbfceSAlex Elder 
1250d7dbfceSAlex Elder 	char		*image_id;
1260d7dbfceSAlex Elder 	size_t		image_id_len;
1270d7dbfceSAlex Elder 	char		*image_name;
1280d7dbfceSAlex Elder 	size_t		image_name_len;
1290d7dbfceSAlex Elder 
1300d7dbfceSAlex Elder 	u64		snap_id;
1310d7dbfceSAlex Elder 	char		*snap_name;
1320d7dbfceSAlex Elder 
1330d7dbfceSAlex Elder 	struct kref	kref;
1340d7dbfceSAlex Elder };
1350d7dbfceSAlex Elder 
13659c2be1eSYehuda Sadeh struct rbd_options {
137cc0538b6SAlex Elder 	bool	read_only;
138602adf40SYehuda Sadeh };
139602adf40SYehuda Sadeh 
140602adf40SYehuda Sadeh /*
141f0f8cef5SAlex Elder  * an instance of the client.  multiple devices may share an rbd client.
142602adf40SYehuda Sadeh  */
143602adf40SYehuda Sadeh struct rbd_client {
144602adf40SYehuda Sadeh 	struct ceph_client	*client;
145602adf40SYehuda Sadeh 	struct kref		kref;
146602adf40SYehuda Sadeh 	struct list_head	node;
147602adf40SYehuda Sadeh };
148602adf40SYehuda Sadeh 
149602adf40SYehuda Sadeh /*
150f0f8cef5SAlex Elder  * a request completion status
151602adf40SYehuda Sadeh  */
1521fec7093SYehuda Sadeh struct rbd_req_status {
1531fec7093SYehuda Sadeh 	int done;
1541fec7093SYehuda Sadeh 	int rc;
1551fec7093SYehuda Sadeh 	u64 bytes;
1561fec7093SYehuda Sadeh };
1571fec7093SYehuda Sadeh 
1581fec7093SYehuda Sadeh /*
1591fec7093SYehuda Sadeh  * a collection of requests
1601fec7093SYehuda Sadeh  */
1611fec7093SYehuda Sadeh struct rbd_req_coll {
1621fec7093SYehuda Sadeh 	int			total;
1631fec7093SYehuda Sadeh 	int			num_done;
1641fec7093SYehuda Sadeh 	struct kref		kref;
1651fec7093SYehuda Sadeh 	struct rbd_req_status	status[0];
166602adf40SYehuda Sadeh };
167602adf40SYehuda Sadeh 
168f0f8cef5SAlex Elder /*
169f0f8cef5SAlex Elder  * a single io request
170f0f8cef5SAlex Elder  */
171f0f8cef5SAlex Elder struct rbd_request {
172f0f8cef5SAlex Elder 	struct request		*rq;		/* blk layer request */
173f0f8cef5SAlex Elder 	struct bio		*bio;		/* cloned bio */
174f0f8cef5SAlex Elder 	struct page		**pages;	/* list of used pages */
175f0f8cef5SAlex Elder 	u64			len;
176f0f8cef5SAlex Elder 	int			coll_index;
177f0f8cef5SAlex Elder 	struct rbd_req_coll	*coll;
178f0f8cef5SAlex Elder };
179f0f8cef5SAlex Elder 
180dfc5606dSYehuda Sadeh struct rbd_snap {
181dfc5606dSYehuda Sadeh 	struct	device		dev;
182dfc5606dSYehuda Sadeh 	const char		*name;
1833591538fSJosh Durgin 	u64			size;
184dfc5606dSYehuda Sadeh 	struct list_head	node;
185dfc5606dSYehuda Sadeh 	u64			id;
18634b13184SAlex Elder 	u64			features;
187dfc5606dSYehuda Sadeh };
188dfc5606dSYehuda Sadeh 
189f84344f3SAlex Elder struct rbd_mapping {
19099c1f08fSAlex Elder 	u64                     size;
19134b13184SAlex Elder 	u64                     features;
192f84344f3SAlex Elder 	bool			read_only;
193f84344f3SAlex Elder };
194f84344f3SAlex Elder 
195602adf40SYehuda Sadeh /*
196602adf40SYehuda Sadeh  * a single device
197602adf40SYehuda Sadeh  */
198602adf40SYehuda Sadeh struct rbd_device {
199de71a297SAlex Elder 	int			dev_id;		/* blkdev unique id */
200602adf40SYehuda Sadeh 
201602adf40SYehuda Sadeh 	int			major;		/* blkdev assigned major */
202602adf40SYehuda Sadeh 	struct gendisk		*disk;		/* blkdev's gendisk and rq */
203602adf40SYehuda Sadeh 
204a30b71b9SAlex Elder 	u32			image_format;	/* Either 1 or 2 */
205602adf40SYehuda Sadeh 	struct rbd_client	*rbd_client;
206602adf40SYehuda Sadeh 
207602adf40SYehuda Sadeh 	char			name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
208602adf40SYehuda Sadeh 
209602adf40SYehuda Sadeh 	spinlock_t		lock;		/* queue lock */
210602adf40SYehuda Sadeh 
211602adf40SYehuda Sadeh 	struct rbd_image_header	header;
212daba5fdbSAlex Elder 	bool                    exists;
2130d7dbfceSAlex Elder 	struct rbd_spec		*spec;
214602adf40SYehuda Sadeh 
2150d7dbfceSAlex Elder 	char			*header_name;
216971f839aSAlex Elder 
21759c2be1eSYehuda Sadeh 	struct ceph_osd_event   *watch_event;
21859c2be1eSYehuda Sadeh 	struct ceph_osd_request *watch_request;
21959c2be1eSYehuda Sadeh 
22086b00e0dSAlex Elder 	struct rbd_spec		*parent_spec;
22186b00e0dSAlex Elder 	u64			parent_overlap;
22286b00e0dSAlex Elder 
223c666601aSJosh Durgin 	/* protects updating the header */
224c666601aSJosh Durgin 	struct rw_semaphore     header_rwsem;
225f84344f3SAlex Elder 
226f84344f3SAlex Elder 	struct rbd_mapping	mapping;
227602adf40SYehuda Sadeh 
228602adf40SYehuda Sadeh 	struct list_head	node;
229dfc5606dSYehuda Sadeh 
230dfc5606dSYehuda Sadeh 	/* list of snapshots */
231dfc5606dSYehuda Sadeh 	struct list_head	snaps;
232dfc5606dSYehuda Sadeh 
233dfc5606dSYehuda Sadeh 	/* sysfs related */
234dfc5606dSYehuda Sadeh 	struct device		dev;
235dfc5606dSYehuda Sadeh };
236dfc5606dSYehuda Sadeh 
/* Serialize open/close/setup/teardown */
static DEFINE_MUTEX(ctl_mutex);

/* All rbd devices, and the spinlock declared alongside that list */
static LIST_HEAD(rbd_dev_list);
static DEFINE_SPINLOCK(rbd_dev_list_lock);

/* All rbd clients, and the spinlock guarding that list */
static LIST_HEAD(rbd_client_list);
static DEFINE_SPINLOCK(rbd_client_list_lock);
244602adf40SYehuda Sadeh 
245304f6808SAlex Elder static int rbd_dev_snaps_update(struct rbd_device *rbd_dev);
246304f6808SAlex Elder static int rbd_dev_snaps_register(struct rbd_device *rbd_dev);
247304f6808SAlex Elder 
248dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev);
24941f38c2bSAlex Elder static void rbd_remove_snap_dev(struct rbd_snap *snap);
250dfc5606dSYehuda Sadeh 
251f0f8cef5SAlex Elder static ssize_t rbd_add(struct bus_type *bus, const char *buf,
252f0f8cef5SAlex Elder 		       size_t count);
253f0f8cef5SAlex Elder static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
254f0f8cef5SAlex Elder 			  size_t count);
255f0f8cef5SAlex Elder 
256f0f8cef5SAlex Elder static struct bus_attribute rbd_bus_attrs[] = {
257f0f8cef5SAlex Elder 	__ATTR(add, S_IWUSR, NULL, rbd_add),
258f0f8cef5SAlex Elder 	__ATTR(remove, S_IWUSR, NULL, rbd_remove),
259f0f8cef5SAlex Elder 	__ATTR_NULL
260f0f8cef5SAlex Elder };
261f0f8cef5SAlex Elder 
262f0f8cef5SAlex Elder static struct bus_type rbd_bus_type = {
263f0f8cef5SAlex Elder 	.name		= "rbd",
264f0f8cef5SAlex Elder 	.bus_attrs	= rbd_bus_attrs,
265f0f8cef5SAlex Elder };
266f0f8cef5SAlex Elder 
267f0f8cef5SAlex Elder static void rbd_root_dev_release(struct device *dev)
268f0f8cef5SAlex Elder {
269f0f8cef5SAlex Elder }
270f0f8cef5SAlex Elder 
271f0f8cef5SAlex Elder static struct device rbd_root_dev = {
272f0f8cef5SAlex Elder 	.init_name =    "rbd",
273f0f8cef5SAlex Elder 	.release =      rbd_root_dev_release,
274f0f8cef5SAlex Elder };
275f0f8cef5SAlex Elder 
/*
 * rbd_assert(expr) - driver-local assertion.
 *
 * With RBD_DEBUG defined, a false @expr logs the function name, line
 * number and the text of the expression, then calls BUG().  Without
 * RBD_DEBUG the macro expands to a no-op and @expr is not evaluated.
 *
 * The debug form is wrapped in do { } while (0) so that it behaves as
 * a single statement; a bare "if { }" would break (or silently capture
 * the else of) an unbraced "if (x) rbd_assert(y); else ..." at the
 * call site.
 */
#ifdef RBD_DEBUG
#define rbd_assert(expr)						\
	do {								\
		if (unlikely(!(expr))) {				\
			printk(KERN_ERR "\nAssertion failure in %s() "	\
						"at line %d:\n\n"	\
					"\trbd_assert(%s);\n\n",	\
					__func__, __LINE__, #expr);	\
			BUG();						\
		}							\
	} while (0)
#else /* !RBD_DEBUG */
#  define rbd_assert(expr)	((void) 0)
#endif /* !RBD_DEBUG */
288dfc5606dSYehuda Sadeh 
289dfc5606dSYehuda Sadeh static struct device *rbd_get_dev(struct rbd_device *rbd_dev)
290dfc5606dSYehuda Sadeh {
291dfc5606dSYehuda Sadeh 	return get_device(&rbd_dev->dev);
292dfc5606dSYehuda Sadeh }
293dfc5606dSYehuda Sadeh 
294dfc5606dSYehuda Sadeh static void rbd_put_dev(struct rbd_device *rbd_dev)
295dfc5606dSYehuda Sadeh {
296dfc5606dSYehuda Sadeh 	put_device(&rbd_dev->dev);
297dfc5606dSYehuda Sadeh }
298602adf40SYehuda Sadeh 
299117973fbSAlex Elder static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver);
300117973fbSAlex Elder static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver);
30159c2be1eSYehuda Sadeh 
302602adf40SYehuda Sadeh static int rbd_open(struct block_device *bdev, fmode_t mode)
303602adf40SYehuda Sadeh {
304f0f8cef5SAlex Elder 	struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
305602adf40SYehuda Sadeh 
306f84344f3SAlex Elder 	if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only)
307602adf40SYehuda Sadeh 		return -EROFS;
308602adf40SYehuda Sadeh 
309340c7a2bSAlex Elder 	rbd_get_dev(rbd_dev);
310f84344f3SAlex Elder 	set_device_ro(bdev, rbd_dev->mapping.read_only);
311340c7a2bSAlex Elder 
312602adf40SYehuda Sadeh 	return 0;
313602adf40SYehuda Sadeh }
314602adf40SYehuda Sadeh 
315dfc5606dSYehuda Sadeh static int rbd_release(struct gendisk *disk, fmode_t mode)
316dfc5606dSYehuda Sadeh {
317dfc5606dSYehuda Sadeh 	struct rbd_device *rbd_dev = disk->private_data;
318dfc5606dSYehuda Sadeh 
319dfc5606dSYehuda Sadeh 	rbd_put_dev(rbd_dev);
320dfc5606dSYehuda Sadeh 
321dfc5606dSYehuda Sadeh 	return 0;
322dfc5606dSYehuda Sadeh }
323dfc5606dSYehuda Sadeh 
324602adf40SYehuda Sadeh static const struct block_device_operations rbd_bd_ops = {
325602adf40SYehuda Sadeh 	.owner			= THIS_MODULE,
326602adf40SYehuda Sadeh 	.open			= rbd_open,
327dfc5606dSYehuda Sadeh 	.release		= rbd_release,
328602adf40SYehuda Sadeh };
329602adf40SYehuda Sadeh 
330602adf40SYehuda Sadeh /*
331602adf40SYehuda Sadeh  * Initialize an rbd client instance.
33243ae4701SAlex Elder  * We own *ceph_opts.
333602adf40SYehuda Sadeh  */
334f8c38929SAlex Elder static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts)
335602adf40SYehuda Sadeh {
336602adf40SYehuda Sadeh 	struct rbd_client *rbdc;
337602adf40SYehuda Sadeh 	int ret = -ENOMEM;
338602adf40SYehuda Sadeh 
339602adf40SYehuda Sadeh 	dout("rbd_client_create\n");
340602adf40SYehuda Sadeh 	rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL);
341602adf40SYehuda Sadeh 	if (!rbdc)
342602adf40SYehuda Sadeh 		goto out_opt;
343602adf40SYehuda Sadeh 
344602adf40SYehuda Sadeh 	kref_init(&rbdc->kref);
345602adf40SYehuda Sadeh 	INIT_LIST_HEAD(&rbdc->node);
346602adf40SYehuda Sadeh 
347bc534d86SAlex Elder 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
348bc534d86SAlex Elder 
34943ae4701SAlex Elder 	rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0);
350602adf40SYehuda Sadeh 	if (IS_ERR(rbdc->client))
351bc534d86SAlex Elder 		goto out_mutex;
35243ae4701SAlex Elder 	ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */
353602adf40SYehuda Sadeh 
354602adf40SYehuda Sadeh 	ret = ceph_open_session(rbdc->client);
355602adf40SYehuda Sadeh 	if (ret < 0)
356602adf40SYehuda Sadeh 		goto out_err;
357602adf40SYehuda Sadeh 
358432b8587SAlex Elder 	spin_lock(&rbd_client_list_lock);
359602adf40SYehuda Sadeh 	list_add_tail(&rbdc->node, &rbd_client_list);
360432b8587SAlex Elder 	spin_unlock(&rbd_client_list_lock);
361602adf40SYehuda Sadeh 
362bc534d86SAlex Elder 	mutex_unlock(&ctl_mutex);
363bc534d86SAlex Elder 
364602adf40SYehuda Sadeh 	dout("rbd_client_create created %p\n", rbdc);
365602adf40SYehuda Sadeh 	return rbdc;
366602adf40SYehuda Sadeh 
367602adf40SYehuda Sadeh out_err:
368602adf40SYehuda Sadeh 	ceph_destroy_client(rbdc->client);
369bc534d86SAlex Elder out_mutex:
370bc534d86SAlex Elder 	mutex_unlock(&ctl_mutex);
371602adf40SYehuda Sadeh 	kfree(rbdc);
372602adf40SYehuda Sadeh out_opt:
37343ae4701SAlex Elder 	if (ceph_opts)
37443ae4701SAlex Elder 		ceph_destroy_options(ceph_opts);
37528f259b7SVasiliy Kulikov 	return ERR_PTR(ret);
376602adf40SYehuda Sadeh }
377602adf40SYehuda Sadeh 
378602adf40SYehuda Sadeh /*
3791f7ba331SAlex Elder  * Find a ceph client with specific addr and configuration.  If
3801f7ba331SAlex Elder  * found, bump its reference count.
381602adf40SYehuda Sadeh  */
3821f7ba331SAlex Elder static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
383602adf40SYehuda Sadeh {
384602adf40SYehuda Sadeh 	struct rbd_client *client_node;
3851f7ba331SAlex Elder 	bool found = false;
386602adf40SYehuda Sadeh 
38743ae4701SAlex Elder 	if (ceph_opts->flags & CEPH_OPT_NOSHARE)
388602adf40SYehuda Sadeh 		return NULL;
389602adf40SYehuda Sadeh 
3901f7ba331SAlex Elder 	spin_lock(&rbd_client_list_lock);
3911f7ba331SAlex Elder 	list_for_each_entry(client_node, &rbd_client_list, node) {
3921f7ba331SAlex Elder 		if (!ceph_compare_options(ceph_opts, client_node->client)) {
3931f7ba331SAlex Elder 			kref_get(&client_node->kref);
3941f7ba331SAlex Elder 			found = true;
3951f7ba331SAlex Elder 			break;
3961f7ba331SAlex Elder 		}
3971f7ba331SAlex Elder 	}
3981f7ba331SAlex Elder 	spin_unlock(&rbd_client_list_lock);
3991f7ba331SAlex Elder 
4001f7ba331SAlex Elder 	return found ? client_node : NULL;
401602adf40SYehuda Sadeh }
402602adf40SYehuda Sadeh 
/*
 * Option tokens.  The Opt_last_* entries are sentinels that
 * parse_rbd_opts_token() uses to classify a token as taking an int
 * argument, a string argument, or being a Boolean flag.  There are
 * currently no int or string options.
 */
enum {
	Opt_last_int,
	/* int args above */
	Opt_last_string,
	/* string args above */
	Opt_read_only,
	Opt_read_write,
	/* Boolean args above */
	Opt_last_bool,
};
41659c2be1eSYehuda Sadeh 
41743ae4701SAlex Elder static match_table_t rbd_opts_tokens = {
41859c2be1eSYehuda Sadeh 	/* int args above */
41959c2be1eSYehuda Sadeh 	/* string args above */
420be466c1cSAlex Elder 	{Opt_read_only, "read_only"},
421cc0538b6SAlex Elder 	{Opt_read_only, "ro"},		/* Alternate spelling */
422cc0538b6SAlex Elder 	{Opt_read_write, "read_write"},
423cc0538b6SAlex Elder 	{Opt_read_write, "rw"},		/* Alternate spelling */
424cc0538b6SAlex Elder 	/* Boolean args above */
42559c2be1eSYehuda Sadeh 	{-1, NULL}
42659c2be1eSYehuda Sadeh };
42759c2be1eSYehuda Sadeh 
42859c2be1eSYehuda Sadeh static int parse_rbd_opts_token(char *c, void *private)
42959c2be1eSYehuda Sadeh {
43043ae4701SAlex Elder 	struct rbd_options *rbd_opts = private;
43159c2be1eSYehuda Sadeh 	substring_t argstr[MAX_OPT_ARGS];
43259c2be1eSYehuda Sadeh 	int token, intval, ret;
43359c2be1eSYehuda Sadeh 
43443ae4701SAlex Elder 	token = match_token(c, rbd_opts_tokens, argstr);
43559c2be1eSYehuda Sadeh 	if (token < 0)
43659c2be1eSYehuda Sadeh 		return -EINVAL;
43759c2be1eSYehuda Sadeh 
43859c2be1eSYehuda Sadeh 	if (token < Opt_last_int) {
43959c2be1eSYehuda Sadeh 		ret = match_int(&argstr[0], &intval);
44059c2be1eSYehuda Sadeh 		if (ret < 0) {
44159c2be1eSYehuda Sadeh 			pr_err("bad mount option arg (not int) "
44259c2be1eSYehuda Sadeh 			       "at '%s'\n", c);
44359c2be1eSYehuda Sadeh 			return ret;
44459c2be1eSYehuda Sadeh 		}
44559c2be1eSYehuda Sadeh 		dout("got int token %d val %d\n", token, intval);
44659c2be1eSYehuda Sadeh 	} else if (token > Opt_last_int && token < Opt_last_string) {
44759c2be1eSYehuda Sadeh 		dout("got string token %d val %s\n", token,
44859c2be1eSYehuda Sadeh 		     argstr[0].from);
449cc0538b6SAlex Elder 	} else if (token > Opt_last_string && token < Opt_last_bool) {
450cc0538b6SAlex Elder 		dout("got Boolean token %d\n", token);
45159c2be1eSYehuda Sadeh 	} else {
45259c2be1eSYehuda Sadeh 		dout("got token %d\n", token);
45359c2be1eSYehuda Sadeh 	}
45459c2be1eSYehuda Sadeh 
45559c2be1eSYehuda Sadeh 	switch (token) {
456cc0538b6SAlex Elder 	case Opt_read_only:
457cc0538b6SAlex Elder 		rbd_opts->read_only = true;
458cc0538b6SAlex Elder 		break;
459cc0538b6SAlex Elder 	case Opt_read_write:
460cc0538b6SAlex Elder 		rbd_opts->read_only = false;
461cc0538b6SAlex Elder 		break;
46259c2be1eSYehuda Sadeh 	default:
463aafb230eSAlex Elder 		rbd_assert(false);
464aafb230eSAlex Elder 		break;
46559c2be1eSYehuda Sadeh 	}
46659c2be1eSYehuda Sadeh 	return 0;
46759c2be1eSYehuda Sadeh }
46859c2be1eSYehuda Sadeh 
/*
 * Return an rbd client matching @ceph_opts, creating one if no
 * existing client matches.
 *
 * Either way @ceph_opts is consumed: it is destroyed here when an
 * existing client is reused, and handed to rbd_client_create()
 * otherwise.  The result is whatever rbd_client_find() or
 * rbd_client_create() returned.
 */
static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
{
	struct rbd_client *existing = rbd_client_find(ceph_opts);

	if (!existing)
		return rbd_client_create(ceph_opts);

	/* using an existing client */
	ceph_destroy_options(ceph_opts);

	return existing;
}
485602adf40SYehuda Sadeh 
486602adf40SYehuda Sadeh /*
487602adf40SYehuda Sadeh  * Destroy ceph client
488d23a4b3fSAlex Elder  *
489432b8587SAlex Elder  * Caller must hold rbd_client_list_lock.
490602adf40SYehuda Sadeh  */
491602adf40SYehuda Sadeh static void rbd_client_release(struct kref *kref)
492602adf40SYehuda Sadeh {
493602adf40SYehuda Sadeh 	struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
494602adf40SYehuda Sadeh 
495602adf40SYehuda Sadeh 	dout("rbd_release_client %p\n", rbdc);
496cd9d9f5dSAlex Elder 	spin_lock(&rbd_client_list_lock);
497602adf40SYehuda Sadeh 	list_del(&rbdc->node);
498cd9d9f5dSAlex Elder 	spin_unlock(&rbd_client_list_lock);
499602adf40SYehuda Sadeh 
500602adf40SYehuda Sadeh 	ceph_destroy_client(rbdc->client);
501602adf40SYehuda Sadeh 	kfree(rbdc);
502602adf40SYehuda Sadeh }
503602adf40SYehuda Sadeh 
504602adf40SYehuda Sadeh /*
505602adf40SYehuda Sadeh  * Drop reference to ceph client node. If it's not referenced anymore, release
506602adf40SYehuda Sadeh  * it.
507602adf40SYehuda Sadeh  */
5089d3997fdSAlex Elder static void rbd_put_client(struct rbd_client *rbdc)
509602adf40SYehuda Sadeh {
510c53d5893SAlex Elder 	if (rbdc)
5119d3997fdSAlex Elder 		kref_put(&rbdc->kref, rbd_client_release);
512602adf40SYehuda Sadeh }
513602adf40SYehuda Sadeh 
5141fec7093SYehuda Sadeh /*
5151fec7093SYehuda Sadeh  * Destroy requests collection
5161fec7093SYehuda Sadeh  */
5171fec7093SYehuda Sadeh static void rbd_coll_release(struct kref *kref)
5181fec7093SYehuda Sadeh {
5191fec7093SYehuda Sadeh 	struct rbd_req_coll *coll =
5201fec7093SYehuda Sadeh 		container_of(kref, struct rbd_req_coll, kref);
5211fec7093SYehuda Sadeh 
5221fec7093SYehuda Sadeh 	dout("rbd_coll_release %p\n", coll);
5231fec7093SYehuda Sadeh 	kfree(coll);
5241fec7093SYehuda Sadeh }
525602adf40SYehuda Sadeh 
526a30b71b9SAlex Elder static bool rbd_image_format_valid(u32 image_format)
527a30b71b9SAlex Elder {
528a30b71b9SAlex Elder 	return image_format == 1 || image_format == 2;
529a30b71b9SAlex Elder }
530a30b71b9SAlex Elder 
5318e94af8eSAlex Elder static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk)
5328e94af8eSAlex Elder {
533103a150fSAlex Elder 	size_t size;
534103a150fSAlex Elder 	u32 snap_count;
535103a150fSAlex Elder 
536103a150fSAlex Elder 	/* The header has to start with the magic rbd header text */
537103a150fSAlex Elder 	if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT)))
538103a150fSAlex Elder 		return false;
539103a150fSAlex Elder 
540db2388b6SAlex Elder 	/* The bio layer requires at least sector-sized I/O */
541db2388b6SAlex Elder 
542db2388b6SAlex Elder 	if (ondisk->options.order < SECTOR_SHIFT)
543db2388b6SAlex Elder 		return false;
544db2388b6SAlex Elder 
545db2388b6SAlex Elder 	/* If we use u64 in a few spots we may be able to loosen this */
546db2388b6SAlex Elder 
547db2388b6SAlex Elder 	if (ondisk->options.order > 8 * sizeof (int) - 1)
548db2388b6SAlex Elder 		return false;
549db2388b6SAlex Elder 
550103a150fSAlex Elder 	/*
551103a150fSAlex Elder 	 * The size of a snapshot header has to fit in a size_t, and
552103a150fSAlex Elder 	 * that limits the number of snapshots.
553103a150fSAlex Elder 	 */
554103a150fSAlex Elder 	snap_count = le32_to_cpu(ondisk->snap_count);
555103a150fSAlex Elder 	size = SIZE_MAX - sizeof (struct ceph_snap_context);
556103a150fSAlex Elder 	if (snap_count > size / sizeof (__le64))
557103a150fSAlex Elder 		return false;
558103a150fSAlex Elder 
559103a150fSAlex Elder 	/*
560103a150fSAlex Elder 	 * Not only that, but the size of the entire the snapshot
561103a150fSAlex Elder 	 * header must also be representable in a size_t.
562103a150fSAlex Elder 	 */
563103a150fSAlex Elder 	size -= snap_count * sizeof (__le64);
564103a150fSAlex Elder 	if ((u64) size < le64_to_cpu(ondisk->snap_names_len))
565103a150fSAlex Elder 		return false;
566103a150fSAlex Elder 
567103a150fSAlex Elder 	return true;
5688e94af8eSAlex Elder }
5698e94af8eSAlex Elder 
570602adf40SYehuda Sadeh /*
571602adf40SYehuda Sadeh  * Create a new header structure, translate header format from the on-disk
572602adf40SYehuda Sadeh  * header.
573602adf40SYehuda Sadeh  */
574602adf40SYehuda Sadeh static int rbd_header_from_disk(struct rbd_image_header *header,
5754156d998SAlex Elder 				 struct rbd_image_header_ondisk *ondisk)
576602adf40SYehuda Sadeh {
577ccece235SAlex Elder 	u32 snap_count;
57858c17b0eSAlex Elder 	size_t len;
579d2bb24e5SAlex Elder 	size_t size;
580621901d6SAlex Elder 	u32 i;
581602adf40SYehuda Sadeh 
5826a52325fSAlex Elder 	memset(header, 0, sizeof (*header));
5836a52325fSAlex Elder 
584103a150fSAlex Elder 	snap_count = le32_to_cpu(ondisk->snap_count);
585103a150fSAlex Elder 
58658c17b0eSAlex Elder 	len = strnlen(ondisk->object_prefix, sizeof (ondisk->object_prefix));
58758c17b0eSAlex Elder 	header->object_prefix = kmalloc(len + 1, GFP_KERNEL);
5886a52325fSAlex Elder 	if (!header->object_prefix)
589602adf40SYehuda Sadeh 		return -ENOMEM;
59058c17b0eSAlex Elder 	memcpy(header->object_prefix, ondisk->object_prefix, len);
59158c17b0eSAlex Elder 	header->object_prefix[len] = '\0';
59200f1f36fSAlex Elder 
593602adf40SYehuda Sadeh 	if (snap_count) {
594f785cc1dSAlex Elder 		u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len);
595f785cc1dSAlex Elder 
596621901d6SAlex Elder 		/* Save a copy of the snapshot names */
597621901d6SAlex Elder 
598f785cc1dSAlex Elder 		if (snap_names_len > (u64) SIZE_MAX)
599f785cc1dSAlex Elder 			return -EIO;
600f785cc1dSAlex Elder 		header->snap_names = kmalloc(snap_names_len, GFP_KERNEL);
601602adf40SYehuda Sadeh 		if (!header->snap_names)
6026a52325fSAlex Elder 			goto out_err;
603f785cc1dSAlex Elder 		/*
604f785cc1dSAlex Elder 		 * Note that rbd_dev_v1_header_read() guarantees
605f785cc1dSAlex Elder 		 * the ondisk buffer we're working with has
606f785cc1dSAlex Elder 		 * snap_names_len bytes beyond the end of the
607f785cc1dSAlex Elder 		 * snapshot id array, this memcpy() is safe.
608f785cc1dSAlex Elder 		 */
609f785cc1dSAlex Elder 		memcpy(header->snap_names, &ondisk->snaps[snap_count],
610f785cc1dSAlex Elder 			snap_names_len);
6116a52325fSAlex Elder 
612621901d6SAlex Elder 		/* Record each snapshot's size */
613621901d6SAlex Elder 
614d2bb24e5SAlex Elder 		size = snap_count * sizeof (*header->snap_sizes);
615d2bb24e5SAlex Elder 		header->snap_sizes = kmalloc(size, GFP_KERNEL);
616602adf40SYehuda Sadeh 		if (!header->snap_sizes)
6176a52325fSAlex Elder 			goto out_err;
618621901d6SAlex Elder 		for (i = 0; i < snap_count; i++)
619621901d6SAlex Elder 			header->snap_sizes[i] =
620621901d6SAlex Elder 				le64_to_cpu(ondisk->snaps[i].image_size);
621602adf40SYehuda Sadeh 	} else {
622ccece235SAlex Elder 		WARN_ON(ondisk->snap_names_len);
623602adf40SYehuda Sadeh 		header->snap_names = NULL;
624602adf40SYehuda Sadeh 		header->snap_sizes = NULL;
625602adf40SYehuda Sadeh 	}
626849b4260SAlex Elder 
62734b13184SAlex Elder 	header->features = 0;	/* No features support in v1 images */
628602adf40SYehuda Sadeh 	header->obj_order = ondisk->options.order;
629602adf40SYehuda Sadeh 	header->crypt_type = ondisk->options.crypt_type;
630602adf40SYehuda Sadeh 	header->comp_type = ondisk->options.comp_type;
6316a52325fSAlex Elder 
632621901d6SAlex Elder 	/* Allocate and fill in the snapshot context */
633621901d6SAlex Elder 
634f84344f3SAlex Elder 	header->image_size = le64_to_cpu(ondisk->image_size);
6356a52325fSAlex Elder 	size = sizeof (struct ceph_snap_context);
6366a52325fSAlex Elder 	size += snap_count * sizeof (header->snapc->snaps[0]);
6376a52325fSAlex Elder 	header->snapc = kzalloc(size, GFP_KERNEL);
6386a52325fSAlex Elder 	if (!header->snapc)
6396a52325fSAlex Elder 		goto out_err;
640602adf40SYehuda Sadeh 
641602adf40SYehuda Sadeh 	atomic_set(&header->snapc->nref, 1);
642505cbb9bSAlex Elder 	header->snapc->seq = le64_to_cpu(ondisk->snap_seq);
643602adf40SYehuda Sadeh 	header->snapc->num_snaps = snap_count;
644621901d6SAlex Elder 	for (i = 0; i < snap_count; i++)
645602adf40SYehuda Sadeh 		header->snapc->snaps[i] =
646602adf40SYehuda Sadeh 			le64_to_cpu(ondisk->snaps[i].id);
647602adf40SYehuda Sadeh 
648602adf40SYehuda Sadeh 	return 0;
649602adf40SYehuda Sadeh 
6506a52325fSAlex Elder out_err:
651849b4260SAlex Elder 	kfree(header->snap_sizes);
652ccece235SAlex Elder 	header->snap_sizes = NULL;
653602adf40SYehuda Sadeh 	kfree(header->snap_names);
654ccece235SAlex Elder 	header->snap_names = NULL;
6556a52325fSAlex Elder 	kfree(header->object_prefix);
6566a52325fSAlex Elder 	header->object_prefix = NULL;
657ccece235SAlex Elder 
65800f1f36fSAlex Elder 	return -ENOMEM;
659602adf40SYehuda Sadeh }
660602adf40SYehuda Sadeh 
6618836b995SAlex Elder static int snap_by_name(struct rbd_device *rbd_dev, const char *snap_name)
662602adf40SYehuda Sadeh {
663602adf40SYehuda Sadeh 
664e86924a8SAlex Elder 	struct rbd_snap *snap;
66500f1f36fSAlex Elder 
666e86924a8SAlex Elder 	list_for_each_entry(snap, &rbd_dev->snaps, node) {
667e86924a8SAlex Elder 		if (!strcmp(snap_name, snap->name)) {
6680d7dbfceSAlex Elder 			rbd_dev->spec->snap_id = snap->id;
669e86924a8SAlex Elder 			rbd_dev->mapping.size = snap->size;
67034b13184SAlex Elder 			rbd_dev->mapping.features = snap->features;
67100f1f36fSAlex Elder 
672e86924a8SAlex Elder 			return 0;
673602adf40SYehuda Sadeh 		}
67400f1f36fSAlex Elder 	}
675e86924a8SAlex Elder 
67600f1f36fSAlex Elder 	return -ENOENT;
67700f1f36fSAlex Elder }
678602adf40SYehuda Sadeh 
679819d52bfSAlex Elder static int rbd_dev_set_mapping(struct rbd_device *rbd_dev)
680602adf40SYehuda Sadeh {
68178dc447dSAlex Elder 	int ret;
682602adf40SYehuda Sadeh 
6830d7dbfceSAlex Elder 	if (!memcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME,
684cc9d734cSJosh Durgin 		    sizeof (RBD_SNAP_HEAD_NAME))) {
6850d7dbfceSAlex Elder 		rbd_dev->spec->snap_id = CEPH_NOSNAP;
68699c1f08fSAlex Elder 		rbd_dev->mapping.size = rbd_dev->header.image_size;
68734b13184SAlex Elder 		rbd_dev->mapping.features = rbd_dev->header.features;
688e86924a8SAlex Elder 		ret = 0;
689602adf40SYehuda Sadeh 	} else {
6900d7dbfceSAlex Elder 		ret = snap_by_name(rbd_dev, rbd_dev->spec->snap_name);
691602adf40SYehuda Sadeh 		if (ret < 0)
692602adf40SYehuda Sadeh 			goto done;
693f84344f3SAlex Elder 		rbd_dev->mapping.read_only = true;
694602adf40SYehuda Sadeh 	}
695daba5fdbSAlex Elder 	rbd_dev->exists = true;
696602adf40SYehuda Sadeh done:
697602adf40SYehuda Sadeh 	return ret;
698602adf40SYehuda Sadeh }
699602adf40SYehuda Sadeh 
700602adf40SYehuda Sadeh static void rbd_header_free(struct rbd_image_header *header)
701602adf40SYehuda Sadeh {
702849b4260SAlex Elder 	kfree(header->object_prefix);
703d78fd7aeSAlex Elder 	header->object_prefix = NULL;
704602adf40SYehuda Sadeh 	kfree(header->snap_sizes);
705d78fd7aeSAlex Elder 	header->snap_sizes = NULL;
706849b4260SAlex Elder 	kfree(header->snap_names);
707d78fd7aeSAlex Elder 	header->snap_names = NULL;
708d1d25646SJosh Durgin 	ceph_put_snap_context(header->snapc);
709d78fd7aeSAlex Elder 	header->snapc = NULL;
710602adf40SYehuda Sadeh }
711602adf40SYehuda Sadeh 
71265ccfe21SAlex Elder static char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
713602adf40SYehuda Sadeh {
71465ccfe21SAlex Elder 	char *name;
71565ccfe21SAlex Elder 	u64 segment;
71665ccfe21SAlex Elder 	int ret;
717602adf40SYehuda Sadeh 
71865ccfe21SAlex Elder 	name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
71965ccfe21SAlex Elder 	if (!name)
72065ccfe21SAlex Elder 		return NULL;
72165ccfe21SAlex Elder 	segment = offset >> rbd_dev->header.obj_order;
72265ccfe21SAlex Elder 	ret = snprintf(name, RBD_MAX_SEG_NAME_LEN, "%s.%012llx",
72365ccfe21SAlex Elder 			rbd_dev->header.object_prefix, segment);
72465ccfe21SAlex Elder 	if (ret < 0 || ret >= RBD_MAX_SEG_NAME_LEN) {
72565ccfe21SAlex Elder 		pr_err("error formatting segment name for #%llu (%d)\n",
72665ccfe21SAlex Elder 			segment, ret);
72765ccfe21SAlex Elder 		kfree(name);
72865ccfe21SAlex Elder 		name = NULL;
72965ccfe21SAlex Elder 	}
730602adf40SYehuda Sadeh 
73165ccfe21SAlex Elder 	return name;
73265ccfe21SAlex Elder }
733602adf40SYehuda Sadeh 
73465ccfe21SAlex Elder static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset)
73565ccfe21SAlex Elder {
73665ccfe21SAlex Elder 	u64 segment_size = (u64) 1 << rbd_dev->header.obj_order;
737602adf40SYehuda Sadeh 
73865ccfe21SAlex Elder 	return offset & (segment_size - 1);
73965ccfe21SAlex Elder }
74065ccfe21SAlex Elder 
74165ccfe21SAlex Elder static u64 rbd_segment_length(struct rbd_device *rbd_dev,
74265ccfe21SAlex Elder 				u64 offset, u64 length)
74365ccfe21SAlex Elder {
74465ccfe21SAlex Elder 	u64 segment_size = (u64) 1 << rbd_dev->header.obj_order;
74565ccfe21SAlex Elder 
74665ccfe21SAlex Elder 	offset &= segment_size - 1;
74765ccfe21SAlex Elder 
748aafb230eSAlex Elder 	rbd_assert(length <= U64_MAX - offset);
74965ccfe21SAlex Elder 	if (offset + length > segment_size)
75065ccfe21SAlex Elder 		length = segment_size - offset;
75165ccfe21SAlex Elder 
75265ccfe21SAlex Elder 	return length;
753602adf40SYehuda Sadeh }
754602adf40SYehuda Sadeh 
7551fec7093SYehuda Sadeh static int rbd_get_num_segments(struct rbd_image_header *header,
7561fec7093SYehuda Sadeh 				u64 ofs, u64 len)
7571fec7093SYehuda Sadeh {
758df111be6SAlex Elder 	u64 start_seg;
759df111be6SAlex Elder 	u64 end_seg;
760df111be6SAlex Elder 
761df111be6SAlex Elder 	if (!len)
762df111be6SAlex Elder 		return 0;
763df111be6SAlex Elder 	if (len - 1 > U64_MAX - ofs)
764df111be6SAlex Elder 		return -ERANGE;
765df111be6SAlex Elder 
766df111be6SAlex Elder 	start_seg = ofs >> header->obj_order;
767df111be6SAlex Elder 	end_seg = (ofs + len - 1) >> header->obj_order;
768df111be6SAlex Elder 
7691fec7093SYehuda Sadeh 	return end_seg - start_seg + 1;
7701fec7093SYehuda Sadeh }
7711fec7093SYehuda Sadeh 
772602adf40SYehuda Sadeh /*
773029bcbd8SJosh Durgin  * returns the size of an object in the image
774029bcbd8SJosh Durgin  */
775029bcbd8SJosh Durgin static u64 rbd_obj_bytes(struct rbd_image_header *header)
776029bcbd8SJosh Durgin {
777029bcbd8SJosh Durgin 	return 1 << header->obj_order;
778029bcbd8SJosh Durgin }
779029bcbd8SJosh Durgin 
780029bcbd8SJosh Durgin /*
781602adf40SYehuda Sadeh  * bio helpers
782602adf40SYehuda Sadeh  */
783602adf40SYehuda Sadeh 
784602adf40SYehuda Sadeh static void bio_chain_put(struct bio *chain)
785602adf40SYehuda Sadeh {
786602adf40SYehuda Sadeh 	struct bio *tmp;
787602adf40SYehuda Sadeh 
788602adf40SYehuda Sadeh 	while (chain) {
789602adf40SYehuda Sadeh 		tmp = chain;
790602adf40SYehuda Sadeh 		chain = chain->bi_next;
791602adf40SYehuda Sadeh 		bio_put(tmp);
792602adf40SYehuda Sadeh 	}
793602adf40SYehuda Sadeh }
794602adf40SYehuda Sadeh 
795602adf40SYehuda Sadeh /*
796602adf40SYehuda Sadeh  * zeros a bio chain, starting at specific offset
797602adf40SYehuda Sadeh  */
798602adf40SYehuda Sadeh static void zero_bio_chain(struct bio *chain, int start_ofs)
799602adf40SYehuda Sadeh {
800602adf40SYehuda Sadeh 	struct bio_vec *bv;
801602adf40SYehuda Sadeh 	unsigned long flags;
802602adf40SYehuda Sadeh 	void *buf;
803602adf40SYehuda Sadeh 	int i;
804602adf40SYehuda Sadeh 	int pos = 0;
805602adf40SYehuda Sadeh 
806602adf40SYehuda Sadeh 	while (chain) {
807602adf40SYehuda Sadeh 		bio_for_each_segment(bv, chain, i) {
808602adf40SYehuda Sadeh 			if (pos + bv->bv_len > start_ofs) {
809602adf40SYehuda Sadeh 				int remainder = max(start_ofs - pos, 0);
810602adf40SYehuda Sadeh 				buf = bvec_kmap_irq(bv, &flags);
811602adf40SYehuda Sadeh 				memset(buf + remainder, 0,
812602adf40SYehuda Sadeh 				       bv->bv_len - remainder);
81385b5aaa6SDan Carpenter 				bvec_kunmap_irq(buf, &flags);
814602adf40SYehuda Sadeh 			}
815602adf40SYehuda Sadeh 			pos += bv->bv_len;
816602adf40SYehuda Sadeh 		}
817602adf40SYehuda Sadeh 
818602adf40SYehuda Sadeh 		chain = chain->bi_next;
819602adf40SYehuda Sadeh 	}
820602adf40SYehuda Sadeh }
821602adf40SYehuda Sadeh 
822602adf40SYehuda Sadeh /*
823f7760dadSAlex Elder  * Clone a portion of a bio, starting at the given byte offset
824f7760dadSAlex Elder  * and continuing for the number of bytes indicated.
825602adf40SYehuda Sadeh  */
826f7760dadSAlex Elder static struct bio *bio_clone_range(struct bio *bio_src,
827f7760dadSAlex Elder 					unsigned int offset,
828f7760dadSAlex Elder 					unsigned int len,
829f7760dadSAlex Elder 					gfp_t gfpmask)
830602adf40SYehuda Sadeh {
831f7760dadSAlex Elder 	struct bio_vec *bv;
832f7760dadSAlex Elder 	unsigned int resid;
833f7760dadSAlex Elder 	unsigned short idx;
834f7760dadSAlex Elder 	unsigned int voff;
835f7760dadSAlex Elder 	unsigned short end_idx;
836f7760dadSAlex Elder 	unsigned short vcnt;
837f7760dadSAlex Elder 	struct bio *bio;
838602adf40SYehuda Sadeh 
839f7760dadSAlex Elder 	/* Handle the easy case for the caller */
840f7760dadSAlex Elder 
841f7760dadSAlex Elder 	if (!offset && len == bio_src->bi_size)
842f7760dadSAlex Elder 		return bio_clone(bio_src, gfpmask);
843f7760dadSAlex Elder 
844f7760dadSAlex Elder 	if (WARN_ON_ONCE(!len))
845f7760dadSAlex Elder 		return NULL;
846f7760dadSAlex Elder 	if (WARN_ON_ONCE(len > bio_src->bi_size))
847f7760dadSAlex Elder 		return NULL;
848f7760dadSAlex Elder 	if (WARN_ON_ONCE(offset > bio_src->bi_size - len))
849f7760dadSAlex Elder 		return NULL;
850f7760dadSAlex Elder 
851f7760dadSAlex Elder 	/* Find first affected segment... */
852f7760dadSAlex Elder 
853f7760dadSAlex Elder 	resid = offset;
854f7760dadSAlex Elder 	__bio_for_each_segment(bv, bio_src, idx, 0) {
855f7760dadSAlex Elder 		if (resid < bv->bv_len)
856f7760dadSAlex Elder 			break;
857f7760dadSAlex Elder 		resid -= bv->bv_len;
858602adf40SYehuda Sadeh 	}
859f7760dadSAlex Elder 	voff = resid;
860602adf40SYehuda Sadeh 
861f7760dadSAlex Elder 	/* ...and the last affected segment */
862542582fcSAlex Elder 
863f7760dadSAlex Elder 	resid += len;
864f7760dadSAlex Elder 	__bio_for_each_segment(bv, bio_src, end_idx, idx) {
865f7760dadSAlex Elder 		if (resid <= bv->bv_len)
866f7760dadSAlex Elder 			break;
867f7760dadSAlex Elder 		resid -= bv->bv_len;
868f7760dadSAlex Elder 	}
869f7760dadSAlex Elder 	vcnt = end_idx - idx + 1;
870602adf40SYehuda Sadeh 
871f7760dadSAlex Elder 	/* Build the clone */
872f7760dadSAlex Elder 
873f7760dadSAlex Elder 	bio = bio_alloc(gfpmask, (unsigned int) vcnt);
874f7760dadSAlex Elder 	if (!bio)
875f7760dadSAlex Elder 		return NULL;	/* ENOMEM */
876f7760dadSAlex Elder 
877f7760dadSAlex Elder 	bio->bi_bdev = bio_src->bi_bdev;
878f7760dadSAlex Elder 	bio->bi_sector = bio_src->bi_sector + (offset >> SECTOR_SHIFT);
879f7760dadSAlex Elder 	bio->bi_rw = bio_src->bi_rw;
880f7760dadSAlex Elder 	bio->bi_flags |= 1 << BIO_CLONED;
881602adf40SYehuda Sadeh 
882602adf40SYehuda Sadeh 	/*
883f7760dadSAlex Elder 	 * Copy over our part of the bio_vec, then update the first
884f7760dadSAlex Elder 	 * and last (or only) entries.
885602adf40SYehuda Sadeh 	 */
886f7760dadSAlex Elder 	memcpy(&bio->bi_io_vec[0], &bio_src->bi_io_vec[idx],
887f7760dadSAlex Elder 			vcnt * sizeof (struct bio_vec));
888f7760dadSAlex Elder 	bio->bi_io_vec[0].bv_offset += voff;
889f7760dadSAlex Elder 	if (vcnt > 1) {
890f7760dadSAlex Elder 		bio->bi_io_vec[0].bv_len -= voff;
891f7760dadSAlex Elder 		bio->bi_io_vec[vcnt - 1].bv_len = resid;
892602adf40SYehuda Sadeh 	} else {
893f7760dadSAlex Elder 		bio->bi_io_vec[0].bv_len = len;
894602adf40SYehuda Sadeh 	}
895602adf40SYehuda Sadeh 
896f7760dadSAlex Elder 	bio->bi_vcnt = vcnt;
897f7760dadSAlex Elder 	bio->bi_size = len;
898f7760dadSAlex Elder 	bio->bi_idx = 0;
899602adf40SYehuda Sadeh 
900f7760dadSAlex Elder 	return bio;
901602adf40SYehuda Sadeh }
902602adf40SYehuda Sadeh 
903f7760dadSAlex Elder /*
904f7760dadSAlex Elder  * Clone a portion of a bio chain, starting at the given byte offset
905f7760dadSAlex Elder  * into the first bio in the source chain and continuing for the
906f7760dadSAlex Elder  * number of bytes indicated.  The result is another bio chain of
907f7760dadSAlex Elder  * exactly the given length, or a null pointer on error.
908f7760dadSAlex Elder  *
909f7760dadSAlex Elder  * The bio_src and offset parameters are both in-out.  On entry they
910f7760dadSAlex Elder  * refer to the first source bio and the offset into that bio where
911f7760dadSAlex Elder  * the start of data to be cloned is located.
912f7760dadSAlex Elder  *
913f7760dadSAlex Elder  * On return, bio_src is updated to refer to the bio in the source
914f7760dadSAlex Elder  * chain that contains first un-cloned byte, and *offset will
915f7760dadSAlex Elder  * contain the offset of that byte within that bio.
916f7760dadSAlex Elder  */
917f7760dadSAlex Elder static struct bio *bio_chain_clone_range(struct bio **bio_src,
918f7760dadSAlex Elder 					unsigned int *offset,
919f7760dadSAlex Elder 					unsigned int len,
920f7760dadSAlex Elder 					gfp_t gfpmask)
921f7760dadSAlex Elder {
922f7760dadSAlex Elder 	struct bio *bi = *bio_src;
923f7760dadSAlex Elder 	unsigned int off = *offset;
924f7760dadSAlex Elder 	struct bio *chain = NULL;
925f7760dadSAlex Elder 	struct bio **end;
926602adf40SYehuda Sadeh 
927f7760dadSAlex Elder 	/* Build up a chain of clone bios up to the limit */
928602adf40SYehuda Sadeh 
929f7760dadSAlex Elder 	if (!bi || off >= bi->bi_size || !len)
930f7760dadSAlex Elder 		return NULL;		/* Nothing to clone */
931602adf40SYehuda Sadeh 
932f7760dadSAlex Elder 	end = &chain;
933f7760dadSAlex Elder 	while (len) {
934f7760dadSAlex Elder 		unsigned int bi_size;
935f7760dadSAlex Elder 		struct bio *bio;
936f7760dadSAlex Elder 
937f7760dadSAlex Elder 		if (!bi)
938f7760dadSAlex Elder 			goto out_err;	/* EINVAL; ran out of bio's */
939f7760dadSAlex Elder 		bi_size = min_t(unsigned int, bi->bi_size - off, len);
940f7760dadSAlex Elder 		bio = bio_clone_range(bi, off, bi_size, gfpmask);
941f7760dadSAlex Elder 		if (!bio)
942f7760dadSAlex Elder 			goto out_err;	/* ENOMEM */
943f7760dadSAlex Elder 
944f7760dadSAlex Elder 		*end = bio;
945f7760dadSAlex Elder 		end = &bio->bi_next;
946f7760dadSAlex Elder 
947f7760dadSAlex Elder 		off += bi_size;
948f7760dadSAlex Elder 		if (off == bi->bi_size) {
949f7760dadSAlex Elder 			bi = bi->bi_next;
950f7760dadSAlex Elder 			off = 0;
951f7760dadSAlex Elder 		}
952f7760dadSAlex Elder 		len -= bi_size;
953f7760dadSAlex Elder 	}
954f7760dadSAlex Elder 	*bio_src = bi;
955f7760dadSAlex Elder 	*offset = off;
956f7760dadSAlex Elder 
957f7760dadSAlex Elder 	return chain;
958f7760dadSAlex Elder out_err:
959f7760dadSAlex Elder 	bio_chain_put(chain);
960f7760dadSAlex Elder 
961602adf40SYehuda Sadeh 	return NULL;
962602adf40SYehuda Sadeh }
963602adf40SYehuda Sadeh 
964602adf40SYehuda Sadeh /*
965602adf40SYehuda Sadeh  * helpers for osd request op vectors.
966602adf40SYehuda Sadeh  */
96757cfc106SAlex Elder static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops,
96857cfc106SAlex Elder 					int opcode, u32 payload_len)
969602adf40SYehuda Sadeh {
97057cfc106SAlex Elder 	struct ceph_osd_req_op *ops;
97157cfc106SAlex Elder 
97257cfc106SAlex Elder 	ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO);
97357cfc106SAlex Elder 	if (!ops)
97457cfc106SAlex Elder 		return NULL;
97557cfc106SAlex Elder 
97657cfc106SAlex Elder 	ops[0].op = opcode;
97757cfc106SAlex Elder 
978602adf40SYehuda Sadeh 	/*
979602adf40SYehuda Sadeh 	 * op extent offset and length will be set later on
980602adf40SYehuda Sadeh 	 * in calc_raw_layout()
981602adf40SYehuda Sadeh 	 */
98257cfc106SAlex Elder 	ops[0].payload_len = payload_len;
98357cfc106SAlex Elder 
98457cfc106SAlex Elder 	return ops;
985602adf40SYehuda Sadeh }
986602adf40SYehuda Sadeh 
/* Free an op vector obtained from rbd_create_rw_ops() */
static void rbd_destroy_ops(struct ceph_osd_req_op *ops)
{
	struct ceph_osd_req_op *vec = ops;

	kfree(vec);
}
991602adf40SYehuda Sadeh 
9921fec7093SYehuda Sadeh static void rbd_coll_end_req_index(struct request *rq,
9931fec7093SYehuda Sadeh 				   struct rbd_req_coll *coll,
9941fec7093SYehuda Sadeh 				   int index,
9951fec7093SYehuda Sadeh 				   int ret, u64 len)
9961fec7093SYehuda Sadeh {
9971fec7093SYehuda Sadeh 	struct request_queue *q;
9981fec7093SYehuda Sadeh 	int min, max, i;
9991fec7093SYehuda Sadeh 
1000bd919d45SAlex Elder 	dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n",
1001bd919d45SAlex Elder 	     coll, index, ret, (unsigned long long) len);
10021fec7093SYehuda Sadeh 
10031fec7093SYehuda Sadeh 	if (!rq)
10041fec7093SYehuda Sadeh 		return;
10051fec7093SYehuda Sadeh 
10061fec7093SYehuda Sadeh 	if (!coll) {
10071fec7093SYehuda Sadeh 		blk_end_request(rq, ret, len);
10081fec7093SYehuda Sadeh 		return;
10091fec7093SYehuda Sadeh 	}
10101fec7093SYehuda Sadeh 
10111fec7093SYehuda Sadeh 	q = rq->q;
10121fec7093SYehuda Sadeh 
10131fec7093SYehuda Sadeh 	spin_lock_irq(q->queue_lock);
10141fec7093SYehuda Sadeh 	coll->status[index].done = 1;
10151fec7093SYehuda Sadeh 	coll->status[index].rc = ret;
10161fec7093SYehuda Sadeh 	coll->status[index].bytes = len;
10171fec7093SYehuda Sadeh 	max = min = coll->num_done;
10181fec7093SYehuda Sadeh 	while (max < coll->total && coll->status[max].done)
10191fec7093SYehuda Sadeh 		max++;
10201fec7093SYehuda Sadeh 
10211fec7093SYehuda Sadeh 	for (i = min; i<max; i++) {
10221fec7093SYehuda Sadeh 		__blk_end_request(rq, coll->status[i].rc,
10231fec7093SYehuda Sadeh 				  coll->status[i].bytes);
10241fec7093SYehuda Sadeh 		coll->num_done++;
10251fec7093SYehuda Sadeh 		kref_put(&coll->kref, rbd_coll_release);
10261fec7093SYehuda Sadeh 	}
10271fec7093SYehuda Sadeh 	spin_unlock_irq(q->queue_lock);
10281fec7093SYehuda Sadeh }
10291fec7093SYehuda Sadeh 
10301fec7093SYehuda Sadeh static void rbd_coll_end_req(struct rbd_request *req,
10311fec7093SYehuda Sadeh 			     int ret, u64 len)
10321fec7093SYehuda Sadeh {
10331fec7093SYehuda Sadeh 	rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len);
10341fec7093SYehuda Sadeh }
10351fec7093SYehuda Sadeh 
1036602adf40SYehuda Sadeh /*
1037602adf40SYehuda Sadeh  * Send ceph osd request
1038602adf40SYehuda Sadeh  */
1039602adf40SYehuda Sadeh static int rbd_do_request(struct request *rq,
10400ce1a794SAlex Elder 			  struct rbd_device *rbd_dev,
1041602adf40SYehuda Sadeh 			  struct ceph_snap_context *snapc,
1042602adf40SYehuda Sadeh 			  u64 snapid,
1043aded07eaSAlex Elder 			  const char *object_name, u64 ofs, u64 len,
1044602adf40SYehuda Sadeh 			  struct bio *bio,
1045602adf40SYehuda Sadeh 			  struct page **pages,
1046602adf40SYehuda Sadeh 			  int num_pages,
1047602adf40SYehuda Sadeh 			  int flags,
1048602adf40SYehuda Sadeh 			  struct ceph_osd_req_op *ops,
10491fec7093SYehuda Sadeh 			  struct rbd_req_coll *coll,
10501fec7093SYehuda Sadeh 			  int coll_index,
1051602adf40SYehuda Sadeh 			  void (*rbd_cb)(struct ceph_osd_request *req,
105259c2be1eSYehuda Sadeh 					 struct ceph_msg *msg),
105359c2be1eSYehuda Sadeh 			  struct ceph_osd_request **linger_req,
105459c2be1eSYehuda Sadeh 			  u64 *ver)
1055602adf40SYehuda Sadeh {
1056602adf40SYehuda Sadeh 	struct ceph_osd_request *req;
1057602adf40SYehuda Sadeh 	struct ceph_file_layout *layout;
1058602adf40SYehuda Sadeh 	int ret;
1059602adf40SYehuda Sadeh 	u64 bno;
1060602adf40SYehuda Sadeh 	struct timespec mtime = CURRENT_TIME;
1061602adf40SYehuda Sadeh 	struct rbd_request *req_data;
1062602adf40SYehuda Sadeh 	struct ceph_osd_request_head *reqhead;
10631dbb4399SAlex Elder 	struct ceph_osd_client *osdc;
1064602adf40SYehuda Sadeh 
1065602adf40SYehuda Sadeh 	req_data = kzalloc(sizeof(*req_data), GFP_NOIO);
10661fec7093SYehuda Sadeh 	if (!req_data) {
10671fec7093SYehuda Sadeh 		if (coll)
10681fec7093SYehuda Sadeh 			rbd_coll_end_req_index(rq, coll, coll_index,
10691fec7093SYehuda Sadeh 					       -ENOMEM, len);
10701fec7093SYehuda Sadeh 		return -ENOMEM;
10711fec7093SYehuda Sadeh 	}
1072602adf40SYehuda Sadeh 
10731fec7093SYehuda Sadeh 	if (coll) {
10741fec7093SYehuda Sadeh 		req_data->coll = coll;
10751fec7093SYehuda Sadeh 		req_data->coll_index = coll_index;
10761fec7093SYehuda Sadeh 	}
10771fec7093SYehuda Sadeh 
1078f7760dadSAlex Elder 	dout("rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]\n",
1079f7760dadSAlex Elder 		object_name, (unsigned long long) ofs,
1080f7760dadSAlex Elder 		(unsigned long long) len, coll, coll_index);
1081602adf40SYehuda Sadeh 
10820ce1a794SAlex Elder 	osdc = &rbd_dev->rbd_client->client->osdc;
10831dbb4399SAlex Elder 	req = ceph_osdc_alloc_request(osdc, flags, snapc, ops,
10841dbb4399SAlex Elder 					false, GFP_NOIO, pages, bio);
10854ad12621SSage Weil 	if (!req) {
10864ad12621SSage Weil 		ret = -ENOMEM;
1087602adf40SYehuda Sadeh 		goto done_pages;
1088602adf40SYehuda Sadeh 	}
1089602adf40SYehuda Sadeh 
1090602adf40SYehuda Sadeh 	req->r_callback = rbd_cb;
1091602adf40SYehuda Sadeh 
1092602adf40SYehuda Sadeh 	req_data->rq = rq;
1093602adf40SYehuda Sadeh 	req_data->bio = bio;
1094602adf40SYehuda Sadeh 	req_data->pages = pages;
1095602adf40SYehuda Sadeh 	req_data->len = len;
1096602adf40SYehuda Sadeh 
1097602adf40SYehuda Sadeh 	req->r_priv = req_data;
1098602adf40SYehuda Sadeh 
1099602adf40SYehuda Sadeh 	reqhead = req->r_request->front.iov_base;
1100602adf40SYehuda Sadeh 	reqhead->snapid = cpu_to_le64(CEPH_NOSNAP);
1101602adf40SYehuda Sadeh 
1102aded07eaSAlex Elder 	strncpy(req->r_oid, object_name, sizeof(req->r_oid));
1103602adf40SYehuda Sadeh 	req->r_oid_len = strlen(req->r_oid);
1104602adf40SYehuda Sadeh 
1105602adf40SYehuda Sadeh 	layout = &req->r_file_layout;
1106602adf40SYehuda Sadeh 	memset(layout, 0, sizeof(*layout));
1107602adf40SYehuda Sadeh 	layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
1108602adf40SYehuda Sadeh 	layout->fl_stripe_count = cpu_to_le32(1);
1109602adf40SYehuda Sadeh 	layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
11100d7dbfceSAlex Elder 	layout->fl_pg_pool = cpu_to_le32((int) rbd_dev->spec->pool_id);
11116cae3717SSage Weil 	ret = ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
11121dbb4399SAlex Elder 				   req, ops);
11136cae3717SSage Weil 	rbd_assert(ret == 0);
1114602adf40SYehuda Sadeh 
1115602adf40SYehuda Sadeh 	ceph_osdc_build_request(req, ofs, &len,
1116602adf40SYehuda Sadeh 				ops,
1117602adf40SYehuda Sadeh 				snapc,
1118602adf40SYehuda Sadeh 				&mtime,
1119602adf40SYehuda Sadeh 				req->r_oid, req->r_oid_len);
1120602adf40SYehuda Sadeh 
112159c2be1eSYehuda Sadeh 	if (linger_req) {
11221dbb4399SAlex Elder 		ceph_osdc_set_request_linger(osdc, req);
112359c2be1eSYehuda Sadeh 		*linger_req = req;
112459c2be1eSYehuda Sadeh 	}
112559c2be1eSYehuda Sadeh 
11261dbb4399SAlex Elder 	ret = ceph_osdc_start_request(osdc, req, false);
1127602adf40SYehuda Sadeh 	if (ret < 0)
1128602adf40SYehuda Sadeh 		goto done_err;
1129602adf40SYehuda Sadeh 
1130602adf40SYehuda Sadeh 	if (!rbd_cb) {
11311dbb4399SAlex Elder 		ret = ceph_osdc_wait_request(osdc, req);
113259c2be1eSYehuda Sadeh 		if (ver)
113359c2be1eSYehuda Sadeh 			*ver = le64_to_cpu(req->r_reassert_version.version);
1134bd919d45SAlex Elder 		dout("reassert_ver=%llu\n",
1135bd919d45SAlex Elder 			(unsigned long long)
11361fec7093SYehuda Sadeh 				le64_to_cpu(req->r_reassert_version.version));
1137602adf40SYehuda Sadeh 		ceph_osdc_put_request(req);
1138602adf40SYehuda Sadeh 	}
1139602adf40SYehuda Sadeh 	return ret;
1140602adf40SYehuda Sadeh 
1141602adf40SYehuda Sadeh done_err:
1142602adf40SYehuda Sadeh 	bio_chain_put(req_data->bio);
1143602adf40SYehuda Sadeh 	ceph_osdc_put_request(req);
1144602adf40SYehuda Sadeh done_pages:
11451fec7093SYehuda Sadeh 	rbd_coll_end_req(req_data, ret, len);
1146602adf40SYehuda Sadeh 	kfree(req_data);
1147602adf40SYehuda Sadeh 	return ret;
1148602adf40SYehuda Sadeh }
1149602adf40SYehuda Sadeh 
1150602adf40SYehuda Sadeh /*
1151602adf40SYehuda Sadeh  * Ceph osd op callback
1152602adf40SYehuda Sadeh  */
1153602adf40SYehuda Sadeh static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
1154602adf40SYehuda Sadeh {
1155602adf40SYehuda Sadeh 	struct rbd_request *req_data = req->r_priv;
1156602adf40SYehuda Sadeh 	struct ceph_osd_reply_head *replyhead;
1157602adf40SYehuda Sadeh 	struct ceph_osd_op *op;
1158602adf40SYehuda Sadeh 	__s32 rc;
1159602adf40SYehuda Sadeh 	u64 bytes;
1160602adf40SYehuda Sadeh 	int read_op;
1161602adf40SYehuda Sadeh 
1162602adf40SYehuda Sadeh 	/* parse reply */
1163602adf40SYehuda Sadeh 	replyhead = msg->front.iov_base;
1164602adf40SYehuda Sadeh 	WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
1165602adf40SYehuda Sadeh 	op = (void *)(replyhead + 1);
1166602adf40SYehuda Sadeh 	rc = le32_to_cpu(replyhead->result);
1167602adf40SYehuda Sadeh 	bytes = le64_to_cpu(op->extent.length);
1168895cfcc8SDan Carpenter 	read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ);
1169602adf40SYehuda Sadeh 
1170bd919d45SAlex Elder 	dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n",
1171bd919d45SAlex Elder 		(unsigned long long) bytes, read_op, (int) rc);
1172602adf40SYehuda Sadeh 
1173602adf40SYehuda Sadeh 	if (rc == -ENOENT && read_op) {
1174602adf40SYehuda Sadeh 		zero_bio_chain(req_data->bio, 0);
1175602adf40SYehuda Sadeh 		rc = 0;
1176602adf40SYehuda Sadeh 	} else if (rc == 0 && read_op && bytes < req_data->len) {
1177602adf40SYehuda Sadeh 		zero_bio_chain(req_data->bio, bytes);
1178602adf40SYehuda Sadeh 		bytes = req_data->len;
1179602adf40SYehuda Sadeh 	}
1180602adf40SYehuda Sadeh 
11811fec7093SYehuda Sadeh 	rbd_coll_end_req(req_data, rc, bytes);
1182602adf40SYehuda Sadeh 
1183602adf40SYehuda Sadeh 	if (req_data->bio)
1184602adf40SYehuda Sadeh 		bio_chain_put(req_data->bio);
1185602adf40SYehuda Sadeh 
1186602adf40SYehuda Sadeh 	ceph_osdc_put_request(req);
1187602adf40SYehuda Sadeh 	kfree(req_data);
1188602adf40SYehuda Sadeh }
1189602adf40SYehuda Sadeh 
/*
 * Minimal osd request callback: the reply message is ignored and the
 * request is simply dropped.
 */
static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
{
	struct ceph_osd_request *done = req;

	ceph_osdc_put_request(done);
}
119459c2be1eSYehuda Sadeh 
1195602adf40SYehuda Sadeh /*
1196602adf40SYehuda Sadeh  * Do a synchronous ceph osd operation
1197602adf40SYehuda Sadeh  */
11980ce1a794SAlex Elder static int rbd_req_sync_op(struct rbd_device *rbd_dev,
1199602adf40SYehuda Sadeh 			   struct ceph_snap_context *snapc,
1200602adf40SYehuda Sadeh 			   u64 snapid,
1201602adf40SYehuda Sadeh 			   int flags,
1202913d2fdcSAlex Elder 			   struct ceph_osd_req_op *ops,
1203aded07eaSAlex Elder 			   const char *object_name,
1204f8d4de6eSAlex Elder 			   u64 ofs, u64 inbound_size,
1205f8d4de6eSAlex Elder 			   char *inbound,
120659c2be1eSYehuda Sadeh 			   struct ceph_osd_request **linger_req,
120759c2be1eSYehuda Sadeh 			   u64 *ver)
1208602adf40SYehuda Sadeh {
1209602adf40SYehuda Sadeh 	int ret;
1210602adf40SYehuda Sadeh 	struct page **pages;
1211602adf40SYehuda Sadeh 	int num_pages;
1212913d2fdcSAlex Elder 
1213aafb230eSAlex Elder 	rbd_assert(ops != NULL);
1214602adf40SYehuda Sadeh 
1215f8d4de6eSAlex Elder 	num_pages = calc_pages_for(ofs, inbound_size);
1216602adf40SYehuda Sadeh 	pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
1217b8d0638aSDan Carpenter 	if (IS_ERR(pages))
1218b8d0638aSDan Carpenter 		return PTR_ERR(pages);
1219602adf40SYehuda Sadeh 
12200ce1a794SAlex Elder 	ret = rbd_do_request(NULL, rbd_dev, snapc, snapid,
1221f8d4de6eSAlex Elder 			  object_name, ofs, inbound_size, NULL,
1222602adf40SYehuda Sadeh 			  pages, num_pages,
1223602adf40SYehuda Sadeh 			  flags,
1224602adf40SYehuda Sadeh 			  ops,
12251fec7093SYehuda Sadeh 			  NULL, 0,
122659c2be1eSYehuda Sadeh 			  NULL,
122759c2be1eSYehuda Sadeh 			  linger_req, ver);
1228602adf40SYehuda Sadeh 	if (ret < 0)
1229913d2fdcSAlex Elder 		goto done;
1230602adf40SYehuda Sadeh 
1231f8d4de6eSAlex Elder 	if ((flags & CEPH_OSD_FLAG_READ) && inbound)
1232f8d4de6eSAlex Elder 		ret = ceph_copy_from_page_vector(pages, inbound, ofs, ret);
1233602adf40SYehuda Sadeh 
1234602adf40SYehuda Sadeh done:
1235602adf40SYehuda Sadeh 	ceph_release_page_vector(pages, num_pages);
1236602adf40SYehuda Sadeh 	return ret;
1237602adf40SYehuda Sadeh }
1238602adf40SYehuda Sadeh 
1239602adf40SYehuda Sadeh /*
1240602adf40SYehuda Sadeh  * Do an asynchronous ceph osd operation
1241602adf40SYehuda Sadeh  */
1242602adf40SYehuda Sadeh static int rbd_do_op(struct request *rq,
1243602adf40SYehuda Sadeh 		     struct rbd_device *rbd_dev,
1244602adf40SYehuda Sadeh 		     struct ceph_snap_context *snapc,
1245602adf40SYehuda Sadeh 		     u64 ofs, u64 len,
12461fec7093SYehuda Sadeh 		     struct bio *bio,
12471fec7093SYehuda Sadeh 		     struct rbd_req_coll *coll,
12481fec7093SYehuda Sadeh 		     int coll_index)
1249602adf40SYehuda Sadeh {
1250602adf40SYehuda Sadeh 	char *seg_name;
1251602adf40SYehuda Sadeh 	u64 seg_ofs;
1252602adf40SYehuda Sadeh 	u64 seg_len;
1253602adf40SYehuda Sadeh 	int ret;
1254602adf40SYehuda Sadeh 	struct ceph_osd_req_op *ops;
1255602adf40SYehuda Sadeh 	u32 payload_len;
1256ff2e4bb5SAlex Elder 	int opcode;
1257ff2e4bb5SAlex Elder 	int flags;
12584634246dSAlex Elder 	u64 snapid;
1259602adf40SYehuda Sadeh 
126065ccfe21SAlex Elder 	seg_name = rbd_segment_name(rbd_dev, ofs);
1261602adf40SYehuda Sadeh 	if (!seg_name)
1262602adf40SYehuda Sadeh 		return -ENOMEM;
126365ccfe21SAlex Elder 	seg_len = rbd_segment_length(rbd_dev, ofs, len);
126465ccfe21SAlex Elder 	seg_ofs = rbd_segment_offset(rbd_dev, ofs);
1265602adf40SYehuda Sadeh 
1266ff2e4bb5SAlex Elder 	if (rq_data_dir(rq) == WRITE) {
1267ff2e4bb5SAlex Elder 		opcode = CEPH_OSD_OP_WRITE;
1268ff2e4bb5SAlex Elder 		flags = CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK;
12694634246dSAlex Elder 		snapid = CEPH_NOSNAP;
1270ff2e4bb5SAlex Elder 		payload_len = seg_len;
1271ff2e4bb5SAlex Elder 	} else {
1272ff2e4bb5SAlex Elder 		opcode = CEPH_OSD_OP_READ;
1273ff2e4bb5SAlex Elder 		flags = CEPH_OSD_FLAG_READ;
12744634246dSAlex Elder 		snapc = NULL;
12750d7dbfceSAlex Elder 		snapid = rbd_dev->spec->snap_id;
1276ff2e4bb5SAlex Elder 		payload_len = 0;
1277ff2e4bb5SAlex Elder 	}
1278602adf40SYehuda Sadeh 
127957cfc106SAlex Elder 	ret = -ENOMEM;
128057cfc106SAlex Elder 	ops = rbd_create_rw_ops(1, opcode, payload_len);
128157cfc106SAlex Elder 	if (!ops)
1282602adf40SYehuda Sadeh 		goto done;
1283602adf40SYehuda Sadeh 
1284602adf40SYehuda Sadeh 	/* we've taken care of segment sizes earlier when we
1285602adf40SYehuda Sadeh 	   cloned the bios. We should never have a segment
1286602adf40SYehuda Sadeh 	   truncated at this point */
1287aafb230eSAlex Elder 	rbd_assert(seg_len == len);
1288602adf40SYehuda Sadeh 
1289602adf40SYehuda Sadeh 	ret = rbd_do_request(rq, rbd_dev, snapc, snapid,
1290602adf40SYehuda Sadeh 			     seg_name, seg_ofs, seg_len,
1291602adf40SYehuda Sadeh 			     bio,
1292602adf40SYehuda Sadeh 			     NULL, 0,
1293602adf40SYehuda Sadeh 			     flags,
1294602adf40SYehuda Sadeh 			     ops,
12951fec7093SYehuda Sadeh 			     coll, coll_index,
129659c2be1eSYehuda Sadeh 			     rbd_req_cb, 0, NULL);
129711f77002SSage Weil 
129811f77002SSage Weil 	rbd_destroy_ops(ops);
1299602adf40SYehuda Sadeh done:
1300602adf40SYehuda Sadeh 	kfree(seg_name);
1301602adf40SYehuda Sadeh 	return ret;
1302602adf40SYehuda Sadeh }
1303602adf40SYehuda Sadeh 
1304602adf40SYehuda Sadeh /*
1305602adf40SYehuda Sadeh  * Request sync osd read
1306602adf40SYehuda Sadeh  */
13070ce1a794SAlex Elder static int rbd_req_sync_read(struct rbd_device *rbd_dev,
1308602adf40SYehuda Sadeh 			  u64 snapid,
1309aded07eaSAlex Elder 			  const char *object_name,
1310602adf40SYehuda Sadeh 			  u64 ofs, u64 len,
131159c2be1eSYehuda Sadeh 			  char *buf,
131259c2be1eSYehuda Sadeh 			  u64 *ver)
1313602adf40SYehuda Sadeh {
1314913d2fdcSAlex Elder 	struct ceph_osd_req_op *ops;
1315913d2fdcSAlex Elder 	int ret;
1316913d2fdcSAlex Elder 
1317913d2fdcSAlex Elder 	ops = rbd_create_rw_ops(1, CEPH_OSD_OP_READ, 0);
1318913d2fdcSAlex Elder 	if (!ops)
1319913d2fdcSAlex Elder 		return -ENOMEM;
1320913d2fdcSAlex Elder 
1321913d2fdcSAlex Elder 	ret = rbd_req_sync_op(rbd_dev, NULL,
1322b06e6a6bSJosh Durgin 			       snapid,
1323602adf40SYehuda Sadeh 			       CEPH_OSD_FLAG_READ,
1324913d2fdcSAlex Elder 			       ops, object_name, ofs, len, buf, NULL, ver);
1325913d2fdcSAlex Elder 	rbd_destroy_ops(ops);
1326913d2fdcSAlex Elder 
1327913d2fdcSAlex Elder 	return ret;
1328602adf40SYehuda Sadeh }
1329602adf40SYehuda Sadeh 
1330602adf40SYehuda Sadeh /*
133159c2be1eSYehuda Sadeh  * Request sync osd watch
133259c2be1eSYehuda Sadeh  */
13330ce1a794SAlex Elder static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev,
133459c2be1eSYehuda Sadeh 				   u64 ver,
13357f0a24d8SAlex Elder 				   u64 notify_id)
133659c2be1eSYehuda Sadeh {
133759c2be1eSYehuda Sadeh 	struct ceph_osd_req_op *ops;
133811f77002SSage Weil 	int ret;
133911f77002SSage Weil 
134057cfc106SAlex Elder 	ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY_ACK, 0);
134157cfc106SAlex Elder 	if (!ops)
134257cfc106SAlex Elder 		return -ENOMEM;
134359c2be1eSYehuda Sadeh 
1344a71b891bSJosh Durgin 	ops[0].watch.ver = cpu_to_le64(ver);
134559c2be1eSYehuda Sadeh 	ops[0].watch.cookie = notify_id;
134659c2be1eSYehuda Sadeh 	ops[0].watch.flag = 0;
134759c2be1eSYehuda Sadeh 
13480ce1a794SAlex Elder 	ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP,
13497f0a24d8SAlex Elder 			  rbd_dev->header_name, 0, 0, NULL,
1350ad4f232fSAlex Elder 			  NULL, 0,
135159c2be1eSYehuda Sadeh 			  CEPH_OSD_FLAG_READ,
135259c2be1eSYehuda Sadeh 			  ops,
13531fec7093SYehuda Sadeh 			  NULL, 0,
135459c2be1eSYehuda Sadeh 			  rbd_simple_req_cb, 0, NULL);
135559c2be1eSYehuda Sadeh 
135659c2be1eSYehuda Sadeh 	rbd_destroy_ops(ops);
135759c2be1eSYehuda Sadeh 	return ret;
135859c2be1eSYehuda Sadeh }
135959c2be1eSYehuda Sadeh 
136059c2be1eSYehuda Sadeh static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
136159c2be1eSYehuda Sadeh {
13620ce1a794SAlex Elder 	struct rbd_device *rbd_dev = (struct rbd_device *)data;
1363a71b891bSJosh Durgin 	u64 hver;
136413143d2dSSage Weil 	int rc;
136513143d2dSSage Weil 
13660ce1a794SAlex Elder 	if (!rbd_dev)
136759c2be1eSYehuda Sadeh 		return;
136859c2be1eSYehuda Sadeh 
1369bd919d45SAlex Elder 	dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n",
1370bd919d45SAlex Elder 		rbd_dev->header_name, (unsigned long long) notify_id,
1371bd919d45SAlex Elder 		(unsigned int) opcode);
1372117973fbSAlex Elder 	rc = rbd_dev_refresh(rbd_dev, &hver);
137313143d2dSSage Weil 	if (rc)
1374f0f8cef5SAlex Elder 		pr_warning(RBD_DRV_NAME "%d got notification but failed to "
13750ce1a794SAlex Elder 			   " update snaps: %d\n", rbd_dev->major, rc);
137659c2be1eSYehuda Sadeh 
13777f0a24d8SAlex Elder 	rbd_req_sync_notify_ack(rbd_dev, hver, notify_id);
137859c2be1eSYehuda Sadeh }
137959c2be1eSYehuda Sadeh 
138059c2be1eSYehuda Sadeh /*
138159c2be1eSYehuda Sadeh  * Request sync osd watch
138259c2be1eSYehuda Sadeh  */
13830e6f322dSAlex Elder static int rbd_req_sync_watch(struct rbd_device *rbd_dev)
138459c2be1eSYehuda Sadeh {
138559c2be1eSYehuda Sadeh 	struct ceph_osd_req_op *ops;
13860ce1a794SAlex Elder 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
138757cfc106SAlex Elder 	int ret;
138859c2be1eSYehuda Sadeh 
138957cfc106SAlex Elder 	ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0);
139057cfc106SAlex Elder 	if (!ops)
139157cfc106SAlex Elder 		return -ENOMEM;
139259c2be1eSYehuda Sadeh 
139359c2be1eSYehuda Sadeh 	ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0,
13940ce1a794SAlex Elder 				     (void *)rbd_dev, &rbd_dev->watch_event);
139559c2be1eSYehuda Sadeh 	if (ret < 0)
139659c2be1eSYehuda Sadeh 		goto fail;
139759c2be1eSYehuda Sadeh 
13980e6f322dSAlex Elder 	ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version);
13990ce1a794SAlex Elder 	ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie);
140059c2be1eSYehuda Sadeh 	ops[0].watch.flag = 1;
140159c2be1eSYehuda Sadeh 
14020ce1a794SAlex Elder 	ret = rbd_req_sync_op(rbd_dev, NULL,
140359c2be1eSYehuda Sadeh 			      CEPH_NOSNAP,
140459c2be1eSYehuda Sadeh 			      CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
140559c2be1eSYehuda Sadeh 			      ops,
14060e6f322dSAlex Elder 			      rbd_dev->header_name,
14070e6f322dSAlex Elder 			      0, 0, NULL,
14080ce1a794SAlex Elder 			      &rbd_dev->watch_request, NULL);
140959c2be1eSYehuda Sadeh 
141059c2be1eSYehuda Sadeh 	if (ret < 0)
141159c2be1eSYehuda Sadeh 		goto fail_event;
141259c2be1eSYehuda Sadeh 
141359c2be1eSYehuda Sadeh 	rbd_destroy_ops(ops);
141459c2be1eSYehuda Sadeh 	return 0;
141559c2be1eSYehuda Sadeh 
141659c2be1eSYehuda Sadeh fail_event:
14170ce1a794SAlex Elder 	ceph_osdc_cancel_event(rbd_dev->watch_event);
14180ce1a794SAlex Elder 	rbd_dev->watch_event = NULL;
141959c2be1eSYehuda Sadeh fail:
142059c2be1eSYehuda Sadeh 	rbd_destroy_ops(ops);
142159c2be1eSYehuda Sadeh 	return ret;
142259c2be1eSYehuda Sadeh }
142359c2be1eSYehuda Sadeh 
142479e3057cSYehuda Sadeh /*
142579e3057cSYehuda Sadeh  * Request sync osd unwatch
142679e3057cSYehuda Sadeh  */
1427070c633fSAlex Elder static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev)
142879e3057cSYehuda Sadeh {
142979e3057cSYehuda Sadeh 	struct ceph_osd_req_op *ops;
143057cfc106SAlex Elder 	int ret;
143179e3057cSYehuda Sadeh 
143257cfc106SAlex Elder 	ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0);
143357cfc106SAlex Elder 	if (!ops)
143457cfc106SAlex Elder 		return -ENOMEM;
143579e3057cSYehuda Sadeh 
143679e3057cSYehuda Sadeh 	ops[0].watch.ver = 0;
14370ce1a794SAlex Elder 	ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie);
143879e3057cSYehuda Sadeh 	ops[0].watch.flag = 0;
143979e3057cSYehuda Sadeh 
14400ce1a794SAlex Elder 	ret = rbd_req_sync_op(rbd_dev, NULL,
144179e3057cSYehuda Sadeh 			      CEPH_NOSNAP,
144279e3057cSYehuda Sadeh 			      CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
144379e3057cSYehuda Sadeh 			      ops,
1444070c633fSAlex Elder 			      rbd_dev->header_name,
1445070c633fSAlex Elder 			      0, 0, NULL, NULL, NULL);
1446070c633fSAlex Elder 
144779e3057cSYehuda Sadeh 
144879e3057cSYehuda Sadeh 	rbd_destroy_ops(ops);
14490ce1a794SAlex Elder 	ceph_osdc_cancel_event(rbd_dev->watch_event);
14500ce1a794SAlex Elder 	rbd_dev->watch_event = NULL;
145179e3057cSYehuda Sadeh 	return ret;
145279e3057cSYehuda Sadeh }
145379e3057cSYehuda Sadeh 
145459c2be1eSYehuda Sadeh /*
14553cb4a687SAlex Elder  * Synchronous osd object method call
1456602adf40SYehuda Sadeh  */
14570ce1a794SAlex Elder static int rbd_req_sync_exec(struct rbd_device *rbd_dev,
1458aded07eaSAlex Elder 			     const char *object_name,
1459aded07eaSAlex Elder 			     const char *class_name,
1460aded07eaSAlex Elder 			     const char *method_name,
14613cb4a687SAlex Elder 			     const char *outbound,
14623cb4a687SAlex Elder 			     size_t outbound_size,
1463f8d4de6eSAlex Elder 			     char *inbound,
1464f8d4de6eSAlex Elder 			     size_t inbound_size,
14653cb4a687SAlex Elder 			     int flags,
146659c2be1eSYehuda Sadeh 			     u64 *ver)
1467602adf40SYehuda Sadeh {
1468602adf40SYehuda Sadeh 	struct ceph_osd_req_op *ops;
1469aded07eaSAlex Elder 	int class_name_len = strlen(class_name);
1470aded07eaSAlex Elder 	int method_name_len = strlen(method_name);
14713cb4a687SAlex Elder 	int payload_size;
147257cfc106SAlex Elder 	int ret;
147357cfc106SAlex Elder 
14743cb4a687SAlex Elder 	/*
14753cb4a687SAlex Elder 	 * Any input parameters required by the method we're calling
14763cb4a687SAlex Elder 	 * will be sent along with the class and method names as
14773cb4a687SAlex Elder 	 * part of the message payload.  That data and its size are
14783cb4a687SAlex Elder 	 * supplied via the indata and indata_len fields (named from
14793cb4a687SAlex Elder 	 * the perspective of the server side) in the OSD request
14803cb4a687SAlex Elder 	 * operation.
14813cb4a687SAlex Elder 	 */
14823cb4a687SAlex Elder 	payload_size = class_name_len + method_name_len + outbound_size;
14833cb4a687SAlex Elder 	ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, payload_size);
148457cfc106SAlex Elder 	if (!ops)
148557cfc106SAlex Elder 		return -ENOMEM;
1486602adf40SYehuda Sadeh 
1487aded07eaSAlex Elder 	ops[0].cls.class_name = class_name;
1488aded07eaSAlex Elder 	ops[0].cls.class_len = (__u8) class_name_len;
1489aded07eaSAlex Elder 	ops[0].cls.method_name = method_name;
1490aded07eaSAlex Elder 	ops[0].cls.method_len = (__u8) method_name_len;
1491602adf40SYehuda Sadeh 	ops[0].cls.argc = 0;
14923cb4a687SAlex Elder 	ops[0].cls.indata = outbound;
14933cb4a687SAlex Elder 	ops[0].cls.indata_len = outbound_size;
1494602adf40SYehuda Sadeh 
14950ce1a794SAlex Elder 	ret = rbd_req_sync_op(rbd_dev, NULL,
1496602adf40SYehuda Sadeh 			       CEPH_NOSNAP,
14973cb4a687SAlex Elder 			       flags, ops,
1498f8d4de6eSAlex Elder 			       object_name, 0, inbound_size, inbound,
1499f8d4de6eSAlex Elder 			       NULL, ver);
1500602adf40SYehuda Sadeh 
1501602adf40SYehuda Sadeh 	rbd_destroy_ops(ops);
1502602adf40SYehuda Sadeh 
1503602adf40SYehuda Sadeh 	dout("cls_exec returned %d\n", ret);
1504602adf40SYehuda Sadeh 	return ret;
1505602adf40SYehuda Sadeh }
1506602adf40SYehuda Sadeh 
15071fec7093SYehuda Sadeh static struct rbd_req_coll *rbd_alloc_coll(int num_reqs)
15081fec7093SYehuda Sadeh {
15091fec7093SYehuda Sadeh 	struct rbd_req_coll *coll =
15101fec7093SYehuda Sadeh 			kzalloc(sizeof(struct rbd_req_coll) +
15111fec7093SYehuda Sadeh 			        sizeof(struct rbd_req_status) * num_reqs,
15121fec7093SYehuda Sadeh 				GFP_ATOMIC);
15131fec7093SYehuda Sadeh 
15141fec7093SYehuda Sadeh 	if (!coll)
15151fec7093SYehuda Sadeh 		return NULL;
15161fec7093SYehuda Sadeh 	coll->total = num_reqs;
15171fec7093SYehuda Sadeh 	kref_init(&coll->kref);
15181fec7093SYehuda Sadeh 	return coll;
15191fec7093SYehuda Sadeh }
15201fec7093SYehuda Sadeh 
1521602adf40SYehuda Sadeh /*
1522602adf40SYehuda Sadeh  * block device queue callback
1523602adf40SYehuda Sadeh  */
1524602adf40SYehuda Sadeh static void rbd_rq_fn(struct request_queue *q)
1525602adf40SYehuda Sadeh {
1526602adf40SYehuda Sadeh 	struct rbd_device *rbd_dev = q->queuedata;
1527602adf40SYehuda Sadeh 	struct request *rq;
1528602adf40SYehuda Sadeh 
152900f1f36fSAlex Elder 	while ((rq = blk_fetch_request(q))) {
1530602adf40SYehuda Sadeh 		struct bio *bio;
1531602adf40SYehuda Sadeh 		bool do_write;
1532bd919d45SAlex Elder 		unsigned int size;
1533602adf40SYehuda Sadeh 		u64 ofs;
15341fec7093SYehuda Sadeh 		int num_segs, cur_seg = 0;
15351fec7093SYehuda Sadeh 		struct rbd_req_coll *coll;
1536d1d25646SJosh Durgin 		struct ceph_snap_context *snapc;
1537f7760dadSAlex Elder 		unsigned int bio_offset;
1538602adf40SYehuda Sadeh 
1539602adf40SYehuda Sadeh 		dout("fetched request\n");
1540602adf40SYehuda Sadeh 
1541602adf40SYehuda Sadeh 		/* filter out block requests we don't understand */
1542602adf40SYehuda Sadeh 		if ((rq->cmd_type != REQ_TYPE_FS)) {
1543602adf40SYehuda Sadeh 			__blk_end_request_all(rq, 0);
154400f1f36fSAlex Elder 			continue;
1545602adf40SYehuda Sadeh 		}
1546602adf40SYehuda Sadeh 
1547602adf40SYehuda Sadeh 		/* deduce our operation (read, write) */
1548602adf40SYehuda Sadeh 		do_write = (rq_data_dir(rq) == WRITE);
1549f84344f3SAlex Elder 		if (do_write && rbd_dev->mapping.read_only) {
1550602adf40SYehuda Sadeh 			__blk_end_request_all(rq, -EROFS);
155100f1f36fSAlex Elder 			continue;
1552602adf40SYehuda Sadeh 		}
1553602adf40SYehuda Sadeh 
1554602adf40SYehuda Sadeh 		spin_unlock_irq(q->queue_lock);
1555602adf40SYehuda Sadeh 
1556e88a36ecSJosh Durgin 		down_read(&rbd_dev->header_rwsem);
1557e88a36ecSJosh Durgin 
1558daba5fdbSAlex Elder 		if (!rbd_dev->exists) {
15590d7dbfceSAlex Elder 			rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
1560d1d25646SJosh Durgin 			up_read(&rbd_dev->header_rwsem);
1561e88a36ecSJosh Durgin 			dout("request for non-existent snapshot");
1562e88a36ecSJosh Durgin 			spin_lock_irq(q->queue_lock);
1563e88a36ecSJosh Durgin 			__blk_end_request_all(rq, -ENXIO);
1564e88a36ecSJosh Durgin 			continue;
1565e88a36ecSJosh Durgin 		}
1566d1d25646SJosh Durgin 
1567d1d25646SJosh Durgin 		snapc = ceph_get_snap_context(rbd_dev->header.snapc);
1568d1d25646SJosh Durgin 
1569d1d25646SJosh Durgin 		up_read(&rbd_dev->header_rwsem);
1570e88a36ecSJosh Durgin 
1571f7760dadSAlex Elder 		size = blk_rq_bytes(rq);
1572f7760dadSAlex Elder 		ofs = blk_rq_pos(rq) * SECTOR_SIZE;
1573f7760dadSAlex Elder 		bio = rq->bio;
1574f7760dadSAlex Elder 
1575602adf40SYehuda Sadeh 		dout("%s 0x%x bytes at 0x%llx\n",
1576602adf40SYehuda Sadeh 		     do_write ? "write" : "read",
1577bd919d45SAlex Elder 		     size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE);
1578602adf40SYehuda Sadeh 
15791fec7093SYehuda Sadeh 		num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size);
1580df111be6SAlex Elder 		if (num_segs <= 0) {
1581df111be6SAlex Elder 			spin_lock_irq(q->queue_lock);
1582df111be6SAlex Elder 			__blk_end_request_all(rq, num_segs);
1583df111be6SAlex Elder 			ceph_put_snap_context(snapc);
1584df111be6SAlex Elder 			continue;
1585df111be6SAlex Elder 		}
15861fec7093SYehuda Sadeh 		coll = rbd_alloc_coll(num_segs);
15871fec7093SYehuda Sadeh 		if (!coll) {
15881fec7093SYehuda Sadeh 			spin_lock_irq(q->queue_lock);
15891fec7093SYehuda Sadeh 			__blk_end_request_all(rq, -ENOMEM);
1590d1d25646SJosh Durgin 			ceph_put_snap_context(snapc);
159100f1f36fSAlex Elder 			continue;
15921fec7093SYehuda Sadeh 		}
15931fec7093SYehuda Sadeh 
1594f7760dadSAlex Elder 		bio_offset = 0;
1595602adf40SYehuda Sadeh 		do {
1596f7760dadSAlex Elder 			u64 limit = rbd_segment_length(rbd_dev, ofs, size);
1597f7760dadSAlex Elder 			unsigned int chain_size;
1598f7760dadSAlex Elder 			struct bio *bio_chain;
1599f7760dadSAlex Elder 
1600f7760dadSAlex Elder 			BUG_ON(limit > (u64) UINT_MAX);
1601f7760dadSAlex Elder 			chain_size = (unsigned int) limit;
1602bd919d45SAlex Elder 			dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt);
1603f7760dadSAlex Elder 
16041fec7093SYehuda Sadeh 			kref_get(&coll->kref);
1605f7760dadSAlex Elder 
1606f7760dadSAlex Elder 			/* Pass a cloned bio chain via an osd request */
1607f7760dadSAlex Elder 
1608f7760dadSAlex Elder 			bio_chain = bio_chain_clone_range(&bio,
1609f7760dadSAlex Elder 						&bio_offset, chain_size,
1610f7760dadSAlex Elder 						GFP_ATOMIC);
1611f7760dadSAlex Elder 			if (bio_chain)
16124634246dSAlex Elder 				(void) rbd_do_op(rq, rbd_dev, snapc,
1613f7760dadSAlex Elder 						ofs, chain_size,
1614f7760dadSAlex Elder 						bio_chain, coll, cur_seg);
16154634246dSAlex Elder 			else
16161fec7093SYehuda Sadeh 				rbd_coll_end_req_index(rq, coll, cur_seg,
1617f7760dadSAlex Elder 						       -ENOMEM, chain_size);
1618f7760dadSAlex Elder 			size -= chain_size;
1619f7760dadSAlex Elder 			ofs += chain_size;
1620602adf40SYehuda Sadeh 
16211fec7093SYehuda Sadeh 			cur_seg++;
1622602adf40SYehuda Sadeh 		} while (size > 0);
16231fec7093SYehuda Sadeh 		kref_put(&coll->kref, rbd_coll_release);
1624602adf40SYehuda Sadeh 
1625602adf40SYehuda Sadeh 		spin_lock_irq(q->queue_lock);
1626d1d25646SJosh Durgin 
1627d1d25646SJosh Durgin 		ceph_put_snap_context(snapc);
1628602adf40SYehuda Sadeh 	}
1629602adf40SYehuda Sadeh }
1630602adf40SYehuda Sadeh 
1631602adf40SYehuda Sadeh /*
1632602adf40SYehuda Sadeh  * a queue callback. Makes sure that we don't create a bio that spans across
1633602adf40SYehuda Sadeh  * multiple osd objects. One exception would be with a single page bios,
1634f7760dadSAlex Elder  * which we handle later at bio_chain_clone_range()
1635602adf40SYehuda Sadeh  */
1636602adf40SYehuda Sadeh static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
1637602adf40SYehuda Sadeh 			  struct bio_vec *bvec)
1638602adf40SYehuda Sadeh {
1639602adf40SYehuda Sadeh 	struct rbd_device *rbd_dev = q->queuedata;
1640e5cfeed2SAlex Elder 	sector_t sector_offset;
1641e5cfeed2SAlex Elder 	sector_t sectors_per_obj;
1642e5cfeed2SAlex Elder 	sector_t obj_sector_offset;
1643e5cfeed2SAlex Elder 	int ret;
1644602adf40SYehuda Sadeh 
1645e5cfeed2SAlex Elder 	/*
1646e5cfeed2SAlex Elder 	 * Find how far into its rbd object the partition-relative
1647e5cfeed2SAlex Elder 	 * bio start sector is to offset relative to the enclosing
1648e5cfeed2SAlex Elder 	 * device.
1649e5cfeed2SAlex Elder 	 */
1650e5cfeed2SAlex Elder 	sector_offset = get_start_sect(bmd->bi_bdev) + bmd->bi_sector;
1651e5cfeed2SAlex Elder 	sectors_per_obj = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT);
1652e5cfeed2SAlex Elder 	obj_sector_offset = sector_offset & (sectors_per_obj - 1);
1653593a9e7bSAlex Elder 
1654e5cfeed2SAlex Elder 	/*
1655e5cfeed2SAlex Elder 	 * Compute the number of bytes from that offset to the end
1656e5cfeed2SAlex Elder 	 * of the object.  Account for what's already used by the bio.
1657e5cfeed2SAlex Elder 	 */
1658e5cfeed2SAlex Elder 	ret = (int) (sectors_per_obj - obj_sector_offset) << SECTOR_SHIFT;
1659e5cfeed2SAlex Elder 	if (ret > bmd->bi_size)
1660e5cfeed2SAlex Elder 		ret -= bmd->bi_size;
1661e5cfeed2SAlex Elder 	else
1662e5cfeed2SAlex Elder 		ret = 0;
1663e5cfeed2SAlex Elder 
1664e5cfeed2SAlex Elder 	/*
1665e5cfeed2SAlex Elder 	 * Don't send back more than was asked for.  And if the bio
1666e5cfeed2SAlex Elder 	 * was empty, let the whole thing through because:  "Note
1667e5cfeed2SAlex Elder 	 * that a block device *must* allow a single page to be
1668e5cfeed2SAlex Elder 	 * added to an empty bio."
1669e5cfeed2SAlex Elder 	 */
1670e5cfeed2SAlex Elder 	rbd_assert(bvec->bv_len <= PAGE_SIZE);
1671e5cfeed2SAlex Elder 	if (ret > (int) bvec->bv_len || !bmd->bi_size)
1672e5cfeed2SAlex Elder 		ret = (int) bvec->bv_len;
1673e5cfeed2SAlex Elder 
1674e5cfeed2SAlex Elder 	return ret;
1675602adf40SYehuda Sadeh }
1676602adf40SYehuda Sadeh 
1677602adf40SYehuda Sadeh static void rbd_free_disk(struct rbd_device *rbd_dev)
1678602adf40SYehuda Sadeh {
1679602adf40SYehuda Sadeh 	struct gendisk *disk = rbd_dev->disk;
1680602adf40SYehuda Sadeh 
1681602adf40SYehuda Sadeh 	if (!disk)
1682602adf40SYehuda Sadeh 		return;
1683602adf40SYehuda Sadeh 
1684602adf40SYehuda Sadeh 	if (disk->flags & GENHD_FL_UP)
1685602adf40SYehuda Sadeh 		del_gendisk(disk);
1686602adf40SYehuda Sadeh 	if (disk->queue)
1687602adf40SYehuda Sadeh 		blk_cleanup_queue(disk->queue);
1688602adf40SYehuda Sadeh 	put_disk(disk);
1689602adf40SYehuda Sadeh }
1690602adf40SYehuda Sadeh 
1691602adf40SYehuda Sadeh /*
16924156d998SAlex Elder  * Read the complete header for the given rbd device.
16934156d998SAlex Elder  *
16944156d998SAlex Elder  * Returns a pointer to a dynamically-allocated buffer containing
16954156d998SAlex Elder  * the complete and validated header.  Caller can pass the address
16964156d998SAlex Elder  * of a variable that will be filled in with the version of the
16974156d998SAlex Elder  * header object at the time it was read.
16984156d998SAlex Elder  *
16994156d998SAlex Elder  * Returns a pointer-coded errno if a failure occurs.
17004156d998SAlex Elder  */
17014156d998SAlex Elder static struct rbd_image_header_ondisk *
17024156d998SAlex Elder rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version)
17034156d998SAlex Elder {
17044156d998SAlex Elder 	struct rbd_image_header_ondisk *ondisk = NULL;
17054156d998SAlex Elder 	u32 snap_count = 0;
17064156d998SAlex Elder 	u64 names_size = 0;
17074156d998SAlex Elder 	u32 want_count;
17084156d998SAlex Elder 	int ret;
17094156d998SAlex Elder 
17104156d998SAlex Elder 	/*
17114156d998SAlex Elder 	 * The complete header will include an array of its 64-bit
17124156d998SAlex Elder 	 * snapshot ids, followed by the names of those snapshots as
17134156d998SAlex Elder 	 * a contiguous block of NUL-terminated strings.  Note that
17144156d998SAlex Elder 	 * the number of snapshots could change by the time we read
17154156d998SAlex Elder 	 * it in, in which case we re-read it.
17164156d998SAlex Elder 	 */
17174156d998SAlex Elder 	do {
17184156d998SAlex Elder 		size_t size;
17194156d998SAlex Elder 
17204156d998SAlex Elder 		kfree(ondisk);
17214156d998SAlex Elder 
17224156d998SAlex Elder 		size = sizeof (*ondisk);
17234156d998SAlex Elder 		size += snap_count * sizeof (struct rbd_image_snap_ondisk);
17244156d998SAlex Elder 		size += names_size;
17254156d998SAlex Elder 		ondisk = kmalloc(size, GFP_KERNEL);
17264156d998SAlex Elder 		if (!ondisk)
17274156d998SAlex Elder 			return ERR_PTR(-ENOMEM);
17284156d998SAlex Elder 
17294156d998SAlex Elder 		ret = rbd_req_sync_read(rbd_dev, CEPH_NOSNAP,
17304156d998SAlex Elder 				       rbd_dev->header_name,
17314156d998SAlex Elder 				       0, size,
17324156d998SAlex Elder 				       (char *) ondisk, version);
17334156d998SAlex Elder 
17344156d998SAlex Elder 		if (ret < 0)
17354156d998SAlex Elder 			goto out_err;
17364156d998SAlex Elder 		if (WARN_ON((size_t) ret < size)) {
17374156d998SAlex Elder 			ret = -ENXIO;
17384156d998SAlex Elder 			pr_warning("short header read for image %s"
17394156d998SAlex Elder 					" (want %zd got %d)\n",
17400d7dbfceSAlex Elder 				rbd_dev->spec->image_name, size, ret);
17414156d998SAlex Elder 			goto out_err;
17424156d998SAlex Elder 		}
17434156d998SAlex Elder 		if (!rbd_dev_ondisk_valid(ondisk)) {
17444156d998SAlex Elder 			ret = -ENXIO;
17454156d998SAlex Elder 			pr_warning("invalid header for image %s\n",
17460d7dbfceSAlex Elder 				rbd_dev->spec->image_name);
17474156d998SAlex Elder 			goto out_err;
17484156d998SAlex Elder 		}
17494156d998SAlex Elder 
17504156d998SAlex Elder 		names_size = le64_to_cpu(ondisk->snap_names_len);
17514156d998SAlex Elder 		want_count = snap_count;
17524156d998SAlex Elder 		snap_count = le32_to_cpu(ondisk->snap_count);
17534156d998SAlex Elder 	} while (snap_count != want_count);
17544156d998SAlex Elder 
17554156d998SAlex Elder 	return ondisk;
17564156d998SAlex Elder 
17574156d998SAlex Elder out_err:
17584156d998SAlex Elder 	kfree(ondisk);
17594156d998SAlex Elder 
17604156d998SAlex Elder 	return ERR_PTR(ret);
17614156d998SAlex Elder }
17624156d998SAlex Elder 
17634156d998SAlex Elder /*
1764602adf40SYehuda Sadeh  * reload the ondisk the header
1765602adf40SYehuda Sadeh  */
1766602adf40SYehuda Sadeh static int rbd_read_header(struct rbd_device *rbd_dev,
1767602adf40SYehuda Sadeh 			   struct rbd_image_header *header)
1768602adf40SYehuda Sadeh {
17694156d998SAlex Elder 	struct rbd_image_header_ondisk *ondisk;
17704156d998SAlex Elder 	u64 ver = 0;
17714156d998SAlex Elder 	int ret;
1772602adf40SYehuda Sadeh 
17734156d998SAlex Elder 	ondisk = rbd_dev_v1_header_read(rbd_dev, &ver);
17744156d998SAlex Elder 	if (IS_ERR(ondisk))
17754156d998SAlex Elder 		return PTR_ERR(ondisk);
17764156d998SAlex Elder 	ret = rbd_header_from_disk(header, ondisk);
17774156d998SAlex Elder 	if (ret >= 0)
177859c2be1eSYehuda Sadeh 		header->obj_version = ver;
17794156d998SAlex Elder 	kfree(ondisk);
1780602adf40SYehuda Sadeh 
17814156d998SAlex Elder 	return ret;
1782602adf40SYehuda Sadeh }
1783602adf40SYehuda Sadeh 
178441f38c2bSAlex Elder static void rbd_remove_all_snaps(struct rbd_device *rbd_dev)
1785dfc5606dSYehuda Sadeh {
1786dfc5606dSYehuda Sadeh 	struct rbd_snap *snap;
1787a0593290SAlex Elder 	struct rbd_snap *next;
1788dfc5606dSYehuda Sadeh 
1789a0593290SAlex Elder 	list_for_each_entry_safe(snap, next, &rbd_dev->snaps, node)
179041f38c2bSAlex Elder 		rbd_remove_snap_dev(snap);
1791dfc5606dSYehuda Sadeh }
1792dfc5606dSYehuda Sadeh 
17939478554aSAlex Elder static void rbd_update_mapping_size(struct rbd_device *rbd_dev)
17949478554aSAlex Elder {
17959478554aSAlex Elder 	sector_t size;
17969478554aSAlex Elder 
17970d7dbfceSAlex Elder 	if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
17989478554aSAlex Elder 		return;
17999478554aSAlex Elder 
18009478554aSAlex Elder 	size = (sector_t) rbd_dev->header.image_size / SECTOR_SIZE;
18019478554aSAlex Elder 	dout("setting size to %llu sectors", (unsigned long long) size);
18029478554aSAlex Elder 	rbd_dev->mapping.size = (u64) size;
18039478554aSAlex Elder 	set_capacity(rbd_dev->disk, size);
18049478554aSAlex Elder }
18059478554aSAlex Elder 
1806602adf40SYehuda Sadeh /*
1807602adf40SYehuda Sadeh  * only read the first part of the ondisk header, without the snaps info
1808602adf40SYehuda Sadeh  */
1809117973fbSAlex Elder static int rbd_dev_v1_refresh(struct rbd_device *rbd_dev, u64 *hver)
1810602adf40SYehuda Sadeh {
1811602adf40SYehuda Sadeh 	int ret;
1812602adf40SYehuda Sadeh 	struct rbd_image_header h;
1813602adf40SYehuda Sadeh 
1814602adf40SYehuda Sadeh 	ret = rbd_read_header(rbd_dev, &h);
1815602adf40SYehuda Sadeh 	if (ret < 0)
1816602adf40SYehuda Sadeh 		return ret;
1817602adf40SYehuda Sadeh 
1818a51aa0c0SJosh Durgin 	down_write(&rbd_dev->header_rwsem);
1819a51aa0c0SJosh Durgin 
18209478554aSAlex Elder 	/* Update image size, and check for resize of mapped image */
18219478554aSAlex Elder 	rbd_dev->header.image_size = h.image_size;
18229478554aSAlex Elder 	rbd_update_mapping_size(rbd_dev);
18239db4b3e3SSage Weil 
1824849b4260SAlex Elder 	/* rbd_dev->header.object_prefix shouldn't change */
1825602adf40SYehuda Sadeh 	kfree(rbd_dev->header.snap_sizes);
1826849b4260SAlex Elder 	kfree(rbd_dev->header.snap_names);
1827d1d25646SJosh Durgin 	/* osd requests may still refer to snapc */
1828d1d25646SJosh Durgin 	ceph_put_snap_context(rbd_dev->header.snapc);
1829602adf40SYehuda Sadeh 
1830b813623aSAlex Elder 	if (hver)
1831b813623aSAlex Elder 		*hver = h.obj_version;
1832a71b891bSJosh Durgin 	rbd_dev->header.obj_version = h.obj_version;
183393a24e08SJosh Durgin 	rbd_dev->header.image_size = h.image_size;
1834602adf40SYehuda Sadeh 	rbd_dev->header.snapc = h.snapc;
1835602adf40SYehuda Sadeh 	rbd_dev->header.snap_names = h.snap_names;
1836602adf40SYehuda Sadeh 	rbd_dev->header.snap_sizes = h.snap_sizes;
1837849b4260SAlex Elder 	/* Free the extra copy of the object prefix */
1838849b4260SAlex Elder 	WARN_ON(strcmp(rbd_dev->header.object_prefix, h.object_prefix));
1839849b4260SAlex Elder 	kfree(h.object_prefix);
1840849b4260SAlex Elder 
1841304f6808SAlex Elder 	ret = rbd_dev_snaps_update(rbd_dev);
1842304f6808SAlex Elder 	if (!ret)
1843304f6808SAlex Elder 		ret = rbd_dev_snaps_register(rbd_dev);
1844dfc5606dSYehuda Sadeh 
1845c666601aSJosh Durgin 	up_write(&rbd_dev->header_rwsem);
1846602adf40SYehuda Sadeh 
1847dfc5606dSYehuda Sadeh 	return ret;
1848602adf40SYehuda Sadeh }
1849602adf40SYehuda Sadeh 
1850117973fbSAlex Elder static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver)
18511fe5e993SAlex Elder {
18521fe5e993SAlex Elder 	int ret;
18531fe5e993SAlex Elder 
1854117973fbSAlex Elder 	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
18551fe5e993SAlex Elder 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1856117973fbSAlex Elder 	if (rbd_dev->image_format == 1)
1857117973fbSAlex Elder 		ret = rbd_dev_v1_refresh(rbd_dev, hver);
1858117973fbSAlex Elder 	else
1859117973fbSAlex Elder 		ret = rbd_dev_v2_refresh(rbd_dev, hver);
18601fe5e993SAlex Elder 	mutex_unlock(&ctl_mutex);
18611fe5e993SAlex Elder 
18621fe5e993SAlex Elder 	return ret;
18631fe5e993SAlex Elder }
18641fe5e993SAlex Elder 
1865602adf40SYehuda Sadeh static int rbd_init_disk(struct rbd_device *rbd_dev)
1866602adf40SYehuda Sadeh {
1867602adf40SYehuda Sadeh 	struct gendisk *disk;
1868602adf40SYehuda Sadeh 	struct request_queue *q;
1869593a9e7bSAlex Elder 	u64 segment_size;
1870602adf40SYehuda Sadeh 
1871602adf40SYehuda Sadeh 	/* create gendisk info */
1872602adf40SYehuda Sadeh 	disk = alloc_disk(RBD_MINORS_PER_MAJOR);
1873602adf40SYehuda Sadeh 	if (!disk)
18741fcdb8aaSAlex Elder 		return -ENOMEM;
1875602adf40SYehuda Sadeh 
1876f0f8cef5SAlex Elder 	snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
1877de71a297SAlex Elder 		 rbd_dev->dev_id);
1878602adf40SYehuda Sadeh 	disk->major = rbd_dev->major;
1879602adf40SYehuda Sadeh 	disk->first_minor = 0;
1880602adf40SYehuda Sadeh 	disk->fops = &rbd_bd_ops;
1881602adf40SYehuda Sadeh 	disk->private_data = rbd_dev;
1882602adf40SYehuda Sadeh 
1883602adf40SYehuda Sadeh 	/* init rq */
1884602adf40SYehuda Sadeh 	q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock);
1885602adf40SYehuda Sadeh 	if (!q)
1886602adf40SYehuda Sadeh 		goto out_disk;
1887029bcbd8SJosh Durgin 
1888593a9e7bSAlex Elder 	/* We use the default size, but let's be explicit about it. */
1889593a9e7bSAlex Elder 	blk_queue_physical_block_size(q, SECTOR_SIZE);
1890593a9e7bSAlex Elder 
1891029bcbd8SJosh Durgin 	/* set io sizes to object size */
1892593a9e7bSAlex Elder 	segment_size = rbd_obj_bytes(&rbd_dev->header);
1893593a9e7bSAlex Elder 	blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
1894593a9e7bSAlex Elder 	blk_queue_max_segment_size(q, segment_size);
1895593a9e7bSAlex Elder 	blk_queue_io_min(q, segment_size);
1896593a9e7bSAlex Elder 	blk_queue_io_opt(q, segment_size);
1897029bcbd8SJosh Durgin 
1898602adf40SYehuda Sadeh 	blk_queue_merge_bvec(q, rbd_merge_bvec);
1899602adf40SYehuda Sadeh 	disk->queue = q;
1900602adf40SYehuda Sadeh 
1901602adf40SYehuda Sadeh 	q->queuedata = rbd_dev;
1902602adf40SYehuda Sadeh 
1903602adf40SYehuda Sadeh 	rbd_dev->disk = disk;
1904602adf40SYehuda Sadeh 
190512f02944SAlex Elder 	set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
190612f02944SAlex Elder 
1907602adf40SYehuda Sadeh 	return 0;
1908602adf40SYehuda Sadeh out_disk:
1909602adf40SYehuda Sadeh 	put_disk(disk);
19101fcdb8aaSAlex Elder 
19111fcdb8aaSAlex Elder 	return -ENOMEM;
1912602adf40SYehuda Sadeh }
1913602adf40SYehuda Sadeh 
1914dfc5606dSYehuda Sadeh /*
1915dfc5606dSYehuda Sadeh   sysfs
1916dfc5606dSYehuda Sadeh */
1917602adf40SYehuda Sadeh 
1918593a9e7bSAlex Elder static struct rbd_device *dev_to_rbd_dev(struct device *dev)
1919593a9e7bSAlex Elder {
1920593a9e7bSAlex Elder 	return container_of(dev, struct rbd_device, dev);
1921593a9e7bSAlex Elder }
1922593a9e7bSAlex Elder 
1923dfc5606dSYehuda Sadeh static ssize_t rbd_size_show(struct device *dev,
1924dfc5606dSYehuda Sadeh 			     struct device_attribute *attr, char *buf)
1925602adf40SYehuda Sadeh {
1926593a9e7bSAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1927a51aa0c0SJosh Durgin 	sector_t size;
1928dfc5606dSYehuda Sadeh 
1929a51aa0c0SJosh Durgin 	down_read(&rbd_dev->header_rwsem);
1930a51aa0c0SJosh Durgin 	size = get_capacity(rbd_dev->disk);
1931a51aa0c0SJosh Durgin 	up_read(&rbd_dev->header_rwsem);
1932a51aa0c0SJosh Durgin 
1933a51aa0c0SJosh Durgin 	return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE);
1934602adf40SYehuda Sadeh }
1935602adf40SYehuda Sadeh 
193634b13184SAlex Elder /*
193734b13184SAlex Elder  * Note this shows the features for whatever's mapped, which is not
193834b13184SAlex Elder  * necessarily the base image.
193934b13184SAlex Elder  */
194034b13184SAlex Elder static ssize_t rbd_features_show(struct device *dev,
194134b13184SAlex Elder 			     struct device_attribute *attr, char *buf)
194234b13184SAlex Elder {
194334b13184SAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
194434b13184SAlex Elder 
194534b13184SAlex Elder 	return sprintf(buf, "0x%016llx\n",
194634b13184SAlex Elder 			(unsigned long long) rbd_dev->mapping.features);
194734b13184SAlex Elder }
194834b13184SAlex Elder 
1949dfc5606dSYehuda Sadeh static ssize_t rbd_major_show(struct device *dev,
1950dfc5606dSYehuda Sadeh 			      struct device_attribute *attr, char *buf)
1951602adf40SYehuda Sadeh {
1952593a9e7bSAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1953dfc5606dSYehuda Sadeh 
1954dfc5606dSYehuda Sadeh 	return sprintf(buf, "%d\n", rbd_dev->major);
1955dfc5606dSYehuda Sadeh }
1956dfc5606dSYehuda Sadeh 
1957dfc5606dSYehuda Sadeh static ssize_t rbd_client_id_show(struct device *dev,
1958dfc5606dSYehuda Sadeh 				  struct device_attribute *attr, char *buf)
1959dfc5606dSYehuda Sadeh {
1960593a9e7bSAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1961dfc5606dSYehuda Sadeh 
19621dbb4399SAlex Elder 	return sprintf(buf, "client%lld\n",
19631dbb4399SAlex Elder 			ceph_client_id(rbd_dev->rbd_client->client));
1964dfc5606dSYehuda Sadeh }
1965dfc5606dSYehuda Sadeh 
1966dfc5606dSYehuda Sadeh static ssize_t rbd_pool_show(struct device *dev,
1967dfc5606dSYehuda Sadeh 			     struct device_attribute *attr, char *buf)
1968dfc5606dSYehuda Sadeh {
1969593a9e7bSAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1970dfc5606dSYehuda Sadeh 
19710d7dbfceSAlex Elder 	return sprintf(buf, "%s\n", rbd_dev->spec->pool_name);
1972dfc5606dSYehuda Sadeh }
1973dfc5606dSYehuda Sadeh 
19749bb2f334SAlex Elder static ssize_t rbd_pool_id_show(struct device *dev,
19759bb2f334SAlex Elder 			     struct device_attribute *attr, char *buf)
19769bb2f334SAlex Elder {
19779bb2f334SAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
19789bb2f334SAlex Elder 
19790d7dbfceSAlex Elder 	return sprintf(buf, "%llu\n",
19800d7dbfceSAlex Elder 		(unsigned long long) rbd_dev->spec->pool_id);
19819bb2f334SAlex Elder }
19829bb2f334SAlex Elder 
1983dfc5606dSYehuda Sadeh static ssize_t rbd_name_show(struct device *dev,
1984dfc5606dSYehuda Sadeh 			     struct device_attribute *attr, char *buf)
1985dfc5606dSYehuda Sadeh {
1986593a9e7bSAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1987dfc5606dSYehuda Sadeh 
1988a92ffdf8SAlex Elder 	if (rbd_dev->spec->image_name)
19890d7dbfceSAlex Elder 		return sprintf(buf, "%s\n", rbd_dev->spec->image_name);
1990a92ffdf8SAlex Elder 
1991a92ffdf8SAlex Elder 	return sprintf(buf, "(unknown)\n");
1992dfc5606dSYehuda Sadeh }
1993dfc5606dSYehuda Sadeh 
1994589d30e0SAlex Elder static ssize_t rbd_image_id_show(struct device *dev,
1995589d30e0SAlex Elder 			     struct device_attribute *attr, char *buf)
1996589d30e0SAlex Elder {
1997589d30e0SAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1998589d30e0SAlex Elder 
19990d7dbfceSAlex Elder 	return sprintf(buf, "%s\n", rbd_dev->spec->image_id);
2000589d30e0SAlex Elder }
2001589d30e0SAlex Elder 
200234b13184SAlex Elder /*
200334b13184SAlex Elder  * Shows the name of the currently-mapped snapshot (or
200434b13184SAlex Elder  * RBD_SNAP_HEAD_NAME for the base image).
200534b13184SAlex Elder  */
2006dfc5606dSYehuda Sadeh static ssize_t rbd_snap_show(struct device *dev,
2007dfc5606dSYehuda Sadeh 			     struct device_attribute *attr,
2008dfc5606dSYehuda Sadeh 			     char *buf)
2009dfc5606dSYehuda Sadeh {
2010593a9e7bSAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
2011dfc5606dSYehuda Sadeh 
20120d7dbfceSAlex Elder 	return sprintf(buf, "%s\n", rbd_dev->spec->snap_name);
2013dfc5606dSYehuda Sadeh }
2014dfc5606dSYehuda Sadeh 
201586b00e0dSAlex Elder /*
201686b00e0dSAlex Elder  * For an rbd v2 image, shows the pool id, image id, and snapshot id
201786b00e0dSAlex Elder  * for the parent image.  If there is no parent, simply shows
201886b00e0dSAlex Elder  * "(no parent image)".
201986b00e0dSAlex Elder  */
202086b00e0dSAlex Elder static ssize_t rbd_parent_show(struct device *dev,
202186b00e0dSAlex Elder 			     struct device_attribute *attr,
202286b00e0dSAlex Elder 			     char *buf)
202386b00e0dSAlex Elder {
202486b00e0dSAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
202586b00e0dSAlex Elder 	struct rbd_spec *spec = rbd_dev->parent_spec;
202686b00e0dSAlex Elder 	int count;
202786b00e0dSAlex Elder 	char *bufp = buf;
202886b00e0dSAlex Elder 
202986b00e0dSAlex Elder 	if (!spec)
203086b00e0dSAlex Elder 		return sprintf(buf, "(no parent image)\n");
203186b00e0dSAlex Elder 
203286b00e0dSAlex Elder 	count = sprintf(bufp, "pool_id %llu\npool_name %s\n",
203386b00e0dSAlex Elder 			(unsigned long long) spec->pool_id, spec->pool_name);
203486b00e0dSAlex Elder 	if (count < 0)
203586b00e0dSAlex Elder 		return count;
203686b00e0dSAlex Elder 	bufp += count;
203786b00e0dSAlex Elder 
203886b00e0dSAlex Elder 	count = sprintf(bufp, "image_id %s\nimage_name %s\n", spec->image_id,
203986b00e0dSAlex Elder 			spec->image_name ? spec->image_name : "(unknown)");
204086b00e0dSAlex Elder 	if (count < 0)
204186b00e0dSAlex Elder 		return count;
204286b00e0dSAlex Elder 	bufp += count;
204386b00e0dSAlex Elder 
204486b00e0dSAlex Elder 	count = sprintf(bufp, "snap_id %llu\nsnap_name %s\n",
204586b00e0dSAlex Elder 			(unsigned long long) spec->snap_id, spec->snap_name);
204686b00e0dSAlex Elder 	if (count < 0)
204786b00e0dSAlex Elder 		return count;
204886b00e0dSAlex Elder 	bufp += count;
204986b00e0dSAlex Elder 
205086b00e0dSAlex Elder 	count = sprintf(bufp, "overlap %llu\n", rbd_dev->parent_overlap);
205186b00e0dSAlex Elder 	if (count < 0)
205286b00e0dSAlex Elder 		return count;
205386b00e0dSAlex Elder 	bufp += count;
205486b00e0dSAlex Elder 
205586b00e0dSAlex Elder 	return (ssize_t) (bufp - buf);
205686b00e0dSAlex Elder }
205786b00e0dSAlex Elder 
2058dfc5606dSYehuda Sadeh static ssize_t rbd_image_refresh(struct device *dev,
2059dfc5606dSYehuda Sadeh 				 struct device_attribute *attr,
2060dfc5606dSYehuda Sadeh 				 const char *buf,
2061dfc5606dSYehuda Sadeh 				 size_t size)
2062dfc5606dSYehuda Sadeh {
2063593a9e7bSAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
2064b813623aSAlex Elder 	int ret;
2065602adf40SYehuda Sadeh 
2066117973fbSAlex Elder 	ret = rbd_dev_refresh(rbd_dev, NULL);
2067b813623aSAlex Elder 
2068b813623aSAlex Elder 	return ret < 0 ? ret : size;
2069dfc5606dSYehuda Sadeh }
2070602adf40SYehuda Sadeh 
2071dfc5606dSYehuda Sadeh static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
207234b13184SAlex Elder static DEVICE_ATTR(features, S_IRUGO, rbd_features_show, NULL);
2073dfc5606dSYehuda Sadeh static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
2074dfc5606dSYehuda Sadeh static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
2075dfc5606dSYehuda Sadeh static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
20769bb2f334SAlex Elder static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
2077dfc5606dSYehuda Sadeh static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
2078589d30e0SAlex Elder static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL);
2079dfc5606dSYehuda Sadeh static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
2080dfc5606dSYehuda Sadeh static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
208186b00e0dSAlex Elder static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL);
2082dfc5606dSYehuda Sadeh 
2083dfc5606dSYehuda Sadeh static struct attribute *rbd_attrs[] = {
2084dfc5606dSYehuda Sadeh 	&dev_attr_size.attr,
208534b13184SAlex Elder 	&dev_attr_features.attr,
2086dfc5606dSYehuda Sadeh 	&dev_attr_major.attr,
2087dfc5606dSYehuda Sadeh 	&dev_attr_client_id.attr,
2088dfc5606dSYehuda Sadeh 	&dev_attr_pool.attr,
20899bb2f334SAlex Elder 	&dev_attr_pool_id.attr,
2090dfc5606dSYehuda Sadeh 	&dev_attr_name.attr,
2091589d30e0SAlex Elder 	&dev_attr_image_id.attr,
2092dfc5606dSYehuda Sadeh 	&dev_attr_current_snap.attr,
209386b00e0dSAlex Elder 	&dev_attr_parent.attr,
2094dfc5606dSYehuda Sadeh 	&dev_attr_refresh.attr,
2095dfc5606dSYehuda Sadeh 	NULL
2096dfc5606dSYehuda Sadeh };
2097dfc5606dSYehuda Sadeh 
2098dfc5606dSYehuda Sadeh static struct attribute_group rbd_attr_group = {
2099dfc5606dSYehuda Sadeh 	.attrs = rbd_attrs,
2100dfc5606dSYehuda Sadeh };
2101dfc5606dSYehuda Sadeh 
2102dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_attr_groups[] = {
2103dfc5606dSYehuda Sadeh 	&rbd_attr_group,
2104dfc5606dSYehuda Sadeh 	NULL
2105dfc5606dSYehuda Sadeh };
2106dfc5606dSYehuda Sadeh 
/* Release callback for rbd_device_type; intentionally does nothing. */
static void rbd_sysfs_dev_release(struct device *dev)
{
	/* empty */
}
2110dfc5606dSYehuda Sadeh 
2111dfc5606dSYehuda Sadeh static struct device_type rbd_device_type = {
2112dfc5606dSYehuda Sadeh 	.name		= "rbd",
2113dfc5606dSYehuda Sadeh 	.groups		= rbd_attr_groups,
2114dfc5606dSYehuda Sadeh 	.release	= rbd_sysfs_dev_release,
2115dfc5606dSYehuda Sadeh };
2116dfc5606dSYehuda Sadeh 
2117dfc5606dSYehuda Sadeh 
2118dfc5606dSYehuda Sadeh /*
2119dfc5606dSYehuda Sadeh   sysfs - snapshots
2120dfc5606dSYehuda Sadeh */
2121dfc5606dSYehuda Sadeh 
2122dfc5606dSYehuda Sadeh static ssize_t rbd_snap_size_show(struct device *dev,
2123dfc5606dSYehuda Sadeh 				  struct device_attribute *attr,
2124dfc5606dSYehuda Sadeh 				  char *buf)
2125dfc5606dSYehuda Sadeh {
2126dfc5606dSYehuda Sadeh 	struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
2127dfc5606dSYehuda Sadeh 
21283591538fSJosh Durgin 	return sprintf(buf, "%llu\n", (unsigned long long)snap->size);
2129dfc5606dSYehuda Sadeh }
2130dfc5606dSYehuda Sadeh 
2131dfc5606dSYehuda Sadeh static ssize_t rbd_snap_id_show(struct device *dev,
2132dfc5606dSYehuda Sadeh 				struct device_attribute *attr,
2133dfc5606dSYehuda Sadeh 				char *buf)
2134dfc5606dSYehuda Sadeh {
2135dfc5606dSYehuda Sadeh 	struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
2136dfc5606dSYehuda Sadeh 
2137593a9e7bSAlex Elder 	return sprintf(buf, "%llu\n", (unsigned long long)snap->id);
2138dfc5606dSYehuda Sadeh }
2139dfc5606dSYehuda Sadeh 
214034b13184SAlex Elder static ssize_t rbd_snap_features_show(struct device *dev,
214134b13184SAlex Elder 				struct device_attribute *attr,
214234b13184SAlex Elder 				char *buf)
214334b13184SAlex Elder {
214434b13184SAlex Elder 	struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
214534b13184SAlex Elder 
214634b13184SAlex Elder 	return sprintf(buf, "0x%016llx\n",
214734b13184SAlex Elder 			(unsigned long long) snap->features);
214834b13184SAlex Elder }
214934b13184SAlex Elder 
2150dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL);
2151dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL);
215234b13184SAlex Elder static DEVICE_ATTR(snap_features, S_IRUGO, rbd_snap_features_show, NULL);
2153dfc5606dSYehuda Sadeh 
2154dfc5606dSYehuda Sadeh static struct attribute *rbd_snap_attrs[] = {
2155dfc5606dSYehuda Sadeh 	&dev_attr_snap_size.attr,
2156dfc5606dSYehuda Sadeh 	&dev_attr_snap_id.attr,
215734b13184SAlex Elder 	&dev_attr_snap_features.attr,
2158dfc5606dSYehuda Sadeh 	NULL,
2159dfc5606dSYehuda Sadeh };
2160dfc5606dSYehuda Sadeh 
2161dfc5606dSYehuda Sadeh static struct attribute_group rbd_snap_attr_group = {
2162dfc5606dSYehuda Sadeh 	.attrs = rbd_snap_attrs,
2163dfc5606dSYehuda Sadeh };
2164dfc5606dSYehuda Sadeh 
2165dfc5606dSYehuda Sadeh static void rbd_snap_dev_release(struct device *dev)
2166dfc5606dSYehuda Sadeh {
2167dfc5606dSYehuda Sadeh 	struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
2168dfc5606dSYehuda Sadeh 	kfree(snap->name);
2169dfc5606dSYehuda Sadeh 	kfree(snap);
2170dfc5606dSYehuda Sadeh }
2171dfc5606dSYehuda Sadeh 
2172dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_snap_attr_groups[] = {
2173dfc5606dSYehuda Sadeh 	&rbd_snap_attr_group,
2174dfc5606dSYehuda Sadeh 	NULL
2175dfc5606dSYehuda Sadeh };
2176dfc5606dSYehuda Sadeh 
2177dfc5606dSYehuda Sadeh static struct device_type rbd_snap_device_type = {
2178dfc5606dSYehuda Sadeh 	.groups		= rbd_snap_attr_groups,
2179dfc5606dSYehuda Sadeh 	.release	= rbd_snap_dev_release,
2180dfc5606dSYehuda Sadeh };
2181dfc5606dSYehuda Sadeh 
21828b8fb99cSAlex Elder static struct rbd_spec *rbd_spec_get(struct rbd_spec *spec)
21838b8fb99cSAlex Elder {
21848b8fb99cSAlex Elder 	kref_get(&spec->kref);
21858b8fb99cSAlex Elder 
21868b8fb99cSAlex Elder 	return spec;
21878b8fb99cSAlex Elder }
21888b8fb99cSAlex Elder 
21898b8fb99cSAlex Elder static void rbd_spec_free(struct kref *kref);
21908b8fb99cSAlex Elder static void rbd_spec_put(struct rbd_spec *spec)
21918b8fb99cSAlex Elder {
21928b8fb99cSAlex Elder 	if (spec)
21938b8fb99cSAlex Elder 		kref_put(&spec->kref, rbd_spec_free);
21948b8fb99cSAlex Elder }
21958b8fb99cSAlex Elder 
21968b8fb99cSAlex Elder static struct rbd_spec *rbd_spec_alloc(void)
21978b8fb99cSAlex Elder {
21988b8fb99cSAlex Elder 	struct rbd_spec *spec;
21998b8fb99cSAlex Elder 
22008b8fb99cSAlex Elder 	spec = kzalloc(sizeof (*spec), GFP_KERNEL);
22018b8fb99cSAlex Elder 	if (!spec)
22028b8fb99cSAlex Elder 		return NULL;
22038b8fb99cSAlex Elder 	kref_init(&spec->kref);
22048b8fb99cSAlex Elder 
22058b8fb99cSAlex Elder 	rbd_spec_put(rbd_spec_get(spec));	/* TEMPORARY */
22068b8fb99cSAlex Elder 
22078b8fb99cSAlex Elder 	return spec;
22088b8fb99cSAlex Elder }
22098b8fb99cSAlex Elder 
22108b8fb99cSAlex Elder static void rbd_spec_free(struct kref *kref)
22118b8fb99cSAlex Elder {
22128b8fb99cSAlex Elder 	struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref);
22138b8fb99cSAlex Elder 
22148b8fb99cSAlex Elder 	kfree(spec->pool_name);
22158b8fb99cSAlex Elder 	kfree(spec->image_id);
22168b8fb99cSAlex Elder 	kfree(spec->image_name);
22178b8fb99cSAlex Elder 	kfree(spec->snap_name);
22188b8fb99cSAlex Elder 	kfree(spec);
22198b8fb99cSAlex Elder }
22208b8fb99cSAlex Elder 
2221c53d5893SAlex Elder struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
2222c53d5893SAlex Elder 				struct rbd_spec *spec)
2223c53d5893SAlex Elder {
2224c53d5893SAlex Elder 	struct rbd_device *rbd_dev;
2225c53d5893SAlex Elder 
2226c53d5893SAlex Elder 	rbd_dev = kzalloc(sizeof (*rbd_dev), GFP_KERNEL);
2227c53d5893SAlex Elder 	if (!rbd_dev)
2228c53d5893SAlex Elder 		return NULL;
2229c53d5893SAlex Elder 
2230c53d5893SAlex Elder 	spin_lock_init(&rbd_dev->lock);
2231c53d5893SAlex Elder 	INIT_LIST_HEAD(&rbd_dev->node);
2232c53d5893SAlex Elder 	INIT_LIST_HEAD(&rbd_dev->snaps);
2233c53d5893SAlex Elder 	init_rwsem(&rbd_dev->header_rwsem);
2234c53d5893SAlex Elder 
2235c53d5893SAlex Elder 	rbd_dev->spec = spec;
2236c53d5893SAlex Elder 	rbd_dev->rbd_client = rbdc;
2237c53d5893SAlex Elder 
2238c53d5893SAlex Elder 	return rbd_dev;
2239c53d5893SAlex Elder }
2240c53d5893SAlex Elder 
2241c53d5893SAlex Elder static void rbd_dev_destroy(struct rbd_device *rbd_dev)
2242c53d5893SAlex Elder {
224386b00e0dSAlex Elder 	rbd_spec_put(rbd_dev->parent_spec);
2244c53d5893SAlex Elder 	kfree(rbd_dev->header_name);
2245c53d5893SAlex Elder 	rbd_put_client(rbd_dev->rbd_client);
2246c53d5893SAlex Elder 	rbd_spec_put(rbd_dev->spec);
2247c53d5893SAlex Elder 	kfree(rbd_dev);
2248c53d5893SAlex Elder }
2249c53d5893SAlex Elder 
2250304f6808SAlex Elder static bool rbd_snap_registered(struct rbd_snap *snap)
2251304f6808SAlex Elder {
2252304f6808SAlex Elder 	bool ret = snap->dev.type == &rbd_snap_device_type;
2253304f6808SAlex Elder 	bool reg = device_is_registered(&snap->dev);
2254304f6808SAlex Elder 
2255304f6808SAlex Elder 	rbd_assert(!ret ^ reg);
2256304f6808SAlex Elder 
2257304f6808SAlex Elder 	return ret;
2258304f6808SAlex Elder }
2259304f6808SAlex Elder 
226041f38c2bSAlex Elder static void rbd_remove_snap_dev(struct rbd_snap *snap)
2261dfc5606dSYehuda Sadeh {
2262dfc5606dSYehuda Sadeh 	list_del(&snap->node);
2263304f6808SAlex Elder 	if (device_is_registered(&snap->dev))
2264dfc5606dSYehuda Sadeh 		device_unregister(&snap->dev);
2265dfc5606dSYehuda Sadeh }
2266dfc5606dSYehuda Sadeh 
226714e7085dSAlex Elder static int rbd_register_snap_dev(struct rbd_snap *snap,
2268dfc5606dSYehuda Sadeh 				  struct device *parent)
2269dfc5606dSYehuda Sadeh {
2270dfc5606dSYehuda Sadeh 	struct device *dev = &snap->dev;
2271dfc5606dSYehuda Sadeh 	int ret;
2272dfc5606dSYehuda Sadeh 
2273dfc5606dSYehuda Sadeh 	dev->type = &rbd_snap_device_type;
2274dfc5606dSYehuda Sadeh 	dev->parent = parent;
2275dfc5606dSYehuda Sadeh 	dev->release = rbd_snap_dev_release;
2276d4b125e9SAlex Elder 	dev_set_name(dev, "%s%s", RBD_SNAP_DEV_NAME_PREFIX, snap->name);
2277304f6808SAlex Elder 	dout("%s: registering device for snapshot %s\n", __func__, snap->name);
2278304f6808SAlex Elder 
2279dfc5606dSYehuda Sadeh 	ret = device_register(dev);
2280dfc5606dSYehuda Sadeh 
2281dfc5606dSYehuda Sadeh 	return ret;
2282dfc5606dSYehuda Sadeh }
2283dfc5606dSYehuda Sadeh 
22844e891e0aSAlex Elder static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev,
2285c8d18425SAlex Elder 						const char *snap_name,
228634b13184SAlex Elder 						u64 snap_id, u64 snap_size,
228734b13184SAlex Elder 						u64 snap_features)
2288dfc5606dSYehuda Sadeh {
22894e891e0aSAlex Elder 	struct rbd_snap *snap;
2290dfc5606dSYehuda Sadeh 	int ret;
22914e891e0aSAlex Elder 
22924e891e0aSAlex Elder 	snap = kzalloc(sizeof (*snap), GFP_KERNEL);
2293dfc5606dSYehuda Sadeh 	if (!snap)
22944e891e0aSAlex Elder 		return ERR_PTR(-ENOMEM);
22954e891e0aSAlex Elder 
22964e891e0aSAlex Elder 	ret = -ENOMEM;
2297c8d18425SAlex Elder 	snap->name = kstrdup(snap_name, GFP_KERNEL);
22984e891e0aSAlex Elder 	if (!snap->name)
22994e891e0aSAlex Elder 		goto err;
23004e891e0aSAlex Elder 
2301c8d18425SAlex Elder 	snap->id = snap_id;
2302c8d18425SAlex Elder 	snap->size = snap_size;
230334b13184SAlex Elder 	snap->features = snap_features;
23044e891e0aSAlex Elder 
23054e891e0aSAlex Elder 	return snap;
23064e891e0aSAlex Elder 
2307dfc5606dSYehuda Sadeh err:
2308dfc5606dSYehuda Sadeh 	kfree(snap->name);
2309dfc5606dSYehuda Sadeh 	kfree(snap);
23104e891e0aSAlex Elder 
23114e891e0aSAlex Elder 	return ERR_PTR(ret);
2312dfc5606dSYehuda Sadeh }
2313dfc5606dSYehuda Sadeh 
2314cd892126SAlex Elder static char *rbd_dev_v1_snap_info(struct rbd_device *rbd_dev, u32 which,
2315cd892126SAlex Elder 		u64 *snap_size, u64 *snap_features)
2316cd892126SAlex Elder {
2317cd892126SAlex Elder 	char *snap_name;
2318cd892126SAlex Elder 
2319cd892126SAlex Elder 	rbd_assert(which < rbd_dev->header.snapc->num_snaps);
2320cd892126SAlex Elder 
2321cd892126SAlex Elder 	*snap_size = rbd_dev->header.snap_sizes[which];
2322cd892126SAlex Elder 	*snap_features = 0;	/* No features for v1 */
2323cd892126SAlex Elder 
2324cd892126SAlex Elder 	/* Skip over names until we find the one we are looking for */
2325cd892126SAlex Elder 
2326cd892126SAlex Elder 	snap_name = rbd_dev->header.snap_names;
2327cd892126SAlex Elder 	while (which--)
2328cd892126SAlex Elder 		snap_name += strlen(snap_name) + 1;
2329cd892126SAlex Elder 
2330cd892126SAlex Elder 	return snap_name;
2331cd892126SAlex Elder }
2332cd892126SAlex Elder 
2333dfc5606dSYehuda Sadeh /*
23349d475de5SAlex Elder  * Get the size and object order for an image snapshot, or if
23359d475de5SAlex Elder  * snap_id is CEPH_NOSNAP, gets this information for the base
23369d475de5SAlex Elder  * image.
23379d475de5SAlex Elder  */
23389d475de5SAlex Elder static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
23399d475de5SAlex Elder 				u8 *order, u64 *snap_size)
23409d475de5SAlex Elder {
23419d475de5SAlex Elder 	__le64 snapid = cpu_to_le64(snap_id);
23429d475de5SAlex Elder 	int ret;
23439d475de5SAlex Elder 	struct {
23449d475de5SAlex Elder 		u8 order;
23459d475de5SAlex Elder 		__le64 size;
23469d475de5SAlex Elder 	} __attribute__ ((packed)) size_buf = { 0 };
23479d475de5SAlex Elder 
23489d475de5SAlex Elder 	ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
23499d475de5SAlex Elder 				"rbd", "get_size",
23509d475de5SAlex Elder 				(char *) &snapid, sizeof (snapid),
23519d475de5SAlex Elder 				(char *) &size_buf, sizeof (size_buf),
23529d475de5SAlex Elder 				CEPH_OSD_FLAG_READ, NULL);
23539d475de5SAlex Elder 	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
23549d475de5SAlex Elder 	if (ret < 0)
23559d475de5SAlex Elder 		return ret;
23569d475de5SAlex Elder 
23579d475de5SAlex Elder 	*order = size_buf.order;
23589d475de5SAlex Elder 	*snap_size = le64_to_cpu(size_buf.size);
23599d475de5SAlex Elder 
23609d475de5SAlex Elder 	dout("  snap_id 0x%016llx order = %u, snap_size = %llu\n",
23619d475de5SAlex Elder 		(unsigned long long) snap_id, (unsigned int) *order,
23629d475de5SAlex Elder 		(unsigned long long) *snap_size);
23639d475de5SAlex Elder 
23649d475de5SAlex Elder 	return 0;
23659d475de5SAlex Elder }
23669d475de5SAlex Elder 
23679d475de5SAlex Elder static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev)
23689d475de5SAlex Elder {
23699d475de5SAlex Elder 	return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
23709d475de5SAlex Elder 					&rbd_dev->header.obj_order,
23719d475de5SAlex Elder 					&rbd_dev->header.image_size);
23729d475de5SAlex Elder }
23739d475de5SAlex Elder 
23741e130199SAlex Elder static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
23751e130199SAlex Elder {
23761e130199SAlex Elder 	void *reply_buf;
23771e130199SAlex Elder 	int ret;
23781e130199SAlex Elder 	void *p;
23791e130199SAlex Elder 
23801e130199SAlex Elder 	reply_buf = kzalloc(RBD_OBJ_PREFIX_LEN_MAX, GFP_KERNEL);
23811e130199SAlex Elder 	if (!reply_buf)
23821e130199SAlex Elder 		return -ENOMEM;
23831e130199SAlex Elder 
23841e130199SAlex Elder 	ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
23851e130199SAlex Elder 				"rbd", "get_object_prefix",
23861e130199SAlex Elder 				NULL, 0,
23871e130199SAlex Elder 				reply_buf, RBD_OBJ_PREFIX_LEN_MAX,
23881e130199SAlex Elder 				CEPH_OSD_FLAG_READ, NULL);
23891e130199SAlex Elder 	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
23901e130199SAlex Elder 	if (ret < 0)
23911e130199SAlex Elder 		goto out;
2392a0ea3a40SAlex Elder 	ret = 0;    /* rbd_req_sync_exec() can return positive */
23931e130199SAlex Elder 
23941e130199SAlex Elder 	p = reply_buf;
23951e130199SAlex Elder 	rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p,
23961e130199SAlex Elder 						p + RBD_OBJ_PREFIX_LEN_MAX,
23971e130199SAlex Elder 						NULL, GFP_NOIO);
23981e130199SAlex Elder 
23991e130199SAlex Elder 	if (IS_ERR(rbd_dev->header.object_prefix)) {
24001e130199SAlex Elder 		ret = PTR_ERR(rbd_dev->header.object_prefix);
24011e130199SAlex Elder 		rbd_dev->header.object_prefix = NULL;
24021e130199SAlex Elder 	} else {
24031e130199SAlex Elder 		dout("  object_prefix = %s\n", rbd_dev->header.object_prefix);
24041e130199SAlex Elder 	}
24051e130199SAlex Elder 
24061e130199SAlex Elder out:
24071e130199SAlex Elder 	kfree(reply_buf);
24081e130199SAlex Elder 
24091e130199SAlex Elder 	return ret;
24101e130199SAlex Elder }
24111e130199SAlex Elder 
2412b1b5402aSAlex Elder static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
2413b1b5402aSAlex Elder 		u64 *snap_features)
2414b1b5402aSAlex Elder {
2415b1b5402aSAlex Elder 	__le64 snapid = cpu_to_le64(snap_id);
2416b1b5402aSAlex Elder 	struct {
2417b1b5402aSAlex Elder 		__le64 features;
2418b1b5402aSAlex Elder 		__le64 incompat;
2419b1b5402aSAlex Elder 	} features_buf = { 0 };
2420d889140cSAlex Elder 	u64 incompat;
2421b1b5402aSAlex Elder 	int ret;
2422b1b5402aSAlex Elder 
2423b1b5402aSAlex Elder 	ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
2424b1b5402aSAlex Elder 				"rbd", "get_features",
2425b1b5402aSAlex Elder 				(char *) &snapid, sizeof (snapid),
2426b1b5402aSAlex Elder 				(char *) &features_buf, sizeof (features_buf),
2427b1b5402aSAlex Elder 				CEPH_OSD_FLAG_READ, NULL);
2428b1b5402aSAlex Elder 	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
2429b1b5402aSAlex Elder 	if (ret < 0)
2430b1b5402aSAlex Elder 		return ret;
2431d889140cSAlex Elder 
2432d889140cSAlex Elder 	incompat = le64_to_cpu(features_buf.incompat);
2433d889140cSAlex Elder 	if (incompat & ~RBD_FEATURES_ALL)
2434d889140cSAlex Elder 		return -ENOTSUPP;
2435d889140cSAlex Elder 
2436b1b5402aSAlex Elder 	*snap_features = le64_to_cpu(features_buf.features);
2437b1b5402aSAlex Elder 
2438b1b5402aSAlex Elder 	dout("  snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n",
2439b1b5402aSAlex Elder 		(unsigned long long) snap_id,
2440b1b5402aSAlex Elder 		(unsigned long long) *snap_features,
2441b1b5402aSAlex Elder 		(unsigned long long) le64_to_cpu(features_buf.incompat));
2442b1b5402aSAlex Elder 
2443b1b5402aSAlex Elder 	return 0;
2444b1b5402aSAlex Elder }
2445b1b5402aSAlex Elder 
2446b1b5402aSAlex Elder static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
2447b1b5402aSAlex Elder {
2448b1b5402aSAlex Elder 	return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
2449b1b5402aSAlex Elder 						&rbd_dev->header.features);
2450b1b5402aSAlex Elder }
2451b1b5402aSAlex Elder 
245286b00e0dSAlex Elder static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
245386b00e0dSAlex Elder {
245486b00e0dSAlex Elder 	struct rbd_spec *parent_spec;
245586b00e0dSAlex Elder 	size_t size;
245686b00e0dSAlex Elder 	void *reply_buf = NULL;
245786b00e0dSAlex Elder 	__le64 snapid;
245886b00e0dSAlex Elder 	void *p;
245986b00e0dSAlex Elder 	void *end;
246086b00e0dSAlex Elder 	char *image_id;
246186b00e0dSAlex Elder 	u64 overlap;
246286b00e0dSAlex Elder 	size_t len = 0;
246386b00e0dSAlex Elder 	int ret;
246486b00e0dSAlex Elder 
246586b00e0dSAlex Elder 	parent_spec = rbd_spec_alloc();
246686b00e0dSAlex Elder 	if (!parent_spec)
246786b00e0dSAlex Elder 		return -ENOMEM;
246886b00e0dSAlex Elder 
246986b00e0dSAlex Elder 	size = sizeof (__le64) +				/* pool_id */
247086b00e0dSAlex Elder 		sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX +	/* image_id */
247186b00e0dSAlex Elder 		sizeof (__le64) +				/* snap_id */
247286b00e0dSAlex Elder 		sizeof (__le64);				/* overlap */
247386b00e0dSAlex Elder 	reply_buf = kmalloc(size, GFP_KERNEL);
247486b00e0dSAlex Elder 	if (!reply_buf) {
247586b00e0dSAlex Elder 		ret = -ENOMEM;
247686b00e0dSAlex Elder 		goto out_err;
247786b00e0dSAlex Elder 	}
247886b00e0dSAlex Elder 
247986b00e0dSAlex Elder 	snapid = cpu_to_le64(CEPH_NOSNAP);
248086b00e0dSAlex Elder 	ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
248186b00e0dSAlex Elder 				"rbd", "get_parent",
248286b00e0dSAlex Elder 				(char *) &snapid, sizeof (snapid),
248386b00e0dSAlex Elder 				(char *) reply_buf, size,
248486b00e0dSAlex Elder 				CEPH_OSD_FLAG_READ, NULL);
248586b00e0dSAlex Elder 	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
248686b00e0dSAlex Elder 	if (ret < 0)
248786b00e0dSAlex Elder 		goto out_err;
248886b00e0dSAlex Elder 
248986b00e0dSAlex Elder 	ret = -ERANGE;
249086b00e0dSAlex Elder 	p = reply_buf;
249186b00e0dSAlex Elder 	end = (char *) reply_buf + size;
249286b00e0dSAlex Elder 	ceph_decode_64_safe(&p, end, parent_spec->pool_id, out_err);
249386b00e0dSAlex Elder 	if (parent_spec->pool_id == CEPH_NOPOOL)
249486b00e0dSAlex Elder 		goto out;	/* No parent?  No problem. */
249586b00e0dSAlex Elder 
249686b00e0dSAlex Elder 	image_id = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL);
249786b00e0dSAlex Elder 	if (IS_ERR(image_id)) {
249886b00e0dSAlex Elder 		ret = PTR_ERR(image_id);
249986b00e0dSAlex Elder 		goto out_err;
250086b00e0dSAlex Elder 	}
250186b00e0dSAlex Elder 	parent_spec->image_id = image_id;
250286b00e0dSAlex Elder 	ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err);
250386b00e0dSAlex Elder 	ceph_decode_64_safe(&p, end, overlap, out_err);
250486b00e0dSAlex Elder 
250586b00e0dSAlex Elder 	rbd_dev->parent_overlap = overlap;
250686b00e0dSAlex Elder 	rbd_dev->parent_spec = parent_spec;
250786b00e0dSAlex Elder 	parent_spec = NULL;	/* rbd_dev now owns this */
250886b00e0dSAlex Elder out:
250986b00e0dSAlex Elder 	ret = 0;
251086b00e0dSAlex Elder out_err:
251186b00e0dSAlex Elder 	kfree(reply_buf);
251286b00e0dSAlex Elder 	rbd_spec_put(parent_spec);
251386b00e0dSAlex Elder 
251486b00e0dSAlex Elder 	return ret;
251586b00e0dSAlex Elder }
251686b00e0dSAlex Elder 
25176e14b1a6SAlex Elder static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver)
251835d489f9SAlex Elder {
251935d489f9SAlex Elder 	size_t size;
252035d489f9SAlex Elder 	int ret;
252135d489f9SAlex Elder 	void *reply_buf;
252235d489f9SAlex Elder 	void *p;
252335d489f9SAlex Elder 	void *end;
252435d489f9SAlex Elder 	u64 seq;
252535d489f9SAlex Elder 	u32 snap_count;
252635d489f9SAlex Elder 	struct ceph_snap_context *snapc;
252735d489f9SAlex Elder 	u32 i;
252835d489f9SAlex Elder 
252935d489f9SAlex Elder 	/*
253035d489f9SAlex Elder 	 * We'll need room for the seq value (maximum snapshot id),
253135d489f9SAlex Elder 	 * snapshot count, and array of that many snapshot ids.
253235d489f9SAlex Elder 	 * For now we have a fixed upper limit on the number we're
253335d489f9SAlex Elder 	 * prepared to receive.
253435d489f9SAlex Elder 	 */
253535d489f9SAlex Elder 	size = sizeof (__le64) + sizeof (__le32) +
253635d489f9SAlex Elder 			RBD_MAX_SNAP_COUNT * sizeof (__le64);
253735d489f9SAlex Elder 	reply_buf = kzalloc(size, GFP_KERNEL);
253835d489f9SAlex Elder 	if (!reply_buf)
253935d489f9SAlex Elder 		return -ENOMEM;
254035d489f9SAlex Elder 
254135d489f9SAlex Elder 	ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
254235d489f9SAlex Elder 				"rbd", "get_snapcontext",
254335d489f9SAlex Elder 				NULL, 0,
254435d489f9SAlex Elder 				reply_buf, size,
25456e14b1a6SAlex Elder 				CEPH_OSD_FLAG_READ, ver);
254635d489f9SAlex Elder 	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
254735d489f9SAlex Elder 	if (ret < 0)
254835d489f9SAlex Elder 		goto out;
254935d489f9SAlex Elder 
255035d489f9SAlex Elder 	ret = -ERANGE;
255135d489f9SAlex Elder 	p = reply_buf;
255235d489f9SAlex Elder 	end = (char *) reply_buf + size;
255335d489f9SAlex Elder 	ceph_decode_64_safe(&p, end, seq, out);
255435d489f9SAlex Elder 	ceph_decode_32_safe(&p, end, snap_count, out);
255535d489f9SAlex Elder 
255635d489f9SAlex Elder 	/*
255735d489f9SAlex Elder 	 * Make sure the reported number of snapshot ids wouldn't go
255835d489f9SAlex Elder 	 * beyond the end of our buffer.  But before checking that,
255935d489f9SAlex Elder 	 * make sure the computed size of the snapshot context we
256035d489f9SAlex Elder 	 * allocate is representable in a size_t.
256135d489f9SAlex Elder 	 */
256235d489f9SAlex Elder 	if (snap_count > (SIZE_MAX - sizeof (struct ceph_snap_context))
256335d489f9SAlex Elder 				 / sizeof (u64)) {
256435d489f9SAlex Elder 		ret = -EINVAL;
256535d489f9SAlex Elder 		goto out;
256635d489f9SAlex Elder 	}
256735d489f9SAlex Elder 	if (!ceph_has_room(&p, end, snap_count * sizeof (__le64)))
256835d489f9SAlex Elder 		goto out;
256935d489f9SAlex Elder 
257035d489f9SAlex Elder 	size = sizeof (struct ceph_snap_context) +
257135d489f9SAlex Elder 				snap_count * sizeof (snapc->snaps[0]);
257235d489f9SAlex Elder 	snapc = kmalloc(size, GFP_KERNEL);
257335d489f9SAlex Elder 	if (!snapc) {
257435d489f9SAlex Elder 		ret = -ENOMEM;
257535d489f9SAlex Elder 		goto out;
257635d489f9SAlex Elder 	}
257735d489f9SAlex Elder 
257835d489f9SAlex Elder 	atomic_set(&snapc->nref, 1);
257935d489f9SAlex Elder 	snapc->seq = seq;
258035d489f9SAlex Elder 	snapc->num_snaps = snap_count;
258135d489f9SAlex Elder 	for (i = 0; i < snap_count; i++)
258235d489f9SAlex Elder 		snapc->snaps[i] = ceph_decode_64(&p);
258335d489f9SAlex Elder 
258435d489f9SAlex Elder 	rbd_dev->header.snapc = snapc;
258535d489f9SAlex Elder 
258635d489f9SAlex Elder 	dout("  snap context seq = %llu, snap_count = %u\n",
258735d489f9SAlex Elder 		(unsigned long long) seq, (unsigned int) snap_count);
258835d489f9SAlex Elder 
258935d489f9SAlex Elder out:
259035d489f9SAlex Elder 	kfree(reply_buf);
259135d489f9SAlex Elder 
259235d489f9SAlex Elder 	return 0;
259335d489f9SAlex Elder }
259435d489f9SAlex Elder 
2595b8b1e2dbSAlex Elder static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which)
2596b8b1e2dbSAlex Elder {
2597b8b1e2dbSAlex Elder 	size_t size;
2598b8b1e2dbSAlex Elder 	void *reply_buf;
2599b8b1e2dbSAlex Elder 	__le64 snap_id;
2600b8b1e2dbSAlex Elder 	int ret;
2601b8b1e2dbSAlex Elder 	void *p;
2602b8b1e2dbSAlex Elder 	void *end;
2603b8b1e2dbSAlex Elder 	char *snap_name;
2604b8b1e2dbSAlex Elder 
2605b8b1e2dbSAlex Elder 	size = sizeof (__le32) + RBD_MAX_SNAP_NAME_LEN;
2606b8b1e2dbSAlex Elder 	reply_buf = kmalloc(size, GFP_KERNEL);
2607b8b1e2dbSAlex Elder 	if (!reply_buf)
2608b8b1e2dbSAlex Elder 		return ERR_PTR(-ENOMEM);
2609b8b1e2dbSAlex Elder 
2610b8b1e2dbSAlex Elder 	snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]);
2611b8b1e2dbSAlex Elder 	ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
2612b8b1e2dbSAlex Elder 				"rbd", "get_snapshot_name",
2613b8b1e2dbSAlex Elder 				(char *) &snap_id, sizeof (snap_id),
2614b8b1e2dbSAlex Elder 				reply_buf, size,
2615b8b1e2dbSAlex Elder 				CEPH_OSD_FLAG_READ, NULL);
2616b8b1e2dbSAlex Elder 	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
2617b8b1e2dbSAlex Elder 	if (ret < 0)
2618b8b1e2dbSAlex Elder 		goto out;
2619b8b1e2dbSAlex Elder 
2620b8b1e2dbSAlex Elder 	p = reply_buf;
2621b8b1e2dbSAlex Elder 	end = (char *) reply_buf + size;
2622e5c35534SAlex Elder 	snap_name = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
2623b8b1e2dbSAlex Elder 	if (IS_ERR(snap_name)) {
2624b8b1e2dbSAlex Elder 		ret = PTR_ERR(snap_name);
2625b8b1e2dbSAlex Elder 		goto out;
2626b8b1e2dbSAlex Elder 	} else {
2627b8b1e2dbSAlex Elder 		dout("  snap_id 0x%016llx snap_name = %s\n",
2628b8b1e2dbSAlex Elder 			(unsigned long long) le64_to_cpu(snap_id), snap_name);
2629b8b1e2dbSAlex Elder 	}
2630b8b1e2dbSAlex Elder 	kfree(reply_buf);
2631b8b1e2dbSAlex Elder 
2632b8b1e2dbSAlex Elder 	return snap_name;
2633b8b1e2dbSAlex Elder out:
2634b8b1e2dbSAlex Elder 	kfree(reply_buf);
2635b8b1e2dbSAlex Elder 
2636b8b1e2dbSAlex Elder 	return ERR_PTR(ret);
2637b8b1e2dbSAlex Elder }
2638b8b1e2dbSAlex Elder 
2639b8b1e2dbSAlex Elder static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which,
2640b8b1e2dbSAlex Elder 		u64 *snap_size, u64 *snap_features)
2641b8b1e2dbSAlex Elder {
2642b8b1e2dbSAlex Elder 	__le64 snap_id;
2643b8b1e2dbSAlex Elder 	u8 order;
2644b8b1e2dbSAlex Elder 	int ret;
2645b8b1e2dbSAlex Elder 
2646b8b1e2dbSAlex Elder 	snap_id = rbd_dev->header.snapc->snaps[which];
2647b8b1e2dbSAlex Elder 	ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, &order, snap_size);
2648b8b1e2dbSAlex Elder 	if (ret)
2649b8b1e2dbSAlex Elder 		return ERR_PTR(ret);
2650b8b1e2dbSAlex Elder 	ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, snap_features);
2651b8b1e2dbSAlex Elder 	if (ret)
2652b8b1e2dbSAlex Elder 		return ERR_PTR(ret);
2653b8b1e2dbSAlex Elder 
2654b8b1e2dbSAlex Elder 	return rbd_dev_v2_snap_name(rbd_dev, which);
2655b8b1e2dbSAlex Elder }
2656b8b1e2dbSAlex Elder 
2657b8b1e2dbSAlex Elder static char *rbd_dev_snap_info(struct rbd_device *rbd_dev, u32 which,
2658b8b1e2dbSAlex Elder 		u64 *snap_size, u64 *snap_features)
2659b8b1e2dbSAlex Elder {
2660b8b1e2dbSAlex Elder 	if (rbd_dev->image_format == 1)
2661b8b1e2dbSAlex Elder 		return rbd_dev_v1_snap_info(rbd_dev, which,
2662b8b1e2dbSAlex Elder 					snap_size, snap_features);
2663b8b1e2dbSAlex Elder 	if (rbd_dev->image_format == 2)
2664b8b1e2dbSAlex Elder 		return rbd_dev_v2_snap_info(rbd_dev, which,
2665b8b1e2dbSAlex Elder 					snap_size, snap_features);
2666b8b1e2dbSAlex Elder 	return ERR_PTR(-EINVAL);
2667b8b1e2dbSAlex Elder }
2668b8b1e2dbSAlex Elder 
2669117973fbSAlex Elder static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver)
2670117973fbSAlex Elder {
2671117973fbSAlex Elder 	int ret;
2672117973fbSAlex Elder 	__u8 obj_order;
2673117973fbSAlex Elder 
2674117973fbSAlex Elder 	down_write(&rbd_dev->header_rwsem);
2675117973fbSAlex Elder 
2676117973fbSAlex Elder 	/* Grab old order first, to see if it changes */
2677117973fbSAlex Elder 
2678117973fbSAlex Elder 	obj_order = rbd_dev->header.obj_order,
2679117973fbSAlex Elder 	ret = rbd_dev_v2_image_size(rbd_dev);
2680117973fbSAlex Elder 	if (ret)
2681117973fbSAlex Elder 		goto out;
2682117973fbSAlex Elder 	if (rbd_dev->header.obj_order != obj_order) {
2683117973fbSAlex Elder 		ret = -EIO;
2684117973fbSAlex Elder 		goto out;
2685117973fbSAlex Elder 	}
2686117973fbSAlex Elder 	rbd_update_mapping_size(rbd_dev);
2687117973fbSAlex Elder 
2688117973fbSAlex Elder 	ret = rbd_dev_v2_snap_context(rbd_dev, hver);
2689117973fbSAlex Elder 	dout("rbd_dev_v2_snap_context returned %d\n", ret);
2690117973fbSAlex Elder 	if (ret)
2691117973fbSAlex Elder 		goto out;
2692117973fbSAlex Elder 	ret = rbd_dev_snaps_update(rbd_dev);
2693117973fbSAlex Elder 	dout("rbd_dev_snaps_update returned %d\n", ret);
2694117973fbSAlex Elder 	if (ret)
2695117973fbSAlex Elder 		goto out;
2696117973fbSAlex Elder 	ret = rbd_dev_snaps_register(rbd_dev);
2697117973fbSAlex Elder 	dout("rbd_dev_snaps_register returned %d\n", ret);
2698117973fbSAlex Elder out:
2699117973fbSAlex Elder 	up_write(&rbd_dev->header_rwsem);
2700117973fbSAlex Elder 
2701117973fbSAlex Elder 	return ret;
2702117973fbSAlex Elder }
2703117973fbSAlex Elder 
27049d475de5SAlex Elder /*
270535938150SAlex Elder  * Scan the rbd device's current snapshot list and compare it to the
270635938150SAlex Elder  * newly-received snapshot context.  Remove any existing snapshots
270735938150SAlex Elder  * not present in the new snapshot context.  Add a new snapshot for
270835938150SAlex Elder  * any snaphots in the snapshot context not in the current list.
270935938150SAlex Elder  * And verify there are no changes to snapshots we already know
271035938150SAlex Elder  * about.
271135938150SAlex Elder  *
271235938150SAlex Elder  * Assumes the snapshots in the snapshot context are sorted by
271335938150SAlex Elder  * snapshot id, highest id first.  (Snapshots in the rbd_dev's list
271435938150SAlex Elder  * are also maintained in that order.)
2715dfc5606dSYehuda Sadeh  */
2716304f6808SAlex Elder static int rbd_dev_snaps_update(struct rbd_device *rbd_dev)
2717dfc5606dSYehuda Sadeh {
271835938150SAlex Elder 	struct ceph_snap_context *snapc = rbd_dev->header.snapc;
271935938150SAlex Elder 	const u32 snap_count = snapc->num_snaps;
272035938150SAlex Elder 	struct list_head *head = &rbd_dev->snaps;
272135938150SAlex Elder 	struct list_head *links = head->next;
272235938150SAlex Elder 	u32 index = 0;
2723dfc5606dSYehuda Sadeh 
27249fcbb800SAlex Elder 	dout("%s: snap count is %u\n", __func__, (unsigned int) snap_count);
272535938150SAlex Elder 	while (index < snap_count || links != head) {
272635938150SAlex Elder 		u64 snap_id;
272735938150SAlex Elder 		struct rbd_snap *snap;
2728cd892126SAlex Elder 		char *snap_name;
2729cd892126SAlex Elder 		u64 snap_size = 0;
2730cd892126SAlex Elder 		u64 snap_features = 0;
2731dfc5606dSYehuda Sadeh 
273235938150SAlex Elder 		snap_id = index < snap_count ? snapc->snaps[index]
273335938150SAlex Elder 					     : CEPH_NOSNAP;
273435938150SAlex Elder 		snap = links != head ? list_entry(links, struct rbd_snap, node)
273535938150SAlex Elder 				     : NULL;
2736aafb230eSAlex Elder 		rbd_assert(!snap || snap->id != CEPH_NOSNAP);
2737dfc5606dSYehuda Sadeh 
273835938150SAlex Elder 		if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) {
273935938150SAlex Elder 			struct list_head *next = links->next;
2740dfc5606dSYehuda Sadeh 
274135938150SAlex Elder 			/* Existing snapshot not in the new snap context */
2742dfc5606dSYehuda Sadeh 
27430d7dbfceSAlex Elder 			if (rbd_dev->spec->snap_id == snap->id)
2744daba5fdbSAlex Elder 				rbd_dev->exists = false;
274541f38c2bSAlex Elder 			rbd_remove_snap_dev(snap);
27469fcbb800SAlex Elder 			dout("%ssnap id %llu has been removed\n",
27470d7dbfceSAlex Elder 				rbd_dev->spec->snap_id == snap->id ?
27480d7dbfceSAlex Elder 							"mapped " : "",
27499fcbb800SAlex Elder 				(unsigned long long) snap->id);
2750dfc5606dSYehuda Sadeh 
275135938150SAlex Elder 			/* Done with this list entry; advance */
275235938150SAlex Elder 
275335938150SAlex Elder 			links = next;
275435938150SAlex Elder 			continue;
2755dfc5606dSYehuda Sadeh 		}
275635938150SAlex Elder 
2757b8b1e2dbSAlex Elder 		snap_name = rbd_dev_snap_info(rbd_dev, index,
2758cd892126SAlex Elder 					&snap_size, &snap_features);
2759cd892126SAlex Elder 		if (IS_ERR(snap_name))
2760cd892126SAlex Elder 			return PTR_ERR(snap_name);
2761cd892126SAlex Elder 
27629fcbb800SAlex Elder 		dout("entry %u: snap_id = %llu\n", (unsigned int) snap_count,
27639fcbb800SAlex Elder 			(unsigned long long) snap_id);
276435938150SAlex Elder 		if (!snap || (snap_id != CEPH_NOSNAP && snap->id < snap_id)) {
276535938150SAlex Elder 			struct rbd_snap *new_snap;
276635938150SAlex Elder 
276735938150SAlex Elder 			/* We haven't seen this snapshot before */
276835938150SAlex Elder 
2769c8d18425SAlex Elder 			new_snap = __rbd_add_snap_dev(rbd_dev, snap_name,
2770cd892126SAlex Elder 					snap_id, snap_size, snap_features);
27719fcbb800SAlex Elder 			if (IS_ERR(new_snap)) {
27729fcbb800SAlex Elder 				int err = PTR_ERR(new_snap);
27739fcbb800SAlex Elder 
27749fcbb800SAlex Elder 				dout("  failed to add dev, error %d\n", err);
27759fcbb800SAlex Elder 
27769fcbb800SAlex Elder 				return err;
27779fcbb800SAlex Elder 			}
277835938150SAlex Elder 
277935938150SAlex Elder 			/* New goes before existing, or at end of list */
278035938150SAlex Elder 
27819fcbb800SAlex Elder 			dout("  added dev%s\n", snap ? "" : " at end\n");
278235938150SAlex Elder 			if (snap)
278335938150SAlex Elder 				list_add_tail(&new_snap->node, &snap->node);
278435938150SAlex Elder 			else
2785523f3258SAlex Elder 				list_add_tail(&new_snap->node, head);
278635938150SAlex Elder 		} else {
278735938150SAlex Elder 			/* Already have this one */
278835938150SAlex Elder 
27899fcbb800SAlex Elder 			dout("  already present\n");
27909fcbb800SAlex Elder 
2791cd892126SAlex Elder 			rbd_assert(snap->size == snap_size);
2792aafb230eSAlex Elder 			rbd_assert(!strcmp(snap->name, snap_name));
2793cd892126SAlex Elder 			rbd_assert(snap->features == snap_features);
279435938150SAlex Elder 
279535938150SAlex Elder 			/* Done with this list entry; advance */
279635938150SAlex Elder 
279735938150SAlex Elder 			links = links->next;
2798dfc5606dSYehuda Sadeh 		}
279935938150SAlex Elder 
280035938150SAlex Elder 		/* Advance to the next entry in the snapshot context */
280135938150SAlex Elder 
280235938150SAlex Elder 		index++;
2803dfc5606dSYehuda Sadeh 	}
28049fcbb800SAlex Elder 	dout("%s: done\n", __func__);
2805dfc5606dSYehuda Sadeh 
2806dfc5606dSYehuda Sadeh 	return 0;
2807dfc5606dSYehuda Sadeh }
2808dfc5606dSYehuda Sadeh 
2809304f6808SAlex Elder /*
2810304f6808SAlex Elder  * Scan the list of snapshots and register the devices for any that
2811304f6808SAlex Elder  * have not already been registered.
2812304f6808SAlex Elder  */
2813304f6808SAlex Elder static int rbd_dev_snaps_register(struct rbd_device *rbd_dev)
2814304f6808SAlex Elder {
2815304f6808SAlex Elder 	struct rbd_snap *snap;
2816304f6808SAlex Elder 	int ret = 0;
2817304f6808SAlex Elder 
2818304f6808SAlex Elder 	dout("%s called\n", __func__);
281986ff77bbSAlex Elder 	if (WARN_ON(!device_is_registered(&rbd_dev->dev)))
282086ff77bbSAlex Elder 		return -EIO;
2821304f6808SAlex Elder 
2822304f6808SAlex Elder 	list_for_each_entry(snap, &rbd_dev->snaps, node) {
2823304f6808SAlex Elder 		if (!rbd_snap_registered(snap)) {
2824304f6808SAlex Elder 			ret = rbd_register_snap_dev(snap, &rbd_dev->dev);
2825304f6808SAlex Elder 			if (ret < 0)
2826304f6808SAlex Elder 				break;
2827304f6808SAlex Elder 		}
2828304f6808SAlex Elder 	}
2829304f6808SAlex Elder 	dout("%s: returning %d\n", __func__, ret);
2830304f6808SAlex Elder 
2831304f6808SAlex Elder 	return ret;
2832304f6808SAlex Elder }
2833304f6808SAlex Elder 
2834dfc5606dSYehuda Sadeh static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
2835dfc5606dSYehuda Sadeh {
2836dfc5606dSYehuda Sadeh 	struct device *dev;
2837cd789ab9SAlex Elder 	int ret;
2838dfc5606dSYehuda Sadeh 
2839dfc5606dSYehuda Sadeh 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
2840dfc5606dSYehuda Sadeh 
2841cd789ab9SAlex Elder 	dev = &rbd_dev->dev;
2842dfc5606dSYehuda Sadeh 	dev->bus = &rbd_bus_type;
2843dfc5606dSYehuda Sadeh 	dev->type = &rbd_device_type;
2844dfc5606dSYehuda Sadeh 	dev->parent = &rbd_root_dev;
2845dfc5606dSYehuda Sadeh 	dev->release = rbd_dev_release;
2846de71a297SAlex Elder 	dev_set_name(dev, "%d", rbd_dev->dev_id);
2847dfc5606dSYehuda Sadeh 	ret = device_register(dev);
2848dfc5606dSYehuda Sadeh 
2849dfc5606dSYehuda Sadeh 	mutex_unlock(&ctl_mutex);
2850cd789ab9SAlex Elder 
2851dfc5606dSYehuda Sadeh 	return ret;
2852602adf40SYehuda Sadeh }
2853602adf40SYehuda Sadeh 
2854dfc5606dSYehuda Sadeh static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
2855dfc5606dSYehuda Sadeh {
2856dfc5606dSYehuda Sadeh 	device_unregister(&rbd_dev->dev);
2857dfc5606dSYehuda Sadeh }
2858dfc5606dSYehuda Sadeh 
285959c2be1eSYehuda Sadeh static int rbd_init_watch_dev(struct rbd_device *rbd_dev)
286059c2be1eSYehuda Sadeh {
286159c2be1eSYehuda Sadeh 	int ret, rc;
286259c2be1eSYehuda Sadeh 
286359c2be1eSYehuda Sadeh 	do {
28640e6f322dSAlex Elder 		ret = rbd_req_sync_watch(rbd_dev);
286559c2be1eSYehuda Sadeh 		if (ret == -ERANGE) {
2866117973fbSAlex Elder 			rc = rbd_dev_refresh(rbd_dev, NULL);
286759c2be1eSYehuda Sadeh 			if (rc < 0)
286859c2be1eSYehuda Sadeh 				return rc;
286959c2be1eSYehuda Sadeh 		}
287059c2be1eSYehuda Sadeh 	} while (ret == -ERANGE);
287159c2be1eSYehuda Sadeh 
287259c2be1eSYehuda Sadeh 	return ret;
287359c2be1eSYehuda Sadeh }
287459c2be1eSYehuda Sadeh 
2875e2839308SAlex Elder static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0);
28761ddbe94eSAlex Elder 
28771ddbe94eSAlex Elder /*
2878499afd5bSAlex Elder  * Get a unique rbd identifier for the given new rbd_dev, and add
2879499afd5bSAlex Elder  * the rbd_dev to the global list.  The minimum rbd id is 1.
28801ddbe94eSAlex Elder  */
2881e2839308SAlex Elder static void rbd_dev_id_get(struct rbd_device *rbd_dev)
2882b7f23c36SAlex Elder {
2883e2839308SAlex Elder 	rbd_dev->dev_id = atomic64_inc_return(&rbd_dev_id_max);
2884499afd5bSAlex Elder 
2885499afd5bSAlex Elder 	spin_lock(&rbd_dev_list_lock);
2886499afd5bSAlex Elder 	list_add_tail(&rbd_dev->node, &rbd_dev_list);
2887499afd5bSAlex Elder 	spin_unlock(&rbd_dev_list_lock);
2888e2839308SAlex Elder 	dout("rbd_dev %p given dev id %llu\n", rbd_dev,
2889e2839308SAlex Elder 		(unsigned long long) rbd_dev->dev_id);
2890b7f23c36SAlex Elder }
2891b7f23c36SAlex Elder 
28921ddbe94eSAlex Elder /*
2893499afd5bSAlex Elder  * Remove an rbd_dev from the global list, and record that its
2894499afd5bSAlex Elder  * identifier is no longer in use.
28951ddbe94eSAlex Elder  */
2896e2839308SAlex Elder static void rbd_dev_id_put(struct rbd_device *rbd_dev)
28971ddbe94eSAlex Elder {
2898d184f6bfSAlex Elder 	struct list_head *tmp;
2899de71a297SAlex Elder 	int rbd_id = rbd_dev->dev_id;
2900d184f6bfSAlex Elder 	int max_id;
2901d184f6bfSAlex Elder 
2902aafb230eSAlex Elder 	rbd_assert(rbd_id > 0);
2903499afd5bSAlex Elder 
2904e2839308SAlex Elder 	dout("rbd_dev %p released dev id %llu\n", rbd_dev,
2905e2839308SAlex Elder 		(unsigned long long) rbd_dev->dev_id);
2906499afd5bSAlex Elder 	spin_lock(&rbd_dev_list_lock);
2907499afd5bSAlex Elder 	list_del_init(&rbd_dev->node);
2908d184f6bfSAlex Elder 
2909d184f6bfSAlex Elder 	/*
2910d184f6bfSAlex Elder 	 * If the id being "put" is not the current maximum, there
2911d184f6bfSAlex Elder 	 * is nothing special we need to do.
2912d184f6bfSAlex Elder 	 */
2913e2839308SAlex Elder 	if (rbd_id != atomic64_read(&rbd_dev_id_max)) {
2914d184f6bfSAlex Elder 		spin_unlock(&rbd_dev_list_lock);
2915d184f6bfSAlex Elder 		return;
2916d184f6bfSAlex Elder 	}
2917d184f6bfSAlex Elder 
2918d184f6bfSAlex Elder 	/*
2919d184f6bfSAlex Elder 	 * We need to update the current maximum id.  Search the
2920d184f6bfSAlex Elder 	 * list to find out what it is.  We're more likely to find
2921d184f6bfSAlex Elder 	 * the maximum at the end, so search the list backward.
2922d184f6bfSAlex Elder 	 */
2923d184f6bfSAlex Elder 	max_id = 0;
2924d184f6bfSAlex Elder 	list_for_each_prev(tmp, &rbd_dev_list) {
2925d184f6bfSAlex Elder 		struct rbd_device *rbd_dev;
2926d184f6bfSAlex Elder 
2927d184f6bfSAlex Elder 		rbd_dev = list_entry(tmp, struct rbd_device, node);
2928b213e0b1SAlex Elder 		if (rbd_dev->dev_id > max_id)
2929b213e0b1SAlex Elder 			max_id = rbd_dev->dev_id;
2930d184f6bfSAlex Elder 	}
2931499afd5bSAlex Elder 	spin_unlock(&rbd_dev_list_lock);
29321ddbe94eSAlex Elder 
29331ddbe94eSAlex Elder 	/*
2934e2839308SAlex Elder 	 * The max id could have been updated by rbd_dev_id_get(), in
2935d184f6bfSAlex Elder 	 * which case it now accurately reflects the new maximum.
2936d184f6bfSAlex Elder 	 * Be careful not to overwrite the maximum value in that
2937d184f6bfSAlex Elder 	 * case.
29381ddbe94eSAlex Elder 	 */
2939e2839308SAlex Elder 	atomic64_cmpxchg(&rbd_dev_id_max, rbd_id, max_id);
2940e2839308SAlex Elder 	dout("  max dev id has been reset\n");
2941b7f23c36SAlex Elder }
2942b7f23c36SAlex Elder 
/*
 * Advance *buf past any leading white space so that it points at the
 * first non-space character (or at the terminating '\0' if there is
 * none).  Returns the length of the token -- the run of non-white
 * space characters -- that starts there.  The string at *buf must be
 * terminated with '\0'.
 */
static inline size_t next_token(const char **buf)
{
	/*
	 * The set of characters for which isspace() is nonzero in the
	 * "C" and "POSIX" locales.
	 */
	static const char whitespace[] = " \f\n\r\t\v";
	const char *start;

	start = *buf + strspn(*buf, whitespace);	/* Skip to token */
	*buf = start;

	return strcspn(start, whitespace);		/* Token length */
}
2961e28fff26SAlex Elder 
/*
 * Locate the next token in *buf and, provided it fits, copy it into
 * the caller's token buffer and terminate the copy with '\0'.  The
 * string at *buf must be terminated with '\0' on entry.
 *
 * Returns the length of the token found, not counting the '\0'.
 * A return of 0 means no token was found.  A return value >=
 * token_size means the token did not fit, and in that case nothing
 * is written to the token buffer.
 *
 * On return *buf points just past the end of the token that was
 * found, whether or not it was copied.
 */
static inline size_t copy_token(const char **buf,
				char *token,
				size_t token_size)
{
	size_t len = next_token(buf);
	const char *start = *buf;

	*buf = start + len;
	if (len < token_size) {
		memcpy(token, start, len);
		token[len] = '\0';
	}

	return len;
}
2991e28fff26SAlex Elder 
2992e28fff26SAlex Elder /*
2993ea3352f4SAlex Elder  * Finds the next token in *buf, dynamically allocates a buffer big
2994ea3352f4SAlex Elder  * enough to hold a copy of it, and copies the token into the new
2995ea3352f4SAlex Elder  * buffer.  The copy is guaranteed to be terminated with '\0'.  Note
2996ea3352f4SAlex Elder  * that a duplicate buffer is created even for a zero-length token.
2997ea3352f4SAlex Elder  *
2998ea3352f4SAlex Elder  * Returns a pointer to the newly-allocated duplicate, or a null
2999ea3352f4SAlex Elder  * pointer if memory for the duplicate was not available.  If
3000ea3352f4SAlex Elder  * the lenp argument is a non-null pointer, the length of the token
3001ea3352f4SAlex Elder  * (not including the '\0') is returned in *lenp.
3002ea3352f4SAlex Elder  *
3003ea3352f4SAlex Elder  * If successful, the *buf pointer will be updated to point beyond
3004ea3352f4SAlex Elder  * the end of the found token.
3005ea3352f4SAlex Elder  *
3006ea3352f4SAlex Elder  * Note: uses GFP_KERNEL for allocation.
3007ea3352f4SAlex Elder  */
3008ea3352f4SAlex Elder static inline char *dup_token(const char **buf, size_t *lenp)
3009ea3352f4SAlex Elder {
3010ea3352f4SAlex Elder 	char *dup;
3011ea3352f4SAlex Elder 	size_t len;
3012ea3352f4SAlex Elder 
3013ea3352f4SAlex Elder 	len = next_token(buf);
3014ea3352f4SAlex Elder 	dup = kmalloc(len + 1, GFP_KERNEL);
3015ea3352f4SAlex Elder 	if (!dup)
3016ea3352f4SAlex Elder 		return NULL;
3017ea3352f4SAlex Elder 
3018ea3352f4SAlex Elder 	memcpy(dup, *buf, len);
3019ea3352f4SAlex Elder 	*(dup + len) = '\0';
3020ea3352f4SAlex Elder 	*buf += len;
3021ea3352f4SAlex Elder 
3022ea3352f4SAlex Elder 	if (lenp)
3023ea3352f4SAlex Elder 		*lenp = len;
3024ea3352f4SAlex Elder 
3025ea3352f4SAlex Elder 	return dup;
3026ea3352f4SAlex Elder }
3027ea3352f4SAlex Elder 
3028ea3352f4SAlex Elder /*
3029859c31dfSAlex Elder  * Parse the options provided for an "rbd add" (i.e., rbd image
3030859c31dfSAlex Elder  * mapping) request.  These arrive via a write to /sys/bus/rbd/add,
3031859c31dfSAlex Elder  * and the data written is passed here via a NUL-terminated buffer.
3032859c31dfSAlex Elder  * Returns 0 if successful or an error code otherwise.
3033d22f76e7SAlex Elder  *
3034859c31dfSAlex Elder  * The information extracted from these options is recorded in
3035859c31dfSAlex Elder  * the other parameters which return dynamically-allocated
3036859c31dfSAlex Elder  * structures:
3037859c31dfSAlex Elder  *  ceph_opts
3038859c31dfSAlex Elder  *      The address of a pointer that will refer to a ceph options
3039859c31dfSAlex Elder  *      structure.  Caller must release the returned pointer using
3040859c31dfSAlex Elder  *      ceph_destroy_options() when it is no longer needed.
3041859c31dfSAlex Elder  *  rbd_opts
3042859c31dfSAlex Elder  *	Address of an rbd options pointer.  Fully initialized by
3043859c31dfSAlex Elder  *	this function; caller must release with kfree().
3044859c31dfSAlex Elder  *  spec
3045859c31dfSAlex Elder  *	Address of an rbd image specification pointer.  Fully
3046859c31dfSAlex Elder  *	initialized by this function based on parsed options.
3047859c31dfSAlex Elder  *	Caller must release with rbd_spec_put().
3048859c31dfSAlex Elder  *
3049859c31dfSAlex Elder  * The options passed take this form:
3050859c31dfSAlex Elder  *  <mon_addrs> <options> <pool_name> <image_name> [<snap_id>]
3051859c31dfSAlex Elder  * where:
3052859c31dfSAlex Elder  *  <mon_addrs>
3053859c31dfSAlex Elder  *      A comma-separated list of one or more monitor addresses.
3054859c31dfSAlex Elder  *      A monitor address is an ip address, optionally followed
3055859c31dfSAlex Elder  *      by a port number (separated by a colon).
3056859c31dfSAlex Elder  *        I.e.:  ip1[:port1][,ip2[:port2]...]
3057859c31dfSAlex Elder  *  <options>
3058859c31dfSAlex Elder  *      A comma-separated list of ceph and/or rbd options.
3059859c31dfSAlex Elder  *  <pool_name>
3060859c31dfSAlex Elder  *      The name of the rados pool containing the rbd image.
3061859c31dfSAlex Elder  *  <image_name>
3062859c31dfSAlex Elder  *      The name of the image in that pool to map.
3063859c31dfSAlex Elder  *  <snap_id>
3064859c31dfSAlex Elder  *      An optional snapshot id.  If provided, the mapping will
3065859c31dfSAlex Elder  *      present data from the image at the time that snapshot was
3066859c31dfSAlex Elder  *      created.  The image head is used if no snapshot id is
3067859c31dfSAlex Elder  *      provided.  Snapshot mappings are always read-only.
3068a725f65eSAlex Elder  */
3069859c31dfSAlex Elder static int rbd_add_parse_args(const char *buf,
3070dc79b113SAlex Elder 				struct ceph_options **ceph_opts,
3071859c31dfSAlex Elder 				struct rbd_options **opts,
3072859c31dfSAlex Elder 				struct rbd_spec **rbd_spec)
3073a725f65eSAlex Elder {
3074e28fff26SAlex Elder 	size_t len;
3075859c31dfSAlex Elder 	char *options;
30760ddebc0cSAlex Elder 	const char *mon_addrs;
30770ddebc0cSAlex Elder 	size_t mon_addrs_size;
3078859c31dfSAlex Elder 	struct rbd_spec *spec = NULL;
30794e9afebaSAlex Elder 	struct rbd_options *rbd_opts = NULL;
3080859c31dfSAlex Elder 	struct ceph_options *copts;
3081dc79b113SAlex Elder 	int ret;
3082e28fff26SAlex Elder 
3083e28fff26SAlex Elder 	/* The first four tokens are required */
3084e28fff26SAlex Elder 
30857ef3214aSAlex Elder 	len = next_token(&buf);
30867ef3214aSAlex Elder 	if (!len)
3087dc79b113SAlex Elder 		return -EINVAL;	/* Missing monitor address(es) */
30880ddebc0cSAlex Elder 	mon_addrs = buf;
3089f28e565aSAlex Elder 	mon_addrs_size = len + 1;
30907ef3214aSAlex Elder 	buf += len;
3091a725f65eSAlex Elder 
3092dc79b113SAlex Elder 	ret = -EINVAL;
3093f28e565aSAlex Elder 	options = dup_token(&buf, NULL);
3094f28e565aSAlex Elder 	if (!options)
3095dc79b113SAlex Elder 		return -ENOMEM;
3096f28e565aSAlex Elder 	if (!*options)
3097f28e565aSAlex Elder 		goto out_err;	/* Missing options */
3098a725f65eSAlex Elder 
3099859c31dfSAlex Elder 	spec = rbd_spec_alloc();
3100859c31dfSAlex Elder 	if (!spec)
3101f28e565aSAlex Elder 		goto out_mem;
3102859c31dfSAlex Elder 
3103859c31dfSAlex Elder 	spec->pool_name = dup_token(&buf, NULL);
3104859c31dfSAlex Elder 	if (!spec->pool_name)
3105859c31dfSAlex Elder 		goto out_mem;
3106859c31dfSAlex Elder 	if (!*spec->pool_name)
3107f28e565aSAlex Elder 		goto out_err;	/* Missing pool name */
3108e28fff26SAlex Elder 
3109859c31dfSAlex Elder 	spec->image_name = dup_token(&buf, &spec->image_name_len);
3110859c31dfSAlex Elder 	if (!spec->image_name)
3111f28e565aSAlex Elder 		goto out_mem;
3112859c31dfSAlex Elder 	if (!*spec->image_name)
3113f28e565aSAlex Elder 		goto out_err;	/* Missing image name */
3114e28fff26SAlex Elder 
3115f28e565aSAlex Elder 	/*
3116f28e565aSAlex Elder 	 * Snapshot name is optional; default is to use "-"
3117f28e565aSAlex Elder 	 * (indicating the head/no snapshot).
3118f28e565aSAlex Elder 	 */
31193feeb894SAlex Elder 	len = next_token(&buf);
3120820a5f3eSAlex Elder 	if (!len) {
31213feeb894SAlex Elder 		buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */
31223feeb894SAlex Elder 		len = sizeof (RBD_SNAP_HEAD_NAME) - 1;
3123f28e565aSAlex Elder 	} else if (len > RBD_MAX_SNAP_NAME_LEN) {
3124dc79b113SAlex Elder 		ret = -ENAMETOOLONG;
3125f28e565aSAlex Elder 		goto out_err;
3126849b4260SAlex Elder 	}
3127859c31dfSAlex Elder 	spec->snap_name = kmalloc(len + 1, GFP_KERNEL);
3128859c31dfSAlex Elder 	if (!spec->snap_name)
3129f28e565aSAlex Elder 		goto out_mem;
3130859c31dfSAlex Elder 	memcpy(spec->snap_name, buf, len);
3131859c31dfSAlex Elder 	*(spec->snap_name + len) = '\0';
3132e5c35534SAlex Elder 
31330ddebc0cSAlex Elder 	/* Initialize all rbd options to the defaults */
3134e28fff26SAlex Elder 
31354e9afebaSAlex Elder 	rbd_opts = kzalloc(sizeof (*rbd_opts), GFP_KERNEL);
31364e9afebaSAlex Elder 	if (!rbd_opts)
31374e9afebaSAlex Elder 		goto out_mem;
31384e9afebaSAlex Elder 
31394e9afebaSAlex Elder 	rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
3140d22f76e7SAlex Elder 
3141859c31dfSAlex Elder 	copts = ceph_parse_options(options, mon_addrs,
31420ddebc0cSAlex Elder 					mon_addrs + mon_addrs_size - 1,
31434e9afebaSAlex Elder 					parse_rbd_opts_token, rbd_opts);
3144859c31dfSAlex Elder 	if (IS_ERR(copts)) {
3145859c31dfSAlex Elder 		ret = PTR_ERR(copts);
3146dc79b113SAlex Elder 		goto out_err;
3147dc79b113SAlex Elder 	}
3148859c31dfSAlex Elder 	kfree(options);
3149859c31dfSAlex Elder 
3150859c31dfSAlex Elder 	*ceph_opts = copts;
31514e9afebaSAlex Elder 	*opts = rbd_opts;
3152859c31dfSAlex Elder 	*rbd_spec = spec;
31530ddebc0cSAlex Elder 
3154dc79b113SAlex Elder 	return 0;
3155f28e565aSAlex Elder out_mem:
3156dc79b113SAlex Elder 	ret = -ENOMEM;
3157d22f76e7SAlex Elder out_err:
3158859c31dfSAlex Elder 	kfree(rbd_opts);
3159859c31dfSAlex Elder 	rbd_spec_put(spec);
3160f28e565aSAlex Elder 	kfree(options);
3161d22f76e7SAlex Elder 
3162dc79b113SAlex Elder 	return ret;
3163a725f65eSAlex Elder }
3164a725f65eSAlex Elder 
3165589d30e0SAlex Elder /*
3166589d30e0SAlex Elder  * An rbd format 2 image has a unique identifier, distinct from the
3167589d30e0SAlex Elder  * name given to it by the user.  Internally, that identifier is
3168589d30e0SAlex Elder  * what's used to specify the names of objects related to the image.
3169589d30e0SAlex Elder  *
3170589d30e0SAlex Elder  * A special "rbd id" object is used to map an rbd image name to its
3171589d30e0SAlex Elder  * id.  If that object doesn't exist, then there is no v2 rbd image
3172589d30e0SAlex Elder  * with the supplied name.
3173589d30e0SAlex Elder  *
3174589d30e0SAlex Elder  * This function will record the given rbd_dev's image_id field if
3175589d30e0SAlex Elder  * it can be determined, and in that case will return 0.  If any
3176589d30e0SAlex Elder  * errors occur a negative errno will be returned and the rbd_dev's
3177589d30e0SAlex Elder  * image_id field will be unchanged (and should be NULL).
3178589d30e0SAlex Elder  */
3179589d30e0SAlex Elder static int rbd_dev_image_id(struct rbd_device *rbd_dev)
3180589d30e0SAlex Elder {
3181589d30e0SAlex Elder 	int ret;
3182589d30e0SAlex Elder 	size_t size;
3183589d30e0SAlex Elder 	char *object_name;
3184589d30e0SAlex Elder 	void *response;
3185589d30e0SAlex Elder 	void *p;
3186589d30e0SAlex Elder 
3187589d30e0SAlex Elder 	/*
31882c0d0a10SAlex Elder 	 * When probing a parent image, the image id is already
31892c0d0a10SAlex Elder 	 * known (and the image name likely is not).  There's no
31902c0d0a10SAlex Elder 	 * need to fetch the image id again in this case.
31912c0d0a10SAlex Elder 	 */
31922c0d0a10SAlex Elder 	if (rbd_dev->spec->image_id)
31932c0d0a10SAlex Elder 		return 0;
31942c0d0a10SAlex Elder 
31952c0d0a10SAlex Elder 	/*
3196589d30e0SAlex Elder 	 * First, see if the format 2 image id file exists, and if
3197589d30e0SAlex Elder 	 * so, get the image's persistent id from it.
3198589d30e0SAlex Elder 	 */
31990d7dbfceSAlex Elder 	size = sizeof (RBD_ID_PREFIX) + rbd_dev->spec->image_name_len;
3200589d30e0SAlex Elder 	object_name = kmalloc(size, GFP_NOIO);
3201589d30e0SAlex Elder 	if (!object_name)
3202589d30e0SAlex Elder 		return -ENOMEM;
32030d7dbfceSAlex Elder 	sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->spec->image_name);
3204589d30e0SAlex Elder 	dout("rbd id object name is %s\n", object_name);
3205589d30e0SAlex Elder 
3206589d30e0SAlex Elder 	/* Response will be an encoded string, which includes a length */
3207589d30e0SAlex Elder 
3208589d30e0SAlex Elder 	size = sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX;
3209589d30e0SAlex Elder 	response = kzalloc(size, GFP_NOIO);
3210589d30e0SAlex Elder 	if (!response) {
3211589d30e0SAlex Elder 		ret = -ENOMEM;
3212589d30e0SAlex Elder 		goto out;
3213589d30e0SAlex Elder 	}
3214589d30e0SAlex Elder 
3215589d30e0SAlex Elder 	ret = rbd_req_sync_exec(rbd_dev, object_name,
3216589d30e0SAlex Elder 				"rbd", "get_id",
3217589d30e0SAlex Elder 				NULL, 0,
3218589d30e0SAlex Elder 				response, RBD_IMAGE_ID_LEN_MAX,
3219589d30e0SAlex Elder 				CEPH_OSD_FLAG_READ, NULL);
3220589d30e0SAlex Elder 	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
3221589d30e0SAlex Elder 	if (ret < 0)
3222589d30e0SAlex Elder 		goto out;
3223a0ea3a40SAlex Elder 	ret = 0;    /* rbd_req_sync_exec() can return positive */
3224589d30e0SAlex Elder 
3225589d30e0SAlex Elder 	p = response;
32260d7dbfceSAlex Elder 	rbd_dev->spec->image_id = ceph_extract_encoded_string(&p,
3227589d30e0SAlex Elder 						p + RBD_IMAGE_ID_LEN_MAX,
32280d7dbfceSAlex Elder 						&rbd_dev->spec->image_id_len,
3229589d30e0SAlex Elder 						GFP_NOIO);
32300d7dbfceSAlex Elder 	if (IS_ERR(rbd_dev->spec->image_id)) {
32310d7dbfceSAlex Elder 		ret = PTR_ERR(rbd_dev->spec->image_id);
32320d7dbfceSAlex Elder 		rbd_dev->spec->image_id = NULL;
3233589d30e0SAlex Elder 	} else {
32340d7dbfceSAlex Elder 		dout("image_id is %s\n", rbd_dev->spec->image_id);
3235589d30e0SAlex Elder 	}
3236589d30e0SAlex Elder out:
3237589d30e0SAlex Elder 	kfree(response);
3238589d30e0SAlex Elder 	kfree(object_name);
3239589d30e0SAlex Elder 
3240589d30e0SAlex Elder 	return ret;
3241589d30e0SAlex Elder }
3242589d30e0SAlex Elder 
3243a30b71b9SAlex Elder static int rbd_dev_v1_probe(struct rbd_device *rbd_dev)
3244a30b71b9SAlex Elder {
3245a30b71b9SAlex Elder 	int ret;
3246a30b71b9SAlex Elder 	size_t size;
3247a30b71b9SAlex Elder 
3248a30b71b9SAlex Elder 	/* Version 1 images have no id; empty string is used */
3249a30b71b9SAlex Elder 
32500d7dbfceSAlex Elder 	rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL);
32510d7dbfceSAlex Elder 	if (!rbd_dev->spec->image_id)
3252a30b71b9SAlex Elder 		return -ENOMEM;
32530d7dbfceSAlex Elder 	rbd_dev->spec->image_id_len = 0;
3254a30b71b9SAlex Elder 
3255a30b71b9SAlex Elder 	/* Record the header object name for this rbd image. */
3256a30b71b9SAlex Elder 
32570d7dbfceSAlex Elder 	size = rbd_dev->spec->image_name_len + sizeof (RBD_SUFFIX);
3258a30b71b9SAlex Elder 	rbd_dev->header_name = kmalloc(size, GFP_KERNEL);
3259a30b71b9SAlex Elder 	if (!rbd_dev->header_name) {
3260a30b71b9SAlex Elder 		ret = -ENOMEM;
3261a30b71b9SAlex Elder 		goto out_err;
3262a30b71b9SAlex Elder 	}
32630d7dbfceSAlex Elder 	sprintf(rbd_dev->header_name, "%s%s",
32640d7dbfceSAlex Elder 		rbd_dev->spec->image_name, RBD_SUFFIX);
3265a30b71b9SAlex Elder 
3266a30b71b9SAlex Elder 	/* Populate rbd image metadata */
3267a30b71b9SAlex Elder 
3268a30b71b9SAlex Elder 	ret = rbd_read_header(rbd_dev, &rbd_dev->header);
3269a30b71b9SAlex Elder 	if (ret < 0)
3270a30b71b9SAlex Elder 		goto out_err;
327186b00e0dSAlex Elder 
327286b00e0dSAlex Elder 	/* Version 1 images have no parent (no layering) */
327386b00e0dSAlex Elder 
327486b00e0dSAlex Elder 	rbd_dev->parent_spec = NULL;
327586b00e0dSAlex Elder 	rbd_dev->parent_overlap = 0;
327686b00e0dSAlex Elder 
3277a30b71b9SAlex Elder 	rbd_dev->image_format = 1;
3278a30b71b9SAlex Elder 
3279a30b71b9SAlex Elder 	dout("discovered version 1 image, header name is %s\n",
3280a30b71b9SAlex Elder 		rbd_dev->header_name);
3281a30b71b9SAlex Elder 
3282a30b71b9SAlex Elder 	return 0;
3283a30b71b9SAlex Elder 
3284a30b71b9SAlex Elder out_err:
3285a30b71b9SAlex Elder 	kfree(rbd_dev->header_name);
3286a30b71b9SAlex Elder 	rbd_dev->header_name = NULL;
32870d7dbfceSAlex Elder 	kfree(rbd_dev->spec->image_id);
32880d7dbfceSAlex Elder 	rbd_dev->spec->image_id = NULL;
3289a30b71b9SAlex Elder 
3290a30b71b9SAlex Elder 	return ret;
3291a30b71b9SAlex Elder }
3292a30b71b9SAlex Elder 
3293a30b71b9SAlex Elder static int rbd_dev_v2_probe(struct rbd_device *rbd_dev)
3294a30b71b9SAlex Elder {
3295a30b71b9SAlex Elder 	size_t size;
32969d475de5SAlex Elder 	int ret;
32976e14b1a6SAlex Elder 	u64 ver = 0;
3298a30b71b9SAlex Elder 
3299a30b71b9SAlex Elder 	/*
3300a30b71b9SAlex Elder 	 * Image id was filled in by the caller.  Record the header
3301a30b71b9SAlex Elder 	 * object name for this rbd image.
3302a30b71b9SAlex Elder 	 */
33030d7dbfceSAlex Elder 	size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->spec->image_id_len;
3304a30b71b9SAlex Elder 	rbd_dev->header_name = kmalloc(size, GFP_KERNEL);
3305a30b71b9SAlex Elder 	if (!rbd_dev->header_name)
3306a30b71b9SAlex Elder 		return -ENOMEM;
3307a30b71b9SAlex Elder 	sprintf(rbd_dev->header_name, "%s%s",
33080d7dbfceSAlex Elder 			RBD_HEADER_PREFIX, rbd_dev->spec->image_id);
33099d475de5SAlex Elder 
33109d475de5SAlex Elder 	/* Get the size and object order for the image */
33119d475de5SAlex Elder 
33129d475de5SAlex Elder 	ret = rbd_dev_v2_image_size(rbd_dev);
33139d475de5SAlex Elder 	if (ret < 0)
33149d475de5SAlex Elder 		goto out_err;
33151e130199SAlex Elder 
33161e130199SAlex Elder 	/* Get the object prefix (a.k.a. block_name) for the image */
33171e130199SAlex Elder 
33181e130199SAlex Elder 	ret = rbd_dev_v2_object_prefix(rbd_dev);
33191e130199SAlex Elder 	if (ret < 0)
33201e130199SAlex Elder 		goto out_err;
3321b1b5402aSAlex Elder 
3322d889140cSAlex Elder 	/* Get the and check features for the image */
3323b1b5402aSAlex Elder 
3324b1b5402aSAlex Elder 	ret = rbd_dev_v2_features(rbd_dev);
3325b1b5402aSAlex Elder 	if (ret < 0)
3326b1b5402aSAlex Elder 		goto out_err;
332735d489f9SAlex Elder 
332886b00e0dSAlex Elder 	/* If the image supports layering, get the parent info */
332986b00e0dSAlex Elder 
333086b00e0dSAlex Elder 	if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
333186b00e0dSAlex Elder 		ret = rbd_dev_v2_parent_info(rbd_dev);
333286b00e0dSAlex Elder 		if (ret < 0)
333386b00e0dSAlex Elder 			goto out_err;
333486b00e0dSAlex Elder 	}
333586b00e0dSAlex Elder 
33366e14b1a6SAlex Elder 	/* crypto and compression type aren't (yet) supported for v2 images */
333735d489f9SAlex Elder 
33386e14b1a6SAlex Elder 	rbd_dev->header.crypt_type = 0;
33396e14b1a6SAlex Elder 	rbd_dev->header.comp_type = 0;
33406e14b1a6SAlex Elder 
33416e14b1a6SAlex Elder 	/* Get the snapshot context, plus the header version */
33426e14b1a6SAlex Elder 
33436e14b1a6SAlex Elder 	ret = rbd_dev_v2_snap_context(rbd_dev, &ver);
334435d489f9SAlex Elder 	if (ret)
334535d489f9SAlex Elder 		goto out_err;
33466e14b1a6SAlex Elder 	rbd_dev->header.obj_version = ver;
33476e14b1a6SAlex Elder 
3348a30b71b9SAlex Elder 	rbd_dev->image_format = 2;
3349a30b71b9SAlex Elder 
3350a30b71b9SAlex Elder 	dout("discovered version 2 image, header name is %s\n",
3351a30b71b9SAlex Elder 		rbd_dev->header_name);
3352a30b71b9SAlex Elder 
335335152979SAlex Elder 	return 0;
33549d475de5SAlex Elder out_err:
335586b00e0dSAlex Elder 	rbd_dev->parent_overlap = 0;
335686b00e0dSAlex Elder 	rbd_spec_put(rbd_dev->parent_spec);
335786b00e0dSAlex Elder 	rbd_dev->parent_spec = NULL;
33589d475de5SAlex Elder 	kfree(rbd_dev->header_name);
33599d475de5SAlex Elder 	rbd_dev->header_name = NULL;
33601e130199SAlex Elder 	kfree(rbd_dev->header.object_prefix);
33611e130199SAlex Elder 	rbd_dev->header.object_prefix = NULL;
33629d475de5SAlex Elder 
33639d475de5SAlex Elder 	return ret;
3364a30b71b9SAlex Elder }
3365a30b71b9SAlex Elder 
336683a06263SAlex Elder static int rbd_dev_probe_finish(struct rbd_device *rbd_dev)
336783a06263SAlex Elder {
336883a06263SAlex Elder 	int ret;
336983a06263SAlex Elder 
337083a06263SAlex Elder 	/* no need to lock here, as rbd_dev is not registered yet */
337183a06263SAlex Elder 	ret = rbd_dev_snaps_update(rbd_dev);
337283a06263SAlex Elder 	if (ret)
337383a06263SAlex Elder 		return ret;
337483a06263SAlex Elder 
337583a06263SAlex Elder 	ret = rbd_dev_set_mapping(rbd_dev);
337683a06263SAlex Elder 	if (ret)
337783a06263SAlex Elder 		goto err_out_snaps;
337883a06263SAlex Elder 
337983a06263SAlex Elder 	/* generate unique id: find highest unique id, add one */
338083a06263SAlex Elder 	rbd_dev_id_get(rbd_dev);
338183a06263SAlex Elder 
338283a06263SAlex Elder 	/* Fill in the device name, now that we have its id. */
338383a06263SAlex Elder 	BUILD_BUG_ON(DEV_NAME_LEN
338483a06263SAlex Elder 			< sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH);
338583a06263SAlex Elder 	sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id);
338683a06263SAlex Elder 
338783a06263SAlex Elder 	/* Get our block major device number. */
338883a06263SAlex Elder 
338983a06263SAlex Elder 	ret = register_blkdev(0, rbd_dev->name);
339083a06263SAlex Elder 	if (ret < 0)
339183a06263SAlex Elder 		goto err_out_id;
339283a06263SAlex Elder 	rbd_dev->major = ret;
339383a06263SAlex Elder 
339483a06263SAlex Elder 	/* Set up the blkdev mapping. */
339583a06263SAlex Elder 
339683a06263SAlex Elder 	ret = rbd_init_disk(rbd_dev);
339783a06263SAlex Elder 	if (ret)
339883a06263SAlex Elder 		goto err_out_blkdev;
339983a06263SAlex Elder 
340083a06263SAlex Elder 	ret = rbd_bus_add_dev(rbd_dev);
340183a06263SAlex Elder 	if (ret)
340283a06263SAlex Elder 		goto err_out_disk;
340383a06263SAlex Elder 
340483a06263SAlex Elder 	/*
340583a06263SAlex Elder 	 * At this point cleanup in the event of an error is the job
340683a06263SAlex Elder 	 * of the sysfs code (initiated by rbd_bus_del_dev()).
340783a06263SAlex Elder 	 */
340883a06263SAlex Elder 	down_write(&rbd_dev->header_rwsem);
340983a06263SAlex Elder 	ret = rbd_dev_snaps_register(rbd_dev);
341083a06263SAlex Elder 	up_write(&rbd_dev->header_rwsem);
341183a06263SAlex Elder 	if (ret)
341283a06263SAlex Elder 		goto err_out_bus;
341383a06263SAlex Elder 
341483a06263SAlex Elder 	ret = rbd_init_watch_dev(rbd_dev);
341583a06263SAlex Elder 	if (ret)
341683a06263SAlex Elder 		goto err_out_bus;
341783a06263SAlex Elder 
341883a06263SAlex Elder 	/* Everything's ready.  Announce the disk to the world. */
341983a06263SAlex Elder 
342083a06263SAlex Elder 	add_disk(rbd_dev->disk);
342183a06263SAlex Elder 
342283a06263SAlex Elder 	pr_info("%s: added with size 0x%llx\n", rbd_dev->disk->disk_name,
342383a06263SAlex Elder 		(unsigned long long) rbd_dev->mapping.size);
342483a06263SAlex Elder 
342583a06263SAlex Elder 	return ret;
342683a06263SAlex Elder err_out_bus:
342783a06263SAlex Elder 	/* this will also clean up rest of rbd_dev stuff */
342883a06263SAlex Elder 
342983a06263SAlex Elder 	rbd_bus_del_dev(rbd_dev);
343083a06263SAlex Elder 
343183a06263SAlex Elder 	return ret;
343283a06263SAlex Elder err_out_disk:
343383a06263SAlex Elder 	rbd_free_disk(rbd_dev);
343483a06263SAlex Elder err_out_blkdev:
343583a06263SAlex Elder 	unregister_blkdev(rbd_dev->major, rbd_dev->name);
343683a06263SAlex Elder err_out_id:
343783a06263SAlex Elder 	rbd_dev_id_put(rbd_dev);
343883a06263SAlex Elder err_out_snaps:
343983a06263SAlex Elder 	rbd_remove_all_snaps(rbd_dev);
344083a06263SAlex Elder 
344183a06263SAlex Elder 	return ret;
344283a06263SAlex Elder }
344383a06263SAlex Elder 
3444a30b71b9SAlex Elder /*
3445a30b71b9SAlex Elder  * Probe for the existence of the header object for the given rbd
3446a30b71b9SAlex Elder  * device.  For format 2 images this includes determining the image
3447a30b71b9SAlex Elder  * id.
3448a30b71b9SAlex Elder  */
3449a30b71b9SAlex Elder static int rbd_dev_probe(struct rbd_device *rbd_dev)
3450a30b71b9SAlex Elder {
3451a30b71b9SAlex Elder 	int ret;
3452a30b71b9SAlex Elder 
3453a30b71b9SAlex Elder 	/*
3454a30b71b9SAlex Elder 	 * Get the id from the image id object.  If it's not a
3455a30b71b9SAlex Elder 	 * format 2 image, we'll get ENOENT back, and we'll assume
3456a30b71b9SAlex Elder 	 * it's a format 1 image.
3457a30b71b9SAlex Elder 	 */
3458a30b71b9SAlex Elder 	ret = rbd_dev_image_id(rbd_dev);
3459a30b71b9SAlex Elder 	if (ret)
3460a30b71b9SAlex Elder 		ret = rbd_dev_v1_probe(rbd_dev);
3461a30b71b9SAlex Elder 	else
3462a30b71b9SAlex Elder 		ret = rbd_dev_v2_probe(rbd_dev);
346383a06263SAlex Elder 	if (ret) {
3464a30b71b9SAlex Elder 		dout("probe failed, returning %d\n", ret);
3465a30b71b9SAlex Elder 
3466a30b71b9SAlex Elder 		return ret;
3467a30b71b9SAlex Elder 	}
3468a30b71b9SAlex Elder 
346983a06263SAlex Elder 	ret = rbd_dev_probe_finish(rbd_dev);
347083a06263SAlex Elder 	if (ret)
347183a06263SAlex Elder 		rbd_header_free(&rbd_dev->header);
347283a06263SAlex Elder 
347383a06263SAlex Elder 	return ret;
347483a06263SAlex Elder }
347583a06263SAlex Elder 
347659c2be1eSYehuda Sadeh static ssize_t rbd_add(struct bus_type *bus,
347759c2be1eSYehuda Sadeh 		       const char *buf,
347859c2be1eSYehuda Sadeh 		       size_t count)
3479602adf40SYehuda Sadeh {
3480cb8627c7SAlex Elder 	struct rbd_device *rbd_dev = NULL;
3481dc79b113SAlex Elder 	struct ceph_options *ceph_opts = NULL;
34824e9afebaSAlex Elder 	struct rbd_options *rbd_opts = NULL;
3483859c31dfSAlex Elder 	struct rbd_spec *spec = NULL;
34849d3997fdSAlex Elder 	struct rbd_client *rbdc;
348527cc2594SAlex Elder 	struct ceph_osd_client *osdc;
348627cc2594SAlex Elder 	int rc = -ENOMEM;
3487602adf40SYehuda Sadeh 
3488602adf40SYehuda Sadeh 	if (!try_module_get(THIS_MODULE))
3489602adf40SYehuda Sadeh 		return -ENODEV;
3490602adf40SYehuda Sadeh 
3491a725f65eSAlex Elder 	/* parse add command */
3492859c31dfSAlex Elder 	rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec);
3493dc79b113SAlex Elder 	if (rc < 0)
3494bd4ba655SAlex Elder 		goto err_out_module;
3495a725f65eSAlex Elder 
34969d3997fdSAlex Elder 	rbdc = rbd_get_client(ceph_opts);
34979d3997fdSAlex Elder 	if (IS_ERR(rbdc)) {
34989d3997fdSAlex Elder 		rc = PTR_ERR(rbdc);
34990ddebc0cSAlex Elder 		goto err_out_args;
35009d3997fdSAlex Elder 	}
3501c53d5893SAlex Elder 	ceph_opts = NULL;	/* rbd_dev client now owns this */
3502602adf40SYehuda Sadeh 
3503602adf40SYehuda Sadeh 	/* pick the pool */
35049d3997fdSAlex Elder 	osdc = &rbdc->client->osdc;
3505859c31dfSAlex Elder 	rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name);
3506602adf40SYehuda Sadeh 	if (rc < 0)
3507602adf40SYehuda Sadeh 		goto err_out_client;
3508859c31dfSAlex Elder 	spec->pool_id = (u64) rc;
3509859c31dfSAlex Elder 
3510c53d5893SAlex Elder 	rbd_dev = rbd_dev_create(rbdc, spec);
3511bd4ba655SAlex Elder 	if (!rbd_dev)
3512bd4ba655SAlex Elder 		goto err_out_client;
3513c53d5893SAlex Elder 	rbdc = NULL;		/* rbd_dev now owns this */
3514c53d5893SAlex Elder 	spec = NULL;		/* rbd_dev now owns this */
3515602adf40SYehuda Sadeh 
3516bd4ba655SAlex Elder 	rbd_dev->mapping.read_only = rbd_opts->read_only;
3517c53d5893SAlex Elder 	kfree(rbd_opts);
3518c53d5893SAlex Elder 	rbd_opts = NULL;	/* done with this */
3519bd4ba655SAlex Elder 
3520a30b71b9SAlex Elder 	rc = rbd_dev_probe(rbd_dev);
3521a30b71b9SAlex Elder 	if (rc < 0)
3522c53d5893SAlex Elder 		goto err_out_rbd_dev;
352305fd6f6fSAlex Elder 
3524602adf40SYehuda Sadeh 	return count;
3525c53d5893SAlex Elder err_out_rbd_dev:
3526c53d5893SAlex Elder 	rbd_dev_destroy(rbd_dev);
3527bd4ba655SAlex Elder err_out_client:
35289d3997fdSAlex Elder 	rbd_put_client(rbdc);
35290ddebc0cSAlex Elder err_out_args:
353078cea76eSAlex Elder 	if (ceph_opts)
353178cea76eSAlex Elder 		ceph_destroy_options(ceph_opts);
35324e9afebaSAlex Elder 	kfree(rbd_opts);
3533859c31dfSAlex Elder 	rbd_spec_put(spec);
3534bd4ba655SAlex Elder err_out_module:
3535bd4ba655SAlex Elder 	module_put(THIS_MODULE);
353627cc2594SAlex Elder 
3537602adf40SYehuda Sadeh 	dout("Error adding device %s\n", buf);
353827cc2594SAlex Elder 
353927cc2594SAlex Elder 	return (ssize_t) rc;
3540602adf40SYehuda Sadeh }
3541602adf40SYehuda Sadeh 
3542de71a297SAlex Elder static struct rbd_device *__rbd_get_dev(unsigned long dev_id)
3543602adf40SYehuda Sadeh {
3544602adf40SYehuda Sadeh 	struct list_head *tmp;
3545602adf40SYehuda Sadeh 	struct rbd_device *rbd_dev;
3546602adf40SYehuda Sadeh 
3547e124a82fSAlex Elder 	spin_lock(&rbd_dev_list_lock);
3548602adf40SYehuda Sadeh 	list_for_each(tmp, &rbd_dev_list) {
3549602adf40SYehuda Sadeh 		rbd_dev = list_entry(tmp, struct rbd_device, node);
3550de71a297SAlex Elder 		if (rbd_dev->dev_id == dev_id) {
3551e124a82fSAlex Elder 			spin_unlock(&rbd_dev_list_lock);
3552602adf40SYehuda Sadeh 			return rbd_dev;
3553602adf40SYehuda Sadeh 		}
3554e124a82fSAlex Elder 	}
3555e124a82fSAlex Elder 	spin_unlock(&rbd_dev_list_lock);
3556602adf40SYehuda Sadeh 	return NULL;
3557602adf40SYehuda Sadeh }
3558602adf40SYehuda Sadeh 
3559dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev)
3560602adf40SYehuda Sadeh {
3561593a9e7bSAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
3562602adf40SYehuda Sadeh 
35631dbb4399SAlex Elder 	if (rbd_dev->watch_request) {
35641dbb4399SAlex Elder 		struct ceph_client *client = rbd_dev->rbd_client->client;
35651dbb4399SAlex Elder 
35661dbb4399SAlex Elder 		ceph_osdc_unregister_linger_request(&client->osdc,
356759c2be1eSYehuda Sadeh 						    rbd_dev->watch_request);
35681dbb4399SAlex Elder 	}
356959c2be1eSYehuda Sadeh 	if (rbd_dev->watch_event)
3570070c633fSAlex Elder 		rbd_req_sync_unwatch(rbd_dev);
357159c2be1eSYehuda Sadeh 
3572602adf40SYehuda Sadeh 
3573602adf40SYehuda Sadeh 	/* clean up and free blkdev */
3574602adf40SYehuda Sadeh 	rbd_free_disk(rbd_dev);
3575602adf40SYehuda Sadeh 	unregister_blkdev(rbd_dev->major, rbd_dev->name);
357632eec68dSAlex Elder 
35772ac4e75dSAlex Elder 	/* release allocated disk header fields */
35782ac4e75dSAlex Elder 	rbd_header_free(&rbd_dev->header);
35792ac4e75dSAlex Elder 
358032eec68dSAlex Elder 	/* done with the id, and with the rbd_dev */
3581e2839308SAlex Elder 	rbd_dev_id_put(rbd_dev);
3582c53d5893SAlex Elder 	rbd_assert(rbd_dev->rbd_client != NULL);
3583c53d5893SAlex Elder 	rbd_dev_destroy(rbd_dev);
3584602adf40SYehuda Sadeh 
3585602adf40SYehuda Sadeh 	/* release module ref */
3586602adf40SYehuda Sadeh 	module_put(THIS_MODULE);
3587602adf40SYehuda Sadeh }
3588602adf40SYehuda Sadeh 
3589dfc5606dSYehuda Sadeh static ssize_t rbd_remove(struct bus_type *bus,
3590602adf40SYehuda Sadeh 			  const char *buf,
3591602adf40SYehuda Sadeh 			  size_t count)
3592602adf40SYehuda Sadeh {
3593602adf40SYehuda Sadeh 	struct rbd_device *rbd_dev = NULL;
3594602adf40SYehuda Sadeh 	int target_id, rc;
3595602adf40SYehuda Sadeh 	unsigned long ul;
3596602adf40SYehuda Sadeh 	int ret = count;
3597602adf40SYehuda Sadeh 
3598602adf40SYehuda Sadeh 	rc = strict_strtoul(buf, 10, &ul);
3599602adf40SYehuda Sadeh 	if (rc)
3600602adf40SYehuda Sadeh 		return rc;
3601602adf40SYehuda Sadeh 
3602602adf40SYehuda Sadeh 	/* convert to int; abort if we lost anything in the conversion */
3603602adf40SYehuda Sadeh 	target_id = (int) ul;
3604602adf40SYehuda Sadeh 	if (target_id != ul)
3605602adf40SYehuda Sadeh 		return -EINVAL;
3606602adf40SYehuda Sadeh 
3607602adf40SYehuda Sadeh 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
3608602adf40SYehuda Sadeh 
3609602adf40SYehuda Sadeh 	rbd_dev = __rbd_get_dev(target_id);
3610602adf40SYehuda Sadeh 	if (!rbd_dev) {
3611602adf40SYehuda Sadeh 		ret = -ENOENT;
3612602adf40SYehuda Sadeh 		goto done;
3613602adf40SYehuda Sadeh 	}
3614602adf40SYehuda Sadeh 
361541f38c2bSAlex Elder 	rbd_remove_all_snaps(rbd_dev);
3616dfc5606dSYehuda Sadeh 	rbd_bus_del_dev(rbd_dev);
3617602adf40SYehuda Sadeh 
3618602adf40SYehuda Sadeh done:
3619602adf40SYehuda Sadeh 	mutex_unlock(&ctl_mutex);
3620aafb230eSAlex Elder 
3621602adf40SYehuda Sadeh 	return ret;
3622602adf40SYehuda Sadeh }
3623602adf40SYehuda Sadeh 
3624602adf40SYehuda Sadeh /*
3625602adf40SYehuda Sadeh  * create control files in sysfs
3626dfc5606dSYehuda Sadeh  * /sys/bus/rbd/...
3627602adf40SYehuda Sadeh  */
3628602adf40SYehuda Sadeh static int rbd_sysfs_init(void)
3629602adf40SYehuda Sadeh {
3630dfc5606dSYehuda Sadeh 	int ret;
3631602adf40SYehuda Sadeh 
3632fed4c143SAlex Elder 	ret = device_register(&rbd_root_dev);
3633dfc5606dSYehuda Sadeh 	if (ret < 0)
3634dfc5606dSYehuda Sadeh 		return ret;
3635602adf40SYehuda Sadeh 
3636fed4c143SAlex Elder 	ret = bus_register(&rbd_bus_type);
3637fed4c143SAlex Elder 	if (ret < 0)
3638fed4c143SAlex Elder 		device_unregister(&rbd_root_dev);
3639602adf40SYehuda Sadeh 
3640602adf40SYehuda Sadeh 	return ret;
3641602adf40SYehuda Sadeh }
3642602adf40SYehuda Sadeh 
3643602adf40SYehuda Sadeh static void rbd_sysfs_cleanup(void)
3644602adf40SYehuda Sadeh {
3645dfc5606dSYehuda Sadeh 	bus_unregister(&rbd_bus_type);
3646fed4c143SAlex Elder 	device_unregister(&rbd_root_dev);
3647602adf40SYehuda Sadeh }
3648602adf40SYehuda Sadeh 
3649602adf40SYehuda Sadeh int __init rbd_init(void)
3650602adf40SYehuda Sadeh {
3651602adf40SYehuda Sadeh 	int rc;
3652602adf40SYehuda Sadeh 
3653602adf40SYehuda Sadeh 	rc = rbd_sysfs_init();
3654602adf40SYehuda Sadeh 	if (rc)
3655602adf40SYehuda Sadeh 		return rc;
3656f0f8cef5SAlex Elder 	pr_info("loaded " RBD_DRV_NAME_LONG "\n");
3657602adf40SYehuda Sadeh 	return 0;
3658602adf40SYehuda Sadeh }
3659602adf40SYehuda Sadeh 
3660602adf40SYehuda Sadeh void __exit rbd_exit(void)
3661602adf40SYehuda Sadeh {
3662602adf40SYehuda Sadeh 	rbd_sysfs_cleanup();
3663602adf40SYehuda Sadeh }
3664602adf40SYehuda Sadeh 
3665602adf40SYehuda Sadeh module_init(rbd_init);
3666602adf40SYehuda Sadeh module_exit(rbd_exit);
3667602adf40SYehuda Sadeh 
3668602adf40SYehuda Sadeh MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
3669602adf40SYehuda Sadeh MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
3670602adf40SYehuda Sadeh MODULE_DESCRIPTION("rados block device");
3671602adf40SYehuda Sadeh 
3672602adf40SYehuda Sadeh /* following authorship retained from original osdblk.c */
3673602adf40SYehuda Sadeh MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");
3674602adf40SYehuda Sadeh 
3675602adf40SYehuda Sadeh MODULE_LICENSE("GPL");
3676