xref: /openbmc/linux/drivers/block/rbd.c (revision f0f8cef5)
1602adf40SYehuda Sadeh /*
2602adf40SYehuda Sadeh    rbd.c -- Export ceph rados objects as a Linux block device
3602adf40SYehuda Sadeh 
4602adf40SYehuda Sadeh 
5602adf40SYehuda Sadeh    based on drivers/block/osdblk.c:
6602adf40SYehuda Sadeh 
7602adf40SYehuda Sadeh    Copyright 2009 Red Hat, Inc.
8602adf40SYehuda Sadeh 
9602adf40SYehuda Sadeh    This program is free software; you can redistribute it and/or modify
10602adf40SYehuda Sadeh    it under the terms of the GNU General Public License as published by
11602adf40SYehuda Sadeh    the Free Software Foundation.
12602adf40SYehuda Sadeh 
13602adf40SYehuda Sadeh    This program is distributed in the hope that it will be useful,
14602adf40SYehuda Sadeh    but WITHOUT ANY WARRANTY; without even the implied warranty of
15602adf40SYehuda Sadeh    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16602adf40SYehuda Sadeh    GNU General Public License for more details.
17602adf40SYehuda Sadeh 
18602adf40SYehuda Sadeh    You should have received a copy of the GNU General Public License
19602adf40SYehuda Sadeh    along with this program; see the file COPYING.  If not, write to
20602adf40SYehuda Sadeh    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
21602adf40SYehuda Sadeh 
22602adf40SYehuda Sadeh 
23602adf40SYehuda Sadeh 
24dfc5606dSYehuda Sadeh    For usage instructions, please refer to:
25602adf40SYehuda Sadeh 
26dfc5606dSYehuda Sadeh                  Documentation/ABI/testing/sysfs-bus-rbd
27602adf40SYehuda Sadeh 
28602adf40SYehuda Sadeh  */
29602adf40SYehuda Sadeh 
30602adf40SYehuda Sadeh #include <linux/ceph/libceph.h>
31602adf40SYehuda Sadeh #include <linux/ceph/osd_client.h>
32602adf40SYehuda Sadeh #include <linux/ceph/mon_client.h>
33602adf40SYehuda Sadeh #include <linux/ceph/decode.h>
3459c2be1eSYehuda Sadeh #include <linux/parser.h>
35602adf40SYehuda Sadeh 
36602adf40SYehuda Sadeh #include <linux/kernel.h>
37602adf40SYehuda Sadeh #include <linux/device.h>
38602adf40SYehuda Sadeh #include <linux/module.h>
39602adf40SYehuda Sadeh #include <linux/fs.h>
40602adf40SYehuda Sadeh #include <linux/blkdev.h>
41602adf40SYehuda Sadeh 
42602adf40SYehuda Sadeh #include "rbd_types.h"
43602adf40SYehuda Sadeh 
/* Driver identification strings */
#define RBD_DRV_NAME		"rbd"
#define RBD_DRV_NAME_LONG	"rbd (rados block device)"

/* max minors per blkdev */
#define RBD_MINORS_PER_MAJOR	256

/* Buffer size limits for the various names and option strings */
#define RBD_MAX_MD_NAME_LEN	(RBD_MAX_OBJ_NAME_LEN + sizeof(RBD_SUFFIX))
#define RBD_MAX_POOL_NAME_LEN	64
#define RBD_MAX_SNAP_NAME_LEN	32
#define RBD_MAX_OPT_LEN		1024

/* Snapshot name that selects the image head instead of a snapshot */
#define RBD_SNAP_HEAD_NAME	"-"

/* Size of the block device name buffer */
#define DEV_NAME_LEN		32

/* Initial value of rbd_options.notify_timeout */
#define RBD_NOTIFY_TIMEOUT_DEFAULT 10
5959c2be1eSYehuda Sadeh 
60602adf40SYehuda Sadeh /*
61602adf40SYehuda Sadeh  * block device image metadata (in-memory version)
62602adf40SYehuda Sadeh  */
63602adf40SYehuda Sadeh struct rbd_image_header {
64602adf40SYehuda Sadeh 	u64 image_size;
65602adf40SYehuda Sadeh 	char block_name[32];
66602adf40SYehuda Sadeh 	__u8 obj_order;
67602adf40SYehuda Sadeh 	__u8 crypt_type;
68602adf40SYehuda Sadeh 	__u8 comp_type;
69602adf40SYehuda Sadeh 	struct rw_semaphore snap_rwsem;
70602adf40SYehuda Sadeh 	struct ceph_snap_context *snapc;
71602adf40SYehuda Sadeh 	size_t snap_names_len;
72602adf40SYehuda Sadeh 	u64 snap_seq;
73602adf40SYehuda Sadeh 	u32 total_snaps;
74602adf40SYehuda Sadeh 
75602adf40SYehuda Sadeh 	char *snap_names;
76602adf40SYehuda Sadeh 	u64 *snap_sizes;
7759c2be1eSYehuda Sadeh 
7859c2be1eSYehuda Sadeh 	u64 obj_version;
7959c2be1eSYehuda Sadeh };
8059c2be1eSYehuda Sadeh 
/* rbd-specific options, filled in by parse_rbd_opts_token() */
struct rbd_options {
	int notify_timeout;	/* starts as RBD_NOTIFY_TIMEOUT_DEFAULT */
};
84602adf40SYehuda Sadeh 
85602adf40SYehuda Sadeh /*
86f0f8cef5SAlex Elder  * an instance of the client.  multiple devices may share an rbd client.
87602adf40SYehuda Sadeh  */
88602adf40SYehuda Sadeh struct rbd_client {
89602adf40SYehuda Sadeh 	struct ceph_client	*client;
9059c2be1eSYehuda Sadeh 	struct rbd_options	*rbd_opts;
91602adf40SYehuda Sadeh 	struct kref		kref;
92602adf40SYehuda Sadeh 	struct list_head	node;
93602adf40SYehuda Sadeh };
94602adf40SYehuda Sadeh 
95602adf40SYehuda Sadeh /*
96f0f8cef5SAlex Elder  * a request completion status
97602adf40SYehuda Sadeh  */
981fec7093SYehuda Sadeh struct rbd_req_status {
991fec7093SYehuda Sadeh 	int done;
1001fec7093SYehuda Sadeh 	int rc;
1011fec7093SYehuda Sadeh 	u64 bytes;
1021fec7093SYehuda Sadeh };
1031fec7093SYehuda Sadeh 
1041fec7093SYehuda Sadeh /*
1051fec7093SYehuda Sadeh  * a collection of requests
1061fec7093SYehuda Sadeh  */
1071fec7093SYehuda Sadeh struct rbd_req_coll {
1081fec7093SYehuda Sadeh 	int			total;
1091fec7093SYehuda Sadeh 	int			num_done;
1101fec7093SYehuda Sadeh 	struct kref		kref;
1111fec7093SYehuda Sadeh 	struct rbd_req_status	status[0];
112602adf40SYehuda Sadeh };
113602adf40SYehuda Sadeh 
114f0f8cef5SAlex Elder /*
115f0f8cef5SAlex Elder  * a single io request
116f0f8cef5SAlex Elder  */
117f0f8cef5SAlex Elder struct rbd_request {
118f0f8cef5SAlex Elder 	struct request		*rq;		/* blk layer request */
119f0f8cef5SAlex Elder 	struct bio		*bio;		/* cloned bio */
120f0f8cef5SAlex Elder 	struct page		**pages;	/* list of used pages */
121f0f8cef5SAlex Elder 	u64			len;
122f0f8cef5SAlex Elder 	int			coll_index;
123f0f8cef5SAlex Elder 	struct rbd_req_coll	*coll;
124f0f8cef5SAlex Elder };
125f0f8cef5SAlex Elder 
126dfc5606dSYehuda Sadeh struct rbd_snap {
127dfc5606dSYehuda Sadeh 	struct	device		dev;
128dfc5606dSYehuda Sadeh 	const char		*name;
129dfc5606dSYehuda Sadeh 	size_t			size;
130dfc5606dSYehuda Sadeh 	struct list_head	node;
131dfc5606dSYehuda Sadeh 	u64			id;
132dfc5606dSYehuda Sadeh };
133dfc5606dSYehuda Sadeh 
134602adf40SYehuda Sadeh /*
135602adf40SYehuda Sadeh  * a single device
136602adf40SYehuda Sadeh  */
137602adf40SYehuda Sadeh struct rbd_device {
138602adf40SYehuda Sadeh 	int			id;		/* blkdev unique id */
139602adf40SYehuda Sadeh 
140602adf40SYehuda Sadeh 	int			major;		/* blkdev assigned major */
141602adf40SYehuda Sadeh 	struct gendisk		*disk;		/* blkdev's gendisk and rq */
142602adf40SYehuda Sadeh 	struct request_queue	*q;
143602adf40SYehuda Sadeh 
144602adf40SYehuda Sadeh 	struct rbd_client	*rbd_client;
145602adf40SYehuda Sadeh 
146602adf40SYehuda Sadeh 	char			name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
147602adf40SYehuda Sadeh 
148602adf40SYehuda Sadeh 	spinlock_t		lock;		/* queue lock */
149602adf40SYehuda Sadeh 
150602adf40SYehuda Sadeh 	struct rbd_image_header	header;
151602adf40SYehuda Sadeh 	char			obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */
152602adf40SYehuda Sadeh 	int			obj_len;
153602adf40SYehuda Sadeh 	char			obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */
154602adf40SYehuda Sadeh 	char			pool_name[RBD_MAX_POOL_NAME_LEN];
155602adf40SYehuda Sadeh 	int			poolid;
156602adf40SYehuda Sadeh 
15759c2be1eSYehuda Sadeh 	struct ceph_osd_event   *watch_event;
15859c2be1eSYehuda Sadeh 	struct ceph_osd_request *watch_request;
15959c2be1eSYehuda Sadeh 
160602adf40SYehuda Sadeh 	char                    snap_name[RBD_MAX_SNAP_NAME_LEN];
161602adf40SYehuda Sadeh 	u32 cur_snap;	/* index+1 of current snapshot within snap context
162602adf40SYehuda Sadeh 			   0 - for the head */
163602adf40SYehuda Sadeh 	int read_only;
164602adf40SYehuda Sadeh 
165602adf40SYehuda Sadeh 	struct list_head	node;
166dfc5606dSYehuda Sadeh 
167dfc5606dSYehuda Sadeh 	/* list of snapshots */
168dfc5606dSYehuda Sadeh 	struct list_head	snaps;
169dfc5606dSYehuda Sadeh 
170dfc5606dSYehuda Sadeh 	/* sysfs related */
171dfc5606dSYehuda Sadeh 	struct device		dev;
172dfc5606dSYehuda Sadeh };
173dfc5606dSYehuda Sadeh 
/* Serialize open/close/setup/teardown */
static DEFINE_MUTEX(ctl_mutex);

/* devices */
static LIST_HEAD(rbd_dev_list);
static DEFINE_SPINLOCK(rbd_dev_list_lock);

/* clients; walked and modified under rbd_client_list_lock */
static LIST_HEAD(rbd_client_list);
static DEFINE_SPINLOCK(rbd_client_list_lock);
181602adf40SYehuda Sadeh 
182dfc5606dSYehuda Sadeh static int __rbd_init_snaps_header(struct rbd_device *rbd_dev);
183dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev);
184dfc5606dSYehuda Sadeh static ssize_t rbd_snap_add(struct device *dev,
185dfc5606dSYehuda Sadeh 			    struct device_attribute *attr,
186dfc5606dSYehuda Sadeh 			    const char *buf,
187dfc5606dSYehuda Sadeh 			    size_t count);
188dfc5606dSYehuda Sadeh static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev,
18969932487SJustin P. Mattock 				  struct rbd_snap *snap);
190dfc5606dSYehuda Sadeh 
191f0f8cef5SAlex Elder static ssize_t rbd_add(struct bus_type *bus, const char *buf,
192f0f8cef5SAlex Elder 		       size_t count);
193f0f8cef5SAlex Elder static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
194f0f8cef5SAlex Elder 			  size_t count);
195f0f8cef5SAlex Elder 
196f0f8cef5SAlex Elder static struct bus_attribute rbd_bus_attrs[] = {
197f0f8cef5SAlex Elder 	__ATTR(add, S_IWUSR, NULL, rbd_add),
198f0f8cef5SAlex Elder 	__ATTR(remove, S_IWUSR, NULL, rbd_remove),
199f0f8cef5SAlex Elder 	__ATTR_NULL
200f0f8cef5SAlex Elder };
201f0f8cef5SAlex Elder 
202f0f8cef5SAlex Elder static struct bus_type rbd_bus_type = {
203f0f8cef5SAlex Elder 	.name		= "rbd",
204f0f8cef5SAlex Elder 	.bus_attrs	= rbd_bus_attrs,
205f0f8cef5SAlex Elder };
206f0f8cef5SAlex Elder 
207f0f8cef5SAlex Elder static void rbd_root_dev_release(struct device *dev)
208f0f8cef5SAlex Elder {
209f0f8cef5SAlex Elder }
210f0f8cef5SAlex Elder 
211f0f8cef5SAlex Elder static struct device rbd_root_dev = {
212f0f8cef5SAlex Elder 	.init_name =    "rbd",
213f0f8cef5SAlex Elder 	.release =      rbd_root_dev_release,
214f0f8cef5SAlex Elder };
215f0f8cef5SAlex Elder 
216dfc5606dSYehuda Sadeh 
217dfc5606dSYehuda Sadeh static struct rbd_device *dev_to_rbd(struct device *dev)
218dfc5606dSYehuda Sadeh {
219dfc5606dSYehuda Sadeh 	return container_of(dev, struct rbd_device, dev);
220dfc5606dSYehuda Sadeh }
221dfc5606dSYehuda Sadeh 
222dfc5606dSYehuda Sadeh static struct device *rbd_get_dev(struct rbd_device *rbd_dev)
223dfc5606dSYehuda Sadeh {
224dfc5606dSYehuda Sadeh 	return get_device(&rbd_dev->dev);
225dfc5606dSYehuda Sadeh }
226dfc5606dSYehuda Sadeh 
227dfc5606dSYehuda Sadeh static void rbd_put_dev(struct rbd_device *rbd_dev)
228dfc5606dSYehuda Sadeh {
229dfc5606dSYehuda Sadeh 	put_device(&rbd_dev->dev);
230dfc5606dSYehuda Sadeh }
231602adf40SYehuda Sadeh 
23259c2be1eSYehuda Sadeh static int __rbd_update_snaps(struct rbd_device *rbd_dev);
23359c2be1eSYehuda Sadeh 
234602adf40SYehuda Sadeh static int rbd_open(struct block_device *bdev, fmode_t mode)
235602adf40SYehuda Sadeh {
236f0f8cef5SAlex Elder 	struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
237602adf40SYehuda Sadeh 
238dfc5606dSYehuda Sadeh 	rbd_get_dev(rbd_dev);
239dfc5606dSYehuda Sadeh 
240602adf40SYehuda Sadeh 	set_device_ro(bdev, rbd_dev->read_only);
241602adf40SYehuda Sadeh 
242602adf40SYehuda Sadeh 	if ((mode & FMODE_WRITE) && rbd_dev->read_only)
243602adf40SYehuda Sadeh 		return -EROFS;
244602adf40SYehuda Sadeh 
245602adf40SYehuda Sadeh 	return 0;
246602adf40SYehuda Sadeh }
247602adf40SYehuda Sadeh 
248dfc5606dSYehuda Sadeh static int rbd_release(struct gendisk *disk, fmode_t mode)
249dfc5606dSYehuda Sadeh {
250dfc5606dSYehuda Sadeh 	struct rbd_device *rbd_dev = disk->private_data;
251dfc5606dSYehuda Sadeh 
252dfc5606dSYehuda Sadeh 	rbd_put_dev(rbd_dev);
253dfc5606dSYehuda Sadeh 
254dfc5606dSYehuda Sadeh 	return 0;
255dfc5606dSYehuda Sadeh }
256dfc5606dSYehuda Sadeh 
257602adf40SYehuda Sadeh static const struct block_device_operations rbd_bd_ops = {
258602adf40SYehuda Sadeh 	.owner			= THIS_MODULE,
259602adf40SYehuda Sadeh 	.open			= rbd_open,
260dfc5606dSYehuda Sadeh 	.release		= rbd_release,
261602adf40SYehuda Sadeh };
262602adf40SYehuda Sadeh 
263602adf40SYehuda Sadeh /*
264602adf40SYehuda Sadeh  * Initialize an rbd client instance.
265602adf40SYehuda Sadeh  * We own *opt.
266602adf40SYehuda Sadeh  */
26759c2be1eSYehuda Sadeh static struct rbd_client *rbd_client_create(struct ceph_options *opt,
26859c2be1eSYehuda Sadeh 					    struct rbd_options *rbd_opts)
269602adf40SYehuda Sadeh {
270602adf40SYehuda Sadeh 	struct rbd_client *rbdc;
271602adf40SYehuda Sadeh 	int ret = -ENOMEM;
272602adf40SYehuda Sadeh 
273602adf40SYehuda Sadeh 	dout("rbd_client_create\n");
274602adf40SYehuda Sadeh 	rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL);
275602adf40SYehuda Sadeh 	if (!rbdc)
276602adf40SYehuda Sadeh 		goto out_opt;
277602adf40SYehuda Sadeh 
278602adf40SYehuda Sadeh 	kref_init(&rbdc->kref);
279602adf40SYehuda Sadeh 	INIT_LIST_HEAD(&rbdc->node);
280602adf40SYehuda Sadeh 
281bc534d86SAlex Elder 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
282bc534d86SAlex Elder 
2836ab00d46SSage Weil 	rbdc->client = ceph_create_client(opt, rbdc, 0, 0);
284602adf40SYehuda Sadeh 	if (IS_ERR(rbdc->client))
285bc534d86SAlex Elder 		goto out_mutex;
28628f259b7SVasiliy Kulikov 	opt = NULL; /* Now rbdc->client is responsible for opt */
287602adf40SYehuda Sadeh 
288602adf40SYehuda Sadeh 	ret = ceph_open_session(rbdc->client);
289602adf40SYehuda Sadeh 	if (ret < 0)
290602adf40SYehuda Sadeh 		goto out_err;
291602adf40SYehuda Sadeh 
29259c2be1eSYehuda Sadeh 	rbdc->rbd_opts = rbd_opts;
29359c2be1eSYehuda Sadeh 
294432b8587SAlex Elder 	spin_lock(&rbd_client_list_lock);
295602adf40SYehuda Sadeh 	list_add_tail(&rbdc->node, &rbd_client_list);
296432b8587SAlex Elder 	spin_unlock(&rbd_client_list_lock);
297602adf40SYehuda Sadeh 
298bc534d86SAlex Elder 	mutex_unlock(&ctl_mutex);
299bc534d86SAlex Elder 
300602adf40SYehuda Sadeh 	dout("rbd_client_create created %p\n", rbdc);
301602adf40SYehuda Sadeh 	return rbdc;
302602adf40SYehuda Sadeh 
303602adf40SYehuda Sadeh out_err:
304602adf40SYehuda Sadeh 	ceph_destroy_client(rbdc->client);
305bc534d86SAlex Elder out_mutex:
306bc534d86SAlex Elder 	mutex_unlock(&ctl_mutex);
307602adf40SYehuda Sadeh 	kfree(rbdc);
308602adf40SYehuda Sadeh out_opt:
30928f259b7SVasiliy Kulikov 	if (opt)
310602adf40SYehuda Sadeh 		ceph_destroy_options(opt);
31128f259b7SVasiliy Kulikov 	return ERR_PTR(ret);
312602adf40SYehuda Sadeh }
313602adf40SYehuda Sadeh 
314602adf40SYehuda Sadeh /*
315602adf40SYehuda Sadeh  * Find a ceph client with specific addr and configuration.
316602adf40SYehuda Sadeh  */
317602adf40SYehuda Sadeh static struct rbd_client *__rbd_client_find(struct ceph_options *opt)
318602adf40SYehuda Sadeh {
319602adf40SYehuda Sadeh 	struct rbd_client *client_node;
320602adf40SYehuda Sadeh 
321602adf40SYehuda Sadeh 	if (opt->flags & CEPH_OPT_NOSHARE)
322602adf40SYehuda Sadeh 		return NULL;
323602adf40SYehuda Sadeh 
324602adf40SYehuda Sadeh 	list_for_each_entry(client_node, &rbd_client_list, node)
325602adf40SYehuda Sadeh 		if (ceph_compare_options(opt, client_node->client) == 0)
326602adf40SYehuda Sadeh 			return client_node;
327602adf40SYehuda Sadeh 	return NULL;
328602adf40SYehuda Sadeh }
329602adf40SYehuda Sadeh 
330602adf40SYehuda Sadeh /*
33159c2be1eSYehuda Sadeh  * mount options
33259c2be1eSYehuda Sadeh  */
33359c2be1eSYehuda Sadeh enum {
33459c2be1eSYehuda Sadeh 	Opt_notify_timeout,
33559c2be1eSYehuda Sadeh 	Opt_last_int,
33659c2be1eSYehuda Sadeh 	/* int args above */
33759c2be1eSYehuda Sadeh 	Opt_last_string,
33859c2be1eSYehuda Sadeh 	/* string args above */
33959c2be1eSYehuda Sadeh };
34059c2be1eSYehuda Sadeh 
34159c2be1eSYehuda Sadeh static match_table_t rbdopt_tokens = {
34259c2be1eSYehuda Sadeh 	{Opt_notify_timeout, "notify_timeout=%d"},
34359c2be1eSYehuda Sadeh 	/* int args above */
34459c2be1eSYehuda Sadeh 	/* string args above */
34559c2be1eSYehuda Sadeh 	{-1, NULL}
34659c2be1eSYehuda Sadeh };
34759c2be1eSYehuda Sadeh 
34859c2be1eSYehuda Sadeh static int parse_rbd_opts_token(char *c, void *private)
34959c2be1eSYehuda Sadeh {
35059c2be1eSYehuda Sadeh 	struct rbd_options *rbdopt = private;
35159c2be1eSYehuda Sadeh 	substring_t argstr[MAX_OPT_ARGS];
35259c2be1eSYehuda Sadeh 	int token, intval, ret;
35359c2be1eSYehuda Sadeh 
35421079786SAlex Elder 	token = match_token(c, rbdopt_tokens, argstr);
35559c2be1eSYehuda Sadeh 	if (token < 0)
35659c2be1eSYehuda Sadeh 		return -EINVAL;
35759c2be1eSYehuda Sadeh 
35859c2be1eSYehuda Sadeh 	if (token < Opt_last_int) {
35959c2be1eSYehuda Sadeh 		ret = match_int(&argstr[0], &intval);
36059c2be1eSYehuda Sadeh 		if (ret < 0) {
36159c2be1eSYehuda Sadeh 			pr_err("bad mount option arg (not int) "
36259c2be1eSYehuda Sadeh 			       "at '%s'\n", c);
36359c2be1eSYehuda Sadeh 			return ret;
36459c2be1eSYehuda Sadeh 		}
36559c2be1eSYehuda Sadeh 		dout("got int token %d val %d\n", token, intval);
36659c2be1eSYehuda Sadeh 	} else if (token > Opt_last_int && token < Opt_last_string) {
36759c2be1eSYehuda Sadeh 		dout("got string token %d val %s\n", token,
36859c2be1eSYehuda Sadeh 		     argstr[0].from);
36959c2be1eSYehuda Sadeh 	} else {
37059c2be1eSYehuda Sadeh 		dout("got token %d\n", token);
37159c2be1eSYehuda Sadeh 	}
37259c2be1eSYehuda Sadeh 
37359c2be1eSYehuda Sadeh 	switch (token) {
37459c2be1eSYehuda Sadeh 	case Opt_notify_timeout:
37559c2be1eSYehuda Sadeh 		rbdopt->notify_timeout = intval;
37659c2be1eSYehuda Sadeh 		break;
37759c2be1eSYehuda Sadeh 	default:
37859c2be1eSYehuda Sadeh 		BUG_ON(token);
37959c2be1eSYehuda Sadeh 	}
38059c2be1eSYehuda Sadeh 	return 0;
38159c2be1eSYehuda Sadeh }
38259c2be1eSYehuda Sadeh 
38359c2be1eSYehuda Sadeh /*
384602adf40SYehuda Sadeh  * Get a ceph client with specific addr and configuration, if one does
385602adf40SYehuda Sadeh  * not exist create it.
386602adf40SYehuda Sadeh  */
387602adf40SYehuda Sadeh static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr,
388602adf40SYehuda Sadeh 			  char *options)
389602adf40SYehuda Sadeh {
390602adf40SYehuda Sadeh 	struct rbd_client *rbdc;
391602adf40SYehuda Sadeh 	struct ceph_options *opt;
392602adf40SYehuda Sadeh 	int ret;
39359c2be1eSYehuda Sadeh 	struct rbd_options *rbd_opts;
39459c2be1eSYehuda Sadeh 
39559c2be1eSYehuda Sadeh 	rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL);
39659c2be1eSYehuda Sadeh 	if (!rbd_opts)
39759c2be1eSYehuda Sadeh 		return -ENOMEM;
39859c2be1eSYehuda Sadeh 
39959c2be1eSYehuda Sadeh 	rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT;
400602adf40SYehuda Sadeh 
401ee57741cSAlex Elder 	opt = ceph_parse_options(options, mon_addr,
40221079786SAlex Elder 				mon_addr + strlen(mon_addr),
40321079786SAlex Elder 				parse_rbd_opts_token, rbd_opts);
404ee57741cSAlex Elder 	if (IS_ERR(opt)) {
405ee57741cSAlex Elder 		ret = PTR_ERR(opt);
40659c2be1eSYehuda Sadeh 		goto done_err;
407ee57741cSAlex Elder 	}
408602adf40SYehuda Sadeh 
409432b8587SAlex Elder 	spin_lock(&rbd_client_list_lock);
410602adf40SYehuda Sadeh 	rbdc = __rbd_client_find(opt);
411602adf40SYehuda Sadeh 	if (rbdc) {
412e6994d3dSAlex Elder 		/* using an existing client */
413e6994d3dSAlex Elder 		kref_get(&rbdc->kref);
414432b8587SAlex Elder 		spin_unlock(&rbd_client_list_lock);
415e6994d3dSAlex Elder 
416e6994d3dSAlex Elder 		rbd_dev->rbd_client = rbdc;
417e6994d3dSAlex Elder 
418602adf40SYehuda Sadeh 		ceph_destroy_options(opt);
41997bb59a0SAlex Elder 		kfree(rbd_opts);
420602adf40SYehuda Sadeh 
421602adf40SYehuda Sadeh 		return 0;
422602adf40SYehuda Sadeh 	}
423432b8587SAlex Elder 	spin_unlock(&rbd_client_list_lock);
424602adf40SYehuda Sadeh 
42559c2be1eSYehuda Sadeh 	rbdc = rbd_client_create(opt, rbd_opts);
426d97081b0SAlex Elder 
42759c2be1eSYehuda Sadeh 	if (IS_ERR(rbdc)) {
42859c2be1eSYehuda Sadeh 		ret = PTR_ERR(rbdc);
42959c2be1eSYehuda Sadeh 		goto done_err;
43059c2be1eSYehuda Sadeh 	}
431602adf40SYehuda Sadeh 
432602adf40SYehuda Sadeh 	rbd_dev->rbd_client = rbdc;
433602adf40SYehuda Sadeh 	return 0;
43459c2be1eSYehuda Sadeh done_err:
43559c2be1eSYehuda Sadeh 	kfree(rbd_opts);
43659c2be1eSYehuda Sadeh 	return ret;
437602adf40SYehuda Sadeh }
438602adf40SYehuda Sadeh 
439602adf40SYehuda Sadeh /*
440602adf40SYehuda Sadeh  * Destroy ceph client
441d23a4b3fSAlex Elder  *
442432b8587SAlex Elder  * Caller must hold rbd_client_list_lock.
443602adf40SYehuda Sadeh  */
444602adf40SYehuda Sadeh static void rbd_client_release(struct kref *kref)
445602adf40SYehuda Sadeh {
446602adf40SYehuda Sadeh 	struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
447602adf40SYehuda Sadeh 
448602adf40SYehuda Sadeh 	dout("rbd_release_client %p\n", rbdc);
449602adf40SYehuda Sadeh 	list_del(&rbdc->node);
450602adf40SYehuda Sadeh 
451602adf40SYehuda Sadeh 	ceph_destroy_client(rbdc->client);
45259c2be1eSYehuda Sadeh 	kfree(rbdc->rbd_opts);
453602adf40SYehuda Sadeh 	kfree(rbdc);
454602adf40SYehuda Sadeh }
455602adf40SYehuda Sadeh 
456602adf40SYehuda Sadeh /*
457602adf40SYehuda Sadeh  * Drop reference to ceph client node. If it's not referenced anymore, release
458602adf40SYehuda Sadeh  * it.
459602adf40SYehuda Sadeh  */
460602adf40SYehuda Sadeh static void rbd_put_client(struct rbd_device *rbd_dev)
461602adf40SYehuda Sadeh {
462432b8587SAlex Elder 	spin_lock(&rbd_client_list_lock);
463602adf40SYehuda Sadeh 	kref_put(&rbd_dev->rbd_client->kref, rbd_client_release);
464432b8587SAlex Elder 	spin_unlock(&rbd_client_list_lock);
465602adf40SYehuda Sadeh 	rbd_dev->rbd_client = NULL;
466602adf40SYehuda Sadeh }
467602adf40SYehuda Sadeh 
4681fec7093SYehuda Sadeh /*
4691fec7093SYehuda Sadeh  * Destroy requests collection
4701fec7093SYehuda Sadeh  */
4711fec7093SYehuda Sadeh static void rbd_coll_release(struct kref *kref)
4721fec7093SYehuda Sadeh {
4731fec7093SYehuda Sadeh 	struct rbd_req_coll *coll =
4741fec7093SYehuda Sadeh 		container_of(kref, struct rbd_req_coll, kref);
4751fec7093SYehuda Sadeh 
4761fec7093SYehuda Sadeh 	dout("rbd_coll_release %p\n", coll);
4771fec7093SYehuda Sadeh 	kfree(coll);
4781fec7093SYehuda Sadeh }
479602adf40SYehuda Sadeh 
480602adf40SYehuda Sadeh /*
481602adf40SYehuda Sadeh  * Create a new header structure, translate header format from the on-disk
482602adf40SYehuda Sadeh  * header.
483602adf40SYehuda Sadeh  */
484602adf40SYehuda Sadeh static int rbd_header_from_disk(struct rbd_image_header *header,
485602adf40SYehuda Sadeh 				 struct rbd_image_header_ondisk *ondisk,
486602adf40SYehuda Sadeh 				 int allocated_snaps,
487602adf40SYehuda Sadeh 				 gfp_t gfp_flags)
488602adf40SYehuda Sadeh {
489602adf40SYehuda Sadeh 	int i;
490602adf40SYehuda Sadeh 	u32 snap_count = le32_to_cpu(ondisk->snap_count);
491602adf40SYehuda Sadeh 	int ret = -ENOMEM;
492602adf40SYehuda Sadeh 
49321079786SAlex Elder 	if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT)))
49481e759fbSJosh Durgin 		return -ENXIO;
49581e759fbSJosh Durgin 
496602adf40SYehuda Sadeh 	init_rwsem(&header->snap_rwsem);
497602adf40SYehuda Sadeh 	header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
498602adf40SYehuda Sadeh 	header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
49921079786SAlex Elder 				snap_count * sizeof (*ondisk),
500602adf40SYehuda Sadeh 				gfp_flags);
501602adf40SYehuda Sadeh 	if (!header->snapc)
502602adf40SYehuda Sadeh 		return -ENOMEM;
503602adf40SYehuda Sadeh 	if (snap_count) {
504602adf40SYehuda Sadeh 		header->snap_names = kmalloc(header->snap_names_len,
505602adf40SYehuda Sadeh 					     GFP_KERNEL);
506602adf40SYehuda Sadeh 		if (!header->snap_names)
507602adf40SYehuda Sadeh 			goto err_snapc;
508602adf40SYehuda Sadeh 		header->snap_sizes = kmalloc(snap_count * sizeof(u64),
509602adf40SYehuda Sadeh 					     GFP_KERNEL);
510602adf40SYehuda Sadeh 		if (!header->snap_sizes)
511602adf40SYehuda Sadeh 			goto err_names;
512602adf40SYehuda Sadeh 	} else {
513602adf40SYehuda Sadeh 		header->snap_names = NULL;
514602adf40SYehuda Sadeh 		header->snap_sizes = NULL;
515602adf40SYehuda Sadeh 	}
516602adf40SYehuda Sadeh 	memcpy(header->block_name, ondisk->block_name,
517602adf40SYehuda Sadeh 	       sizeof(ondisk->block_name));
518602adf40SYehuda Sadeh 
519602adf40SYehuda Sadeh 	header->image_size = le64_to_cpu(ondisk->image_size);
520602adf40SYehuda Sadeh 	header->obj_order = ondisk->options.order;
521602adf40SYehuda Sadeh 	header->crypt_type = ondisk->options.crypt_type;
522602adf40SYehuda Sadeh 	header->comp_type = ondisk->options.comp_type;
523602adf40SYehuda Sadeh 
524602adf40SYehuda Sadeh 	atomic_set(&header->snapc->nref, 1);
525602adf40SYehuda Sadeh 	header->snap_seq = le64_to_cpu(ondisk->snap_seq);
526602adf40SYehuda Sadeh 	header->snapc->num_snaps = snap_count;
527602adf40SYehuda Sadeh 	header->total_snaps = snap_count;
528602adf40SYehuda Sadeh 
52921079786SAlex Elder 	if (snap_count && allocated_snaps == snap_count) {
530602adf40SYehuda Sadeh 		for (i = 0; i < snap_count; i++) {
531602adf40SYehuda Sadeh 			header->snapc->snaps[i] =
532602adf40SYehuda Sadeh 				le64_to_cpu(ondisk->snaps[i].id);
533602adf40SYehuda Sadeh 			header->snap_sizes[i] =
534602adf40SYehuda Sadeh 				le64_to_cpu(ondisk->snaps[i].image_size);
535602adf40SYehuda Sadeh 		}
536602adf40SYehuda Sadeh 
537602adf40SYehuda Sadeh 		/* copy snapshot names */
538602adf40SYehuda Sadeh 		memcpy(header->snap_names, &ondisk->snaps[i],
539602adf40SYehuda Sadeh 			header->snap_names_len);
540602adf40SYehuda Sadeh 	}
541602adf40SYehuda Sadeh 
542602adf40SYehuda Sadeh 	return 0;
543602adf40SYehuda Sadeh 
544602adf40SYehuda Sadeh err_names:
545602adf40SYehuda Sadeh 	kfree(header->snap_names);
546602adf40SYehuda Sadeh err_snapc:
547602adf40SYehuda Sadeh 	kfree(header->snapc);
548602adf40SYehuda Sadeh 	return ret;
549602adf40SYehuda Sadeh }
550602adf40SYehuda Sadeh 
551602adf40SYehuda Sadeh static int snap_index(struct rbd_image_header *header, int snap_num)
552602adf40SYehuda Sadeh {
553602adf40SYehuda Sadeh 	return header->total_snaps - snap_num;
554602adf40SYehuda Sadeh }
555602adf40SYehuda Sadeh 
556602adf40SYehuda Sadeh static u64 cur_snap_id(struct rbd_device *rbd_dev)
557602adf40SYehuda Sadeh {
558602adf40SYehuda Sadeh 	struct rbd_image_header *header = &rbd_dev->header;
559602adf40SYehuda Sadeh 
560602adf40SYehuda Sadeh 	if (!rbd_dev->cur_snap)
561602adf40SYehuda Sadeh 		return 0;
562602adf40SYehuda Sadeh 
563602adf40SYehuda Sadeh 	return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)];
564602adf40SYehuda Sadeh }
565602adf40SYehuda Sadeh 
566602adf40SYehuda Sadeh static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
567602adf40SYehuda Sadeh 			u64 *seq, u64 *size)
568602adf40SYehuda Sadeh {
569602adf40SYehuda Sadeh 	int i;
570602adf40SYehuda Sadeh 	char *p = header->snap_names;
571602adf40SYehuda Sadeh 
572602adf40SYehuda Sadeh 	for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) {
573602adf40SYehuda Sadeh 		if (strcmp(snap_name, p) == 0)
574602adf40SYehuda Sadeh 			break;
575602adf40SYehuda Sadeh 	}
576602adf40SYehuda Sadeh 	if (i == header->total_snaps)
577602adf40SYehuda Sadeh 		return -ENOENT;
578602adf40SYehuda Sadeh 	if (seq)
579602adf40SYehuda Sadeh 		*seq = header->snapc->snaps[i];
580602adf40SYehuda Sadeh 
581602adf40SYehuda Sadeh 	if (size)
582602adf40SYehuda Sadeh 		*size = header->snap_sizes[i];
583602adf40SYehuda Sadeh 
584602adf40SYehuda Sadeh 	return i;
585602adf40SYehuda Sadeh }
586602adf40SYehuda Sadeh 
587cc9d734cSJosh Durgin static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
588602adf40SYehuda Sadeh {
589602adf40SYehuda Sadeh 	struct rbd_image_header *header = &dev->header;
590602adf40SYehuda Sadeh 	struct ceph_snap_context *snapc = header->snapc;
591602adf40SYehuda Sadeh 	int ret = -ENOENT;
592602adf40SYehuda Sadeh 
593cc9d734cSJosh Durgin 	BUILD_BUG_ON(sizeof (dev->snap_name) < sizeof (RBD_SNAP_HEAD_NAME));
594cc9d734cSJosh Durgin 
595602adf40SYehuda Sadeh 	down_write(&header->snap_rwsem);
596602adf40SYehuda Sadeh 
597cc9d734cSJosh Durgin 	if (!memcmp(dev->snap_name, RBD_SNAP_HEAD_NAME,
598cc9d734cSJosh Durgin 		    sizeof (RBD_SNAP_HEAD_NAME))) {
599602adf40SYehuda Sadeh 		if (header->total_snaps)
600602adf40SYehuda Sadeh 			snapc->seq = header->snap_seq;
601602adf40SYehuda Sadeh 		else
602602adf40SYehuda Sadeh 			snapc->seq = 0;
603602adf40SYehuda Sadeh 		dev->cur_snap = 0;
604602adf40SYehuda Sadeh 		dev->read_only = 0;
605602adf40SYehuda Sadeh 		if (size)
606602adf40SYehuda Sadeh 			*size = header->image_size;
607602adf40SYehuda Sadeh 	} else {
608cc9d734cSJosh Durgin 		ret = snap_by_name(header, dev->snap_name, &snapc->seq, size);
609602adf40SYehuda Sadeh 		if (ret < 0)
610602adf40SYehuda Sadeh 			goto done;
611602adf40SYehuda Sadeh 
612602adf40SYehuda Sadeh 		dev->cur_snap = header->total_snaps - ret;
613602adf40SYehuda Sadeh 		dev->read_only = 1;
614602adf40SYehuda Sadeh 	}
615602adf40SYehuda Sadeh 
616602adf40SYehuda Sadeh 	ret = 0;
617602adf40SYehuda Sadeh done:
618602adf40SYehuda Sadeh 	up_write(&header->snap_rwsem);
619602adf40SYehuda Sadeh 	return ret;
620602adf40SYehuda Sadeh }
621602adf40SYehuda Sadeh 
622602adf40SYehuda Sadeh static void rbd_header_free(struct rbd_image_header *header)
623602adf40SYehuda Sadeh {
624602adf40SYehuda Sadeh 	kfree(header->snapc);
625602adf40SYehuda Sadeh 	kfree(header->snap_names);
626602adf40SYehuda Sadeh 	kfree(header->snap_sizes);
627602adf40SYehuda Sadeh }
628602adf40SYehuda Sadeh 
629602adf40SYehuda Sadeh /*
630602adf40SYehuda Sadeh  * get the actual striped segment name, offset and length
631602adf40SYehuda Sadeh  */
632602adf40SYehuda Sadeh static u64 rbd_get_segment(struct rbd_image_header *header,
633602adf40SYehuda Sadeh 			   const char *block_name,
634602adf40SYehuda Sadeh 			   u64 ofs, u64 len,
635602adf40SYehuda Sadeh 			   char *seg_name, u64 *segofs)
636602adf40SYehuda Sadeh {
637602adf40SYehuda Sadeh 	u64 seg = ofs >> header->obj_order;
638602adf40SYehuda Sadeh 
639602adf40SYehuda Sadeh 	if (seg_name)
640602adf40SYehuda Sadeh 		snprintf(seg_name, RBD_MAX_SEG_NAME_LEN,
641602adf40SYehuda Sadeh 			 "%s.%012llx", block_name, seg);
642602adf40SYehuda Sadeh 
643602adf40SYehuda Sadeh 	ofs = ofs & ((1 << header->obj_order) - 1);
644602adf40SYehuda Sadeh 	len = min_t(u64, len, (1 << header->obj_order) - ofs);
645602adf40SYehuda Sadeh 
646602adf40SYehuda Sadeh 	if (segofs)
647602adf40SYehuda Sadeh 		*segofs = ofs;
648602adf40SYehuda Sadeh 
649602adf40SYehuda Sadeh 	return len;
650602adf40SYehuda Sadeh }
651602adf40SYehuda Sadeh 
6521fec7093SYehuda Sadeh static int rbd_get_num_segments(struct rbd_image_header *header,
6531fec7093SYehuda Sadeh 				u64 ofs, u64 len)
6541fec7093SYehuda Sadeh {
6551fec7093SYehuda Sadeh 	u64 start_seg = ofs >> header->obj_order;
6561fec7093SYehuda Sadeh 	u64 end_seg = (ofs + len - 1) >> header->obj_order;
6571fec7093SYehuda Sadeh 	return end_seg - start_seg + 1;
6581fec7093SYehuda Sadeh }
6591fec7093SYehuda Sadeh 
660602adf40SYehuda Sadeh /*
661029bcbd8SJosh Durgin  * returns the size of an object in the image
662029bcbd8SJosh Durgin  */
663029bcbd8SJosh Durgin static u64 rbd_obj_bytes(struct rbd_image_header *header)
664029bcbd8SJosh Durgin {
665029bcbd8SJosh Durgin 	return 1 << header->obj_order;
666029bcbd8SJosh Durgin }
667029bcbd8SJosh Durgin 
668029bcbd8SJosh Durgin /*
669602adf40SYehuda Sadeh  * bio helpers
670602adf40SYehuda Sadeh  */
671602adf40SYehuda Sadeh 
672602adf40SYehuda Sadeh static void bio_chain_put(struct bio *chain)
673602adf40SYehuda Sadeh {
674602adf40SYehuda Sadeh 	struct bio *tmp;
675602adf40SYehuda Sadeh 
676602adf40SYehuda Sadeh 	while (chain) {
677602adf40SYehuda Sadeh 		tmp = chain;
678602adf40SYehuda Sadeh 		chain = chain->bi_next;
679602adf40SYehuda Sadeh 		bio_put(tmp);
680602adf40SYehuda Sadeh 	}
681602adf40SYehuda Sadeh }
682602adf40SYehuda Sadeh 
683602adf40SYehuda Sadeh /*
684602adf40SYehuda Sadeh  * zeros a bio chain, starting at specific offset
685602adf40SYehuda Sadeh  */
686602adf40SYehuda Sadeh static void zero_bio_chain(struct bio *chain, int start_ofs)
687602adf40SYehuda Sadeh {
688602adf40SYehuda Sadeh 	struct bio_vec *bv;
689602adf40SYehuda Sadeh 	unsigned long flags;
690602adf40SYehuda Sadeh 	void *buf;
691602adf40SYehuda Sadeh 	int i;
692602adf40SYehuda Sadeh 	int pos = 0;
693602adf40SYehuda Sadeh 
694602adf40SYehuda Sadeh 	while (chain) {
695602adf40SYehuda Sadeh 		bio_for_each_segment(bv, chain, i) {
696602adf40SYehuda Sadeh 			if (pos + bv->bv_len > start_ofs) {
697602adf40SYehuda Sadeh 				int remainder = max(start_ofs - pos, 0);
698602adf40SYehuda Sadeh 				buf = bvec_kmap_irq(bv, &flags);
699602adf40SYehuda Sadeh 				memset(buf + remainder, 0,
700602adf40SYehuda Sadeh 				       bv->bv_len - remainder);
70185b5aaa6SDan Carpenter 				bvec_kunmap_irq(buf, &flags);
702602adf40SYehuda Sadeh 			}
703602adf40SYehuda Sadeh 			pos += bv->bv_len;
704602adf40SYehuda Sadeh 		}
705602adf40SYehuda Sadeh 
706602adf40SYehuda Sadeh 		chain = chain->bi_next;
707602adf40SYehuda Sadeh 	}
708602adf40SYehuda Sadeh }
709602adf40SYehuda Sadeh 
710602adf40SYehuda Sadeh /*
711602adf40SYehuda Sadeh  * bio_chain_clone - clone a chain of bios up to a certain length.
712602adf40SYehuda Sadeh  * might return a bio_pair that will need to be released.
713602adf40SYehuda Sadeh  */
714602adf40SYehuda Sadeh static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
715602adf40SYehuda Sadeh 				   struct bio_pair **bp,
716602adf40SYehuda Sadeh 				   int len, gfp_t gfpmask)
717602adf40SYehuda Sadeh {
718602adf40SYehuda Sadeh 	struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL;
719602adf40SYehuda Sadeh 	int total = 0;
720602adf40SYehuda Sadeh 
721602adf40SYehuda Sadeh 	if (*bp) {
722602adf40SYehuda Sadeh 		bio_pair_release(*bp);
723602adf40SYehuda Sadeh 		*bp = NULL;
724602adf40SYehuda Sadeh 	}
725602adf40SYehuda Sadeh 
726602adf40SYehuda Sadeh 	while (old_chain && (total < len)) {
727602adf40SYehuda Sadeh 		tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs);
728602adf40SYehuda Sadeh 		if (!tmp)
729602adf40SYehuda Sadeh 			goto err_out;
730602adf40SYehuda Sadeh 
731602adf40SYehuda Sadeh 		if (total + old_chain->bi_size > len) {
732602adf40SYehuda Sadeh 			struct bio_pair *bp;
733602adf40SYehuda Sadeh 
734602adf40SYehuda Sadeh 			/*
735602adf40SYehuda Sadeh 			 * this split can only happen with a single paged bio,
736602adf40SYehuda Sadeh 			 * split_bio will BUG_ON if this is not the case
737602adf40SYehuda Sadeh 			 */
738602adf40SYehuda Sadeh 			dout("bio_chain_clone split! total=%d remaining=%d"
739602adf40SYehuda Sadeh 			     "bi_size=%d\n",
740602adf40SYehuda Sadeh 			     (int)total, (int)len-total,
741602adf40SYehuda Sadeh 			     (int)old_chain->bi_size);
742602adf40SYehuda Sadeh 
743602adf40SYehuda Sadeh 			/* split the bio. We'll release it either in the next
744602adf40SYehuda Sadeh 			   call, or it will have to be released outside */
745602adf40SYehuda Sadeh 			bp = bio_split(old_chain, (len - total) / 512ULL);
746602adf40SYehuda Sadeh 			if (!bp)
747602adf40SYehuda Sadeh 				goto err_out;
748602adf40SYehuda Sadeh 
749602adf40SYehuda Sadeh 			__bio_clone(tmp, &bp->bio1);
750602adf40SYehuda Sadeh 
751602adf40SYehuda Sadeh 			*next = &bp->bio2;
752602adf40SYehuda Sadeh 		} else {
753602adf40SYehuda Sadeh 			__bio_clone(tmp, old_chain);
754602adf40SYehuda Sadeh 			*next = old_chain->bi_next;
755602adf40SYehuda Sadeh 		}
756602adf40SYehuda Sadeh 
757602adf40SYehuda Sadeh 		tmp->bi_bdev = NULL;
758602adf40SYehuda Sadeh 		gfpmask &= ~__GFP_WAIT;
759602adf40SYehuda Sadeh 		tmp->bi_next = NULL;
760602adf40SYehuda Sadeh 
761602adf40SYehuda Sadeh 		if (!new_chain) {
762602adf40SYehuda Sadeh 			new_chain = tail = tmp;
763602adf40SYehuda Sadeh 		} else {
764602adf40SYehuda Sadeh 			tail->bi_next = tmp;
765602adf40SYehuda Sadeh 			tail = tmp;
766602adf40SYehuda Sadeh 		}
767602adf40SYehuda Sadeh 		old_chain = old_chain->bi_next;
768602adf40SYehuda Sadeh 
769602adf40SYehuda Sadeh 		total += tmp->bi_size;
770602adf40SYehuda Sadeh 	}
771602adf40SYehuda Sadeh 
772602adf40SYehuda Sadeh 	BUG_ON(total < len);
773602adf40SYehuda Sadeh 
774602adf40SYehuda Sadeh 	if (tail)
775602adf40SYehuda Sadeh 		tail->bi_next = NULL;
776602adf40SYehuda Sadeh 
777602adf40SYehuda Sadeh 	*old = old_chain;
778602adf40SYehuda Sadeh 
779602adf40SYehuda Sadeh 	return new_chain;
780602adf40SYehuda Sadeh 
781602adf40SYehuda Sadeh err_out:
782602adf40SYehuda Sadeh 	dout("bio_chain_clone with err\n");
783602adf40SYehuda Sadeh 	bio_chain_put(new_chain);
784602adf40SYehuda Sadeh 	return NULL;
785602adf40SYehuda Sadeh }
786602adf40SYehuda Sadeh 
787602adf40SYehuda Sadeh /*
788602adf40SYehuda Sadeh  * helpers for osd request op vectors.
789602adf40SYehuda Sadeh  */
790602adf40SYehuda Sadeh static int rbd_create_rw_ops(struct ceph_osd_req_op **ops,
791602adf40SYehuda Sadeh 			    int num_ops,
792602adf40SYehuda Sadeh 			    int opcode,
793602adf40SYehuda Sadeh 			    u32 payload_len)
794602adf40SYehuda Sadeh {
795602adf40SYehuda Sadeh 	*ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1),
796602adf40SYehuda Sadeh 		       GFP_NOIO);
797602adf40SYehuda Sadeh 	if (!*ops)
798602adf40SYehuda Sadeh 		return -ENOMEM;
799602adf40SYehuda Sadeh 	(*ops)[0].op = opcode;
800602adf40SYehuda Sadeh 	/*
801602adf40SYehuda Sadeh 	 * op extent offset and length will be set later on
802602adf40SYehuda Sadeh 	 * in calc_raw_layout()
803602adf40SYehuda Sadeh 	 */
804602adf40SYehuda Sadeh 	(*ops)[0].payload_len = payload_len;
805602adf40SYehuda Sadeh 	return 0;
806602adf40SYehuda Sadeh }
807602adf40SYehuda Sadeh 
/* Free an op vector obtained from rbd_create_rw_ops(). */
static void rbd_destroy_ops(struct ceph_osd_req_op *ops)
{
	kfree(ops);
}
812602adf40SYehuda Sadeh 
8131fec7093SYehuda Sadeh static void rbd_coll_end_req_index(struct request *rq,
8141fec7093SYehuda Sadeh 				   struct rbd_req_coll *coll,
8151fec7093SYehuda Sadeh 				   int index,
8161fec7093SYehuda Sadeh 				   int ret, u64 len)
8171fec7093SYehuda Sadeh {
8181fec7093SYehuda Sadeh 	struct request_queue *q;
8191fec7093SYehuda Sadeh 	int min, max, i;
8201fec7093SYehuda Sadeh 
8211fec7093SYehuda Sadeh 	dout("rbd_coll_end_req_index %p index %d ret %d len %lld\n",
8221fec7093SYehuda Sadeh 	     coll, index, ret, len);
8231fec7093SYehuda Sadeh 
8241fec7093SYehuda Sadeh 	if (!rq)
8251fec7093SYehuda Sadeh 		return;
8261fec7093SYehuda Sadeh 
8271fec7093SYehuda Sadeh 	if (!coll) {
8281fec7093SYehuda Sadeh 		blk_end_request(rq, ret, len);
8291fec7093SYehuda Sadeh 		return;
8301fec7093SYehuda Sadeh 	}
8311fec7093SYehuda Sadeh 
8321fec7093SYehuda Sadeh 	q = rq->q;
8331fec7093SYehuda Sadeh 
8341fec7093SYehuda Sadeh 	spin_lock_irq(q->queue_lock);
8351fec7093SYehuda Sadeh 	coll->status[index].done = 1;
8361fec7093SYehuda Sadeh 	coll->status[index].rc = ret;
8371fec7093SYehuda Sadeh 	coll->status[index].bytes = len;
8381fec7093SYehuda Sadeh 	max = min = coll->num_done;
8391fec7093SYehuda Sadeh 	while (max < coll->total && coll->status[max].done)
8401fec7093SYehuda Sadeh 		max++;
8411fec7093SYehuda Sadeh 
8421fec7093SYehuda Sadeh 	for (i = min; i<max; i++) {
8431fec7093SYehuda Sadeh 		__blk_end_request(rq, coll->status[i].rc,
8441fec7093SYehuda Sadeh 				  coll->status[i].bytes);
8451fec7093SYehuda Sadeh 		coll->num_done++;
8461fec7093SYehuda Sadeh 		kref_put(&coll->kref, rbd_coll_release);
8471fec7093SYehuda Sadeh 	}
8481fec7093SYehuda Sadeh 	spin_unlock_irq(q->queue_lock);
8491fec7093SYehuda Sadeh }
8501fec7093SYehuda Sadeh 
8511fec7093SYehuda Sadeh static void rbd_coll_end_req(struct rbd_request *req,
8521fec7093SYehuda Sadeh 			     int ret, u64 len)
8531fec7093SYehuda Sadeh {
8541fec7093SYehuda Sadeh 	rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len);
8551fec7093SYehuda Sadeh }
8561fec7093SYehuda Sadeh 
857602adf40SYehuda Sadeh /*
858602adf40SYehuda Sadeh  * Send ceph osd request
859602adf40SYehuda Sadeh  */
860602adf40SYehuda Sadeh static int rbd_do_request(struct request *rq,
861602adf40SYehuda Sadeh 			  struct rbd_device *dev,
862602adf40SYehuda Sadeh 			  struct ceph_snap_context *snapc,
863602adf40SYehuda Sadeh 			  u64 snapid,
864602adf40SYehuda Sadeh 			  const char *obj, u64 ofs, u64 len,
865602adf40SYehuda Sadeh 			  struct bio *bio,
866602adf40SYehuda Sadeh 			  struct page **pages,
867602adf40SYehuda Sadeh 			  int num_pages,
868602adf40SYehuda Sadeh 			  int flags,
869602adf40SYehuda Sadeh 			  struct ceph_osd_req_op *ops,
870602adf40SYehuda Sadeh 			  int num_reply,
8711fec7093SYehuda Sadeh 			  struct rbd_req_coll *coll,
8721fec7093SYehuda Sadeh 			  int coll_index,
873602adf40SYehuda Sadeh 			  void (*rbd_cb)(struct ceph_osd_request *req,
87459c2be1eSYehuda Sadeh 					 struct ceph_msg *msg),
87559c2be1eSYehuda Sadeh 			  struct ceph_osd_request **linger_req,
87659c2be1eSYehuda Sadeh 			  u64 *ver)
877602adf40SYehuda Sadeh {
878602adf40SYehuda Sadeh 	struct ceph_osd_request *req;
879602adf40SYehuda Sadeh 	struct ceph_file_layout *layout;
880602adf40SYehuda Sadeh 	int ret;
881602adf40SYehuda Sadeh 	u64 bno;
882602adf40SYehuda Sadeh 	struct timespec mtime = CURRENT_TIME;
883602adf40SYehuda Sadeh 	struct rbd_request *req_data;
884602adf40SYehuda Sadeh 	struct ceph_osd_request_head *reqhead;
885602adf40SYehuda Sadeh 	struct rbd_image_header *header = &dev->header;
8861dbb4399SAlex Elder 	struct ceph_osd_client *osdc;
887602adf40SYehuda Sadeh 
888602adf40SYehuda Sadeh 	req_data = kzalloc(sizeof(*req_data), GFP_NOIO);
8891fec7093SYehuda Sadeh 	if (!req_data) {
8901fec7093SYehuda Sadeh 		if (coll)
8911fec7093SYehuda Sadeh 			rbd_coll_end_req_index(rq, coll, coll_index,
8921fec7093SYehuda Sadeh 					       -ENOMEM, len);
8931fec7093SYehuda Sadeh 		return -ENOMEM;
8941fec7093SYehuda Sadeh 	}
895602adf40SYehuda Sadeh 
8961fec7093SYehuda Sadeh 	if (coll) {
8971fec7093SYehuda Sadeh 		req_data->coll = coll;
8981fec7093SYehuda Sadeh 		req_data->coll_index = coll_index;
8991fec7093SYehuda Sadeh 	}
9001fec7093SYehuda Sadeh 
9011fec7093SYehuda Sadeh 	dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs);
902602adf40SYehuda Sadeh 
903602adf40SYehuda Sadeh 	down_read(&header->snap_rwsem);
904602adf40SYehuda Sadeh 
9051dbb4399SAlex Elder 	osdc = &dev->rbd_client->client->osdc;
9061dbb4399SAlex Elder 	req = ceph_osdc_alloc_request(osdc, flags, snapc, ops,
9071dbb4399SAlex Elder 					false, GFP_NOIO, pages, bio);
9084ad12621SSage Weil 	if (!req) {
909602adf40SYehuda Sadeh 		up_read(&header->snap_rwsem);
9104ad12621SSage Weil 		ret = -ENOMEM;
911602adf40SYehuda Sadeh 		goto done_pages;
912602adf40SYehuda Sadeh 	}
913602adf40SYehuda Sadeh 
914602adf40SYehuda Sadeh 	req->r_callback = rbd_cb;
915602adf40SYehuda Sadeh 
916602adf40SYehuda Sadeh 	req_data->rq = rq;
917602adf40SYehuda Sadeh 	req_data->bio = bio;
918602adf40SYehuda Sadeh 	req_data->pages = pages;
919602adf40SYehuda Sadeh 	req_data->len = len;
920602adf40SYehuda Sadeh 
921602adf40SYehuda Sadeh 	req->r_priv = req_data;
922602adf40SYehuda Sadeh 
923602adf40SYehuda Sadeh 	reqhead = req->r_request->front.iov_base;
924602adf40SYehuda Sadeh 	reqhead->snapid = cpu_to_le64(CEPH_NOSNAP);
925602adf40SYehuda Sadeh 
926602adf40SYehuda Sadeh 	strncpy(req->r_oid, obj, sizeof(req->r_oid));
927602adf40SYehuda Sadeh 	req->r_oid_len = strlen(req->r_oid);
928602adf40SYehuda Sadeh 
929602adf40SYehuda Sadeh 	layout = &req->r_file_layout;
930602adf40SYehuda Sadeh 	memset(layout, 0, sizeof(*layout));
931602adf40SYehuda Sadeh 	layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
932602adf40SYehuda Sadeh 	layout->fl_stripe_count = cpu_to_le32(1);
933602adf40SYehuda Sadeh 	layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
934602adf40SYehuda Sadeh 	layout->fl_pg_preferred = cpu_to_le32(-1);
935602adf40SYehuda Sadeh 	layout->fl_pg_pool = cpu_to_le32(dev->poolid);
9361dbb4399SAlex Elder 	ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
9371dbb4399SAlex Elder 				req, ops);
938602adf40SYehuda Sadeh 
939602adf40SYehuda Sadeh 	ceph_osdc_build_request(req, ofs, &len,
940602adf40SYehuda Sadeh 				ops,
941602adf40SYehuda Sadeh 				snapc,
942602adf40SYehuda Sadeh 				&mtime,
943602adf40SYehuda Sadeh 				req->r_oid, req->r_oid_len);
944602adf40SYehuda Sadeh 	up_read(&header->snap_rwsem);
945602adf40SYehuda Sadeh 
94659c2be1eSYehuda Sadeh 	if (linger_req) {
9471dbb4399SAlex Elder 		ceph_osdc_set_request_linger(osdc, req);
94859c2be1eSYehuda Sadeh 		*linger_req = req;
94959c2be1eSYehuda Sadeh 	}
95059c2be1eSYehuda Sadeh 
9511dbb4399SAlex Elder 	ret = ceph_osdc_start_request(osdc, req, false);
952602adf40SYehuda Sadeh 	if (ret < 0)
953602adf40SYehuda Sadeh 		goto done_err;
954602adf40SYehuda Sadeh 
955602adf40SYehuda Sadeh 	if (!rbd_cb) {
9561dbb4399SAlex Elder 		ret = ceph_osdc_wait_request(osdc, req);
95759c2be1eSYehuda Sadeh 		if (ver)
95859c2be1eSYehuda Sadeh 			*ver = le64_to_cpu(req->r_reassert_version.version);
9591fec7093SYehuda Sadeh 		dout("reassert_ver=%lld\n",
9601fec7093SYehuda Sadeh 		     le64_to_cpu(req->r_reassert_version.version));
961602adf40SYehuda Sadeh 		ceph_osdc_put_request(req);
962602adf40SYehuda Sadeh 	}
963602adf40SYehuda Sadeh 	return ret;
964602adf40SYehuda Sadeh 
965602adf40SYehuda Sadeh done_err:
966602adf40SYehuda Sadeh 	bio_chain_put(req_data->bio);
967602adf40SYehuda Sadeh 	ceph_osdc_put_request(req);
968602adf40SYehuda Sadeh done_pages:
9691fec7093SYehuda Sadeh 	rbd_coll_end_req(req_data, ret, len);
970602adf40SYehuda Sadeh 	kfree(req_data);
971602adf40SYehuda Sadeh 	return ret;
972602adf40SYehuda Sadeh }
973602adf40SYehuda Sadeh 
974602adf40SYehuda Sadeh /*
975602adf40SYehuda Sadeh  * Ceph osd op callback
976602adf40SYehuda Sadeh  */
977602adf40SYehuda Sadeh static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
978602adf40SYehuda Sadeh {
979602adf40SYehuda Sadeh 	struct rbd_request *req_data = req->r_priv;
980602adf40SYehuda Sadeh 	struct ceph_osd_reply_head *replyhead;
981602adf40SYehuda Sadeh 	struct ceph_osd_op *op;
982602adf40SYehuda Sadeh 	__s32 rc;
983602adf40SYehuda Sadeh 	u64 bytes;
984602adf40SYehuda Sadeh 	int read_op;
985602adf40SYehuda Sadeh 
986602adf40SYehuda Sadeh 	/* parse reply */
987602adf40SYehuda Sadeh 	replyhead = msg->front.iov_base;
988602adf40SYehuda Sadeh 	WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
989602adf40SYehuda Sadeh 	op = (void *)(replyhead + 1);
990602adf40SYehuda Sadeh 	rc = le32_to_cpu(replyhead->result);
991602adf40SYehuda Sadeh 	bytes = le64_to_cpu(op->extent.length);
992602adf40SYehuda Sadeh 	read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ);
993602adf40SYehuda Sadeh 
994602adf40SYehuda Sadeh 	dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc);
995602adf40SYehuda Sadeh 
996602adf40SYehuda Sadeh 	if (rc == -ENOENT && read_op) {
997602adf40SYehuda Sadeh 		zero_bio_chain(req_data->bio, 0);
998602adf40SYehuda Sadeh 		rc = 0;
999602adf40SYehuda Sadeh 	} else if (rc == 0 && read_op && bytes < req_data->len) {
1000602adf40SYehuda Sadeh 		zero_bio_chain(req_data->bio, bytes);
1001602adf40SYehuda Sadeh 		bytes = req_data->len;
1002602adf40SYehuda Sadeh 	}
1003602adf40SYehuda Sadeh 
10041fec7093SYehuda Sadeh 	rbd_coll_end_req(req_data, rc, bytes);
1005602adf40SYehuda Sadeh 
1006602adf40SYehuda Sadeh 	if (req_data->bio)
1007602adf40SYehuda Sadeh 		bio_chain_put(req_data->bio);
1008602adf40SYehuda Sadeh 
1009602adf40SYehuda Sadeh 	ceph_osdc_put_request(req);
1010602adf40SYehuda Sadeh 	kfree(req_data);
1011602adf40SYehuda Sadeh }
1012602adf40SYehuda Sadeh 
/* Completion callback that only drops the osd request; msg is not examined. */
static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
{
	ceph_osdc_put_request(req);
}
101759c2be1eSYehuda Sadeh 
1018602adf40SYehuda Sadeh /*
1019602adf40SYehuda Sadeh  * Do a synchronous ceph osd operation
1020602adf40SYehuda Sadeh  */
1021602adf40SYehuda Sadeh static int rbd_req_sync_op(struct rbd_device *dev,
1022602adf40SYehuda Sadeh 			   struct ceph_snap_context *snapc,
1023602adf40SYehuda Sadeh 			   u64 snapid,
1024602adf40SYehuda Sadeh 			   int opcode,
1025602adf40SYehuda Sadeh 			   int flags,
1026602adf40SYehuda Sadeh 			   struct ceph_osd_req_op *orig_ops,
1027602adf40SYehuda Sadeh 			   int num_reply,
1028602adf40SYehuda Sadeh 			   const char *obj,
1029602adf40SYehuda Sadeh 			   u64 ofs, u64 len,
103059c2be1eSYehuda Sadeh 			   char *buf,
103159c2be1eSYehuda Sadeh 			   struct ceph_osd_request **linger_req,
103259c2be1eSYehuda Sadeh 			   u64 *ver)
1033602adf40SYehuda Sadeh {
1034602adf40SYehuda Sadeh 	int ret;
1035602adf40SYehuda Sadeh 	struct page **pages;
1036602adf40SYehuda Sadeh 	int num_pages;
1037602adf40SYehuda Sadeh 	struct ceph_osd_req_op *ops = orig_ops;
1038602adf40SYehuda Sadeh 	u32 payload_len;
1039602adf40SYehuda Sadeh 
1040602adf40SYehuda Sadeh 	num_pages = calc_pages_for(ofs , len);
1041602adf40SYehuda Sadeh 	pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
1042b8d0638aSDan Carpenter 	if (IS_ERR(pages))
1043b8d0638aSDan Carpenter 		return PTR_ERR(pages);
1044602adf40SYehuda Sadeh 
1045602adf40SYehuda Sadeh 	if (!orig_ops) {
1046602adf40SYehuda Sadeh 		payload_len = (flags & CEPH_OSD_FLAG_WRITE ? len : 0);
1047602adf40SYehuda Sadeh 		ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
1048602adf40SYehuda Sadeh 		if (ret < 0)
1049602adf40SYehuda Sadeh 			goto done;
1050602adf40SYehuda Sadeh 
1051602adf40SYehuda Sadeh 		if ((flags & CEPH_OSD_FLAG_WRITE) && buf) {
1052602adf40SYehuda Sadeh 			ret = ceph_copy_to_page_vector(pages, buf, ofs, len);
1053602adf40SYehuda Sadeh 			if (ret < 0)
1054602adf40SYehuda Sadeh 				goto done_ops;
1055602adf40SYehuda Sadeh 		}
1056602adf40SYehuda Sadeh 	}
1057602adf40SYehuda Sadeh 
1058602adf40SYehuda Sadeh 	ret = rbd_do_request(NULL, dev, snapc, snapid,
1059602adf40SYehuda Sadeh 			  obj, ofs, len, NULL,
1060602adf40SYehuda Sadeh 			  pages, num_pages,
1061602adf40SYehuda Sadeh 			  flags,
1062602adf40SYehuda Sadeh 			  ops,
1063602adf40SYehuda Sadeh 			  2,
10641fec7093SYehuda Sadeh 			  NULL, 0,
106559c2be1eSYehuda Sadeh 			  NULL,
106659c2be1eSYehuda Sadeh 			  linger_req, ver);
1067602adf40SYehuda Sadeh 	if (ret < 0)
1068602adf40SYehuda Sadeh 		goto done_ops;
1069602adf40SYehuda Sadeh 
1070602adf40SYehuda Sadeh 	if ((flags & CEPH_OSD_FLAG_READ) && buf)
1071602adf40SYehuda Sadeh 		ret = ceph_copy_from_page_vector(pages, buf, ofs, ret);
1072602adf40SYehuda Sadeh 
1073602adf40SYehuda Sadeh done_ops:
1074602adf40SYehuda Sadeh 	if (!orig_ops)
1075602adf40SYehuda Sadeh 		rbd_destroy_ops(ops);
1076602adf40SYehuda Sadeh done:
1077602adf40SYehuda Sadeh 	ceph_release_page_vector(pages, num_pages);
1078602adf40SYehuda Sadeh 	return ret;
1079602adf40SYehuda Sadeh }
1080602adf40SYehuda Sadeh 
1081602adf40SYehuda Sadeh /*
1082602adf40SYehuda Sadeh  * Do an asynchronous ceph osd operation
1083602adf40SYehuda Sadeh  */
1084602adf40SYehuda Sadeh static int rbd_do_op(struct request *rq,
1085602adf40SYehuda Sadeh 		     struct rbd_device *rbd_dev ,
1086602adf40SYehuda Sadeh 		     struct ceph_snap_context *snapc,
1087602adf40SYehuda Sadeh 		     u64 snapid,
1088602adf40SYehuda Sadeh 		     int opcode, int flags, int num_reply,
1089602adf40SYehuda Sadeh 		     u64 ofs, u64 len,
10901fec7093SYehuda Sadeh 		     struct bio *bio,
10911fec7093SYehuda Sadeh 		     struct rbd_req_coll *coll,
10921fec7093SYehuda Sadeh 		     int coll_index)
1093602adf40SYehuda Sadeh {
1094602adf40SYehuda Sadeh 	char *seg_name;
1095602adf40SYehuda Sadeh 	u64 seg_ofs;
1096602adf40SYehuda Sadeh 	u64 seg_len;
1097602adf40SYehuda Sadeh 	int ret;
1098602adf40SYehuda Sadeh 	struct ceph_osd_req_op *ops;
1099602adf40SYehuda Sadeh 	u32 payload_len;
1100602adf40SYehuda Sadeh 
1101602adf40SYehuda Sadeh 	seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
1102602adf40SYehuda Sadeh 	if (!seg_name)
1103602adf40SYehuda Sadeh 		return -ENOMEM;
1104602adf40SYehuda Sadeh 
1105602adf40SYehuda Sadeh 	seg_len = rbd_get_segment(&rbd_dev->header,
1106602adf40SYehuda Sadeh 				  rbd_dev->header.block_name,
1107602adf40SYehuda Sadeh 				  ofs, len,
1108602adf40SYehuda Sadeh 				  seg_name, &seg_ofs);
1109602adf40SYehuda Sadeh 
1110602adf40SYehuda Sadeh 	payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0);
1111602adf40SYehuda Sadeh 
1112602adf40SYehuda Sadeh 	ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
1113602adf40SYehuda Sadeh 	if (ret < 0)
1114602adf40SYehuda Sadeh 		goto done;
1115602adf40SYehuda Sadeh 
1116602adf40SYehuda Sadeh 	/* we've taken care of segment sizes earlier when we
1117602adf40SYehuda Sadeh 	   cloned the bios. We should never have a segment
1118602adf40SYehuda Sadeh 	   truncated at this point */
1119602adf40SYehuda Sadeh 	BUG_ON(seg_len < len);
1120602adf40SYehuda Sadeh 
1121602adf40SYehuda Sadeh 	ret = rbd_do_request(rq, rbd_dev, snapc, snapid,
1122602adf40SYehuda Sadeh 			     seg_name, seg_ofs, seg_len,
1123602adf40SYehuda Sadeh 			     bio,
1124602adf40SYehuda Sadeh 			     NULL, 0,
1125602adf40SYehuda Sadeh 			     flags,
1126602adf40SYehuda Sadeh 			     ops,
1127602adf40SYehuda Sadeh 			     num_reply,
11281fec7093SYehuda Sadeh 			     coll, coll_index,
112959c2be1eSYehuda Sadeh 			     rbd_req_cb, 0, NULL);
113011f77002SSage Weil 
113111f77002SSage Weil 	rbd_destroy_ops(ops);
1132602adf40SYehuda Sadeh done:
1133602adf40SYehuda Sadeh 	kfree(seg_name);
1134602adf40SYehuda Sadeh 	return ret;
1135602adf40SYehuda Sadeh }
1136602adf40SYehuda Sadeh 
1137602adf40SYehuda Sadeh /*
1138602adf40SYehuda Sadeh  * Request async osd write
1139602adf40SYehuda Sadeh  */
1140602adf40SYehuda Sadeh static int rbd_req_write(struct request *rq,
1141602adf40SYehuda Sadeh 			 struct rbd_device *rbd_dev,
1142602adf40SYehuda Sadeh 			 struct ceph_snap_context *snapc,
1143602adf40SYehuda Sadeh 			 u64 ofs, u64 len,
11441fec7093SYehuda Sadeh 			 struct bio *bio,
11451fec7093SYehuda Sadeh 			 struct rbd_req_coll *coll,
11461fec7093SYehuda Sadeh 			 int coll_index)
1147602adf40SYehuda Sadeh {
1148602adf40SYehuda Sadeh 	return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP,
1149602adf40SYehuda Sadeh 			 CEPH_OSD_OP_WRITE,
1150602adf40SYehuda Sadeh 			 CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
1151602adf40SYehuda Sadeh 			 2,
11521fec7093SYehuda Sadeh 			 ofs, len, bio, coll, coll_index);
1153602adf40SYehuda Sadeh }
1154602adf40SYehuda Sadeh 
1155602adf40SYehuda Sadeh /*
1156602adf40SYehuda Sadeh  * Request async osd read
1157602adf40SYehuda Sadeh  */
1158602adf40SYehuda Sadeh static int rbd_req_read(struct request *rq,
1159602adf40SYehuda Sadeh 			 struct rbd_device *rbd_dev,
1160602adf40SYehuda Sadeh 			 u64 snapid,
1161602adf40SYehuda Sadeh 			 u64 ofs, u64 len,
11621fec7093SYehuda Sadeh 			 struct bio *bio,
11631fec7093SYehuda Sadeh 			 struct rbd_req_coll *coll,
11641fec7093SYehuda Sadeh 			 int coll_index)
1165602adf40SYehuda Sadeh {
1166602adf40SYehuda Sadeh 	return rbd_do_op(rq, rbd_dev, NULL,
1167602adf40SYehuda Sadeh 			 (snapid ? snapid : CEPH_NOSNAP),
1168602adf40SYehuda Sadeh 			 CEPH_OSD_OP_READ,
1169602adf40SYehuda Sadeh 			 CEPH_OSD_FLAG_READ,
1170602adf40SYehuda Sadeh 			 2,
11711fec7093SYehuda Sadeh 			 ofs, len, bio, coll, coll_index);
1172602adf40SYehuda Sadeh }
1173602adf40SYehuda Sadeh 
1174602adf40SYehuda Sadeh /*
1175602adf40SYehuda Sadeh  * Request sync osd read
1176602adf40SYehuda Sadeh  */
1177602adf40SYehuda Sadeh static int rbd_req_sync_read(struct rbd_device *dev,
1178602adf40SYehuda Sadeh 			  struct ceph_snap_context *snapc,
1179602adf40SYehuda Sadeh 			  u64 snapid,
1180602adf40SYehuda Sadeh 			  const char *obj,
1181602adf40SYehuda Sadeh 			  u64 ofs, u64 len,
118259c2be1eSYehuda Sadeh 			  char *buf,
118359c2be1eSYehuda Sadeh 			  u64 *ver)
1184602adf40SYehuda Sadeh {
1185602adf40SYehuda Sadeh 	return rbd_req_sync_op(dev, NULL,
1186602adf40SYehuda Sadeh 			       (snapid ? snapid : CEPH_NOSNAP),
1187602adf40SYehuda Sadeh 			       CEPH_OSD_OP_READ,
1188602adf40SYehuda Sadeh 			       CEPH_OSD_FLAG_READ,
1189602adf40SYehuda Sadeh 			       NULL,
119059c2be1eSYehuda Sadeh 			       1, obj, ofs, len, buf, NULL, ver);
1191602adf40SYehuda Sadeh }
1192602adf40SYehuda Sadeh 
1193602adf40SYehuda Sadeh /*
119459c2be1eSYehuda Sadeh  * Request sync osd watch
119559c2be1eSYehuda Sadeh  */
119659c2be1eSYehuda Sadeh static int rbd_req_sync_notify_ack(struct rbd_device *dev,
119759c2be1eSYehuda Sadeh 				   u64 ver,
119859c2be1eSYehuda Sadeh 				   u64 notify_id,
119959c2be1eSYehuda Sadeh 				   const char *obj)
120059c2be1eSYehuda Sadeh {
120159c2be1eSYehuda Sadeh 	struct ceph_osd_req_op *ops;
120259c2be1eSYehuda Sadeh 	struct page **pages = NULL;
120311f77002SSage Weil 	int ret;
120411f77002SSage Weil 
120511f77002SSage Weil 	ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY_ACK, 0);
120659c2be1eSYehuda Sadeh 	if (ret < 0)
120759c2be1eSYehuda Sadeh 		return ret;
120859c2be1eSYehuda Sadeh 
120959c2be1eSYehuda Sadeh 	ops[0].watch.ver = cpu_to_le64(dev->header.obj_version);
121059c2be1eSYehuda Sadeh 	ops[0].watch.cookie = notify_id;
121159c2be1eSYehuda Sadeh 	ops[0].watch.flag = 0;
121259c2be1eSYehuda Sadeh 
121359c2be1eSYehuda Sadeh 	ret = rbd_do_request(NULL, dev, NULL, CEPH_NOSNAP,
121459c2be1eSYehuda Sadeh 			  obj, 0, 0, NULL,
121559c2be1eSYehuda Sadeh 			  pages, 0,
121659c2be1eSYehuda Sadeh 			  CEPH_OSD_FLAG_READ,
121759c2be1eSYehuda Sadeh 			  ops,
121859c2be1eSYehuda Sadeh 			  1,
12191fec7093SYehuda Sadeh 			  NULL, 0,
122059c2be1eSYehuda Sadeh 			  rbd_simple_req_cb, 0, NULL);
122159c2be1eSYehuda Sadeh 
122259c2be1eSYehuda Sadeh 	rbd_destroy_ops(ops);
122359c2be1eSYehuda Sadeh 	return ret;
122459c2be1eSYehuda Sadeh }
122559c2be1eSYehuda Sadeh 
122659c2be1eSYehuda Sadeh static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
122759c2be1eSYehuda Sadeh {
122859c2be1eSYehuda Sadeh 	struct rbd_device *dev = (struct rbd_device *)data;
122913143d2dSSage Weil 	int rc;
123013143d2dSSage Weil 
123159c2be1eSYehuda Sadeh 	if (!dev)
123259c2be1eSYehuda Sadeh 		return;
123359c2be1eSYehuda Sadeh 
123459c2be1eSYehuda Sadeh 	dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name,
123559c2be1eSYehuda Sadeh 		notify_id, (int)opcode);
123659c2be1eSYehuda Sadeh 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
123713143d2dSSage Weil 	rc = __rbd_update_snaps(dev);
123859c2be1eSYehuda Sadeh 	mutex_unlock(&ctl_mutex);
123913143d2dSSage Weil 	if (rc)
1240f0f8cef5SAlex Elder 		pr_warning(RBD_DRV_NAME "%d got notification but failed to "
1241f0f8cef5SAlex Elder 			   " update snaps: %d\n", dev->major, rc);
124259c2be1eSYehuda Sadeh 
124359c2be1eSYehuda Sadeh 	rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name);
124459c2be1eSYehuda Sadeh }
124559c2be1eSYehuda Sadeh 
124659c2be1eSYehuda Sadeh /*
124759c2be1eSYehuda Sadeh  * Request sync osd watch
124859c2be1eSYehuda Sadeh  */
124959c2be1eSYehuda Sadeh static int rbd_req_sync_watch(struct rbd_device *dev,
125059c2be1eSYehuda Sadeh 			      const char *obj,
125159c2be1eSYehuda Sadeh 			      u64 ver)
125259c2be1eSYehuda Sadeh {
125359c2be1eSYehuda Sadeh 	struct ceph_osd_req_op *ops;
12541dbb4399SAlex Elder 	struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc;
125559c2be1eSYehuda Sadeh 
125659c2be1eSYehuda Sadeh 	int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0);
125759c2be1eSYehuda Sadeh 	if (ret < 0)
125859c2be1eSYehuda Sadeh 		return ret;
125959c2be1eSYehuda Sadeh 
126059c2be1eSYehuda Sadeh 	ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0,
126159c2be1eSYehuda Sadeh 				     (void *)dev, &dev->watch_event);
126259c2be1eSYehuda Sadeh 	if (ret < 0)
126359c2be1eSYehuda Sadeh 		goto fail;
126459c2be1eSYehuda Sadeh 
126559c2be1eSYehuda Sadeh 	ops[0].watch.ver = cpu_to_le64(ver);
126659c2be1eSYehuda Sadeh 	ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie);
126759c2be1eSYehuda Sadeh 	ops[0].watch.flag = 1;
126859c2be1eSYehuda Sadeh 
126959c2be1eSYehuda Sadeh 	ret = rbd_req_sync_op(dev, NULL,
127059c2be1eSYehuda Sadeh 			      CEPH_NOSNAP,
127159c2be1eSYehuda Sadeh 			      0,
127259c2be1eSYehuda Sadeh 			      CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
127359c2be1eSYehuda Sadeh 			      ops,
127459c2be1eSYehuda Sadeh 			      1, obj, 0, 0, NULL,
127559c2be1eSYehuda Sadeh 			      &dev->watch_request, NULL);
127659c2be1eSYehuda Sadeh 
127759c2be1eSYehuda Sadeh 	if (ret < 0)
127859c2be1eSYehuda Sadeh 		goto fail_event;
127959c2be1eSYehuda Sadeh 
128059c2be1eSYehuda Sadeh 	rbd_destroy_ops(ops);
128159c2be1eSYehuda Sadeh 	return 0;
128259c2be1eSYehuda Sadeh 
128359c2be1eSYehuda Sadeh fail_event:
128459c2be1eSYehuda Sadeh 	ceph_osdc_cancel_event(dev->watch_event);
128559c2be1eSYehuda Sadeh 	dev->watch_event = NULL;
128659c2be1eSYehuda Sadeh fail:
128759c2be1eSYehuda Sadeh 	rbd_destroy_ops(ops);
128859c2be1eSYehuda Sadeh 	return ret;
128959c2be1eSYehuda Sadeh }
129059c2be1eSYehuda Sadeh 
129179e3057cSYehuda Sadeh /*
129279e3057cSYehuda Sadeh  * Request sync osd unwatch
129379e3057cSYehuda Sadeh  */
129479e3057cSYehuda Sadeh static int rbd_req_sync_unwatch(struct rbd_device *dev,
129579e3057cSYehuda Sadeh 				const char *obj)
129679e3057cSYehuda Sadeh {
129779e3057cSYehuda Sadeh 	struct ceph_osd_req_op *ops;
129879e3057cSYehuda Sadeh 
129979e3057cSYehuda Sadeh 	int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0);
130079e3057cSYehuda Sadeh 	if (ret < 0)
130179e3057cSYehuda Sadeh 		return ret;
130279e3057cSYehuda Sadeh 
130379e3057cSYehuda Sadeh 	ops[0].watch.ver = 0;
130479e3057cSYehuda Sadeh 	ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie);
130579e3057cSYehuda Sadeh 	ops[0].watch.flag = 0;
130679e3057cSYehuda Sadeh 
130779e3057cSYehuda Sadeh 	ret = rbd_req_sync_op(dev, NULL,
130879e3057cSYehuda Sadeh 			      CEPH_NOSNAP,
130979e3057cSYehuda Sadeh 			      0,
131079e3057cSYehuda Sadeh 			      CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
131179e3057cSYehuda Sadeh 			      ops,
131279e3057cSYehuda Sadeh 			      1, obj, 0, 0, NULL, NULL, NULL);
131379e3057cSYehuda Sadeh 
131479e3057cSYehuda Sadeh 	rbd_destroy_ops(ops);
131579e3057cSYehuda Sadeh 	ceph_osdc_cancel_event(dev->watch_event);
131679e3057cSYehuda Sadeh 	dev->watch_event = NULL;
131779e3057cSYehuda Sadeh 	return ret;
131879e3057cSYehuda Sadeh }
131979e3057cSYehuda Sadeh 
/* Per-notify context; holds only a pointer to the rbd device. */
struct rbd_notify_info {
	struct rbd_device *dev;
};
132359c2be1eSYehuda Sadeh 
132459c2be1eSYehuda Sadeh static void rbd_notify_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
132559c2be1eSYehuda Sadeh {
132659c2be1eSYehuda Sadeh 	struct rbd_device *dev = (struct rbd_device *)data;
132759c2be1eSYehuda Sadeh 	if (!dev)
132859c2be1eSYehuda Sadeh 		return;
132959c2be1eSYehuda Sadeh 
133059c2be1eSYehuda Sadeh 	dout("rbd_notify_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name,
133159c2be1eSYehuda Sadeh 		notify_id, (int)opcode);
133259c2be1eSYehuda Sadeh }
133359c2be1eSYehuda Sadeh 
133459c2be1eSYehuda Sadeh /*
133559c2be1eSYehuda Sadeh  * Request sync osd notify
133659c2be1eSYehuda Sadeh  */
133759c2be1eSYehuda Sadeh static int rbd_req_sync_notify(struct rbd_device *dev,
133859c2be1eSYehuda Sadeh 		          const char *obj)
133959c2be1eSYehuda Sadeh {
134059c2be1eSYehuda Sadeh 	struct ceph_osd_req_op *ops;
13411dbb4399SAlex Elder 	struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc;
134259c2be1eSYehuda Sadeh 	struct ceph_osd_event *event;
134359c2be1eSYehuda Sadeh 	struct rbd_notify_info info;
134459c2be1eSYehuda Sadeh 	int payload_len = sizeof(u32) + sizeof(u32);
134559c2be1eSYehuda Sadeh 	int ret;
134659c2be1eSYehuda Sadeh 
134759c2be1eSYehuda Sadeh 	ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY, payload_len);
134859c2be1eSYehuda Sadeh 	if (ret < 0)
134959c2be1eSYehuda Sadeh 		return ret;
135059c2be1eSYehuda Sadeh 
135159c2be1eSYehuda Sadeh 	info.dev = dev;
135259c2be1eSYehuda Sadeh 
135359c2be1eSYehuda Sadeh 	ret = ceph_osdc_create_event(osdc, rbd_notify_cb, 1,
135459c2be1eSYehuda Sadeh 				     (void *)&info, &event);
135559c2be1eSYehuda Sadeh 	if (ret < 0)
135659c2be1eSYehuda Sadeh 		goto fail;
135759c2be1eSYehuda Sadeh 
135859c2be1eSYehuda Sadeh 	ops[0].watch.ver = 1;
135959c2be1eSYehuda Sadeh 	ops[0].watch.flag = 1;
136059c2be1eSYehuda Sadeh 	ops[0].watch.cookie = event->cookie;
136159c2be1eSYehuda Sadeh 	ops[0].watch.prot_ver = RADOS_NOTIFY_VER;
136259c2be1eSYehuda Sadeh 	ops[0].watch.timeout = 12;
136359c2be1eSYehuda Sadeh 
136459c2be1eSYehuda Sadeh 	ret = rbd_req_sync_op(dev, NULL,
136559c2be1eSYehuda Sadeh 			       CEPH_NOSNAP,
136659c2be1eSYehuda Sadeh 			       0,
136759c2be1eSYehuda Sadeh 			       CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
136859c2be1eSYehuda Sadeh 			       ops,
136959c2be1eSYehuda Sadeh 			       1, obj, 0, 0, NULL, NULL, NULL);
137059c2be1eSYehuda Sadeh 	if (ret < 0)
137159c2be1eSYehuda Sadeh 		goto fail_event;
137259c2be1eSYehuda Sadeh 
137359c2be1eSYehuda Sadeh 	ret = ceph_osdc_wait_event(event, CEPH_OSD_TIMEOUT_DEFAULT);
137459c2be1eSYehuda Sadeh 	dout("ceph_osdc_wait_event returned %d\n", ret);
137559c2be1eSYehuda Sadeh 	rbd_destroy_ops(ops);
137659c2be1eSYehuda Sadeh 	return 0;
137759c2be1eSYehuda Sadeh 
137859c2be1eSYehuda Sadeh fail_event:
137959c2be1eSYehuda Sadeh 	ceph_osdc_cancel_event(event);
138059c2be1eSYehuda Sadeh fail:
138159c2be1eSYehuda Sadeh 	rbd_destroy_ops(ops);
138259c2be1eSYehuda Sadeh 	return ret;
138359c2be1eSYehuda Sadeh }
138459c2be1eSYehuda Sadeh 
138559c2be1eSYehuda Sadeh /*
1386602adf40SYehuda Sadeh  * Request sync osd read
1387602adf40SYehuda Sadeh  */
1388602adf40SYehuda Sadeh static int rbd_req_sync_exec(struct rbd_device *dev,
1389602adf40SYehuda Sadeh 			     const char *obj,
1390602adf40SYehuda Sadeh 			     const char *cls,
1391602adf40SYehuda Sadeh 			     const char *method,
1392602adf40SYehuda Sadeh 			     const char *data,
139359c2be1eSYehuda Sadeh 			     int len,
139459c2be1eSYehuda Sadeh 			     u64 *ver)
1395602adf40SYehuda Sadeh {
1396602adf40SYehuda Sadeh 	struct ceph_osd_req_op *ops;
1397602adf40SYehuda Sadeh 	int cls_len = strlen(cls);
1398602adf40SYehuda Sadeh 	int method_len = strlen(method);
1399602adf40SYehuda Sadeh 	int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL,
1400602adf40SYehuda Sadeh 				    cls_len + method_len + len);
1401602adf40SYehuda Sadeh 	if (ret < 0)
1402602adf40SYehuda Sadeh 		return ret;
1403602adf40SYehuda Sadeh 
1404602adf40SYehuda Sadeh 	ops[0].cls.class_name = cls;
1405602adf40SYehuda Sadeh 	ops[0].cls.class_len = (__u8)cls_len;
1406602adf40SYehuda Sadeh 	ops[0].cls.method_name = method;
1407602adf40SYehuda Sadeh 	ops[0].cls.method_len = (__u8)method_len;
1408602adf40SYehuda Sadeh 	ops[0].cls.argc = 0;
1409602adf40SYehuda Sadeh 	ops[0].cls.indata = data;
1410602adf40SYehuda Sadeh 	ops[0].cls.indata_len = len;
1411602adf40SYehuda Sadeh 
1412602adf40SYehuda Sadeh 	ret = rbd_req_sync_op(dev, NULL,
1413602adf40SYehuda Sadeh 			       CEPH_NOSNAP,
1414602adf40SYehuda Sadeh 			       0,
1415602adf40SYehuda Sadeh 			       CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
1416602adf40SYehuda Sadeh 			       ops,
141759c2be1eSYehuda Sadeh 			       1, obj, 0, 0, NULL, NULL, ver);
1418602adf40SYehuda Sadeh 
1419602adf40SYehuda Sadeh 	rbd_destroy_ops(ops);
1420602adf40SYehuda Sadeh 
1421602adf40SYehuda Sadeh 	dout("cls_exec returned %d\n", ret);
1422602adf40SYehuda Sadeh 	return ret;
1423602adf40SYehuda Sadeh }
1424602adf40SYehuda Sadeh 
14251fec7093SYehuda Sadeh static struct rbd_req_coll *rbd_alloc_coll(int num_reqs)
14261fec7093SYehuda Sadeh {
14271fec7093SYehuda Sadeh 	struct rbd_req_coll *coll =
14281fec7093SYehuda Sadeh 			kzalloc(sizeof(struct rbd_req_coll) +
14291fec7093SYehuda Sadeh 			        sizeof(struct rbd_req_status) * num_reqs,
14301fec7093SYehuda Sadeh 				GFP_ATOMIC);
14311fec7093SYehuda Sadeh 
14321fec7093SYehuda Sadeh 	if (!coll)
14331fec7093SYehuda Sadeh 		return NULL;
14341fec7093SYehuda Sadeh 	coll->total = num_reqs;
14351fec7093SYehuda Sadeh 	kref_init(&coll->kref);
14361fec7093SYehuda Sadeh 	return coll;
14371fec7093SYehuda Sadeh }
14381fec7093SYehuda Sadeh 
1439602adf40SYehuda Sadeh /*
1440602adf40SYehuda Sadeh  * block device queue callback
1441602adf40SYehuda Sadeh  */
1442602adf40SYehuda Sadeh static void rbd_rq_fn(struct request_queue *q)
1443602adf40SYehuda Sadeh {
1444602adf40SYehuda Sadeh 	struct rbd_device *rbd_dev = q->queuedata;
1445602adf40SYehuda Sadeh 	struct request *rq;
1446602adf40SYehuda Sadeh 	struct bio_pair *bp = NULL;
1447602adf40SYehuda Sadeh 
1448602adf40SYehuda Sadeh 	rq = blk_fetch_request(q);
1449602adf40SYehuda Sadeh 
1450602adf40SYehuda Sadeh 	while (1) {
1451602adf40SYehuda Sadeh 		struct bio *bio;
1452602adf40SYehuda Sadeh 		struct bio *rq_bio, *next_bio = NULL;
1453602adf40SYehuda Sadeh 		bool do_write;
1454602adf40SYehuda Sadeh 		int size, op_size = 0;
1455602adf40SYehuda Sadeh 		u64 ofs;
14561fec7093SYehuda Sadeh 		int num_segs, cur_seg = 0;
14571fec7093SYehuda Sadeh 		struct rbd_req_coll *coll;
1458602adf40SYehuda Sadeh 
1459602adf40SYehuda Sadeh 		/* peek at request from block layer */
1460602adf40SYehuda Sadeh 		if (!rq)
1461602adf40SYehuda Sadeh 			break;
1462602adf40SYehuda Sadeh 
1463602adf40SYehuda Sadeh 		dout("fetched request\n");
1464602adf40SYehuda Sadeh 
1465602adf40SYehuda Sadeh 		/* filter out block requests we don't understand */
1466602adf40SYehuda Sadeh 		if ((rq->cmd_type != REQ_TYPE_FS)) {
1467602adf40SYehuda Sadeh 			__blk_end_request_all(rq, 0);
1468602adf40SYehuda Sadeh 			goto next;
1469602adf40SYehuda Sadeh 		}
1470602adf40SYehuda Sadeh 
1471602adf40SYehuda Sadeh 		/* deduce our operation (read, write) */
1472602adf40SYehuda Sadeh 		do_write = (rq_data_dir(rq) == WRITE);
1473602adf40SYehuda Sadeh 
1474602adf40SYehuda Sadeh 		size = blk_rq_bytes(rq);
1475602adf40SYehuda Sadeh 		ofs = blk_rq_pos(rq) * 512ULL;
1476602adf40SYehuda Sadeh 		rq_bio = rq->bio;
1477602adf40SYehuda Sadeh 		if (do_write && rbd_dev->read_only) {
1478602adf40SYehuda Sadeh 			__blk_end_request_all(rq, -EROFS);
1479602adf40SYehuda Sadeh 			goto next;
1480602adf40SYehuda Sadeh 		}
1481602adf40SYehuda Sadeh 
1482602adf40SYehuda Sadeh 		spin_unlock_irq(q->queue_lock);
1483602adf40SYehuda Sadeh 
1484602adf40SYehuda Sadeh 		dout("%s 0x%x bytes at 0x%llx\n",
1485602adf40SYehuda Sadeh 		     do_write ? "write" : "read",
1486602adf40SYehuda Sadeh 		     size, blk_rq_pos(rq) * 512ULL);
1487602adf40SYehuda Sadeh 
14881fec7093SYehuda Sadeh 		num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size);
14891fec7093SYehuda Sadeh 		coll = rbd_alloc_coll(num_segs);
14901fec7093SYehuda Sadeh 		if (!coll) {
14911fec7093SYehuda Sadeh 			spin_lock_irq(q->queue_lock);
14921fec7093SYehuda Sadeh 			__blk_end_request_all(rq, -ENOMEM);
14931fec7093SYehuda Sadeh 			goto next;
14941fec7093SYehuda Sadeh 		}
14951fec7093SYehuda Sadeh 
1496602adf40SYehuda Sadeh 		do {
1497602adf40SYehuda Sadeh 			/* a bio clone to be passed down to OSD req */
1498602adf40SYehuda Sadeh 			dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt);
1499602adf40SYehuda Sadeh 			op_size = rbd_get_segment(&rbd_dev->header,
1500602adf40SYehuda Sadeh 						  rbd_dev->header.block_name,
1501602adf40SYehuda Sadeh 						  ofs, size,
1502602adf40SYehuda Sadeh 						  NULL, NULL);
15031fec7093SYehuda Sadeh 			kref_get(&coll->kref);
1504602adf40SYehuda Sadeh 			bio = bio_chain_clone(&rq_bio, &next_bio, &bp,
1505602adf40SYehuda Sadeh 					      op_size, GFP_ATOMIC);
1506602adf40SYehuda Sadeh 			if (!bio) {
15071fec7093SYehuda Sadeh 				rbd_coll_end_req_index(rq, coll, cur_seg,
15081fec7093SYehuda Sadeh 						       -ENOMEM, op_size);
15091fec7093SYehuda Sadeh 				goto next_seg;
1510602adf40SYehuda Sadeh 			}
1511602adf40SYehuda Sadeh 
15121fec7093SYehuda Sadeh 
1513602adf40SYehuda Sadeh 			/* init OSD command: write or read */
1514602adf40SYehuda Sadeh 			if (do_write)
1515602adf40SYehuda Sadeh 				rbd_req_write(rq, rbd_dev,
1516602adf40SYehuda Sadeh 					      rbd_dev->header.snapc,
1517602adf40SYehuda Sadeh 					      ofs,
15181fec7093SYehuda Sadeh 					      op_size, bio,
15191fec7093SYehuda Sadeh 					      coll, cur_seg);
1520602adf40SYehuda Sadeh 			else
1521602adf40SYehuda Sadeh 				rbd_req_read(rq, rbd_dev,
1522602adf40SYehuda Sadeh 					     cur_snap_id(rbd_dev),
1523602adf40SYehuda Sadeh 					     ofs,
15241fec7093SYehuda Sadeh 					     op_size, bio,
15251fec7093SYehuda Sadeh 					     coll, cur_seg);
1526602adf40SYehuda Sadeh 
15271fec7093SYehuda Sadeh next_seg:
1528602adf40SYehuda Sadeh 			size -= op_size;
1529602adf40SYehuda Sadeh 			ofs += op_size;
1530602adf40SYehuda Sadeh 
15311fec7093SYehuda Sadeh 			cur_seg++;
1532602adf40SYehuda Sadeh 			rq_bio = next_bio;
1533602adf40SYehuda Sadeh 		} while (size > 0);
15341fec7093SYehuda Sadeh 		kref_put(&coll->kref, rbd_coll_release);
1535602adf40SYehuda Sadeh 
1536602adf40SYehuda Sadeh 		if (bp)
1537602adf40SYehuda Sadeh 			bio_pair_release(bp);
1538602adf40SYehuda Sadeh 		spin_lock_irq(q->queue_lock);
1539602adf40SYehuda Sadeh next:
1540602adf40SYehuda Sadeh 		rq = blk_fetch_request(q);
1541602adf40SYehuda Sadeh 	}
1542602adf40SYehuda Sadeh }
1543602adf40SYehuda Sadeh 
1544602adf40SYehuda Sadeh /*
1545602adf40SYehuda Sadeh  * a queue callback. Makes sure that we don't create a bio that spans across
1546602adf40SYehuda Sadeh  * multiple osd objects. One exception would be with a single page bios,
1547602adf40SYehuda Sadeh  * which we handle later at bio_chain_clone
1548602adf40SYehuda Sadeh  */
1549602adf40SYehuda Sadeh static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
1550602adf40SYehuda Sadeh 			  struct bio_vec *bvec)
1551602adf40SYehuda Sadeh {
1552602adf40SYehuda Sadeh 	struct rbd_device *rbd_dev = q->queuedata;
1553602adf40SYehuda Sadeh 	unsigned int chunk_sectors = 1 << (rbd_dev->header.obj_order - 9);
1554602adf40SYehuda Sadeh 	sector_t sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev);
1555602adf40SYehuda Sadeh 	unsigned int bio_sectors = bmd->bi_size >> 9;
1556602adf40SYehuda Sadeh 	int max;
1557602adf40SYehuda Sadeh 
1558602adf40SYehuda Sadeh 	max =  (chunk_sectors - ((sector & (chunk_sectors - 1))
1559602adf40SYehuda Sadeh 				 + bio_sectors)) << 9;
1560602adf40SYehuda Sadeh 	if (max < 0)
1561602adf40SYehuda Sadeh 		max = 0; /* bio_add cannot handle a negative return */
1562602adf40SYehuda Sadeh 	if (max <= bvec->bv_len && bio_sectors == 0)
1563602adf40SYehuda Sadeh 		return bvec->bv_len;
1564602adf40SYehuda Sadeh 	return max;
1565602adf40SYehuda Sadeh }
1566602adf40SYehuda Sadeh 
1567602adf40SYehuda Sadeh static void rbd_free_disk(struct rbd_device *rbd_dev)
1568602adf40SYehuda Sadeh {
1569602adf40SYehuda Sadeh 	struct gendisk *disk = rbd_dev->disk;
1570602adf40SYehuda Sadeh 
1571602adf40SYehuda Sadeh 	if (!disk)
1572602adf40SYehuda Sadeh 		return;
1573602adf40SYehuda Sadeh 
1574602adf40SYehuda Sadeh 	rbd_header_free(&rbd_dev->header);
1575602adf40SYehuda Sadeh 
1576602adf40SYehuda Sadeh 	if (disk->flags & GENHD_FL_UP)
1577602adf40SYehuda Sadeh 		del_gendisk(disk);
1578602adf40SYehuda Sadeh 	if (disk->queue)
1579602adf40SYehuda Sadeh 		blk_cleanup_queue(disk->queue);
1580602adf40SYehuda Sadeh 	put_disk(disk);
1581602adf40SYehuda Sadeh }
1582602adf40SYehuda Sadeh 
1583602adf40SYehuda Sadeh /*
1584602adf40SYehuda Sadeh  * reload the ondisk the header
1585602adf40SYehuda Sadeh  */
1586602adf40SYehuda Sadeh static int rbd_read_header(struct rbd_device *rbd_dev,
1587602adf40SYehuda Sadeh 			   struct rbd_image_header *header)
1588602adf40SYehuda Sadeh {
1589602adf40SYehuda Sadeh 	ssize_t rc;
1590602adf40SYehuda Sadeh 	struct rbd_image_header_ondisk *dh;
1591602adf40SYehuda Sadeh 	int snap_count = 0;
1592602adf40SYehuda Sadeh 	u64 snap_names_len = 0;
159359c2be1eSYehuda Sadeh 	u64 ver;
1594602adf40SYehuda Sadeh 
1595602adf40SYehuda Sadeh 	while (1) {
1596602adf40SYehuda Sadeh 		int len = sizeof(*dh) +
1597602adf40SYehuda Sadeh 			  snap_count * sizeof(struct rbd_image_snap_ondisk) +
1598602adf40SYehuda Sadeh 			  snap_names_len;
1599602adf40SYehuda Sadeh 
1600602adf40SYehuda Sadeh 		rc = -ENOMEM;
1601602adf40SYehuda Sadeh 		dh = kmalloc(len, GFP_KERNEL);
1602602adf40SYehuda Sadeh 		if (!dh)
1603602adf40SYehuda Sadeh 			return -ENOMEM;
1604602adf40SYehuda Sadeh 
1605602adf40SYehuda Sadeh 		rc = rbd_req_sync_read(rbd_dev,
1606602adf40SYehuda Sadeh 				       NULL, CEPH_NOSNAP,
1607602adf40SYehuda Sadeh 				       rbd_dev->obj_md_name,
1608602adf40SYehuda Sadeh 				       0, len,
160959c2be1eSYehuda Sadeh 				       (char *)dh, &ver);
1610602adf40SYehuda Sadeh 		if (rc < 0)
1611602adf40SYehuda Sadeh 			goto out_dh;
1612602adf40SYehuda Sadeh 
1613602adf40SYehuda Sadeh 		rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL);
161481e759fbSJosh Durgin 		if (rc < 0) {
161581e759fbSJosh Durgin 			if (rc == -ENXIO) {
161681e759fbSJosh Durgin 				pr_warning("unrecognized header format"
161781e759fbSJosh Durgin 					   " for image %s", rbd_dev->obj);
161881e759fbSJosh Durgin 			}
1619602adf40SYehuda Sadeh 			goto out_dh;
162081e759fbSJosh Durgin 		}
1621602adf40SYehuda Sadeh 
1622602adf40SYehuda Sadeh 		if (snap_count != header->total_snaps) {
1623602adf40SYehuda Sadeh 			snap_count = header->total_snaps;
1624602adf40SYehuda Sadeh 			snap_names_len = header->snap_names_len;
1625602adf40SYehuda Sadeh 			rbd_header_free(header);
1626602adf40SYehuda Sadeh 			kfree(dh);
1627602adf40SYehuda Sadeh 			continue;
1628602adf40SYehuda Sadeh 		}
1629602adf40SYehuda Sadeh 		break;
1630602adf40SYehuda Sadeh 	}
163159c2be1eSYehuda Sadeh 	header->obj_version = ver;
1632602adf40SYehuda Sadeh 
1633602adf40SYehuda Sadeh out_dh:
1634602adf40SYehuda Sadeh 	kfree(dh);
1635602adf40SYehuda Sadeh 	return rc;
1636602adf40SYehuda Sadeh }
1637602adf40SYehuda Sadeh 
1638602adf40SYehuda Sadeh /*
1639602adf40SYehuda Sadeh  * create a snapshot
1640602adf40SYehuda Sadeh  */
1641602adf40SYehuda Sadeh static int rbd_header_add_snap(struct rbd_device *dev,
1642602adf40SYehuda Sadeh 			       const char *snap_name,
1643602adf40SYehuda Sadeh 			       gfp_t gfp_flags)
1644602adf40SYehuda Sadeh {
1645602adf40SYehuda Sadeh 	int name_len = strlen(snap_name);
1646602adf40SYehuda Sadeh 	u64 new_snapid;
1647602adf40SYehuda Sadeh 	int ret;
1648916d4d67SSage Weil 	void *data, *p, *e;
164959c2be1eSYehuda Sadeh 	u64 ver;
16501dbb4399SAlex Elder 	struct ceph_mon_client *monc;
1651602adf40SYehuda Sadeh 
1652602adf40SYehuda Sadeh 	/* we should create a snapshot only if we're pointing at the head */
1653602adf40SYehuda Sadeh 	if (dev->cur_snap)
1654602adf40SYehuda Sadeh 		return -EINVAL;
1655602adf40SYehuda Sadeh 
16561dbb4399SAlex Elder 	monc = &dev->rbd_client->client->monc;
16571dbb4399SAlex Elder 	ret = ceph_monc_create_snapid(monc, dev->poolid, &new_snapid);
1658602adf40SYehuda Sadeh 	dout("created snapid=%lld\n", new_snapid);
1659602adf40SYehuda Sadeh 	if (ret < 0)
1660602adf40SYehuda Sadeh 		return ret;
1661602adf40SYehuda Sadeh 
1662602adf40SYehuda Sadeh 	data = kmalloc(name_len + 16, gfp_flags);
1663602adf40SYehuda Sadeh 	if (!data)
1664602adf40SYehuda Sadeh 		return -ENOMEM;
1665602adf40SYehuda Sadeh 
1666916d4d67SSage Weil 	p = data;
1667916d4d67SSage Weil 	e = data + name_len + 16;
1668602adf40SYehuda Sadeh 
1669916d4d67SSage Weil 	ceph_encode_string_safe(&p, e, snap_name, name_len, bad);
1670916d4d67SSage Weil 	ceph_encode_64_safe(&p, e, new_snapid, bad);
1671602adf40SYehuda Sadeh 
1672602adf40SYehuda Sadeh 	ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add",
1673916d4d67SSage Weil 				data, p - data, &ver);
1674602adf40SYehuda Sadeh 
1675916d4d67SSage Weil 	kfree(data);
1676602adf40SYehuda Sadeh 
1677602adf40SYehuda Sadeh 	if (ret < 0)
1678602adf40SYehuda Sadeh 		return ret;
1679602adf40SYehuda Sadeh 
1680602adf40SYehuda Sadeh 	dev->header.snapc->seq =  new_snapid;
1681602adf40SYehuda Sadeh 
1682602adf40SYehuda Sadeh 	return 0;
1683602adf40SYehuda Sadeh bad:
1684602adf40SYehuda Sadeh 	return -ERANGE;
1685602adf40SYehuda Sadeh }
1686602adf40SYehuda Sadeh 
1687dfc5606dSYehuda Sadeh static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev)
1688dfc5606dSYehuda Sadeh {
1689dfc5606dSYehuda Sadeh 	struct rbd_snap *snap;
1690dfc5606dSYehuda Sadeh 
1691dfc5606dSYehuda Sadeh 	while (!list_empty(&rbd_dev->snaps)) {
1692dfc5606dSYehuda Sadeh 		snap = list_first_entry(&rbd_dev->snaps, struct rbd_snap, node);
1693dfc5606dSYehuda Sadeh 		__rbd_remove_snap_dev(rbd_dev, snap);
1694dfc5606dSYehuda Sadeh 	}
1695dfc5606dSYehuda Sadeh }
1696dfc5606dSYehuda Sadeh 
1697602adf40SYehuda Sadeh /*
1698602adf40SYehuda Sadeh  * only read the first part of the ondisk header, without the snaps info
1699602adf40SYehuda Sadeh  */
1700dfc5606dSYehuda Sadeh static int __rbd_update_snaps(struct rbd_device *rbd_dev)
1701602adf40SYehuda Sadeh {
1702602adf40SYehuda Sadeh 	int ret;
1703602adf40SYehuda Sadeh 	struct rbd_image_header h;
1704602adf40SYehuda Sadeh 	u64 snap_seq;
170559c2be1eSYehuda Sadeh 	int follow_seq = 0;
1706602adf40SYehuda Sadeh 
1707602adf40SYehuda Sadeh 	ret = rbd_read_header(rbd_dev, &h);
1708602adf40SYehuda Sadeh 	if (ret < 0)
1709602adf40SYehuda Sadeh 		return ret;
1710602adf40SYehuda Sadeh 
17119db4b3e3SSage Weil 	/* resized? */
17129db4b3e3SSage Weil 	set_capacity(rbd_dev->disk, h.image_size / 512ULL);
17139db4b3e3SSage Weil 
1714602adf40SYehuda Sadeh 	down_write(&rbd_dev->header.snap_rwsem);
1715602adf40SYehuda Sadeh 
1716602adf40SYehuda Sadeh 	snap_seq = rbd_dev->header.snapc->seq;
171759c2be1eSYehuda Sadeh 	if (rbd_dev->header.total_snaps &&
171859c2be1eSYehuda Sadeh 	    rbd_dev->header.snapc->snaps[0] == snap_seq)
171959c2be1eSYehuda Sadeh 		/* pointing at the head, will need to follow that
172059c2be1eSYehuda Sadeh 		   if head moves */
172159c2be1eSYehuda Sadeh 		follow_seq = 1;
1722602adf40SYehuda Sadeh 
1723602adf40SYehuda Sadeh 	kfree(rbd_dev->header.snapc);
1724602adf40SYehuda Sadeh 	kfree(rbd_dev->header.snap_names);
1725602adf40SYehuda Sadeh 	kfree(rbd_dev->header.snap_sizes);
1726602adf40SYehuda Sadeh 
1727602adf40SYehuda Sadeh 	rbd_dev->header.total_snaps = h.total_snaps;
1728602adf40SYehuda Sadeh 	rbd_dev->header.snapc = h.snapc;
1729602adf40SYehuda Sadeh 	rbd_dev->header.snap_names = h.snap_names;
1730dfc5606dSYehuda Sadeh 	rbd_dev->header.snap_names_len = h.snap_names_len;
1731602adf40SYehuda Sadeh 	rbd_dev->header.snap_sizes = h.snap_sizes;
173259c2be1eSYehuda Sadeh 	if (follow_seq)
173359c2be1eSYehuda Sadeh 		rbd_dev->header.snapc->seq = rbd_dev->header.snapc->snaps[0];
173459c2be1eSYehuda Sadeh 	else
1735602adf40SYehuda Sadeh 		rbd_dev->header.snapc->seq = snap_seq;
1736602adf40SYehuda Sadeh 
1737dfc5606dSYehuda Sadeh 	ret = __rbd_init_snaps_header(rbd_dev);
1738dfc5606dSYehuda Sadeh 
1739602adf40SYehuda Sadeh 	up_write(&rbd_dev->header.snap_rwsem);
1740602adf40SYehuda Sadeh 
1741dfc5606dSYehuda Sadeh 	return ret;
1742602adf40SYehuda Sadeh }
1743602adf40SYehuda Sadeh 
1744602adf40SYehuda Sadeh static int rbd_init_disk(struct rbd_device *rbd_dev)
1745602adf40SYehuda Sadeh {
1746602adf40SYehuda Sadeh 	struct gendisk *disk;
1747602adf40SYehuda Sadeh 	struct request_queue *q;
1748602adf40SYehuda Sadeh 	int rc;
1749602adf40SYehuda Sadeh 	u64 total_size = 0;
1750602adf40SYehuda Sadeh 
1751602adf40SYehuda Sadeh 	/* contact OSD, request size info about the object being mapped */
1752602adf40SYehuda Sadeh 	rc = rbd_read_header(rbd_dev, &rbd_dev->header);
1753602adf40SYehuda Sadeh 	if (rc)
1754602adf40SYehuda Sadeh 		return rc;
1755602adf40SYehuda Sadeh 
1756dfc5606dSYehuda Sadeh 	/* no need to lock here, as rbd_dev is not registered yet */
1757dfc5606dSYehuda Sadeh 	rc = __rbd_init_snaps_header(rbd_dev);
1758dfc5606dSYehuda Sadeh 	if (rc)
1759dfc5606dSYehuda Sadeh 		return rc;
1760dfc5606dSYehuda Sadeh 
1761cc9d734cSJosh Durgin 	rc = rbd_header_set_snap(rbd_dev, &total_size);
1762602adf40SYehuda Sadeh 	if (rc)
1763602adf40SYehuda Sadeh 		return rc;
1764602adf40SYehuda Sadeh 
1765602adf40SYehuda Sadeh 	/* create gendisk info */
1766602adf40SYehuda Sadeh 	rc = -ENOMEM;
1767602adf40SYehuda Sadeh 	disk = alloc_disk(RBD_MINORS_PER_MAJOR);
1768602adf40SYehuda Sadeh 	if (!disk)
1769602adf40SYehuda Sadeh 		goto out;
1770602adf40SYehuda Sadeh 
1771f0f8cef5SAlex Elder 	snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
1772aedfec59SSage Weil 		 rbd_dev->id);
1773602adf40SYehuda Sadeh 	disk->major = rbd_dev->major;
1774602adf40SYehuda Sadeh 	disk->first_minor = 0;
1775602adf40SYehuda Sadeh 	disk->fops = &rbd_bd_ops;
1776602adf40SYehuda Sadeh 	disk->private_data = rbd_dev;
1777602adf40SYehuda Sadeh 
1778602adf40SYehuda Sadeh 	/* init rq */
1779602adf40SYehuda Sadeh 	rc = -ENOMEM;
1780602adf40SYehuda Sadeh 	q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock);
1781602adf40SYehuda Sadeh 	if (!q)
1782602adf40SYehuda Sadeh 		goto out_disk;
1783029bcbd8SJosh Durgin 
1784029bcbd8SJosh Durgin 	/* set io sizes to object size */
1785029bcbd8SJosh Durgin 	blk_queue_max_hw_sectors(q, rbd_obj_bytes(&rbd_dev->header) / 512ULL);
1786029bcbd8SJosh Durgin 	blk_queue_max_segment_size(q, rbd_obj_bytes(&rbd_dev->header));
1787029bcbd8SJosh Durgin 	blk_queue_io_min(q, rbd_obj_bytes(&rbd_dev->header));
1788029bcbd8SJosh Durgin 	blk_queue_io_opt(q, rbd_obj_bytes(&rbd_dev->header));
1789029bcbd8SJosh Durgin 
1790602adf40SYehuda Sadeh 	blk_queue_merge_bvec(q, rbd_merge_bvec);
1791602adf40SYehuda Sadeh 	disk->queue = q;
1792602adf40SYehuda Sadeh 
1793602adf40SYehuda Sadeh 	q->queuedata = rbd_dev;
1794602adf40SYehuda Sadeh 
1795602adf40SYehuda Sadeh 	rbd_dev->disk = disk;
1796602adf40SYehuda Sadeh 	rbd_dev->q = q;
1797602adf40SYehuda Sadeh 
1798602adf40SYehuda Sadeh 	/* finally, announce the disk to the world */
1799602adf40SYehuda Sadeh 	set_capacity(disk, total_size / 512ULL);
1800602adf40SYehuda Sadeh 	add_disk(disk);
1801602adf40SYehuda Sadeh 
1802602adf40SYehuda Sadeh 	pr_info("%s: added with size 0x%llx\n",
1803602adf40SYehuda Sadeh 		disk->disk_name, (unsigned long long)total_size);
1804602adf40SYehuda Sadeh 	return 0;
1805602adf40SYehuda Sadeh 
1806602adf40SYehuda Sadeh out_disk:
1807602adf40SYehuda Sadeh 	put_disk(disk);
1808602adf40SYehuda Sadeh out:
1809602adf40SYehuda Sadeh 	return rc;
1810602adf40SYehuda Sadeh }
1811602adf40SYehuda Sadeh 
1812dfc5606dSYehuda Sadeh /*
1813dfc5606dSYehuda Sadeh   sysfs
1814dfc5606dSYehuda Sadeh */
1815602adf40SYehuda Sadeh 
1816dfc5606dSYehuda Sadeh static ssize_t rbd_size_show(struct device *dev,
1817dfc5606dSYehuda Sadeh 			     struct device_attribute *attr, char *buf)
1818602adf40SYehuda Sadeh {
1819dfc5606dSYehuda Sadeh 	struct rbd_device *rbd_dev = dev_to_rbd(dev);
1820dfc5606dSYehuda Sadeh 
1821dfc5606dSYehuda Sadeh 	return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size);
1822602adf40SYehuda Sadeh }
1823602adf40SYehuda Sadeh 
1824dfc5606dSYehuda Sadeh static ssize_t rbd_major_show(struct device *dev,
1825dfc5606dSYehuda Sadeh 			      struct device_attribute *attr, char *buf)
1826602adf40SYehuda Sadeh {
1827dfc5606dSYehuda Sadeh 	struct rbd_device *rbd_dev = dev_to_rbd(dev);
1828dfc5606dSYehuda Sadeh 
1829dfc5606dSYehuda Sadeh 	return sprintf(buf, "%d\n", rbd_dev->major);
1830dfc5606dSYehuda Sadeh }
1831dfc5606dSYehuda Sadeh 
1832dfc5606dSYehuda Sadeh static ssize_t rbd_client_id_show(struct device *dev,
1833dfc5606dSYehuda Sadeh 				  struct device_attribute *attr, char *buf)
1834dfc5606dSYehuda Sadeh {
1835dfc5606dSYehuda Sadeh 	struct rbd_device *rbd_dev = dev_to_rbd(dev);
1836dfc5606dSYehuda Sadeh 
18371dbb4399SAlex Elder 	return sprintf(buf, "client%lld\n",
18381dbb4399SAlex Elder 			ceph_client_id(rbd_dev->rbd_client->client));
1839dfc5606dSYehuda Sadeh }
1840dfc5606dSYehuda Sadeh 
1841dfc5606dSYehuda Sadeh static ssize_t rbd_pool_show(struct device *dev,
1842dfc5606dSYehuda Sadeh 			     struct device_attribute *attr, char *buf)
1843dfc5606dSYehuda Sadeh {
1844dfc5606dSYehuda Sadeh 	struct rbd_device *rbd_dev = dev_to_rbd(dev);
1845dfc5606dSYehuda Sadeh 
1846dfc5606dSYehuda Sadeh 	return sprintf(buf, "%s\n", rbd_dev->pool_name);
1847dfc5606dSYehuda Sadeh }
1848dfc5606dSYehuda Sadeh 
1849dfc5606dSYehuda Sadeh static ssize_t rbd_name_show(struct device *dev,
1850dfc5606dSYehuda Sadeh 			     struct device_attribute *attr, char *buf)
1851dfc5606dSYehuda Sadeh {
1852dfc5606dSYehuda Sadeh 	struct rbd_device *rbd_dev = dev_to_rbd(dev);
1853dfc5606dSYehuda Sadeh 
1854dfc5606dSYehuda Sadeh 	return sprintf(buf, "%s\n", rbd_dev->obj);
1855dfc5606dSYehuda Sadeh }
1856dfc5606dSYehuda Sadeh 
1857dfc5606dSYehuda Sadeh static ssize_t rbd_snap_show(struct device *dev,
1858dfc5606dSYehuda Sadeh 			     struct device_attribute *attr,
1859dfc5606dSYehuda Sadeh 			     char *buf)
1860dfc5606dSYehuda Sadeh {
1861dfc5606dSYehuda Sadeh 	struct rbd_device *rbd_dev = dev_to_rbd(dev);
1862dfc5606dSYehuda Sadeh 
1863dfc5606dSYehuda Sadeh 	return sprintf(buf, "%s\n", rbd_dev->snap_name);
1864dfc5606dSYehuda Sadeh }
1865dfc5606dSYehuda Sadeh 
1866dfc5606dSYehuda Sadeh static ssize_t rbd_image_refresh(struct device *dev,
1867dfc5606dSYehuda Sadeh 				 struct device_attribute *attr,
1868dfc5606dSYehuda Sadeh 				 const char *buf,
1869dfc5606dSYehuda Sadeh 				 size_t size)
1870dfc5606dSYehuda Sadeh {
1871dfc5606dSYehuda Sadeh 	struct rbd_device *rbd_dev = dev_to_rbd(dev);
1872dfc5606dSYehuda Sadeh 	int rc;
1873dfc5606dSYehuda Sadeh 	int ret = size;
1874602adf40SYehuda Sadeh 
1875602adf40SYehuda Sadeh 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1876602adf40SYehuda Sadeh 
1877dfc5606dSYehuda Sadeh 	rc = __rbd_update_snaps(rbd_dev);
1878dfc5606dSYehuda Sadeh 	if (rc < 0)
1879dfc5606dSYehuda Sadeh 		ret = rc;
1880602adf40SYehuda Sadeh 
1881dfc5606dSYehuda Sadeh 	mutex_unlock(&ctl_mutex);
1882dfc5606dSYehuda Sadeh 	return ret;
1883dfc5606dSYehuda Sadeh }
1884602adf40SYehuda Sadeh 
/*
 * Per-device sysfs attributes.  Those declared with S_IRUGO have only a
 * show handler; "refresh" and "create_snap" are S_IWUSR and have only a
 * store handler.
 */
1885dfc5606dSYehuda Sadeh static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
1886dfc5606dSYehuda Sadeh static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
1887dfc5606dSYehuda Sadeh static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
1888dfc5606dSYehuda Sadeh static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
1889dfc5606dSYehuda Sadeh static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
1890dfc5606dSYehuda Sadeh static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
1891dfc5606dSYehuda Sadeh static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
1892dfc5606dSYehuda Sadeh static DEVICE_ATTR(create_snap, S_IWUSR, NULL, rbd_snap_add);
1893dfc5606dSYehuda Sadeh 
/* NULL-terminated list of the per-device attributes declared above. */
1894dfc5606dSYehuda Sadeh static struct attribute *rbd_attrs[] = {
1895dfc5606dSYehuda Sadeh 	&dev_attr_size.attr,
1896dfc5606dSYehuda Sadeh 	&dev_attr_major.attr,
1897dfc5606dSYehuda Sadeh 	&dev_attr_client_id.attr,
1898dfc5606dSYehuda Sadeh 	&dev_attr_pool.attr,
1899dfc5606dSYehuda Sadeh 	&dev_attr_name.attr,
1900dfc5606dSYehuda Sadeh 	&dev_attr_current_snap.attr,
1901dfc5606dSYehuda Sadeh 	&dev_attr_refresh.attr,
1902dfc5606dSYehuda Sadeh 	&dev_attr_create_snap.attr,
1903dfc5606dSYehuda Sadeh 	NULL
1904dfc5606dSYehuda Sadeh };
1905dfc5606dSYehuda Sadeh 
/* Attribute group holding rbd_attrs. */
1906dfc5606dSYehuda Sadeh static struct attribute_group rbd_attr_group = {
1907dfc5606dSYehuda Sadeh 	.attrs = rbd_attrs,
1908dfc5606dSYehuda Sadeh };
1909dfc5606dSYehuda Sadeh 
/* NULL-terminated group list referenced by rbd_device_type. */
1910dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_attr_groups[] = {
1911dfc5606dSYehuda Sadeh 	&rbd_attr_group,
1912dfc5606dSYehuda Sadeh 	NULL
1913dfc5606dSYehuda Sadeh };
1914dfc5606dSYehuda Sadeh 
/*
 * Release callback installed in rbd_device_type.  The body is empty:
 * nothing is freed here.
 */
1915dfc5606dSYehuda Sadeh static void rbd_sysfs_dev_release(struct device *dev)
1916dfc5606dSYehuda Sadeh {
1917dfc5606dSYehuda Sadeh }
1918dfc5606dSYehuda Sadeh 
/*
 * Device type named "rbd": ties together the rbd_attr_groups attribute
 * groups and the (empty) rbd_sysfs_dev_release() release callback.
 */
1919dfc5606dSYehuda Sadeh static struct device_type rbd_device_type = {
1920dfc5606dSYehuda Sadeh 	.name		= "rbd",
1921dfc5606dSYehuda Sadeh 	.groups		= rbd_attr_groups,
1922dfc5606dSYehuda Sadeh 	.release	= rbd_sysfs_dev_release,
1923dfc5606dSYehuda Sadeh };
1924dfc5606dSYehuda Sadeh 
1925dfc5606dSYehuda Sadeh 
1926dfc5606dSYehuda Sadeh /*
1927dfc5606dSYehuda Sadeh   sysfs - snapshots
1928dfc5606dSYehuda Sadeh */
1929dfc5606dSYehuda Sadeh 
1930dfc5606dSYehuda Sadeh static ssize_t rbd_snap_size_show(struct device *dev,
1931dfc5606dSYehuda Sadeh 				  struct device_attribute *attr,
1932dfc5606dSYehuda Sadeh 				  char *buf)
1933dfc5606dSYehuda Sadeh {
1934dfc5606dSYehuda Sadeh 	struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
1935dfc5606dSYehuda Sadeh 
1936dfc5606dSYehuda Sadeh 	return sprintf(buf, "%lld\n", (long long)snap->size);
1937dfc5606dSYehuda Sadeh }
1938dfc5606dSYehuda Sadeh 
1939dfc5606dSYehuda Sadeh static ssize_t rbd_snap_id_show(struct device *dev,
1940dfc5606dSYehuda Sadeh 				struct device_attribute *attr,
1941dfc5606dSYehuda Sadeh 				char *buf)
1942dfc5606dSYehuda Sadeh {
1943dfc5606dSYehuda Sadeh 	struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
1944dfc5606dSYehuda Sadeh 
1945dfc5606dSYehuda Sadeh 	return sprintf(buf, "%lld\n", (long long)snap->id);
1946dfc5606dSYehuda Sadeh }
1947dfc5606dSYehuda Sadeh 
/* Read-only (S_IRUGO) attributes exposed by each snapshot device. */
1948dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL);
1949dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL);
1950dfc5606dSYehuda Sadeh 
/* NULL-terminated list of the snapshot attributes. */
1951dfc5606dSYehuda Sadeh static struct attribute *rbd_snap_attrs[] = {
1952dfc5606dSYehuda Sadeh 	&dev_attr_snap_size.attr,
1953dfc5606dSYehuda Sadeh 	&dev_attr_snap_id.attr,
1954dfc5606dSYehuda Sadeh 	NULL,
1955dfc5606dSYehuda Sadeh };
1956dfc5606dSYehuda Sadeh 
/* Attribute group holding rbd_snap_attrs. */
1957dfc5606dSYehuda Sadeh static struct attribute_group rbd_snap_attr_group = {
1958dfc5606dSYehuda Sadeh 	.attrs = rbd_snap_attrs,
1959dfc5606dSYehuda Sadeh };
1960dfc5606dSYehuda Sadeh 
1961dfc5606dSYehuda Sadeh static void rbd_snap_dev_release(struct device *dev)
1962dfc5606dSYehuda Sadeh {
1963dfc5606dSYehuda Sadeh 	struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
1964dfc5606dSYehuda Sadeh 	kfree(snap->name);
1965dfc5606dSYehuda Sadeh 	kfree(snap);
1966dfc5606dSYehuda Sadeh }
1967dfc5606dSYehuda Sadeh 
/* NULL-terminated group list referenced by rbd_snap_device_type. */
1968dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_snap_attr_groups[] = {
1969dfc5606dSYehuda Sadeh 	&rbd_snap_attr_group,
1970dfc5606dSYehuda Sadeh 	NULL
1971dfc5606dSYehuda Sadeh };
1972dfc5606dSYehuda Sadeh 
/*
 * Device type for snapshot devices: the snapshot attribute groups plus
 * rbd_snap_dev_release() as the release callback.
 */
1973dfc5606dSYehuda Sadeh static struct device_type rbd_snap_device_type = {
1974dfc5606dSYehuda Sadeh 	.groups		= rbd_snap_attr_groups,
1975dfc5606dSYehuda Sadeh 	.release	= rbd_snap_dev_release,
1976dfc5606dSYehuda Sadeh };
1977dfc5606dSYehuda Sadeh 
/*
 * Unlink @snap from the list it is on, then unregister its device.
 * @rbd_dev is not used by this function.
 */
1978dfc5606dSYehuda Sadeh static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev,
1979dfc5606dSYehuda Sadeh 				  struct rbd_snap *snap)
1980dfc5606dSYehuda Sadeh {
1981dfc5606dSYehuda Sadeh 	list_del(&snap->node);
1982dfc5606dSYehuda Sadeh 	device_unregister(&snap->dev);
1983dfc5606dSYehuda Sadeh }
1984dfc5606dSYehuda Sadeh 
1985dfc5606dSYehuda Sadeh static int rbd_register_snap_dev(struct rbd_device *rbd_dev,
1986dfc5606dSYehuda Sadeh 				  struct rbd_snap *snap,
1987dfc5606dSYehuda Sadeh 				  struct device *parent)
1988dfc5606dSYehuda Sadeh {
1989dfc5606dSYehuda Sadeh 	struct device *dev = &snap->dev;
1990dfc5606dSYehuda Sadeh 	int ret;
1991dfc5606dSYehuda Sadeh 
1992dfc5606dSYehuda Sadeh 	dev->type = &rbd_snap_device_type;
1993dfc5606dSYehuda Sadeh 	dev->parent = parent;
1994dfc5606dSYehuda Sadeh 	dev->release = rbd_snap_dev_release;
1995dfc5606dSYehuda Sadeh 	dev_set_name(dev, "snap_%s", snap->name);
1996dfc5606dSYehuda Sadeh 	ret = device_register(dev);
1997dfc5606dSYehuda Sadeh 
1998dfc5606dSYehuda Sadeh 	return ret;
1999dfc5606dSYehuda Sadeh }
2000dfc5606dSYehuda Sadeh 
2001dfc5606dSYehuda Sadeh static int __rbd_add_snap_dev(struct rbd_device *rbd_dev,
2002dfc5606dSYehuda Sadeh 			      int i, const char *name,
2003dfc5606dSYehuda Sadeh 			      struct rbd_snap **snapp)
2004dfc5606dSYehuda Sadeh {
2005dfc5606dSYehuda Sadeh 	int ret;
2006dfc5606dSYehuda Sadeh 	struct rbd_snap *snap = kzalloc(sizeof(*snap), GFP_KERNEL);
2007dfc5606dSYehuda Sadeh 	if (!snap)
2008dfc5606dSYehuda Sadeh 		return -ENOMEM;
2009dfc5606dSYehuda Sadeh 	snap->name = kstrdup(name, GFP_KERNEL);
2010dfc5606dSYehuda Sadeh 	snap->size = rbd_dev->header.snap_sizes[i];
2011dfc5606dSYehuda Sadeh 	snap->id = rbd_dev->header.snapc->snaps[i];
2012dfc5606dSYehuda Sadeh 	if (device_is_registered(&rbd_dev->dev)) {
2013dfc5606dSYehuda Sadeh 		ret = rbd_register_snap_dev(rbd_dev, snap,
2014dfc5606dSYehuda Sadeh 					     &rbd_dev->dev);
2015dfc5606dSYehuda Sadeh 		if (ret < 0)
2016dfc5606dSYehuda Sadeh 			goto err;
2017dfc5606dSYehuda Sadeh 	}
2018dfc5606dSYehuda Sadeh 	*snapp = snap;
2019dfc5606dSYehuda Sadeh 	return 0;
2020dfc5606dSYehuda Sadeh err:
2021dfc5606dSYehuda Sadeh 	kfree(snap->name);
2022dfc5606dSYehuda Sadeh 	kfree(snap);
2023dfc5606dSYehuda Sadeh 	return ret;
2024dfc5606dSYehuda Sadeh }
2025dfc5606dSYehuda Sadeh 
/*
 * Step backward by one entry in a list of NUL-terminated strings
 * packed back to back.
 *
 * @name:  start of the current entry (or one past the end of the list)
 * @start: start of the first entry
 *
 * Returns the start of the entry that precedes @name, or NULL when
 * @name is less than two bytes past @start, i.e. nothing precedes it.
 */
const char *rbd_prev_snap_name(const char *name, const char *start)
{
	const char *cur;

	if (name - start < 2)
		return NULL;

	/* name[-1] is the previous entry's terminator; scan from name[-2] */
	for (cur = name - 2; *cur != '\0'; cur--) {
		if (cur == start)
			return start;
	}

	/* cur sits on the terminator of the entry before the one we want */
	return cur + 1;
}
2042dfc5606dSYehuda Sadeh 
2043dfc5606dSYehuda Sadeh /*
2044dfc5606dSYehuda Sadeh  * compare the old list of snapshots that we have to what's in the header
2045dfc5606dSYehuda Sadeh  * and update it accordingly. Note that the header holds the snapshots
2046dfc5606dSYehuda Sadeh  * in a reverse order (from newest to oldest) and we need to go from
2047dfc5606dSYehuda Sadeh  * older to new so that we don't get a duplicate snap name when
2048dfc5606dSYehuda Sadeh  * doing the process (e.g., removed snapshot and recreated a new
2049dfc5606dSYehuda Sadeh  * one with the same name.
2050dfc5606dSYehuda Sadeh  */
2051dfc5606dSYehuda Sadeh static int __rbd_init_snaps_header(struct rbd_device *rbd_dev)
2052dfc5606dSYehuda Sadeh {
2053dfc5606dSYehuda Sadeh 	const char *name, *first_name;
2054dfc5606dSYehuda Sadeh 	int i = rbd_dev->header.total_snaps;
2055dfc5606dSYehuda Sadeh 	struct rbd_snap *snap, *old_snap = NULL;
2056dfc5606dSYehuda Sadeh 	int ret;
2057dfc5606dSYehuda Sadeh 	struct list_head *p, *n;
2058dfc5606dSYehuda Sadeh 
2059dfc5606dSYehuda Sadeh 	first_name = rbd_dev->header.snap_names;
2060dfc5606dSYehuda Sadeh 	name = first_name + rbd_dev->header.snap_names_len;
2061dfc5606dSYehuda Sadeh 
2062dfc5606dSYehuda Sadeh 	list_for_each_prev_safe(p, n, &rbd_dev->snaps) {
2063dfc5606dSYehuda Sadeh 		u64 cur_id;
2064dfc5606dSYehuda Sadeh 
2065dfc5606dSYehuda Sadeh 		old_snap = list_entry(p, struct rbd_snap, node);
2066dfc5606dSYehuda Sadeh 
2067dfc5606dSYehuda Sadeh 		if (i)
2068dfc5606dSYehuda Sadeh 			cur_id = rbd_dev->header.snapc->snaps[i - 1];
2069dfc5606dSYehuda Sadeh 
2070dfc5606dSYehuda Sadeh 		if (!i || old_snap->id < cur_id) {
2071dfc5606dSYehuda Sadeh 			/* old_snap->id was skipped, thus was removed */
2072dfc5606dSYehuda Sadeh 			__rbd_remove_snap_dev(rbd_dev, old_snap);
2073dfc5606dSYehuda Sadeh 			continue;
2074dfc5606dSYehuda Sadeh 		}
2075dfc5606dSYehuda Sadeh 		if (old_snap->id == cur_id) {
2076dfc5606dSYehuda Sadeh 			/* we have this snapshot already */
2077dfc5606dSYehuda Sadeh 			i--;
2078dfc5606dSYehuda Sadeh 			name = rbd_prev_snap_name(name, first_name);
2079dfc5606dSYehuda Sadeh 			continue;
2080dfc5606dSYehuda Sadeh 		}
2081dfc5606dSYehuda Sadeh 		for (; i > 0;
2082dfc5606dSYehuda Sadeh 		     i--, name = rbd_prev_snap_name(name, first_name)) {
2083dfc5606dSYehuda Sadeh 			if (!name) {
2084dfc5606dSYehuda Sadeh 				WARN_ON(1);
2085dfc5606dSYehuda Sadeh 				return -EINVAL;
2086dfc5606dSYehuda Sadeh 			}
2087dfc5606dSYehuda Sadeh 			cur_id = rbd_dev->header.snapc->snaps[i];
2088dfc5606dSYehuda Sadeh 			/* snapshot removal? handle it above */
2089dfc5606dSYehuda Sadeh 			if (cur_id >= old_snap->id)
2090dfc5606dSYehuda Sadeh 				break;
2091dfc5606dSYehuda Sadeh 			/* a new snapshot */
2092dfc5606dSYehuda Sadeh 			ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap);
2093dfc5606dSYehuda Sadeh 			if (ret < 0)
2094dfc5606dSYehuda Sadeh 				return ret;
2095dfc5606dSYehuda Sadeh 
2096dfc5606dSYehuda Sadeh 			/* note that we add it backward so using n and not p */
2097dfc5606dSYehuda Sadeh 			list_add(&snap->node, n);
2098dfc5606dSYehuda Sadeh 			p = &snap->node;
2099dfc5606dSYehuda Sadeh 		}
2100dfc5606dSYehuda Sadeh 	}
2101dfc5606dSYehuda Sadeh 	/* we're done going over the old snap list, just add what's left */
2102dfc5606dSYehuda Sadeh 	for (; i > 0; i--) {
2103dfc5606dSYehuda Sadeh 		name = rbd_prev_snap_name(name, first_name);
2104dfc5606dSYehuda Sadeh 		if (!name) {
2105dfc5606dSYehuda Sadeh 			WARN_ON(1);
2106dfc5606dSYehuda Sadeh 			return -EINVAL;
2107dfc5606dSYehuda Sadeh 		}
2108dfc5606dSYehuda Sadeh 		ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap);
2109dfc5606dSYehuda Sadeh 		if (ret < 0)
2110dfc5606dSYehuda Sadeh 			return ret;
2111dfc5606dSYehuda Sadeh 		list_add(&snap->node, &rbd_dev->snaps);
2112dfc5606dSYehuda Sadeh 	}
2113dfc5606dSYehuda Sadeh 
2114dfc5606dSYehuda Sadeh 	return 0;
2115dfc5606dSYehuda Sadeh }
2116dfc5606dSYehuda Sadeh 
2117dfc5606dSYehuda Sadeh static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
2118dfc5606dSYehuda Sadeh {
2119f0f8cef5SAlex Elder 	int ret;
2120dfc5606dSYehuda Sadeh 	struct device *dev;
2121dfc5606dSYehuda Sadeh 	struct rbd_snap *snap;
2122dfc5606dSYehuda Sadeh 
2123dfc5606dSYehuda Sadeh 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
2124dfc5606dSYehuda Sadeh 	dev = &rbd_dev->dev;
2125dfc5606dSYehuda Sadeh 
2126dfc5606dSYehuda Sadeh 	dev->bus = &rbd_bus_type;
2127dfc5606dSYehuda Sadeh 	dev->type = &rbd_device_type;
2128dfc5606dSYehuda Sadeh 	dev->parent = &rbd_root_dev;
2129dfc5606dSYehuda Sadeh 	dev->release = rbd_dev_release;
2130dfc5606dSYehuda Sadeh 	dev_set_name(dev, "%d", rbd_dev->id);
2131dfc5606dSYehuda Sadeh 	ret = device_register(dev);
2132dfc5606dSYehuda Sadeh 	if (ret < 0)
2133f0f8cef5SAlex Elder 		goto out;
2134dfc5606dSYehuda Sadeh 
2135dfc5606dSYehuda Sadeh 	list_for_each_entry(snap, &rbd_dev->snaps, node) {
2136dfc5606dSYehuda Sadeh 		ret = rbd_register_snap_dev(rbd_dev, snap,
2137dfc5606dSYehuda Sadeh 					     &rbd_dev->dev);
2138dfc5606dSYehuda Sadeh 		if (ret < 0)
2139602adf40SYehuda Sadeh 			break;
2140602adf40SYehuda Sadeh 	}
2141f0f8cef5SAlex Elder out:
2142dfc5606dSYehuda Sadeh 	mutex_unlock(&ctl_mutex);
2143dfc5606dSYehuda Sadeh 	return ret;
2144602adf40SYehuda Sadeh }
2145602adf40SYehuda Sadeh 
2146dfc5606dSYehuda Sadeh static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
2147dfc5606dSYehuda Sadeh {
2148dfc5606dSYehuda Sadeh 	device_unregister(&rbd_dev->dev);
2149dfc5606dSYehuda Sadeh }
2150dfc5606dSYehuda Sadeh 
215159c2be1eSYehuda Sadeh static int rbd_init_watch_dev(struct rbd_device *rbd_dev)
215259c2be1eSYehuda Sadeh {
215359c2be1eSYehuda Sadeh 	int ret, rc;
215459c2be1eSYehuda Sadeh 
215559c2be1eSYehuda Sadeh 	do {
215659c2be1eSYehuda Sadeh 		ret = rbd_req_sync_watch(rbd_dev, rbd_dev->obj_md_name,
215759c2be1eSYehuda Sadeh 					 rbd_dev->header.obj_version);
215859c2be1eSYehuda Sadeh 		if (ret == -ERANGE) {
215959c2be1eSYehuda Sadeh 			mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
216059c2be1eSYehuda Sadeh 			rc = __rbd_update_snaps(rbd_dev);
216159c2be1eSYehuda Sadeh 			mutex_unlock(&ctl_mutex);
216259c2be1eSYehuda Sadeh 			if (rc < 0)
216359c2be1eSYehuda Sadeh 				return rc;
216459c2be1eSYehuda Sadeh 		}
216559c2be1eSYehuda Sadeh 	} while (ret == -ERANGE);
216659c2be1eSYehuda Sadeh 
216759c2be1eSYehuda Sadeh 	return ret;
216859c2be1eSYehuda Sadeh }
216959c2be1eSYehuda Sadeh 
/* Highest rbd id handed out so far; 0 means none. */
static atomic64_t rbd_id_max = ATOMIC64_INIT(0);

/*
 * Get a unique rbd identifier for the given new rbd_dev, and add
 * the rbd_dev to the global list.  The minimum rbd id is 1.
 *
 * The id is taken by atomically incrementing rbd_id_max; the list
 * insertion is done under rbd_dev_list_lock.
 */
static void rbd_id_get(struct rbd_device *rbd_dev)
{
	rbd_dev->id = atomic64_inc_return(&rbd_id_max);

	spin_lock(&rbd_dev_list_lock);
	list_add_tail(&rbd_dev->node, &rbd_dev_list);
	spin_unlock(&rbd_dev_list_lock);
}
2184b7f23c36SAlex Elder 
21851ddbe94eSAlex Elder /*
2186499afd5bSAlex Elder  * Remove an rbd_dev from the global list, and record that its
2187499afd5bSAlex Elder  * identifier is no longer in use.
21881ddbe94eSAlex Elder  */
2189499afd5bSAlex Elder static void rbd_id_put(struct rbd_device *rbd_dev)
21901ddbe94eSAlex Elder {
2191d184f6bfSAlex Elder 	struct list_head *tmp;
2192d184f6bfSAlex Elder 	int rbd_id = rbd_dev->id;
2193d184f6bfSAlex Elder 	int max_id;
2194d184f6bfSAlex Elder 
2195d184f6bfSAlex Elder 	BUG_ON(rbd_id < 1);
2196499afd5bSAlex Elder 
2197499afd5bSAlex Elder 	spin_lock(&rbd_dev_list_lock);
2198499afd5bSAlex Elder 	list_del_init(&rbd_dev->node);
2199d184f6bfSAlex Elder 
2200d184f6bfSAlex Elder 	/*
2201d184f6bfSAlex Elder 	 * If the id being "put" is not the current maximum, there
2202d184f6bfSAlex Elder 	 * is nothing special we need to do.
2203d184f6bfSAlex Elder 	 */
2204d184f6bfSAlex Elder 	if (rbd_id != atomic64_read(&rbd_id_max)) {
2205d184f6bfSAlex Elder 		spin_unlock(&rbd_dev_list_lock);
2206d184f6bfSAlex Elder 		return;
2207d184f6bfSAlex Elder 	}
2208d184f6bfSAlex Elder 
2209d184f6bfSAlex Elder 	/*
2210d184f6bfSAlex Elder 	 * We need to update the current maximum id.  Search the
2211d184f6bfSAlex Elder 	 * list to find out what it is.  We're more likely to find
2212d184f6bfSAlex Elder 	 * the maximum at the end, so search the list backward.
2213d184f6bfSAlex Elder 	 */
2214d184f6bfSAlex Elder 	max_id = 0;
2215d184f6bfSAlex Elder 	list_for_each_prev(tmp, &rbd_dev_list) {
2216d184f6bfSAlex Elder 		struct rbd_device *rbd_dev;
2217d184f6bfSAlex Elder 
2218d184f6bfSAlex Elder 		rbd_dev = list_entry(tmp, struct rbd_device, node);
2219d184f6bfSAlex Elder 		if (rbd_id > max_id)
2220d184f6bfSAlex Elder 			max_id = rbd_id;
2221d184f6bfSAlex Elder 	}
2222499afd5bSAlex Elder 	spin_unlock(&rbd_dev_list_lock);
22231ddbe94eSAlex Elder 
22241ddbe94eSAlex Elder 	/*
2225d184f6bfSAlex Elder 	 * The max id could have been updated by rbd_id_get(), in
2226d184f6bfSAlex Elder 	 * which case it now accurately reflects the new maximum.
2227d184f6bfSAlex Elder 	 * Be careful not to overwrite the maximum value in that
2228d184f6bfSAlex Elder 	 * case.
22291ddbe94eSAlex Elder 	 */
2230d184f6bfSAlex Elder 	atomic64_cmpxchg(&rbd_id_max, rbd_id, max_id);
2231b7f23c36SAlex Elder }
2232b7f23c36SAlex Elder 
223359c2be1eSYehuda Sadeh static ssize_t rbd_add(struct bus_type *bus,
223459c2be1eSYehuda Sadeh 		       const char *buf,
223559c2be1eSYehuda Sadeh 		       size_t count)
2236602adf40SYehuda Sadeh {
2237602adf40SYehuda Sadeh 	struct ceph_osd_client *osdc;
2238602adf40SYehuda Sadeh 	struct rbd_device *rbd_dev;
2239602adf40SYehuda Sadeh 	ssize_t rc = -ENOMEM;
2240b7f23c36SAlex Elder 	int irc;
2241602adf40SYehuda Sadeh 	char *mon_dev_name;
2242602adf40SYehuda Sadeh 	char *options;
2243602adf40SYehuda Sadeh 
2244602adf40SYehuda Sadeh 	if (!try_module_get(THIS_MODULE))
2245602adf40SYehuda Sadeh 		return -ENODEV;
2246602adf40SYehuda Sadeh 
2247602adf40SYehuda Sadeh 	mon_dev_name = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL);
2248602adf40SYehuda Sadeh 	if (!mon_dev_name)
2249602adf40SYehuda Sadeh 		goto err_out_mod;
2250602adf40SYehuda Sadeh 
2251602adf40SYehuda Sadeh 	options = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL);
2252602adf40SYehuda Sadeh 	if (!options)
2253602adf40SYehuda Sadeh 		goto err_mon_dev;
2254602adf40SYehuda Sadeh 
2255602adf40SYehuda Sadeh 	/* new rbd_device object */
2256602adf40SYehuda Sadeh 	rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
2257602adf40SYehuda Sadeh 	if (!rbd_dev)
2258602adf40SYehuda Sadeh 		goto err_out_opt;
2259602adf40SYehuda Sadeh 
2260602adf40SYehuda Sadeh 	/* static rbd_device initialization */
2261602adf40SYehuda Sadeh 	spin_lock_init(&rbd_dev->lock);
2262602adf40SYehuda Sadeh 	INIT_LIST_HEAD(&rbd_dev->node);
2263dfc5606dSYehuda Sadeh 	INIT_LIST_HEAD(&rbd_dev->snaps);
2264602adf40SYehuda Sadeh 
22650e805a1dSAlex Elder 	init_rwsem(&rbd_dev->header.snap_rwsem);
22660e805a1dSAlex Elder 
2267d184f6bfSAlex Elder 	/* generate unique id: find highest unique id, add one */
2268499afd5bSAlex Elder 	rbd_id_get(rbd_dev);
2269602adf40SYehuda Sadeh 
2270602adf40SYehuda Sadeh 	/* parse add command */
2271602adf40SYehuda Sadeh 	if (sscanf(buf, "%" __stringify(RBD_MAX_OPT_LEN) "s "
2272602adf40SYehuda Sadeh 		   "%" __stringify(RBD_MAX_OPT_LEN) "s "
2273602adf40SYehuda Sadeh 		   "%" __stringify(RBD_MAX_POOL_NAME_LEN) "s "
2274602adf40SYehuda Sadeh 		   "%" __stringify(RBD_MAX_OBJ_NAME_LEN) "s"
2275602adf40SYehuda Sadeh 		   "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
2276602adf40SYehuda Sadeh 		   mon_dev_name, options, rbd_dev->pool_name,
2277602adf40SYehuda Sadeh 		   rbd_dev->obj, rbd_dev->snap_name) < 4) {
2278602adf40SYehuda Sadeh 		rc = -EINVAL;
2279f0f8cef5SAlex Elder 		goto err_put_id;
2280602adf40SYehuda Sadeh 	}
2281602adf40SYehuda Sadeh 
2282602adf40SYehuda Sadeh 	if (rbd_dev->snap_name[0] == 0)
2283cc9d734cSJosh Durgin 		memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME,
2284cc9d734cSJosh Durgin 			sizeof (RBD_SNAP_HEAD_NAME));
2285602adf40SYehuda Sadeh 
2286602adf40SYehuda Sadeh 	rbd_dev->obj_len = strlen(rbd_dev->obj);
2287602adf40SYehuda Sadeh 	snprintf(rbd_dev->obj_md_name, sizeof(rbd_dev->obj_md_name), "%s%s",
2288602adf40SYehuda Sadeh 		 rbd_dev->obj, RBD_SUFFIX);
2289602adf40SYehuda Sadeh 
2290602adf40SYehuda Sadeh 	/* initialize rest of new object */
2291f0f8cef5SAlex Elder 	snprintf(rbd_dev->name, DEV_NAME_LEN, RBD_DRV_NAME "%d", rbd_dev->id);
2292e124a82fSAlex Elder 
2293602adf40SYehuda Sadeh 	rc = rbd_get_client(rbd_dev, mon_dev_name, options);
2294602adf40SYehuda Sadeh 	if (rc < 0)
2295f0f8cef5SAlex Elder 		goto err_put_id;
2296602adf40SYehuda Sadeh 
2297602adf40SYehuda Sadeh 	/* pick the pool */
22981dbb4399SAlex Elder 	osdc = &rbd_dev->rbd_client->client->osdc;
2299602adf40SYehuda Sadeh 	rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
2300602adf40SYehuda Sadeh 	if (rc < 0)
2301602adf40SYehuda Sadeh 		goto err_out_client;
2302602adf40SYehuda Sadeh 	rbd_dev->poolid = rc;
2303602adf40SYehuda Sadeh 
2304602adf40SYehuda Sadeh 	/* register our block device */
2305602adf40SYehuda Sadeh 	irc = register_blkdev(0, rbd_dev->name);
2306602adf40SYehuda Sadeh 	if (irc < 0) {
2307602adf40SYehuda Sadeh 		rc = irc;
2308602adf40SYehuda Sadeh 		goto err_out_client;
2309602adf40SYehuda Sadeh 	}
2310602adf40SYehuda Sadeh 	rbd_dev->major = irc;
2311602adf40SYehuda Sadeh 
2312dfc5606dSYehuda Sadeh 	rc = rbd_bus_add_dev(rbd_dev);
2313dfc5606dSYehuda Sadeh 	if (rc)
2314766fc439SYehuda Sadeh 		goto err_out_blkdev;
2315766fc439SYehuda Sadeh 
2316602adf40SYehuda Sadeh 	/* set up and announce blkdev mapping */
2317602adf40SYehuda Sadeh 	rc = rbd_init_disk(rbd_dev);
2318602adf40SYehuda Sadeh 	if (rc)
2319766fc439SYehuda Sadeh 		goto err_out_bus;
2320602adf40SYehuda Sadeh 
232159c2be1eSYehuda Sadeh 	rc = rbd_init_watch_dev(rbd_dev);
232259c2be1eSYehuda Sadeh 	if (rc)
232359c2be1eSYehuda Sadeh 		goto err_out_bus;
232459c2be1eSYehuda Sadeh 
2325602adf40SYehuda Sadeh 	return count;
2326602adf40SYehuda Sadeh 
2327766fc439SYehuda Sadeh err_out_bus:
2328499afd5bSAlex Elder 	rbd_id_put(rbd_dev);
2329766fc439SYehuda Sadeh 
2330766fc439SYehuda Sadeh 	/* this will also clean up rest of rbd_dev stuff */
2331766fc439SYehuda Sadeh 
2332766fc439SYehuda Sadeh 	rbd_bus_del_dev(rbd_dev);
2333766fc439SYehuda Sadeh 	kfree(options);
2334766fc439SYehuda Sadeh 	kfree(mon_dev_name);
2335766fc439SYehuda Sadeh 	return rc;
2336766fc439SYehuda Sadeh 
2337602adf40SYehuda Sadeh err_out_blkdev:
2338602adf40SYehuda Sadeh 	unregister_blkdev(rbd_dev->major, rbd_dev->name);
2339602adf40SYehuda Sadeh err_out_client:
2340602adf40SYehuda Sadeh 	rbd_put_client(rbd_dev);
2341f0f8cef5SAlex Elder err_put_id:
2342499afd5bSAlex Elder 	rbd_id_put(rbd_dev);
2343602adf40SYehuda Sadeh 	kfree(rbd_dev);
2344602adf40SYehuda Sadeh err_out_opt:
2345602adf40SYehuda Sadeh 	kfree(options);
2346602adf40SYehuda Sadeh err_mon_dev:
2347602adf40SYehuda Sadeh 	kfree(mon_dev_name);
2348602adf40SYehuda Sadeh err_out_mod:
2349602adf40SYehuda Sadeh 	dout("Error adding device %s\n", buf);
2350602adf40SYehuda Sadeh 	module_put(THIS_MODULE);
2351602adf40SYehuda Sadeh 	return rc;
2352602adf40SYehuda Sadeh }
2353602adf40SYehuda Sadeh 
2354602adf40SYehuda Sadeh static struct rbd_device *__rbd_get_dev(unsigned long id)
2355602adf40SYehuda Sadeh {
2356602adf40SYehuda Sadeh 	struct list_head *tmp;
2357602adf40SYehuda Sadeh 	struct rbd_device *rbd_dev;
2358602adf40SYehuda Sadeh 
2359e124a82fSAlex Elder 	spin_lock(&rbd_dev_list_lock);
2360602adf40SYehuda Sadeh 	list_for_each(tmp, &rbd_dev_list) {
2361602adf40SYehuda Sadeh 		rbd_dev = list_entry(tmp, struct rbd_device, node);
2362e124a82fSAlex Elder 		if (rbd_dev->id == id) {
2363e124a82fSAlex Elder 			spin_unlock(&rbd_dev_list_lock);
2364602adf40SYehuda Sadeh 			return rbd_dev;
2365602adf40SYehuda Sadeh 		}
2366e124a82fSAlex Elder 	}
2367e124a82fSAlex Elder 	spin_unlock(&rbd_dev_list_lock);
2368602adf40SYehuda Sadeh 	return NULL;
2369602adf40SYehuda Sadeh }
2370602adf40SYehuda Sadeh 
2371dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev)
2372602adf40SYehuda Sadeh {
2373dfc5606dSYehuda Sadeh 	struct rbd_device *rbd_dev =
2374dfc5606dSYehuda Sadeh 			container_of(dev, struct rbd_device, dev);
2375602adf40SYehuda Sadeh 
23761dbb4399SAlex Elder 	if (rbd_dev->watch_request) {
23771dbb4399SAlex Elder 		struct ceph_client *client = rbd_dev->rbd_client->client;
23781dbb4399SAlex Elder 
23791dbb4399SAlex Elder 		ceph_osdc_unregister_linger_request(&client->osdc,
238059c2be1eSYehuda Sadeh 						    rbd_dev->watch_request);
23811dbb4399SAlex Elder 	}
238259c2be1eSYehuda Sadeh 	if (rbd_dev->watch_event)
238379e3057cSYehuda Sadeh 		rbd_req_sync_unwatch(rbd_dev, rbd_dev->obj_md_name);
238459c2be1eSYehuda Sadeh 
2385602adf40SYehuda Sadeh 	rbd_put_client(rbd_dev);
2386602adf40SYehuda Sadeh 
2387602adf40SYehuda Sadeh 	/* clean up and free blkdev */
2388602adf40SYehuda Sadeh 	rbd_free_disk(rbd_dev);
2389602adf40SYehuda Sadeh 	unregister_blkdev(rbd_dev->major, rbd_dev->name);
2390602adf40SYehuda Sadeh 	kfree(rbd_dev);
2391602adf40SYehuda Sadeh 
2392602adf40SYehuda Sadeh 	/* release module ref */
2393602adf40SYehuda Sadeh 	module_put(THIS_MODULE);
2394602adf40SYehuda Sadeh }
2395602adf40SYehuda Sadeh 
2396dfc5606dSYehuda Sadeh static ssize_t rbd_remove(struct bus_type *bus,
2397602adf40SYehuda Sadeh 			  const char *buf,
2398602adf40SYehuda Sadeh 			  size_t count)
2399602adf40SYehuda Sadeh {
2400602adf40SYehuda Sadeh 	struct rbd_device *rbd_dev = NULL;
2401602adf40SYehuda Sadeh 	int target_id, rc;
2402602adf40SYehuda Sadeh 	unsigned long ul;
2403602adf40SYehuda Sadeh 	int ret = count;
2404602adf40SYehuda Sadeh 
2405602adf40SYehuda Sadeh 	rc = strict_strtoul(buf, 10, &ul);
2406602adf40SYehuda Sadeh 	if (rc)
2407602adf40SYehuda Sadeh 		return rc;
2408602adf40SYehuda Sadeh 
2409602adf40SYehuda Sadeh 	/* convert to int; abort if we lost anything in the conversion */
2410602adf40SYehuda Sadeh 	target_id = (int) ul;
2411602adf40SYehuda Sadeh 	if (target_id != ul)
2412602adf40SYehuda Sadeh 		return -EINVAL;
2413602adf40SYehuda Sadeh 
2414602adf40SYehuda Sadeh 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
2415602adf40SYehuda Sadeh 
2416602adf40SYehuda Sadeh 	rbd_dev = __rbd_get_dev(target_id);
2417602adf40SYehuda Sadeh 	if (!rbd_dev) {
2418602adf40SYehuda Sadeh 		ret = -ENOENT;
2419602adf40SYehuda Sadeh 		goto done;
2420602adf40SYehuda Sadeh 	}
2421602adf40SYehuda Sadeh 
2422499afd5bSAlex Elder 	rbd_id_put(rbd_dev);
2423dfc5606dSYehuda Sadeh 
2424dfc5606dSYehuda Sadeh 	__rbd_remove_all_snaps(rbd_dev);
2425dfc5606dSYehuda Sadeh 	rbd_bus_del_dev(rbd_dev);
2426602adf40SYehuda Sadeh 
2427602adf40SYehuda Sadeh done:
2428602adf40SYehuda Sadeh 	mutex_unlock(&ctl_mutex);
2429602adf40SYehuda Sadeh 	return ret;
2430602adf40SYehuda Sadeh }
2431602adf40SYehuda Sadeh 
2432dfc5606dSYehuda Sadeh static ssize_t rbd_snap_add(struct device *dev,
2433dfc5606dSYehuda Sadeh 			    struct device_attribute *attr,
2434602adf40SYehuda Sadeh 			    const char *buf,
2435602adf40SYehuda Sadeh 			    size_t count)
2436602adf40SYehuda Sadeh {
2437dfc5606dSYehuda Sadeh 	struct rbd_device *rbd_dev = dev_to_rbd(dev);
2438dfc5606dSYehuda Sadeh 	int ret;
2439dfc5606dSYehuda Sadeh 	char *name = kmalloc(count + 1, GFP_KERNEL);
2440602adf40SYehuda Sadeh 	if (!name)
2441602adf40SYehuda Sadeh 		return -ENOMEM;
2442602adf40SYehuda Sadeh 
2443dfc5606dSYehuda Sadeh 	snprintf(name, count, "%s", buf);
2444602adf40SYehuda Sadeh 
2445602adf40SYehuda Sadeh 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
2446602adf40SYehuda Sadeh 
2447602adf40SYehuda Sadeh 	ret = rbd_header_add_snap(rbd_dev,
2448602adf40SYehuda Sadeh 				  name, GFP_KERNEL);
2449602adf40SYehuda Sadeh 	if (ret < 0)
245059c2be1eSYehuda Sadeh 		goto err_unlock;
2451602adf40SYehuda Sadeh 
2452dfc5606dSYehuda Sadeh 	ret = __rbd_update_snaps(rbd_dev);
2453602adf40SYehuda Sadeh 	if (ret < 0)
245459c2be1eSYehuda Sadeh 		goto err_unlock;
245559c2be1eSYehuda Sadeh 
245659c2be1eSYehuda Sadeh 	/* shouldn't hold ctl_mutex when notifying.. notify might
245759c2be1eSYehuda Sadeh 	   trigger a watch callback that would need to get that mutex */
245859c2be1eSYehuda Sadeh 	mutex_unlock(&ctl_mutex);
245959c2be1eSYehuda Sadeh 
246059c2be1eSYehuda Sadeh 	/* make a best effort, don't error if failed */
246159c2be1eSYehuda Sadeh 	rbd_req_sync_notify(rbd_dev, rbd_dev->obj_md_name);
2462602adf40SYehuda Sadeh 
2463602adf40SYehuda Sadeh 	ret = count;
246459c2be1eSYehuda Sadeh 	kfree(name);
246559c2be1eSYehuda Sadeh 	return ret;
246659c2be1eSYehuda Sadeh 
246759c2be1eSYehuda Sadeh err_unlock:
2468602adf40SYehuda Sadeh 	mutex_unlock(&ctl_mutex);
2469602adf40SYehuda Sadeh 	kfree(name);
2470602adf40SYehuda Sadeh 	return ret;
2471602adf40SYehuda Sadeh }
2472602adf40SYehuda Sadeh 
2473602adf40SYehuda Sadeh /*
2474602adf40SYehuda Sadeh  * create control files in sysfs
2475dfc5606dSYehuda Sadeh  * /sys/bus/rbd/...
2476602adf40SYehuda Sadeh  */
2477602adf40SYehuda Sadeh static int rbd_sysfs_init(void)
2478602adf40SYehuda Sadeh {
2479dfc5606dSYehuda Sadeh 	int ret;
2480602adf40SYehuda Sadeh 
2481dfc5606dSYehuda Sadeh 	ret = bus_register(&rbd_bus_type);
2482dfc5606dSYehuda Sadeh 	if (ret < 0)
2483dfc5606dSYehuda Sadeh 		return ret;
2484602adf40SYehuda Sadeh 
2485dfc5606dSYehuda Sadeh 	ret = device_register(&rbd_root_dev);
2486602adf40SYehuda Sadeh 
2487602adf40SYehuda Sadeh 	return ret;
2488602adf40SYehuda Sadeh }
2489602adf40SYehuda Sadeh 
/*
 * Undo rbd_sysfs_init() in reverse order: unregister the root device
 * first, then the bus.
 */
static void rbd_sysfs_cleanup(void)
{
	device_unregister(&rbd_root_dev);
	bus_unregister(&rbd_bus_type);
}
2495602adf40SYehuda Sadeh 
2496602adf40SYehuda Sadeh int __init rbd_init(void)
2497602adf40SYehuda Sadeh {
2498602adf40SYehuda Sadeh 	int rc;
2499602adf40SYehuda Sadeh 
2500602adf40SYehuda Sadeh 	rc = rbd_sysfs_init();
2501602adf40SYehuda Sadeh 	if (rc)
2502602adf40SYehuda Sadeh 		return rc;
2503f0f8cef5SAlex Elder 	pr_info("loaded " RBD_DRV_NAME_LONG "\n");
2504602adf40SYehuda Sadeh 	return 0;
2505602adf40SYehuda Sadeh }
2506602adf40SYehuda Sadeh 
/* Module exit point: tear down what rbd_sysfs_init() registered. */
void __exit rbd_exit(void)
{
	rbd_sysfs_cleanup();
}
2511602adf40SYehuda Sadeh 
/* hook the entry and exit points into the module loader */
module_init(rbd_init);
module_exit(rbd_exit);

/* module metadata */
MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
MODULE_DESCRIPTION("rados block device");

/* following authorship retained from original osdblk.c */
MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");

MODULE_LICENSE("GPL");
2523