xref: /openbmc/linux/drivers/block/rbd.c (revision 263c6ca0)
1602adf40SYehuda Sadeh /*
2602adf40SYehuda Sadeh    rbd.c -- Export ceph rados objects as a Linux block device
3602adf40SYehuda Sadeh 
4602adf40SYehuda Sadeh 
5602adf40SYehuda Sadeh    based on drivers/block/osdblk.c:
6602adf40SYehuda Sadeh 
7602adf40SYehuda Sadeh    Copyright 2009 Red Hat, Inc.
8602adf40SYehuda Sadeh 
9602adf40SYehuda Sadeh    This program is free software; you can redistribute it and/or modify
10602adf40SYehuda Sadeh    it under the terms of the GNU General Public License as published by
11602adf40SYehuda Sadeh    the Free Software Foundation.
12602adf40SYehuda Sadeh 
13602adf40SYehuda Sadeh    This program is distributed in the hope that it will be useful,
14602adf40SYehuda Sadeh    but WITHOUT ANY WARRANTY; without even the implied warranty of
15602adf40SYehuda Sadeh    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16602adf40SYehuda Sadeh    GNU General Public License for more details.
17602adf40SYehuda Sadeh 
18602adf40SYehuda Sadeh    You should have received a copy of the GNU General Public License
19602adf40SYehuda Sadeh    along with this program; see the file COPYING.  If not, write to
20602adf40SYehuda Sadeh    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
21602adf40SYehuda Sadeh 
22602adf40SYehuda Sadeh 
23602adf40SYehuda Sadeh 
24dfc5606dSYehuda Sadeh    For usage instructions, please refer to:
25602adf40SYehuda Sadeh 
26dfc5606dSYehuda Sadeh                  Documentation/ABI/testing/sysfs-bus-rbd
27602adf40SYehuda Sadeh 
28602adf40SYehuda Sadeh  */
29602adf40SYehuda Sadeh 
30602adf40SYehuda Sadeh #include <linux/ceph/libceph.h>
31602adf40SYehuda Sadeh #include <linux/ceph/osd_client.h>
32602adf40SYehuda Sadeh #include <linux/ceph/mon_client.h>
33602adf40SYehuda Sadeh #include <linux/ceph/decode.h>
3459c2be1eSYehuda Sadeh #include <linux/parser.h>
35602adf40SYehuda Sadeh 
36602adf40SYehuda Sadeh #include <linux/kernel.h>
37602adf40SYehuda Sadeh #include <linux/device.h>
38602adf40SYehuda Sadeh #include <linux/module.h>
39602adf40SYehuda Sadeh #include <linux/fs.h>
40602adf40SYehuda Sadeh #include <linux/blkdev.h>
41602adf40SYehuda Sadeh 
42602adf40SYehuda Sadeh #include "rbd_types.h"
43602adf40SYehuda Sadeh 
/*
 * Block I/O is expressed in sectors.  Several Linux layers (blk, bio,
 * genhd) each give the term a meaning, but the default size is 512
 * bytes everywhere.  The names below simply read better than a bare
 * 9 or 512.
 */
#define SECTOR_SHIFT	9
#define SECTOR_SIZE	(1ULL << SECTOR_SHIFT)
52593a9e7bSAlex Elder 
/* Short and descriptive driver names. */
#define RBD_DRV_NAME		"rbd"
#define RBD_DRV_NAME_LONG	"rbd (rados block device)"

#define RBD_MINORS_PER_MAJOR	256	/* max minors per blkdev */

/*
 * Buffer size limits.  The header object name buffer has room for an
 * image name plus RBD_SUFFIX (sizeof counts the suffix's NUL).
 */
#define RBD_MAX_MD_NAME_LEN	(RBD_MAX_OBJ_NAME_LEN + sizeof(RBD_SUFFIX))
#define RBD_MAX_POOL_NAME_LEN	64
#define RBD_MAX_SNAP_NAME_LEN	32
#define RBD_MAX_OPT_LEN		1024

/* Snapshot name that selects the image head rather than a snapshot. */
#define RBD_SNAP_HEAD_NAME	"-"
64602adf40SYehuda Sadeh 
/*
 * RBD devices are named "rbd#": RBD_DRV_NAME followed by a unique
 * integer id.  MAX_INT_FORMAT_WIDTH exists so that DEV_NAME_LEN can
 * be checked to be large enough for every possible device name.
 */
#define DEV_NAME_LEN		32
#define MAX_INT_FORMAT_WIDTH	((5 * sizeof (int)) / 2 + 1)

/* Value given to rbd_options.notify_timeout when none is supplied. */
#define RBD_NOTIFY_TIMEOUT_DEFAULT 10
7559c2be1eSYehuda Sadeh 
76602adf40SYehuda Sadeh /*
77602adf40SYehuda Sadeh  * block device image metadata (in-memory version)
78602adf40SYehuda Sadeh  */
79602adf40SYehuda Sadeh struct rbd_image_header {
80602adf40SYehuda Sadeh 	u64 image_size;
81602adf40SYehuda Sadeh 	char block_name[32];
82602adf40SYehuda Sadeh 	__u8 obj_order;
83602adf40SYehuda Sadeh 	__u8 crypt_type;
84602adf40SYehuda Sadeh 	__u8 comp_type;
85602adf40SYehuda Sadeh 	struct ceph_snap_context *snapc;
86602adf40SYehuda Sadeh 	size_t snap_names_len;
87602adf40SYehuda Sadeh 	u64 snap_seq;
88602adf40SYehuda Sadeh 	u32 total_snaps;
89602adf40SYehuda Sadeh 
90602adf40SYehuda Sadeh 	char *snap_names;
91602adf40SYehuda Sadeh 	u64 *snap_sizes;
9259c2be1eSYehuda Sadeh 
9359c2be1eSYehuda Sadeh 	u64 obj_version;
9459c2be1eSYehuda Sadeh };
9559c2be1eSYehuda Sadeh 
/* rbd-specific options parsed by parse_rbd_opts_token(). */
struct rbd_options {
	int notify_timeout;	/* set by the "notify_timeout=%d" option */
};
99602adf40SYehuda Sadeh 
100602adf40SYehuda Sadeh /*
101f0f8cef5SAlex Elder  * an instance of the client.  multiple devices may share an rbd client.
102602adf40SYehuda Sadeh  */
103602adf40SYehuda Sadeh struct rbd_client {
104602adf40SYehuda Sadeh 	struct ceph_client	*client;
10559c2be1eSYehuda Sadeh 	struct rbd_options	*rbd_opts;
106602adf40SYehuda Sadeh 	struct kref		kref;
107602adf40SYehuda Sadeh 	struct list_head	node;
108602adf40SYehuda Sadeh };
109602adf40SYehuda Sadeh 
110602adf40SYehuda Sadeh /*
111f0f8cef5SAlex Elder  * a request completion status
112602adf40SYehuda Sadeh  */
1131fec7093SYehuda Sadeh struct rbd_req_status {
1141fec7093SYehuda Sadeh 	int done;
1151fec7093SYehuda Sadeh 	int rc;
1161fec7093SYehuda Sadeh 	u64 bytes;
1171fec7093SYehuda Sadeh };
1181fec7093SYehuda Sadeh 
1191fec7093SYehuda Sadeh /*
1201fec7093SYehuda Sadeh  * a collection of requests
1211fec7093SYehuda Sadeh  */
1221fec7093SYehuda Sadeh struct rbd_req_coll {
1231fec7093SYehuda Sadeh 	int			total;
1241fec7093SYehuda Sadeh 	int			num_done;
1251fec7093SYehuda Sadeh 	struct kref		kref;
1261fec7093SYehuda Sadeh 	struct rbd_req_status	status[0];
127602adf40SYehuda Sadeh };
128602adf40SYehuda Sadeh 
129f0f8cef5SAlex Elder /*
130f0f8cef5SAlex Elder  * a single io request
131f0f8cef5SAlex Elder  */
132f0f8cef5SAlex Elder struct rbd_request {
133f0f8cef5SAlex Elder 	struct request		*rq;		/* blk layer request */
134f0f8cef5SAlex Elder 	struct bio		*bio;		/* cloned bio */
135f0f8cef5SAlex Elder 	struct page		**pages;	/* list of used pages */
136f0f8cef5SAlex Elder 	u64			len;
137f0f8cef5SAlex Elder 	int			coll_index;
138f0f8cef5SAlex Elder 	struct rbd_req_coll	*coll;
139f0f8cef5SAlex Elder };
140f0f8cef5SAlex Elder 
141dfc5606dSYehuda Sadeh struct rbd_snap {
142dfc5606dSYehuda Sadeh 	struct	device		dev;
143dfc5606dSYehuda Sadeh 	const char		*name;
1443591538fSJosh Durgin 	u64			size;
145dfc5606dSYehuda Sadeh 	struct list_head	node;
146dfc5606dSYehuda Sadeh 	u64			id;
147dfc5606dSYehuda Sadeh };
148dfc5606dSYehuda Sadeh 
149602adf40SYehuda Sadeh /*
150602adf40SYehuda Sadeh  * a single device
151602adf40SYehuda Sadeh  */
152602adf40SYehuda Sadeh struct rbd_device {
153602adf40SYehuda Sadeh 	int			id;		/* blkdev unique id */
154602adf40SYehuda Sadeh 
155602adf40SYehuda Sadeh 	int			major;		/* blkdev assigned major */
156602adf40SYehuda Sadeh 	struct gendisk		*disk;		/* blkdev's gendisk and rq */
157602adf40SYehuda Sadeh 	struct request_queue	*q;
158602adf40SYehuda Sadeh 
159602adf40SYehuda Sadeh 	struct rbd_client	*rbd_client;
160602adf40SYehuda Sadeh 
161602adf40SYehuda Sadeh 	char			name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
162602adf40SYehuda Sadeh 
163602adf40SYehuda Sadeh 	spinlock_t		lock;		/* queue lock */
164602adf40SYehuda Sadeh 
165602adf40SYehuda Sadeh 	struct rbd_image_header	header;
166602adf40SYehuda Sadeh 	char			obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */
167602adf40SYehuda Sadeh 	int			obj_len;
168602adf40SYehuda Sadeh 	char			obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */
169602adf40SYehuda Sadeh 	char			pool_name[RBD_MAX_POOL_NAME_LEN];
170602adf40SYehuda Sadeh 	int			poolid;
171602adf40SYehuda Sadeh 
17259c2be1eSYehuda Sadeh 	struct ceph_osd_event   *watch_event;
17359c2be1eSYehuda Sadeh 	struct ceph_osd_request *watch_request;
17459c2be1eSYehuda Sadeh 
175c666601aSJosh Durgin 	/* protects updating the header */
176c666601aSJosh Durgin 	struct rw_semaphore     header_rwsem;
177602adf40SYehuda Sadeh 	char                    snap_name[RBD_MAX_SNAP_NAME_LEN];
17877dfe99fSJosh Durgin 	u64                     snap_id;	/* current snapshot id */
179602adf40SYehuda Sadeh 	int read_only;
180602adf40SYehuda Sadeh 
181602adf40SYehuda Sadeh 	struct list_head	node;
182dfc5606dSYehuda Sadeh 
183dfc5606dSYehuda Sadeh 	/* list of snapshots */
184dfc5606dSYehuda Sadeh 	struct list_head	snaps;
185dfc5606dSYehuda Sadeh 
186dfc5606dSYehuda Sadeh 	/* sysfs related */
187dfc5606dSYehuda Sadeh 	struct device		dev;
188dfc5606dSYehuda Sadeh };
189dfc5606dSYehuda Sadeh 
/* Serialize open/close/setup/teardown */
static DEFINE_MUTEX(ctl_mutex);

/* All rbd devices. */
static LIST_HEAD(rbd_dev_list);
static DEFINE_SPINLOCK(rbd_dev_list_lock);

/* All rbd clients; the list is walked and modified under the lock. */
static LIST_HEAD(rbd_client_list);
static DEFINE_SPINLOCK(rbd_client_list_lock);
197602adf40SYehuda Sadeh 
198dfc5606dSYehuda Sadeh static int __rbd_init_snaps_header(struct rbd_device *rbd_dev);
199dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev);
200dfc5606dSYehuda Sadeh static ssize_t rbd_snap_add(struct device *dev,
201dfc5606dSYehuda Sadeh 			    struct device_attribute *attr,
202dfc5606dSYehuda Sadeh 			    const char *buf,
203dfc5606dSYehuda Sadeh 			    size_t count);
204dfc5606dSYehuda Sadeh static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev,
20569932487SJustin P. Mattock 				  struct rbd_snap *snap);
206dfc5606dSYehuda Sadeh 
207f0f8cef5SAlex Elder static ssize_t rbd_add(struct bus_type *bus, const char *buf,
208f0f8cef5SAlex Elder 		       size_t count);
209f0f8cef5SAlex Elder static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
210f0f8cef5SAlex Elder 			  size_t count);
211f0f8cef5SAlex Elder 
212f0f8cef5SAlex Elder static struct bus_attribute rbd_bus_attrs[] = {
213f0f8cef5SAlex Elder 	__ATTR(add, S_IWUSR, NULL, rbd_add),
214f0f8cef5SAlex Elder 	__ATTR(remove, S_IWUSR, NULL, rbd_remove),
215f0f8cef5SAlex Elder 	__ATTR_NULL
216f0f8cef5SAlex Elder };
217f0f8cef5SAlex Elder 
218f0f8cef5SAlex Elder static struct bus_type rbd_bus_type = {
219f0f8cef5SAlex Elder 	.name		= "rbd",
220f0f8cef5SAlex Elder 	.bus_attrs	= rbd_bus_attrs,
221f0f8cef5SAlex Elder };
222f0f8cef5SAlex Elder 
/*
 * Release callback for rbd_root_dev.  The root device is a static
 * object, so there is nothing to free here.
 */
static void rbd_root_dev_release(struct device *dev)
{
}
226f0f8cef5SAlex Elder 
227f0f8cef5SAlex Elder static struct device rbd_root_dev = {
228f0f8cef5SAlex Elder 	.init_name =    "rbd",
229f0f8cef5SAlex Elder 	.release =      rbd_root_dev_release,
230f0f8cef5SAlex Elder };
231f0f8cef5SAlex Elder 
232dfc5606dSYehuda Sadeh 
233dfc5606dSYehuda Sadeh static struct device *rbd_get_dev(struct rbd_device *rbd_dev)
234dfc5606dSYehuda Sadeh {
235dfc5606dSYehuda Sadeh 	return get_device(&rbd_dev->dev);
236dfc5606dSYehuda Sadeh }
237dfc5606dSYehuda Sadeh 
238dfc5606dSYehuda Sadeh static void rbd_put_dev(struct rbd_device *rbd_dev)
239dfc5606dSYehuda Sadeh {
240dfc5606dSYehuda Sadeh 	put_device(&rbd_dev->dev);
241dfc5606dSYehuda Sadeh }
242602adf40SYehuda Sadeh 
243263c6ca0SJosh Durgin static int __rbd_refresh_header(struct rbd_device *rbd_dev);
24459c2be1eSYehuda Sadeh 
245602adf40SYehuda Sadeh static int rbd_open(struct block_device *bdev, fmode_t mode)
246602adf40SYehuda Sadeh {
247f0f8cef5SAlex Elder 	struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
248602adf40SYehuda Sadeh 
249dfc5606dSYehuda Sadeh 	rbd_get_dev(rbd_dev);
250dfc5606dSYehuda Sadeh 
251602adf40SYehuda Sadeh 	set_device_ro(bdev, rbd_dev->read_only);
252602adf40SYehuda Sadeh 
253602adf40SYehuda Sadeh 	if ((mode & FMODE_WRITE) && rbd_dev->read_only)
254602adf40SYehuda Sadeh 		return -EROFS;
255602adf40SYehuda Sadeh 
256602adf40SYehuda Sadeh 	return 0;
257602adf40SYehuda Sadeh }
258602adf40SYehuda Sadeh 
259dfc5606dSYehuda Sadeh static int rbd_release(struct gendisk *disk, fmode_t mode)
260dfc5606dSYehuda Sadeh {
261dfc5606dSYehuda Sadeh 	struct rbd_device *rbd_dev = disk->private_data;
262dfc5606dSYehuda Sadeh 
263dfc5606dSYehuda Sadeh 	rbd_put_dev(rbd_dev);
264dfc5606dSYehuda Sadeh 
265dfc5606dSYehuda Sadeh 	return 0;
266dfc5606dSYehuda Sadeh }
267dfc5606dSYehuda Sadeh 
268602adf40SYehuda Sadeh static const struct block_device_operations rbd_bd_ops = {
269602adf40SYehuda Sadeh 	.owner			= THIS_MODULE,
270602adf40SYehuda Sadeh 	.open			= rbd_open,
271dfc5606dSYehuda Sadeh 	.release		= rbd_release,
272602adf40SYehuda Sadeh };
273602adf40SYehuda Sadeh 
274602adf40SYehuda Sadeh /*
275602adf40SYehuda Sadeh  * Initialize an rbd client instance.
276602adf40SYehuda Sadeh  * We own *opt.
277602adf40SYehuda Sadeh  */
27859c2be1eSYehuda Sadeh static struct rbd_client *rbd_client_create(struct ceph_options *opt,
27959c2be1eSYehuda Sadeh 					    struct rbd_options *rbd_opts)
280602adf40SYehuda Sadeh {
281602adf40SYehuda Sadeh 	struct rbd_client *rbdc;
282602adf40SYehuda Sadeh 	int ret = -ENOMEM;
283602adf40SYehuda Sadeh 
284602adf40SYehuda Sadeh 	dout("rbd_client_create\n");
285602adf40SYehuda Sadeh 	rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL);
286602adf40SYehuda Sadeh 	if (!rbdc)
287602adf40SYehuda Sadeh 		goto out_opt;
288602adf40SYehuda Sadeh 
289602adf40SYehuda Sadeh 	kref_init(&rbdc->kref);
290602adf40SYehuda Sadeh 	INIT_LIST_HEAD(&rbdc->node);
291602adf40SYehuda Sadeh 
292bc534d86SAlex Elder 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
293bc534d86SAlex Elder 
2946ab00d46SSage Weil 	rbdc->client = ceph_create_client(opt, rbdc, 0, 0);
295602adf40SYehuda Sadeh 	if (IS_ERR(rbdc->client))
296bc534d86SAlex Elder 		goto out_mutex;
29728f259b7SVasiliy Kulikov 	opt = NULL; /* Now rbdc->client is responsible for opt */
298602adf40SYehuda Sadeh 
299602adf40SYehuda Sadeh 	ret = ceph_open_session(rbdc->client);
300602adf40SYehuda Sadeh 	if (ret < 0)
301602adf40SYehuda Sadeh 		goto out_err;
302602adf40SYehuda Sadeh 
30359c2be1eSYehuda Sadeh 	rbdc->rbd_opts = rbd_opts;
30459c2be1eSYehuda Sadeh 
305432b8587SAlex Elder 	spin_lock(&rbd_client_list_lock);
306602adf40SYehuda Sadeh 	list_add_tail(&rbdc->node, &rbd_client_list);
307432b8587SAlex Elder 	spin_unlock(&rbd_client_list_lock);
308602adf40SYehuda Sadeh 
309bc534d86SAlex Elder 	mutex_unlock(&ctl_mutex);
310bc534d86SAlex Elder 
311602adf40SYehuda Sadeh 	dout("rbd_client_create created %p\n", rbdc);
312602adf40SYehuda Sadeh 	return rbdc;
313602adf40SYehuda Sadeh 
314602adf40SYehuda Sadeh out_err:
315602adf40SYehuda Sadeh 	ceph_destroy_client(rbdc->client);
316bc534d86SAlex Elder out_mutex:
317bc534d86SAlex Elder 	mutex_unlock(&ctl_mutex);
318602adf40SYehuda Sadeh 	kfree(rbdc);
319602adf40SYehuda Sadeh out_opt:
32028f259b7SVasiliy Kulikov 	if (opt)
321602adf40SYehuda Sadeh 		ceph_destroy_options(opt);
32228f259b7SVasiliy Kulikov 	return ERR_PTR(ret);
323602adf40SYehuda Sadeh }
324602adf40SYehuda Sadeh 
325602adf40SYehuda Sadeh /*
326602adf40SYehuda Sadeh  * Find a ceph client with specific addr and configuration.
327602adf40SYehuda Sadeh  */
328602adf40SYehuda Sadeh static struct rbd_client *__rbd_client_find(struct ceph_options *opt)
329602adf40SYehuda Sadeh {
330602adf40SYehuda Sadeh 	struct rbd_client *client_node;
331602adf40SYehuda Sadeh 
332602adf40SYehuda Sadeh 	if (opt->flags & CEPH_OPT_NOSHARE)
333602adf40SYehuda Sadeh 		return NULL;
334602adf40SYehuda Sadeh 
335602adf40SYehuda Sadeh 	list_for_each_entry(client_node, &rbd_client_list, node)
336602adf40SYehuda Sadeh 		if (ceph_compare_options(opt, client_node->client) == 0)
337602adf40SYehuda Sadeh 			return client_node;
338602adf40SYehuda Sadeh 	return NULL;
339602adf40SYehuda Sadeh }
340602adf40SYehuda Sadeh 
/*
 * Mount option tokens.  Integer-valued options are listed before
 * Opt_last_int, string-valued ones between Opt_last_int and
 * Opt_last_string.
 */
enum {
	/* int args */
	Opt_notify_timeout,
	Opt_last_int,
	/* string args (none yet) */
	Opt_last_string,
};
35159c2be1eSYehuda Sadeh 
35259c2be1eSYehuda Sadeh static match_table_t rbdopt_tokens = {
35359c2be1eSYehuda Sadeh 	{Opt_notify_timeout, "notify_timeout=%d"},
35459c2be1eSYehuda Sadeh 	/* int args above */
35559c2be1eSYehuda Sadeh 	/* string args above */
35659c2be1eSYehuda Sadeh 	{-1, NULL}
35759c2be1eSYehuda Sadeh };
35859c2be1eSYehuda Sadeh 
35959c2be1eSYehuda Sadeh static int parse_rbd_opts_token(char *c, void *private)
36059c2be1eSYehuda Sadeh {
36159c2be1eSYehuda Sadeh 	struct rbd_options *rbdopt = private;
36259c2be1eSYehuda Sadeh 	substring_t argstr[MAX_OPT_ARGS];
36359c2be1eSYehuda Sadeh 	int token, intval, ret;
36459c2be1eSYehuda Sadeh 
36521079786SAlex Elder 	token = match_token(c, rbdopt_tokens, argstr);
36659c2be1eSYehuda Sadeh 	if (token < 0)
36759c2be1eSYehuda Sadeh 		return -EINVAL;
36859c2be1eSYehuda Sadeh 
36959c2be1eSYehuda Sadeh 	if (token < Opt_last_int) {
37059c2be1eSYehuda Sadeh 		ret = match_int(&argstr[0], &intval);
37159c2be1eSYehuda Sadeh 		if (ret < 0) {
37259c2be1eSYehuda Sadeh 			pr_err("bad mount option arg (not int) "
37359c2be1eSYehuda Sadeh 			       "at '%s'\n", c);
37459c2be1eSYehuda Sadeh 			return ret;
37559c2be1eSYehuda Sadeh 		}
37659c2be1eSYehuda Sadeh 		dout("got int token %d val %d\n", token, intval);
37759c2be1eSYehuda Sadeh 	} else if (token > Opt_last_int && token < Opt_last_string) {
37859c2be1eSYehuda Sadeh 		dout("got string token %d val %s\n", token,
37959c2be1eSYehuda Sadeh 		     argstr[0].from);
38059c2be1eSYehuda Sadeh 	} else {
38159c2be1eSYehuda Sadeh 		dout("got token %d\n", token);
38259c2be1eSYehuda Sadeh 	}
38359c2be1eSYehuda Sadeh 
38459c2be1eSYehuda Sadeh 	switch (token) {
38559c2be1eSYehuda Sadeh 	case Opt_notify_timeout:
38659c2be1eSYehuda Sadeh 		rbdopt->notify_timeout = intval;
38759c2be1eSYehuda Sadeh 		break;
38859c2be1eSYehuda Sadeh 	default:
38959c2be1eSYehuda Sadeh 		BUG_ON(token);
39059c2be1eSYehuda Sadeh 	}
39159c2be1eSYehuda Sadeh 	return 0;
39259c2be1eSYehuda Sadeh }
39359c2be1eSYehuda Sadeh 
39459c2be1eSYehuda Sadeh /*
395602adf40SYehuda Sadeh  * Get a ceph client with specific addr and configuration, if one does
396602adf40SYehuda Sadeh  * not exist create it.
397602adf40SYehuda Sadeh  */
3985214ecc4SAlex Elder static struct rbd_client *rbd_get_client(const char *mon_addr,
3995214ecc4SAlex Elder 					 size_t mon_addr_len,
4005214ecc4SAlex Elder 					 char *options)
401602adf40SYehuda Sadeh {
402602adf40SYehuda Sadeh 	struct rbd_client *rbdc;
403602adf40SYehuda Sadeh 	struct ceph_options *opt;
40459c2be1eSYehuda Sadeh 	struct rbd_options *rbd_opts;
40559c2be1eSYehuda Sadeh 
40659c2be1eSYehuda Sadeh 	rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL);
40759c2be1eSYehuda Sadeh 	if (!rbd_opts)
408d720bcb0SAlex Elder 		return ERR_PTR(-ENOMEM);
40959c2be1eSYehuda Sadeh 
41059c2be1eSYehuda Sadeh 	rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT;
411602adf40SYehuda Sadeh 
412ee57741cSAlex Elder 	opt = ceph_parse_options(options, mon_addr,
4135214ecc4SAlex Elder 				mon_addr + mon_addr_len,
41421079786SAlex Elder 				parse_rbd_opts_token, rbd_opts);
415ee57741cSAlex Elder 	if (IS_ERR(opt)) {
416d720bcb0SAlex Elder 		kfree(rbd_opts);
417d720bcb0SAlex Elder 		return ERR_CAST(opt);
418ee57741cSAlex Elder 	}
419602adf40SYehuda Sadeh 
420432b8587SAlex Elder 	spin_lock(&rbd_client_list_lock);
421602adf40SYehuda Sadeh 	rbdc = __rbd_client_find(opt);
422602adf40SYehuda Sadeh 	if (rbdc) {
423e6994d3dSAlex Elder 		/* using an existing client */
424e6994d3dSAlex Elder 		kref_get(&rbdc->kref);
425432b8587SAlex Elder 		spin_unlock(&rbd_client_list_lock);
426e6994d3dSAlex Elder 
427602adf40SYehuda Sadeh 		ceph_destroy_options(opt);
42897bb59a0SAlex Elder 		kfree(rbd_opts);
429602adf40SYehuda Sadeh 
430d720bcb0SAlex Elder 		return rbdc;
431602adf40SYehuda Sadeh 	}
432432b8587SAlex Elder 	spin_unlock(&rbd_client_list_lock);
433602adf40SYehuda Sadeh 
43459c2be1eSYehuda Sadeh 	rbdc = rbd_client_create(opt, rbd_opts);
435d97081b0SAlex Elder 
436d720bcb0SAlex Elder 	if (IS_ERR(rbdc))
43759c2be1eSYehuda Sadeh 		kfree(rbd_opts);
438d720bcb0SAlex Elder 
439d720bcb0SAlex Elder 	return rbdc;
440602adf40SYehuda Sadeh }
441602adf40SYehuda Sadeh 
442602adf40SYehuda Sadeh /*
443602adf40SYehuda Sadeh  * Destroy ceph client
444d23a4b3fSAlex Elder  *
445432b8587SAlex Elder  * Caller must hold rbd_client_list_lock.
446602adf40SYehuda Sadeh  */
447602adf40SYehuda Sadeh static void rbd_client_release(struct kref *kref)
448602adf40SYehuda Sadeh {
449602adf40SYehuda Sadeh 	struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
450602adf40SYehuda Sadeh 
451602adf40SYehuda Sadeh 	dout("rbd_release_client %p\n", rbdc);
452cd9d9f5dSAlex Elder 	spin_lock(&rbd_client_list_lock);
453602adf40SYehuda Sadeh 	list_del(&rbdc->node);
454cd9d9f5dSAlex Elder 	spin_unlock(&rbd_client_list_lock);
455602adf40SYehuda Sadeh 
456602adf40SYehuda Sadeh 	ceph_destroy_client(rbdc->client);
45759c2be1eSYehuda Sadeh 	kfree(rbdc->rbd_opts);
458602adf40SYehuda Sadeh 	kfree(rbdc);
459602adf40SYehuda Sadeh }
460602adf40SYehuda Sadeh 
461602adf40SYehuda Sadeh /*
462602adf40SYehuda Sadeh  * Drop reference to ceph client node. If it's not referenced anymore, release
463602adf40SYehuda Sadeh  * it.
464602adf40SYehuda Sadeh  */
465602adf40SYehuda Sadeh static void rbd_put_client(struct rbd_device *rbd_dev)
466602adf40SYehuda Sadeh {
467602adf40SYehuda Sadeh 	kref_put(&rbd_dev->rbd_client->kref, rbd_client_release);
468602adf40SYehuda Sadeh 	rbd_dev->rbd_client = NULL;
469602adf40SYehuda Sadeh }
470602adf40SYehuda Sadeh 
4711fec7093SYehuda Sadeh /*
4721fec7093SYehuda Sadeh  * Destroy requests collection
4731fec7093SYehuda Sadeh  */
4741fec7093SYehuda Sadeh static void rbd_coll_release(struct kref *kref)
4751fec7093SYehuda Sadeh {
4761fec7093SYehuda Sadeh 	struct rbd_req_coll *coll =
4771fec7093SYehuda Sadeh 		container_of(kref, struct rbd_req_coll, kref);
4781fec7093SYehuda Sadeh 
4791fec7093SYehuda Sadeh 	dout("rbd_coll_release %p\n", coll);
4801fec7093SYehuda Sadeh 	kfree(coll);
4811fec7093SYehuda Sadeh }
482602adf40SYehuda Sadeh 
483602adf40SYehuda Sadeh /*
484602adf40SYehuda Sadeh  * Create a new header structure, translate header format from the on-disk
485602adf40SYehuda Sadeh  * header.
486602adf40SYehuda Sadeh  */
487602adf40SYehuda Sadeh static int rbd_header_from_disk(struct rbd_image_header *header,
488602adf40SYehuda Sadeh 				 struct rbd_image_header_ondisk *ondisk,
48950f7c4c9SXi Wang 				 u32 allocated_snaps,
490602adf40SYehuda Sadeh 				 gfp_t gfp_flags)
491602adf40SYehuda Sadeh {
49250f7c4c9SXi Wang 	u32 i, snap_count;
493602adf40SYehuda Sadeh 
49421079786SAlex Elder 	if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT)))
49581e759fbSJosh Durgin 		return -ENXIO;
49681e759fbSJosh Durgin 
49700f1f36fSAlex Elder 	snap_count = le32_to_cpu(ondisk->snap_count);
49850f7c4c9SXi Wang 	if (snap_count > (UINT_MAX - sizeof(struct ceph_snap_context))
49950f7c4c9SXi Wang 			 / sizeof (*ondisk))
50050f7c4c9SXi Wang 		return -EINVAL;
501602adf40SYehuda Sadeh 	header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
50221079786SAlex Elder 				snap_count * sizeof (*ondisk),
503602adf40SYehuda Sadeh 				gfp_flags);
504602adf40SYehuda Sadeh 	if (!header->snapc)
505602adf40SYehuda Sadeh 		return -ENOMEM;
50600f1f36fSAlex Elder 
50700f1f36fSAlex Elder 	header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
508602adf40SYehuda Sadeh 	if (snap_count) {
509602adf40SYehuda Sadeh 		header->snap_names = kmalloc(header->snap_names_len,
510f8ad495aSDan Carpenter 					     gfp_flags);
511602adf40SYehuda Sadeh 		if (!header->snap_names)
512602adf40SYehuda Sadeh 			goto err_snapc;
513602adf40SYehuda Sadeh 		header->snap_sizes = kmalloc(snap_count * sizeof(u64),
514f8ad495aSDan Carpenter 					     gfp_flags);
515602adf40SYehuda Sadeh 		if (!header->snap_sizes)
516602adf40SYehuda Sadeh 			goto err_names;
517602adf40SYehuda Sadeh 	} else {
518602adf40SYehuda Sadeh 		header->snap_names = NULL;
519602adf40SYehuda Sadeh 		header->snap_sizes = NULL;
520602adf40SYehuda Sadeh 	}
521602adf40SYehuda Sadeh 	memcpy(header->block_name, ondisk->block_name,
522602adf40SYehuda Sadeh 	       sizeof(ondisk->block_name));
523602adf40SYehuda Sadeh 
524602adf40SYehuda Sadeh 	header->image_size = le64_to_cpu(ondisk->image_size);
525602adf40SYehuda Sadeh 	header->obj_order = ondisk->options.order;
526602adf40SYehuda Sadeh 	header->crypt_type = ondisk->options.crypt_type;
527602adf40SYehuda Sadeh 	header->comp_type = ondisk->options.comp_type;
528602adf40SYehuda Sadeh 
529602adf40SYehuda Sadeh 	atomic_set(&header->snapc->nref, 1);
530602adf40SYehuda Sadeh 	header->snap_seq = le64_to_cpu(ondisk->snap_seq);
531602adf40SYehuda Sadeh 	header->snapc->num_snaps = snap_count;
532602adf40SYehuda Sadeh 	header->total_snaps = snap_count;
533602adf40SYehuda Sadeh 
53421079786SAlex Elder 	if (snap_count && allocated_snaps == snap_count) {
535602adf40SYehuda Sadeh 		for (i = 0; i < snap_count; i++) {
536602adf40SYehuda Sadeh 			header->snapc->snaps[i] =
537602adf40SYehuda Sadeh 				le64_to_cpu(ondisk->snaps[i].id);
538602adf40SYehuda Sadeh 			header->snap_sizes[i] =
539602adf40SYehuda Sadeh 				le64_to_cpu(ondisk->snaps[i].image_size);
540602adf40SYehuda Sadeh 		}
541602adf40SYehuda Sadeh 
542602adf40SYehuda Sadeh 		/* copy snapshot names */
543602adf40SYehuda Sadeh 		memcpy(header->snap_names, &ondisk->snaps[i],
544602adf40SYehuda Sadeh 			header->snap_names_len);
545602adf40SYehuda Sadeh 	}
546602adf40SYehuda Sadeh 
547602adf40SYehuda Sadeh 	return 0;
548602adf40SYehuda Sadeh 
549602adf40SYehuda Sadeh err_names:
550602adf40SYehuda Sadeh 	kfree(header->snap_names);
551602adf40SYehuda Sadeh err_snapc:
552602adf40SYehuda Sadeh 	kfree(header->snapc);
55300f1f36fSAlex Elder 	return -ENOMEM;
554602adf40SYehuda Sadeh }
555602adf40SYehuda Sadeh 
556602adf40SYehuda Sadeh static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
557602adf40SYehuda Sadeh 			u64 *seq, u64 *size)
558602adf40SYehuda Sadeh {
559602adf40SYehuda Sadeh 	int i;
560602adf40SYehuda Sadeh 	char *p = header->snap_names;
561602adf40SYehuda Sadeh 
56200f1f36fSAlex Elder 	for (i = 0; i < header->total_snaps; i++) {
56300f1f36fSAlex Elder 		if (!strcmp(snap_name, p)) {
56400f1f36fSAlex Elder 
56500f1f36fSAlex Elder 			/* Found it.  Pass back its id and/or size */
56600f1f36fSAlex Elder 
567602adf40SYehuda Sadeh 			if (seq)
568602adf40SYehuda Sadeh 				*seq = header->snapc->snaps[i];
569602adf40SYehuda Sadeh 			if (size)
570602adf40SYehuda Sadeh 				*size = header->snap_sizes[i];
571602adf40SYehuda Sadeh 			return i;
572602adf40SYehuda Sadeh 		}
57300f1f36fSAlex Elder 		p += strlen(p) + 1;	/* Skip ahead to the next name */
57400f1f36fSAlex Elder 	}
57500f1f36fSAlex Elder 	return -ENOENT;
57600f1f36fSAlex Elder }
577602adf40SYehuda Sadeh 
578cc9d734cSJosh Durgin static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
579602adf40SYehuda Sadeh {
580602adf40SYehuda Sadeh 	struct rbd_image_header *header = &dev->header;
581602adf40SYehuda Sadeh 	struct ceph_snap_context *snapc = header->snapc;
582602adf40SYehuda Sadeh 	int ret = -ENOENT;
583602adf40SYehuda Sadeh 
584cc9d734cSJosh Durgin 	BUILD_BUG_ON(sizeof (dev->snap_name) < sizeof (RBD_SNAP_HEAD_NAME));
585cc9d734cSJosh Durgin 
586c666601aSJosh Durgin 	down_write(&dev->header_rwsem);
587602adf40SYehuda Sadeh 
588cc9d734cSJosh Durgin 	if (!memcmp(dev->snap_name, RBD_SNAP_HEAD_NAME,
589cc9d734cSJosh Durgin 		    sizeof (RBD_SNAP_HEAD_NAME))) {
590602adf40SYehuda Sadeh 		if (header->total_snaps)
591602adf40SYehuda Sadeh 			snapc->seq = header->snap_seq;
592602adf40SYehuda Sadeh 		else
593602adf40SYehuda Sadeh 			snapc->seq = 0;
59477dfe99fSJosh Durgin 		dev->snap_id = CEPH_NOSNAP;
595602adf40SYehuda Sadeh 		dev->read_only = 0;
596602adf40SYehuda Sadeh 		if (size)
597602adf40SYehuda Sadeh 			*size = header->image_size;
598602adf40SYehuda Sadeh 	} else {
599cc9d734cSJosh Durgin 		ret = snap_by_name(header, dev->snap_name, &snapc->seq, size);
600602adf40SYehuda Sadeh 		if (ret < 0)
601602adf40SYehuda Sadeh 			goto done;
60277dfe99fSJosh Durgin 		dev->snap_id = snapc->seq;
603602adf40SYehuda Sadeh 		dev->read_only = 1;
604602adf40SYehuda Sadeh 	}
605602adf40SYehuda Sadeh 
606602adf40SYehuda Sadeh 	ret = 0;
607602adf40SYehuda Sadeh done:
608c666601aSJosh Durgin 	up_write(&dev->header_rwsem);
609602adf40SYehuda Sadeh 	return ret;
610602adf40SYehuda Sadeh }
611602adf40SYehuda Sadeh 
612602adf40SYehuda Sadeh static void rbd_header_free(struct rbd_image_header *header)
613602adf40SYehuda Sadeh {
614602adf40SYehuda Sadeh 	kfree(header->snapc);
615602adf40SYehuda Sadeh 	kfree(header->snap_names);
616602adf40SYehuda Sadeh 	kfree(header->snap_sizes);
617602adf40SYehuda Sadeh }
618602adf40SYehuda Sadeh 
619602adf40SYehuda Sadeh /*
620602adf40SYehuda Sadeh  * get the actual striped segment name, offset and length
621602adf40SYehuda Sadeh  */
622602adf40SYehuda Sadeh static u64 rbd_get_segment(struct rbd_image_header *header,
623602adf40SYehuda Sadeh 			   const char *block_name,
624602adf40SYehuda Sadeh 			   u64 ofs, u64 len,
625602adf40SYehuda Sadeh 			   char *seg_name, u64 *segofs)
626602adf40SYehuda Sadeh {
627602adf40SYehuda Sadeh 	u64 seg = ofs >> header->obj_order;
628602adf40SYehuda Sadeh 
629602adf40SYehuda Sadeh 	if (seg_name)
630602adf40SYehuda Sadeh 		snprintf(seg_name, RBD_MAX_SEG_NAME_LEN,
631602adf40SYehuda Sadeh 			 "%s.%012llx", block_name, seg);
632602adf40SYehuda Sadeh 
633602adf40SYehuda Sadeh 	ofs = ofs & ((1 << header->obj_order) - 1);
634602adf40SYehuda Sadeh 	len = min_t(u64, len, (1 << header->obj_order) - ofs);
635602adf40SYehuda Sadeh 
636602adf40SYehuda Sadeh 	if (segofs)
637602adf40SYehuda Sadeh 		*segofs = ofs;
638602adf40SYehuda Sadeh 
639602adf40SYehuda Sadeh 	return len;
640602adf40SYehuda Sadeh }
641602adf40SYehuda Sadeh 
6421fec7093SYehuda Sadeh static int rbd_get_num_segments(struct rbd_image_header *header,
6431fec7093SYehuda Sadeh 				u64 ofs, u64 len)
6441fec7093SYehuda Sadeh {
6451fec7093SYehuda Sadeh 	u64 start_seg = ofs >> header->obj_order;
6461fec7093SYehuda Sadeh 	u64 end_seg = (ofs + len - 1) >> header->obj_order;
6471fec7093SYehuda Sadeh 	return end_seg - start_seg + 1;
6481fec7093SYehuda Sadeh }
6491fec7093SYehuda Sadeh 
650602adf40SYehuda Sadeh /*
651029bcbd8SJosh Durgin  * returns the size of an object in the image
652029bcbd8SJosh Durgin  */
653029bcbd8SJosh Durgin static u64 rbd_obj_bytes(struct rbd_image_header *header)
654029bcbd8SJosh Durgin {
655029bcbd8SJosh Durgin 	return 1 << header->obj_order;
656029bcbd8SJosh Durgin }
657029bcbd8SJosh Durgin 
658029bcbd8SJosh Durgin /*
659602adf40SYehuda Sadeh  * bio helpers
660602adf40SYehuda Sadeh  */
661602adf40SYehuda Sadeh 
662602adf40SYehuda Sadeh static void bio_chain_put(struct bio *chain)
663602adf40SYehuda Sadeh {
664602adf40SYehuda Sadeh 	struct bio *tmp;
665602adf40SYehuda Sadeh 
666602adf40SYehuda Sadeh 	while (chain) {
667602adf40SYehuda Sadeh 		tmp = chain;
668602adf40SYehuda Sadeh 		chain = chain->bi_next;
669602adf40SYehuda Sadeh 		bio_put(tmp);
670602adf40SYehuda Sadeh 	}
671602adf40SYehuda Sadeh }
672602adf40SYehuda Sadeh 
673602adf40SYehuda Sadeh /*
674602adf40SYehuda Sadeh  * zeros a bio chain, starting at specific offset
675602adf40SYehuda Sadeh  */
676602adf40SYehuda Sadeh static void zero_bio_chain(struct bio *chain, int start_ofs)
677602adf40SYehuda Sadeh {
678602adf40SYehuda Sadeh 	struct bio_vec *bv;
679602adf40SYehuda Sadeh 	unsigned long flags;
680602adf40SYehuda Sadeh 	void *buf;
681602adf40SYehuda Sadeh 	int i;
682602adf40SYehuda Sadeh 	int pos = 0;
683602adf40SYehuda Sadeh 
684602adf40SYehuda Sadeh 	while (chain) {
685602adf40SYehuda Sadeh 		bio_for_each_segment(bv, chain, i) {
686602adf40SYehuda Sadeh 			if (pos + bv->bv_len > start_ofs) {
687602adf40SYehuda Sadeh 				int remainder = max(start_ofs - pos, 0);
688602adf40SYehuda Sadeh 				buf = bvec_kmap_irq(bv, &flags);
689602adf40SYehuda Sadeh 				memset(buf + remainder, 0,
690602adf40SYehuda Sadeh 				       bv->bv_len - remainder);
69185b5aaa6SDan Carpenter 				bvec_kunmap_irq(buf, &flags);
692602adf40SYehuda Sadeh 			}
693602adf40SYehuda Sadeh 			pos += bv->bv_len;
694602adf40SYehuda Sadeh 		}
695602adf40SYehuda Sadeh 
696602adf40SYehuda Sadeh 		chain = chain->bi_next;
697602adf40SYehuda Sadeh 	}
698602adf40SYehuda Sadeh }
699602adf40SYehuda Sadeh 
700602adf40SYehuda Sadeh /*
701602adf40SYehuda Sadeh  * bio_chain_clone - clone a chain of bios up to a certain length.
702602adf40SYehuda Sadeh  * might return a bio_pair that will need to be released.
703602adf40SYehuda Sadeh  */
704602adf40SYehuda Sadeh static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
705602adf40SYehuda Sadeh 				   struct bio_pair **bp,
706602adf40SYehuda Sadeh 				   int len, gfp_t gfpmask)
707602adf40SYehuda Sadeh {
708602adf40SYehuda Sadeh 	struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL;
709602adf40SYehuda Sadeh 	int total = 0;
710602adf40SYehuda Sadeh 
711602adf40SYehuda Sadeh 	if (*bp) {
712602adf40SYehuda Sadeh 		bio_pair_release(*bp);
713602adf40SYehuda Sadeh 		*bp = NULL;
714602adf40SYehuda Sadeh 	}
715602adf40SYehuda Sadeh 
716602adf40SYehuda Sadeh 	while (old_chain && (total < len)) {
717602adf40SYehuda Sadeh 		tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs);
718602adf40SYehuda Sadeh 		if (!tmp)
719602adf40SYehuda Sadeh 			goto err_out;
720602adf40SYehuda Sadeh 
721602adf40SYehuda Sadeh 		if (total + old_chain->bi_size > len) {
722602adf40SYehuda Sadeh 			struct bio_pair *bp;
723602adf40SYehuda Sadeh 
724602adf40SYehuda Sadeh 			/*
725602adf40SYehuda Sadeh 			 * this split can only happen with a single paged bio,
726602adf40SYehuda Sadeh 			 * split_bio will BUG_ON if this is not the case
727602adf40SYehuda Sadeh 			 */
728602adf40SYehuda Sadeh 			dout("bio_chain_clone split! total=%d remaining=%d"
729602adf40SYehuda Sadeh 			     "bi_size=%d\n",
730602adf40SYehuda Sadeh 			     (int)total, (int)len-total,
731602adf40SYehuda Sadeh 			     (int)old_chain->bi_size);
732602adf40SYehuda Sadeh 
733602adf40SYehuda Sadeh 			/* split the bio. We'll release it either in the next
734602adf40SYehuda Sadeh 			   call, or it will have to be released outside */
735593a9e7bSAlex Elder 			bp = bio_split(old_chain, (len - total) / SECTOR_SIZE);
736602adf40SYehuda Sadeh 			if (!bp)
737602adf40SYehuda Sadeh 				goto err_out;
738602adf40SYehuda Sadeh 
739602adf40SYehuda Sadeh 			__bio_clone(tmp, &bp->bio1);
740602adf40SYehuda Sadeh 
741602adf40SYehuda Sadeh 			*next = &bp->bio2;
742602adf40SYehuda Sadeh 		} else {
743602adf40SYehuda Sadeh 			__bio_clone(tmp, old_chain);
744602adf40SYehuda Sadeh 			*next = old_chain->bi_next;
745602adf40SYehuda Sadeh 		}
746602adf40SYehuda Sadeh 
747602adf40SYehuda Sadeh 		tmp->bi_bdev = NULL;
748602adf40SYehuda Sadeh 		gfpmask &= ~__GFP_WAIT;
749602adf40SYehuda Sadeh 		tmp->bi_next = NULL;
750602adf40SYehuda Sadeh 
751602adf40SYehuda Sadeh 		if (!new_chain) {
752602adf40SYehuda Sadeh 			new_chain = tail = tmp;
753602adf40SYehuda Sadeh 		} else {
754602adf40SYehuda Sadeh 			tail->bi_next = tmp;
755602adf40SYehuda Sadeh 			tail = tmp;
756602adf40SYehuda Sadeh 		}
757602adf40SYehuda Sadeh 		old_chain = old_chain->bi_next;
758602adf40SYehuda Sadeh 
759602adf40SYehuda Sadeh 		total += tmp->bi_size;
760602adf40SYehuda Sadeh 	}
761602adf40SYehuda Sadeh 
762602adf40SYehuda Sadeh 	BUG_ON(total < len);
763602adf40SYehuda Sadeh 
764602adf40SYehuda Sadeh 	if (tail)
765602adf40SYehuda Sadeh 		tail->bi_next = NULL;
766602adf40SYehuda Sadeh 
767602adf40SYehuda Sadeh 	*old = old_chain;
768602adf40SYehuda Sadeh 
769602adf40SYehuda Sadeh 	return new_chain;
770602adf40SYehuda Sadeh 
771602adf40SYehuda Sadeh err_out:
772602adf40SYehuda Sadeh 	dout("bio_chain_clone with err\n");
773602adf40SYehuda Sadeh 	bio_chain_put(new_chain);
774602adf40SYehuda Sadeh 	return NULL;
775602adf40SYehuda Sadeh }
776602adf40SYehuda Sadeh 
777602adf40SYehuda Sadeh /*
778602adf40SYehuda Sadeh  * helpers for osd request op vectors.
779602adf40SYehuda Sadeh  */
780602adf40SYehuda Sadeh static int rbd_create_rw_ops(struct ceph_osd_req_op **ops,
781602adf40SYehuda Sadeh 			    int num_ops,
782602adf40SYehuda Sadeh 			    int opcode,
783602adf40SYehuda Sadeh 			    u32 payload_len)
784602adf40SYehuda Sadeh {
785602adf40SYehuda Sadeh 	*ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1),
786602adf40SYehuda Sadeh 		       GFP_NOIO);
787602adf40SYehuda Sadeh 	if (!*ops)
788602adf40SYehuda Sadeh 		return -ENOMEM;
789602adf40SYehuda Sadeh 	(*ops)[0].op = opcode;
790602adf40SYehuda Sadeh 	/*
791602adf40SYehuda Sadeh 	 * op extent offset and length will be set later on
792602adf40SYehuda Sadeh 	 * in calc_raw_layout()
793602adf40SYehuda Sadeh 	 */
794602adf40SYehuda Sadeh 	(*ops)[0].payload_len = payload_len;
795602adf40SYehuda Sadeh 	return 0;
796602adf40SYehuda Sadeh }
797602adf40SYehuda Sadeh 
/*
 * Free an op vector obtained from rbd_create_rw_ops().
 */
static void rbd_destroy_ops(struct ceph_osd_req_op *ops)
{
	kfree(ops);
}
802602adf40SYehuda Sadeh 
8031fec7093SYehuda Sadeh static void rbd_coll_end_req_index(struct request *rq,
8041fec7093SYehuda Sadeh 				   struct rbd_req_coll *coll,
8051fec7093SYehuda Sadeh 				   int index,
8061fec7093SYehuda Sadeh 				   int ret, u64 len)
8071fec7093SYehuda Sadeh {
8081fec7093SYehuda Sadeh 	struct request_queue *q;
8091fec7093SYehuda Sadeh 	int min, max, i;
8101fec7093SYehuda Sadeh 
8111fec7093SYehuda Sadeh 	dout("rbd_coll_end_req_index %p index %d ret %d len %lld\n",
8121fec7093SYehuda Sadeh 	     coll, index, ret, len);
8131fec7093SYehuda Sadeh 
8141fec7093SYehuda Sadeh 	if (!rq)
8151fec7093SYehuda Sadeh 		return;
8161fec7093SYehuda Sadeh 
8171fec7093SYehuda Sadeh 	if (!coll) {
8181fec7093SYehuda Sadeh 		blk_end_request(rq, ret, len);
8191fec7093SYehuda Sadeh 		return;
8201fec7093SYehuda Sadeh 	}
8211fec7093SYehuda Sadeh 
8221fec7093SYehuda Sadeh 	q = rq->q;
8231fec7093SYehuda Sadeh 
8241fec7093SYehuda Sadeh 	spin_lock_irq(q->queue_lock);
8251fec7093SYehuda Sadeh 	coll->status[index].done = 1;
8261fec7093SYehuda Sadeh 	coll->status[index].rc = ret;
8271fec7093SYehuda Sadeh 	coll->status[index].bytes = len;
8281fec7093SYehuda Sadeh 	max = min = coll->num_done;
8291fec7093SYehuda Sadeh 	while (max < coll->total && coll->status[max].done)
8301fec7093SYehuda Sadeh 		max++;
8311fec7093SYehuda Sadeh 
8321fec7093SYehuda Sadeh 	for (i = min; i<max; i++) {
8331fec7093SYehuda Sadeh 		__blk_end_request(rq, coll->status[i].rc,
8341fec7093SYehuda Sadeh 				  coll->status[i].bytes);
8351fec7093SYehuda Sadeh 		coll->num_done++;
8361fec7093SYehuda Sadeh 		kref_put(&coll->kref, rbd_coll_release);
8371fec7093SYehuda Sadeh 	}
8381fec7093SYehuda Sadeh 	spin_unlock_irq(q->queue_lock);
8391fec7093SYehuda Sadeh }
8401fec7093SYehuda Sadeh 
8411fec7093SYehuda Sadeh static void rbd_coll_end_req(struct rbd_request *req,
8421fec7093SYehuda Sadeh 			     int ret, u64 len)
8431fec7093SYehuda Sadeh {
8441fec7093SYehuda Sadeh 	rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len);
8451fec7093SYehuda Sadeh }
8461fec7093SYehuda Sadeh 
847602adf40SYehuda Sadeh /*
848602adf40SYehuda Sadeh  * Send ceph osd request
849602adf40SYehuda Sadeh  */
850602adf40SYehuda Sadeh static int rbd_do_request(struct request *rq,
851602adf40SYehuda Sadeh 			  struct rbd_device *dev,
852602adf40SYehuda Sadeh 			  struct ceph_snap_context *snapc,
853602adf40SYehuda Sadeh 			  u64 snapid,
854602adf40SYehuda Sadeh 			  const char *obj, u64 ofs, u64 len,
855602adf40SYehuda Sadeh 			  struct bio *bio,
856602adf40SYehuda Sadeh 			  struct page **pages,
857602adf40SYehuda Sadeh 			  int num_pages,
858602adf40SYehuda Sadeh 			  int flags,
859602adf40SYehuda Sadeh 			  struct ceph_osd_req_op *ops,
860602adf40SYehuda Sadeh 			  int num_reply,
8611fec7093SYehuda Sadeh 			  struct rbd_req_coll *coll,
8621fec7093SYehuda Sadeh 			  int coll_index,
863602adf40SYehuda Sadeh 			  void (*rbd_cb)(struct ceph_osd_request *req,
86459c2be1eSYehuda Sadeh 					 struct ceph_msg *msg),
86559c2be1eSYehuda Sadeh 			  struct ceph_osd_request **linger_req,
86659c2be1eSYehuda Sadeh 			  u64 *ver)
867602adf40SYehuda Sadeh {
868602adf40SYehuda Sadeh 	struct ceph_osd_request *req;
869602adf40SYehuda Sadeh 	struct ceph_file_layout *layout;
870602adf40SYehuda Sadeh 	int ret;
871602adf40SYehuda Sadeh 	u64 bno;
872602adf40SYehuda Sadeh 	struct timespec mtime = CURRENT_TIME;
873602adf40SYehuda Sadeh 	struct rbd_request *req_data;
874602adf40SYehuda Sadeh 	struct ceph_osd_request_head *reqhead;
8751dbb4399SAlex Elder 	struct ceph_osd_client *osdc;
876602adf40SYehuda Sadeh 
877602adf40SYehuda Sadeh 	req_data = kzalloc(sizeof(*req_data), GFP_NOIO);
8781fec7093SYehuda Sadeh 	if (!req_data) {
8791fec7093SYehuda Sadeh 		if (coll)
8801fec7093SYehuda Sadeh 			rbd_coll_end_req_index(rq, coll, coll_index,
8811fec7093SYehuda Sadeh 					       -ENOMEM, len);
8821fec7093SYehuda Sadeh 		return -ENOMEM;
8831fec7093SYehuda Sadeh 	}
884602adf40SYehuda Sadeh 
8851fec7093SYehuda Sadeh 	if (coll) {
8861fec7093SYehuda Sadeh 		req_data->coll = coll;
8871fec7093SYehuda Sadeh 		req_data->coll_index = coll_index;
8881fec7093SYehuda Sadeh 	}
8891fec7093SYehuda Sadeh 
8901fec7093SYehuda Sadeh 	dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs);
891602adf40SYehuda Sadeh 
892c666601aSJosh Durgin 	down_read(&dev->header_rwsem);
893602adf40SYehuda Sadeh 
8941dbb4399SAlex Elder 	osdc = &dev->rbd_client->client->osdc;
8951dbb4399SAlex Elder 	req = ceph_osdc_alloc_request(osdc, flags, snapc, ops,
8961dbb4399SAlex Elder 					false, GFP_NOIO, pages, bio);
8974ad12621SSage Weil 	if (!req) {
898c666601aSJosh Durgin 		up_read(&dev->header_rwsem);
8994ad12621SSage Weil 		ret = -ENOMEM;
900602adf40SYehuda Sadeh 		goto done_pages;
901602adf40SYehuda Sadeh 	}
902602adf40SYehuda Sadeh 
903602adf40SYehuda Sadeh 	req->r_callback = rbd_cb;
904602adf40SYehuda Sadeh 
905602adf40SYehuda Sadeh 	req_data->rq = rq;
906602adf40SYehuda Sadeh 	req_data->bio = bio;
907602adf40SYehuda Sadeh 	req_data->pages = pages;
908602adf40SYehuda Sadeh 	req_data->len = len;
909602adf40SYehuda Sadeh 
910602adf40SYehuda Sadeh 	req->r_priv = req_data;
911602adf40SYehuda Sadeh 
912602adf40SYehuda Sadeh 	reqhead = req->r_request->front.iov_base;
913602adf40SYehuda Sadeh 	reqhead->snapid = cpu_to_le64(CEPH_NOSNAP);
914602adf40SYehuda Sadeh 
915602adf40SYehuda Sadeh 	strncpy(req->r_oid, obj, sizeof(req->r_oid));
916602adf40SYehuda Sadeh 	req->r_oid_len = strlen(req->r_oid);
917602adf40SYehuda Sadeh 
918602adf40SYehuda Sadeh 	layout = &req->r_file_layout;
919602adf40SYehuda Sadeh 	memset(layout, 0, sizeof(*layout));
920602adf40SYehuda Sadeh 	layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
921602adf40SYehuda Sadeh 	layout->fl_stripe_count = cpu_to_le32(1);
922602adf40SYehuda Sadeh 	layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
923602adf40SYehuda Sadeh 	layout->fl_pg_pool = cpu_to_le32(dev->poolid);
9241dbb4399SAlex Elder 	ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
9251dbb4399SAlex Elder 				req, ops);
926602adf40SYehuda Sadeh 
927602adf40SYehuda Sadeh 	ceph_osdc_build_request(req, ofs, &len,
928602adf40SYehuda Sadeh 				ops,
929602adf40SYehuda Sadeh 				snapc,
930602adf40SYehuda Sadeh 				&mtime,
931602adf40SYehuda Sadeh 				req->r_oid, req->r_oid_len);
932c666601aSJosh Durgin 	up_read(&dev->header_rwsem);
933602adf40SYehuda Sadeh 
93459c2be1eSYehuda Sadeh 	if (linger_req) {
9351dbb4399SAlex Elder 		ceph_osdc_set_request_linger(osdc, req);
93659c2be1eSYehuda Sadeh 		*linger_req = req;
93759c2be1eSYehuda Sadeh 	}
93859c2be1eSYehuda Sadeh 
9391dbb4399SAlex Elder 	ret = ceph_osdc_start_request(osdc, req, false);
940602adf40SYehuda Sadeh 	if (ret < 0)
941602adf40SYehuda Sadeh 		goto done_err;
942602adf40SYehuda Sadeh 
943602adf40SYehuda Sadeh 	if (!rbd_cb) {
9441dbb4399SAlex Elder 		ret = ceph_osdc_wait_request(osdc, req);
94559c2be1eSYehuda Sadeh 		if (ver)
94659c2be1eSYehuda Sadeh 			*ver = le64_to_cpu(req->r_reassert_version.version);
9471fec7093SYehuda Sadeh 		dout("reassert_ver=%lld\n",
9481fec7093SYehuda Sadeh 		     le64_to_cpu(req->r_reassert_version.version));
949602adf40SYehuda Sadeh 		ceph_osdc_put_request(req);
950602adf40SYehuda Sadeh 	}
951602adf40SYehuda Sadeh 	return ret;
952602adf40SYehuda Sadeh 
953602adf40SYehuda Sadeh done_err:
954602adf40SYehuda Sadeh 	bio_chain_put(req_data->bio);
955602adf40SYehuda Sadeh 	ceph_osdc_put_request(req);
956602adf40SYehuda Sadeh done_pages:
9571fec7093SYehuda Sadeh 	rbd_coll_end_req(req_data, ret, len);
958602adf40SYehuda Sadeh 	kfree(req_data);
959602adf40SYehuda Sadeh 	return ret;
960602adf40SYehuda Sadeh }
961602adf40SYehuda Sadeh 
962602adf40SYehuda Sadeh /*
963602adf40SYehuda Sadeh  * Ceph osd op callback
964602adf40SYehuda Sadeh  */
965602adf40SYehuda Sadeh static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
966602adf40SYehuda Sadeh {
967602adf40SYehuda Sadeh 	struct rbd_request *req_data = req->r_priv;
968602adf40SYehuda Sadeh 	struct ceph_osd_reply_head *replyhead;
969602adf40SYehuda Sadeh 	struct ceph_osd_op *op;
970602adf40SYehuda Sadeh 	__s32 rc;
971602adf40SYehuda Sadeh 	u64 bytes;
972602adf40SYehuda Sadeh 	int read_op;
973602adf40SYehuda Sadeh 
974602adf40SYehuda Sadeh 	/* parse reply */
975602adf40SYehuda Sadeh 	replyhead = msg->front.iov_base;
976602adf40SYehuda Sadeh 	WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
977602adf40SYehuda Sadeh 	op = (void *)(replyhead + 1);
978602adf40SYehuda Sadeh 	rc = le32_to_cpu(replyhead->result);
979602adf40SYehuda Sadeh 	bytes = le64_to_cpu(op->extent.length);
980602adf40SYehuda Sadeh 	read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ);
981602adf40SYehuda Sadeh 
982602adf40SYehuda Sadeh 	dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc);
983602adf40SYehuda Sadeh 
984602adf40SYehuda Sadeh 	if (rc == -ENOENT && read_op) {
985602adf40SYehuda Sadeh 		zero_bio_chain(req_data->bio, 0);
986602adf40SYehuda Sadeh 		rc = 0;
987602adf40SYehuda Sadeh 	} else if (rc == 0 && read_op && bytes < req_data->len) {
988602adf40SYehuda Sadeh 		zero_bio_chain(req_data->bio, bytes);
989602adf40SYehuda Sadeh 		bytes = req_data->len;
990602adf40SYehuda Sadeh 	}
991602adf40SYehuda Sadeh 
9921fec7093SYehuda Sadeh 	rbd_coll_end_req(req_data, rc, bytes);
993602adf40SYehuda Sadeh 
994602adf40SYehuda Sadeh 	if (req_data->bio)
995602adf40SYehuda Sadeh 		bio_chain_put(req_data->bio);
996602adf40SYehuda Sadeh 
997602adf40SYehuda Sadeh 	ceph_osdc_put_request(req);
998602adf40SYehuda Sadeh 	kfree(req_data);
999602adf40SYehuda Sadeh }
1000602adf40SYehuda Sadeh 
/*
 * Minimal osd op callback: only drops the request reference.
 *
 * NOTE(review): the rbd_request that rbd_do_request() hangs off
 * req->r_priv is not freed here -- confirm whether that is a leak.
 */
static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
{
	ceph_osdc_put_request(req);
}
100559c2be1eSYehuda Sadeh 
1006602adf40SYehuda Sadeh /*
1007602adf40SYehuda Sadeh  * Do a synchronous ceph osd operation
1008602adf40SYehuda Sadeh  */
1009602adf40SYehuda Sadeh static int rbd_req_sync_op(struct rbd_device *dev,
1010602adf40SYehuda Sadeh 			   struct ceph_snap_context *snapc,
1011602adf40SYehuda Sadeh 			   u64 snapid,
1012602adf40SYehuda Sadeh 			   int opcode,
1013602adf40SYehuda Sadeh 			   int flags,
1014602adf40SYehuda Sadeh 			   struct ceph_osd_req_op *orig_ops,
1015602adf40SYehuda Sadeh 			   int num_reply,
1016602adf40SYehuda Sadeh 			   const char *obj,
1017602adf40SYehuda Sadeh 			   u64 ofs, u64 len,
101859c2be1eSYehuda Sadeh 			   char *buf,
101959c2be1eSYehuda Sadeh 			   struct ceph_osd_request **linger_req,
102059c2be1eSYehuda Sadeh 			   u64 *ver)
1021602adf40SYehuda Sadeh {
1022602adf40SYehuda Sadeh 	int ret;
1023602adf40SYehuda Sadeh 	struct page **pages;
1024602adf40SYehuda Sadeh 	int num_pages;
1025602adf40SYehuda Sadeh 	struct ceph_osd_req_op *ops = orig_ops;
1026602adf40SYehuda Sadeh 	u32 payload_len;
1027602adf40SYehuda Sadeh 
1028602adf40SYehuda Sadeh 	num_pages = calc_pages_for(ofs , len);
1029602adf40SYehuda Sadeh 	pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
1030b8d0638aSDan Carpenter 	if (IS_ERR(pages))
1031b8d0638aSDan Carpenter 		return PTR_ERR(pages);
1032602adf40SYehuda Sadeh 
1033602adf40SYehuda Sadeh 	if (!orig_ops) {
1034602adf40SYehuda Sadeh 		payload_len = (flags & CEPH_OSD_FLAG_WRITE ? len : 0);
1035602adf40SYehuda Sadeh 		ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
1036602adf40SYehuda Sadeh 		if (ret < 0)
1037602adf40SYehuda Sadeh 			goto done;
1038602adf40SYehuda Sadeh 
1039602adf40SYehuda Sadeh 		if ((flags & CEPH_OSD_FLAG_WRITE) && buf) {
1040602adf40SYehuda Sadeh 			ret = ceph_copy_to_page_vector(pages, buf, ofs, len);
1041602adf40SYehuda Sadeh 			if (ret < 0)
1042602adf40SYehuda Sadeh 				goto done_ops;
1043602adf40SYehuda Sadeh 		}
1044602adf40SYehuda Sadeh 	}
1045602adf40SYehuda Sadeh 
1046602adf40SYehuda Sadeh 	ret = rbd_do_request(NULL, dev, snapc, snapid,
1047602adf40SYehuda Sadeh 			  obj, ofs, len, NULL,
1048602adf40SYehuda Sadeh 			  pages, num_pages,
1049602adf40SYehuda Sadeh 			  flags,
1050602adf40SYehuda Sadeh 			  ops,
1051602adf40SYehuda Sadeh 			  2,
10521fec7093SYehuda Sadeh 			  NULL, 0,
105359c2be1eSYehuda Sadeh 			  NULL,
105459c2be1eSYehuda Sadeh 			  linger_req, ver);
1055602adf40SYehuda Sadeh 	if (ret < 0)
1056602adf40SYehuda Sadeh 		goto done_ops;
1057602adf40SYehuda Sadeh 
1058602adf40SYehuda Sadeh 	if ((flags & CEPH_OSD_FLAG_READ) && buf)
1059602adf40SYehuda Sadeh 		ret = ceph_copy_from_page_vector(pages, buf, ofs, ret);
1060602adf40SYehuda Sadeh 
1061602adf40SYehuda Sadeh done_ops:
1062602adf40SYehuda Sadeh 	if (!orig_ops)
1063602adf40SYehuda Sadeh 		rbd_destroy_ops(ops);
1064602adf40SYehuda Sadeh done:
1065602adf40SYehuda Sadeh 	ceph_release_page_vector(pages, num_pages);
1066602adf40SYehuda Sadeh 	return ret;
1067602adf40SYehuda Sadeh }
1068602adf40SYehuda Sadeh 
1069602adf40SYehuda Sadeh /*
1070602adf40SYehuda Sadeh  * Do an asynchronous ceph osd operation
1071602adf40SYehuda Sadeh  */
1072602adf40SYehuda Sadeh static int rbd_do_op(struct request *rq,
1073602adf40SYehuda Sadeh 		     struct rbd_device *rbd_dev ,
1074602adf40SYehuda Sadeh 		     struct ceph_snap_context *snapc,
1075602adf40SYehuda Sadeh 		     u64 snapid,
1076602adf40SYehuda Sadeh 		     int opcode, int flags, int num_reply,
1077602adf40SYehuda Sadeh 		     u64 ofs, u64 len,
10781fec7093SYehuda Sadeh 		     struct bio *bio,
10791fec7093SYehuda Sadeh 		     struct rbd_req_coll *coll,
10801fec7093SYehuda Sadeh 		     int coll_index)
1081602adf40SYehuda Sadeh {
1082602adf40SYehuda Sadeh 	char *seg_name;
1083602adf40SYehuda Sadeh 	u64 seg_ofs;
1084602adf40SYehuda Sadeh 	u64 seg_len;
1085602adf40SYehuda Sadeh 	int ret;
1086602adf40SYehuda Sadeh 	struct ceph_osd_req_op *ops;
1087602adf40SYehuda Sadeh 	u32 payload_len;
1088602adf40SYehuda Sadeh 
1089602adf40SYehuda Sadeh 	seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
1090602adf40SYehuda Sadeh 	if (!seg_name)
1091602adf40SYehuda Sadeh 		return -ENOMEM;
1092602adf40SYehuda Sadeh 
1093602adf40SYehuda Sadeh 	seg_len = rbd_get_segment(&rbd_dev->header,
1094602adf40SYehuda Sadeh 				  rbd_dev->header.block_name,
1095602adf40SYehuda Sadeh 				  ofs, len,
1096602adf40SYehuda Sadeh 				  seg_name, &seg_ofs);
1097602adf40SYehuda Sadeh 
1098602adf40SYehuda Sadeh 	payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0);
1099602adf40SYehuda Sadeh 
1100602adf40SYehuda Sadeh 	ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
1101602adf40SYehuda Sadeh 	if (ret < 0)
1102602adf40SYehuda Sadeh 		goto done;
1103602adf40SYehuda Sadeh 
1104602adf40SYehuda Sadeh 	/* we've taken care of segment sizes earlier when we
1105602adf40SYehuda Sadeh 	   cloned the bios. We should never have a segment
1106602adf40SYehuda Sadeh 	   truncated at this point */
1107602adf40SYehuda Sadeh 	BUG_ON(seg_len < len);
1108602adf40SYehuda Sadeh 
1109602adf40SYehuda Sadeh 	ret = rbd_do_request(rq, rbd_dev, snapc, snapid,
1110602adf40SYehuda Sadeh 			     seg_name, seg_ofs, seg_len,
1111602adf40SYehuda Sadeh 			     bio,
1112602adf40SYehuda Sadeh 			     NULL, 0,
1113602adf40SYehuda Sadeh 			     flags,
1114602adf40SYehuda Sadeh 			     ops,
1115602adf40SYehuda Sadeh 			     num_reply,
11161fec7093SYehuda Sadeh 			     coll, coll_index,
111759c2be1eSYehuda Sadeh 			     rbd_req_cb, 0, NULL);
111811f77002SSage Weil 
111911f77002SSage Weil 	rbd_destroy_ops(ops);
1120602adf40SYehuda Sadeh done:
1121602adf40SYehuda Sadeh 	kfree(seg_name);
1122602adf40SYehuda Sadeh 	return ret;
1123602adf40SYehuda Sadeh }
1124602adf40SYehuda Sadeh 
1125602adf40SYehuda Sadeh /*
1126602adf40SYehuda Sadeh  * Request async osd write
1127602adf40SYehuda Sadeh  */
1128602adf40SYehuda Sadeh static int rbd_req_write(struct request *rq,
1129602adf40SYehuda Sadeh 			 struct rbd_device *rbd_dev,
1130602adf40SYehuda Sadeh 			 struct ceph_snap_context *snapc,
1131602adf40SYehuda Sadeh 			 u64 ofs, u64 len,
11321fec7093SYehuda Sadeh 			 struct bio *bio,
11331fec7093SYehuda Sadeh 			 struct rbd_req_coll *coll,
11341fec7093SYehuda Sadeh 			 int coll_index)
1135602adf40SYehuda Sadeh {
1136602adf40SYehuda Sadeh 	return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP,
1137602adf40SYehuda Sadeh 			 CEPH_OSD_OP_WRITE,
1138602adf40SYehuda Sadeh 			 CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
1139602adf40SYehuda Sadeh 			 2,
11401fec7093SYehuda Sadeh 			 ofs, len, bio, coll, coll_index);
1141602adf40SYehuda Sadeh }
1142602adf40SYehuda Sadeh 
1143602adf40SYehuda Sadeh /*
1144602adf40SYehuda Sadeh  * Request async osd read
1145602adf40SYehuda Sadeh  */
1146602adf40SYehuda Sadeh static int rbd_req_read(struct request *rq,
1147602adf40SYehuda Sadeh 			 struct rbd_device *rbd_dev,
1148602adf40SYehuda Sadeh 			 u64 snapid,
1149602adf40SYehuda Sadeh 			 u64 ofs, u64 len,
11501fec7093SYehuda Sadeh 			 struct bio *bio,
11511fec7093SYehuda Sadeh 			 struct rbd_req_coll *coll,
11521fec7093SYehuda Sadeh 			 int coll_index)
1153602adf40SYehuda Sadeh {
1154602adf40SYehuda Sadeh 	return rbd_do_op(rq, rbd_dev, NULL,
1155b06e6a6bSJosh Durgin 			 snapid,
1156602adf40SYehuda Sadeh 			 CEPH_OSD_OP_READ,
1157602adf40SYehuda Sadeh 			 CEPH_OSD_FLAG_READ,
1158602adf40SYehuda Sadeh 			 2,
11591fec7093SYehuda Sadeh 			 ofs, len, bio, coll, coll_index);
1160602adf40SYehuda Sadeh }
1161602adf40SYehuda Sadeh 
1162602adf40SYehuda Sadeh /*
1163602adf40SYehuda Sadeh  * Request sync osd read
1164602adf40SYehuda Sadeh  */
1165602adf40SYehuda Sadeh static int rbd_req_sync_read(struct rbd_device *dev,
1166602adf40SYehuda Sadeh 			  struct ceph_snap_context *snapc,
1167602adf40SYehuda Sadeh 			  u64 snapid,
1168602adf40SYehuda Sadeh 			  const char *obj,
1169602adf40SYehuda Sadeh 			  u64 ofs, u64 len,
117059c2be1eSYehuda Sadeh 			  char *buf,
117159c2be1eSYehuda Sadeh 			  u64 *ver)
1172602adf40SYehuda Sadeh {
1173602adf40SYehuda Sadeh 	return rbd_req_sync_op(dev, NULL,
1174b06e6a6bSJosh Durgin 			       snapid,
1175602adf40SYehuda Sadeh 			       CEPH_OSD_OP_READ,
1176602adf40SYehuda Sadeh 			       CEPH_OSD_FLAG_READ,
1177602adf40SYehuda Sadeh 			       NULL,
117859c2be1eSYehuda Sadeh 			       1, obj, ofs, len, buf, NULL, ver);
1179602adf40SYehuda Sadeh }
1180602adf40SYehuda Sadeh 
1181602adf40SYehuda Sadeh /*
118259c2be1eSYehuda Sadeh  * Request sync osd watch
118359c2be1eSYehuda Sadeh  */
118459c2be1eSYehuda Sadeh static int rbd_req_sync_notify_ack(struct rbd_device *dev,
118559c2be1eSYehuda Sadeh 				   u64 ver,
118659c2be1eSYehuda Sadeh 				   u64 notify_id,
118759c2be1eSYehuda Sadeh 				   const char *obj)
118859c2be1eSYehuda Sadeh {
118959c2be1eSYehuda Sadeh 	struct ceph_osd_req_op *ops;
119059c2be1eSYehuda Sadeh 	struct page **pages = NULL;
119111f77002SSage Weil 	int ret;
119211f77002SSage Weil 
119311f77002SSage Weil 	ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY_ACK, 0);
119459c2be1eSYehuda Sadeh 	if (ret < 0)
119559c2be1eSYehuda Sadeh 		return ret;
119659c2be1eSYehuda Sadeh 
119759c2be1eSYehuda Sadeh 	ops[0].watch.ver = cpu_to_le64(dev->header.obj_version);
119859c2be1eSYehuda Sadeh 	ops[0].watch.cookie = notify_id;
119959c2be1eSYehuda Sadeh 	ops[0].watch.flag = 0;
120059c2be1eSYehuda Sadeh 
120159c2be1eSYehuda Sadeh 	ret = rbd_do_request(NULL, dev, NULL, CEPH_NOSNAP,
120259c2be1eSYehuda Sadeh 			  obj, 0, 0, NULL,
120359c2be1eSYehuda Sadeh 			  pages, 0,
120459c2be1eSYehuda Sadeh 			  CEPH_OSD_FLAG_READ,
120559c2be1eSYehuda Sadeh 			  ops,
120659c2be1eSYehuda Sadeh 			  1,
12071fec7093SYehuda Sadeh 			  NULL, 0,
120859c2be1eSYehuda Sadeh 			  rbd_simple_req_cb, 0, NULL);
120959c2be1eSYehuda Sadeh 
121059c2be1eSYehuda Sadeh 	rbd_destroy_ops(ops);
121159c2be1eSYehuda Sadeh 	return ret;
121259c2be1eSYehuda Sadeh }
121359c2be1eSYehuda Sadeh 
121459c2be1eSYehuda Sadeh static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
121559c2be1eSYehuda Sadeh {
121659c2be1eSYehuda Sadeh 	struct rbd_device *dev = (struct rbd_device *)data;
121713143d2dSSage Weil 	int rc;
121813143d2dSSage Weil 
121959c2be1eSYehuda Sadeh 	if (!dev)
122059c2be1eSYehuda Sadeh 		return;
122159c2be1eSYehuda Sadeh 
122259c2be1eSYehuda Sadeh 	dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name,
122359c2be1eSYehuda Sadeh 		notify_id, (int)opcode);
122459c2be1eSYehuda Sadeh 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1225263c6ca0SJosh Durgin 	rc = __rbd_refresh_header(dev);
122659c2be1eSYehuda Sadeh 	mutex_unlock(&ctl_mutex);
122713143d2dSSage Weil 	if (rc)
1228f0f8cef5SAlex Elder 		pr_warning(RBD_DRV_NAME "%d got notification but failed to "
1229f0f8cef5SAlex Elder 			   " update snaps: %d\n", dev->major, rc);
123059c2be1eSYehuda Sadeh 
123159c2be1eSYehuda Sadeh 	rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name);
123259c2be1eSYehuda Sadeh }
123359c2be1eSYehuda Sadeh 
123459c2be1eSYehuda Sadeh /*
123559c2be1eSYehuda Sadeh  * Request sync osd watch
123659c2be1eSYehuda Sadeh  */
123759c2be1eSYehuda Sadeh static int rbd_req_sync_watch(struct rbd_device *dev,
123859c2be1eSYehuda Sadeh 			      const char *obj,
123959c2be1eSYehuda Sadeh 			      u64 ver)
124059c2be1eSYehuda Sadeh {
124159c2be1eSYehuda Sadeh 	struct ceph_osd_req_op *ops;
12421dbb4399SAlex Elder 	struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc;
124359c2be1eSYehuda Sadeh 
124459c2be1eSYehuda Sadeh 	int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0);
124559c2be1eSYehuda Sadeh 	if (ret < 0)
124659c2be1eSYehuda Sadeh 		return ret;
124759c2be1eSYehuda Sadeh 
124859c2be1eSYehuda Sadeh 	ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0,
124959c2be1eSYehuda Sadeh 				     (void *)dev, &dev->watch_event);
125059c2be1eSYehuda Sadeh 	if (ret < 0)
125159c2be1eSYehuda Sadeh 		goto fail;
125259c2be1eSYehuda Sadeh 
125359c2be1eSYehuda Sadeh 	ops[0].watch.ver = cpu_to_le64(ver);
125459c2be1eSYehuda Sadeh 	ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie);
125559c2be1eSYehuda Sadeh 	ops[0].watch.flag = 1;
125659c2be1eSYehuda Sadeh 
125759c2be1eSYehuda Sadeh 	ret = rbd_req_sync_op(dev, NULL,
125859c2be1eSYehuda Sadeh 			      CEPH_NOSNAP,
125959c2be1eSYehuda Sadeh 			      0,
126059c2be1eSYehuda Sadeh 			      CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
126159c2be1eSYehuda Sadeh 			      ops,
126259c2be1eSYehuda Sadeh 			      1, obj, 0, 0, NULL,
126359c2be1eSYehuda Sadeh 			      &dev->watch_request, NULL);
126459c2be1eSYehuda Sadeh 
126559c2be1eSYehuda Sadeh 	if (ret < 0)
126659c2be1eSYehuda Sadeh 		goto fail_event;
126759c2be1eSYehuda Sadeh 
126859c2be1eSYehuda Sadeh 	rbd_destroy_ops(ops);
126959c2be1eSYehuda Sadeh 	return 0;
127059c2be1eSYehuda Sadeh 
127159c2be1eSYehuda Sadeh fail_event:
127259c2be1eSYehuda Sadeh 	ceph_osdc_cancel_event(dev->watch_event);
127359c2be1eSYehuda Sadeh 	dev->watch_event = NULL;
127459c2be1eSYehuda Sadeh fail:
127559c2be1eSYehuda Sadeh 	rbd_destroy_ops(ops);
127659c2be1eSYehuda Sadeh 	return ret;
127759c2be1eSYehuda Sadeh }
127859c2be1eSYehuda Sadeh 
127979e3057cSYehuda Sadeh /*
128079e3057cSYehuda Sadeh  * Request sync osd unwatch
128179e3057cSYehuda Sadeh  */
128279e3057cSYehuda Sadeh static int rbd_req_sync_unwatch(struct rbd_device *dev,
128379e3057cSYehuda Sadeh 				const char *obj)
128479e3057cSYehuda Sadeh {
128579e3057cSYehuda Sadeh 	struct ceph_osd_req_op *ops;
128679e3057cSYehuda Sadeh 
128779e3057cSYehuda Sadeh 	int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0);
128879e3057cSYehuda Sadeh 	if (ret < 0)
128979e3057cSYehuda Sadeh 		return ret;
129079e3057cSYehuda Sadeh 
129179e3057cSYehuda Sadeh 	ops[0].watch.ver = 0;
129279e3057cSYehuda Sadeh 	ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie);
129379e3057cSYehuda Sadeh 	ops[0].watch.flag = 0;
129479e3057cSYehuda Sadeh 
129579e3057cSYehuda Sadeh 	ret = rbd_req_sync_op(dev, NULL,
129679e3057cSYehuda Sadeh 			      CEPH_NOSNAP,
129779e3057cSYehuda Sadeh 			      0,
129879e3057cSYehuda Sadeh 			      CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
129979e3057cSYehuda Sadeh 			      ops,
130079e3057cSYehuda Sadeh 			      1, obj, 0, 0, NULL, NULL, NULL);
130179e3057cSYehuda Sadeh 
130279e3057cSYehuda Sadeh 	rbd_destroy_ops(ops);
130379e3057cSYehuda Sadeh 	ceph_osdc_cancel_event(dev->watch_event);
130479e3057cSYehuda Sadeh 	dev->watch_event = NULL;
130579e3057cSYehuda Sadeh 	return ret;
130679e3057cSYehuda Sadeh }
130779e3057cSYehuda Sadeh 
/*
 * Context that rbd_req_sync_notify() registers with the osd event it
 * creates for a notify.
 */
struct rbd_notify_info {
	struct rbd_device *dev;	/* device the notify is sent for */
};
131159c2be1eSYehuda Sadeh 
131259c2be1eSYehuda Sadeh static void rbd_notify_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
131359c2be1eSYehuda Sadeh {
131459c2be1eSYehuda Sadeh 	struct rbd_device *dev = (struct rbd_device *)data;
131559c2be1eSYehuda Sadeh 	if (!dev)
131659c2be1eSYehuda Sadeh 		return;
131759c2be1eSYehuda Sadeh 
131859c2be1eSYehuda Sadeh 	dout("rbd_notify_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name,
131959c2be1eSYehuda Sadeh 		notify_id, (int)opcode);
132059c2be1eSYehuda Sadeh }
132159c2be1eSYehuda Sadeh 
132259c2be1eSYehuda Sadeh /*
132359c2be1eSYehuda Sadeh  * Request sync osd notify
132459c2be1eSYehuda Sadeh  */
132559c2be1eSYehuda Sadeh static int rbd_req_sync_notify(struct rbd_device *dev,
132659c2be1eSYehuda Sadeh 		          const char *obj)
132759c2be1eSYehuda Sadeh {
132859c2be1eSYehuda Sadeh 	struct ceph_osd_req_op *ops;
13291dbb4399SAlex Elder 	struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc;
133059c2be1eSYehuda Sadeh 	struct ceph_osd_event *event;
133159c2be1eSYehuda Sadeh 	struct rbd_notify_info info;
133259c2be1eSYehuda Sadeh 	int payload_len = sizeof(u32) + sizeof(u32);
133359c2be1eSYehuda Sadeh 	int ret;
133459c2be1eSYehuda Sadeh 
133559c2be1eSYehuda Sadeh 	ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY, payload_len);
133659c2be1eSYehuda Sadeh 	if (ret < 0)
133759c2be1eSYehuda Sadeh 		return ret;
133859c2be1eSYehuda Sadeh 
133959c2be1eSYehuda Sadeh 	info.dev = dev;
134059c2be1eSYehuda Sadeh 
134159c2be1eSYehuda Sadeh 	ret = ceph_osdc_create_event(osdc, rbd_notify_cb, 1,
134259c2be1eSYehuda Sadeh 				     (void *)&info, &event);
134359c2be1eSYehuda Sadeh 	if (ret < 0)
134459c2be1eSYehuda Sadeh 		goto fail;
134559c2be1eSYehuda Sadeh 
134659c2be1eSYehuda Sadeh 	ops[0].watch.ver = 1;
134759c2be1eSYehuda Sadeh 	ops[0].watch.flag = 1;
134859c2be1eSYehuda Sadeh 	ops[0].watch.cookie = event->cookie;
134959c2be1eSYehuda Sadeh 	ops[0].watch.prot_ver = RADOS_NOTIFY_VER;
135059c2be1eSYehuda Sadeh 	ops[0].watch.timeout = 12;
135159c2be1eSYehuda Sadeh 
135259c2be1eSYehuda Sadeh 	ret = rbd_req_sync_op(dev, NULL,
135359c2be1eSYehuda Sadeh 			       CEPH_NOSNAP,
135459c2be1eSYehuda Sadeh 			       0,
135559c2be1eSYehuda Sadeh 			       CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
135659c2be1eSYehuda Sadeh 			       ops,
135759c2be1eSYehuda Sadeh 			       1, obj, 0, 0, NULL, NULL, NULL);
135859c2be1eSYehuda Sadeh 	if (ret < 0)
135959c2be1eSYehuda Sadeh 		goto fail_event;
136059c2be1eSYehuda Sadeh 
136159c2be1eSYehuda Sadeh 	ret = ceph_osdc_wait_event(event, CEPH_OSD_TIMEOUT_DEFAULT);
136259c2be1eSYehuda Sadeh 	dout("ceph_osdc_wait_event returned %d\n", ret);
136359c2be1eSYehuda Sadeh 	rbd_destroy_ops(ops);
136459c2be1eSYehuda Sadeh 	return 0;
136559c2be1eSYehuda Sadeh 
136659c2be1eSYehuda Sadeh fail_event:
136759c2be1eSYehuda Sadeh 	ceph_osdc_cancel_event(event);
136859c2be1eSYehuda Sadeh fail:
136959c2be1eSYehuda Sadeh 	rbd_destroy_ops(ops);
137059c2be1eSYehuda Sadeh 	return ret;
137159c2be1eSYehuda Sadeh }
137259c2be1eSYehuda Sadeh 
137359c2be1eSYehuda Sadeh /*
1374602adf40SYehuda Sadeh  * Request sync osd read
1375602adf40SYehuda Sadeh  */
1376602adf40SYehuda Sadeh static int rbd_req_sync_exec(struct rbd_device *dev,
1377602adf40SYehuda Sadeh 			     const char *obj,
1378602adf40SYehuda Sadeh 			     const char *cls,
1379602adf40SYehuda Sadeh 			     const char *method,
1380602adf40SYehuda Sadeh 			     const char *data,
138159c2be1eSYehuda Sadeh 			     int len,
138259c2be1eSYehuda Sadeh 			     u64 *ver)
1383602adf40SYehuda Sadeh {
1384602adf40SYehuda Sadeh 	struct ceph_osd_req_op *ops;
1385602adf40SYehuda Sadeh 	int cls_len = strlen(cls);
1386602adf40SYehuda Sadeh 	int method_len = strlen(method);
1387602adf40SYehuda Sadeh 	int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL,
1388602adf40SYehuda Sadeh 				    cls_len + method_len + len);
1389602adf40SYehuda Sadeh 	if (ret < 0)
1390602adf40SYehuda Sadeh 		return ret;
1391602adf40SYehuda Sadeh 
1392602adf40SYehuda Sadeh 	ops[0].cls.class_name = cls;
1393602adf40SYehuda Sadeh 	ops[0].cls.class_len = (__u8)cls_len;
1394602adf40SYehuda Sadeh 	ops[0].cls.method_name = method;
1395602adf40SYehuda Sadeh 	ops[0].cls.method_len = (__u8)method_len;
1396602adf40SYehuda Sadeh 	ops[0].cls.argc = 0;
1397602adf40SYehuda Sadeh 	ops[0].cls.indata = data;
1398602adf40SYehuda Sadeh 	ops[0].cls.indata_len = len;
1399602adf40SYehuda Sadeh 
1400602adf40SYehuda Sadeh 	ret = rbd_req_sync_op(dev, NULL,
1401602adf40SYehuda Sadeh 			       CEPH_NOSNAP,
1402602adf40SYehuda Sadeh 			       0,
1403602adf40SYehuda Sadeh 			       CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
1404602adf40SYehuda Sadeh 			       ops,
140559c2be1eSYehuda Sadeh 			       1, obj, 0, 0, NULL, NULL, ver);
1406602adf40SYehuda Sadeh 
1407602adf40SYehuda Sadeh 	rbd_destroy_ops(ops);
1408602adf40SYehuda Sadeh 
1409602adf40SYehuda Sadeh 	dout("cls_exec returned %d\n", ret);
1410602adf40SYehuda Sadeh 	return ret;
1411602adf40SYehuda Sadeh }
1412602adf40SYehuda Sadeh 
14131fec7093SYehuda Sadeh static struct rbd_req_coll *rbd_alloc_coll(int num_reqs)
14141fec7093SYehuda Sadeh {
14151fec7093SYehuda Sadeh 	struct rbd_req_coll *coll =
14161fec7093SYehuda Sadeh 			kzalloc(sizeof(struct rbd_req_coll) +
14171fec7093SYehuda Sadeh 			        sizeof(struct rbd_req_status) * num_reqs,
14181fec7093SYehuda Sadeh 				GFP_ATOMIC);
14191fec7093SYehuda Sadeh 
14201fec7093SYehuda Sadeh 	if (!coll)
14211fec7093SYehuda Sadeh 		return NULL;
14221fec7093SYehuda Sadeh 	coll->total = num_reqs;
14231fec7093SYehuda Sadeh 	kref_init(&coll->kref);
14241fec7093SYehuda Sadeh 	return coll;
14251fec7093SYehuda Sadeh }
14261fec7093SYehuda Sadeh 
1427602adf40SYehuda Sadeh /*
1428602adf40SYehuda Sadeh  * block device queue callback
1429602adf40SYehuda Sadeh  */
1430602adf40SYehuda Sadeh static void rbd_rq_fn(struct request_queue *q)
1431602adf40SYehuda Sadeh {
1432602adf40SYehuda Sadeh 	struct rbd_device *rbd_dev = q->queuedata;
1433602adf40SYehuda Sadeh 	struct request *rq;
1434602adf40SYehuda Sadeh 	struct bio_pair *bp = NULL;
1435602adf40SYehuda Sadeh 
143600f1f36fSAlex Elder 	while ((rq = blk_fetch_request(q))) {
1437602adf40SYehuda Sadeh 		struct bio *bio;
1438602adf40SYehuda Sadeh 		struct bio *rq_bio, *next_bio = NULL;
1439602adf40SYehuda Sadeh 		bool do_write;
1440602adf40SYehuda Sadeh 		int size, op_size = 0;
1441602adf40SYehuda Sadeh 		u64 ofs;
14421fec7093SYehuda Sadeh 		int num_segs, cur_seg = 0;
14431fec7093SYehuda Sadeh 		struct rbd_req_coll *coll;
1444602adf40SYehuda Sadeh 
1445602adf40SYehuda Sadeh 		/* peek at request from block layer */
1446602adf40SYehuda Sadeh 		if (!rq)
1447602adf40SYehuda Sadeh 			break;
1448602adf40SYehuda Sadeh 
1449602adf40SYehuda Sadeh 		dout("fetched request\n");
1450602adf40SYehuda Sadeh 
1451602adf40SYehuda Sadeh 		/* filter out block requests we don't understand */
1452602adf40SYehuda Sadeh 		if ((rq->cmd_type != REQ_TYPE_FS)) {
1453602adf40SYehuda Sadeh 			__blk_end_request_all(rq, 0);
145400f1f36fSAlex Elder 			continue;
1455602adf40SYehuda Sadeh 		}
1456602adf40SYehuda Sadeh 
1457602adf40SYehuda Sadeh 		/* deduce our operation (read, write) */
1458602adf40SYehuda Sadeh 		do_write = (rq_data_dir(rq) == WRITE);
1459602adf40SYehuda Sadeh 
1460602adf40SYehuda Sadeh 		size = blk_rq_bytes(rq);
1461593a9e7bSAlex Elder 		ofs = blk_rq_pos(rq) * SECTOR_SIZE;
1462602adf40SYehuda Sadeh 		rq_bio = rq->bio;
1463602adf40SYehuda Sadeh 		if (do_write && rbd_dev->read_only) {
1464602adf40SYehuda Sadeh 			__blk_end_request_all(rq, -EROFS);
146500f1f36fSAlex Elder 			continue;
1466602adf40SYehuda Sadeh 		}
1467602adf40SYehuda Sadeh 
1468602adf40SYehuda Sadeh 		spin_unlock_irq(q->queue_lock);
1469602adf40SYehuda Sadeh 
1470602adf40SYehuda Sadeh 		dout("%s 0x%x bytes at 0x%llx\n",
1471602adf40SYehuda Sadeh 		     do_write ? "write" : "read",
1472593a9e7bSAlex Elder 		     size, blk_rq_pos(rq) * SECTOR_SIZE);
1473602adf40SYehuda Sadeh 
14741fec7093SYehuda Sadeh 		num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size);
14751fec7093SYehuda Sadeh 		coll = rbd_alloc_coll(num_segs);
14761fec7093SYehuda Sadeh 		if (!coll) {
14771fec7093SYehuda Sadeh 			spin_lock_irq(q->queue_lock);
14781fec7093SYehuda Sadeh 			__blk_end_request_all(rq, -ENOMEM);
147900f1f36fSAlex Elder 			continue;
14801fec7093SYehuda Sadeh 		}
14811fec7093SYehuda Sadeh 
1482602adf40SYehuda Sadeh 		do {
1483602adf40SYehuda Sadeh 			/* a bio clone to be passed down to OSD req */
1484602adf40SYehuda Sadeh 			dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt);
1485602adf40SYehuda Sadeh 			op_size = rbd_get_segment(&rbd_dev->header,
1486602adf40SYehuda Sadeh 						  rbd_dev->header.block_name,
1487602adf40SYehuda Sadeh 						  ofs, size,
1488602adf40SYehuda Sadeh 						  NULL, NULL);
14891fec7093SYehuda Sadeh 			kref_get(&coll->kref);
1490602adf40SYehuda Sadeh 			bio = bio_chain_clone(&rq_bio, &next_bio, &bp,
1491602adf40SYehuda Sadeh 					      op_size, GFP_ATOMIC);
1492602adf40SYehuda Sadeh 			if (!bio) {
14931fec7093SYehuda Sadeh 				rbd_coll_end_req_index(rq, coll, cur_seg,
14941fec7093SYehuda Sadeh 						       -ENOMEM, op_size);
14951fec7093SYehuda Sadeh 				goto next_seg;
1496602adf40SYehuda Sadeh 			}
1497602adf40SYehuda Sadeh 
14981fec7093SYehuda Sadeh 
1499602adf40SYehuda Sadeh 			/* init OSD command: write or read */
1500602adf40SYehuda Sadeh 			if (do_write)
1501602adf40SYehuda Sadeh 				rbd_req_write(rq, rbd_dev,
1502602adf40SYehuda Sadeh 					      rbd_dev->header.snapc,
1503602adf40SYehuda Sadeh 					      ofs,
15041fec7093SYehuda Sadeh 					      op_size, bio,
15051fec7093SYehuda Sadeh 					      coll, cur_seg);
1506602adf40SYehuda Sadeh 			else
1507602adf40SYehuda Sadeh 				rbd_req_read(rq, rbd_dev,
150877dfe99fSJosh Durgin 					     rbd_dev->snap_id,
1509602adf40SYehuda Sadeh 					     ofs,
15101fec7093SYehuda Sadeh 					     op_size, bio,
15111fec7093SYehuda Sadeh 					     coll, cur_seg);
1512602adf40SYehuda Sadeh 
15131fec7093SYehuda Sadeh next_seg:
1514602adf40SYehuda Sadeh 			size -= op_size;
1515602adf40SYehuda Sadeh 			ofs += op_size;
1516602adf40SYehuda Sadeh 
15171fec7093SYehuda Sadeh 			cur_seg++;
1518602adf40SYehuda Sadeh 			rq_bio = next_bio;
1519602adf40SYehuda Sadeh 		} while (size > 0);
15201fec7093SYehuda Sadeh 		kref_put(&coll->kref, rbd_coll_release);
1521602adf40SYehuda Sadeh 
1522602adf40SYehuda Sadeh 		if (bp)
1523602adf40SYehuda Sadeh 			bio_pair_release(bp);
1524602adf40SYehuda Sadeh 		spin_lock_irq(q->queue_lock);
1525602adf40SYehuda Sadeh 	}
1526602adf40SYehuda Sadeh }
1527602adf40SYehuda Sadeh 
1528602adf40SYehuda Sadeh /*
1529602adf40SYehuda Sadeh  * a queue callback. Makes sure that we don't create a bio that spans across
1530602adf40SYehuda Sadeh  * multiple osd objects. One exception would be with a single page bios,
1531602adf40SYehuda Sadeh  * which we handle later at bio_chain_clone
1532602adf40SYehuda Sadeh  */
1533602adf40SYehuda Sadeh static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
1534602adf40SYehuda Sadeh 			  struct bio_vec *bvec)
1535602adf40SYehuda Sadeh {
1536602adf40SYehuda Sadeh 	struct rbd_device *rbd_dev = q->queuedata;
1537593a9e7bSAlex Elder 	unsigned int chunk_sectors;
1538593a9e7bSAlex Elder 	sector_t sector;
1539593a9e7bSAlex Elder 	unsigned int bio_sectors;
1540602adf40SYehuda Sadeh 	int max;
1541602adf40SYehuda Sadeh 
1542593a9e7bSAlex Elder 	chunk_sectors = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT);
1543593a9e7bSAlex Elder 	sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev);
1544593a9e7bSAlex Elder 	bio_sectors = bmd->bi_size >> SECTOR_SHIFT;
1545593a9e7bSAlex Elder 
1546602adf40SYehuda Sadeh 	max =  (chunk_sectors - ((sector & (chunk_sectors - 1))
1547593a9e7bSAlex Elder 				 + bio_sectors)) << SECTOR_SHIFT;
1548602adf40SYehuda Sadeh 	if (max < 0)
1549602adf40SYehuda Sadeh 		max = 0; /* bio_add cannot handle a negative return */
1550602adf40SYehuda Sadeh 	if (max <= bvec->bv_len && bio_sectors == 0)
1551602adf40SYehuda Sadeh 		return bvec->bv_len;
1552602adf40SYehuda Sadeh 	return max;
1553602adf40SYehuda Sadeh }
1554602adf40SYehuda Sadeh 
1555602adf40SYehuda Sadeh static void rbd_free_disk(struct rbd_device *rbd_dev)
1556602adf40SYehuda Sadeh {
1557602adf40SYehuda Sadeh 	struct gendisk *disk = rbd_dev->disk;
1558602adf40SYehuda Sadeh 
1559602adf40SYehuda Sadeh 	if (!disk)
1560602adf40SYehuda Sadeh 		return;
1561602adf40SYehuda Sadeh 
1562602adf40SYehuda Sadeh 	rbd_header_free(&rbd_dev->header);
1563602adf40SYehuda Sadeh 
1564602adf40SYehuda Sadeh 	if (disk->flags & GENHD_FL_UP)
1565602adf40SYehuda Sadeh 		del_gendisk(disk);
1566602adf40SYehuda Sadeh 	if (disk->queue)
1567602adf40SYehuda Sadeh 		blk_cleanup_queue(disk->queue);
1568602adf40SYehuda Sadeh 	put_disk(disk);
1569602adf40SYehuda Sadeh }
1570602adf40SYehuda Sadeh 
1571602adf40SYehuda Sadeh /*
1572602adf40SYehuda Sadeh  * reload the ondisk the header
1573602adf40SYehuda Sadeh  */
1574602adf40SYehuda Sadeh static int rbd_read_header(struct rbd_device *rbd_dev,
1575602adf40SYehuda Sadeh 			   struct rbd_image_header *header)
1576602adf40SYehuda Sadeh {
1577602adf40SYehuda Sadeh 	ssize_t rc;
1578602adf40SYehuda Sadeh 	struct rbd_image_header_ondisk *dh;
157950f7c4c9SXi Wang 	u32 snap_count = 0;
158059c2be1eSYehuda Sadeh 	u64 ver;
158100f1f36fSAlex Elder 	size_t len;
1582602adf40SYehuda Sadeh 
158300f1f36fSAlex Elder 	/*
158400f1f36fSAlex Elder 	 * First reads the fixed-size header to determine the number
158500f1f36fSAlex Elder 	 * of snapshots, then re-reads it, along with all snapshot
158600f1f36fSAlex Elder 	 * records as well as their stored names.
158700f1f36fSAlex Elder 	 */
158800f1f36fSAlex Elder 	len = sizeof (*dh);
1589602adf40SYehuda Sadeh 	while (1) {
1590602adf40SYehuda Sadeh 		dh = kmalloc(len, GFP_KERNEL);
1591602adf40SYehuda Sadeh 		if (!dh)
1592602adf40SYehuda Sadeh 			return -ENOMEM;
1593602adf40SYehuda Sadeh 
1594602adf40SYehuda Sadeh 		rc = rbd_req_sync_read(rbd_dev,
1595602adf40SYehuda Sadeh 				       NULL, CEPH_NOSNAP,
1596602adf40SYehuda Sadeh 				       rbd_dev->obj_md_name,
1597602adf40SYehuda Sadeh 				       0, len,
159859c2be1eSYehuda Sadeh 				       (char *)dh, &ver);
1599602adf40SYehuda Sadeh 		if (rc < 0)
1600602adf40SYehuda Sadeh 			goto out_dh;
1601602adf40SYehuda Sadeh 
1602602adf40SYehuda Sadeh 		rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL);
160381e759fbSJosh Durgin 		if (rc < 0) {
160400f1f36fSAlex Elder 			if (rc == -ENXIO)
160581e759fbSJosh Durgin 				pr_warning("unrecognized header format"
160681e759fbSJosh Durgin 					   " for image %s", rbd_dev->obj);
1607602adf40SYehuda Sadeh 			goto out_dh;
160881e759fbSJosh Durgin 		}
1609602adf40SYehuda Sadeh 
161000f1f36fSAlex Elder 		if (snap_count == header->total_snaps)
161100f1f36fSAlex Elder 			break;
161200f1f36fSAlex Elder 
1613602adf40SYehuda Sadeh 		snap_count = header->total_snaps;
161400f1f36fSAlex Elder 		len = sizeof (*dh) +
161500f1f36fSAlex Elder 			snap_count * sizeof(struct rbd_image_snap_ondisk) +
161600f1f36fSAlex Elder 			header->snap_names_len;
161700f1f36fSAlex Elder 
1618602adf40SYehuda Sadeh 		rbd_header_free(header);
1619602adf40SYehuda Sadeh 		kfree(dh);
1620602adf40SYehuda Sadeh 	}
162159c2be1eSYehuda Sadeh 	header->obj_version = ver;
1622602adf40SYehuda Sadeh 
1623602adf40SYehuda Sadeh out_dh:
1624602adf40SYehuda Sadeh 	kfree(dh);
1625602adf40SYehuda Sadeh 	return rc;
1626602adf40SYehuda Sadeh }
1627602adf40SYehuda Sadeh 
1628602adf40SYehuda Sadeh /*
1629602adf40SYehuda Sadeh  * create a snapshot
1630602adf40SYehuda Sadeh  */
1631602adf40SYehuda Sadeh static int rbd_header_add_snap(struct rbd_device *dev,
1632602adf40SYehuda Sadeh 			       const char *snap_name,
1633602adf40SYehuda Sadeh 			       gfp_t gfp_flags)
1634602adf40SYehuda Sadeh {
1635602adf40SYehuda Sadeh 	int name_len = strlen(snap_name);
1636602adf40SYehuda Sadeh 	u64 new_snapid;
1637602adf40SYehuda Sadeh 	int ret;
1638916d4d67SSage Weil 	void *data, *p, *e;
163959c2be1eSYehuda Sadeh 	u64 ver;
16401dbb4399SAlex Elder 	struct ceph_mon_client *monc;
1641602adf40SYehuda Sadeh 
1642602adf40SYehuda Sadeh 	/* we should create a snapshot only if we're pointing at the head */
164377dfe99fSJosh Durgin 	if (dev->snap_id != CEPH_NOSNAP)
1644602adf40SYehuda Sadeh 		return -EINVAL;
1645602adf40SYehuda Sadeh 
16461dbb4399SAlex Elder 	monc = &dev->rbd_client->client->monc;
16471dbb4399SAlex Elder 	ret = ceph_monc_create_snapid(monc, dev->poolid, &new_snapid);
1648602adf40SYehuda Sadeh 	dout("created snapid=%lld\n", new_snapid);
1649602adf40SYehuda Sadeh 	if (ret < 0)
1650602adf40SYehuda Sadeh 		return ret;
1651602adf40SYehuda Sadeh 
1652602adf40SYehuda Sadeh 	data = kmalloc(name_len + 16, gfp_flags);
1653602adf40SYehuda Sadeh 	if (!data)
1654602adf40SYehuda Sadeh 		return -ENOMEM;
1655602adf40SYehuda Sadeh 
1656916d4d67SSage Weil 	p = data;
1657916d4d67SSage Weil 	e = data + name_len + 16;
1658602adf40SYehuda Sadeh 
1659916d4d67SSage Weil 	ceph_encode_string_safe(&p, e, snap_name, name_len, bad);
1660916d4d67SSage Weil 	ceph_encode_64_safe(&p, e, new_snapid, bad);
1661602adf40SYehuda Sadeh 
1662602adf40SYehuda Sadeh 	ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add",
1663916d4d67SSage Weil 				data, p - data, &ver);
1664602adf40SYehuda Sadeh 
1665916d4d67SSage Weil 	kfree(data);
1666602adf40SYehuda Sadeh 
1667602adf40SYehuda Sadeh 	if (ret < 0)
1668602adf40SYehuda Sadeh 		return ret;
1669602adf40SYehuda Sadeh 
1670403f24d3SJosh Durgin 	down_write(&dev->header_rwsem);
1671602adf40SYehuda Sadeh 	dev->header.snapc->seq = new_snapid;
1672403f24d3SJosh Durgin 	up_write(&dev->header_rwsem);
1673602adf40SYehuda Sadeh 
1674602adf40SYehuda Sadeh 	return 0;
1675602adf40SYehuda Sadeh bad:
1676602adf40SYehuda Sadeh 	return -ERANGE;
1677602adf40SYehuda Sadeh }
1678602adf40SYehuda Sadeh 
1679dfc5606dSYehuda Sadeh static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev)
1680dfc5606dSYehuda Sadeh {
1681dfc5606dSYehuda Sadeh 	struct rbd_snap *snap;
1682dfc5606dSYehuda Sadeh 
1683dfc5606dSYehuda Sadeh 	while (!list_empty(&rbd_dev->snaps)) {
1684dfc5606dSYehuda Sadeh 		snap = list_first_entry(&rbd_dev->snaps, struct rbd_snap, node);
1685dfc5606dSYehuda Sadeh 		__rbd_remove_snap_dev(rbd_dev, snap);
1686dfc5606dSYehuda Sadeh 	}
1687dfc5606dSYehuda Sadeh }
1688dfc5606dSYehuda Sadeh 
1689602adf40SYehuda Sadeh /*
1690602adf40SYehuda Sadeh  * only read the first part of the ondisk header, without the snaps info
1691602adf40SYehuda Sadeh  */
1692263c6ca0SJosh Durgin static int __rbd_refresh_header(struct rbd_device *rbd_dev)
1693602adf40SYehuda Sadeh {
1694602adf40SYehuda Sadeh 	int ret;
1695602adf40SYehuda Sadeh 	struct rbd_image_header h;
1696602adf40SYehuda Sadeh 	u64 snap_seq;
169759c2be1eSYehuda Sadeh 	int follow_seq = 0;
1698602adf40SYehuda Sadeh 
1699602adf40SYehuda Sadeh 	ret = rbd_read_header(rbd_dev, &h);
1700602adf40SYehuda Sadeh 	if (ret < 0)
1701602adf40SYehuda Sadeh 		return ret;
1702602adf40SYehuda Sadeh 
17039db4b3e3SSage Weil 	/* resized? */
1704593a9e7bSAlex Elder 	set_capacity(rbd_dev->disk, h.image_size / SECTOR_SIZE);
17059db4b3e3SSage Weil 
1706c666601aSJosh Durgin 	down_write(&rbd_dev->header_rwsem);
1707602adf40SYehuda Sadeh 
1708602adf40SYehuda Sadeh 	snap_seq = rbd_dev->header.snapc->seq;
170959c2be1eSYehuda Sadeh 	if (rbd_dev->header.total_snaps &&
171059c2be1eSYehuda Sadeh 	    rbd_dev->header.snapc->snaps[0] == snap_seq)
171159c2be1eSYehuda Sadeh 		/* pointing at the head, will need to follow that
171259c2be1eSYehuda Sadeh 		   if head moves */
171359c2be1eSYehuda Sadeh 		follow_seq = 1;
1714602adf40SYehuda Sadeh 
1715602adf40SYehuda Sadeh 	kfree(rbd_dev->header.snapc);
1716602adf40SYehuda Sadeh 	kfree(rbd_dev->header.snap_names);
1717602adf40SYehuda Sadeh 	kfree(rbd_dev->header.snap_sizes);
1718602adf40SYehuda Sadeh 
1719602adf40SYehuda Sadeh 	rbd_dev->header.total_snaps = h.total_snaps;
1720602adf40SYehuda Sadeh 	rbd_dev->header.snapc = h.snapc;
1721602adf40SYehuda Sadeh 	rbd_dev->header.snap_names = h.snap_names;
1722dfc5606dSYehuda Sadeh 	rbd_dev->header.snap_names_len = h.snap_names_len;
1723602adf40SYehuda Sadeh 	rbd_dev->header.snap_sizes = h.snap_sizes;
172459c2be1eSYehuda Sadeh 	if (follow_seq)
172559c2be1eSYehuda Sadeh 		rbd_dev->header.snapc->seq = rbd_dev->header.snapc->snaps[0];
172659c2be1eSYehuda Sadeh 	else
1727602adf40SYehuda Sadeh 		rbd_dev->header.snapc->seq = snap_seq;
1728602adf40SYehuda Sadeh 
1729dfc5606dSYehuda Sadeh 	ret = __rbd_init_snaps_header(rbd_dev);
1730dfc5606dSYehuda Sadeh 
1731c666601aSJosh Durgin 	up_write(&rbd_dev->header_rwsem);
1732602adf40SYehuda Sadeh 
1733dfc5606dSYehuda Sadeh 	return ret;
1734602adf40SYehuda Sadeh }
1735602adf40SYehuda Sadeh 
1736602adf40SYehuda Sadeh static int rbd_init_disk(struct rbd_device *rbd_dev)
1737602adf40SYehuda Sadeh {
1738602adf40SYehuda Sadeh 	struct gendisk *disk;
1739602adf40SYehuda Sadeh 	struct request_queue *q;
1740602adf40SYehuda Sadeh 	int rc;
1741593a9e7bSAlex Elder 	u64 segment_size;
1742602adf40SYehuda Sadeh 	u64 total_size = 0;
1743602adf40SYehuda Sadeh 
1744602adf40SYehuda Sadeh 	/* contact OSD, request size info about the object being mapped */
1745602adf40SYehuda Sadeh 	rc = rbd_read_header(rbd_dev, &rbd_dev->header);
1746602adf40SYehuda Sadeh 	if (rc)
1747602adf40SYehuda Sadeh 		return rc;
1748602adf40SYehuda Sadeh 
1749dfc5606dSYehuda Sadeh 	/* no need to lock here, as rbd_dev is not registered yet */
1750dfc5606dSYehuda Sadeh 	rc = __rbd_init_snaps_header(rbd_dev);
1751dfc5606dSYehuda Sadeh 	if (rc)
1752dfc5606dSYehuda Sadeh 		return rc;
1753dfc5606dSYehuda Sadeh 
1754cc9d734cSJosh Durgin 	rc = rbd_header_set_snap(rbd_dev, &total_size);
1755602adf40SYehuda Sadeh 	if (rc)
1756602adf40SYehuda Sadeh 		return rc;
1757602adf40SYehuda Sadeh 
1758602adf40SYehuda Sadeh 	/* create gendisk info */
1759602adf40SYehuda Sadeh 	rc = -ENOMEM;
1760602adf40SYehuda Sadeh 	disk = alloc_disk(RBD_MINORS_PER_MAJOR);
1761602adf40SYehuda Sadeh 	if (!disk)
1762602adf40SYehuda Sadeh 		goto out;
1763602adf40SYehuda Sadeh 
1764f0f8cef5SAlex Elder 	snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
1765aedfec59SSage Weil 		 rbd_dev->id);
1766602adf40SYehuda Sadeh 	disk->major = rbd_dev->major;
1767602adf40SYehuda Sadeh 	disk->first_minor = 0;
1768602adf40SYehuda Sadeh 	disk->fops = &rbd_bd_ops;
1769602adf40SYehuda Sadeh 	disk->private_data = rbd_dev;
1770602adf40SYehuda Sadeh 
1771602adf40SYehuda Sadeh 	/* init rq */
1772602adf40SYehuda Sadeh 	rc = -ENOMEM;
1773602adf40SYehuda Sadeh 	q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock);
1774602adf40SYehuda Sadeh 	if (!q)
1775602adf40SYehuda Sadeh 		goto out_disk;
1776029bcbd8SJosh Durgin 
1777593a9e7bSAlex Elder 	/* We use the default size, but let's be explicit about it. */
1778593a9e7bSAlex Elder 	blk_queue_physical_block_size(q, SECTOR_SIZE);
1779593a9e7bSAlex Elder 
1780029bcbd8SJosh Durgin 	/* set io sizes to object size */
1781593a9e7bSAlex Elder 	segment_size = rbd_obj_bytes(&rbd_dev->header);
1782593a9e7bSAlex Elder 	blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
1783593a9e7bSAlex Elder 	blk_queue_max_segment_size(q, segment_size);
1784593a9e7bSAlex Elder 	blk_queue_io_min(q, segment_size);
1785593a9e7bSAlex Elder 	blk_queue_io_opt(q, segment_size);
1786029bcbd8SJosh Durgin 
1787602adf40SYehuda Sadeh 	blk_queue_merge_bvec(q, rbd_merge_bvec);
1788602adf40SYehuda Sadeh 	disk->queue = q;
1789602adf40SYehuda Sadeh 
1790602adf40SYehuda Sadeh 	q->queuedata = rbd_dev;
1791602adf40SYehuda Sadeh 
1792602adf40SYehuda Sadeh 	rbd_dev->disk = disk;
1793602adf40SYehuda Sadeh 	rbd_dev->q = q;
1794602adf40SYehuda Sadeh 
1795602adf40SYehuda Sadeh 	/* finally, announce the disk to the world */
1796593a9e7bSAlex Elder 	set_capacity(disk, total_size / SECTOR_SIZE);
1797602adf40SYehuda Sadeh 	add_disk(disk);
1798602adf40SYehuda Sadeh 
1799602adf40SYehuda Sadeh 	pr_info("%s: added with size 0x%llx\n",
1800602adf40SYehuda Sadeh 		disk->disk_name, (unsigned long long)total_size);
1801602adf40SYehuda Sadeh 	return 0;
1802602adf40SYehuda Sadeh 
1803602adf40SYehuda Sadeh out_disk:
1804602adf40SYehuda Sadeh 	put_disk(disk);
1805602adf40SYehuda Sadeh out:
1806602adf40SYehuda Sadeh 	return rc;
1807602adf40SYehuda Sadeh }
1808602adf40SYehuda Sadeh 
1809dfc5606dSYehuda Sadeh /*
1810dfc5606dSYehuda Sadeh   sysfs
1811dfc5606dSYehuda Sadeh */
1812602adf40SYehuda Sadeh 
1813593a9e7bSAlex Elder static struct rbd_device *dev_to_rbd_dev(struct device *dev)
1814593a9e7bSAlex Elder {
1815593a9e7bSAlex Elder 	return container_of(dev, struct rbd_device, dev);
1816593a9e7bSAlex Elder }
1817593a9e7bSAlex Elder 
1818dfc5606dSYehuda Sadeh static ssize_t rbd_size_show(struct device *dev,
1819dfc5606dSYehuda Sadeh 			     struct device_attribute *attr, char *buf)
1820602adf40SYehuda Sadeh {
1821593a9e7bSAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1822dfc5606dSYehuda Sadeh 
1823dfc5606dSYehuda Sadeh 	return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size);
1824602adf40SYehuda Sadeh }
1825602adf40SYehuda Sadeh 
1826dfc5606dSYehuda Sadeh static ssize_t rbd_major_show(struct device *dev,
1827dfc5606dSYehuda Sadeh 			      struct device_attribute *attr, char *buf)
1828602adf40SYehuda Sadeh {
1829593a9e7bSAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1830dfc5606dSYehuda Sadeh 
1831dfc5606dSYehuda Sadeh 	return sprintf(buf, "%d\n", rbd_dev->major);
1832dfc5606dSYehuda Sadeh }
1833dfc5606dSYehuda Sadeh 
1834dfc5606dSYehuda Sadeh static ssize_t rbd_client_id_show(struct device *dev,
1835dfc5606dSYehuda Sadeh 				  struct device_attribute *attr, char *buf)
1836dfc5606dSYehuda Sadeh {
1837593a9e7bSAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1838dfc5606dSYehuda Sadeh 
18391dbb4399SAlex Elder 	return sprintf(buf, "client%lld\n",
18401dbb4399SAlex Elder 			ceph_client_id(rbd_dev->rbd_client->client));
1841dfc5606dSYehuda Sadeh }
1842dfc5606dSYehuda Sadeh 
1843dfc5606dSYehuda Sadeh static ssize_t rbd_pool_show(struct device *dev,
1844dfc5606dSYehuda Sadeh 			     struct device_attribute *attr, char *buf)
1845dfc5606dSYehuda Sadeh {
1846593a9e7bSAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1847dfc5606dSYehuda Sadeh 
1848dfc5606dSYehuda Sadeh 	return sprintf(buf, "%s\n", rbd_dev->pool_name);
1849dfc5606dSYehuda Sadeh }
1850dfc5606dSYehuda Sadeh 
1851dfc5606dSYehuda Sadeh static ssize_t rbd_name_show(struct device *dev,
1852dfc5606dSYehuda Sadeh 			     struct device_attribute *attr, char *buf)
1853dfc5606dSYehuda Sadeh {
1854593a9e7bSAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1855dfc5606dSYehuda Sadeh 
1856dfc5606dSYehuda Sadeh 	return sprintf(buf, "%s\n", rbd_dev->obj);
1857dfc5606dSYehuda Sadeh }
1858dfc5606dSYehuda Sadeh 
1859dfc5606dSYehuda Sadeh static ssize_t rbd_snap_show(struct device *dev,
1860dfc5606dSYehuda Sadeh 			     struct device_attribute *attr,
1861dfc5606dSYehuda Sadeh 			     char *buf)
1862dfc5606dSYehuda Sadeh {
1863593a9e7bSAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1864dfc5606dSYehuda Sadeh 
1865dfc5606dSYehuda Sadeh 	return sprintf(buf, "%s\n", rbd_dev->snap_name);
1866dfc5606dSYehuda Sadeh }
1867dfc5606dSYehuda Sadeh 
1868dfc5606dSYehuda Sadeh static ssize_t rbd_image_refresh(struct device *dev,
1869dfc5606dSYehuda Sadeh 				 struct device_attribute *attr,
1870dfc5606dSYehuda Sadeh 				 const char *buf,
1871dfc5606dSYehuda Sadeh 				 size_t size)
1872dfc5606dSYehuda Sadeh {
1873593a9e7bSAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1874dfc5606dSYehuda Sadeh 	int rc;
1875dfc5606dSYehuda Sadeh 	int ret = size;
1876602adf40SYehuda Sadeh 
1877602adf40SYehuda Sadeh 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1878602adf40SYehuda Sadeh 
1879263c6ca0SJosh Durgin 	rc = __rbd_refresh_header(rbd_dev);
1880dfc5606dSYehuda Sadeh 	if (rc < 0)
1881dfc5606dSYehuda Sadeh 		ret = rc;
1882602adf40SYehuda Sadeh 
1883dfc5606dSYehuda Sadeh 	mutex_unlock(&ctl_mutex);
1884dfc5606dSYehuda Sadeh 	return ret;
1885dfc5606dSYehuda Sadeh }
1886602adf40SYehuda Sadeh 
/*
 * Per-device sysfs attributes.  Those declared with S_IRUGO only have a
 * show handler; those declared with S_IWUSR only have a store handler.
 */
static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
static DEVICE_ATTR(create_snap, S_IWUSR, NULL, rbd_snap_add);
1895dfc5606dSYehuda Sadeh 
1896dfc5606dSYehuda Sadeh static struct attribute *rbd_attrs[] = {
1897dfc5606dSYehuda Sadeh 	&dev_attr_size.attr,
1898dfc5606dSYehuda Sadeh 	&dev_attr_major.attr,
1899dfc5606dSYehuda Sadeh 	&dev_attr_client_id.attr,
1900dfc5606dSYehuda Sadeh 	&dev_attr_pool.attr,
1901dfc5606dSYehuda Sadeh 	&dev_attr_name.attr,
1902dfc5606dSYehuda Sadeh 	&dev_attr_current_snap.attr,
1903dfc5606dSYehuda Sadeh 	&dev_attr_refresh.attr,
1904dfc5606dSYehuda Sadeh 	&dev_attr_create_snap.attr,
1905dfc5606dSYehuda Sadeh 	NULL
1906dfc5606dSYehuda Sadeh };
1907dfc5606dSYehuda Sadeh 
1908dfc5606dSYehuda Sadeh static struct attribute_group rbd_attr_group = {
1909dfc5606dSYehuda Sadeh 	.attrs = rbd_attrs,
1910dfc5606dSYehuda Sadeh };
1911dfc5606dSYehuda Sadeh 
1912dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_attr_groups[] = {
1913dfc5606dSYehuda Sadeh 	&rbd_attr_group,
1914dfc5606dSYehuda Sadeh 	NULL
1915dfc5606dSYehuda Sadeh };
1916dfc5606dSYehuda Sadeh 
/* Release callback of rbd_device_type; intentionally performs no work. */
static void rbd_sysfs_dev_release(struct device *dev)
{
}
1920dfc5606dSYehuda Sadeh 
1921dfc5606dSYehuda Sadeh static struct device_type rbd_device_type = {
1922dfc5606dSYehuda Sadeh 	.name		= "rbd",
1923dfc5606dSYehuda Sadeh 	.groups		= rbd_attr_groups,
1924dfc5606dSYehuda Sadeh 	.release	= rbd_sysfs_dev_release,
1925dfc5606dSYehuda Sadeh };
1926dfc5606dSYehuda Sadeh 
1927dfc5606dSYehuda Sadeh 
1928dfc5606dSYehuda Sadeh /*
1929dfc5606dSYehuda Sadeh   sysfs - snapshots
1930dfc5606dSYehuda Sadeh */
1931dfc5606dSYehuda Sadeh 
1932dfc5606dSYehuda Sadeh static ssize_t rbd_snap_size_show(struct device *dev,
1933dfc5606dSYehuda Sadeh 				  struct device_attribute *attr,
1934dfc5606dSYehuda Sadeh 				  char *buf)
1935dfc5606dSYehuda Sadeh {
1936dfc5606dSYehuda Sadeh 	struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
1937dfc5606dSYehuda Sadeh 
19383591538fSJosh Durgin 	return sprintf(buf, "%llu\n", (unsigned long long)snap->size);
1939dfc5606dSYehuda Sadeh }
1940dfc5606dSYehuda Sadeh 
1941dfc5606dSYehuda Sadeh static ssize_t rbd_snap_id_show(struct device *dev,
1942dfc5606dSYehuda Sadeh 				struct device_attribute *attr,
1943dfc5606dSYehuda Sadeh 				char *buf)
1944dfc5606dSYehuda Sadeh {
1945dfc5606dSYehuda Sadeh 	struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
1946dfc5606dSYehuda Sadeh 
1947593a9e7bSAlex Elder 	return sprintf(buf, "%llu\n", (unsigned long long)snap->id);
1948dfc5606dSYehuda Sadeh }
1949dfc5606dSYehuda Sadeh 
/* Per-snapshot sysfs attributes; both only have a show handler. */
static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL);
static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL);
1952dfc5606dSYehuda Sadeh 
1953dfc5606dSYehuda Sadeh static struct attribute *rbd_snap_attrs[] = {
1954dfc5606dSYehuda Sadeh 	&dev_attr_snap_size.attr,
1955dfc5606dSYehuda Sadeh 	&dev_attr_snap_id.attr,
1956dfc5606dSYehuda Sadeh 	NULL,
1957dfc5606dSYehuda Sadeh };
1958dfc5606dSYehuda Sadeh 
1959dfc5606dSYehuda Sadeh static struct attribute_group rbd_snap_attr_group = {
1960dfc5606dSYehuda Sadeh 	.attrs = rbd_snap_attrs,
1961dfc5606dSYehuda Sadeh };
1962dfc5606dSYehuda Sadeh 
1963dfc5606dSYehuda Sadeh static void rbd_snap_dev_release(struct device *dev)
1964dfc5606dSYehuda Sadeh {
1965dfc5606dSYehuda Sadeh 	struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
1966dfc5606dSYehuda Sadeh 	kfree(snap->name);
1967dfc5606dSYehuda Sadeh 	kfree(snap);
1968dfc5606dSYehuda Sadeh }
1969dfc5606dSYehuda Sadeh 
1970dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_snap_attr_groups[] = {
1971dfc5606dSYehuda Sadeh 	&rbd_snap_attr_group,
1972dfc5606dSYehuda Sadeh 	NULL
1973dfc5606dSYehuda Sadeh };
1974dfc5606dSYehuda Sadeh 
1975dfc5606dSYehuda Sadeh static struct device_type rbd_snap_device_type = {
1976dfc5606dSYehuda Sadeh 	.groups		= rbd_snap_attr_groups,
1977dfc5606dSYehuda Sadeh 	.release	= rbd_snap_dev_release,
1978dfc5606dSYehuda Sadeh };
1979dfc5606dSYehuda Sadeh 
1980dfc5606dSYehuda Sadeh static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev,
1981dfc5606dSYehuda Sadeh 				  struct rbd_snap *snap)
1982dfc5606dSYehuda Sadeh {
1983dfc5606dSYehuda Sadeh 	list_del(&snap->node);
1984dfc5606dSYehuda Sadeh 	device_unregister(&snap->dev);
1985dfc5606dSYehuda Sadeh }
1986dfc5606dSYehuda Sadeh 
1987dfc5606dSYehuda Sadeh static int rbd_register_snap_dev(struct rbd_device *rbd_dev,
1988dfc5606dSYehuda Sadeh 				  struct rbd_snap *snap,
1989dfc5606dSYehuda Sadeh 				  struct device *parent)
1990dfc5606dSYehuda Sadeh {
1991dfc5606dSYehuda Sadeh 	struct device *dev = &snap->dev;
1992dfc5606dSYehuda Sadeh 	int ret;
1993dfc5606dSYehuda Sadeh 
1994dfc5606dSYehuda Sadeh 	dev->type = &rbd_snap_device_type;
1995dfc5606dSYehuda Sadeh 	dev->parent = parent;
1996dfc5606dSYehuda Sadeh 	dev->release = rbd_snap_dev_release;
1997dfc5606dSYehuda Sadeh 	dev_set_name(dev, "snap_%s", snap->name);
1998dfc5606dSYehuda Sadeh 	ret = device_register(dev);
1999dfc5606dSYehuda Sadeh 
2000dfc5606dSYehuda Sadeh 	return ret;
2001dfc5606dSYehuda Sadeh }
2002dfc5606dSYehuda Sadeh 
2003dfc5606dSYehuda Sadeh static int __rbd_add_snap_dev(struct rbd_device *rbd_dev,
2004dfc5606dSYehuda Sadeh 			      int i, const char *name,
2005dfc5606dSYehuda Sadeh 			      struct rbd_snap **snapp)
2006dfc5606dSYehuda Sadeh {
2007dfc5606dSYehuda Sadeh 	int ret;
2008dfc5606dSYehuda Sadeh 	struct rbd_snap *snap = kzalloc(sizeof(*snap), GFP_KERNEL);
2009dfc5606dSYehuda Sadeh 	if (!snap)
2010dfc5606dSYehuda Sadeh 		return -ENOMEM;
2011dfc5606dSYehuda Sadeh 	snap->name = kstrdup(name, GFP_KERNEL);
2012dfc5606dSYehuda Sadeh 	snap->size = rbd_dev->header.snap_sizes[i];
2013dfc5606dSYehuda Sadeh 	snap->id = rbd_dev->header.snapc->snaps[i];
2014dfc5606dSYehuda Sadeh 	if (device_is_registered(&rbd_dev->dev)) {
2015dfc5606dSYehuda Sadeh 		ret = rbd_register_snap_dev(rbd_dev, snap,
2016dfc5606dSYehuda Sadeh 					     &rbd_dev->dev);
2017dfc5606dSYehuda Sadeh 		if (ret < 0)
2018dfc5606dSYehuda Sadeh 			goto err;
2019dfc5606dSYehuda Sadeh 	}
2020dfc5606dSYehuda Sadeh 	*snapp = snap;
2021dfc5606dSYehuda Sadeh 	return 0;
2022dfc5606dSYehuda Sadeh err:
2023dfc5606dSYehuda Sadeh 	kfree(snap->name);
2024dfc5606dSYehuda Sadeh 	kfree(snap);
2025dfc5606dSYehuda Sadeh 	return ret;
2026dfc5606dSYehuda Sadeh }
2027dfc5606dSYehuda Sadeh 
/*
 * Step backward through a list of '\0'-delimited strings.
 *
 * @name points just past the terminator of the string wanted (that is,
 * at the start of the following string, or at the end of the list) and
 * @start is the beginning of the list.  Returns a pointer to the start
 * of the preceding string, or NULL when fewer than two bytes lie
 * between @start and @name.
 */
const char *rbd_prev_snap_name(const char *name, const char *start)
{
	const char *cur;

	if (name < start + 2)
		return NULL;

	/* Skip the terminator just before @name, then scan back for the next one */
	for (cur = name - 2; *cur != '\0'; cur--) {
		if (cur == start)
			return start;
	}

	return cur + 1;
}
2044dfc5606dSYehuda Sadeh 
2045dfc5606dSYehuda Sadeh /*
2046dfc5606dSYehuda Sadeh  * compare the old list of snapshots that we have to what's in the header
2047dfc5606dSYehuda Sadeh  * and update it accordingly. Note that the header holds the snapshots
2048dfc5606dSYehuda Sadeh  * in a reverse order (from newest to oldest) and we need to go from
2049dfc5606dSYehuda Sadeh  * older to new so that we don't get a duplicate snap name when
2050dfc5606dSYehuda Sadeh  * doing the process (e.g., removed snapshot and recreated a new
2051dfc5606dSYehuda Sadeh  * one with the same name.
2052dfc5606dSYehuda Sadeh  */
2053dfc5606dSYehuda Sadeh static int __rbd_init_snaps_header(struct rbd_device *rbd_dev)
2054dfc5606dSYehuda Sadeh {
2055dfc5606dSYehuda Sadeh 	const char *name, *first_name;
2056dfc5606dSYehuda Sadeh 	int i = rbd_dev->header.total_snaps;
2057dfc5606dSYehuda Sadeh 	struct rbd_snap *snap, *old_snap = NULL;
2058dfc5606dSYehuda Sadeh 	int ret;
2059dfc5606dSYehuda Sadeh 	struct list_head *p, *n;
2060dfc5606dSYehuda Sadeh 
2061dfc5606dSYehuda Sadeh 	first_name = rbd_dev->header.snap_names;
2062dfc5606dSYehuda Sadeh 	name = first_name + rbd_dev->header.snap_names_len;
2063dfc5606dSYehuda Sadeh 
2064dfc5606dSYehuda Sadeh 	list_for_each_prev_safe(p, n, &rbd_dev->snaps) {
2065dfc5606dSYehuda Sadeh 		u64 cur_id;
2066dfc5606dSYehuda Sadeh 
2067dfc5606dSYehuda Sadeh 		old_snap = list_entry(p, struct rbd_snap, node);
2068dfc5606dSYehuda Sadeh 
2069dfc5606dSYehuda Sadeh 		if (i)
2070dfc5606dSYehuda Sadeh 			cur_id = rbd_dev->header.snapc->snaps[i - 1];
2071dfc5606dSYehuda Sadeh 
2072dfc5606dSYehuda Sadeh 		if (!i || old_snap->id < cur_id) {
2073dfc5606dSYehuda Sadeh 			/* old_snap->id was skipped, thus was removed */
2074dfc5606dSYehuda Sadeh 			__rbd_remove_snap_dev(rbd_dev, old_snap);
2075dfc5606dSYehuda Sadeh 			continue;
2076dfc5606dSYehuda Sadeh 		}
2077dfc5606dSYehuda Sadeh 		if (old_snap->id == cur_id) {
2078dfc5606dSYehuda Sadeh 			/* we have this snapshot already */
2079dfc5606dSYehuda Sadeh 			i--;
2080dfc5606dSYehuda Sadeh 			name = rbd_prev_snap_name(name, first_name);
2081dfc5606dSYehuda Sadeh 			continue;
2082dfc5606dSYehuda Sadeh 		}
2083dfc5606dSYehuda Sadeh 		for (; i > 0;
2084dfc5606dSYehuda Sadeh 		     i--, name = rbd_prev_snap_name(name, first_name)) {
2085dfc5606dSYehuda Sadeh 			if (!name) {
2086dfc5606dSYehuda Sadeh 				WARN_ON(1);
2087dfc5606dSYehuda Sadeh 				return -EINVAL;
2088dfc5606dSYehuda Sadeh 			}
2089dfc5606dSYehuda Sadeh 			cur_id = rbd_dev->header.snapc->snaps[i];
2090dfc5606dSYehuda Sadeh 			/* snapshot removal? handle it above */
2091dfc5606dSYehuda Sadeh 			if (cur_id >= old_snap->id)
2092dfc5606dSYehuda Sadeh 				break;
2093dfc5606dSYehuda Sadeh 			/* a new snapshot */
2094dfc5606dSYehuda Sadeh 			ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap);
2095dfc5606dSYehuda Sadeh 			if (ret < 0)
2096dfc5606dSYehuda Sadeh 				return ret;
2097dfc5606dSYehuda Sadeh 
2098dfc5606dSYehuda Sadeh 			/* note that we add it backward so using n and not p */
2099dfc5606dSYehuda Sadeh 			list_add(&snap->node, n);
2100dfc5606dSYehuda Sadeh 			p = &snap->node;
2101dfc5606dSYehuda Sadeh 		}
2102dfc5606dSYehuda Sadeh 	}
2103dfc5606dSYehuda Sadeh 	/* we're done going over the old snap list, just add what's left */
2104dfc5606dSYehuda Sadeh 	for (; i > 0; i--) {
2105dfc5606dSYehuda Sadeh 		name = rbd_prev_snap_name(name, first_name);
2106dfc5606dSYehuda Sadeh 		if (!name) {
2107dfc5606dSYehuda Sadeh 			WARN_ON(1);
2108dfc5606dSYehuda Sadeh 			return -EINVAL;
2109dfc5606dSYehuda Sadeh 		}
2110dfc5606dSYehuda Sadeh 		ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap);
2111dfc5606dSYehuda Sadeh 		if (ret < 0)
2112dfc5606dSYehuda Sadeh 			return ret;
2113dfc5606dSYehuda Sadeh 		list_add(&snap->node, &rbd_dev->snaps);
2114dfc5606dSYehuda Sadeh 	}
2115dfc5606dSYehuda Sadeh 
2116dfc5606dSYehuda Sadeh 	return 0;
2117dfc5606dSYehuda Sadeh }
2118dfc5606dSYehuda Sadeh 
2119dfc5606dSYehuda Sadeh static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
2120dfc5606dSYehuda Sadeh {
2121f0f8cef5SAlex Elder 	int ret;
2122dfc5606dSYehuda Sadeh 	struct device *dev;
2123dfc5606dSYehuda Sadeh 	struct rbd_snap *snap;
2124dfc5606dSYehuda Sadeh 
2125dfc5606dSYehuda Sadeh 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
2126dfc5606dSYehuda Sadeh 	dev = &rbd_dev->dev;
2127dfc5606dSYehuda Sadeh 
2128dfc5606dSYehuda Sadeh 	dev->bus = &rbd_bus_type;
2129dfc5606dSYehuda Sadeh 	dev->type = &rbd_device_type;
2130dfc5606dSYehuda Sadeh 	dev->parent = &rbd_root_dev;
2131dfc5606dSYehuda Sadeh 	dev->release = rbd_dev_release;
2132dfc5606dSYehuda Sadeh 	dev_set_name(dev, "%d", rbd_dev->id);
2133dfc5606dSYehuda Sadeh 	ret = device_register(dev);
2134dfc5606dSYehuda Sadeh 	if (ret < 0)
2135f0f8cef5SAlex Elder 		goto out;
2136dfc5606dSYehuda Sadeh 
2137dfc5606dSYehuda Sadeh 	list_for_each_entry(snap, &rbd_dev->snaps, node) {
2138dfc5606dSYehuda Sadeh 		ret = rbd_register_snap_dev(rbd_dev, snap,
2139dfc5606dSYehuda Sadeh 					     &rbd_dev->dev);
2140dfc5606dSYehuda Sadeh 		if (ret < 0)
2141602adf40SYehuda Sadeh 			break;
2142602adf40SYehuda Sadeh 	}
2143f0f8cef5SAlex Elder out:
2144dfc5606dSYehuda Sadeh 	mutex_unlock(&ctl_mutex);
2145dfc5606dSYehuda Sadeh 	return ret;
2146602adf40SYehuda Sadeh }
2147602adf40SYehuda Sadeh 
2148dfc5606dSYehuda Sadeh static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
2149dfc5606dSYehuda Sadeh {
2150dfc5606dSYehuda Sadeh 	device_unregister(&rbd_dev->dev);
2151dfc5606dSYehuda Sadeh }
2152dfc5606dSYehuda Sadeh 
215359c2be1eSYehuda Sadeh static int rbd_init_watch_dev(struct rbd_device *rbd_dev)
215459c2be1eSYehuda Sadeh {
215559c2be1eSYehuda Sadeh 	int ret, rc;
215659c2be1eSYehuda Sadeh 
215759c2be1eSYehuda Sadeh 	do {
215859c2be1eSYehuda Sadeh 		ret = rbd_req_sync_watch(rbd_dev, rbd_dev->obj_md_name,
215959c2be1eSYehuda Sadeh 					 rbd_dev->header.obj_version);
216059c2be1eSYehuda Sadeh 		if (ret == -ERANGE) {
216159c2be1eSYehuda Sadeh 			mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
2162263c6ca0SJosh Durgin 			rc = __rbd_refresh_header(rbd_dev);
216359c2be1eSYehuda Sadeh 			mutex_unlock(&ctl_mutex);
216459c2be1eSYehuda Sadeh 			if (rc < 0)
216559c2be1eSYehuda Sadeh 				return rc;
216659c2be1eSYehuda Sadeh 		}
216759c2be1eSYehuda Sadeh 	} while (ret == -ERANGE);
216859c2be1eSYehuda Sadeh 
216959c2be1eSYehuda Sadeh 	return ret;
217059c2be1eSYehuda Sadeh }
217159c2be1eSYehuda Sadeh 
21721ddbe94eSAlex Elder static atomic64_t rbd_id_max = ATOMIC64_INIT(0);
21731ddbe94eSAlex Elder 
21741ddbe94eSAlex Elder /*
2175499afd5bSAlex Elder  * Get a unique rbd identifier for the given new rbd_dev, and add
2176499afd5bSAlex Elder  * the rbd_dev to the global list.  The minimum rbd id is 1.
21771ddbe94eSAlex Elder  */
2178499afd5bSAlex Elder static void rbd_id_get(struct rbd_device *rbd_dev)
2179b7f23c36SAlex Elder {
2180499afd5bSAlex Elder 	rbd_dev->id = atomic64_inc_return(&rbd_id_max);
2181499afd5bSAlex Elder 
2182499afd5bSAlex Elder 	spin_lock(&rbd_dev_list_lock);
2183499afd5bSAlex Elder 	list_add_tail(&rbd_dev->node, &rbd_dev_list);
2184499afd5bSAlex Elder 	spin_unlock(&rbd_dev_list_lock);
2185b7f23c36SAlex Elder }
2186b7f23c36SAlex Elder 
21871ddbe94eSAlex Elder /*
2188499afd5bSAlex Elder  * Remove an rbd_dev from the global list, and record that its
2189499afd5bSAlex Elder  * identifier is no longer in use.
21901ddbe94eSAlex Elder  */
2191499afd5bSAlex Elder static void rbd_id_put(struct rbd_device *rbd_dev)
21921ddbe94eSAlex Elder {
2193d184f6bfSAlex Elder 	struct list_head *tmp;
2194d184f6bfSAlex Elder 	int rbd_id = rbd_dev->id;
2195d184f6bfSAlex Elder 	int max_id;
2196d184f6bfSAlex Elder 
2197d184f6bfSAlex Elder 	BUG_ON(rbd_id < 1);
2198499afd5bSAlex Elder 
2199499afd5bSAlex Elder 	spin_lock(&rbd_dev_list_lock);
2200499afd5bSAlex Elder 	list_del_init(&rbd_dev->node);
2201d184f6bfSAlex Elder 
2202d184f6bfSAlex Elder 	/*
2203d184f6bfSAlex Elder 	 * If the id being "put" is not the current maximum, there
2204d184f6bfSAlex Elder 	 * is nothing special we need to do.
2205d184f6bfSAlex Elder 	 */
2206d184f6bfSAlex Elder 	if (rbd_id != atomic64_read(&rbd_id_max)) {
2207d184f6bfSAlex Elder 		spin_unlock(&rbd_dev_list_lock);
2208d184f6bfSAlex Elder 		return;
2209d184f6bfSAlex Elder 	}
2210d184f6bfSAlex Elder 
2211d184f6bfSAlex Elder 	/*
2212d184f6bfSAlex Elder 	 * We need to update the current maximum id.  Search the
2213d184f6bfSAlex Elder 	 * list to find out what it is.  We're more likely to find
2214d184f6bfSAlex Elder 	 * the maximum at the end, so search the list backward.
2215d184f6bfSAlex Elder 	 */
2216d184f6bfSAlex Elder 	max_id = 0;
2217d184f6bfSAlex Elder 	list_for_each_prev(tmp, &rbd_dev_list) {
2218d184f6bfSAlex Elder 		struct rbd_device *rbd_dev;
2219d184f6bfSAlex Elder 
2220d184f6bfSAlex Elder 		rbd_dev = list_entry(tmp, struct rbd_device, node);
2221d184f6bfSAlex Elder 		if (rbd_id > max_id)
2222d184f6bfSAlex Elder 			max_id = rbd_id;
2223d184f6bfSAlex Elder 	}
2224499afd5bSAlex Elder 	spin_unlock(&rbd_dev_list_lock);
22251ddbe94eSAlex Elder 
22261ddbe94eSAlex Elder 	/*
2227d184f6bfSAlex Elder 	 * The max id could have been updated by rbd_id_get(), in
2228d184f6bfSAlex Elder 	 * which case it now accurately reflects the new maximum.
2229d184f6bfSAlex Elder 	 * Be careful not to overwrite the maximum value in that
2230d184f6bfSAlex Elder 	 * case.
22311ddbe94eSAlex Elder 	 */
2232d184f6bfSAlex Elder 	atomic64_cmpxchg(&rbd_id_max, rbd_id, max_id);
2233b7f23c36SAlex Elder }
2234b7f23c36SAlex Elder 
/*
 * Advance *buf past any leading white space so that it points at the
 * first non-space character (or at the terminating '\0' if there is
 * none), and return the length of the token -- the run of non-white
 * space characters -- that starts there.  *buf must be terminated
 * with '\0'.
 */
static inline size_t next_token(const char **buf)
{
	/*
	 * The characters for which isspace() is nonzero in the "C"
	 * and "POSIX" locales.
	 */
	static const char spaces[] = " \f\n\r\t\v";
	const char *token = *buf + strspn(*buf, spaces);

	*buf = token;

	return strcspn(token, spaces);
}
2253e28fff26SAlex Elder 
/*
 * Locate the next token in *buf and, provided it fits, copy it into
 * the @token buffer of @token_size bytes with a terminating '\0'.
 * *buf must be terminated with '\0' on entry.
 *
 * Returns the length of the token found, not counting the '\0'.  The
 * result is 0 when there is no token, and >= @token_size when the
 * token did not fit (in which case @token is left untouched).
 *
 * On return *buf points just past the token, whether or not it was
 * copied.
 */
static inline size_t copy_token(const char **buf,
				char *token,
				size_t token_size)
{
	size_t token_len = next_token(buf);

	if (token_len < token_size) {
		memcpy(token, *buf, token_len);
		token[token_len] = '\0';
	}
	*buf += token_len;

	return token_len;
}
2283e28fff26SAlex Elder 
2284e28fff26SAlex Elder /*
2285a725f65eSAlex Elder  * This fills in the pool_name, obj, obj_len, snap_name, obj_len,
2286a725f65eSAlex Elder  * rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based
2287a725f65eSAlex Elder  * on the list of monitor addresses and other options provided via
2288a725f65eSAlex Elder  * /sys/bus/rbd/add.
2289a725f65eSAlex Elder  */
2290a725f65eSAlex Elder static int rbd_add_parse_args(struct rbd_device *rbd_dev,
2291a725f65eSAlex Elder 			      const char *buf,
22927ef3214aSAlex Elder 			      const char **mon_addrs,
22935214ecc4SAlex Elder 			      size_t *mon_addrs_size,
2294e28fff26SAlex Elder 			      char *options,
2295e28fff26SAlex Elder 			      size_t options_size)
2296a725f65eSAlex Elder {
2297e28fff26SAlex Elder 	size_t	len;
2298e28fff26SAlex Elder 
2299e28fff26SAlex Elder 	/* The first four tokens are required */
2300e28fff26SAlex Elder 
23017ef3214aSAlex Elder 	len = next_token(&buf);
23027ef3214aSAlex Elder 	if (!len)
2303a725f65eSAlex Elder 		return -EINVAL;
23045214ecc4SAlex Elder 	*mon_addrs_size = len + 1;
23057ef3214aSAlex Elder 	*mon_addrs = buf;
23067ef3214aSAlex Elder 
23077ef3214aSAlex Elder 	buf += len;
2308a725f65eSAlex Elder 
2309e28fff26SAlex Elder 	len = copy_token(&buf, options, options_size);
2310e28fff26SAlex Elder 	if (!len || len >= options_size)
2311e28fff26SAlex Elder 		return -EINVAL;
2312a725f65eSAlex Elder 
2313e28fff26SAlex Elder 	len = copy_token(&buf, rbd_dev->pool_name, sizeof (rbd_dev->pool_name));
2314e28fff26SAlex Elder 	if (!len || len >= sizeof (rbd_dev->pool_name))
2315e28fff26SAlex Elder 		return -EINVAL;
2316e28fff26SAlex Elder 
2317e28fff26SAlex Elder 	len = copy_token(&buf, rbd_dev->obj, sizeof (rbd_dev->obj));
2318e28fff26SAlex Elder 	if (!len || len >= sizeof (rbd_dev->obj))
2319e28fff26SAlex Elder 		return -EINVAL;
2320e28fff26SAlex Elder 
2321e28fff26SAlex Elder 	/* We have the object length in hand, save it. */
2322e28fff26SAlex Elder 
2323e28fff26SAlex Elder 	rbd_dev->obj_len = len;
2324e28fff26SAlex Elder 
232581a89793SAlex Elder 	BUILD_BUG_ON(RBD_MAX_MD_NAME_LEN
232681a89793SAlex Elder 				< RBD_MAX_OBJ_NAME_LEN + sizeof (RBD_SUFFIX));
232781a89793SAlex Elder 	sprintf(rbd_dev->obj_md_name, "%s%s", rbd_dev->obj, RBD_SUFFIX);
2328a725f65eSAlex Elder 
2329e28fff26SAlex Elder 	/*
2330e28fff26SAlex Elder 	 * The snapshot name is optional, but it's an error if it's
2331e28fff26SAlex Elder 	 * too long.  If no snapshot is supplied, fill in the default.
2332e28fff26SAlex Elder 	 */
2333e28fff26SAlex Elder 	len = copy_token(&buf, rbd_dev->snap_name, sizeof (rbd_dev->snap_name));
2334e28fff26SAlex Elder 	if (!len)
2335e28fff26SAlex Elder 		memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME,
2336e28fff26SAlex Elder 			sizeof (RBD_SNAP_HEAD_NAME));
2337e28fff26SAlex Elder 	else if (len >= sizeof (rbd_dev->snap_name))
2338e28fff26SAlex Elder 		return -EINVAL;
2339e28fff26SAlex Elder 
2340a725f65eSAlex Elder 	return 0;
2341a725f65eSAlex Elder }
2342a725f65eSAlex Elder 
234359c2be1eSYehuda Sadeh static ssize_t rbd_add(struct bus_type *bus,
234459c2be1eSYehuda Sadeh 		       const char *buf,
234559c2be1eSYehuda Sadeh 		       size_t count)
2346602adf40SYehuda Sadeh {
2347602adf40SYehuda Sadeh 	struct rbd_device *rbd_dev;
23487ef3214aSAlex Elder 	const char *mon_addrs = NULL;
23497ef3214aSAlex Elder 	size_t mon_addrs_size = 0;
235027cc2594SAlex Elder 	char *options = NULL;
235127cc2594SAlex Elder 	struct ceph_osd_client *osdc;
235227cc2594SAlex Elder 	int rc = -ENOMEM;
2353602adf40SYehuda Sadeh 
2354602adf40SYehuda Sadeh 	if (!try_module_get(THIS_MODULE))
2355602adf40SYehuda Sadeh 		return -ENODEV;
2356602adf40SYehuda Sadeh 
2357602adf40SYehuda Sadeh 	rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
2358602adf40SYehuda Sadeh 	if (!rbd_dev)
235927cc2594SAlex Elder 		goto err_nomem;
236027cc2594SAlex Elder 	options = kmalloc(count, GFP_KERNEL);
236127cc2594SAlex Elder 	if (!options)
236227cc2594SAlex Elder 		goto err_nomem;
2363602adf40SYehuda Sadeh 
2364602adf40SYehuda Sadeh 	/* static rbd_device initialization */
2365602adf40SYehuda Sadeh 	spin_lock_init(&rbd_dev->lock);
2366602adf40SYehuda Sadeh 	INIT_LIST_HEAD(&rbd_dev->node);
2367dfc5606dSYehuda Sadeh 	INIT_LIST_HEAD(&rbd_dev->snaps);
2368c666601aSJosh Durgin 	init_rwsem(&rbd_dev->header_rwsem);
2369602adf40SYehuda Sadeh 
2370c666601aSJosh Durgin 	init_rwsem(&rbd_dev->header_rwsem);
23710e805a1dSAlex Elder 
2372d184f6bfSAlex Elder 	/* generate unique id: find highest unique id, add one */
2373499afd5bSAlex Elder 	rbd_id_get(rbd_dev);
2374602adf40SYehuda Sadeh 
2375a725f65eSAlex Elder 	/* Fill in the device name, now that we have its id. */
237681a89793SAlex Elder 	BUILD_BUG_ON(DEV_NAME_LEN
237781a89793SAlex Elder 			< sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH);
237881a89793SAlex Elder 	sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->id);
2379e124a82fSAlex Elder 
2380a725f65eSAlex Elder 	/* parse add command */
23817ef3214aSAlex Elder 	rc = rbd_add_parse_args(rbd_dev, buf, &mon_addrs, &mon_addrs_size,
2382e28fff26SAlex Elder 				options, count);
2383a725f65eSAlex Elder 	if (rc)
2384a725f65eSAlex Elder 		goto err_put_id;
2385a725f65eSAlex Elder 
23865214ecc4SAlex Elder 	rbd_dev->rbd_client = rbd_get_client(mon_addrs, mon_addrs_size - 1,
23875214ecc4SAlex Elder 						options);
2388d720bcb0SAlex Elder 	if (IS_ERR(rbd_dev->rbd_client)) {
2389d720bcb0SAlex Elder 		rc = PTR_ERR(rbd_dev->rbd_client);
2390f0f8cef5SAlex Elder 		goto err_put_id;
2391d720bcb0SAlex Elder 	}
2392602adf40SYehuda Sadeh 
2393602adf40SYehuda Sadeh 	/* pick the pool */
23941dbb4399SAlex Elder 	osdc = &rbd_dev->rbd_client->client->osdc;
2395602adf40SYehuda Sadeh 	rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
2396602adf40SYehuda Sadeh 	if (rc < 0)
2397602adf40SYehuda Sadeh 		goto err_out_client;
2398602adf40SYehuda Sadeh 	rbd_dev->poolid = rc;
2399602adf40SYehuda Sadeh 
2400602adf40SYehuda Sadeh 	/* register our block device */
240127cc2594SAlex Elder 	rc = register_blkdev(0, rbd_dev->name);
240227cc2594SAlex Elder 	if (rc < 0)
2403602adf40SYehuda Sadeh 		goto err_out_client;
240427cc2594SAlex Elder 	rbd_dev->major = rc;
2405602adf40SYehuda Sadeh 
2406dfc5606dSYehuda Sadeh 	rc = rbd_bus_add_dev(rbd_dev);
2407dfc5606dSYehuda Sadeh 	if (rc)
2408766fc439SYehuda Sadeh 		goto err_out_blkdev;
2409766fc439SYehuda Sadeh 
241032eec68dSAlex Elder 	/*
241132eec68dSAlex Elder 	 * At this point cleanup in the event of an error is the job
241232eec68dSAlex Elder 	 * of the sysfs code (initiated by rbd_bus_del_dev()).
241332eec68dSAlex Elder 	 *
241432eec68dSAlex Elder 	 * Set up and announce blkdev mapping.
241532eec68dSAlex Elder 	 */
2416602adf40SYehuda Sadeh 	rc = rbd_init_disk(rbd_dev);
2417602adf40SYehuda Sadeh 	if (rc)
2418766fc439SYehuda Sadeh 		goto err_out_bus;
2419602adf40SYehuda Sadeh 
242059c2be1eSYehuda Sadeh 	rc = rbd_init_watch_dev(rbd_dev);
242159c2be1eSYehuda Sadeh 	if (rc)
242259c2be1eSYehuda Sadeh 		goto err_out_bus;
242359c2be1eSYehuda Sadeh 
2424602adf40SYehuda Sadeh 	return count;
2425602adf40SYehuda Sadeh 
2426766fc439SYehuda Sadeh err_out_bus:
2427766fc439SYehuda Sadeh 	/* this will also clean up rest of rbd_dev stuff */
2428766fc439SYehuda Sadeh 
2429766fc439SYehuda Sadeh 	rbd_bus_del_dev(rbd_dev);
2430766fc439SYehuda Sadeh 	kfree(options);
2431766fc439SYehuda Sadeh 	return rc;
2432766fc439SYehuda Sadeh 
2433602adf40SYehuda Sadeh err_out_blkdev:
2434602adf40SYehuda Sadeh 	unregister_blkdev(rbd_dev->major, rbd_dev->name);
2435602adf40SYehuda Sadeh err_out_client:
2436602adf40SYehuda Sadeh 	rbd_put_client(rbd_dev);
2437f0f8cef5SAlex Elder err_put_id:
2438499afd5bSAlex Elder 	rbd_id_put(rbd_dev);
243927cc2594SAlex Elder err_nomem:
2440602adf40SYehuda Sadeh 	kfree(options);
244127cc2594SAlex Elder 	kfree(rbd_dev);
244227cc2594SAlex Elder 
2443602adf40SYehuda Sadeh 	dout("Error adding device %s\n", buf);
2444602adf40SYehuda Sadeh 	module_put(THIS_MODULE);
244527cc2594SAlex Elder 
244627cc2594SAlex Elder 	return (ssize_t) rc;
2447602adf40SYehuda Sadeh }
2448602adf40SYehuda Sadeh 
2449602adf40SYehuda Sadeh static struct rbd_device *__rbd_get_dev(unsigned long id)
2450602adf40SYehuda Sadeh {
2451602adf40SYehuda Sadeh 	struct list_head *tmp;
2452602adf40SYehuda Sadeh 	struct rbd_device *rbd_dev;
2453602adf40SYehuda Sadeh 
2454e124a82fSAlex Elder 	spin_lock(&rbd_dev_list_lock);
2455602adf40SYehuda Sadeh 	list_for_each(tmp, &rbd_dev_list) {
2456602adf40SYehuda Sadeh 		rbd_dev = list_entry(tmp, struct rbd_device, node);
2457e124a82fSAlex Elder 		if (rbd_dev->id == id) {
2458e124a82fSAlex Elder 			spin_unlock(&rbd_dev_list_lock);
2459602adf40SYehuda Sadeh 			return rbd_dev;
2460602adf40SYehuda Sadeh 		}
2461e124a82fSAlex Elder 	}
2462e124a82fSAlex Elder 	spin_unlock(&rbd_dev_list_lock);
2463602adf40SYehuda Sadeh 	return NULL;
2464602adf40SYehuda Sadeh }
2465602adf40SYehuda Sadeh 
2466dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev)
2467602adf40SYehuda Sadeh {
2468593a9e7bSAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
2469602adf40SYehuda Sadeh 
24701dbb4399SAlex Elder 	if (rbd_dev->watch_request) {
24711dbb4399SAlex Elder 		struct ceph_client *client = rbd_dev->rbd_client->client;
24721dbb4399SAlex Elder 
24731dbb4399SAlex Elder 		ceph_osdc_unregister_linger_request(&client->osdc,
247459c2be1eSYehuda Sadeh 						    rbd_dev->watch_request);
24751dbb4399SAlex Elder 	}
247659c2be1eSYehuda Sadeh 	if (rbd_dev->watch_event)
247779e3057cSYehuda Sadeh 		rbd_req_sync_unwatch(rbd_dev, rbd_dev->obj_md_name);
247859c2be1eSYehuda Sadeh 
2479602adf40SYehuda Sadeh 	rbd_put_client(rbd_dev);
2480602adf40SYehuda Sadeh 
2481602adf40SYehuda Sadeh 	/* clean up and free blkdev */
2482602adf40SYehuda Sadeh 	rbd_free_disk(rbd_dev);
2483602adf40SYehuda Sadeh 	unregister_blkdev(rbd_dev->major, rbd_dev->name);
248432eec68dSAlex Elder 
248532eec68dSAlex Elder 	/* done with the id, and with the rbd_dev */
248632eec68dSAlex Elder 	rbd_id_put(rbd_dev);
2487602adf40SYehuda Sadeh 	kfree(rbd_dev);
2488602adf40SYehuda Sadeh 
2489602adf40SYehuda Sadeh 	/* release module ref */
2490602adf40SYehuda Sadeh 	module_put(THIS_MODULE);
2491602adf40SYehuda Sadeh }
2492602adf40SYehuda Sadeh 
2493dfc5606dSYehuda Sadeh static ssize_t rbd_remove(struct bus_type *bus,
2494602adf40SYehuda Sadeh 			  const char *buf,
2495602adf40SYehuda Sadeh 			  size_t count)
2496602adf40SYehuda Sadeh {
2497602adf40SYehuda Sadeh 	struct rbd_device *rbd_dev = NULL;
2498602adf40SYehuda Sadeh 	int target_id, rc;
2499602adf40SYehuda Sadeh 	unsigned long ul;
2500602adf40SYehuda Sadeh 	int ret = count;
2501602adf40SYehuda Sadeh 
2502602adf40SYehuda Sadeh 	rc = strict_strtoul(buf, 10, &ul);
2503602adf40SYehuda Sadeh 	if (rc)
2504602adf40SYehuda Sadeh 		return rc;
2505602adf40SYehuda Sadeh 
2506602adf40SYehuda Sadeh 	/* convert to int; abort if we lost anything in the conversion */
2507602adf40SYehuda Sadeh 	target_id = (int) ul;
2508602adf40SYehuda Sadeh 	if (target_id != ul)
2509602adf40SYehuda Sadeh 		return -EINVAL;
2510602adf40SYehuda Sadeh 
2511602adf40SYehuda Sadeh 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
2512602adf40SYehuda Sadeh 
2513602adf40SYehuda Sadeh 	rbd_dev = __rbd_get_dev(target_id);
2514602adf40SYehuda Sadeh 	if (!rbd_dev) {
2515602adf40SYehuda Sadeh 		ret = -ENOENT;
2516602adf40SYehuda Sadeh 		goto done;
2517602adf40SYehuda Sadeh 	}
2518602adf40SYehuda Sadeh 
2519dfc5606dSYehuda Sadeh 	__rbd_remove_all_snaps(rbd_dev);
2520dfc5606dSYehuda Sadeh 	rbd_bus_del_dev(rbd_dev);
2521602adf40SYehuda Sadeh 
2522602adf40SYehuda Sadeh done:
2523602adf40SYehuda Sadeh 	mutex_unlock(&ctl_mutex);
2524602adf40SYehuda Sadeh 	return ret;
2525602adf40SYehuda Sadeh }
2526602adf40SYehuda Sadeh 
2527dfc5606dSYehuda Sadeh static ssize_t rbd_snap_add(struct device *dev,
2528dfc5606dSYehuda Sadeh 			    struct device_attribute *attr,
2529602adf40SYehuda Sadeh 			    const char *buf,
2530602adf40SYehuda Sadeh 			    size_t count)
2531602adf40SYehuda Sadeh {
2532593a9e7bSAlex Elder 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
2533dfc5606dSYehuda Sadeh 	int ret;
2534dfc5606dSYehuda Sadeh 	char *name = kmalloc(count + 1, GFP_KERNEL);
2535602adf40SYehuda Sadeh 	if (!name)
2536602adf40SYehuda Sadeh 		return -ENOMEM;
2537602adf40SYehuda Sadeh 
2538dfc5606dSYehuda Sadeh 	snprintf(name, count, "%s", buf);
2539602adf40SYehuda Sadeh 
2540602adf40SYehuda Sadeh 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
2541602adf40SYehuda Sadeh 
2542602adf40SYehuda Sadeh 	ret = rbd_header_add_snap(rbd_dev,
2543602adf40SYehuda Sadeh 				  name, GFP_KERNEL);
2544602adf40SYehuda Sadeh 	if (ret < 0)
254559c2be1eSYehuda Sadeh 		goto err_unlock;
2546602adf40SYehuda Sadeh 
2547263c6ca0SJosh Durgin 	ret = __rbd_refresh_header(rbd_dev);
2548602adf40SYehuda Sadeh 	if (ret < 0)
254959c2be1eSYehuda Sadeh 		goto err_unlock;
255059c2be1eSYehuda Sadeh 
255159c2be1eSYehuda Sadeh 	/* shouldn't hold ctl_mutex when notifying.. notify might
255259c2be1eSYehuda Sadeh 	   trigger a watch callback that would need to get that mutex */
255359c2be1eSYehuda Sadeh 	mutex_unlock(&ctl_mutex);
255459c2be1eSYehuda Sadeh 
255559c2be1eSYehuda Sadeh 	/* make a best effort, don't error if failed */
255659c2be1eSYehuda Sadeh 	rbd_req_sync_notify(rbd_dev, rbd_dev->obj_md_name);
2557602adf40SYehuda Sadeh 
2558602adf40SYehuda Sadeh 	ret = count;
255959c2be1eSYehuda Sadeh 	kfree(name);
256059c2be1eSYehuda Sadeh 	return ret;
256159c2be1eSYehuda Sadeh 
256259c2be1eSYehuda Sadeh err_unlock:
2563602adf40SYehuda Sadeh 	mutex_unlock(&ctl_mutex);
2564602adf40SYehuda Sadeh 	kfree(name);
2565602adf40SYehuda Sadeh 	return ret;
2566602adf40SYehuda Sadeh }
2567602adf40SYehuda Sadeh 
2568602adf40SYehuda Sadeh /*
2569602adf40SYehuda Sadeh  * create control files in sysfs
2570dfc5606dSYehuda Sadeh  * /sys/bus/rbd/...
2571602adf40SYehuda Sadeh  */
2572602adf40SYehuda Sadeh static int rbd_sysfs_init(void)
2573602adf40SYehuda Sadeh {
2574dfc5606dSYehuda Sadeh 	int ret;
2575602adf40SYehuda Sadeh 
2576fed4c143SAlex Elder 	ret = device_register(&rbd_root_dev);
2577dfc5606dSYehuda Sadeh 	if (ret < 0)
2578dfc5606dSYehuda Sadeh 		return ret;
2579602adf40SYehuda Sadeh 
2580fed4c143SAlex Elder 	ret = bus_register(&rbd_bus_type);
2581fed4c143SAlex Elder 	if (ret < 0)
2582fed4c143SAlex Elder 		device_unregister(&rbd_root_dev);
2583602adf40SYehuda Sadeh 
2584602adf40SYehuda Sadeh 	return ret;
2585602adf40SYehuda Sadeh }
2586602adf40SYehuda Sadeh 
2587602adf40SYehuda Sadeh static void rbd_sysfs_cleanup(void)
2588602adf40SYehuda Sadeh {
2589dfc5606dSYehuda Sadeh 	bus_unregister(&rbd_bus_type);
2590fed4c143SAlex Elder 	device_unregister(&rbd_root_dev);
2591602adf40SYehuda Sadeh }
2592602adf40SYehuda Sadeh 
2593602adf40SYehuda Sadeh int __init rbd_init(void)
2594602adf40SYehuda Sadeh {
2595602adf40SYehuda Sadeh 	int rc;
2596602adf40SYehuda Sadeh 
2597602adf40SYehuda Sadeh 	rc = rbd_sysfs_init();
2598602adf40SYehuda Sadeh 	if (rc)
2599602adf40SYehuda Sadeh 		return rc;
2600f0f8cef5SAlex Elder 	pr_info("loaded " RBD_DRV_NAME_LONG "\n");
2601602adf40SYehuda Sadeh 	return 0;
2602602adf40SYehuda Sadeh }
2603602adf40SYehuda Sadeh 
2604602adf40SYehuda Sadeh void __exit rbd_exit(void)
2605602adf40SYehuda Sadeh {
2606602adf40SYehuda Sadeh 	rbd_sysfs_cleanup();
2607602adf40SYehuda Sadeh }
2608602adf40SYehuda Sadeh 
2609602adf40SYehuda Sadeh module_init(rbd_init);
2610602adf40SYehuda Sadeh module_exit(rbd_exit);
2611602adf40SYehuda Sadeh 
2612602adf40SYehuda Sadeh MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
2613602adf40SYehuda Sadeh MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
2614602adf40SYehuda Sadeh MODULE_DESCRIPTION("rados block device");
2615602adf40SYehuda Sadeh 
2616602adf40SYehuda Sadeh /* following authorship retained from original osdblk.c */
2617602adf40SYehuda Sadeh MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");
2618602adf40SYehuda Sadeh 
2619602adf40SYehuda Sadeh MODULE_LICENSE("GPL");
2620