1602adf40SYehuda Sadeh /* 2602adf40SYehuda Sadeh rbd.c -- Export ceph rados objects as a Linux block device 3602adf40SYehuda Sadeh 4602adf40SYehuda Sadeh 5602adf40SYehuda Sadeh based on drivers/block/osdblk.c: 6602adf40SYehuda Sadeh 7602adf40SYehuda Sadeh Copyright 2009 Red Hat, Inc. 8602adf40SYehuda Sadeh 9602adf40SYehuda Sadeh This program is free software; you can redistribute it and/or modify 10602adf40SYehuda Sadeh it under the terms of the GNU General Public License as published by 11602adf40SYehuda Sadeh the Free Software Foundation. 12602adf40SYehuda Sadeh 13602adf40SYehuda Sadeh This program is distributed in the hope that it will be useful, 14602adf40SYehuda Sadeh but WITHOUT ANY WARRANTY; without even the implied warranty of 15602adf40SYehuda Sadeh MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16602adf40SYehuda Sadeh GNU General Public License for more details. 17602adf40SYehuda Sadeh 18602adf40SYehuda Sadeh You should have received a copy of the GNU General Public License 19602adf40SYehuda Sadeh along with this program; see the file COPYING. If not, write to 20602adf40SYehuda Sadeh the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 21602adf40SYehuda Sadeh 22602adf40SYehuda Sadeh 23602adf40SYehuda Sadeh 24dfc5606dSYehuda Sadeh For usage instructions, please refer to: 25602adf40SYehuda Sadeh 26dfc5606dSYehuda Sadeh Documentation/ABI/testing/sysfs-bus-rbd 27602adf40SYehuda Sadeh 28602adf40SYehuda Sadeh */ 29602adf40SYehuda Sadeh 30602adf40SYehuda Sadeh #include <linux/ceph/libceph.h> 31602adf40SYehuda Sadeh #include <linux/ceph/osd_client.h> 32602adf40SYehuda Sadeh #include <linux/ceph/mon_client.h> 33602adf40SYehuda Sadeh #include <linux/ceph/decode.h> 3459c2be1eSYehuda Sadeh #include <linux/parser.h> 35602adf40SYehuda Sadeh 36602adf40SYehuda Sadeh #include <linux/kernel.h> 37602adf40SYehuda Sadeh #include <linux/device.h> 38602adf40SYehuda Sadeh #include <linux/module.h> 39602adf40SYehuda Sadeh #include <linux/fs.h> 40602adf40SYehuda Sadeh #include <linux/blkdev.h> 41602adf40SYehuda Sadeh 42602adf40SYehuda Sadeh #include "rbd_types.h" 43602adf40SYehuda Sadeh 44aafb230eSAlex Elder #define RBD_DEBUG /* Activate rbd_assert() calls */ 45aafb230eSAlex Elder 46593a9e7bSAlex Elder /* 47593a9e7bSAlex Elder * The basic unit of block I/O is a sector. It is interpreted in a 48593a9e7bSAlex Elder * number of contexts in Linux (blk, bio, genhd), but the default is 49593a9e7bSAlex Elder * universally 512 bytes. These symbols are just slightly more 50593a9e7bSAlex Elder * meaningful than the bare numbers they represent. 51593a9e7bSAlex Elder */ 52593a9e7bSAlex Elder #define SECTOR_SHIFT 9 53593a9e7bSAlex Elder #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) 54593a9e7bSAlex Elder 55df111be6SAlex Elder /* It might be useful to have this defined elsewhere too */ 56df111be6SAlex Elder 57df111be6SAlex Elder #define U64_MAX ((u64) (~0ULL)) 58df111be6SAlex Elder 59f0f8cef5SAlex Elder #define RBD_DRV_NAME "rbd" 60f0f8cef5SAlex Elder #define RBD_DRV_NAME_LONG "rbd (rados block device)" 61602adf40SYehuda Sadeh 62602adf40SYehuda Sadeh #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ 63602adf40SYehuda Sadeh 64602adf40SYehuda Sadeh #define RBD_MAX_SNAP_NAME_LEN 32 65602adf40SYehuda Sadeh #define RBD_MAX_OPT_LEN 1024 66602adf40SYehuda Sadeh 67602adf40SYehuda Sadeh #define RBD_SNAP_HEAD_NAME "-" 68602adf40SYehuda Sadeh 6981a89793SAlex Elder /* 7081a89793SAlex Elder * An RBD device name will be "rbd#", where the "rbd" comes from 7181a89793SAlex Elder * RBD_DRV_NAME above, and # is a unique integer identifier. 7281a89793SAlex Elder * MAX_INT_FORMAT_WIDTH is used in ensuring DEV_NAME_LEN is big 7381a89793SAlex Elder * enough to hold all possible device names. 7481a89793SAlex Elder */ 75602adf40SYehuda Sadeh #define DEV_NAME_LEN 32 7681a89793SAlex Elder #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) 77602adf40SYehuda Sadeh 78cc0538b6SAlex Elder #define RBD_READ_ONLY_DEFAULT false 7959c2be1eSYehuda Sadeh 80602adf40SYehuda Sadeh /* 81602adf40SYehuda Sadeh * block device image metadata (in-memory version) 82602adf40SYehuda Sadeh */ 83602adf40SYehuda Sadeh struct rbd_image_header { 84f84344f3SAlex Elder /* These four fields never change for a given rbd image */ 85849b4260SAlex Elder char *object_prefix; 86602adf40SYehuda Sadeh __u8 obj_order; 87602adf40SYehuda Sadeh __u8 crypt_type; 88602adf40SYehuda Sadeh __u8 comp_type; 89602adf40SYehuda Sadeh 90f84344f3SAlex Elder /* The remaining fields need to be updated occasionally */ 91f84344f3SAlex Elder u64 image_size; 92f84344f3SAlex Elder struct ceph_snap_context *snapc; 93602adf40SYehuda Sadeh char *snap_names; 94602adf40SYehuda Sadeh u64 *snap_sizes; 9559c2be1eSYehuda Sadeh 9659c2be1eSYehuda Sadeh u64 obj_version; 9759c2be1eSYehuda Sadeh }; 9859c2be1eSYehuda Sadeh 9959c2be1eSYehuda Sadeh struct rbd_options { 100cc0538b6SAlex Elder bool read_only; 101602adf40SYehuda Sadeh }; 102602adf40SYehuda Sadeh 103602adf40SYehuda Sadeh /* 104f0f8cef5SAlex Elder * an instance of the client. multiple devices may share an rbd client. 105602adf40SYehuda Sadeh */ 106602adf40SYehuda Sadeh struct rbd_client { 107602adf40SYehuda Sadeh struct ceph_client *client; 108602adf40SYehuda Sadeh struct kref kref; 109602adf40SYehuda Sadeh struct list_head node; 110602adf40SYehuda Sadeh }; 111602adf40SYehuda Sadeh 112602adf40SYehuda Sadeh /* 113f0f8cef5SAlex Elder * a request completion status 114602adf40SYehuda Sadeh */ 1151fec7093SYehuda Sadeh struct rbd_req_status { 1161fec7093SYehuda Sadeh int done; 1171fec7093SYehuda Sadeh int rc; 1181fec7093SYehuda Sadeh u64 bytes; 1191fec7093SYehuda Sadeh }; 1201fec7093SYehuda Sadeh 1211fec7093SYehuda Sadeh /* 1221fec7093SYehuda Sadeh * a collection of requests 1231fec7093SYehuda Sadeh */ 1241fec7093SYehuda Sadeh struct rbd_req_coll { 1251fec7093SYehuda Sadeh int total; 1261fec7093SYehuda Sadeh int num_done; 1271fec7093SYehuda Sadeh struct kref kref; 1281fec7093SYehuda Sadeh struct rbd_req_status status[0]; 129602adf40SYehuda Sadeh }; 130602adf40SYehuda Sadeh 131f0f8cef5SAlex Elder /* 132f0f8cef5SAlex Elder * a single io request 133f0f8cef5SAlex Elder */ 134f0f8cef5SAlex Elder struct rbd_request { 135f0f8cef5SAlex Elder struct request *rq; /* blk layer request */ 136f0f8cef5SAlex Elder struct bio *bio; /* cloned bio */ 137f0f8cef5SAlex Elder struct page **pages; /* list of used pages */ 138f0f8cef5SAlex Elder u64 len; 139f0f8cef5SAlex Elder int coll_index; 140f0f8cef5SAlex Elder struct rbd_req_coll *coll; 141f0f8cef5SAlex Elder }; 142f0f8cef5SAlex Elder 143dfc5606dSYehuda Sadeh struct rbd_snap { 144dfc5606dSYehuda Sadeh struct device dev; 145dfc5606dSYehuda Sadeh const char *name; 1463591538fSJosh Durgin u64 size; 147dfc5606dSYehuda Sadeh struct list_head node; 148dfc5606dSYehuda Sadeh u64 id; 149dfc5606dSYehuda Sadeh }; 150dfc5606dSYehuda Sadeh 151f84344f3SAlex Elder struct rbd_mapping { 152f84344f3SAlex Elder char *snap_name; 153f84344f3SAlex Elder u64 snap_id; 15499c1f08fSAlex Elder u64 size; 155f84344f3SAlex Elder bool snap_exists; 156f84344f3SAlex Elder bool read_only; 157f84344f3SAlex Elder }; 158f84344f3SAlex Elder 159602adf40SYehuda Sadeh /* 160602adf40SYehuda Sadeh * a single device 161602adf40SYehuda Sadeh */ 162602adf40SYehuda Sadeh struct rbd_device { 163de71a297SAlex Elder int dev_id; /* blkdev unique id */ 164602adf40SYehuda Sadeh 165602adf40SYehuda Sadeh int major; /* blkdev assigned major */ 166602adf40SYehuda Sadeh struct gendisk *disk; /* blkdev's gendisk and rq */ 167602adf40SYehuda Sadeh 168f8c38929SAlex Elder struct rbd_options rbd_opts; 169602adf40SYehuda Sadeh struct rbd_client *rbd_client; 170602adf40SYehuda Sadeh 171602adf40SYehuda Sadeh char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ 172602adf40SYehuda Sadeh 173602adf40SYehuda Sadeh spinlock_t lock; /* queue lock */ 174602adf40SYehuda Sadeh 175602adf40SYehuda Sadeh struct rbd_image_header header; 1760bed54dcSAlex Elder char *image_name; 1770bed54dcSAlex Elder size_t image_name_len; 1780bed54dcSAlex Elder char *header_name; 179d22f76e7SAlex Elder char *pool_name; 1809bb2f334SAlex Elder int pool_id; 181602adf40SYehuda Sadeh 18259c2be1eSYehuda Sadeh struct ceph_osd_event *watch_event; 18359c2be1eSYehuda Sadeh struct ceph_osd_request *watch_request; 18459c2be1eSYehuda Sadeh 185c666601aSJosh Durgin /* protects updating the header */ 186c666601aSJosh Durgin struct rw_semaphore header_rwsem; 187f84344f3SAlex Elder 188f84344f3SAlex Elder struct rbd_mapping mapping; 189602adf40SYehuda Sadeh 190602adf40SYehuda Sadeh struct list_head node; 191dfc5606dSYehuda Sadeh 192dfc5606dSYehuda Sadeh /* list of snapshots */ 193dfc5606dSYehuda Sadeh struct list_head snaps; 194dfc5606dSYehuda Sadeh 195dfc5606dSYehuda Sadeh /* sysfs related */ 196dfc5606dSYehuda Sadeh struct device dev; 197dfc5606dSYehuda Sadeh }; 198dfc5606dSYehuda Sadeh 199602adf40SYehuda Sadeh static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ 200e124a82fSAlex Elder 201602adf40SYehuda Sadeh static LIST_HEAD(rbd_dev_list); /* devices */ 202e124a82fSAlex Elder static DEFINE_SPINLOCK(rbd_dev_list_lock); 203e124a82fSAlex Elder 204602adf40SYehuda Sadeh static LIST_HEAD(rbd_client_list); /* clients */ 205432b8587SAlex Elder static DEFINE_SPINLOCK(rbd_client_list_lock); 206602adf40SYehuda Sadeh 2079fcbb800SAlex Elder static int rbd_dev_snap_devs_update(struct rbd_device *rbd_dev); 208dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev); 209dfc5606dSYehuda Sadeh static ssize_t rbd_snap_add(struct device *dev, 210dfc5606dSYehuda Sadeh struct device_attribute *attr, 211dfc5606dSYehuda Sadeh const char *buf, 212dfc5606dSYehuda Sadeh size_t count); 21314e7085dSAlex Elder static void __rbd_remove_snap_dev(struct rbd_snap *snap); 214dfc5606dSYehuda Sadeh 215f0f8cef5SAlex Elder static ssize_t rbd_add(struct bus_type *bus, const char *buf, 216f0f8cef5SAlex Elder size_t count); 217f0f8cef5SAlex Elder static ssize_t rbd_remove(struct bus_type *bus, const char *buf, 218f0f8cef5SAlex Elder size_t count); 219f0f8cef5SAlex Elder 220f0f8cef5SAlex Elder static struct bus_attribute rbd_bus_attrs[] = { 221f0f8cef5SAlex Elder __ATTR(add, S_IWUSR, NULL, rbd_add), 222f0f8cef5SAlex Elder __ATTR(remove, S_IWUSR, NULL, rbd_remove), 223f0f8cef5SAlex Elder __ATTR_NULL 224f0f8cef5SAlex Elder }; 225f0f8cef5SAlex Elder 226f0f8cef5SAlex Elder static struct bus_type rbd_bus_type = { 227f0f8cef5SAlex Elder .name = "rbd", 228f0f8cef5SAlex Elder .bus_attrs = rbd_bus_attrs, 229f0f8cef5SAlex Elder }; 230f0f8cef5SAlex Elder 231f0f8cef5SAlex Elder static void rbd_root_dev_release(struct device *dev) 232f0f8cef5SAlex Elder { 233f0f8cef5SAlex Elder } 234f0f8cef5SAlex Elder 235f0f8cef5SAlex Elder static struct device rbd_root_dev = { 236f0f8cef5SAlex Elder .init_name = "rbd", 237f0f8cef5SAlex Elder .release = rbd_root_dev_release, 238f0f8cef5SAlex Elder }; 239f0f8cef5SAlex Elder 240aafb230eSAlex Elder #ifdef RBD_DEBUG 241aafb230eSAlex Elder #define rbd_assert(expr) \ 242aafb230eSAlex Elder if (unlikely(!(expr))) { \ 243aafb230eSAlex Elder printk(KERN_ERR "\nAssertion failure in %s() " \ 244aafb230eSAlex Elder "at line %d:\n\n" \ 245aafb230eSAlex Elder "\trbd_assert(%s);\n\n", \ 246aafb230eSAlex Elder __func__, __LINE__, #expr); \ 247aafb230eSAlex Elder BUG(); \ 248aafb230eSAlex Elder } 249aafb230eSAlex Elder #else /* !RBD_DEBUG */ 250aafb230eSAlex Elder # define rbd_assert(expr) ((void) 0) 251aafb230eSAlex Elder #endif /* !RBD_DEBUG */ 252dfc5606dSYehuda Sadeh 253dfc5606dSYehuda Sadeh static struct device *rbd_get_dev(struct rbd_device *rbd_dev) 254dfc5606dSYehuda Sadeh { 255dfc5606dSYehuda Sadeh return get_device(&rbd_dev->dev); 256dfc5606dSYehuda Sadeh } 257dfc5606dSYehuda Sadeh 258dfc5606dSYehuda Sadeh static void rbd_put_dev(struct rbd_device *rbd_dev) 259dfc5606dSYehuda Sadeh { 260dfc5606dSYehuda Sadeh put_device(&rbd_dev->dev); 261dfc5606dSYehuda Sadeh } 262602adf40SYehuda Sadeh 2631fe5e993SAlex Elder static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver); 26459c2be1eSYehuda Sadeh 265602adf40SYehuda Sadeh static int rbd_open(struct block_device *bdev, fmode_t mode) 266602adf40SYehuda Sadeh { 267f0f8cef5SAlex Elder struct rbd_device *rbd_dev = bdev->bd_disk->private_data; 268602adf40SYehuda Sadeh 269f84344f3SAlex Elder if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only) 270602adf40SYehuda Sadeh return -EROFS; 271602adf40SYehuda Sadeh 272340c7a2bSAlex Elder rbd_get_dev(rbd_dev); 273f84344f3SAlex Elder set_device_ro(bdev, rbd_dev->mapping.read_only); 274340c7a2bSAlex Elder 275602adf40SYehuda Sadeh return 0; 276602adf40SYehuda Sadeh } 277602adf40SYehuda Sadeh 278dfc5606dSYehuda Sadeh static int rbd_release(struct gendisk *disk, fmode_t mode) 279dfc5606dSYehuda Sadeh { 280dfc5606dSYehuda Sadeh struct rbd_device *rbd_dev = disk->private_data; 281dfc5606dSYehuda Sadeh 282dfc5606dSYehuda Sadeh rbd_put_dev(rbd_dev); 283dfc5606dSYehuda Sadeh 284dfc5606dSYehuda Sadeh return 0; 285dfc5606dSYehuda Sadeh } 286dfc5606dSYehuda Sadeh 287602adf40SYehuda Sadeh static const struct block_device_operations rbd_bd_ops = { 288602adf40SYehuda Sadeh .owner = THIS_MODULE, 289602adf40SYehuda Sadeh .open = rbd_open, 290dfc5606dSYehuda Sadeh .release = rbd_release, 291602adf40SYehuda Sadeh }; 292602adf40SYehuda Sadeh 293602adf40SYehuda Sadeh /* 294602adf40SYehuda Sadeh * Initialize an rbd client instance. 29543ae4701SAlex Elder * We own *ceph_opts. 296602adf40SYehuda Sadeh */ 297f8c38929SAlex Elder static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) 298602adf40SYehuda Sadeh { 299602adf40SYehuda Sadeh struct rbd_client *rbdc; 300602adf40SYehuda Sadeh int ret = -ENOMEM; 301602adf40SYehuda Sadeh 302602adf40SYehuda Sadeh dout("rbd_client_create\n"); 303602adf40SYehuda Sadeh rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); 304602adf40SYehuda Sadeh if (!rbdc) 305602adf40SYehuda Sadeh goto out_opt; 306602adf40SYehuda Sadeh 307602adf40SYehuda Sadeh kref_init(&rbdc->kref); 308602adf40SYehuda Sadeh INIT_LIST_HEAD(&rbdc->node); 309602adf40SYehuda Sadeh 310bc534d86SAlex Elder mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 311bc534d86SAlex Elder 31243ae4701SAlex Elder rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0); 313602adf40SYehuda Sadeh if (IS_ERR(rbdc->client)) 314bc534d86SAlex Elder goto out_mutex; 31543ae4701SAlex Elder ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */ 316602adf40SYehuda Sadeh 317602adf40SYehuda Sadeh ret = ceph_open_session(rbdc->client); 318602adf40SYehuda Sadeh if (ret < 0) 319602adf40SYehuda Sadeh goto out_err; 320602adf40SYehuda Sadeh 321432b8587SAlex Elder spin_lock(&rbd_client_list_lock); 322602adf40SYehuda Sadeh list_add_tail(&rbdc->node, &rbd_client_list); 323432b8587SAlex Elder spin_unlock(&rbd_client_list_lock); 324602adf40SYehuda Sadeh 325bc534d86SAlex Elder mutex_unlock(&ctl_mutex); 326bc534d86SAlex Elder 327602adf40SYehuda Sadeh dout("rbd_client_create created %p\n", rbdc); 328602adf40SYehuda Sadeh return rbdc; 329602adf40SYehuda Sadeh 330602adf40SYehuda Sadeh out_err: 331602adf40SYehuda Sadeh ceph_destroy_client(rbdc->client); 332bc534d86SAlex Elder out_mutex: 333bc534d86SAlex Elder mutex_unlock(&ctl_mutex); 334602adf40SYehuda Sadeh kfree(rbdc); 335602adf40SYehuda Sadeh out_opt: 33643ae4701SAlex Elder if (ceph_opts) 33743ae4701SAlex Elder ceph_destroy_options(ceph_opts); 33828f259b7SVasiliy Kulikov return ERR_PTR(ret); 339602adf40SYehuda Sadeh } 340602adf40SYehuda Sadeh 341602adf40SYehuda Sadeh /* 3421f7ba331SAlex Elder * Find a ceph client with specific addr and configuration. If 3431f7ba331SAlex Elder * found, bump its reference count. 344602adf40SYehuda Sadeh */ 3451f7ba331SAlex Elder static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts) 346602adf40SYehuda Sadeh { 347602adf40SYehuda Sadeh struct rbd_client *client_node; 3481f7ba331SAlex Elder bool found = false; 349602adf40SYehuda Sadeh 35043ae4701SAlex Elder if (ceph_opts->flags & CEPH_OPT_NOSHARE) 351602adf40SYehuda Sadeh return NULL; 352602adf40SYehuda Sadeh 3531f7ba331SAlex Elder spin_lock(&rbd_client_list_lock); 3541f7ba331SAlex Elder list_for_each_entry(client_node, &rbd_client_list, node) { 3551f7ba331SAlex Elder if (!ceph_compare_options(ceph_opts, client_node->client)) { 3561f7ba331SAlex Elder kref_get(&client_node->kref); 3571f7ba331SAlex Elder found = true; 3581f7ba331SAlex Elder break; 3591f7ba331SAlex Elder } 3601f7ba331SAlex Elder } 3611f7ba331SAlex Elder spin_unlock(&rbd_client_list_lock); 3621f7ba331SAlex Elder 3631f7ba331SAlex Elder return found ? client_node : NULL; 364602adf40SYehuda Sadeh } 365602adf40SYehuda Sadeh 366602adf40SYehuda Sadeh /* 36759c2be1eSYehuda Sadeh * mount options 36859c2be1eSYehuda Sadeh */ 36959c2be1eSYehuda Sadeh enum { 37059c2be1eSYehuda Sadeh Opt_last_int, 37159c2be1eSYehuda Sadeh /* int args above */ 37259c2be1eSYehuda Sadeh Opt_last_string, 37359c2be1eSYehuda Sadeh /* string args above */ 374cc0538b6SAlex Elder Opt_read_only, 375cc0538b6SAlex Elder Opt_read_write, 376cc0538b6SAlex Elder /* Boolean args above */ 377cc0538b6SAlex Elder Opt_last_bool, 37859c2be1eSYehuda Sadeh }; 37959c2be1eSYehuda Sadeh 38043ae4701SAlex Elder static match_table_t rbd_opts_tokens = { 38159c2be1eSYehuda Sadeh /* int args above */ 38259c2be1eSYehuda Sadeh /* string args above */ 383f84344f3SAlex Elder {Opt_read_only, "mapping.read_only"}, 384cc0538b6SAlex Elder {Opt_read_only, "ro"}, /* Alternate spelling */ 385cc0538b6SAlex Elder {Opt_read_write, "read_write"}, 386cc0538b6SAlex Elder {Opt_read_write, "rw"}, /* Alternate spelling */ 387cc0538b6SAlex Elder /* Boolean args above */ 38859c2be1eSYehuda Sadeh {-1, NULL} 38959c2be1eSYehuda Sadeh }; 39059c2be1eSYehuda Sadeh 39159c2be1eSYehuda Sadeh static int parse_rbd_opts_token(char *c, void *private) 39259c2be1eSYehuda Sadeh { 39343ae4701SAlex Elder struct rbd_options *rbd_opts = private; 39459c2be1eSYehuda Sadeh substring_t argstr[MAX_OPT_ARGS]; 39559c2be1eSYehuda Sadeh int token, intval, ret; 39659c2be1eSYehuda Sadeh 39743ae4701SAlex Elder token = match_token(c, rbd_opts_tokens, argstr); 39859c2be1eSYehuda Sadeh if (token < 0) 39959c2be1eSYehuda Sadeh return -EINVAL; 40059c2be1eSYehuda Sadeh 40159c2be1eSYehuda Sadeh if (token < Opt_last_int) { 40259c2be1eSYehuda Sadeh ret = match_int(&argstr[0], &intval); 40359c2be1eSYehuda Sadeh if (ret < 0) { 40459c2be1eSYehuda Sadeh pr_err("bad mount option arg (not int) " 40559c2be1eSYehuda Sadeh "at '%s'\n", c); 40659c2be1eSYehuda Sadeh return ret; 40759c2be1eSYehuda Sadeh } 40859c2be1eSYehuda Sadeh dout("got int token %d val %d\n", token, intval); 40959c2be1eSYehuda Sadeh } else if (token > Opt_last_int && token < Opt_last_string) { 41059c2be1eSYehuda Sadeh dout("got string token %d val %s\n", token, 41159c2be1eSYehuda Sadeh argstr[0].from); 412cc0538b6SAlex Elder } else if (token > Opt_last_string && token < Opt_last_bool) { 413cc0538b6SAlex Elder dout("got Boolean token %d\n", token); 41459c2be1eSYehuda Sadeh } else { 41559c2be1eSYehuda Sadeh dout("got token %d\n", token); 41659c2be1eSYehuda Sadeh } 41759c2be1eSYehuda Sadeh 41859c2be1eSYehuda Sadeh switch (token) { 419cc0538b6SAlex Elder case Opt_read_only: 420cc0538b6SAlex Elder rbd_opts->read_only = true; 421cc0538b6SAlex Elder break; 422cc0538b6SAlex Elder case Opt_read_write: 423cc0538b6SAlex Elder rbd_opts->read_only = false; 424cc0538b6SAlex Elder break; 42559c2be1eSYehuda Sadeh default: 426aafb230eSAlex Elder rbd_assert(false); 427aafb230eSAlex Elder break; 42859c2be1eSYehuda Sadeh } 42959c2be1eSYehuda Sadeh return 0; 43059c2be1eSYehuda Sadeh } 43159c2be1eSYehuda Sadeh 43259c2be1eSYehuda Sadeh /* 433602adf40SYehuda Sadeh * Get a ceph client with specific addr and configuration, if one does 434602adf40SYehuda Sadeh * not exist create it. 435602adf40SYehuda Sadeh */ 436f8c38929SAlex Elder static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, 437f8c38929SAlex Elder size_t mon_addr_len, char *options) 438602adf40SYehuda Sadeh { 439f8c38929SAlex Elder struct rbd_options *rbd_opts = &rbd_dev->rbd_opts; 44043ae4701SAlex Elder struct ceph_options *ceph_opts; 441f8c38929SAlex Elder struct rbd_client *rbdc; 44259c2be1eSYehuda Sadeh 443cc0538b6SAlex Elder rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; 444602adf40SYehuda Sadeh 44543ae4701SAlex Elder ceph_opts = ceph_parse_options(options, mon_addr, 4465214ecc4SAlex Elder mon_addr + mon_addr_len, 44721079786SAlex Elder parse_rbd_opts_token, rbd_opts); 448f8c38929SAlex Elder if (IS_ERR(ceph_opts)) 449f8c38929SAlex Elder return PTR_ERR(ceph_opts); 450602adf40SYehuda Sadeh 4511f7ba331SAlex Elder rbdc = rbd_client_find(ceph_opts); 452602adf40SYehuda Sadeh if (rbdc) { 453e6994d3dSAlex Elder /* using an existing client */ 45443ae4701SAlex Elder ceph_destroy_options(ceph_opts); 455f8c38929SAlex Elder } else { 456f8c38929SAlex Elder rbdc = rbd_client_create(ceph_opts); 457d720bcb0SAlex Elder if (IS_ERR(rbdc)) 458f8c38929SAlex Elder return PTR_ERR(rbdc); 459f8c38929SAlex Elder } 460f8c38929SAlex Elder rbd_dev->rbd_client = rbdc; 461d720bcb0SAlex Elder 462f8c38929SAlex Elder return 0; 463602adf40SYehuda Sadeh } 464602adf40SYehuda Sadeh 465602adf40SYehuda Sadeh /* 466602adf40SYehuda Sadeh * Destroy ceph client 467d23a4b3fSAlex Elder * 468432b8587SAlex Elder * Caller must hold rbd_client_list_lock. 469602adf40SYehuda Sadeh */ 470602adf40SYehuda Sadeh static void rbd_client_release(struct kref *kref) 471602adf40SYehuda Sadeh { 472602adf40SYehuda Sadeh struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); 473602adf40SYehuda Sadeh 474602adf40SYehuda Sadeh dout("rbd_release_client %p\n", rbdc); 475cd9d9f5dSAlex Elder spin_lock(&rbd_client_list_lock); 476602adf40SYehuda Sadeh list_del(&rbdc->node); 477cd9d9f5dSAlex Elder spin_unlock(&rbd_client_list_lock); 478602adf40SYehuda Sadeh 479602adf40SYehuda Sadeh ceph_destroy_client(rbdc->client); 480602adf40SYehuda Sadeh kfree(rbdc); 481602adf40SYehuda Sadeh } 482602adf40SYehuda Sadeh 483602adf40SYehuda Sadeh /* 484602adf40SYehuda Sadeh * Drop reference to ceph client node. If it's not referenced anymore, release 485602adf40SYehuda Sadeh * it. 486602adf40SYehuda Sadeh */ 487602adf40SYehuda Sadeh static void rbd_put_client(struct rbd_device *rbd_dev) 488602adf40SYehuda Sadeh { 489602adf40SYehuda Sadeh kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); 490602adf40SYehuda Sadeh rbd_dev->rbd_client = NULL; 491602adf40SYehuda Sadeh } 492602adf40SYehuda Sadeh 4931fec7093SYehuda Sadeh /* 4941fec7093SYehuda Sadeh * Destroy requests collection 4951fec7093SYehuda Sadeh */ 4961fec7093SYehuda Sadeh static void rbd_coll_release(struct kref *kref) 4971fec7093SYehuda Sadeh { 4981fec7093SYehuda Sadeh struct rbd_req_coll *coll = 4991fec7093SYehuda Sadeh container_of(kref, struct rbd_req_coll, kref); 5001fec7093SYehuda Sadeh 5011fec7093SYehuda Sadeh dout("rbd_coll_release %p\n", coll); 5021fec7093SYehuda Sadeh kfree(coll); 5031fec7093SYehuda Sadeh } 504602adf40SYehuda Sadeh 5058e94af8eSAlex Elder static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) 5068e94af8eSAlex Elder { 507103a150fSAlex Elder size_t size; 508103a150fSAlex Elder u32 snap_count; 509103a150fSAlex Elder 510103a150fSAlex Elder /* The header has to start with the magic rbd header text */ 511103a150fSAlex Elder if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT))) 512103a150fSAlex Elder return false; 513103a150fSAlex Elder 514103a150fSAlex Elder /* 515103a150fSAlex Elder * The size of a snapshot header has to fit in a size_t, and 516103a150fSAlex Elder * that limits the number of snapshots. 517103a150fSAlex Elder */ 518103a150fSAlex Elder snap_count = le32_to_cpu(ondisk->snap_count); 519103a150fSAlex Elder size = SIZE_MAX - sizeof (struct ceph_snap_context); 520103a150fSAlex Elder if (snap_count > size / sizeof (__le64)) 521103a150fSAlex Elder return false; 522103a150fSAlex Elder 523103a150fSAlex Elder /* 524103a150fSAlex Elder * Not only that, but the size of the entire the snapshot 525103a150fSAlex Elder * header must also be representable in a size_t. 526103a150fSAlex Elder */ 527103a150fSAlex Elder size -= snap_count * sizeof (__le64); 528103a150fSAlex Elder if ((u64) size < le64_to_cpu(ondisk->snap_names_len)) 529103a150fSAlex Elder return false; 530103a150fSAlex Elder 531103a150fSAlex Elder return true; 5328e94af8eSAlex Elder } 5338e94af8eSAlex Elder 534602adf40SYehuda Sadeh /* 535602adf40SYehuda Sadeh * Create a new header structure, translate header format from the on-disk 536602adf40SYehuda Sadeh * header. 537602adf40SYehuda Sadeh */ 538602adf40SYehuda Sadeh static int rbd_header_from_disk(struct rbd_image_header *header, 5394156d998SAlex Elder struct rbd_image_header_ondisk *ondisk) 540602adf40SYehuda Sadeh { 541ccece235SAlex Elder u32 snap_count; 54258c17b0eSAlex Elder size_t len; 543d2bb24e5SAlex Elder size_t size; 544621901d6SAlex Elder u32 i; 545602adf40SYehuda Sadeh 5466a52325fSAlex Elder memset(header, 0, sizeof (*header)); 5476a52325fSAlex Elder 548103a150fSAlex Elder snap_count = le32_to_cpu(ondisk->snap_count); 549103a150fSAlex Elder 55058c17b0eSAlex Elder len = strnlen(ondisk->object_prefix, sizeof (ondisk->object_prefix)); 55158c17b0eSAlex Elder header->object_prefix = kmalloc(len + 1, GFP_KERNEL); 5526a52325fSAlex Elder if (!header->object_prefix) 553602adf40SYehuda Sadeh return -ENOMEM; 55458c17b0eSAlex Elder memcpy(header->object_prefix, ondisk->object_prefix, len); 55558c17b0eSAlex Elder header->object_prefix[len] = '\0'; 55600f1f36fSAlex Elder 557602adf40SYehuda Sadeh if (snap_count) { 558f785cc1dSAlex Elder u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len); 559f785cc1dSAlex Elder 560621901d6SAlex Elder /* Save a copy of the snapshot names */ 561621901d6SAlex Elder 562f785cc1dSAlex Elder if (snap_names_len > (u64) SIZE_MAX) 563f785cc1dSAlex Elder return -EIO; 564f785cc1dSAlex Elder header->snap_names = kmalloc(snap_names_len, GFP_KERNEL); 565602adf40SYehuda Sadeh if (!header->snap_names) 5666a52325fSAlex Elder goto out_err; 567f785cc1dSAlex Elder /* 568f785cc1dSAlex Elder * Note that rbd_dev_v1_header_read() guarantees 569f785cc1dSAlex Elder * the ondisk buffer we're working with has 570f785cc1dSAlex Elder * snap_names_len bytes beyond the end of the 571f785cc1dSAlex Elder * snapshot id array, this memcpy() is safe. 572f785cc1dSAlex Elder */ 573f785cc1dSAlex Elder memcpy(header->snap_names, &ondisk->snaps[snap_count], 574f785cc1dSAlex Elder snap_names_len); 5756a52325fSAlex Elder 576621901d6SAlex Elder /* Record each snapshot's size */ 577621901d6SAlex Elder 578d2bb24e5SAlex Elder size = snap_count * sizeof (*header->snap_sizes); 579d2bb24e5SAlex Elder header->snap_sizes = kmalloc(size, GFP_KERNEL); 580602adf40SYehuda Sadeh if (!header->snap_sizes) 5816a52325fSAlex Elder goto out_err; 582621901d6SAlex Elder for (i = 0; i < snap_count; i++) 583621901d6SAlex Elder header->snap_sizes[i] = 584621901d6SAlex Elder le64_to_cpu(ondisk->snaps[i].image_size); 585602adf40SYehuda Sadeh } else { 586ccece235SAlex Elder WARN_ON(ondisk->snap_names_len); 587602adf40SYehuda Sadeh header->snap_names = NULL; 588602adf40SYehuda Sadeh header->snap_sizes = NULL; 589602adf40SYehuda Sadeh } 590849b4260SAlex Elder 591602adf40SYehuda Sadeh header->obj_order = ondisk->options.order; 592602adf40SYehuda Sadeh header->crypt_type = ondisk->options.crypt_type; 593602adf40SYehuda Sadeh header->comp_type = ondisk->options.comp_type; 5946a52325fSAlex Elder 595621901d6SAlex Elder /* Allocate and fill in the snapshot context */ 596621901d6SAlex Elder 597f84344f3SAlex Elder header->image_size = le64_to_cpu(ondisk->image_size); 5986a52325fSAlex Elder size = sizeof (struct ceph_snap_context); 5996a52325fSAlex Elder size += snap_count * sizeof (header->snapc->snaps[0]); 6006a52325fSAlex Elder header->snapc = kzalloc(size, GFP_KERNEL); 6016a52325fSAlex Elder if (!header->snapc) 6026a52325fSAlex Elder goto out_err; 603602adf40SYehuda Sadeh 604602adf40SYehuda Sadeh atomic_set(&header->snapc->nref, 1); 605505cbb9bSAlex Elder header->snapc->seq = le64_to_cpu(ondisk->snap_seq); 606602adf40SYehuda Sadeh header->snapc->num_snaps = snap_count; 607621901d6SAlex Elder for (i = 0; i < snap_count; i++) 608602adf40SYehuda Sadeh header->snapc->snaps[i] = 609602adf40SYehuda Sadeh le64_to_cpu(ondisk->snaps[i].id); 610602adf40SYehuda Sadeh 611602adf40SYehuda Sadeh return 0; 612602adf40SYehuda Sadeh 6136a52325fSAlex Elder out_err: 614849b4260SAlex Elder kfree(header->snap_sizes); 615ccece235SAlex Elder header->snap_sizes = NULL; 616602adf40SYehuda Sadeh kfree(header->snap_names); 617ccece235SAlex Elder header->snap_names = NULL; 6186a52325fSAlex Elder kfree(header->object_prefix); 6196a52325fSAlex Elder header->object_prefix = NULL; 620ccece235SAlex Elder 62100f1f36fSAlex Elder return -ENOMEM; 622602adf40SYehuda Sadeh } 623602adf40SYehuda Sadeh 624602adf40SYehuda Sadeh static int snap_by_name(struct rbd_image_header *header, const char *snap_name, 625602adf40SYehuda Sadeh u64 *seq, u64 *size) 626602adf40SYehuda Sadeh { 627602adf40SYehuda Sadeh int i; 628602adf40SYehuda Sadeh char *p = header->snap_names; 629602adf40SYehuda Sadeh 630c9aadfe7SAlex Elder rbd_assert(header->snapc != NULL); 631c9aadfe7SAlex Elder for (i = 0; i < header->snapc->num_snaps; i++) { 63200f1f36fSAlex Elder if (!strcmp(snap_name, p)) { 63300f1f36fSAlex Elder 63400f1f36fSAlex Elder /* Found it. Pass back its id and/or size */ 63500f1f36fSAlex Elder 636602adf40SYehuda Sadeh if (seq) 637602adf40SYehuda Sadeh *seq = header->snapc->snaps[i]; 638602adf40SYehuda Sadeh if (size) 639602adf40SYehuda Sadeh *size = header->snap_sizes[i]; 640602adf40SYehuda Sadeh return i; 641602adf40SYehuda Sadeh } 64200f1f36fSAlex Elder p += strlen(p) + 1; /* Skip ahead to the next name */ 64300f1f36fSAlex Elder } 64400f1f36fSAlex Elder return -ENOENT; 64500f1f36fSAlex Elder } 646602adf40SYehuda Sadeh 64799c1f08fSAlex Elder static int rbd_header_set_snap(struct rbd_device *rbd_dev) 648602adf40SYehuda Sadeh { 64978dc447dSAlex Elder int ret; 650602adf40SYehuda Sadeh 6510ce1a794SAlex Elder down_write(&rbd_dev->header_rwsem); 652602adf40SYehuda Sadeh 653f84344f3SAlex Elder if (!memcmp(rbd_dev->mapping.snap_name, RBD_SNAP_HEAD_NAME, 654cc9d734cSJosh Durgin sizeof (RBD_SNAP_HEAD_NAME))) { 655f84344f3SAlex Elder rbd_dev->mapping.snap_id = CEPH_NOSNAP; 65699c1f08fSAlex Elder rbd_dev->mapping.size = rbd_dev->header.image_size; 657f84344f3SAlex Elder rbd_dev->mapping.snap_exists = false; 658f84344f3SAlex Elder rbd_dev->mapping.read_only = rbd_dev->rbd_opts.read_only; 659602adf40SYehuda Sadeh } else { 660f84344f3SAlex Elder ret = snap_by_name(&rbd_dev->header, 661f84344f3SAlex Elder rbd_dev->mapping.snap_name, 66299c1f08fSAlex Elder &rbd_dev->mapping.snap_id, 66399c1f08fSAlex Elder &rbd_dev->mapping.size); 664602adf40SYehuda Sadeh if (ret < 0) 665602adf40SYehuda Sadeh goto done; 666f84344f3SAlex Elder rbd_dev->mapping.snap_exists = true; 667f84344f3SAlex Elder rbd_dev->mapping.read_only = true; 668602adf40SYehuda Sadeh } 669602adf40SYehuda Sadeh 670602adf40SYehuda Sadeh ret = 0; 671602adf40SYehuda Sadeh done: 6720ce1a794SAlex Elder up_write(&rbd_dev->header_rwsem); 673602adf40SYehuda Sadeh return ret; 674602adf40SYehuda Sadeh } 675602adf40SYehuda Sadeh 676602adf40SYehuda Sadeh static void rbd_header_free(struct rbd_image_header *header) 677602adf40SYehuda Sadeh { 678849b4260SAlex Elder kfree(header->object_prefix); 679d78fd7aeSAlex Elder header->object_prefix = NULL; 680602adf40SYehuda Sadeh kfree(header->snap_sizes); 681d78fd7aeSAlex Elder header->snap_sizes = NULL; 682849b4260SAlex Elder kfree(header->snap_names); 683d78fd7aeSAlex Elder header->snap_names = NULL; 684d1d25646SJosh Durgin ceph_put_snap_context(header->snapc); 685d78fd7aeSAlex Elder header->snapc = NULL; 686602adf40SYehuda Sadeh } 687602adf40SYehuda Sadeh 68865ccfe21SAlex Elder static char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) 689602adf40SYehuda Sadeh { 69065ccfe21SAlex Elder char *name; 69165ccfe21SAlex Elder u64 segment; 69265ccfe21SAlex Elder int ret; 693602adf40SYehuda Sadeh 69465ccfe21SAlex Elder name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); 69565ccfe21SAlex Elder if (!name) 69665ccfe21SAlex Elder return NULL; 69765ccfe21SAlex Elder segment = offset >> rbd_dev->header.obj_order; 69865ccfe21SAlex Elder ret = snprintf(name, RBD_MAX_SEG_NAME_LEN, "%s.%012llx", 69965ccfe21SAlex Elder rbd_dev->header.object_prefix, segment); 70065ccfe21SAlex Elder if (ret < 0 || ret >= RBD_MAX_SEG_NAME_LEN) { 70165ccfe21SAlex Elder pr_err("error formatting segment name for #%llu (%d)\n", 70265ccfe21SAlex Elder segment, ret); 70365ccfe21SAlex Elder kfree(name); 70465ccfe21SAlex Elder name = NULL; 70565ccfe21SAlex Elder } 706602adf40SYehuda Sadeh 70765ccfe21SAlex Elder return name; 70865ccfe21SAlex Elder } 709602adf40SYehuda Sadeh 71065ccfe21SAlex Elder static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset) 71165ccfe21SAlex Elder { 71265ccfe21SAlex Elder u64 segment_size = (u64) 1 << rbd_dev->header.obj_order; 713602adf40SYehuda Sadeh 71465ccfe21SAlex Elder return offset & (segment_size - 1); 71565ccfe21SAlex Elder } 71665ccfe21SAlex Elder 71765ccfe21SAlex Elder static u64 rbd_segment_length(struct rbd_device *rbd_dev, 71865ccfe21SAlex Elder u64 offset, u64 length) 71965ccfe21SAlex Elder { 72065ccfe21SAlex Elder u64 segment_size = (u64) 1 << rbd_dev->header.obj_order; 72165ccfe21SAlex Elder 72265ccfe21SAlex Elder offset &= segment_size - 1; 72365ccfe21SAlex Elder 724aafb230eSAlex Elder rbd_assert(length <= U64_MAX - offset); 72565ccfe21SAlex Elder if (offset + length > segment_size) 72665ccfe21SAlex Elder length = segment_size - offset; 72765ccfe21SAlex Elder 72865ccfe21SAlex Elder return length; 729602adf40SYehuda Sadeh } 730602adf40SYehuda Sadeh 7311fec7093SYehuda Sadeh static int rbd_get_num_segments(struct rbd_image_header *header, 7321fec7093SYehuda Sadeh u64 ofs, u64 len) 7331fec7093SYehuda Sadeh { 734df111be6SAlex Elder u64 start_seg; 735df111be6SAlex Elder u64 end_seg; 736df111be6SAlex Elder 737df111be6SAlex Elder if (!len) 738df111be6SAlex Elder return 0; 739df111be6SAlex Elder if (len - 1 > U64_MAX - ofs) 740df111be6SAlex Elder return -ERANGE; 741df111be6SAlex Elder 742df111be6SAlex Elder start_seg = ofs >> header->obj_order; 743df111be6SAlex Elder end_seg = (ofs + len - 1) >> header->obj_order; 744df111be6SAlex Elder 7451fec7093SYehuda Sadeh return end_seg - start_seg + 1; 7461fec7093SYehuda Sadeh } 7471fec7093SYehuda Sadeh 748602adf40SYehuda Sadeh /* 749029bcbd8SJosh Durgin * returns the size of an object in the image 750029bcbd8SJosh Durgin */ 751029bcbd8SJosh Durgin static u64 rbd_obj_bytes(struct rbd_image_header *header) 752029bcbd8SJosh Durgin { 753029bcbd8SJosh Durgin return 1 << header->obj_order; 754029bcbd8SJosh Durgin } 755029bcbd8SJosh Durgin 756029bcbd8SJosh Durgin /* 757602adf40SYehuda Sadeh * bio helpers 758602adf40SYehuda Sadeh */ 759602adf40SYehuda Sadeh 760602adf40SYehuda Sadeh static void bio_chain_put(struct bio *chain) 761602adf40SYehuda Sadeh { 762602adf40SYehuda Sadeh struct bio *tmp; 763602adf40SYehuda Sadeh 764602adf40SYehuda Sadeh while (chain) { 765602adf40SYehuda Sadeh tmp = chain; 766602adf40SYehuda Sadeh chain = chain->bi_next; 767602adf40SYehuda Sadeh bio_put(tmp); 768602adf40SYehuda Sadeh } 769602adf40SYehuda Sadeh } 770602adf40SYehuda Sadeh 771602adf40SYehuda Sadeh /* 772602adf40SYehuda Sadeh * zeros a bio chain, starting at specific offset 773602adf40SYehuda Sadeh */ 774602adf40SYehuda Sadeh static void zero_bio_chain(struct bio *chain, int start_ofs) 775602adf40SYehuda Sadeh { 776602adf40SYehuda Sadeh struct bio_vec *bv; 777602adf40SYehuda Sadeh unsigned long flags; 778602adf40SYehuda Sadeh void *buf; 779602adf40SYehuda Sadeh int i; 780602adf40SYehuda Sadeh int pos = 0; 781602adf40SYehuda Sadeh 782602adf40SYehuda Sadeh while (chain) { 783602adf40SYehuda Sadeh bio_for_each_segment(bv, chain, i) { 784602adf40SYehuda Sadeh if (pos + bv->bv_len > start_ofs) { 785602adf40SYehuda Sadeh int remainder = max(start_ofs - pos, 0); 786602adf40SYehuda Sadeh buf = bvec_kmap_irq(bv, &flags); 787602adf40SYehuda Sadeh memset(buf + remainder, 0, 788602adf40SYehuda Sadeh bv->bv_len - remainder); 78985b5aaa6SDan Carpenter bvec_kunmap_irq(buf, &flags); 790602adf40SYehuda Sadeh } 791602adf40SYehuda Sadeh pos += bv->bv_len; 792602adf40SYehuda Sadeh } 793602adf40SYehuda Sadeh 794602adf40SYehuda Sadeh chain = chain->bi_next; 795602adf40SYehuda Sadeh } 796602adf40SYehuda Sadeh } 797602adf40SYehuda Sadeh 798602adf40SYehuda Sadeh /* 799602adf40SYehuda Sadeh * bio_chain_clone - clone a chain of bios up to a certain length. 800602adf40SYehuda Sadeh * might return a bio_pair that will need to be released. 801602adf40SYehuda Sadeh */ 802602adf40SYehuda Sadeh static struct bio *bio_chain_clone(struct bio **old, struct bio **next, 803602adf40SYehuda Sadeh struct bio_pair **bp, 804602adf40SYehuda Sadeh int len, gfp_t gfpmask) 805602adf40SYehuda Sadeh { 806542582fcSAlex Elder struct bio *old_chain = *old; 807542582fcSAlex Elder struct bio *new_chain = NULL; 808542582fcSAlex Elder struct bio *tail; 809602adf40SYehuda Sadeh int total = 0; 810602adf40SYehuda Sadeh 811602adf40SYehuda Sadeh if (*bp) { 812602adf40SYehuda Sadeh bio_pair_release(*bp); 813602adf40SYehuda Sadeh *bp = NULL; 814602adf40SYehuda Sadeh } 815602adf40SYehuda Sadeh 816602adf40SYehuda Sadeh while (old_chain && (total < len)) { 817542582fcSAlex Elder struct bio *tmp; 818542582fcSAlex Elder 819602adf40SYehuda Sadeh tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); 820602adf40SYehuda Sadeh if (!tmp) 821602adf40SYehuda Sadeh goto err_out; 822542582fcSAlex Elder gfpmask &= ~__GFP_WAIT; /* can't wait after the first */ 823602adf40SYehuda Sadeh 824602adf40SYehuda Sadeh if (total + old_chain->bi_size > len) { 825602adf40SYehuda Sadeh struct bio_pair *bp; 826602adf40SYehuda Sadeh 827602adf40SYehuda Sadeh /* 828602adf40SYehuda Sadeh * this split can only happen with a single paged bio, 829602adf40SYehuda Sadeh * split_bio will BUG_ON if this is not the case 830602adf40SYehuda Sadeh */ 831602adf40SYehuda Sadeh dout("bio_chain_clone split! total=%d remaining=%d" 832bd919d45SAlex Elder "bi_size=%u\n", 833bd919d45SAlex Elder total, len - total, old_chain->bi_size); 834602adf40SYehuda Sadeh 835602adf40SYehuda Sadeh /* split the bio. We'll release it either in the next 836602adf40SYehuda Sadeh call, or it will have to be released outside */ 837593a9e7bSAlex Elder bp = bio_split(old_chain, (len - total) / SECTOR_SIZE); 838602adf40SYehuda Sadeh if (!bp) 839602adf40SYehuda Sadeh goto err_out; 840602adf40SYehuda Sadeh 841602adf40SYehuda Sadeh __bio_clone(tmp, &bp->bio1); 842602adf40SYehuda Sadeh 843602adf40SYehuda Sadeh *next = &bp->bio2; 844602adf40SYehuda Sadeh } else { 845602adf40SYehuda Sadeh __bio_clone(tmp, old_chain); 846602adf40SYehuda Sadeh *next = old_chain->bi_next; 847602adf40SYehuda Sadeh } 848602adf40SYehuda Sadeh 849602adf40SYehuda Sadeh tmp->bi_bdev = NULL; 850602adf40SYehuda Sadeh tmp->bi_next = NULL; 851542582fcSAlex Elder if (new_chain) 852602adf40SYehuda Sadeh tail->bi_next = tmp; 853542582fcSAlex Elder else 854542582fcSAlex Elder new_chain = tmp; 855602adf40SYehuda Sadeh tail = tmp; 856602adf40SYehuda Sadeh old_chain = old_chain->bi_next; 857602adf40SYehuda Sadeh 858602adf40SYehuda Sadeh total += tmp->bi_size; 859602adf40SYehuda Sadeh } 860602adf40SYehuda Sadeh 861aafb230eSAlex Elder rbd_assert(total == len); 862602adf40SYehuda Sadeh 863602adf40SYehuda Sadeh *old = old_chain; 864602adf40SYehuda Sadeh 865602adf40SYehuda Sadeh return new_chain; 866602adf40SYehuda Sadeh 867602adf40SYehuda Sadeh err_out: 868602adf40SYehuda Sadeh dout("bio_chain_clone with err\n"); 869602adf40SYehuda Sadeh bio_chain_put(new_chain); 870602adf40SYehuda Sadeh return NULL; 871602adf40SYehuda Sadeh } 872602adf40SYehuda Sadeh 873602adf40SYehuda Sadeh /* 874602adf40SYehuda Sadeh * helpers for osd request op vectors. 875602adf40SYehuda Sadeh */ 87657cfc106SAlex Elder static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops, 87757cfc106SAlex Elder int opcode, u32 payload_len) 878602adf40SYehuda Sadeh { 87957cfc106SAlex Elder struct ceph_osd_req_op *ops; 88057cfc106SAlex Elder 88157cfc106SAlex Elder ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO); 88257cfc106SAlex Elder if (!ops) 88357cfc106SAlex Elder return NULL; 88457cfc106SAlex Elder 88557cfc106SAlex Elder ops[0].op = opcode; 88657cfc106SAlex Elder 887602adf40SYehuda Sadeh /* 888602adf40SYehuda Sadeh * op extent offset and length will be set later on 889602adf40SYehuda Sadeh * in calc_raw_layout() 890602adf40SYehuda Sadeh */ 89157cfc106SAlex Elder ops[0].payload_len = payload_len; 89257cfc106SAlex Elder 89357cfc106SAlex Elder return ops; 894602adf40SYehuda Sadeh } 895602adf40SYehuda Sadeh 896602adf40SYehuda Sadeh static void rbd_destroy_ops(struct ceph_osd_req_op *ops) 897602adf40SYehuda Sadeh { 898602adf40SYehuda Sadeh kfree(ops); 899602adf40SYehuda Sadeh } 900602adf40SYehuda Sadeh 9011fec7093SYehuda Sadeh static void rbd_coll_end_req_index(struct request *rq, 9021fec7093SYehuda Sadeh struct rbd_req_coll *coll, 9031fec7093SYehuda Sadeh int index, 9041fec7093SYehuda Sadeh int ret, u64 len) 9051fec7093SYehuda Sadeh { 9061fec7093SYehuda Sadeh struct request_queue *q; 9071fec7093SYehuda Sadeh int min, max, i; 9081fec7093SYehuda Sadeh 909bd919d45SAlex Elder dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n", 910bd919d45SAlex Elder coll, index, ret, (unsigned long long) len); 9111fec7093SYehuda Sadeh 9121fec7093SYehuda Sadeh if (!rq) 9131fec7093SYehuda Sadeh return; 9141fec7093SYehuda Sadeh 9151fec7093SYehuda Sadeh if (!coll) { 9161fec7093SYehuda Sadeh blk_end_request(rq, ret, len); 9171fec7093SYehuda Sadeh return; 9181fec7093SYehuda Sadeh } 9191fec7093SYehuda Sadeh 9201fec7093SYehuda Sadeh q = rq->q; 9211fec7093SYehuda Sadeh 9221fec7093SYehuda Sadeh spin_lock_irq(q->queue_lock); 9231fec7093SYehuda Sadeh coll->status[index].done = 1; 9241fec7093SYehuda Sadeh coll->status[index].rc = ret; 9251fec7093SYehuda Sadeh coll->status[index].bytes = len; 9261fec7093SYehuda Sadeh max = min = coll->num_done; 9271fec7093SYehuda Sadeh while (max < coll->total && coll->status[max].done) 9281fec7093SYehuda Sadeh max++; 9291fec7093SYehuda Sadeh 9301fec7093SYehuda Sadeh for (i = min; i<max; i++) { 9311fec7093SYehuda Sadeh __blk_end_request(rq, coll->status[i].rc, 9321fec7093SYehuda Sadeh coll->status[i].bytes); 9331fec7093SYehuda Sadeh coll->num_done++; 9341fec7093SYehuda Sadeh kref_put(&coll->kref, rbd_coll_release); 9351fec7093SYehuda Sadeh } 9361fec7093SYehuda Sadeh spin_unlock_irq(q->queue_lock); 9371fec7093SYehuda Sadeh } 9381fec7093SYehuda Sadeh 9391fec7093SYehuda Sadeh static void rbd_coll_end_req(struct rbd_request *req, 9401fec7093SYehuda Sadeh int ret, u64 len) 9411fec7093SYehuda Sadeh { 9421fec7093SYehuda Sadeh rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len); 9431fec7093SYehuda Sadeh } 9441fec7093SYehuda Sadeh 945602adf40SYehuda Sadeh /* 946602adf40SYehuda Sadeh * Send ceph osd request 947602adf40SYehuda Sadeh */ 948602adf40SYehuda Sadeh static int rbd_do_request(struct request *rq, 9490ce1a794SAlex Elder struct rbd_device *rbd_dev, 950602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 951602adf40SYehuda Sadeh u64 snapid, 952aded07eaSAlex Elder const char *object_name, u64 ofs, u64 len, 953602adf40SYehuda Sadeh struct bio *bio, 954602adf40SYehuda Sadeh struct page **pages, 955602adf40SYehuda Sadeh int num_pages, 956602adf40SYehuda Sadeh int flags, 957602adf40SYehuda Sadeh struct ceph_osd_req_op *ops, 9581fec7093SYehuda Sadeh struct rbd_req_coll *coll, 9591fec7093SYehuda Sadeh int coll_index, 960602adf40SYehuda Sadeh void (*rbd_cb)(struct ceph_osd_request *req, 96159c2be1eSYehuda Sadeh struct ceph_msg *msg), 96259c2be1eSYehuda Sadeh struct ceph_osd_request **linger_req, 96359c2be1eSYehuda Sadeh u64 *ver) 964602adf40SYehuda Sadeh { 965602adf40SYehuda Sadeh struct ceph_osd_request *req; 966602adf40SYehuda Sadeh struct ceph_file_layout *layout; 967602adf40SYehuda Sadeh int ret; 968602adf40SYehuda Sadeh u64 bno; 969602adf40SYehuda Sadeh struct timespec mtime = CURRENT_TIME; 970602adf40SYehuda Sadeh struct rbd_request *req_data; 971602adf40SYehuda Sadeh struct ceph_osd_request_head *reqhead; 9721dbb4399SAlex Elder struct ceph_osd_client *osdc; 973602adf40SYehuda Sadeh 974602adf40SYehuda Sadeh req_data = kzalloc(sizeof(*req_data), GFP_NOIO); 9751fec7093SYehuda Sadeh if (!req_data) { 9761fec7093SYehuda Sadeh if (coll) 9771fec7093SYehuda Sadeh rbd_coll_end_req_index(rq, coll, coll_index, 9781fec7093SYehuda Sadeh -ENOMEM, len); 9791fec7093SYehuda Sadeh return -ENOMEM; 9801fec7093SYehuda Sadeh } 981602adf40SYehuda Sadeh 9821fec7093SYehuda Sadeh if (coll) { 9831fec7093SYehuda Sadeh req_data->coll = coll; 9841fec7093SYehuda Sadeh req_data->coll_index = coll_index; 9851fec7093SYehuda Sadeh } 9861fec7093SYehuda Sadeh 987bd919d45SAlex Elder dout("rbd_do_request object_name=%s ofs=%llu len=%llu\n", object_name, 988bd919d45SAlex Elder (unsigned long long) ofs, (unsigned long long) len); 989602adf40SYehuda Sadeh 9900ce1a794SAlex Elder osdc = &rbd_dev->rbd_client->client->osdc; 9911dbb4399SAlex Elder req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, 9921dbb4399SAlex Elder false, GFP_NOIO, pages, bio); 9934ad12621SSage Weil if (!req) { 9944ad12621SSage Weil ret = -ENOMEM; 995602adf40SYehuda Sadeh goto done_pages; 996602adf40SYehuda Sadeh } 997602adf40SYehuda Sadeh 998602adf40SYehuda Sadeh req->r_callback = rbd_cb; 999602adf40SYehuda Sadeh 1000602adf40SYehuda Sadeh req_data->rq = rq; 1001602adf40SYehuda Sadeh req_data->bio = bio; 1002602adf40SYehuda Sadeh req_data->pages = pages; 1003602adf40SYehuda Sadeh req_data->len = len; 1004602adf40SYehuda Sadeh 1005602adf40SYehuda Sadeh req->r_priv = req_data; 1006602adf40SYehuda Sadeh 1007602adf40SYehuda Sadeh reqhead = req->r_request->front.iov_base; 1008602adf40SYehuda Sadeh reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); 1009602adf40SYehuda Sadeh 1010aded07eaSAlex Elder strncpy(req->r_oid, object_name, sizeof(req->r_oid)); 1011602adf40SYehuda Sadeh req->r_oid_len = strlen(req->r_oid); 1012602adf40SYehuda Sadeh 1013602adf40SYehuda Sadeh layout = &req->r_file_layout; 1014602adf40SYehuda Sadeh memset(layout, 0, sizeof(*layout)); 1015602adf40SYehuda Sadeh layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); 1016602adf40SYehuda Sadeh layout->fl_stripe_count = cpu_to_le32(1); 1017602adf40SYehuda Sadeh layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); 10180ce1a794SAlex Elder layout->fl_pg_pool = cpu_to_le32(rbd_dev->pool_id); 10191dbb4399SAlex Elder ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, 10201dbb4399SAlex Elder req, ops); 1021602adf40SYehuda Sadeh 1022602adf40SYehuda Sadeh ceph_osdc_build_request(req, ofs, &len, 1023602adf40SYehuda Sadeh ops, 1024602adf40SYehuda Sadeh snapc, 1025602adf40SYehuda Sadeh &mtime, 1026602adf40SYehuda Sadeh req->r_oid, req->r_oid_len); 1027602adf40SYehuda Sadeh 102859c2be1eSYehuda Sadeh if (linger_req) { 10291dbb4399SAlex Elder ceph_osdc_set_request_linger(osdc, req); 103059c2be1eSYehuda Sadeh *linger_req = req; 103159c2be1eSYehuda Sadeh } 103259c2be1eSYehuda Sadeh 10331dbb4399SAlex Elder ret = ceph_osdc_start_request(osdc, req, false); 1034602adf40SYehuda Sadeh if (ret < 0) 1035602adf40SYehuda Sadeh goto done_err; 1036602adf40SYehuda Sadeh 1037602adf40SYehuda Sadeh if (!rbd_cb) { 10381dbb4399SAlex Elder ret = ceph_osdc_wait_request(osdc, req); 103959c2be1eSYehuda Sadeh if (ver) 104059c2be1eSYehuda Sadeh *ver = le64_to_cpu(req->r_reassert_version.version); 1041bd919d45SAlex Elder dout("reassert_ver=%llu\n", 1042bd919d45SAlex Elder (unsigned long long) 10431fec7093SYehuda Sadeh le64_to_cpu(req->r_reassert_version.version)); 1044602adf40SYehuda Sadeh ceph_osdc_put_request(req); 1045602adf40SYehuda Sadeh } 1046602adf40SYehuda Sadeh return ret; 1047602adf40SYehuda Sadeh 1048602adf40SYehuda Sadeh done_err: 1049602adf40SYehuda Sadeh bio_chain_put(req_data->bio); 1050602adf40SYehuda Sadeh ceph_osdc_put_request(req); 1051602adf40SYehuda Sadeh done_pages: 10521fec7093SYehuda Sadeh rbd_coll_end_req(req_data, ret, len); 1053602adf40SYehuda Sadeh kfree(req_data); 1054602adf40SYehuda Sadeh return ret; 1055602adf40SYehuda Sadeh } 1056602adf40SYehuda Sadeh 1057602adf40SYehuda Sadeh /* 1058602adf40SYehuda Sadeh * Ceph osd op callback 1059602adf40SYehuda Sadeh */ 1060602adf40SYehuda Sadeh static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) 1061602adf40SYehuda Sadeh { 1062602adf40SYehuda Sadeh struct rbd_request *req_data = req->r_priv; 1063602adf40SYehuda Sadeh struct ceph_osd_reply_head *replyhead; 1064602adf40SYehuda Sadeh struct ceph_osd_op *op; 1065602adf40SYehuda Sadeh __s32 rc; 1066602adf40SYehuda Sadeh u64 bytes; 1067602adf40SYehuda Sadeh int read_op; 1068602adf40SYehuda Sadeh 1069602adf40SYehuda Sadeh /* parse reply */ 1070602adf40SYehuda Sadeh replyhead = msg->front.iov_base; 1071602adf40SYehuda Sadeh WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); 1072602adf40SYehuda Sadeh op = (void *)(replyhead + 1); 1073602adf40SYehuda Sadeh rc = le32_to_cpu(replyhead->result); 1074602adf40SYehuda Sadeh bytes = le64_to_cpu(op->extent.length); 1075895cfcc8SDan Carpenter read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ); 1076602adf40SYehuda Sadeh 1077bd919d45SAlex Elder dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n", 1078bd919d45SAlex Elder (unsigned long long) bytes, read_op, (int) rc); 1079602adf40SYehuda Sadeh 1080602adf40SYehuda Sadeh if (rc == -ENOENT && read_op) { 1081602adf40SYehuda Sadeh zero_bio_chain(req_data->bio, 0); 1082602adf40SYehuda Sadeh rc = 0; 1083602adf40SYehuda Sadeh } else if (rc == 0 && read_op && bytes < req_data->len) { 1084602adf40SYehuda Sadeh zero_bio_chain(req_data->bio, bytes); 1085602adf40SYehuda Sadeh bytes = req_data->len; 1086602adf40SYehuda Sadeh } 1087602adf40SYehuda Sadeh 10881fec7093SYehuda Sadeh rbd_coll_end_req(req_data, rc, bytes); 1089602adf40SYehuda Sadeh 1090602adf40SYehuda Sadeh if (req_data->bio) 1091602adf40SYehuda Sadeh bio_chain_put(req_data->bio); 1092602adf40SYehuda Sadeh 1093602adf40SYehuda Sadeh ceph_osdc_put_request(req); 1094602adf40SYehuda Sadeh kfree(req_data); 1095602adf40SYehuda Sadeh } 1096602adf40SYehuda Sadeh 109759c2be1eSYehuda Sadeh static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) 109859c2be1eSYehuda Sadeh { 109959c2be1eSYehuda Sadeh ceph_osdc_put_request(req); 110059c2be1eSYehuda Sadeh } 110159c2be1eSYehuda Sadeh 1102602adf40SYehuda Sadeh /* 1103602adf40SYehuda Sadeh * Do a synchronous ceph osd operation 1104602adf40SYehuda Sadeh */ 11050ce1a794SAlex Elder static int rbd_req_sync_op(struct rbd_device *rbd_dev, 1106602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1107602adf40SYehuda Sadeh u64 snapid, 1108602adf40SYehuda Sadeh int flags, 1109913d2fdcSAlex Elder struct ceph_osd_req_op *ops, 1110aded07eaSAlex Elder const char *object_name, 1111602adf40SYehuda Sadeh u64 ofs, u64 len, 111259c2be1eSYehuda Sadeh char *buf, 111359c2be1eSYehuda Sadeh struct ceph_osd_request **linger_req, 111459c2be1eSYehuda Sadeh u64 *ver) 1115602adf40SYehuda Sadeh { 1116602adf40SYehuda Sadeh int ret; 1117602adf40SYehuda Sadeh struct page **pages; 1118602adf40SYehuda Sadeh int num_pages; 1119913d2fdcSAlex Elder 1120aafb230eSAlex Elder rbd_assert(ops != NULL); 1121602adf40SYehuda Sadeh 1122602adf40SYehuda Sadeh num_pages = calc_pages_for(ofs , len); 1123602adf40SYehuda Sadeh pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); 1124b8d0638aSDan Carpenter if (IS_ERR(pages)) 1125b8d0638aSDan Carpenter return PTR_ERR(pages); 1126602adf40SYehuda Sadeh 11270ce1a794SAlex Elder ret = rbd_do_request(NULL, rbd_dev, snapc, snapid, 1128aded07eaSAlex Elder object_name, ofs, len, NULL, 1129602adf40SYehuda Sadeh pages, num_pages, 1130602adf40SYehuda Sadeh flags, 1131602adf40SYehuda Sadeh ops, 11321fec7093SYehuda Sadeh NULL, 0, 113359c2be1eSYehuda Sadeh NULL, 113459c2be1eSYehuda Sadeh linger_req, ver); 1135602adf40SYehuda Sadeh if (ret < 0) 1136913d2fdcSAlex Elder goto done; 1137602adf40SYehuda Sadeh 1138602adf40SYehuda Sadeh if ((flags & CEPH_OSD_FLAG_READ) && buf) 1139602adf40SYehuda Sadeh ret = ceph_copy_from_page_vector(pages, buf, ofs, ret); 1140602adf40SYehuda Sadeh 1141602adf40SYehuda Sadeh done: 1142602adf40SYehuda Sadeh ceph_release_page_vector(pages, num_pages); 1143602adf40SYehuda Sadeh return ret; 1144602adf40SYehuda Sadeh } 1145602adf40SYehuda Sadeh 1146602adf40SYehuda Sadeh /* 1147602adf40SYehuda Sadeh * Do an asynchronous ceph osd operation 1148602adf40SYehuda Sadeh */ 1149602adf40SYehuda Sadeh static int rbd_do_op(struct request *rq, 1150602adf40SYehuda Sadeh struct rbd_device *rbd_dev, 1151602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1152602adf40SYehuda Sadeh u64 snapid, 1153d1f57ea6SAlex Elder int opcode, int flags, 1154602adf40SYehuda Sadeh u64 ofs, u64 len, 11551fec7093SYehuda Sadeh struct bio *bio, 11561fec7093SYehuda Sadeh struct rbd_req_coll *coll, 11571fec7093SYehuda Sadeh int coll_index) 1158602adf40SYehuda Sadeh { 1159602adf40SYehuda Sadeh char *seg_name; 1160602adf40SYehuda Sadeh u64 seg_ofs; 1161602adf40SYehuda Sadeh u64 seg_len; 1162602adf40SYehuda Sadeh int ret; 1163602adf40SYehuda Sadeh struct ceph_osd_req_op *ops; 1164602adf40SYehuda Sadeh u32 payload_len; 1165602adf40SYehuda Sadeh 116665ccfe21SAlex Elder seg_name = rbd_segment_name(rbd_dev, ofs); 1167602adf40SYehuda Sadeh if (!seg_name) 1168602adf40SYehuda Sadeh return -ENOMEM; 116965ccfe21SAlex Elder seg_len = rbd_segment_length(rbd_dev, ofs, len); 117065ccfe21SAlex Elder seg_ofs = rbd_segment_offset(rbd_dev, ofs); 1171602adf40SYehuda Sadeh 1172602adf40SYehuda Sadeh payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); 1173602adf40SYehuda Sadeh 117457cfc106SAlex Elder ret = -ENOMEM; 117557cfc106SAlex Elder ops = rbd_create_rw_ops(1, opcode, payload_len); 117657cfc106SAlex Elder if (!ops) 1177602adf40SYehuda Sadeh goto done; 1178602adf40SYehuda Sadeh 1179602adf40SYehuda Sadeh /* we've taken care of segment sizes earlier when we 1180602adf40SYehuda Sadeh cloned the bios. We should never have a segment 1181602adf40SYehuda Sadeh truncated at this point */ 1182aafb230eSAlex Elder rbd_assert(seg_len == len); 1183602adf40SYehuda Sadeh 1184602adf40SYehuda Sadeh ret = rbd_do_request(rq, rbd_dev, snapc, snapid, 1185602adf40SYehuda Sadeh seg_name, seg_ofs, seg_len, 1186602adf40SYehuda Sadeh bio, 1187602adf40SYehuda Sadeh NULL, 0, 1188602adf40SYehuda Sadeh flags, 1189602adf40SYehuda Sadeh ops, 11901fec7093SYehuda Sadeh coll, coll_index, 119159c2be1eSYehuda Sadeh rbd_req_cb, 0, NULL); 119211f77002SSage Weil 119311f77002SSage Weil rbd_destroy_ops(ops); 1194602adf40SYehuda Sadeh done: 1195602adf40SYehuda Sadeh kfree(seg_name); 1196602adf40SYehuda Sadeh return ret; 1197602adf40SYehuda Sadeh } 1198602adf40SYehuda Sadeh 1199602adf40SYehuda Sadeh /* 1200602adf40SYehuda Sadeh * Request async osd write 1201602adf40SYehuda Sadeh */ 1202602adf40SYehuda Sadeh static int rbd_req_write(struct request *rq, 1203602adf40SYehuda Sadeh struct rbd_device *rbd_dev, 1204602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1205602adf40SYehuda Sadeh u64 ofs, u64 len, 12061fec7093SYehuda Sadeh struct bio *bio, 12071fec7093SYehuda Sadeh struct rbd_req_coll *coll, 12081fec7093SYehuda Sadeh int coll_index) 1209602adf40SYehuda Sadeh { 1210602adf40SYehuda Sadeh return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, 1211602adf40SYehuda Sadeh CEPH_OSD_OP_WRITE, 1212602adf40SYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 12131fec7093SYehuda Sadeh ofs, len, bio, coll, coll_index); 1214602adf40SYehuda Sadeh } 1215602adf40SYehuda Sadeh 1216602adf40SYehuda Sadeh /* 1217602adf40SYehuda Sadeh * Request async osd read 1218602adf40SYehuda Sadeh */ 1219602adf40SYehuda Sadeh static int rbd_req_read(struct request *rq, 1220602adf40SYehuda Sadeh struct rbd_device *rbd_dev, 1221602adf40SYehuda Sadeh u64 snapid, 1222602adf40SYehuda Sadeh u64 ofs, u64 len, 12231fec7093SYehuda Sadeh struct bio *bio, 12241fec7093SYehuda Sadeh struct rbd_req_coll *coll, 12251fec7093SYehuda Sadeh int coll_index) 1226602adf40SYehuda Sadeh { 1227602adf40SYehuda Sadeh return rbd_do_op(rq, rbd_dev, NULL, 1228b06e6a6bSJosh Durgin snapid, 1229602adf40SYehuda Sadeh CEPH_OSD_OP_READ, 1230602adf40SYehuda Sadeh CEPH_OSD_FLAG_READ, 12311fec7093SYehuda Sadeh ofs, len, bio, coll, coll_index); 1232602adf40SYehuda Sadeh } 1233602adf40SYehuda Sadeh 1234602adf40SYehuda Sadeh /* 1235602adf40SYehuda Sadeh * Request sync osd read 1236602adf40SYehuda Sadeh */ 12370ce1a794SAlex Elder static int rbd_req_sync_read(struct rbd_device *rbd_dev, 1238602adf40SYehuda Sadeh u64 snapid, 1239aded07eaSAlex Elder const char *object_name, 1240602adf40SYehuda Sadeh u64 ofs, u64 len, 124159c2be1eSYehuda Sadeh char *buf, 124259c2be1eSYehuda Sadeh u64 *ver) 1243602adf40SYehuda Sadeh { 1244913d2fdcSAlex Elder struct ceph_osd_req_op *ops; 1245913d2fdcSAlex Elder int ret; 1246913d2fdcSAlex Elder 1247913d2fdcSAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_READ, 0); 1248913d2fdcSAlex Elder if (!ops) 1249913d2fdcSAlex Elder return -ENOMEM; 1250913d2fdcSAlex Elder 1251913d2fdcSAlex Elder ret = rbd_req_sync_op(rbd_dev, NULL, 1252b06e6a6bSJosh Durgin snapid, 1253602adf40SYehuda Sadeh CEPH_OSD_FLAG_READ, 1254913d2fdcSAlex Elder ops, object_name, ofs, len, buf, NULL, ver); 1255913d2fdcSAlex Elder rbd_destroy_ops(ops); 1256913d2fdcSAlex Elder 1257913d2fdcSAlex Elder return ret; 1258602adf40SYehuda Sadeh } 1259602adf40SYehuda Sadeh 1260602adf40SYehuda Sadeh /* 126159c2be1eSYehuda Sadeh * Request sync osd watch 126259c2be1eSYehuda Sadeh */ 12630ce1a794SAlex Elder static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev, 126459c2be1eSYehuda Sadeh u64 ver, 12657f0a24d8SAlex Elder u64 notify_id) 126659c2be1eSYehuda Sadeh { 126759c2be1eSYehuda Sadeh struct ceph_osd_req_op *ops; 126811f77002SSage Weil int ret; 126911f77002SSage Weil 127057cfc106SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY_ACK, 0); 127157cfc106SAlex Elder if (!ops) 127257cfc106SAlex Elder return -ENOMEM; 127359c2be1eSYehuda Sadeh 1274a71b891bSJosh Durgin ops[0].watch.ver = cpu_to_le64(ver); 127559c2be1eSYehuda Sadeh ops[0].watch.cookie = notify_id; 127659c2be1eSYehuda Sadeh ops[0].watch.flag = 0; 127759c2be1eSYehuda Sadeh 12780ce1a794SAlex Elder ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP, 12797f0a24d8SAlex Elder rbd_dev->header_name, 0, 0, NULL, 1280ad4f232fSAlex Elder NULL, 0, 128159c2be1eSYehuda Sadeh CEPH_OSD_FLAG_READ, 128259c2be1eSYehuda Sadeh ops, 12831fec7093SYehuda Sadeh NULL, 0, 128459c2be1eSYehuda Sadeh rbd_simple_req_cb, 0, NULL); 128559c2be1eSYehuda Sadeh 128659c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 128759c2be1eSYehuda Sadeh return ret; 128859c2be1eSYehuda Sadeh } 128959c2be1eSYehuda Sadeh 129059c2be1eSYehuda Sadeh static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) 129159c2be1eSYehuda Sadeh { 12920ce1a794SAlex Elder struct rbd_device *rbd_dev = (struct rbd_device *)data; 1293a71b891bSJosh Durgin u64 hver; 129413143d2dSSage Weil int rc; 129513143d2dSSage Weil 12960ce1a794SAlex Elder if (!rbd_dev) 129759c2be1eSYehuda Sadeh return; 129859c2be1eSYehuda Sadeh 1299bd919d45SAlex Elder dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n", 1300bd919d45SAlex Elder rbd_dev->header_name, (unsigned long long) notify_id, 1301bd919d45SAlex Elder (unsigned int) opcode); 13021fe5e993SAlex Elder rc = rbd_refresh_header(rbd_dev, &hver); 130313143d2dSSage Weil if (rc) 1304f0f8cef5SAlex Elder pr_warning(RBD_DRV_NAME "%d got notification but failed to " 13050ce1a794SAlex Elder " update snaps: %d\n", rbd_dev->major, rc); 130659c2be1eSYehuda Sadeh 13077f0a24d8SAlex Elder rbd_req_sync_notify_ack(rbd_dev, hver, notify_id); 130859c2be1eSYehuda Sadeh } 130959c2be1eSYehuda Sadeh 131059c2be1eSYehuda Sadeh /* 131159c2be1eSYehuda Sadeh * Request sync osd watch 131259c2be1eSYehuda Sadeh */ 13130e6f322dSAlex Elder static int rbd_req_sync_watch(struct rbd_device *rbd_dev) 131459c2be1eSYehuda Sadeh { 131559c2be1eSYehuda Sadeh struct ceph_osd_req_op *ops; 13160ce1a794SAlex Elder struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 131757cfc106SAlex Elder int ret; 131859c2be1eSYehuda Sadeh 131957cfc106SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); 132057cfc106SAlex Elder if (!ops) 132157cfc106SAlex Elder return -ENOMEM; 132259c2be1eSYehuda Sadeh 132359c2be1eSYehuda Sadeh ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, 13240ce1a794SAlex Elder (void *)rbd_dev, &rbd_dev->watch_event); 132559c2be1eSYehuda Sadeh if (ret < 0) 132659c2be1eSYehuda Sadeh goto fail; 132759c2be1eSYehuda Sadeh 13280e6f322dSAlex Elder ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version); 13290ce1a794SAlex Elder ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); 133059c2be1eSYehuda Sadeh ops[0].watch.flag = 1; 133159c2be1eSYehuda Sadeh 13320ce1a794SAlex Elder ret = rbd_req_sync_op(rbd_dev, NULL, 133359c2be1eSYehuda Sadeh CEPH_NOSNAP, 133459c2be1eSYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 133559c2be1eSYehuda Sadeh ops, 13360e6f322dSAlex Elder rbd_dev->header_name, 13370e6f322dSAlex Elder 0, 0, NULL, 13380ce1a794SAlex Elder &rbd_dev->watch_request, NULL); 133959c2be1eSYehuda Sadeh 134059c2be1eSYehuda Sadeh if (ret < 0) 134159c2be1eSYehuda Sadeh goto fail_event; 134259c2be1eSYehuda Sadeh 134359c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 134459c2be1eSYehuda Sadeh return 0; 134559c2be1eSYehuda Sadeh 134659c2be1eSYehuda Sadeh fail_event: 13470ce1a794SAlex Elder ceph_osdc_cancel_event(rbd_dev->watch_event); 13480ce1a794SAlex Elder rbd_dev->watch_event = NULL; 134959c2be1eSYehuda Sadeh fail: 135059c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 135159c2be1eSYehuda Sadeh return ret; 135259c2be1eSYehuda Sadeh } 135359c2be1eSYehuda Sadeh 135479e3057cSYehuda Sadeh /* 135579e3057cSYehuda Sadeh * Request sync osd unwatch 135679e3057cSYehuda Sadeh */ 1357070c633fSAlex Elder static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev) 135879e3057cSYehuda Sadeh { 135979e3057cSYehuda Sadeh struct ceph_osd_req_op *ops; 136057cfc106SAlex Elder int ret; 136179e3057cSYehuda Sadeh 136257cfc106SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); 136357cfc106SAlex Elder if (!ops) 136457cfc106SAlex Elder return -ENOMEM; 136579e3057cSYehuda Sadeh 136679e3057cSYehuda Sadeh ops[0].watch.ver = 0; 13670ce1a794SAlex Elder ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); 136879e3057cSYehuda Sadeh ops[0].watch.flag = 0; 136979e3057cSYehuda Sadeh 13700ce1a794SAlex Elder ret = rbd_req_sync_op(rbd_dev, NULL, 137179e3057cSYehuda Sadeh CEPH_NOSNAP, 137279e3057cSYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 137379e3057cSYehuda Sadeh ops, 1374070c633fSAlex Elder rbd_dev->header_name, 1375070c633fSAlex Elder 0, 0, NULL, NULL, NULL); 1376070c633fSAlex Elder 137779e3057cSYehuda Sadeh 137879e3057cSYehuda Sadeh rbd_destroy_ops(ops); 13790ce1a794SAlex Elder ceph_osdc_cancel_event(rbd_dev->watch_event); 13800ce1a794SAlex Elder rbd_dev->watch_event = NULL; 138179e3057cSYehuda Sadeh return ret; 138279e3057cSYehuda Sadeh } 138379e3057cSYehuda Sadeh 138459c2be1eSYehuda Sadeh struct rbd_notify_info { 13850ce1a794SAlex Elder struct rbd_device *rbd_dev; 138659c2be1eSYehuda Sadeh }; 138759c2be1eSYehuda Sadeh 138859c2be1eSYehuda Sadeh static void rbd_notify_cb(u64 ver, u64 notify_id, u8 opcode, void *data) 138959c2be1eSYehuda Sadeh { 13900ce1a794SAlex Elder struct rbd_device *rbd_dev = (struct rbd_device *)data; 13910ce1a794SAlex Elder if (!rbd_dev) 139259c2be1eSYehuda Sadeh return; 139359c2be1eSYehuda Sadeh 1394bd919d45SAlex Elder dout("rbd_notify_cb %s notify_id=%llu opcode=%u\n", 1395bd919d45SAlex Elder rbd_dev->header_name, (unsigned long long) notify_id, 1396bd919d45SAlex Elder (unsigned int) opcode); 139759c2be1eSYehuda Sadeh } 139859c2be1eSYehuda Sadeh 139959c2be1eSYehuda Sadeh /* 140059c2be1eSYehuda Sadeh * Request sync osd notify 140159c2be1eSYehuda Sadeh */ 14024cb16250SAlex Elder static int rbd_req_sync_notify(struct rbd_device *rbd_dev) 140359c2be1eSYehuda Sadeh { 140459c2be1eSYehuda Sadeh struct ceph_osd_req_op *ops; 14050ce1a794SAlex Elder struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 140659c2be1eSYehuda Sadeh struct ceph_osd_event *event; 140759c2be1eSYehuda Sadeh struct rbd_notify_info info; 140859c2be1eSYehuda Sadeh int payload_len = sizeof(u32) + sizeof(u32); 140959c2be1eSYehuda Sadeh int ret; 141059c2be1eSYehuda Sadeh 141157cfc106SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY, payload_len); 141257cfc106SAlex Elder if (!ops) 141357cfc106SAlex Elder return -ENOMEM; 141459c2be1eSYehuda Sadeh 14150ce1a794SAlex Elder info.rbd_dev = rbd_dev; 141659c2be1eSYehuda Sadeh 141759c2be1eSYehuda Sadeh ret = ceph_osdc_create_event(osdc, rbd_notify_cb, 1, 141859c2be1eSYehuda Sadeh (void *)&info, &event); 141959c2be1eSYehuda Sadeh if (ret < 0) 142059c2be1eSYehuda Sadeh goto fail; 142159c2be1eSYehuda Sadeh 142259c2be1eSYehuda Sadeh ops[0].watch.ver = 1; 142359c2be1eSYehuda Sadeh ops[0].watch.flag = 1; 142459c2be1eSYehuda Sadeh ops[0].watch.cookie = event->cookie; 142559c2be1eSYehuda Sadeh ops[0].watch.prot_ver = RADOS_NOTIFY_VER; 142659c2be1eSYehuda Sadeh ops[0].watch.timeout = 12; 142759c2be1eSYehuda Sadeh 14280ce1a794SAlex Elder ret = rbd_req_sync_op(rbd_dev, NULL, 142959c2be1eSYehuda Sadeh CEPH_NOSNAP, 143059c2be1eSYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 143159c2be1eSYehuda Sadeh ops, 14324cb16250SAlex Elder rbd_dev->header_name, 14334cb16250SAlex Elder 0, 0, NULL, NULL, NULL); 143459c2be1eSYehuda Sadeh if (ret < 0) 143559c2be1eSYehuda Sadeh goto fail_event; 143659c2be1eSYehuda Sadeh 143759c2be1eSYehuda Sadeh ret = ceph_osdc_wait_event(event, CEPH_OSD_TIMEOUT_DEFAULT); 143859c2be1eSYehuda Sadeh dout("ceph_osdc_wait_event returned %d\n", ret); 143959c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 144059c2be1eSYehuda Sadeh return 0; 144159c2be1eSYehuda Sadeh 144259c2be1eSYehuda Sadeh fail_event: 144359c2be1eSYehuda Sadeh ceph_osdc_cancel_event(event); 144459c2be1eSYehuda Sadeh fail: 144559c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 144659c2be1eSYehuda Sadeh return ret; 144759c2be1eSYehuda Sadeh } 144859c2be1eSYehuda Sadeh 144959c2be1eSYehuda Sadeh /* 1450602adf40SYehuda Sadeh * Request sync osd read 1451602adf40SYehuda Sadeh */ 14520ce1a794SAlex Elder static int rbd_req_sync_exec(struct rbd_device *rbd_dev, 1453aded07eaSAlex Elder const char *object_name, 1454aded07eaSAlex Elder const char *class_name, 1455aded07eaSAlex Elder const char *method_name, 1456602adf40SYehuda Sadeh const char *data, 145759c2be1eSYehuda Sadeh int len, 145859c2be1eSYehuda Sadeh u64 *ver) 1459602adf40SYehuda Sadeh { 1460602adf40SYehuda Sadeh struct ceph_osd_req_op *ops; 1461aded07eaSAlex Elder int class_name_len = strlen(class_name); 1462aded07eaSAlex Elder int method_name_len = strlen(method_name); 146357cfc106SAlex Elder int ret; 146457cfc106SAlex Elder 146557cfc106SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, 1466aded07eaSAlex Elder class_name_len + method_name_len + len); 146757cfc106SAlex Elder if (!ops) 146857cfc106SAlex Elder return -ENOMEM; 1469602adf40SYehuda Sadeh 1470aded07eaSAlex Elder ops[0].cls.class_name = class_name; 1471aded07eaSAlex Elder ops[0].cls.class_len = (__u8) class_name_len; 1472aded07eaSAlex Elder ops[0].cls.method_name = method_name; 1473aded07eaSAlex Elder ops[0].cls.method_len = (__u8) method_name_len; 1474602adf40SYehuda Sadeh ops[0].cls.argc = 0; 1475602adf40SYehuda Sadeh ops[0].cls.indata = data; 1476602adf40SYehuda Sadeh ops[0].cls.indata_len = len; 1477602adf40SYehuda Sadeh 14780ce1a794SAlex Elder ret = rbd_req_sync_op(rbd_dev, NULL, 1479602adf40SYehuda Sadeh CEPH_NOSNAP, 1480602adf40SYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 1481602adf40SYehuda Sadeh ops, 1482d1f57ea6SAlex Elder object_name, 0, 0, NULL, NULL, ver); 1483602adf40SYehuda Sadeh 1484602adf40SYehuda Sadeh rbd_destroy_ops(ops); 1485602adf40SYehuda Sadeh 1486602adf40SYehuda Sadeh dout("cls_exec returned %d\n", ret); 1487602adf40SYehuda Sadeh return ret; 1488602adf40SYehuda Sadeh } 1489602adf40SYehuda Sadeh 14901fec7093SYehuda Sadeh static struct rbd_req_coll *rbd_alloc_coll(int num_reqs) 14911fec7093SYehuda Sadeh { 14921fec7093SYehuda Sadeh struct rbd_req_coll *coll = 14931fec7093SYehuda Sadeh kzalloc(sizeof(struct rbd_req_coll) + 14941fec7093SYehuda Sadeh sizeof(struct rbd_req_status) * num_reqs, 14951fec7093SYehuda Sadeh GFP_ATOMIC); 14961fec7093SYehuda Sadeh 14971fec7093SYehuda Sadeh if (!coll) 14981fec7093SYehuda Sadeh return NULL; 14991fec7093SYehuda Sadeh coll->total = num_reqs; 15001fec7093SYehuda Sadeh kref_init(&coll->kref); 15011fec7093SYehuda Sadeh return coll; 15021fec7093SYehuda Sadeh } 15031fec7093SYehuda Sadeh 1504602adf40SYehuda Sadeh /* 1505602adf40SYehuda Sadeh * block device queue callback 1506602adf40SYehuda Sadeh */ 1507602adf40SYehuda Sadeh static void rbd_rq_fn(struct request_queue *q) 1508602adf40SYehuda Sadeh { 1509602adf40SYehuda Sadeh struct rbd_device *rbd_dev = q->queuedata; 1510602adf40SYehuda Sadeh struct request *rq; 1511602adf40SYehuda Sadeh struct bio_pair *bp = NULL; 1512602adf40SYehuda Sadeh 151300f1f36fSAlex Elder while ((rq = blk_fetch_request(q))) { 1514602adf40SYehuda Sadeh struct bio *bio; 1515602adf40SYehuda Sadeh struct bio *rq_bio, *next_bio = NULL; 1516602adf40SYehuda Sadeh bool do_write; 1517bd919d45SAlex Elder unsigned int size; 1518bd919d45SAlex Elder u64 op_size = 0; 1519602adf40SYehuda Sadeh u64 ofs; 15201fec7093SYehuda Sadeh int num_segs, cur_seg = 0; 15211fec7093SYehuda Sadeh struct rbd_req_coll *coll; 1522d1d25646SJosh Durgin struct ceph_snap_context *snapc; 1523602adf40SYehuda Sadeh 1524602adf40SYehuda Sadeh dout("fetched request\n"); 1525602adf40SYehuda Sadeh 1526602adf40SYehuda Sadeh /* filter out block requests we don't understand */ 1527602adf40SYehuda Sadeh if ((rq->cmd_type != REQ_TYPE_FS)) { 1528602adf40SYehuda Sadeh __blk_end_request_all(rq, 0); 152900f1f36fSAlex Elder continue; 1530602adf40SYehuda Sadeh } 1531602adf40SYehuda Sadeh 1532602adf40SYehuda Sadeh /* deduce our operation (read, write) */ 1533602adf40SYehuda Sadeh do_write = (rq_data_dir(rq) == WRITE); 1534602adf40SYehuda Sadeh 1535602adf40SYehuda Sadeh size = blk_rq_bytes(rq); 1536593a9e7bSAlex Elder ofs = blk_rq_pos(rq) * SECTOR_SIZE; 1537602adf40SYehuda Sadeh rq_bio = rq->bio; 1538f84344f3SAlex Elder if (do_write && rbd_dev->mapping.read_only) { 1539602adf40SYehuda Sadeh __blk_end_request_all(rq, -EROFS); 154000f1f36fSAlex Elder continue; 1541602adf40SYehuda Sadeh } 1542602adf40SYehuda Sadeh 1543602adf40SYehuda Sadeh spin_unlock_irq(q->queue_lock); 1544602adf40SYehuda Sadeh 1545e88a36ecSJosh Durgin down_read(&rbd_dev->header_rwsem); 1546e88a36ecSJosh Durgin 1547f84344f3SAlex Elder if (rbd_dev->mapping.snap_id != CEPH_NOSNAP && 1548f84344f3SAlex Elder !rbd_dev->mapping.snap_exists) { 1549d1d25646SJosh Durgin up_read(&rbd_dev->header_rwsem); 1550e88a36ecSJosh Durgin dout("request for non-existent snapshot"); 1551e88a36ecSJosh Durgin spin_lock_irq(q->queue_lock); 1552e88a36ecSJosh Durgin __blk_end_request_all(rq, -ENXIO); 1553e88a36ecSJosh Durgin continue; 1554e88a36ecSJosh Durgin } 1555d1d25646SJosh Durgin 1556d1d25646SJosh Durgin snapc = ceph_get_snap_context(rbd_dev->header.snapc); 1557d1d25646SJosh Durgin 1558d1d25646SJosh Durgin up_read(&rbd_dev->header_rwsem); 1559e88a36ecSJosh Durgin 1560602adf40SYehuda Sadeh dout("%s 0x%x bytes at 0x%llx\n", 1561602adf40SYehuda Sadeh do_write ? "write" : "read", 1562bd919d45SAlex Elder size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); 1563602adf40SYehuda Sadeh 15641fec7093SYehuda Sadeh num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); 1565df111be6SAlex Elder if (num_segs <= 0) { 1566df111be6SAlex Elder spin_lock_irq(q->queue_lock); 1567df111be6SAlex Elder __blk_end_request_all(rq, num_segs); 1568df111be6SAlex Elder ceph_put_snap_context(snapc); 1569df111be6SAlex Elder continue; 1570df111be6SAlex Elder } 15711fec7093SYehuda Sadeh coll = rbd_alloc_coll(num_segs); 15721fec7093SYehuda Sadeh if (!coll) { 15731fec7093SYehuda Sadeh spin_lock_irq(q->queue_lock); 15741fec7093SYehuda Sadeh __blk_end_request_all(rq, -ENOMEM); 1575d1d25646SJosh Durgin ceph_put_snap_context(snapc); 157600f1f36fSAlex Elder continue; 15771fec7093SYehuda Sadeh } 15781fec7093SYehuda Sadeh 1579602adf40SYehuda Sadeh do { 1580602adf40SYehuda Sadeh /* a bio clone to be passed down to OSD req */ 1581bd919d45SAlex Elder dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); 158265ccfe21SAlex Elder op_size = rbd_segment_length(rbd_dev, ofs, size); 15831fec7093SYehuda Sadeh kref_get(&coll->kref); 1584602adf40SYehuda Sadeh bio = bio_chain_clone(&rq_bio, &next_bio, &bp, 1585602adf40SYehuda Sadeh op_size, GFP_ATOMIC); 1586602adf40SYehuda Sadeh if (!bio) { 15871fec7093SYehuda Sadeh rbd_coll_end_req_index(rq, coll, cur_seg, 15881fec7093SYehuda Sadeh -ENOMEM, op_size); 15891fec7093SYehuda Sadeh goto next_seg; 1590602adf40SYehuda Sadeh } 1591602adf40SYehuda Sadeh 15921fec7093SYehuda Sadeh 1593602adf40SYehuda Sadeh /* init OSD command: write or read */ 1594602adf40SYehuda Sadeh if (do_write) 1595602adf40SYehuda Sadeh rbd_req_write(rq, rbd_dev, 1596d1d25646SJosh Durgin snapc, 1597602adf40SYehuda Sadeh ofs, 15981fec7093SYehuda Sadeh op_size, bio, 15991fec7093SYehuda Sadeh coll, cur_seg); 1600602adf40SYehuda Sadeh else 1601602adf40SYehuda Sadeh rbd_req_read(rq, rbd_dev, 1602f84344f3SAlex Elder rbd_dev->mapping.snap_id, 1603602adf40SYehuda Sadeh ofs, 16041fec7093SYehuda Sadeh op_size, bio, 16051fec7093SYehuda Sadeh coll, cur_seg); 1606602adf40SYehuda Sadeh 16071fec7093SYehuda Sadeh next_seg: 1608602adf40SYehuda Sadeh size -= op_size; 1609602adf40SYehuda Sadeh ofs += op_size; 1610602adf40SYehuda Sadeh 16111fec7093SYehuda Sadeh cur_seg++; 1612602adf40SYehuda Sadeh rq_bio = next_bio; 1613602adf40SYehuda Sadeh } while (size > 0); 16141fec7093SYehuda Sadeh kref_put(&coll->kref, rbd_coll_release); 1615602adf40SYehuda Sadeh 1616602adf40SYehuda Sadeh if (bp) 1617602adf40SYehuda Sadeh bio_pair_release(bp); 1618602adf40SYehuda Sadeh spin_lock_irq(q->queue_lock); 1619d1d25646SJosh Durgin 1620d1d25646SJosh Durgin ceph_put_snap_context(snapc); 1621602adf40SYehuda Sadeh } 1622602adf40SYehuda Sadeh } 1623602adf40SYehuda Sadeh 1624602adf40SYehuda Sadeh /* 1625602adf40SYehuda Sadeh * a queue callback. Makes sure that we don't create a bio that spans across 1626602adf40SYehuda Sadeh * multiple osd objects. One exception would be with a single page bios, 1627602adf40SYehuda Sadeh * which we handle later at bio_chain_clone 1628602adf40SYehuda Sadeh */ 1629602adf40SYehuda Sadeh static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, 1630602adf40SYehuda Sadeh struct bio_vec *bvec) 1631602adf40SYehuda Sadeh { 1632602adf40SYehuda Sadeh struct rbd_device *rbd_dev = q->queuedata; 1633593a9e7bSAlex Elder unsigned int chunk_sectors; 1634593a9e7bSAlex Elder sector_t sector; 1635593a9e7bSAlex Elder unsigned int bio_sectors; 1636602adf40SYehuda Sadeh int max; 1637602adf40SYehuda Sadeh 1638593a9e7bSAlex Elder chunk_sectors = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT); 1639593a9e7bSAlex Elder sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); 1640593a9e7bSAlex Elder bio_sectors = bmd->bi_size >> SECTOR_SHIFT; 1641593a9e7bSAlex Elder 1642602adf40SYehuda Sadeh max = (chunk_sectors - ((sector & (chunk_sectors - 1)) 1643593a9e7bSAlex Elder + bio_sectors)) << SECTOR_SHIFT; 1644602adf40SYehuda Sadeh if (max < 0) 1645602adf40SYehuda Sadeh max = 0; /* bio_add cannot handle a negative return */ 1646602adf40SYehuda Sadeh if (max <= bvec->bv_len && bio_sectors == 0) 1647602adf40SYehuda Sadeh return bvec->bv_len; 1648602adf40SYehuda Sadeh return max; 1649602adf40SYehuda Sadeh } 1650602adf40SYehuda Sadeh 1651602adf40SYehuda Sadeh static void rbd_free_disk(struct rbd_device *rbd_dev) 1652602adf40SYehuda Sadeh { 1653602adf40SYehuda Sadeh struct gendisk *disk = rbd_dev->disk; 1654602adf40SYehuda Sadeh 1655602adf40SYehuda Sadeh if (!disk) 1656602adf40SYehuda Sadeh return; 1657602adf40SYehuda Sadeh 1658602adf40SYehuda Sadeh rbd_header_free(&rbd_dev->header); 1659602adf40SYehuda Sadeh 1660602adf40SYehuda Sadeh if (disk->flags & GENHD_FL_UP) 1661602adf40SYehuda Sadeh del_gendisk(disk); 1662602adf40SYehuda Sadeh if (disk->queue) 1663602adf40SYehuda Sadeh blk_cleanup_queue(disk->queue); 1664602adf40SYehuda Sadeh put_disk(disk); 1665602adf40SYehuda Sadeh } 1666602adf40SYehuda Sadeh 1667602adf40SYehuda Sadeh /* 16684156d998SAlex Elder * Read the complete header for the given rbd device. 16694156d998SAlex Elder * 16704156d998SAlex Elder * Returns a pointer to a dynamically-allocated buffer containing 16714156d998SAlex Elder * the complete and validated header. Caller can pass the address 16724156d998SAlex Elder * of a variable that will be filled in with the version of the 16734156d998SAlex Elder * header object at the time it was read. 16744156d998SAlex Elder * 16754156d998SAlex Elder * Returns a pointer-coded errno if a failure occurs. 16764156d998SAlex Elder */ 16774156d998SAlex Elder static struct rbd_image_header_ondisk * 16784156d998SAlex Elder rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) 16794156d998SAlex Elder { 16804156d998SAlex Elder struct rbd_image_header_ondisk *ondisk = NULL; 16814156d998SAlex Elder u32 snap_count = 0; 16824156d998SAlex Elder u64 names_size = 0; 16834156d998SAlex Elder u32 want_count; 16844156d998SAlex Elder int ret; 16854156d998SAlex Elder 16864156d998SAlex Elder /* 16874156d998SAlex Elder * The complete header will include an array of its 64-bit 16884156d998SAlex Elder * snapshot ids, followed by the names of those snapshots as 16894156d998SAlex Elder * a contiguous block of NUL-terminated strings. Note that 16904156d998SAlex Elder * the number of snapshots could change by the time we read 16914156d998SAlex Elder * it in, in which case we re-read it. 16924156d998SAlex Elder */ 16934156d998SAlex Elder do { 16944156d998SAlex Elder size_t size; 16954156d998SAlex Elder 16964156d998SAlex Elder kfree(ondisk); 16974156d998SAlex Elder 16984156d998SAlex Elder size = sizeof (*ondisk); 16994156d998SAlex Elder size += snap_count * sizeof (struct rbd_image_snap_ondisk); 17004156d998SAlex Elder size += names_size; 17014156d998SAlex Elder ondisk = kmalloc(size, GFP_KERNEL); 17024156d998SAlex Elder if (!ondisk) 17034156d998SAlex Elder return ERR_PTR(-ENOMEM); 17044156d998SAlex Elder 17054156d998SAlex Elder ret = rbd_req_sync_read(rbd_dev, CEPH_NOSNAP, 17064156d998SAlex Elder rbd_dev->header_name, 17074156d998SAlex Elder 0, size, 17084156d998SAlex Elder (char *) ondisk, version); 17094156d998SAlex Elder 17104156d998SAlex Elder if (ret < 0) 17114156d998SAlex Elder goto out_err; 17124156d998SAlex Elder if (WARN_ON((size_t) ret < size)) { 17134156d998SAlex Elder ret = -ENXIO; 17144156d998SAlex Elder pr_warning("short header read for image %s" 17154156d998SAlex Elder " (want %zd got %d)\n", 17164156d998SAlex Elder rbd_dev->image_name, size, ret); 17174156d998SAlex Elder goto out_err; 17184156d998SAlex Elder } 17194156d998SAlex Elder if (!rbd_dev_ondisk_valid(ondisk)) { 17204156d998SAlex Elder ret = -ENXIO; 17214156d998SAlex Elder pr_warning("invalid header for image %s\n", 17224156d998SAlex Elder rbd_dev->image_name); 17234156d998SAlex Elder goto out_err; 17244156d998SAlex Elder } 17254156d998SAlex Elder 17264156d998SAlex Elder names_size = le64_to_cpu(ondisk->snap_names_len); 17274156d998SAlex Elder want_count = snap_count; 17284156d998SAlex Elder snap_count = le32_to_cpu(ondisk->snap_count); 17294156d998SAlex Elder } while (snap_count != want_count); 17304156d998SAlex Elder 17314156d998SAlex Elder return ondisk; 17324156d998SAlex Elder 17334156d998SAlex Elder out_err: 17344156d998SAlex Elder kfree(ondisk); 17354156d998SAlex Elder 17364156d998SAlex Elder return ERR_PTR(ret); 17374156d998SAlex Elder } 17384156d998SAlex Elder 17394156d998SAlex Elder /* 1740602adf40SYehuda Sadeh * reload the ondisk the header 1741602adf40SYehuda Sadeh */ 1742602adf40SYehuda Sadeh static int rbd_read_header(struct rbd_device *rbd_dev, 1743602adf40SYehuda Sadeh struct rbd_image_header *header) 1744602adf40SYehuda Sadeh { 17454156d998SAlex Elder struct rbd_image_header_ondisk *ondisk; 17464156d998SAlex Elder u64 ver = 0; 17474156d998SAlex Elder int ret; 1748602adf40SYehuda Sadeh 17494156d998SAlex Elder ondisk = rbd_dev_v1_header_read(rbd_dev, &ver); 17504156d998SAlex Elder if (IS_ERR(ondisk)) 17514156d998SAlex Elder return PTR_ERR(ondisk); 17524156d998SAlex Elder ret = rbd_header_from_disk(header, ondisk); 17534156d998SAlex Elder if (ret >= 0) 175459c2be1eSYehuda Sadeh header->obj_version = ver; 17554156d998SAlex Elder kfree(ondisk); 1756602adf40SYehuda Sadeh 17574156d998SAlex Elder return ret; 1758602adf40SYehuda Sadeh } 1759602adf40SYehuda Sadeh 1760602adf40SYehuda Sadeh /* 1761602adf40SYehuda Sadeh * create a snapshot 1762602adf40SYehuda Sadeh */ 17630ce1a794SAlex Elder static int rbd_header_add_snap(struct rbd_device *rbd_dev, 1764602adf40SYehuda Sadeh const char *snap_name, 1765602adf40SYehuda Sadeh gfp_t gfp_flags) 1766602adf40SYehuda Sadeh { 1767602adf40SYehuda Sadeh int name_len = strlen(snap_name); 1768602adf40SYehuda Sadeh u64 new_snapid; 1769602adf40SYehuda Sadeh int ret; 1770916d4d67SSage Weil void *data, *p, *e; 17711dbb4399SAlex Elder struct ceph_mon_client *monc; 1772602adf40SYehuda Sadeh 1773602adf40SYehuda Sadeh /* we should create a snapshot only if we're pointing at the head */ 1774f84344f3SAlex Elder if (rbd_dev->mapping.snap_id != CEPH_NOSNAP) 1775602adf40SYehuda Sadeh return -EINVAL; 1776602adf40SYehuda Sadeh 17770ce1a794SAlex Elder monc = &rbd_dev->rbd_client->client->monc; 17780ce1a794SAlex Elder ret = ceph_monc_create_snapid(monc, rbd_dev->pool_id, &new_snapid); 1779bd919d45SAlex Elder dout("created snapid=%llu\n", (unsigned long long) new_snapid); 1780602adf40SYehuda Sadeh if (ret < 0) 1781602adf40SYehuda Sadeh return ret; 1782602adf40SYehuda Sadeh 1783602adf40SYehuda Sadeh data = kmalloc(name_len + 16, gfp_flags); 1784602adf40SYehuda Sadeh if (!data) 1785602adf40SYehuda Sadeh return -ENOMEM; 1786602adf40SYehuda Sadeh 1787916d4d67SSage Weil p = data; 1788916d4d67SSage Weil e = data + name_len + 16; 1789602adf40SYehuda Sadeh 1790916d4d67SSage Weil ceph_encode_string_safe(&p, e, snap_name, name_len, bad); 1791916d4d67SSage Weil ceph_encode_64_safe(&p, e, new_snapid, bad); 1792602adf40SYehuda Sadeh 17930bed54dcSAlex Elder ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, 17940ce1a794SAlex Elder "rbd", "snap_add", 1795d67d4be5SAlex Elder data, p - data, NULL); 1796602adf40SYehuda Sadeh 1797916d4d67SSage Weil kfree(data); 1798602adf40SYehuda Sadeh 1799505cbb9bSAlex Elder return ret < 0 ? ret : 0; 1800602adf40SYehuda Sadeh bad: 1801602adf40SYehuda Sadeh return -ERANGE; 1802602adf40SYehuda Sadeh } 1803602adf40SYehuda Sadeh 1804dfc5606dSYehuda Sadeh static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev) 1805dfc5606dSYehuda Sadeh { 1806dfc5606dSYehuda Sadeh struct rbd_snap *snap; 1807a0593290SAlex Elder struct rbd_snap *next; 1808dfc5606dSYehuda Sadeh 1809a0593290SAlex Elder list_for_each_entry_safe(snap, next, &rbd_dev->snaps, node) 181014e7085dSAlex Elder __rbd_remove_snap_dev(snap); 1811dfc5606dSYehuda Sadeh } 1812dfc5606dSYehuda Sadeh 1813602adf40SYehuda Sadeh /* 1814602adf40SYehuda Sadeh * only read the first part of the ondisk header, without the snaps info 1815602adf40SYehuda Sadeh */ 1816b813623aSAlex Elder static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) 1817602adf40SYehuda Sadeh { 1818602adf40SYehuda Sadeh int ret; 1819602adf40SYehuda Sadeh struct rbd_image_header h; 1820602adf40SYehuda Sadeh 1821602adf40SYehuda Sadeh ret = rbd_read_header(rbd_dev, &h); 1822602adf40SYehuda Sadeh if (ret < 0) 1823602adf40SYehuda Sadeh return ret; 1824602adf40SYehuda Sadeh 1825a51aa0c0SJosh Durgin down_write(&rbd_dev->header_rwsem); 1826a51aa0c0SJosh Durgin 18279db4b3e3SSage Weil /* resized? */ 1828f84344f3SAlex Elder if (rbd_dev->mapping.snap_id == CEPH_NOSNAP) { 1829474ef7ceSJosh Durgin sector_t size = (sector_t) h.image_size / SECTOR_SIZE; 1830474ef7ceSJosh Durgin 183199c1f08fSAlex Elder if (size != (sector_t) rbd_dev->mapping.size) { 183299c1f08fSAlex Elder dout("setting size to %llu sectors", 183399c1f08fSAlex Elder (unsigned long long) size); 183499c1f08fSAlex Elder rbd_dev->mapping.size = (u64) size; 1835474ef7ceSJosh Durgin set_capacity(rbd_dev->disk, size); 1836474ef7ceSJosh Durgin } 183799c1f08fSAlex Elder } 18389db4b3e3SSage Weil 1839849b4260SAlex Elder /* rbd_dev->header.object_prefix shouldn't change */ 1840602adf40SYehuda Sadeh kfree(rbd_dev->header.snap_sizes); 1841849b4260SAlex Elder kfree(rbd_dev->header.snap_names); 1842d1d25646SJosh Durgin /* osd requests may still refer to snapc */ 1843d1d25646SJosh Durgin ceph_put_snap_context(rbd_dev->header.snapc); 1844602adf40SYehuda Sadeh 1845b813623aSAlex Elder if (hver) 1846b813623aSAlex Elder *hver = h.obj_version; 1847a71b891bSJosh Durgin rbd_dev->header.obj_version = h.obj_version; 184893a24e08SJosh Durgin rbd_dev->header.image_size = h.image_size; 1849602adf40SYehuda Sadeh rbd_dev->header.snapc = h.snapc; 1850602adf40SYehuda Sadeh rbd_dev->header.snap_names = h.snap_names; 1851602adf40SYehuda Sadeh rbd_dev->header.snap_sizes = h.snap_sizes; 1852849b4260SAlex Elder /* Free the extra copy of the object prefix */ 1853849b4260SAlex Elder WARN_ON(strcmp(rbd_dev->header.object_prefix, h.object_prefix)); 1854849b4260SAlex Elder kfree(h.object_prefix); 1855849b4260SAlex Elder 18569fcbb800SAlex Elder ret = rbd_dev_snap_devs_update(rbd_dev); 1857dfc5606dSYehuda Sadeh 1858c666601aSJosh Durgin up_write(&rbd_dev->header_rwsem); 1859602adf40SYehuda Sadeh 1860dfc5606dSYehuda Sadeh return ret; 1861602adf40SYehuda Sadeh } 1862602adf40SYehuda Sadeh 18631fe5e993SAlex Elder static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) 18641fe5e993SAlex Elder { 18651fe5e993SAlex Elder int ret; 18661fe5e993SAlex Elder 18671fe5e993SAlex Elder mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 18681fe5e993SAlex Elder ret = __rbd_refresh_header(rbd_dev, hver); 18691fe5e993SAlex Elder mutex_unlock(&ctl_mutex); 18701fe5e993SAlex Elder 18711fe5e993SAlex Elder return ret; 18721fe5e993SAlex Elder } 18731fe5e993SAlex Elder 1874602adf40SYehuda Sadeh static int rbd_init_disk(struct rbd_device *rbd_dev) 1875602adf40SYehuda Sadeh { 1876602adf40SYehuda Sadeh struct gendisk *disk; 1877602adf40SYehuda Sadeh struct request_queue *q; 1878602adf40SYehuda Sadeh int rc; 1879593a9e7bSAlex Elder u64 segment_size; 1880602adf40SYehuda Sadeh 1881602adf40SYehuda Sadeh /* contact OSD, request size info about the object being mapped */ 1882602adf40SYehuda Sadeh rc = rbd_read_header(rbd_dev, &rbd_dev->header); 1883602adf40SYehuda Sadeh if (rc) 1884602adf40SYehuda Sadeh return rc; 1885602adf40SYehuda Sadeh 1886dfc5606dSYehuda Sadeh /* no need to lock here, as rbd_dev is not registered yet */ 18879fcbb800SAlex Elder rc = rbd_dev_snap_devs_update(rbd_dev); 1888dfc5606dSYehuda Sadeh if (rc) 1889dfc5606dSYehuda Sadeh return rc; 1890dfc5606dSYehuda Sadeh 189199c1f08fSAlex Elder rc = rbd_header_set_snap(rbd_dev); 1892602adf40SYehuda Sadeh if (rc) 1893602adf40SYehuda Sadeh return rc; 1894602adf40SYehuda Sadeh 1895602adf40SYehuda Sadeh /* create gendisk info */ 1896602adf40SYehuda Sadeh rc = -ENOMEM; 1897602adf40SYehuda Sadeh disk = alloc_disk(RBD_MINORS_PER_MAJOR); 1898602adf40SYehuda Sadeh if (!disk) 1899602adf40SYehuda Sadeh goto out; 1900602adf40SYehuda Sadeh 1901f0f8cef5SAlex Elder snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", 1902de71a297SAlex Elder rbd_dev->dev_id); 1903602adf40SYehuda Sadeh disk->major = rbd_dev->major; 1904602adf40SYehuda Sadeh disk->first_minor = 0; 1905602adf40SYehuda Sadeh disk->fops = &rbd_bd_ops; 1906602adf40SYehuda Sadeh disk->private_data = rbd_dev; 1907602adf40SYehuda Sadeh 1908602adf40SYehuda Sadeh /* init rq */ 1909602adf40SYehuda Sadeh rc = -ENOMEM; 1910602adf40SYehuda Sadeh q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); 1911602adf40SYehuda Sadeh if (!q) 1912602adf40SYehuda Sadeh goto out_disk; 1913029bcbd8SJosh Durgin 1914593a9e7bSAlex Elder /* We use the default size, but let's be explicit about it. */ 1915593a9e7bSAlex Elder blk_queue_physical_block_size(q, SECTOR_SIZE); 1916593a9e7bSAlex Elder 1917029bcbd8SJosh Durgin /* set io sizes to object size */ 1918593a9e7bSAlex Elder segment_size = rbd_obj_bytes(&rbd_dev->header); 1919593a9e7bSAlex Elder blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); 1920593a9e7bSAlex Elder blk_queue_max_segment_size(q, segment_size); 1921593a9e7bSAlex Elder blk_queue_io_min(q, segment_size); 1922593a9e7bSAlex Elder blk_queue_io_opt(q, segment_size); 1923029bcbd8SJosh Durgin 1924602adf40SYehuda Sadeh blk_queue_merge_bvec(q, rbd_merge_bvec); 1925602adf40SYehuda Sadeh disk->queue = q; 1926602adf40SYehuda Sadeh 1927602adf40SYehuda Sadeh q->queuedata = rbd_dev; 1928602adf40SYehuda Sadeh 1929602adf40SYehuda Sadeh rbd_dev->disk = disk; 1930602adf40SYehuda Sadeh 1931602adf40SYehuda Sadeh /* finally, announce the disk to the world */ 193299c1f08fSAlex Elder set_capacity(disk, (sector_t) rbd_dev->mapping.size / SECTOR_SIZE); 1933602adf40SYehuda Sadeh add_disk(disk); 1934602adf40SYehuda Sadeh 1935602adf40SYehuda Sadeh pr_info("%s: added with size 0x%llx\n", 193699c1f08fSAlex Elder disk->disk_name, (unsigned long long) rbd_dev->mapping.size); 1937602adf40SYehuda Sadeh return 0; 1938602adf40SYehuda Sadeh 1939602adf40SYehuda Sadeh out_disk: 1940602adf40SYehuda Sadeh put_disk(disk); 1941602adf40SYehuda Sadeh out: 1942602adf40SYehuda Sadeh return rc; 1943602adf40SYehuda Sadeh } 1944602adf40SYehuda Sadeh 1945dfc5606dSYehuda Sadeh /* 1946dfc5606dSYehuda Sadeh sysfs 1947dfc5606dSYehuda Sadeh */ 1948602adf40SYehuda Sadeh 1949593a9e7bSAlex Elder static struct rbd_device *dev_to_rbd_dev(struct device *dev) 1950593a9e7bSAlex Elder { 1951593a9e7bSAlex Elder return container_of(dev, struct rbd_device, dev); 1952593a9e7bSAlex Elder } 1953593a9e7bSAlex Elder 1954dfc5606dSYehuda Sadeh static ssize_t rbd_size_show(struct device *dev, 1955dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1956602adf40SYehuda Sadeh { 1957593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1958a51aa0c0SJosh Durgin sector_t size; 1959dfc5606dSYehuda Sadeh 1960a51aa0c0SJosh Durgin down_read(&rbd_dev->header_rwsem); 1961a51aa0c0SJosh Durgin size = get_capacity(rbd_dev->disk); 1962a51aa0c0SJosh Durgin up_read(&rbd_dev->header_rwsem); 1963a51aa0c0SJosh Durgin 1964a51aa0c0SJosh Durgin return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE); 1965602adf40SYehuda Sadeh } 1966602adf40SYehuda Sadeh 1967dfc5606dSYehuda Sadeh static ssize_t rbd_major_show(struct device *dev, 1968dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1969602adf40SYehuda Sadeh { 1970593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1971dfc5606dSYehuda Sadeh 1972dfc5606dSYehuda Sadeh return sprintf(buf, "%d\n", rbd_dev->major); 1973dfc5606dSYehuda Sadeh } 1974dfc5606dSYehuda Sadeh 1975dfc5606dSYehuda Sadeh static ssize_t rbd_client_id_show(struct device *dev, 1976dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1977dfc5606dSYehuda Sadeh { 1978593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1979dfc5606dSYehuda Sadeh 19801dbb4399SAlex Elder return sprintf(buf, "client%lld\n", 19811dbb4399SAlex Elder ceph_client_id(rbd_dev->rbd_client->client)); 1982dfc5606dSYehuda Sadeh } 1983dfc5606dSYehuda Sadeh 1984dfc5606dSYehuda Sadeh static ssize_t rbd_pool_show(struct device *dev, 1985dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1986dfc5606dSYehuda Sadeh { 1987593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1988dfc5606dSYehuda Sadeh 1989dfc5606dSYehuda Sadeh return sprintf(buf, "%s\n", rbd_dev->pool_name); 1990dfc5606dSYehuda Sadeh } 1991dfc5606dSYehuda Sadeh 19929bb2f334SAlex Elder static ssize_t rbd_pool_id_show(struct device *dev, 19939bb2f334SAlex Elder struct device_attribute *attr, char *buf) 19949bb2f334SAlex Elder { 19959bb2f334SAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 19969bb2f334SAlex Elder 19979bb2f334SAlex Elder return sprintf(buf, "%d\n", rbd_dev->pool_id); 19989bb2f334SAlex Elder } 19999bb2f334SAlex Elder 2000dfc5606dSYehuda Sadeh static ssize_t rbd_name_show(struct device *dev, 2001dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 2002dfc5606dSYehuda Sadeh { 2003593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2004dfc5606dSYehuda Sadeh 20050bed54dcSAlex Elder return sprintf(buf, "%s\n", rbd_dev->image_name); 2006dfc5606dSYehuda Sadeh } 2007dfc5606dSYehuda Sadeh 2008dfc5606dSYehuda Sadeh static ssize_t rbd_snap_show(struct device *dev, 2009dfc5606dSYehuda Sadeh struct device_attribute *attr, 2010dfc5606dSYehuda Sadeh char *buf) 2011dfc5606dSYehuda Sadeh { 2012593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2013dfc5606dSYehuda Sadeh 2014f84344f3SAlex Elder return sprintf(buf, "%s\n", rbd_dev->mapping.snap_name); 2015dfc5606dSYehuda Sadeh } 2016dfc5606dSYehuda Sadeh 2017dfc5606dSYehuda Sadeh static ssize_t rbd_image_refresh(struct device *dev, 2018dfc5606dSYehuda Sadeh struct device_attribute *attr, 2019dfc5606dSYehuda Sadeh const char *buf, 2020dfc5606dSYehuda Sadeh size_t size) 2021dfc5606dSYehuda Sadeh { 2022593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2023b813623aSAlex Elder int ret; 2024602adf40SYehuda Sadeh 20251fe5e993SAlex Elder ret = rbd_refresh_header(rbd_dev, NULL); 2026b813623aSAlex Elder 2027b813623aSAlex Elder return ret < 0 ? ret : size; 2028dfc5606dSYehuda Sadeh } 2029602adf40SYehuda Sadeh 2030dfc5606dSYehuda Sadeh static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL); 2031dfc5606dSYehuda Sadeh static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL); 2032dfc5606dSYehuda Sadeh static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL); 2033dfc5606dSYehuda Sadeh static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL); 20349bb2f334SAlex Elder static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL); 2035dfc5606dSYehuda Sadeh static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL); 2036dfc5606dSYehuda Sadeh static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); 2037dfc5606dSYehuda Sadeh static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); 2038dfc5606dSYehuda Sadeh static DEVICE_ATTR(create_snap, S_IWUSR, NULL, rbd_snap_add); 2039dfc5606dSYehuda Sadeh 2040dfc5606dSYehuda Sadeh static struct attribute *rbd_attrs[] = { 2041dfc5606dSYehuda Sadeh &dev_attr_size.attr, 2042dfc5606dSYehuda Sadeh &dev_attr_major.attr, 2043dfc5606dSYehuda Sadeh &dev_attr_client_id.attr, 2044dfc5606dSYehuda Sadeh &dev_attr_pool.attr, 20459bb2f334SAlex Elder &dev_attr_pool_id.attr, 2046dfc5606dSYehuda Sadeh &dev_attr_name.attr, 2047dfc5606dSYehuda Sadeh &dev_attr_current_snap.attr, 2048dfc5606dSYehuda Sadeh &dev_attr_refresh.attr, 2049dfc5606dSYehuda Sadeh &dev_attr_create_snap.attr, 2050dfc5606dSYehuda Sadeh NULL 2051dfc5606dSYehuda Sadeh }; 2052dfc5606dSYehuda Sadeh 2053dfc5606dSYehuda Sadeh static struct attribute_group rbd_attr_group = { 2054dfc5606dSYehuda Sadeh .attrs = rbd_attrs, 2055dfc5606dSYehuda Sadeh }; 2056dfc5606dSYehuda Sadeh 2057dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_attr_groups[] = { 2058dfc5606dSYehuda Sadeh &rbd_attr_group, 2059dfc5606dSYehuda Sadeh NULL 2060dfc5606dSYehuda Sadeh }; 2061dfc5606dSYehuda Sadeh 2062dfc5606dSYehuda Sadeh static void rbd_sysfs_dev_release(struct device *dev) 2063dfc5606dSYehuda Sadeh { 2064dfc5606dSYehuda Sadeh } 2065dfc5606dSYehuda Sadeh 2066dfc5606dSYehuda Sadeh static struct device_type rbd_device_type = { 2067dfc5606dSYehuda Sadeh .name = "rbd", 2068dfc5606dSYehuda Sadeh .groups = rbd_attr_groups, 2069dfc5606dSYehuda Sadeh .release = rbd_sysfs_dev_release, 2070dfc5606dSYehuda Sadeh }; 2071dfc5606dSYehuda Sadeh 2072dfc5606dSYehuda Sadeh 2073dfc5606dSYehuda Sadeh /* 2074dfc5606dSYehuda Sadeh sysfs - snapshots 2075dfc5606dSYehuda Sadeh */ 2076dfc5606dSYehuda Sadeh 2077dfc5606dSYehuda Sadeh static ssize_t rbd_snap_size_show(struct device *dev, 2078dfc5606dSYehuda Sadeh struct device_attribute *attr, 2079dfc5606dSYehuda Sadeh char *buf) 2080dfc5606dSYehuda Sadeh { 2081dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 2082dfc5606dSYehuda Sadeh 20833591538fSJosh Durgin return sprintf(buf, "%llu\n", (unsigned long long)snap->size); 2084dfc5606dSYehuda Sadeh } 2085dfc5606dSYehuda Sadeh 2086dfc5606dSYehuda Sadeh static ssize_t rbd_snap_id_show(struct device *dev, 2087dfc5606dSYehuda Sadeh struct device_attribute *attr, 2088dfc5606dSYehuda Sadeh char *buf) 2089dfc5606dSYehuda Sadeh { 2090dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 2091dfc5606dSYehuda Sadeh 2092593a9e7bSAlex Elder return sprintf(buf, "%llu\n", (unsigned long long)snap->id); 2093dfc5606dSYehuda Sadeh } 2094dfc5606dSYehuda Sadeh 2095dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); 2096dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL); 2097dfc5606dSYehuda Sadeh 2098dfc5606dSYehuda Sadeh static struct attribute *rbd_snap_attrs[] = { 2099dfc5606dSYehuda Sadeh &dev_attr_snap_size.attr, 2100dfc5606dSYehuda Sadeh &dev_attr_snap_id.attr, 2101dfc5606dSYehuda Sadeh NULL, 2102dfc5606dSYehuda Sadeh }; 2103dfc5606dSYehuda Sadeh 2104dfc5606dSYehuda Sadeh static struct attribute_group rbd_snap_attr_group = { 2105dfc5606dSYehuda Sadeh .attrs = rbd_snap_attrs, 2106dfc5606dSYehuda Sadeh }; 2107dfc5606dSYehuda Sadeh 2108dfc5606dSYehuda Sadeh static void rbd_snap_dev_release(struct device *dev) 2109dfc5606dSYehuda Sadeh { 2110dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 2111dfc5606dSYehuda Sadeh kfree(snap->name); 2112dfc5606dSYehuda Sadeh kfree(snap); 2113dfc5606dSYehuda Sadeh } 2114dfc5606dSYehuda Sadeh 2115dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_snap_attr_groups[] = { 2116dfc5606dSYehuda Sadeh &rbd_snap_attr_group, 2117dfc5606dSYehuda Sadeh NULL 2118dfc5606dSYehuda Sadeh }; 2119dfc5606dSYehuda Sadeh 2120dfc5606dSYehuda Sadeh static struct device_type rbd_snap_device_type = { 2121dfc5606dSYehuda Sadeh .groups = rbd_snap_attr_groups, 2122dfc5606dSYehuda Sadeh .release = rbd_snap_dev_release, 2123dfc5606dSYehuda Sadeh }; 2124dfc5606dSYehuda Sadeh 212514e7085dSAlex Elder static void __rbd_remove_snap_dev(struct rbd_snap *snap) 2126dfc5606dSYehuda Sadeh { 2127dfc5606dSYehuda Sadeh list_del(&snap->node); 2128dfc5606dSYehuda Sadeh device_unregister(&snap->dev); 2129dfc5606dSYehuda Sadeh } 2130dfc5606dSYehuda Sadeh 213114e7085dSAlex Elder static int rbd_register_snap_dev(struct rbd_snap *snap, 2132dfc5606dSYehuda Sadeh struct device *parent) 2133dfc5606dSYehuda Sadeh { 2134dfc5606dSYehuda Sadeh struct device *dev = &snap->dev; 2135dfc5606dSYehuda Sadeh int ret; 2136dfc5606dSYehuda Sadeh 2137dfc5606dSYehuda Sadeh dev->type = &rbd_snap_device_type; 2138dfc5606dSYehuda Sadeh dev->parent = parent; 2139dfc5606dSYehuda Sadeh dev->release = rbd_snap_dev_release; 2140dfc5606dSYehuda Sadeh dev_set_name(dev, "snap_%s", snap->name); 2141dfc5606dSYehuda Sadeh ret = device_register(dev); 2142dfc5606dSYehuda Sadeh 2143dfc5606dSYehuda Sadeh return ret; 2144dfc5606dSYehuda Sadeh } 2145dfc5606dSYehuda Sadeh 21464e891e0aSAlex Elder static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev, 21474e891e0aSAlex Elder int i, const char *name) 2148dfc5606dSYehuda Sadeh { 21494e891e0aSAlex Elder struct rbd_snap *snap; 2150dfc5606dSYehuda Sadeh int ret; 21514e891e0aSAlex Elder 21524e891e0aSAlex Elder snap = kzalloc(sizeof (*snap), GFP_KERNEL); 2153dfc5606dSYehuda Sadeh if (!snap) 21544e891e0aSAlex Elder return ERR_PTR(-ENOMEM); 21554e891e0aSAlex Elder 21564e891e0aSAlex Elder ret = -ENOMEM; 2157dfc5606dSYehuda Sadeh snap->name = kstrdup(name, GFP_KERNEL); 21584e891e0aSAlex Elder if (!snap->name) 21594e891e0aSAlex Elder goto err; 21604e891e0aSAlex Elder 2161dfc5606dSYehuda Sadeh snap->size = rbd_dev->header.snap_sizes[i]; 2162dfc5606dSYehuda Sadeh snap->id = rbd_dev->header.snapc->snaps[i]; 2163dfc5606dSYehuda Sadeh if (device_is_registered(&rbd_dev->dev)) { 216414e7085dSAlex Elder ret = rbd_register_snap_dev(snap, &rbd_dev->dev); 2165dfc5606dSYehuda Sadeh if (ret < 0) 2166dfc5606dSYehuda Sadeh goto err; 2167dfc5606dSYehuda Sadeh } 21684e891e0aSAlex Elder 21694e891e0aSAlex Elder return snap; 21704e891e0aSAlex Elder 2171dfc5606dSYehuda Sadeh err: 2172dfc5606dSYehuda Sadeh kfree(snap->name); 2173dfc5606dSYehuda Sadeh kfree(snap); 21744e891e0aSAlex Elder 21754e891e0aSAlex Elder return ERR_PTR(ret); 2176dfc5606dSYehuda Sadeh } 2177dfc5606dSYehuda Sadeh 2178dfc5606dSYehuda Sadeh /* 217935938150SAlex Elder * Scan the rbd device's current snapshot list and compare it to the 218035938150SAlex Elder * newly-received snapshot context. Remove any existing snapshots 218135938150SAlex Elder * not present in the new snapshot context. Add a new snapshot for 218235938150SAlex Elder * any snaphots in the snapshot context not in the current list. 218335938150SAlex Elder * And verify there are no changes to snapshots we already know 218435938150SAlex Elder * about. 218535938150SAlex Elder * 218635938150SAlex Elder * Assumes the snapshots in the snapshot context are sorted by 218735938150SAlex Elder * snapshot id, highest id first. (Snapshots in the rbd_dev's list 218835938150SAlex Elder * are also maintained in that order.) 2189dfc5606dSYehuda Sadeh */ 21909fcbb800SAlex Elder static int rbd_dev_snap_devs_update(struct rbd_device *rbd_dev) 2191dfc5606dSYehuda Sadeh { 219235938150SAlex Elder struct ceph_snap_context *snapc = rbd_dev->header.snapc; 219335938150SAlex Elder const u32 snap_count = snapc->num_snaps; 219435938150SAlex Elder char *snap_name = rbd_dev->header.snap_names; 219535938150SAlex Elder struct list_head *head = &rbd_dev->snaps; 219635938150SAlex Elder struct list_head *links = head->next; 219735938150SAlex Elder u32 index = 0; 2198dfc5606dSYehuda Sadeh 21999fcbb800SAlex Elder dout("%s: snap count is %u\n", __func__, (unsigned int) snap_count); 220035938150SAlex Elder while (index < snap_count || links != head) { 220135938150SAlex Elder u64 snap_id; 220235938150SAlex Elder struct rbd_snap *snap; 2203dfc5606dSYehuda Sadeh 220435938150SAlex Elder snap_id = index < snap_count ? snapc->snaps[index] 220535938150SAlex Elder : CEPH_NOSNAP; 220635938150SAlex Elder snap = links != head ? list_entry(links, struct rbd_snap, node) 220735938150SAlex Elder : NULL; 2208aafb230eSAlex Elder rbd_assert(!snap || snap->id != CEPH_NOSNAP); 2209dfc5606dSYehuda Sadeh 221035938150SAlex Elder if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) { 221135938150SAlex Elder struct list_head *next = links->next; 2212dfc5606dSYehuda Sadeh 221335938150SAlex Elder /* Existing snapshot not in the new snap context */ 2214dfc5606dSYehuda Sadeh 2215f84344f3SAlex Elder if (rbd_dev->mapping.snap_id == snap->id) 2216f84344f3SAlex Elder rbd_dev->mapping.snap_exists = false; 221735938150SAlex Elder __rbd_remove_snap_dev(snap); 22189fcbb800SAlex Elder dout("%ssnap id %llu has been removed\n", 2219f84344f3SAlex Elder rbd_dev->mapping.snap_id == snap->id ? 2220f84344f3SAlex Elder "mapped " : "", 22219fcbb800SAlex Elder (unsigned long long) snap->id); 2222dfc5606dSYehuda Sadeh 222335938150SAlex Elder /* Done with this list entry; advance */ 222435938150SAlex Elder 222535938150SAlex Elder links = next; 222635938150SAlex Elder continue; 2227dfc5606dSYehuda Sadeh } 222835938150SAlex Elder 22299fcbb800SAlex Elder dout("entry %u: snap_id = %llu\n", (unsigned int) snap_count, 22309fcbb800SAlex Elder (unsigned long long) snap_id); 223135938150SAlex Elder if (!snap || (snap_id != CEPH_NOSNAP && snap->id < snap_id)) { 223235938150SAlex Elder struct rbd_snap *new_snap; 223335938150SAlex Elder 223435938150SAlex Elder /* We haven't seen this snapshot before */ 223535938150SAlex Elder 223635938150SAlex Elder new_snap = __rbd_add_snap_dev(rbd_dev, index, 223735938150SAlex Elder snap_name); 22389fcbb800SAlex Elder if (IS_ERR(new_snap)) { 22399fcbb800SAlex Elder int err = PTR_ERR(new_snap); 22409fcbb800SAlex Elder 22419fcbb800SAlex Elder dout(" failed to add dev, error %d\n", err); 22429fcbb800SAlex Elder 22439fcbb800SAlex Elder return err; 22449fcbb800SAlex Elder } 224535938150SAlex Elder 224635938150SAlex Elder /* New goes before existing, or at end of list */ 224735938150SAlex Elder 22489fcbb800SAlex Elder dout(" added dev%s\n", snap ? "" : " at end\n"); 224935938150SAlex Elder if (snap) 225035938150SAlex Elder list_add_tail(&new_snap->node, &snap->node); 225135938150SAlex Elder else 2252523f3258SAlex Elder list_add_tail(&new_snap->node, head); 225335938150SAlex Elder } else { 225435938150SAlex Elder /* Already have this one */ 225535938150SAlex Elder 22569fcbb800SAlex Elder dout(" already present\n"); 22579fcbb800SAlex Elder 2258aafb230eSAlex Elder rbd_assert(snap->size == 2259aafb230eSAlex Elder rbd_dev->header.snap_sizes[index]); 2260aafb230eSAlex Elder rbd_assert(!strcmp(snap->name, snap_name)); 226135938150SAlex Elder 226235938150SAlex Elder /* Done with this list entry; advance */ 226335938150SAlex Elder 226435938150SAlex Elder links = links->next; 2265dfc5606dSYehuda Sadeh } 226635938150SAlex Elder 226735938150SAlex Elder /* Advance to the next entry in the snapshot context */ 226835938150SAlex Elder 226935938150SAlex Elder index++; 227035938150SAlex Elder snap_name += strlen(snap_name) + 1; 2271dfc5606dSYehuda Sadeh } 22729fcbb800SAlex Elder dout("%s: done\n", __func__); 2273dfc5606dSYehuda Sadeh 2274dfc5606dSYehuda Sadeh return 0; 2275dfc5606dSYehuda Sadeh } 2276dfc5606dSYehuda Sadeh 2277dfc5606dSYehuda Sadeh static int rbd_bus_add_dev(struct rbd_device *rbd_dev) 2278dfc5606dSYehuda Sadeh { 2279f0f8cef5SAlex Elder int ret; 2280dfc5606dSYehuda Sadeh struct device *dev; 2281dfc5606dSYehuda Sadeh struct rbd_snap *snap; 2282dfc5606dSYehuda Sadeh 2283dfc5606dSYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2284dfc5606dSYehuda Sadeh dev = &rbd_dev->dev; 2285dfc5606dSYehuda Sadeh 2286dfc5606dSYehuda Sadeh dev->bus = &rbd_bus_type; 2287dfc5606dSYehuda Sadeh dev->type = &rbd_device_type; 2288dfc5606dSYehuda Sadeh dev->parent = &rbd_root_dev; 2289dfc5606dSYehuda Sadeh dev->release = rbd_dev_release; 2290de71a297SAlex Elder dev_set_name(dev, "%d", rbd_dev->dev_id); 2291dfc5606dSYehuda Sadeh ret = device_register(dev); 2292dfc5606dSYehuda Sadeh if (ret < 0) 2293f0f8cef5SAlex Elder goto out; 2294dfc5606dSYehuda Sadeh 2295dfc5606dSYehuda Sadeh list_for_each_entry(snap, &rbd_dev->snaps, node) { 229614e7085dSAlex Elder ret = rbd_register_snap_dev(snap, &rbd_dev->dev); 2297dfc5606dSYehuda Sadeh if (ret < 0) 2298602adf40SYehuda Sadeh break; 2299602adf40SYehuda Sadeh } 2300f0f8cef5SAlex Elder out: 2301dfc5606dSYehuda Sadeh mutex_unlock(&ctl_mutex); 2302dfc5606dSYehuda Sadeh return ret; 2303602adf40SYehuda Sadeh } 2304602adf40SYehuda Sadeh 2305dfc5606dSYehuda Sadeh static void rbd_bus_del_dev(struct rbd_device *rbd_dev) 2306dfc5606dSYehuda Sadeh { 2307dfc5606dSYehuda Sadeh device_unregister(&rbd_dev->dev); 2308dfc5606dSYehuda Sadeh } 2309dfc5606dSYehuda Sadeh 231059c2be1eSYehuda Sadeh static int rbd_init_watch_dev(struct rbd_device *rbd_dev) 231159c2be1eSYehuda Sadeh { 231259c2be1eSYehuda Sadeh int ret, rc; 231359c2be1eSYehuda Sadeh 231459c2be1eSYehuda Sadeh do { 23150e6f322dSAlex Elder ret = rbd_req_sync_watch(rbd_dev); 231659c2be1eSYehuda Sadeh if (ret == -ERANGE) { 23171fe5e993SAlex Elder rc = rbd_refresh_header(rbd_dev, NULL); 231859c2be1eSYehuda Sadeh if (rc < 0) 231959c2be1eSYehuda Sadeh return rc; 232059c2be1eSYehuda Sadeh } 232159c2be1eSYehuda Sadeh } while (ret == -ERANGE); 232259c2be1eSYehuda Sadeh 232359c2be1eSYehuda Sadeh return ret; 232459c2be1eSYehuda Sadeh } 232559c2be1eSYehuda Sadeh 2326e2839308SAlex Elder static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0); 23271ddbe94eSAlex Elder 23281ddbe94eSAlex Elder /* 2329499afd5bSAlex Elder * Get a unique rbd identifier for the given new rbd_dev, and add 2330499afd5bSAlex Elder * the rbd_dev to the global list. The minimum rbd id is 1. 23311ddbe94eSAlex Elder */ 2332e2839308SAlex Elder static void rbd_dev_id_get(struct rbd_device *rbd_dev) 2333b7f23c36SAlex Elder { 2334e2839308SAlex Elder rbd_dev->dev_id = atomic64_inc_return(&rbd_dev_id_max); 2335499afd5bSAlex Elder 2336499afd5bSAlex Elder spin_lock(&rbd_dev_list_lock); 2337499afd5bSAlex Elder list_add_tail(&rbd_dev->node, &rbd_dev_list); 2338499afd5bSAlex Elder spin_unlock(&rbd_dev_list_lock); 2339e2839308SAlex Elder dout("rbd_dev %p given dev id %llu\n", rbd_dev, 2340e2839308SAlex Elder (unsigned long long) rbd_dev->dev_id); 2341b7f23c36SAlex Elder } 2342b7f23c36SAlex Elder 23431ddbe94eSAlex Elder /* 2344499afd5bSAlex Elder * Remove an rbd_dev from the global list, and record that its 2345499afd5bSAlex Elder * identifier is no longer in use. 23461ddbe94eSAlex Elder */ 2347e2839308SAlex Elder static void rbd_dev_id_put(struct rbd_device *rbd_dev) 23481ddbe94eSAlex Elder { 2349d184f6bfSAlex Elder struct list_head *tmp; 2350de71a297SAlex Elder int rbd_id = rbd_dev->dev_id; 2351d184f6bfSAlex Elder int max_id; 2352d184f6bfSAlex Elder 2353aafb230eSAlex Elder rbd_assert(rbd_id > 0); 2354499afd5bSAlex Elder 2355e2839308SAlex Elder dout("rbd_dev %p released dev id %llu\n", rbd_dev, 2356e2839308SAlex Elder (unsigned long long) rbd_dev->dev_id); 2357499afd5bSAlex Elder spin_lock(&rbd_dev_list_lock); 2358499afd5bSAlex Elder list_del_init(&rbd_dev->node); 2359d184f6bfSAlex Elder 2360d184f6bfSAlex Elder /* 2361d184f6bfSAlex Elder * If the id being "put" is not the current maximum, there 2362d184f6bfSAlex Elder * is nothing special we need to do. 2363d184f6bfSAlex Elder */ 2364e2839308SAlex Elder if (rbd_id != atomic64_read(&rbd_dev_id_max)) { 2365d184f6bfSAlex Elder spin_unlock(&rbd_dev_list_lock); 2366d184f6bfSAlex Elder return; 2367d184f6bfSAlex Elder } 2368d184f6bfSAlex Elder 2369d184f6bfSAlex Elder /* 2370d184f6bfSAlex Elder * We need to update the current maximum id. Search the 2371d184f6bfSAlex Elder * list to find out what it is. We're more likely to find 2372d184f6bfSAlex Elder * the maximum at the end, so search the list backward. 2373d184f6bfSAlex Elder */ 2374d184f6bfSAlex Elder max_id = 0; 2375d184f6bfSAlex Elder list_for_each_prev(tmp, &rbd_dev_list) { 2376d184f6bfSAlex Elder struct rbd_device *rbd_dev; 2377d184f6bfSAlex Elder 2378d184f6bfSAlex Elder rbd_dev = list_entry(tmp, struct rbd_device, node); 2379d184f6bfSAlex Elder if (rbd_id > max_id) 2380d184f6bfSAlex Elder max_id = rbd_id; 2381d184f6bfSAlex Elder } 2382499afd5bSAlex Elder spin_unlock(&rbd_dev_list_lock); 23831ddbe94eSAlex Elder 23841ddbe94eSAlex Elder /* 2385e2839308SAlex Elder * The max id could have been updated by rbd_dev_id_get(), in 2386d184f6bfSAlex Elder * which case it now accurately reflects the new maximum. 2387d184f6bfSAlex Elder * Be careful not to overwrite the maximum value in that 2388d184f6bfSAlex Elder * case. 23891ddbe94eSAlex Elder */ 2390e2839308SAlex Elder atomic64_cmpxchg(&rbd_dev_id_max, rbd_id, max_id); 2391e2839308SAlex Elder dout(" max dev id has been reset\n"); 2392b7f23c36SAlex Elder } 2393b7f23c36SAlex Elder 2394a725f65eSAlex Elder /* 2395e28fff26SAlex Elder * Skips over white space at *buf, and updates *buf to point to the 2396e28fff26SAlex Elder * first found non-space character (if any). Returns the length of 2397593a9e7bSAlex Elder * the token (string of non-white space characters) found. Note 2398593a9e7bSAlex Elder * that *buf must be terminated with '\0'. 2399e28fff26SAlex Elder */ 2400e28fff26SAlex Elder static inline size_t next_token(const char **buf) 2401e28fff26SAlex Elder { 2402e28fff26SAlex Elder /* 2403e28fff26SAlex Elder * These are the characters that produce nonzero for 2404e28fff26SAlex Elder * isspace() in the "C" and "POSIX" locales. 2405e28fff26SAlex Elder */ 2406e28fff26SAlex Elder const char *spaces = " \f\n\r\t\v"; 2407e28fff26SAlex Elder 2408e28fff26SAlex Elder *buf += strspn(*buf, spaces); /* Find start of token */ 2409e28fff26SAlex Elder 2410e28fff26SAlex Elder return strcspn(*buf, spaces); /* Return token length */ 2411e28fff26SAlex Elder } 2412e28fff26SAlex Elder 2413e28fff26SAlex Elder /* 2414e28fff26SAlex Elder * Finds the next token in *buf, and if the provided token buffer is 2415e28fff26SAlex Elder * big enough, copies the found token into it. The result, if 2416593a9e7bSAlex Elder * copied, is guaranteed to be terminated with '\0'. Note that *buf 2417593a9e7bSAlex Elder * must be terminated with '\0' on entry. 2418e28fff26SAlex Elder * 2419e28fff26SAlex Elder * Returns the length of the token found (not including the '\0'). 2420e28fff26SAlex Elder * Return value will be 0 if no token is found, and it will be >= 2421e28fff26SAlex Elder * token_size if the token would not fit. 2422e28fff26SAlex Elder * 2423593a9e7bSAlex Elder * The *buf pointer will be updated to point beyond the end of the 2424e28fff26SAlex Elder * found token. Note that this occurs even if the token buffer is 2425e28fff26SAlex Elder * too small to hold it. 2426e28fff26SAlex Elder */ 2427e28fff26SAlex Elder static inline size_t copy_token(const char **buf, 2428e28fff26SAlex Elder char *token, 2429e28fff26SAlex Elder size_t token_size) 2430e28fff26SAlex Elder { 2431e28fff26SAlex Elder size_t len; 2432e28fff26SAlex Elder 2433e28fff26SAlex Elder len = next_token(buf); 2434e28fff26SAlex Elder if (len < token_size) { 2435e28fff26SAlex Elder memcpy(token, *buf, len); 2436e28fff26SAlex Elder *(token + len) = '\0'; 2437e28fff26SAlex Elder } 2438e28fff26SAlex Elder *buf += len; 2439e28fff26SAlex Elder 2440e28fff26SAlex Elder return len; 2441e28fff26SAlex Elder } 2442e28fff26SAlex Elder 2443e28fff26SAlex Elder /* 2444ea3352f4SAlex Elder * Finds the next token in *buf, dynamically allocates a buffer big 2445ea3352f4SAlex Elder * enough to hold a copy of it, and copies the token into the new 2446ea3352f4SAlex Elder * buffer. The copy is guaranteed to be terminated with '\0'. Note 2447ea3352f4SAlex Elder * that a duplicate buffer is created even for a zero-length token. 2448ea3352f4SAlex Elder * 2449ea3352f4SAlex Elder * Returns a pointer to the newly-allocated duplicate, or a null 2450ea3352f4SAlex Elder * pointer if memory for the duplicate was not available. If 2451ea3352f4SAlex Elder * the lenp argument is a non-null pointer, the length of the token 2452ea3352f4SAlex Elder * (not including the '\0') is returned in *lenp. 2453ea3352f4SAlex Elder * 2454ea3352f4SAlex Elder * If successful, the *buf pointer will be updated to point beyond 2455ea3352f4SAlex Elder * the end of the found token. 2456ea3352f4SAlex Elder * 2457ea3352f4SAlex Elder * Note: uses GFP_KERNEL for allocation. 2458ea3352f4SAlex Elder */ 2459ea3352f4SAlex Elder static inline char *dup_token(const char **buf, size_t *lenp) 2460ea3352f4SAlex Elder { 2461ea3352f4SAlex Elder char *dup; 2462ea3352f4SAlex Elder size_t len; 2463ea3352f4SAlex Elder 2464ea3352f4SAlex Elder len = next_token(buf); 2465ea3352f4SAlex Elder dup = kmalloc(len + 1, GFP_KERNEL); 2466ea3352f4SAlex Elder if (!dup) 2467ea3352f4SAlex Elder return NULL; 2468ea3352f4SAlex Elder 2469ea3352f4SAlex Elder memcpy(dup, *buf, len); 2470ea3352f4SAlex Elder *(dup + len) = '\0'; 2471ea3352f4SAlex Elder *buf += len; 2472ea3352f4SAlex Elder 2473ea3352f4SAlex Elder if (lenp) 2474ea3352f4SAlex Elder *lenp = len; 2475ea3352f4SAlex Elder 2476ea3352f4SAlex Elder return dup; 2477ea3352f4SAlex Elder } 2478ea3352f4SAlex Elder 2479ea3352f4SAlex Elder /* 24800bed54dcSAlex Elder * This fills in the pool_name, image_name, image_name_len, snap_name, 2481a725f65eSAlex Elder * rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based 2482a725f65eSAlex Elder * on the list of monitor addresses and other options provided via 2483a725f65eSAlex Elder * /sys/bus/rbd/add. 2484d22f76e7SAlex Elder * 2485d22f76e7SAlex Elder * Note: rbd_dev is assumed to have been initially zero-filled. 2486a725f65eSAlex Elder */ 2487a725f65eSAlex Elder static int rbd_add_parse_args(struct rbd_device *rbd_dev, 2488a725f65eSAlex Elder const char *buf, 24897ef3214aSAlex Elder const char **mon_addrs, 24905214ecc4SAlex Elder size_t *mon_addrs_size, 2491e28fff26SAlex Elder char *options, 2492e28fff26SAlex Elder size_t options_size) 2493a725f65eSAlex Elder { 2494e28fff26SAlex Elder size_t len; 2495d22f76e7SAlex Elder int ret; 2496e28fff26SAlex Elder 2497e28fff26SAlex Elder /* The first four tokens are required */ 2498e28fff26SAlex Elder 24997ef3214aSAlex Elder len = next_token(&buf); 25007ef3214aSAlex Elder if (!len) 2501a725f65eSAlex Elder return -EINVAL; 25025214ecc4SAlex Elder *mon_addrs_size = len + 1; 25037ef3214aSAlex Elder *mon_addrs = buf; 25047ef3214aSAlex Elder 25057ef3214aSAlex Elder buf += len; 2506a725f65eSAlex Elder 2507e28fff26SAlex Elder len = copy_token(&buf, options, options_size); 2508e28fff26SAlex Elder if (!len || len >= options_size) 2509e28fff26SAlex Elder return -EINVAL; 2510a725f65eSAlex Elder 2511bf3e5ae1SAlex Elder ret = -ENOMEM; 2512d22f76e7SAlex Elder rbd_dev->pool_name = dup_token(&buf, NULL); 2513d22f76e7SAlex Elder if (!rbd_dev->pool_name) 2514d22f76e7SAlex Elder goto out_err; 2515e28fff26SAlex Elder 25160bed54dcSAlex Elder rbd_dev->image_name = dup_token(&buf, &rbd_dev->image_name_len); 25170bed54dcSAlex Elder if (!rbd_dev->image_name) 2518bf3e5ae1SAlex Elder goto out_err; 2519e28fff26SAlex Elder 2520cb8627c7SAlex Elder /* Create the name of the header object */ 2521cb8627c7SAlex Elder 25220bed54dcSAlex Elder rbd_dev->header_name = kmalloc(rbd_dev->image_name_len 2523bf3e5ae1SAlex Elder + sizeof (RBD_SUFFIX), 2524bf3e5ae1SAlex Elder GFP_KERNEL); 25250bed54dcSAlex Elder if (!rbd_dev->header_name) 2526cb8627c7SAlex Elder goto out_err; 25270bed54dcSAlex Elder sprintf(rbd_dev->header_name, "%s%s", rbd_dev->image_name, RBD_SUFFIX); 2528a725f65eSAlex Elder 2529e28fff26SAlex Elder /* 2530820a5f3eSAlex Elder * The snapshot name is optional. If none is is supplied, 2531820a5f3eSAlex Elder * we use the default value. 2532e28fff26SAlex Elder */ 2533f84344f3SAlex Elder rbd_dev->mapping.snap_name = dup_token(&buf, &len); 2534f84344f3SAlex Elder if (!rbd_dev->mapping.snap_name) 2535820a5f3eSAlex Elder goto out_err; 2536820a5f3eSAlex Elder if (!len) { 2537820a5f3eSAlex Elder /* Replace the empty name with the default */ 2538f84344f3SAlex Elder kfree(rbd_dev->mapping.snap_name); 2539f84344f3SAlex Elder rbd_dev->mapping.snap_name 2540820a5f3eSAlex Elder = kmalloc(sizeof (RBD_SNAP_HEAD_NAME), GFP_KERNEL); 2541f84344f3SAlex Elder if (!rbd_dev->mapping.snap_name) 2542820a5f3eSAlex Elder goto out_err; 2543820a5f3eSAlex Elder 2544f84344f3SAlex Elder memcpy(rbd_dev->mapping.snap_name, RBD_SNAP_HEAD_NAME, 2545e28fff26SAlex Elder sizeof (RBD_SNAP_HEAD_NAME)); 2546849b4260SAlex Elder } 2547e28fff26SAlex Elder 2548a725f65eSAlex Elder return 0; 2549d22f76e7SAlex Elder 2550d22f76e7SAlex Elder out_err: 25510bed54dcSAlex Elder kfree(rbd_dev->header_name); 2552d78fd7aeSAlex Elder rbd_dev->header_name = NULL; 25530bed54dcSAlex Elder kfree(rbd_dev->image_name); 2554d78fd7aeSAlex Elder rbd_dev->image_name = NULL; 2555d78fd7aeSAlex Elder rbd_dev->image_name_len = 0; 2556d22f76e7SAlex Elder kfree(rbd_dev->pool_name); 2557d22f76e7SAlex Elder rbd_dev->pool_name = NULL; 2558d22f76e7SAlex Elder 2559d22f76e7SAlex Elder return ret; 2560a725f65eSAlex Elder } 2561a725f65eSAlex Elder 256259c2be1eSYehuda Sadeh static ssize_t rbd_add(struct bus_type *bus, 256359c2be1eSYehuda Sadeh const char *buf, 256459c2be1eSYehuda Sadeh size_t count) 2565602adf40SYehuda Sadeh { 2566cb8627c7SAlex Elder char *options; 2567cb8627c7SAlex Elder struct rbd_device *rbd_dev = NULL; 25687ef3214aSAlex Elder const char *mon_addrs = NULL; 25697ef3214aSAlex Elder size_t mon_addrs_size = 0; 257027cc2594SAlex Elder struct ceph_osd_client *osdc; 257127cc2594SAlex Elder int rc = -ENOMEM; 2572602adf40SYehuda Sadeh 2573602adf40SYehuda Sadeh if (!try_module_get(THIS_MODULE)) 2574602adf40SYehuda Sadeh return -ENODEV; 2575602adf40SYehuda Sadeh 257627cc2594SAlex Elder options = kmalloc(count, GFP_KERNEL); 257727cc2594SAlex Elder if (!options) 257827cc2594SAlex Elder goto err_nomem; 2579cb8627c7SAlex Elder rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); 2580cb8627c7SAlex Elder if (!rbd_dev) 2581cb8627c7SAlex Elder goto err_nomem; 2582602adf40SYehuda Sadeh 2583602adf40SYehuda Sadeh /* static rbd_device initialization */ 2584602adf40SYehuda Sadeh spin_lock_init(&rbd_dev->lock); 2585602adf40SYehuda Sadeh INIT_LIST_HEAD(&rbd_dev->node); 2586dfc5606dSYehuda Sadeh INIT_LIST_HEAD(&rbd_dev->snaps); 2587c666601aSJosh Durgin init_rwsem(&rbd_dev->header_rwsem); 2588602adf40SYehuda Sadeh 2589d184f6bfSAlex Elder /* generate unique id: find highest unique id, add one */ 2590e2839308SAlex Elder rbd_dev_id_get(rbd_dev); 2591602adf40SYehuda Sadeh 2592a725f65eSAlex Elder /* Fill in the device name, now that we have its id. */ 259381a89793SAlex Elder BUILD_BUG_ON(DEV_NAME_LEN 259481a89793SAlex Elder < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH); 2595de71a297SAlex Elder sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id); 2596e124a82fSAlex Elder 2597a725f65eSAlex Elder /* parse add command */ 25987ef3214aSAlex Elder rc = rbd_add_parse_args(rbd_dev, buf, &mon_addrs, &mon_addrs_size, 2599e28fff26SAlex Elder options, count); 2600a725f65eSAlex Elder if (rc) 2601a725f65eSAlex Elder goto err_put_id; 2602a725f65eSAlex Elder 2603f8c38929SAlex Elder rc = rbd_get_client(rbd_dev, mon_addrs, mon_addrs_size - 1, options); 2604f8c38929SAlex Elder if (rc < 0) 2605f0f8cef5SAlex Elder goto err_put_id; 2606602adf40SYehuda Sadeh 2607602adf40SYehuda Sadeh /* pick the pool */ 26081dbb4399SAlex Elder osdc = &rbd_dev->rbd_client->client->osdc; 2609602adf40SYehuda Sadeh rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); 2610602adf40SYehuda Sadeh if (rc < 0) 2611602adf40SYehuda Sadeh goto err_out_client; 26129bb2f334SAlex Elder rbd_dev->pool_id = rc; 2613602adf40SYehuda Sadeh 2614602adf40SYehuda Sadeh /* register our block device */ 261527cc2594SAlex Elder rc = register_blkdev(0, rbd_dev->name); 261627cc2594SAlex Elder if (rc < 0) 2617602adf40SYehuda Sadeh goto err_out_client; 261827cc2594SAlex Elder rbd_dev->major = rc; 2619602adf40SYehuda Sadeh 2620dfc5606dSYehuda Sadeh rc = rbd_bus_add_dev(rbd_dev); 2621dfc5606dSYehuda Sadeh if (rc) 2622766fc439SYehuda Sadeh goto err_out_blkdev; 2623766fc439SYehuda Sadeh 262432eec68dSAlex Elder /* 262532eec68dSAlex Elder * At this point cleanup in the event of an error is the job 262632eec68dSAlex Elder * of the sysfs code (initiated by rbd_bus_del_dev()). 262732eec68dSAlex Elder * 262832eec68dSAlex Elder * Set up and announce blkdev mapping. 262932eec68dSAlex Elder */ 2630602adf40SYehuda Sadeh rc = rbd_init_disk(rbd_dev); 2631602adf40SYehuda Sadeh if (rc) 2632766fc439SYehuda Sadeh goto err_out_bus; 2633602adf40SYehuda Sadeh 263459c2be1eSYehuda Sadeh rc = rbd_init_watch_dev(rbd_dev); 263559c2be1eSYehuda Sadeh if (rc) 263659c2be1eSYehuda Sadeh goto err_out_bus; 263759c2be1eSYehuda Sadeh 2638602adf40SYehuda Sadeh return count; 2639602adf40SYehuda Sadeh 2640766fc439SYehuda Sadeh err_out_bus: 2641766fc439SYehuda Sadeh /* this will also clean up rest of rbd_dev stuff */ 2642766fc439SYehuda Sadeh 2643766fc439SYehuda Sadeh rbd_bus_del_dev(rbd_dev); 2644766fc439SYehuda Sadeh kfree(options); 2645766fc439SYehuda Sadeh return rc; 2646766fc439SYehuda Sadeh 2647602adf40SYehuda Sadeh err_out_blkdev: 2648602adf40SYehuda Sadeh unregister_blkdev(rbd_dev->major, rbd_dev->name); 2649602adf40SYehuda Sadeh err_out_client: 2650602adf40SYehuda Sadeh rbd_put_client(rbd_dev); 2651f0f8cef5SAlex Elder err_put_id: 2652cb8627c7SAlex Elder if (rbd_dev->pool_name) { 2653f84344f3SAlex Elder kfree(rbd_dev->mapping.snap_name); 26540bed54dcSAlex Elder kfree(rbd_dev->header_name); 26550bed54dcSAlex Elder kfree(rbd_dev->image_name); 2656d22f76e7SAlex Elder kfree(rbd_dev->pool_name); 2657cb8627c7SAlex Elder } 2658e2839308SAlex Elder rbd_dev_id_put(rbd_dev); 265927cc2594SAlex Elder err_nomem: 266027cc2594SAlex Elder kfree(rbd_dev); 2661cb8627c7SAlex Elder kfree(options); 266227cc2594SAlex Elder 2663602adf40SYehuda Sadeh dout("Error adding device %s\n", buf); 2664602adf40SYehuda Sadeh module_put(THIS_MODULE); 266527cc2594SAlex Elder 266627cc2594SAlex Elder return (ssize_t) rc; 2667602adf40SYehuda Sadeh } 2668602adf40SYehuda Sadeh 2669de71a297SAlex Elder static struct rbd_device *__rbd_get_dev(unsigned long dev_id) 2670602adf40SYehuda Sadeh { 2671602adf40SYehuda Sadeh struct list_head *tmp; 2672602adf40SYehuda Sadeh struct rbd_device *rbd_dev; 2673602adf40SYehuda Sadeh 2674e124a82fSAlex Elder spin_lock(&rbd_dev_list_lock); 2675602adf40SYehuda Sadeh list_for_each(tmp, &rbd_dev_list) { 2676602adf40SYehuda Sadeh rbd_dev = list_entry(tmp, struct rbd_device, node); 2677de71a297SAlex Elder if (rbd_dev->dev_id == dev_id) { 2678e124a82fSAlex Elder spin_unlock(&rbd_dev_list_lock); 2679602adf40SYehuda Sadeh return rbd_dev; 2680602adf40SYehuda Sadeh } 2681e124a82fSAlex Elder } 2682e124a82fSAlex Elder spin_unlock(&rbd_dev_list_lock); 2683602adf40SYehuda Sadeh return NULL; 2684602adf40SYehuda Sadeh } 2685602adf40SYehuda Sadeh 2686dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev) 2687602adf40SYehuda Sadeh { 2688593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2689602adf40SYehuda Sadeh 26901dbb4399SAlex Elder if (rbd_dev->watch_request) { 26911dbb4399SAlex Elder struct ceph_client *client = rbd_dev->rbd_client->client; 26921dbb4399SAlex Elder 26931dbb4399SAlex Elder ceph_osdc_unregister_linger_request(&client->osdc, 269459c2be1eSYehuda Sadeh rbd_dev->watch_request); 26951dbb4399SAlex Elder } 269659c2be1eSYehuda Sadeh if (rbd_dev->watch_event) 2697070c633fSAlex Elder rbd_req_sync_unwatch(rbd_dev); 269859c2be1eSYehuda Sadeh 2699602adf40SYehuda Sadeh rbd_put_client(rbd_dev); 2700602adf40SYehuda Sadeh 2701602adf40SYehuda Sadeh /* clean up and free blkdev */ 2702602adf40SYehuda Sadeh rbd_free_disk(rbd_dev); 2703602adf40SYehuda Sadeh unregister_blkdev(rbd_dev->major, rbd_dev->name); 270432eec68dSAlex Elder 270532eec68dSAlex Elder /* done with the id, and with the rbd_dev */ 2706f84344f3SAlex Elder kfree(rbd_dev->mapping.snap_name); 27070bed54dcSAlex Elder kfree(rbd_dev->header_name); 2708d22f76e7SAlex Elder kfree(rbd_dev->pool_name); 27090bed54dcSAlex Elder kfree(rbd_dev->image_name); 2710e2839308SAlex Elder rbd_dev_id_put(rbd_dev); 2711602adf40SYehuda Sadeh kfree(rbd_dev); 2712602adf40SYehuda Sadeh 2713602adf40SYehuda Sadeh /* release module ref */ 2714602adf40SYehuda Sadeh module_put(THIS_MODULE); 2715602adf40SYehuda Sadeh } 2716602adf40SYehuda Sadeh 2717dfc5606dSYehuda Sadeh static ssize_t rbd_remove(struct bus_type *bus, 2718602adf40SYehuda Sadeh const char *buf, 2719602adf40SYehuda Sadeh size_t count) 2720602adf40SYehuda Sadeh { 2721602adf40SYehuda Sadeh struct rbd_device *rbd_dev = NULL; 2722602adf40SYehuda Sadeh int target_id, rc; 2723602adf40SYehuda Sadeh unsigned long ul; 2724602adf40SYehuda Sadeh int ret = count; 2725602adf40SYehuda Sadeh 2726602adf40SYehuda Sadeh rc = strict_strtoul(buf, 10, &ul); 2727602adf40SYehuda Sadeh if (rc) 2728602adf40SYehuda Sadeh return rc; 2729602adf40SYehuda Sadeh 2730602adf40SYehuda Sadeh /* convert to int; abort if we lost anything in the conversion */ 2731602adf40SYehuda Sadeh target_id = (int) ul; 2732602adf40SYehuda Sadeh if (target_id != ul) 2733602adf40SYehuda Sadeh return -EINVAL; 2734602adf40SYehuda Sadeh 2735602adf40SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2736602adf40SYehuda Sadeh 2737602adf40SYehuda Sadeh rbd_dev = __rbd_get_dev(target_id); 2738602adf40SYehuda Sadeh if (!rbd_dev) { 2739602adf40SYehuda Sadeh ret = -ENOENT; 2740602adf40SYehuda Sadeh goto done; 2741602adf40SYehuda Sadeh } 2742602adf40SYehuda Sadeh 2743dfc5606dSYehuda Sadeh __rbd_remove_all_snaps(rbd_dev); 2744dfc5606dSYehuda Sadeh rbd_bus_del_dev(rbd_dev); 2745602adf40SYehuda Sadeh 2746602adf40SYehuda Sadeh done: 2747602adf40SYehuda Sadeh mutex_unlock(&ctl_mutex); 2748aafb230eSAlex Elder 2749602adf40SYehuda Sadeh return ret; 2750602adf40SYehuda Sadeh } 2751602adf40SYehuda Sadeh 2752dfc5606dSYehuda Sadeh static ssize_t rbd_snap_add(struct device *dev, 2753dfc5606dSYehuda Sadeh struct device_attribute *attr, 2754602adf40SYehuda Sadeh const char *buf, 2755602adf40SYehuda Sadeh size_t count) 2756602adf40SYehuda Sadeh { 2757593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2758dfc5606dSYehuda Sadeh int ret; 2759dfc5606dSYehuda Sadeh char *name = kmalloc(count + 1, GFP_KERNEL); 2760602adf40SYehuda Sadeh if (!name) 2761602adf40SYehuda Sadeh return -ENOMEM; 2762602adf40SYehuda Sadeh 2763dfc5606dSYehuda Sadeh snprintf(name, count, "%s", buf); 2764602adf40SYehuda Sadeh 2765602adf40SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2766602adf40SYehuda Sadeh 2767602adf40SYehuda Sadeh ret = rbd_header_add_snap(rbd_dev, 2768602adf40SYehuda Sadeh name, GFP_KERNEL); 2769602adf40SYehuda Sadeh if (ret < 0) 277059c2be1eSYehuda Sadeh goto err_unlock; 2771602adf40SYehuda Sadeh 2772b813623aSAlex Elder ret = __rbd_refresh_header(rbd_dev, NULL); 2773602adf40SYehuda Sadeh if (ret < 0) 277459c2be1eSYehuda Sadeh goto err_unlock; 277559c2be1eSYehuda Sadeh 277659c2be1eSYehuda Sadeh /* shouldn't hold ctl_mutex when notifying.. notify might 277759c2be1eSYehuda Sadeh trigger a watch callback that would need to get that mutex */ 277859c2be1eSYehuda Sadeh mutex_unlock(&ctl_mutex); 277959c2be1eSYehuda Sadeh 278059c2be1eSYehuda Sadeh /* make a best effort, don't error if failed */ 27814cb16250SAlex Elder rbd_req_sync_notify(rbd_dev); 2782602adf40SYehuda Sadeh 2783602adf40SYehuda Sadeh ret = count; 278459c2be1eSYehuda Sadeh kfree(name); 278559c2be1eSYehuda Sadeh return ret; 278659c2be1eSYehuda Sadeh 278759c2be1eSYehuda Sadeh err_unlock: 2788602adf40SYehuda Sadeh mutex_unlock(&ctl_mutex); 2789602adf40SYehuda Sadeh kfree(name); 2790602adf40SYehuda Sadeh return ret; 2791602adf40SYehuda Sadeh } 2792602adf40SYehuda Sadeh 2793602adf40SYehuda Sadeh /* 2794602adf40SYehuda Sadeh * create control files in sysfs 2795dfc5606dSYehuda Sadeh * /sys/bus/rbd/... 2796602adf40SYehuda Sadeh */ 2797602adf40SYehuda Sadeh static int rbd_sysfs_init(void) 2798602adf40SYehuda Sadeh { 2799dfc5606dSYehuda Sadeh int ret; 2800602adf40SYehuda Sadeh 2801fed4c143SAlex Elder ret = device_register(&rbd_root_dev); 2802dfc5606dSYehuda Sadeh if (ret < 0) 2803dfc5606dSYehuda Sadeh return ret; 2804602adf40SYehuda Sadeh 2805fed4c143SAlex Elder ret = bus_register(&rbd_bus_type); 2806fed4c143SAlex Elder if (ret < 0) 2807fed4c143SAlex Elder device_unregister(&rbd_root_dev); 2808602adf40SYehuda Sadeh 2809602adf40SYehuda Sadeh return ret; 2810602adf40SYehuda Sadeh } 2811602adf40SYehuda Sadeh 2812602adf40SYehuda Sadeh static void rbd_sysfs_cleanup(void) 2813602adf40SYehuda Sadeh { 2814dfc5606dSYehuda Sadeh bus_unregister(&rbd_bus_type); 2815fed4c143SAlex Elder device_unregister(&rbd_root_dev); 2816602adf40SYehuda Sadeh } 2817602adf40SYehuda Sadeh 2818602adf40SYehuda Sadeh int __init rbd_init(void) 2819602adf40SYehuda Sadeh { 2820602adf40SYehuda Sadeh int rc; 2821602adf40SYehuda Sadeh 2822602adf40SYehuda Sadeh rc = rbd_sysfs_init(); 2823602adf40SYehuda Sadeh if (rc) 2824602adf40SYehuda Sadeh return rc; 2825f0f8cef5SAlex Elder pr_info("loaded " RBD_DRV_NAME_LONG "\n"); 2826602adf40SYehuda Sadeh return 0; 2827602adf40SYehuda Sadeh } 2828602adf40SYehuda Sadeh 2829602adf40SYehuda Sadeh void __exit rbd_exit(void) 2830602adf40SYehuda Sadeh { 2831602adf40SYehuda Sadeh rbd_sysfs_cleanup(); 2832602adf40SYehuda Sadeh } 2833602adf40SYehuda Sadeh 2834602adf40SYehuda Sadeh module_init(rbd_init); 2835602adf40SYehuda Sadeh module_exit(rbd_exit); 2836602adf40SYehuda Sadeh 2837602adf40SYehuda Sadeh MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 2838602adf40SYehuda Sadeh MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 2839602adf40SYehuda Sadeh MODULE_DESCRIPTION("rados block device"); 2840602adf40SYehuda Sadeh 2841602adf40SYehuda Sadeh /* following authorship retained from original osdblk.c */ 2842602adf40SYehuda Sadeh MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); 2843602adf40SYehuda Sadeh 2844602adf40SYehuda Sadeh MODULE_LICENSE("GPL"); 2845