1602adf40SYehuda Sadeh /* 2602adf40SYehuda Sadeh rbd.c -- Export ceph rados objects as a Linux block device 3602adf40SYehuda Sadeh 4602adf40SYehuda Sadeh 5602adf40SYehuda Sadeh based on drivers/block/osdblk.c: 6602adf40SYehuda Sadeh 7602adf40SYehuda Sadeh Copyright 2009 Red Hat, Inc. 8602adf40SYehuda Sadeh 9602adf40SYehuda Sadeh This program is free software; you can redistribute it and/or modify 10602adf40SYehuda Sadeh it under the terms of the GNU General Public License as published by 11602adf40SYehuda Sadeh the Free Software Foundation. 12602adf40SYehuda Sadeh 13602adf40SYehuda Sadeh This program is distributed in the hope that it will be useful, 14602adf40SYehuda Sadeh but WITHOUT ANY WARRANTY; without even the implied warranty of 15602adf40SYehuda Sadeh MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16602adf40SYehuda Sadeh GNU General Public License for more details. 17602adf40SYehuda Sadeh 18602adf40SYehuda Sadeh You should have received a copy of the GNU General Public License 19602adf40SYehuda Sadeh along with this program; see the file COPYING. If not, write to 20602adf40SYehuda Sadeh the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 21602adf40SYehuda Sadeh 22602adf40SYehuda Sadeh 23602adf40SYehuda Sadeh 24dfc5606dSYehuda Sadeh For usage instructions, please refer to: 25602adf40SYehuda Sadeh 26dfc5606dSYehuda Sadeh Documentation/ABI/testing/sysfs-bus-rbd 27602adf40SYehuda Sadeh 28602adf40SYehuda Sadeh */ 29602adf40SYehuda Sadeh 30602adf40SYehuda Sadeh #include <linux/ceph/libceph.h> 31602adf40SYehuda Sadeh #include <linux/ceph/osd_client.h> 32602adf40SYehuda Sadeh #include <linux/ceph/mon_client.h> 33602adf40SYehuda Sadeh #include <linux/ceph/decode.h> 3459c2be1eSYehuda Sadeh #include <linux/parser.h> 35602adf40SYehuda Sadeh 36602adf40SYehuda Sadeh #include <linux/kernel.h> 37602adf40SYehuda Sadeh #include <linux/device.h> 38602adf40SYehuda Sadeh #include <linux/module.h> 39602adf40SYehuda Sadeh #include <linux/fs.h> 40602adf40SYehuda Sadeh #include <linux/blkdev.h> 41602adf40SYehuda Sadeh 42602adf40SYehuda Sadeh #include "rbd_types.h" 43602adf40SYehuda Sadeh 44aafb230eSAlex Elder #define RBD_DEBUG /* Activate rbd_assert() calls */ 45aafb230eSAlex Elder 46593a9e7bSAlex Elder /* 47593a9e7bSAlex Elder * The basic unit of block I/O is a sector. It is interpreted in a 48593a9e7bSAlex Elder * number of contexts in Linux (blk, bio, genhd), but the default is 49593a9e7bSAlex Elder * universally 512 bytes. These symbols are just slightly more 50593a9e7bSAlex Elder * meaningful than the bare numbers they represent. 51593a9e7bSAlex Elder */ 52593a9e7bSAlex Elder #define SECTOR_SHIFT 9 53593a9e7bSAlex Elder #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) 54593a9e7bSAlex Elder 55df111be6SAlex Elder /* It might be useful to have this defined elsewhere too */ 56df111be6SAlex Elder 57df111be6SAlex Elder #define U64_MAX ((u64) (~0ULL)) 58df111be6SAlex Elder 59f0f8cef5SAlex Elder #define RBD_DRV_NAME "rbd" 60f0f8cef5SAlex Elder #define RBD_DRV_NAME_LONG "rbd (rados block device)" 61602adf40SYehuda Sadeh 62602adf40SYehuda Sadeh #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ 63602adf40SYehuda Sadeh 64602adf40SYehuda Sadeh #define RBD_MAX_SNAP_NAME_LEN 32 65602adf40SYehuda Sadeh #define RBD_MAX_OPT_LEN 1024 66602adf40SYehuda Sadeh 67602adf40SYehuda Sadeh #define RBD_SNAP_HEAD_NAME "-" 68602adf40SYehuda Sadeh 6981a89793SAlex Elder /* 7081a89793SAlex Elder * An RBD device name will be "rbd#", where the "rbd" comes from 7181a89793SAlex Elder * RBD_DRV_NAME above, and # is a unique integer identifier. 7281a89793SAlex Elder * MAX_INT_FORMAT_WIDTH is used in ensuring DEV_NAME_LEN is big 7381a89793SAlex Elder * enough to hold all possible device names. 7481a89793SAlex Elder */ 75602adf40SYehuda Sadeh #define DEV_NAME_LEN 32 7681a89793SAlex Elder #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) 77602adf40SYehuda Sadeh 78cc0538b6SAlex Elder #define RBD_READ_ONLY_DEFAULT false 7959c2be1eSYehuda Sadeh 80602adf40SYehuda Sadeh /* 81602adf40SYehuda Sadeh * block device image metadata (in-memory version) 82602adf40SYehuda Sadeh */ 83602adf40SYehuda Sadeh struct rbd_image_header { 84602adf40SYehuda Sadeh u64 image_size; 85849b4260SAlex Elder char *object_prefix; 86602adf40SYehuda Sadeh __u8 obj_order; 87602adf40SYehuda Sadeh __u8 crypt_type; 88602adf40SYehuda Sadeh __u8 comp_type; 89602adf40SYehuda Sadeh struct ceph_snap_context *snapc; 90602adf40SYehuda Sadeh u32 total_snaps; 91602adf40SYehuda Sadeh 92602adf40SYehuda Sadeh char *snap_names; 93602adf40SYehuda Sadeh u64 *snap_sizes; 9459c2be1eSYehuda Sadeh 9559c2be1eSYehuda Sadeh u64 obj_version; 9659c2be1eSYehuda Sadeh }; 9759c2be1eSYehuda Sadeh 9859c2be1eSYehuda Sadeh struct rbd_options { 99cc0538b6SAlex Elder bool read_only; 100602adf40SYehuda Sadeh }; 101602adf40SYehuda Sadeh 102602adf40SYehuda Sadeh /* 103f0f8cef5SAlex Elder * an instance of the client. multiple devices may share an rbd client. 104602adf40SYehuda Sadeh */ 105602adf40SYehuda Sadeh struct rbd_client { 106602adf40SYehuda Sadeh struct ceph_client *client; 107602adf40SYehuda Sadeh struct kref kref; 108602adf40SYehuda Sadeh struct list_head node; 109602adf40SYehuda Sadeh }; 110602adf40SYehuda Sadeh 111602adf40SYehuda Sadeh /* 112f0f8cef5SAlex Elder * a request completion status 113602adf40SYehuda Sadeh */ 1141fec7093SYehuda Sadeh struct rbd_req_status { 1151fec7093SYehuda Sadeh int done; 1161fec7093SYehuda Sadeh int rc; 1171fec7093SYehuda Sadeh u64 bytes; 1181fec7093SYehuda Sadeh }; 1191fec7093SYehuda Sadeh 1201fec7093SYehuda Sadeh /* 1211fec7093SYehuda Sadeh * a collection of requests 1221fec7093SYehuda Sadeh */ 1231fec7093SYehuda Sadeh struct rbd_req_coll { 1241fec7093SYehuda Sadeh int total; 1251fec7093SYehuda Sadeh int num_done; 1261fec7093SYehuda Sadeh struct kref kref; 1271fec7093SYehuda Sadeh struct rbd_req_status status[0]; 128602adf40SYehuda Sadeh }; 129602adf40SYehuda Sadeh 130f0f8cef5SAlex Elder /* 131f0f8cef5SAlex Elder * a single io request 132f0f8cef5SAlex Elder */ 133f0f8cef5SAlex Elder struct rbd_request { 134f0f8cef5SAlex Elder struct request *rq; /* blk layer request */ 135f0f8cef5SAlex Elder struct bio *bio; /* cloned bio */ 136f0f8cef5SAlex Elder struct page **pages; /* list of used pages */ 137f0f8cef5SAlex Elder u64 len; 138f0f8cef5SAlex Elder int coll_index; 139f0f8cef5SAlex Elder struct rbd_req_coll *coll; 140f0f8cef5SAlex Elder }; 141f0f8cef5SAlex Elder 142dfc5606dSYehuda Sadeh struct rbd_snap { 143dfc5606dSYehuda Sadeh struct device dev; 144dfc5606dSYehuda Sadeh const char *name; 1453591538fSJosh Durgin u64 size; 146dfc5606dSYehuda Sadeh struct list_head node; 147dfc5606dSYehuda Sadeh u64 id; 148dfc5606dSYehuda Sadeh }; 149dfc5606dSYehuda Sadeh 150602adf40SYehuda Sadeh /* 151602adf40SYehuda Sadeh * a single device 152602adf40SYehuda Sadeh */ 153602adf40SYehuda Sadeh struct rbd_device { 154de71a297SAlex Elder int dev_id; /* blkdev unique id */ 155602adf40SYehuda Sadeh 156602adf40SYehuda Sadeh int major; /* blkdev assigned major */ 157602adf40SYehuda Sadeh struct gendisk *disk; /* blkdev's gendisk and rq */ 158602adf40SYehuda Sadeh struct request_queue *q; 159602adf40SYehuda Sadeh 160f8c38929SAlex Elder struct rbd_options rbd_opts; 161602adf40SYehuda Sadeh struct rbd_client *rbd_client; 162602adf40SYehuda Sadeh 163602adf40SYehuda Sadeh char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ 164602adf40SYehuda Sadeh 165602adf40SYehuda Sadeh spinlock_t lock; /* queue lock */ 166602adf40SYehuda Sadeh 167602adf40SYehuda Sadeh struct rbd_image_header header; 1680bed54dcSAlex Elder char *image_name; 1690bed54dcSAlex Elder size_t image_name_len; 1700bed54dcSAlex Elder char *header_name; 171d22f76e7SAlex Elder char *pool_name; 1729bb2f334SAlex Elder int pool_id; 173602adf40SYehuda Sadeh 17459c2be1eSYehuda Sadeh struct ceph_osd_event *watch_event; 17559c2be1eSYehuda Sadeh struct ceph_osd_request *watch_request; 17659c2be1eSYehuda Sadeh 177c666601aSJosh Durgin /* protects updating the header */ 178c666601aSJosh Durgin struct rw_semaphore header_rwsem; 179e88a36ecSJosh Durgin /* name of the snapshot this device reads from */ 180820a5f3eSAlex Elder char *snap_name; 181e88a36ecSJosh Durgin /* id of the snapshot this device reads from */ 18277dfe99fSJosh Durgin u64 snap_id; /* current snapshot id */ 183e88a36ecSJosh Durgin /* whether the snap_id this device reads from still exists */ 184e88a36ecSJosh Durgin bool snap_exists; 185cc0538b6SAlex Elder bool read_only; 186602adf40SYehuda Sadeh 187602adf40SYehuda Sadeh struct list_head node; 188dfc5606dSYehuda Sadeh 189dfc5606dSYehuda Sadeh /* list of snapshots */ 190dfc5606dSYehuda Sadeh struct list_head snaps; 191dfc5606dSYehuda Sadeh 192dfc5606dSYehuda Sadeh /* sysfs related */ 193dfc5606dSYehuda Sadeh struct device dev; 194dfc5606dSYehuda Sadeh }; 195dfc5606dSYehuda Sadeh 196602adf40SYehuda Sadeh static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ 197e124a82fSAlex Elder 198602adf40SYehuda Sadeh static LIST_HEAD(rbd_dev_list); /* devices */ 199e124a82fSAlex Elder static DEFINE_SPINLOCK(rbd_dev_list_lock); 200e124a82fSAlex Elder 201602adf40SYehuda Sadeh static LIST_HEAD(rbd_client_list); /* clients */ 202432b8587SAlex Elder static DEFINE_SPINLOCK(rbd_client_list_lock); 203602adf40SYehuda Sadeh 2049fcbb800SAlex Elder static int rbd_dev_snap_devs_update(struct rbd_device *rbd_dev); 205dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev); 206dfc5606dSYehuda Sadeh static ssize_t rbd_snap_add(struct device *dev, 207dfc5606dSYehuda Sadeh struct device_attribute *attr, 208dfc5606dSYehuda Sadeh const char *buf, 209dfc5606dSYehuda Sadeh size_t count); 21014e7085dSAlex Elder static void __rbd_remove_snap_dev(struct rbd_snap *snap); 211dfc5606dSYehuda Sadeh 212f0f8cef5SAlex Elder static ssize_t rbd_add(struct bus_type *bus, const char *buf, 213f0f8cef5SAlex Elder size_t count); 214f0f8cef5SAlex Elder static ssize_t rbd_remove(struct bus_type *bus, const char *buf, 215f0f8cef5SAlex Elder size_t count); 216f0f8cef5SAlex Elder 217f0f8cef5SAlex Elder static struct bus_attribute rbd_bus_attrs[] = { 218f0f8cef5SAlex Elder __ATTR(add, S_IWUSR, NULL, rbd_add), 219f0f8cef5SAlex Elder __ATTR(remove, S_IWUSR, NULL, rbd_remove), 220f0f8cef5SAlex Elder __ATTR_NULL 221f0f8cef5SAlex Elder }; 222f0f8cef5SAlex Elder 223f0f8cef5SAlex Elder static struct bus_type rbd_bus_type = { 224f0f8cef5SAlex Elder .name = "rbd", 225f0f8cef5SAlex Elder .bus_attrs = rbd_bus_attrs, 226f0f8cef5SAlex Elder }; 227f0f8cef5SAlex Elder 228f0f8cef5SAlex Elder static void rbd_root_dev_release(struct device *dev) 229f0f8cef5SAlex Elder { 230f0f8cef5SAlex Elder } 231f0f8cef5SAlex Elder 232f0f8cef5SAlex Elder static struct device rbd_root_dev = { 233f0f8cef5SAlex Elder .init_name = "rbd", 234f0f8cef5SAlex Elder .release = rbd_root_dev_release, 235f0f8cef5SAlex Elder }; 236f0f8cef5SAlex Elder 237aafb230eSAlex Elder #ifdef RBD_DEBUG 238aafb230eSAlex Elder #define rbd_assert(expr) \ 239aafb230eSAlex Elder if (unlikely(!(expr))) { \ 240aafb230eSAlex Elder printk(KERN_ERR "\nAssertion failure in %s() " \ 241aafb230eSAlex Elder "at line %d:\n\n" \ 242aafb230eSAlex Elder "\trbd_assert(%s);\n\n", \ 243aafb230eSAlex Elder __func__, __LINE__, #expr); \ 244aafb230eSAlex Elder BUG(); \ 245aafb230eSAlex Elder } 246aafb230eSAlex Elder #else /* !RBD_DEBUG */ 247aafb230eSAlex Elder # define rbd_assert(expr) ((void) 0) 248aafb230eSAlex Elder #endif /* !RBD_DEBUG */ 249dfc5606dSYehuda Sadeh 250dfc5606dSYehuda Sadeh static struct device *rbd_get_dev(struct rbd_device *rbd_dev) 251dfc5606dSYehuda Sadeh { 252dfc5606dSYehuda Sadeh return get_device(&rbd_dev->dev); 253dfc5606dSYehuda Sadeh } 254dfc5606dSYehuda Sadeh 255dfc5606dSYehuda Sadeh static void rbd_put_dev(struct rbd_device *rbd_dev) 256dfc5606dSYehuda Sadeh { 257dfc5606dSYehuda Sadeh put_device(&rbd_dev->dev); 258dfc5606dSYehuda Sadeh } 259602adf40SYehuda Sadeh 2601fe5e993SAlex Elder static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver); 26159c2be1eSYehuda Sadeh 262602adf40SYehuda Sadeh static int rbd_open(struct block_device *bdev, fmode_t mode) 263602adf40SYehuda Sadeh { 264f0f8cef5SAlex Elder struct rbd_device *rbd_dev = bdev->bd_disk->private_data; 265602adf40SYehuda Sadeh 266602adf40SYehuda Sadeh if ((mode & FMODE_WRITE) && rbd_dev->read_only) 267602adf40SYehuda Sadeh return -EROFS; 268602adf40SYehuda Sadeh 269340c7a2bSAlex Elder rbd_get_dev(rbd_dev); 270340c7a2bSAlex Elder set_device_ro(bdev, rbd_dev->read_only); 271340c7a2bSAlex Elder 272602adf40SYehuda Sadeh return 0; 273602adf40SYehuda Sadeh } 274602adf40SYehuda Sadeh 275dfc5606dSYehuda Sadeh static int rbd_release(struct gendisk *disk, fmode_t mode) 276dfc5606dSYehuda Sadeh { 277dfc5606dSYehuda Sadeh struct rbd_device *rbd_dev = disk->private_data; 278dfc5606dSYehuda Sadeh 279dfc5606dSYehuda Sadeh rbd_put_dev(rbd_dev); 280dfc5606dSYehuda Sadeh 281dfc5606dSYehuda Sadeh return 0; 282dfc5606dSYehuda Sadeh } 283dfc5606dSYehuda Sadeh 284602adf40SYehuda Sadeh static const struct block_device_operations rbd_bd_ops = { 285602adf40SYehuda Sadeh .owner = THIS_MODULE, 286602adf40SYehuda Sadeh .open = rbd_open, 287dfc5606dSYehuda Sadeh .release = rbd_release, 288602adf40SYehuda Sadeh }; 289602adf40SYehuda Sadeh 290602adf40SYehuda Sadeh /* 291602adf40SYehuda Sadeh * Initialize an rbd client instance. 29243ae4701SAlex Elder * We own *ceph_opts. 293602adf40SYehuda Sadeh */ 294f8c38929SAlex Elder static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) 295602adf40SYehuda Sadeh { 296602adf40SYehuda Sadeh struct rbd_client *rbdc; 297602adf40SYehuda Sadeh int ret = -ENOMEM; 298602adf40SYehuda Sadeh 299602adf40SYehuda Sadeh dout("rbd_client_create\n"); 300602adf40SYehuda Sadeh rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); 301602adf40SYehuda Sadeh if (!rbdc) 302602adf40SYehuda Sadeh goto out_opt; 303602adf40SYehuda Sadeh 304602adf40SYehuda Sadeh kref_init(&rbdc->kref); 305602adf40SYehuda Sadeh INIT_LIST_HEAD(&rbdc->node); 306602adf40SYehuda Sadeh 307bc534d86SAlex Elder mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 308bc534d86SAlex Elder 30943ae4701SAlex Elder rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0); 310602adf40SYehuda Sadeh if (IS_ERR(rbdc->client)) 311bc534d86SAlex Elder goto out_mutex; 31243ae4701SAlex Elder ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */ 313602adf40SYehuda Sadeh 314602adf40SYehuda Sadeh ret = ceph_open_session(rbdc->client); 315602adf40SYehuda Sadeh if (ret < 0) 316602adf40SYehuda Sadeh goto out_err; 317602adf40SYehuda Sadeh 318432b8587SAlex Elder spin_lock(&rbd_client_list_lock); 319602adf40SYehuda Sadeh list_add_tail(&rbdc->node, &rbd_client_list); 320432b8587SAlex Elder spin_unlock(&rbd_client_list_lock); 321602adf40SYehuda Sadeh 322bc534d86SAlex Elder mutex_unlock(&ctl_mutex); 323bc534d86SAlex Elder 324602adf40SYehuda Sadeh dout("rbd_client_create created %p\n", rbdc); 325602adf40SYehuda Sadeh return rbdc; 326602adf40SYehuda Sadeh 327602adf40SYehuda Sadeh out_err: 328602adf40SYehuda Sadeh ceph_destroy_client(rbdc->client); 329bc534d86SAlex Elder out_mutex: 330bc534d86SAlex Elder mutex_unlock(&ctl_mutex); 331602adf40SYehuda Sadeh kfree(rbdc); 332602adf40SYehuda Sadeh out_opt: 33343ae4701SAlex Elder if (ceph_opts) 33443ae4701SAlex Elder ceph_destroy_options(ceph_opts); 33528f259b7SVasiliy Kulikov return ERR_PTR(ret); 336602adf40SYehuda Sadeh } 337602adf40SYehuda Sadeh 338602adf40SYehuda Sadeh /* 3391f7ba331SAlex Elder * Find a ceph client with specific addr and configuration. If 3401f7ba331SAlex Elder * found, bump its reference count. 341602adf40SYehuda Sadeh */ 3421f7ba331SAlex Elder static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts) 343602adf40SYehuda Sadeh { 344602adf40SYehuda Sadeh struct rbd_client *client_node; 3451f7ba331SAlex Elder bool found = false; 346602adf40SYehuda Sadeh 34743ae4701SAlex Elder if (ceph_opts->flags & CEPH_OPT_NOSHARE) 348602adf40SYehuda Sadeh return NULL; 349602adf40SYehuda Sadeh 3501f7ba331SAlex Elder spin_lock(&rbd_client_list_lock); 3511f7ba331SAlex Elder list_for_each_entry(client_node, &rbd_client_list, node) { 3521f7ba331SAlex Elder if (!ceph_compare_options(ceph_opts, client_node->client)) { 3531f7ba331SAlex Elder kref_get(&client_node->kref); 3541f7ba331SAlex Elder found = true; 3551f7ba331SAlex Elder break; 3561f7ba331SAlex Elder } 3571f7ba331SAlex Elder } 3581f7ba331SAlex Elder spin_unlock(&rbd_client_list_lock); 3591f7ba331SAlex Elder 3601f7ba331SAlex Elder return found ? client_node : NULL; 361602adf40SYehuda Sadeh } 362602adf40SYehuda Sadeh 363602adf40SYehuda Sadeh /* 36459c2be1eSYehuda Sadeh * mount options 36559c2be1eSYehuda Sadeh */ 36659c2be1eSYehuda Sadeh enum { 36759c2be1eSYehuda Sadeh Opt_last_int, 36859c2be1eSYehuda Sadeh /* int args above */ 36959c2be1eSYehuda Sadeh Opt_last_string, 37059c2be1eSYehuda Sadeh /* string args above */ 371cc0538b6SAlex Elder Opt_read_only, 372cc0538b6SAlex Elder Opt_read_write, 373cc0538b6SAlex Elder /* Boolean args above */ 374cc0538b6SAlex Elder Opt_last_bool, 37559c2be1eSYehuda Sadeh }; 37659c2be1eSYehuda Sadeh 37743ae4701SAlex Elder static match_table_t rbd_opts_tokens = { 37859c2be1eSYehuda Sadeh /* int args above */ 37959c2be1eSYehuda Sadeh /* string args above */ 380cc0538b6SAlex Elder {Opt_read_only, "read_only"}, 381cc0538b6SAlex Elder {Opt_read_only, "ro"}, /* Alternate spelling */ 382cc0538b6SAlex Elder {Opt_read_write, "read_write"}, 383cc0538b6SAlex Elder {Opt_read_write, "rw"}, /* Alternate spelling */ 384cc0538b6SAlex Elder /* Boolean args above */ 38559c2be1eSYehuda Sadeh {-1, NULL} 38659c2be1eSYehuda Sadeh }; 38759c2be1eSYehuda Sadeh 38859c2be1eSYehuda Sadeh static int parse_rbd_opts_token(char *c, void *private) 38959c2be1eSYehuda Sadeh { 39043ae4701SAlex Elder struct rbd_options *rbd_opts = private; 39159c2be1eSYehuda Sadeh substring_t argstr[MAX_OPT_ARGS]; 39259c2be1eSYehuda Sadeh int token, intval, ret; 39359c2be1eSYehuda Sadeh 39443ae4701SAlex Elder token = match_token(c, rbd_opts_tokens, argstr); 39559c2be1eSYehuda Sadeh if (token < 0) 39659c2be1eSYehuda Sadeh return -EINVAL; 39759c2be1eSYehuda Sadeh 39859c2be1eSYehuda Sadeh if (token < Opt_last_int) { 39959c2be1eSYehuda Sadeh ret = match_int(&argstr[0], &intval); 40059c2be1eSYehuda Sadeh if (ret < 0) { 40159c2be1eSYehuda Sadeh pr_err("bad mount option arg (not int) " 40259c2be1eSYehuda Sadeh "at '%s'\n", c); 40359c2be1eSYehuda Sadeh return ret; 40459c2be1eSYehuda Sadeh } 40559c2be1eSYehuda Sadeh dout("got int token %d val %d\n", token, intval); 40659c2be1eSYehuda Sadeh } else if (token > Opt_last_int && token < Opt_last_string) { 40759c2be1eSYehuda Sadeh dout("got string token %d val %s\n", token, 40859c2be1eSYehuda Sadeh argstr[0].from); 409cc0538b6SAlex Elder } else if (token > Opt_last_string && token < Opt_last_bool) { 410cc0538b6SAlex Elder dout("got Boolean token %d\n", token); 41159c2be1eSYehuda Sadeh } else { 41259c2be1eSYehuda Sadeh dout("got token %d\n", token); 41359c2be1eSYehuda Sadeh } 41459c2be1eSYehuda Sadeh 41559c2be1eSYehuda Sadeh switch (token) { 416cc0538b6SAlex Elder case Opt_read_only: 417cc0538b6SAlex Elder rbd_opts->read_only = true; 418cc0538b6SAlex Elder break; 419cc0538b6SAlex Elder case Opt_read_write: 420cc0538b6SAlex Elder rbd_opts->read_only = false; 421cc0538b6SAlex Elder break; 42259c2be1eSYehuda Sadeh default: 423aafb230eSAlex Elder rbd_assert(false); 424aafb230eSAlex Elder break; 42559c2be1eSYehuda Sadeh } 42659c2be1eSYehuda Sadeh return 0; 42759c2be1eSYehuda Sadeh } 42859c2be1eSYehuda Sadeh 42959c2be1eSYehuda Sadeh /* 430602adf40SYehuda Sadeh * Get a ceph client with specific addr and configuration, if one does 431602adf40SYehuda Sadeh * not exist create it. 432602adf40SYehuda Sadeh */ 433f8c38929SAlex Elder static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, 434f8c38929SAlex Elder size_t mon_addr_len, char *options) 435602adf40SYehuda Sadeh { 436f8c38929SAlex Elder struct rbd_options *rbd_opts = &rbd_dev->rbd_opts; 43743ae4701SAlex Elder struct ceph_options *ceph_opts; 438f8c38929SAlex Elder struct rbd_client *rbdc; 43959c2be1eSYehuda Sadeh 440cc0538b6SAlex Elder rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; 441602adf40SYehuda Sadeh 44243ae4701SAlex Elder ceph_opts = ceph_parse_options(options, mon_addr, 4435214ecc4SAlex Elder mon_addr + mon_addr_len, 44421079786SAlex Elder parse_rbd_opts_token, rbd_opts); 445f8c38929SAlex Elder if (IS_ERR(ceph_opts)) 446f8c38929SAlex Elder return PTR_ERR(ceph_opts); 447602adf40SYehuda Sadeh 4481f7ba331SAlex Elder rbdc = rbd_client_find(ceph_opts); 449602adf40SYehuda Sadeh if (rbdc) { 450e6994d3dSAlex Elder /* using an existing client */ 45143ae4701SAlex Elder ceph_destroy_options(ceph_opts); 452f8c38929SAlex Elder } else { 453f8c38929SAlex Elder rbdc = rbd_client_create(ceph_opts); 454d720bcb0SAlex Elder if (IS_ERR(rbdc)) 455f8c38929SAlex Elder return PTR_ERR(rbdc); 456f8c38929SAlex Elder } 457f8c38929SAlex Elder rbd_dev->rbd_client = rbdc; 458d720bcb0SAlex Elder 459f8c38929SAlex Elder return 0; 460602adf40SYehuda Sadeh } 461602adf40SYehuda Sadeh 462602adf40SYehuda Sadeh /* 463602adf40SYehuda Sadeh * Destroy ceph client 464d23a4b3fSAlex Elder * 465432b8587SAlex Elder * Caller must hold rbd_client_list_lock. 466602adf40SYehuda Sadeh */ 467602adf40SYehuda Sadeh static void rbd_client_release(struct kref *kref) 468602adf40SYehuda Sadeh { 469602adf40SYehuda Sadeh struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); 470602adf40SYehuda Sadeh 471602adf40SYehuda Sadeh dout("rbd_release_client %p\n", rbdc); 472cd9d9f5dSAlex Elder spin_lock(&rbd_client_list_lock); 473602adf40SYehuda Sadeh list_del(&rbdc->node); 474cd9d9f5dSAlex Elder spin_unlock(&rbd_client_list_lock); 475602adf40SYehuda Sadeh 476602adf40SYehuda Sadeh ceph_destroy_client(rbdc->client); 477602adf40SYehuda Sadeh kfree(rbdc); 478602adf40SYehuda Sadeh } 479602adf40SYehuda Sadeh 480602adf40SYehuda Sadeh /* 481602adf40SYehuda Sadeh * Drop reference to ceph client node. If it's not referenced anymore, release 482602adf40SYehuda Sadeh * it. 483602adf40SYehuda Sadeh */ 484602adf40SYehuda Sadeh static void rbd_put_client(struct rbd_device *rbd_dev) 485602adf40SYehuda Sadeh { 486602adf40SYehuda Sadeh kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); 487602adf40SYehuda Sadeh rbd_dev->rbd_client = NULL; 488602adf40SYehuda Sadeh } 489602adf40SYehuda Sadeh 4901fec7093SYehuda Sadeh /* 4911fec7093SYehuda Sadeh * Destroy requests collection 4921fec7093SYehuda Sadeh */ 4931fec7093SYehuda Sadeh static void rbd_coll_release(struct kref *kref) 4941fec7093SYehuda Sadeh { 4951fec7093SYehuda Sadeh struct rbd_req_coll *coll = 4961fec7093SYehuda Sadeh container_of(kref, struct rbd_req_coll, kref); 4971fec7093SYehuda Sadeh 4981fec7093SYehuda Sadeh dout("rbd_coll_release %p\n", coll); 4991fec7093SYehuda Sadeh kfree(coll); 5001fec7093SYehuda Sadeh } 501602adf40SYehuda Sadeh 5028e94af8eSAlex Elder static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) 5038e94af8eSAlex Elder { 504103a150fSAlex Elder size_t size; 505103a150fSAlex Elder u32 snap_count; 506103a150fSAlex Elder 507103a150fSAlex Elder /* The header has to start with the magic rbd header text */ 508103a150fSAlex Elder if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT))) 509103a150fSAlex Elder return false; 510103a150fSAlex Elder 511103a150fSAlex Elder /* 512103a150fSAlex Elder * The size of a snapshot header has to fit in a size_t, and 513103a150fSAlex Elder * that limits the number of snapshots. 514103a150fSAlex Elder */ 515103a150fSAlex Elder snap_count = le32_to_cpu(ondisk->snap_count); 516103a150fSAlex Elder size = SIZE_MAX - sizeof (struct ceph_snap_context); 517103a150fSAlex Elder if (snap_count > size / sizeof (__le64)) 518103a150fSAlex Elder return false; 519103a150fSAlex Elder 520103a150fSAlex Elder /* 521103a150fSAlex Elder * Not only that, but the size of the entire the snapshot 522103a150fSAlex Elder * header must also be representable in a size_t. 523103a150fSAlex Elder */ 524103a150fSAlex Elder size -= snap_count * sizeof (__le64); 525103a150fSAlex Elder if ((u64) size < le64_to_cpu(ondisk->snap_names_len)) 526103a150fSAlex Elder return false; 527103a150fSAlex Elder 528103a150fSAlex Elder return true; 5298e94af8eSAlex Elder } 5308e94af8eSAlex Elder 531602adf40SYehuda Sadeh /* 532602adf40SYehuda Sadeh * Create a new header structure, translate header format from the on-disk 533602adf40SYehuda Sadeh * header. 534602adf40SYehuda Sadeh */ 535602adf40SYehuda Sadeh static int rbd_header_from_disk(struct rbd_image_header *header, 5364156d998SAlex Elder struct rbd_image_header_ondisk *ondisk) 537602adf40SYehuda Sadeh { 538ccece235SAlex Elder u32 snap_count; 53958c17b0eSAlex Elder size_t len; 540d2bb24e5SAlex Elder size_t size; 541621901d6SAlex Elder u32 i; 542602adf40SYehuda Sadeh 5436a52325fSAlex Elder memset(header, 0, sizeof (*header)); 5446a52325fSAlex Elder 545103a150fSAlex Elder snap_count = le32_to_cpu(ondisk->snap_count); 546103a150fSAlex Elder 54758c17b0eSAlex Elder len = strnlen(ondisk->object_prefix, sizeof (ondisk->object_prefix)); 54858c17b0eSAlex Elder header->object_prefix = kmalloc(len + 1, GFP_KERNEL); 5496a52325fSAlex Elder if (!header->object_prefix) 550602adf40SYehuda Sadeh return -ENOMEM; 55158c17b0eSAlex Elder memcpy(header->object_prefix, ondisk->object_prefix, len); 55258c17b0eSAlex Elder header->object_prefix[len] = '\0'; 55300f1f36fSAlex Elder 554602adf40SYehuda Sadeh if (snap_count) { 555f785cc1dSAlex Elder u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len); 556f785cc1dSAlex Elder 557621901d6SAlex Elder /* Save a copy of the snapshot names */ 558621901d6SAlex Elder 559f785cc1dSAlex Elder if (snap_names_len > (u64) SIZE_MAX) 560f785cc1dSAlex Elder return -EIO; 561f785cc1dSAlex Elder header->snap_names = kmalloc(snap_names_len, GFP_KERNEL); 562602adf40SYehuda Sadeh if (!header->snap_names) 5636a52325fSAlex Elder goto out_err; 564f785cc1dSAlex Elder /* 565f785cc1dSAlex Elder * Note that rbd_dev_v1_header_read() guarantees 566f785cc1dSAlex Elder * the ondisk buffer we're working with has 567f785cc1dSAlex Elder * snap_names_len bytes beyond the end of the 568f785cc1dSAlex Elder * snapshot id array, this memcpy() is safe. 569f785cc1dSAlex Elder */ 570f785cc1dSAlex Elder memcpy(header->snap_names, &ondisk->snaps[snap_count], 571f785cc1dSAlex Elder snap_names_len); 5726a52325fSAlex Elder 573621901d6SAlex Elder /* Record each snapshot's size */ 574621901d6SAlex Elder 575d2bb24e5SAlex Elder size = snap_count * sizeof (*header->snap_sizes); 576d2bb24e5SAlex Elder header->snap_sizes = kmalloc(size, GFP_KERNEL); 577602adf40SYehuda Sadeh if (!header->snap_sizes) 5786a52325fSAlex Elder goto out_err; 579621901d6SAlex Elder for (i = 0; i < snap_count; i++) 580621901d6SAlex Elder header->snap_sizes[i] = 581621901d6SAlex Elder le64_to_cpu(ondisk->snaps[i].image_size); 582602adf40SYehuda Sadeh } else { 583ccece235SAlex Elder WARN_ON(ondisk->snap_names_len); 584602adf40SYehuda Sadeh header->snap_names = NULL; 585602adf40SYehuda Sadeh header->snap_sizes = NULL; 586602adf40SYehuda Sadeh } 587849b4260SAlex Elder 588602adf40SYehuda Sadeh header->image_size = le64_to_cpu(ondisk->image_size); 589602adf40SYehuda Sadeh header->obj_order = ondisk->options.order; 590602adf40SYehuda Sadeh header->crypt_type = ondisk->options.crypt_type; 591602adf40SYehuda Sadeh header->comp_type = ondisk->options.comp_type; 5926a52325fSAlex Elder header->total_snaps = snap_count; 5936a52325fSAlex Elder 594621901d6SAlex Elder /* Allocate and fill in the snapshot context */ 595621901d6SAlex Elder 5966a52325fSAlex Elder size = sizeof (struct ceph_snap_context); 5976a52325fSAlex Elder size += snap_count * sizeof (header->snapc->snaps[0]); 5986a52325fSAlex Elder header->snapc = kzalloc(size, GFP_KERNEL); 5996a52325fSAlex Elder if (!header->snapc) 6006a52325fSAlex Elder goto out_err; 601602adf40SYehuda Sadeh 602602adf40SYehuda Sadeh atomic_set(&header->snapc->nref, 1); 603505cbb9bSAlex Elder header->snapc->seq = le64_to_cpu(ondisk->snap_seq); 604602adf40SYehuda Sadeh header->snapc->num_snaps = snap_count; 605621901d6SAlex Elder for (i = 0; i < snap_count; i++) 606602adf40SYehuda Sadeh header->snapc->snaps[i] = 607602adf40SYehuda Sadeh le64_to_cpu(ondisk->snaps[i].id); 608602adf40SYehuda Sadeh 609602adf40SYehuda Sadeh return 0; 610602adf40SYehuda Sadeh 6116a52325fSAlex Elder out_err: 612849b4260SAlex Elder kfree(header->snap_sizes); 613ccece235SAlex Elder header->snap_sizes = NULL; 614602adf40SYehuda Sadeh kfree(header->snap_names); 615ccece235SAlex Elder header->snap_names = NULL; 6166a52325fSAlex Elder kfree(header->object_prefix); 6176a52325fSAlex Elder header->object_prefix = NULL; 618ccece235SAlex Elder 61900f1f36fSAlex Elder return -ENOMEM; 620602adf40SYehuda Sadeh } 621602adf40SYehuda Sadeh 622602adf40SYehuda Sadeh static int snap_by_name(struct rbd_image_header *header, const char *snap_name, 623602adf40SYehuda Sadeh u64 *seq, u64 *size) 624602adf40SYehuda Sadeh { 625602adf40SYehuda Sadeh int i; 626602adf40SYehuda Sadeh char *p = header->snap_names; 627602adf40SYehuda Sadeh 62800f1f36fSAlex Elder for (i = 0; i < header->total_snaps; i++) { 62900f1f36fSAlex Elder if (!strcmp(snap_name, p)) { 63000f1f36fSAlex Elder 63100f1f36fSAlex Elder /* Found it. Pass back its id and/or size */ 63200f1f36fSAlex Elder 633602adf40SYehuda Sadeh if (seq) 634602adf40SYehuda Sadeh *seq = header->snapc->snaps[i]; 635602adf40SYehuda Sadeh if (size) 636602adf40SYehuda Sadeh *size = header->snap_sizes[i]; 637602adf40SYehuda Sadeh return i; 638602adf40SYehuda Sadeh } 63900f1f36fSAlex Elder p += strlen(p) + 1; /* Skip ahead to the next name */ 64000f1f36fSAlex Elder } 64100f1f36fSAlex Elder return -ENOENT; 64200f1f36fSAlex Elder } 643602adf40SYehuda Sadeh 6440ce1a794SAlex Elder static int rbd_header_set_snap(struct rbd_device *rbd_dev, u64 *size) 645602adf40SYehuda Sadeh { 64678dc447dSAlex Elder int ret; 647602adf40SYehuda Sadeh 6480ce1a794SAlex Elder down_write(&rbd_dev->header_rwsem); 649602adf40SYehuda Sadeh 6500ce1a794SAlex Elder if (!memcmp(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME, 651cc9d734cSJosh Durgin sizeof (RBD_SNAP_HEAD_NAME))) { 6520ce1a794SAlex Elder rbd_dev->snap_id = CEPH_NOSNAP; 653e88a36ecSJosh Durgin rbd_dev->snap_exists = false; 654cc0538b6SAlex Elder rbd_dev->read_only = rbd_dev->rbd_opts.read_only; 655602adf40SYehuda Sadeh if (size) 65678dc447dSAlex Elder *size = rbd_dev->header.image_size; 657602adf40SYehuda Sadeh } else { 65878dc447dSAlex Elder u64 snap_id = 0; 65978dc447dSAlex Elder 66078dc447dSAlex Elder ret = snap_by_name(&rbd_dev->header, rbd_dev->snap_name, 66178dc447dSAlex Elder &snap_id, size); 662602adf40SYehuda Sadeh if (ret < 0) 663602adf40SYehuda Sadeh goto done; 66478dc447dSAlex Elder rbd_dev->snap_id = snap_id; 665e88a36ecSJosh Durgin rbd_dev->snap_exists = true; 666cc0538b6SAlex Elder rbd_dev->read_only = true; /* No choice for snapshots */ 667602adf40SYehuda Sadeh } 668602adf40SYehuda Sadeh 669602adf40SYehuda Sadeh ret = 0; 670602adf40SYehuda Sadeh done: 6710ce1a794SAlex Elder up_write(&rbd_dev->header_rwsem); 672602adf40SYehuda Sadeh return ret; 673602adf40SYehuda Sadeh } 674602adf40SYehuda Sadeh 675602adf40SYehuda Sadeh static void rbd_header_free(struct rbd_image_header *header) 676602adf40SYehuda Sadeh { 677849b4260SAlex Elder kfree(header->object_prefix); 678d78fd7aeSAlex Elder header->object_prefix = NULL; 679602adf40SYehuda Sadeh kfree(header->snap_sizes); 680d78fd7aeSAlex Elder header->snap_sizes = NULL; 681849b4260SAlex Elder kfree(header->snap_names); 682d78fd7aeSAlex Elder header->snap_names = NULL; 683d1d25646SJosh Durgin ceph_put_snap_context(header->snapc); 684d78fd7aeSAlex Elder header->snapc = NULL; 685602adf40SYehuda Sadeh } 686602adf40SYehuda Sadeh 68765ccfe21SAlex Elder static char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) 688602adf40SYehuda Sadeh { 68965ccfe21SAlex Elder char *name; 69065ccfe21SAlex Elder u64 segment; 69165ccfe21SAlex Elder int ret; 692602adf40SYehuda Sadeh 69365ccfe21SAlex Elder name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); 69465ccfe21SAlex Elder if (!name) 69565ccfe21SAlex Elder return NULL; 69665ccfe21SAlex Elder segment = offset >> rbd_dev->header.obj_order; 69765ccfe21SAlex Elder ret = snprintf(name, RBD_MAX_SEG_NAME_LEN, "%s.%012llx", 69865ccfe21SAlex Elder rbd_dev->header.object_prefix, segment); 69965ccfe21SAlex Elder if (ret < 0 || ret >= RBD_MAX_SEG_NAME_LEN) { 70065ccfe21SAlex Elder pr_err("error formatting segment name for #%llu (%d)\n", 70165ccfe21SAlex Elder segment, ret); 70265ccfe21SAlex Elder kfree(name); 70365ccfe21SAlex Elder name = NULL; 70465ccfe21SAlex Elder } 705602adf40SYehuda Sadeh 70665ccfe21SAlex Elder return name; 70765ccfe21SAlex Elder } 708602adf40SYehuda Sadeh 70965ccfe21SAlex Elder static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset) 71065ccfe21SAlex Elder { 71165ccfe21SAlex Elder u64 segment_size = (u64) 1 << rbd_dev->header.obj_order; 712602adf40SYehuda Sadeh 71365ccfe21SAlex Elder return offset & (segment_size - 1); 71465ccfe21SAlex Elder } 71565ccfe21SAlex Elder 71665ccfe21SAlex Elder static u64 rbd_segment_length(struct rbd_device *rbd_dev, 71765ccfe21SAlex Elder u64 offset, u64 length) 71865ccfe21SAlex Elder { 71965ccfe21SAlex Elder u64 segment_size = (u64) 1 << rbd_dev->header.obj_order; 72065ccfe21SAlex Elder 72165ccfe21SAlex Elder offset &= segment_size - 1; 72265ccfe21SAlex Elder 723aafb230eSAlex Elder rbd_assert(length <= U64_MAX - offset); 72465ccfe21SAlex Elder if (offset + length > segment_size) 72565ccfe21SAlex Elder length = segment_size - offset; 72665ccfe21SAlex Elder 72765ccfe21SAlex Elder return length; 728602adf40SYehuda Sadeh } 729602adf40SYehuda Sadeh 7301fec7093SYehuda Sadeh static int rbd_get_num_segments(struct rbd_image_header *header, 7311fec7093SYehuda Sadeh u64 ofs, u64 len) 7321fec7093SYehuda Sadeh { 733df111be6SAlex Elder u64 start_seg; 734df111be6SAlex Elder u64 end_seg; 735df111be6SAlex Elder 736df111be6SAlex Elder if (!len) 737df111be6SAlex Elder return 0; 738df111be6SAlex Elder if (len - 1 > U64_MAX - ofs) 739df111be6SAlex Elder return -ERANGE; 740df111be6SAlex Elder 741df111be6SAlex Elder start_seg = ofs >> header->obj_order; 742df111be6SAlex Elder end_seg = (ofs + len - 1) >> header->obj_order; 743df111be6SAlex Elder 7441fec7093SYehuda Sadeh return end_seg - start_seg + 1; 7451fec7093SYehuda Sadeh } 7461fec7093SYehuda Sadeh 747602adf40SYehuda Sadeh /* 748029bcbd8SJosh Durgin * returns the size of an object in the image 749029bcbd8SJosh Durgin */ 750029bcbd8SJosh Durgin static u64 rbd_obj_bytes(struct rbd_image_header *header) 751029bcbd8SJosh Durgin { 752029bcbd8SJosh Durgin return 1 << header->obj_order; 753029bcbd8SJosh Durgin } 754029bcbd8SJosh Durgin 755029bcbd8SJosh Durgin /* 756602adf40SYehuda Sadeh * bio helpers 757602adf40SYehuda Sadeh */ 758602adf40SYehuda Sadeh 759602adf40SYehuda Sadeh static void bio_chain_put(struct bio *chain) 760602adf40SYehuda Sadeh { 761602adf40SYehuda Sadeh struct bio *tmp; 762602adf40SYehuda Sadeh 763602adf40SYehuda Sadeh while (chain) { 764602adf40SYehuda Sadeh tmp = chain; 765602adf40SYehuda Sadeh chain = chain->bi_next; 766602adf40SYehuda Sadeh bio_put(tmp); 767602adf40SYehuda Sadeh } 768602adf40SYehuda Sadeh } 769602adf40SYehuda Sadeh 770602adf40SYehuda Sadeh /* 771602adf40SYehuda Sadeh * zeros a bio chain, starting at specific offset 772602adf40SYehuda Sadeh */ 773602adf40SYehuda Sadeh static void zero_bio_chain(struct bio *chain, int start_ofs) 774602adf40SYehuda Sadeh { 775602adf40SYehuda Sadeh struct bio_vec *bv; 776602adf40SYehuda Sadeh unsigned long flags; 777602adf40SYehuda Sadeh void *buf; 778602adf40SYehuda Sadeh int i; 779602adf40SYehuda Sadeh int pos = 0; 780602adf40SYehuda Sadeh 781602adf40SYehuda Sadeh while (chain) { 782602adf40SYehuda Sadeh bio_for_each_segment(bv, chain, i) { 783602adf40SYehuda Sadeh if (pos + bv->bv_len > start_ofs) { 784602adf40SYehuda Sadeh int remainder = max(start_ofs - pos, 0); 785602adf40SYehuda Sadeh buf = bvec_kmap_irq(bv, &flags); 786602adf40SYehuda Sadeh memset(buf + remainder, 0, 787602adf40SYehuda Sadeh bv->bv_len - remainder); 78885b5aaa6SDan Carpenter bvec_kunmap_irq(buf, &flags); 789602adf40SYehuda Sadeh } 790602adf40SYehuda Sadeh pos += bv->bv_len; 791602adf40SYehuda Sadeh } 792602adf40SYehuda Sadeh 793602adf40SYehuda Sadeh chain = chain->bi_next; 794602adf40SYehuda Sadeh } 795602adf40SYehuda Sadeh } 796602adf40SYehuda Sadeh 797602adf40SYehuda Sadeh /* 798602adf40SYehuda Sadeh * bio_chain_clone - clone a chain of bios up to a certain length. 799602adf40SYehuda Sadeh * might return a bio_pair that will need to be released. 800602adf40SYehuda Sadeh */ 801602adf40SYehuda Sadeh static struct bio *bio_chain_clone(struct bio **old, struct bio **next, 802602adf40SYehuda Sadeh struct bio_pair **bp, 803602adf40SYehuda Sadeh int len, gfp_t gfpmask) 804602adf40SYehuda Sadeh { 805542582fcSAlex Elder struct bio *old_chain = *old; 806542582fcSAlex Elder struct bio *new_chain = NULL; 807542582fcSAlex Elder struct bio *tail; 808602adf40SYehuda Sadeh int total = 0; 809602adf40SYehuda Sadeh 810602adf40SYehuda Sadeh if (*bp) { 811602adf40SYehuda Sadeh bio_pair_release(*bp); 812602adf40SYehuda Sadeh *bp = NULL; 813602adf40SYehuda Sadeh } 814602adf40SYehuda Sadeh 815602adf40SYehuda Sadeh while (old_chain && (total < len)) { 816542582fcSAlex Elder struct bio *tmp; 817542582fcSAlex Elder 818602adf40SYehuda Sadeh tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); 819602adf40SYehuda Sadeh if (!tmp) 820602adf40SYehuda Sadeh goto err_out; 821542582fcSAlex Elder gfpmask &= ~__GFP_WAIT; /* can't wait after the first */ 822602adf40SYehuda Sadeh 823602adf40SYehuda Sadeh if (total + old_chain->bi_size > len) { 824602adf40SYehuda Sadeh struct bio_pair *bp; 825602adf40SYehuda Sadeh 826602adf40SYehuda Sadeh /* 827602adf40SYehuda Sadeh * this split can only happen with a single paged bio, 828602adf40SYehuda Sadeh * split_bio will BUG_ON if this is not the case 829602adf40SYehuda Sadeh */ 830602adf40SYehuda Sadeh dout("bio_chain_clone split! total=%d remaining=%d" 831bd919d45SAlex Elder "bi_size=%u\n", 832bd919d45SAlex Elder total, len - total, old_chain->bi_size); 833602adf40SYehuda Sadeh 834602adf40SYehuda Sadeh /* split the bio. We'll release it either in the next 835602adf40SYehuda Sadeh call, or it will have to be released outside */ 836593a9e7bSAlex Elder bp = bio_split(old_chain, (len - total) / SECTOR_SIZE); 837602adf40SYehuda Sadeh if (!bp) 838602adf40SYehuda Sadeh goto err_out; 839602adf40SYehuda Sadeh 840602adf40SYehuda Sadeh __bio_clone(tmp, &bp->bio1); 841602adf40SYehuda Sadeh 842602adf40SYehuda Sadeh *next = &bp->bio2; 843602adf40SYehuda Sadeh } else { 844602adf40SYehuda Sadeh __bio_clone(tmp, old_chain); 845602adf40SYehuda Sadeh *next = old_chain->bi_next; 846602adf40SYehuda Sadeh } 847602adf40SYehuda Sadeh 848602adf40SYehuda Sadeh tmp->bi_bdev = NULL; 849602adf40SYehuda Sadeh tmp->bi_next = NULL; 850542582fcSAlex Elder if (new_chain) 851602adf40SYehuda Sadeh tail->bi_next = tmp; 852542582fcSAlex Elder else 853542582fcSAlex Elder new_chain = tmp; 854602adf40SYehuda Sadeh tail = tmp; 855602adf40SYehuda Sadeh old_chain = old_chain->bi_next; 856602adf40SYehuda Sadeh 857602adf40SYehuda Sadeh total += tmp->bi_size; 858602adf40SYehuda Sadeh } 859602adf40SYehuda Sadeh 860aafb230eSAlex Elder rbd_assert(total == len); 861602adf40SYehuda Sadeh 862602adf40SYehuda Sadeh *old = old_chain; 863602adf40SYehuda Sadeh 864602adf40SYehuda Sadeh return new_chain; 865602adf40SYehuda Sadeh 866602adf40SYehuda Sadeh err_out: 867602adf40SYehuda Sadeh dout("bio_chain_clone with err\n"); 868602adf40SYehuda Sadeh bio_chain_put(new_chain); 869602adf40SYehuda Sadeh return NULL; 870602adf40SYehuda Sadeh } 871602adf40SYehuda Sadeh 872602adf40SYehuda Sadeh /* 873602adf40SYehuda Sadeh * helpers for osd request op vectors. 874602adf40SYehuda Sadeh */ 87557cfc106SAlex Elder static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops, 87657cfc106SAlex Elder int opcode, u32 payload_len) 877602adf40SYehuda Sadeh { 87857cfc106SAlex Elder struct ceph_osd_req_op *ops; 87957cfc106SAlex Elder 88057cfc106SAlex Elder ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO); 88157cfc106SAlex Elder if (!ops) 88257cfc106SAlex Elder return NULL; 88357cfc106SAlex Elder 88457cfc106SAlex Elder ops[0].op = opcode; 88557cfc106SAlex Elder 886602adf40SYehuda Sadeh /* 887602adf40SYehuda Sadeh * op extent offset and length will be set later on 888602adf40SYehuda Sadeh * in calc_raw_layout() 889602adf40SYehuda Sadeh */ 89057cfc106SAlex Elder ops[0].payload_len = payload_len; 89157cfc106SAlex Elder 89257cfc106SAlex Elder return ops; 893602adf40SYehuda Sadeh } 894602adf40SYehuda Sadeh 895602adf40SYehuda Sadeh static void rbd_destroy_ops(struct ceph_osd_req_op *ops) 896602adf40SYehuda Sadeh { 897602adf40SYehuda Sadeh kfree(ops); 898602adf40SYehuda Sadeh } 899602adf40SYehuda Sadeh 9001fec7093SYehuda Sadeh static void rbd_coll_end_req_index(struct request *rq, 9011fec7093SYehuda Sadeh struct rbd_req_coll *coll, 9021fec7093SYehuda Sadeh int index, 9031fec7093SYehuda Sadeh int ret, u64 len) 9041fec7093SYehuda Sadeh { 9051fec7093SYehuda Sadeh struct request_queue *q; 9061fec7093SYehuda Sadeh int min, max, i; 9071fec7093SYehuda Sadeh 908bd919d45SAlex Elder dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n", 909bd919d45SAlex Elder coll, index, ret, (unsigned long long) len); 9101fec7093SYehuda Sadeh 9111fec7093SYehuda Sadeh if (!rq) 9121fec7093SYehuda Sadeh return; 9131fec7093SYehuda Sadeh 9141fec7093SYehuda Sadeh if (!coll) { 9151fec7093SYehuda Sadeh blk_end_request(rq, ret, len); 9161fec7093SYehuda Sadeh return; 9171fec7093SYehuda Sadeh } 9181fec7093SYehuda Sadeh 9191fec7093SYehuda Sadeh q = rq->q; 9201fec7093SYehuda Sadeh 9211fec7093SYehuda Sadeh spin_lock_irq(q->queue_lock); 9221fec7093SYehuda Sadeh coll->status[index].done = 1; 9231fec7093SYehuda Sadeh coll->status[index].rc = ret; 9241fec7093SYehuda Sadeh coll->status[index].bytes = len; 9251fec7093SYehuda Sadeh max = min = coll->num_done; 9261fec7093SYehuda Sadeh while (max < coll->total && coll->status[max].done) 9271fec7093SYehuda Sadeh max++; 9281fec7093SYehuda Sadeh 9291fec7093SYehuda Sadeh for (i = min; i<max; i++) { 9301fec7093SYehuda Sadeh __blk_end_request(rq, coll->status[i].rc, 9311fec7093SYehuda Sadeh coll->status[i].bytes); 9321fec7093SYehuda Sadeh coll->num_done++; 9331fec7093SYehuda Sadeh kref_put(&coll->kref, rbd_coll_release); 9341fec7093SYehuda Sadeh } 9351fec7093SYehuda Sadeh spin_unlock_irq(q->queue_lock); 9361fec7093SYehuda Sadeh } 9371fec7093SYehuda Sadeh 9381fec7093SYehuda Sadeh static void rbd_coll_end_req(struct rbd_request *req, 9391fec7093SYehuda Sadeh int ret, u64 len) 9401fec7093SYehuda Sadeh { 9411fec7093SYehuda Sadeh rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len); 9421fec7093SYehuda Sadeh } 9431fec7093SYehuda Sadeh 944602adf40SYehuda Sadeh /* 945602adf40SYehuda Sadeh * Send ceph osd request 946602adf40SYehuda Sadeh */ 947602adf40SYehuda Sadeh static int rbd_do_request(struct request *rq, 9480ce1a794SAlex Elder struct rbd_device *rbd_dev, 949602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 950602adf40SYehuda Sadeh u64 snapid, 951aded07eaSAlex Elder const char *object_name, u64 ofs, u64 len, 952602adf40SYehuda Sadeh struct bio *bio, 953602adf40SYehuda Sadeh struct page **pages, 954602adf40SYehuda Sadeh int num_pages, 955602adf40SYehuda Sadeh int flags, 956602adf40SYehuda Sadeh struct ceph_osd_req_op *ops, 9571fec7093SYehuda Sadeh struct rbd_req_coll *coll, 9581fec7093SYehuda Sadeh int coll_index, 959602adf40SYehuda Sadeh void (*rbd_cb)(struct ceph_osd_request *req, 96059c2be1eSYehuda Sadeh struct ceph_msg *msg), 96159c2be1eSYehuda Sadeh struct ceph_osd_request **linger_req, 96259c2be1eSYehuda Sadeh u64 *ver) 963602adf40SYehuda Sadeh { 964602adf40SYehuda Sadeh struct ceph_osd_request *req; 965602adf40SYehuda Sadeh struct ceph_file_layout *layout; 966602adf40SYehuda Sadeh int ret; 967602adf40SYehuda Sadeh u64 bno; 968602adf40SYehuda Sadeh struct timespec mtime = CURRENT_TIME; 969602adf40SYehuda Sadeh struct rbd_request *req_data; 970602adf40SYehuda Sadeh struct ceph_osd_request_head *reqhead; 9711dbb4399SAlex Elder struct ceph_osd_client *osdc; 972602adf40SYehuda Sadeh 973602adf40SYehuda Sadeh req_data = kzalloc(sizeof(*req_data), GFP_NOIO); 9741fec7093SYehuda Sadeh if (!req_data) { 9751fec7093SYehuda Sadeh if (coll) 9761fec7093SYehuda Sadeh rbd_coll_end_req_index(rq, coll, coll_index, 9771fec7093SYehuda Sadeh -ENOMEM, len); 9781fec7093SYehuda Sadeh return -ENOMEM; 9791fec7093SYehuda Sadeh } 980602adf40SYehuda Sadeh 9811fec7093SYehuda Sadeh if (coll) { 9821fec7093SYehuda Sadeh req_data->coll = coll; 9831fec7093SYehuda Sadeh req_data->coll_index = coll_index; 9841fec7093SYehuda Sadeh } 9851fec7093SYehuda Sadeh 986bd919d45SAlex Elder dout("rbd_do_request object_name=%s ofs=%llu len=%llu\n", object_name, 987bd919d45SAlex Elder (unsigned long long) ofs, (unsigned long long) len); 988602adf40SYehuda Sadeh 9890ce1a794SAlex Elder osdc = &rbd_dev->rbd_client->client->osdc; 9901dbb4399SAlex Elder req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, 9911dbb4399SAlex Elder false, GFP_NOIO, pages, bio); 9924ad12621SSage Weil if (!req) { 9934ad12621SSage Weil ret = -ENOMEM; 994602adf40SYehuda Sadeh goto done_pages; 995602adf40SYehuda Sadeh } 996602adf40SYehuda Sadeh 997602adf40SYehuda Sadeh req->r_callback = rbd_cb; 998602adf40SYehuda Sadeh 999602adf40SYehuda Sadeh req_data->rq = rq; 1000602adf40SYehuda Sadeh req_data->bio = bio; 1001602adf40SYehuda Sadeh req_data->pages = pages; 1002602adf40SYehuda Sadeh req_data->len = len; 1003602adf40SYehuda Sadeh 1004602adf40SYehuda Sadeh req->r_priv = req_data; 1005602adf40SYehuda Sadeh 1006602adf40SYehuda Sadeh reqhead = req->r_request->front.iov_base; 1007602adf40SYehuda Sadeh reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); 1008602adf40SYehuda Sadeh 1009aded07eaSAlex Elder strncpy(req->r_oid, object_name, sizeof(req->r_oid)); 1010602adf40SYehuda Sadeh req->r_oid_len = strlen(req->r_oid); 1011602adf40SYehuda Sadeh 1012602adf40SYehuda Sadeh layout = &req->r_file_layout; 1013602adf40SYehuda Sadeh memset(layout, 0, sizeof(*layout)); 1014602adf40SYehuda Sadeh layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); 1015602adf40SYehuda Sadeh layout->fl_stripe_count = cpu_to_le32(1); 1016602adf40SYehuda Sadeh layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); 10170ce1a794SAlex Elder layout->fl_pg_pool = cpu_to_le32(rbd_dev->pool_id); 10181dbb4399SAlex Elder ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, 10191dbb4399SAlex Elder req, ops); 1020602adf40SYehuda Sadeh 1021602adf40SYehuda Sadeh ceph_osdc_build_request(req, ofs, &len, 1022602adf40SYehuda Sadeh ops, 1023602adf40SYehuda Sadeh snapc, 1024602adf40SYehuda Sadeh &mtime, 1025602adf40SYehuda Sadeh req->r_oid, req->r_oid_len); 1026602adf40SYehuda Sadeh 102759c2be1eSYehuda Sadeh if (linger_req) { 10281dbb4399SAlex Elder ceph_osdc_set_request_linger(osdc, req); 102959c2be1eSYehuda Sadeh *linger_req = req; 103059c2be1eSYehuda Sadeh } 103159c2be1eSYehuda Sadeh 10321dbb4399SAlex Elder ret = ceph_osdc_start_request(osdc, req, false); 1033602adf40SYehuda Sadeh if (ret < 0) 1034602adf40SYehuda Sadeh goto done_err; 1035602adf40SYehuda Sadeh 1036602adf40SYehuda Sadeh if (!rbd_cb) { 10371dbb4399SAlex Elder ret = ceph_osdc_wait_request(osdc, req); 103859c2be1eSYehuda Sadeh if (ver) 103959c2be1eSYehuda Sadeh *ver = le64_to_cpu(req->r_reassert_version.version); 1040bd919d45SAlex Elder dout("reassert_ver=%llu\n", 1041bd919d45SAlex Elder (unsigned long long) 10421fec7093SYehuda Sadeh le64_to_cpu(req->r_reassert_version.version)); 1043602adf40SYehuda Sadeh ceph_osdc_put_request(req); 1044602adf40SYehuda Sadeh } 1045602adf40SYehuda Sadeh return ret; 1046602adf40SYehuda Sadeh 1047602adf40SYehuda Sadeh done_err: 1048602adf40SYehuda Sadeh bio_chain_put(req_data->bio); 1049602adf40SYehuda Sadeh ceph_osdc_put_request(req); 1050602adf40SYehuda Sadeh done_pages: 10511fec7093SYehuda Sadeh rbd_coll_end_req(req_data, ret, len); 1052602adf40SYehuda Sadeh kfree(req_data); 1053602adf40SYehuda Sadeh return ret; 1054602adf40SYehuda Sadeh } 1055602adf40SYehuda Sadeh 1056602adf40SYehuda Sadeh /* 1057602adf40SYehuda Sadeh * Ceph osd op callback 1058602adf40SYehuda Sadeh */ 1059602adf40SYehuda Sadeh static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) 1060602adf40SYehuda Sadeh { 1061602adf40SYehuda Sadeh struct rbd_request *req_data = req->r_priv; 1062602adf40SYehuda Sadeh struct ceph_osd_reply_head *replyhead; 1063602adf40SYehuda Sadeh struct ceph_osd_op *op; 1064602adf40SYehuda Sadeh __s32 rc; 1065602adf40SYehuda Sadeh u64 bytes; 1066602adf40SYehuda Sadeh int read_op; 1067602adf40SYehuda Sadeh 1068602adf40SYehuda Sadeh /* parse reply */ 1069602adf40SYehuda Sadeh replyhead = msg->front.iov_base; 1070602adf40SYehuda Sadeh WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); 1071602adf40SYehuda Sadeh op = (void *)(replyhead + 1); 1072602adf40SYehuda Sadeh rc = le32_to_cpu(replyhead->result); 1073602adf40SYehuda Sadeh bytes = le64_to_cpu(op->extent.length); 1074895cfcc8SDan Carpenter read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ); 1075602adf40SYehuda Sadeh 1076bd919d45SAlex Elder dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n", 1077bd919d45SAlex Elder (unsigned long long) bytes, read_op, (int) rc); 1078602adf40SYehuda Sadeh 1079602adf40SYehuda Sadeh if (rc == -ENOENT && read_op) { 1080602adf40SYehuda Sadeh zero_bio_chain(req_data->bio, 0); 1081602adf40SYehuda Sadeh rc = 0; 1082602adf40SYehuda Sadeh } else if (rc == 0 && read_op && bytes < req_data->len) { 1083602adf40SYehuda Sadeh zero_bio_chain(req_data->bio, bytes); 1084602adf40SYehuda Sadeh bytes = req_data->len; 1085602adf40SYehuda Sadeh } 1086602adf40SYehuda Sadeh 10871fec7093SYehuda Sadeh rbd_coll_end_req(req_data, rc, bytes); 1088602adf40SYehuda Sadeh 1089602adf40SYehuda Sadeh if (req_data->bio) 1090602adf40SYehuda Sadeh bio_chain_put(req_data->bio); 1091602adf40SYehuda Sadeh 1092602adf40SYehuda Sadeh ceph_osdc_put_request(req); 1093602adf40SYehuda Sadeh kfree(req_data); 1094602adf40SYehuda Sadeh } 1095602adf40SYehuda Sadeh 109659c2be1eSYehuda Sadeh static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) 109759c2be1eSYehuda Sadeh { 109859c2be1eSYehuda Sadeh ceph_osdc_put_request(req); 109959c2be1eSYehuda Sadeh } 110059c2be1eSYehuda Sadeh 1101602adf40SYehuda Sadeh /* 1102602adf40SYehuda Sadeh * Do a synchronous ceph osd operation 1103602adf40SYehuda Sadeh */ 11040ce1a794SAlex Elder static int rbd_req_sync_op(struct rbd_device *rbd_dev, 1105602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1106602adf40SYehuda Sadeh u64 snapid, 1107602adf40SYehuda Sadeh int flags, 1108913d2fdcSAlex Elder struct ceph_osd_req_op *ops, 1109aded07eaSAlex Elder const char *object_name, 1110602adf40SYehuda Sadeh u64 ofs, u64 len, 111159c2be1eSYehuda Sadeh char *buf, 111259c2be1eSYehuda Sadeh struct ceph_osd_request **linger_req, 111359c2be1eSYehuda Sadeh u64 *ver) 1114602adf40SYehuda Sadeh { 1115602adf40SYehuda Sadeh int ret; 1116602adf40SYehuda Sadeh struct page **pages; 1117602adf40SYehuda Sadeh int num_pages; 1118913d2fdcSAlex Elder 1119aafb230eSAlex Elder rbd_assert(ops != NULL); 1120602adf40SYehuda Sadeh 1121602adf40SYehuda Sadeh num_pages = calc_pages_for(ofs , len); 1122602adf40SYehuda Sadeh pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); 1123b8d0638aSDan Carpenter if (IS_ERR(pages)) 1124b8d0638aSDan Carpenter return PTR_ERR(pages); 1125602adf40SYehuda Sadeh 11260ce1a794SAlex Elder ret = rbd_do_request(NULL, rbd_dev, snapc, snapid, 1127aded07eaSAlex Elder object_name, ofs, len, NULL, 1128602adf40SYehuda Sadeh pages, num_pages, 1129602adf40SYehuda Sadeh flags, 1130602adf40SYehuda Sadeh ops, 11311fec7093SYehuda Sadeh NULL, 0, 113259c2be1eSYehuda Sadeh NULL, 113359c2be1eSYehuda Sadeh linger_req, ver); 1134602adf40SYehuda Sadeh if (ret < 0) 1135913d2fdcSAlex Elder goto done; 1136602adf40SYehuda Sadeh 1137602adf40SYehuda Sadeh if ((flags & CEPH_OSD_FLAG_READ) && buf) 1138602adf40SYehuda Sadeh ret = ceph_copy_from_page_vector(pages, buf, ofs, ret); 1139602adf40SYehuda Sadeh 1140602adf40SYehuda Sadeh done: 1141602adf40SYehuda Sadeh ceph_release_page_vector(pages, num_pages); 1142602adf40SYehuda Sadeh return ret; 1143602adf40SYehuda Sadeh } 1144602adf40SYehuda Sadeh 1145602adf40SYehuda Sadeh /* 1146602adf40SYehuda Sadeh * Do an asynchronous ceph osd operation 1147602adf40SYehuda Sadeh */ 1148602adf40SYehuda Sadeh static int rbd_do_op(struct request *rq, 1149602adf40SYehuda Sadeh struct rbd_device *rbd_dev, 1150602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1151602adf40SYehuda Sadeh u64 snapid, 1152d1f57ea6SAlex Elder int opcode, int flags, 1153602adf40SYehuda Sadeh u64 ofs, u64 len, 11541fec7093SYehuda Sadeh struct bio *bio, 11551fec7093SYehuda Sadeh struct rbd_req_coll *coll, 11561fec7093SYehuda Sadeh int coll_index) 1157602adf40SYehuda Sadeh { 1158602adf40SYehuda Sadeh char *seg_name; 1159602adf40SYehuda Sadeh u64 seg_ofs; 1160602adf40SYehuda Sadeh u64 seg_len; 1161602adf40SYehuda Sadeh int ret; 1162602adf40SYehuda Sadeh struct ceph_osd_req_op *ops; 1163602adf40SYehuda Sadeh u32 payload_len; 1164602adf40SYehuda Sadeh 116565ccfe21SAlex Elder seg_name = rbd_segment_name(rbd_dev, ofs); 1166602adf40SYehuda Sadeh if (!seg_name) 1167602adf40SYehuda Sadeh return -ENOMEM; 116865ccfe21SAlex Elder seg_len = rbd_segment_length(rbd_dev, ofs, len); 116965ccfe21SAlex Elder seg_ofs = rbd_segment_offset(rbd_dev, ofs); 1170602adf40SYehuda Sadeh 1171602adf40SYehuda Sadeh payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); 1172602adf40SYehuda Sadeh 117357cfc106SAlex Elder ret = -ENOMEM; 117457cfc106SAlex Elder ops = rbd_create_rw_ops(1, opcode, payload_len); 117557cfc106SAlex Elder if (!ops) 1176602adf40SYehuda Sadeh goto done; 1177602adf40SYehuda Sadeh 1178602adf40SYehuda Sadeh /* we've taken care of segment sizes earlier when we 1179602adf40SYehuda Sadeh cloned the bios. We should never have a segment 1180602adf40SYehuda Sadeh truncated at this point */ 1181aafb230eSAlex Elder rbd_assert(seg_len == len); 1182602adf40SYehuda Sadeh 1183602adf40SYehuda Sadeh ret = rbd_do_request(rq, rbd_dev, snapc, snapid, 1184602adf40SYehuda Sadeh seg_name, seg_ofs, seg_len, 1185602adf40SYehuda Sadeh bio, 1186602adf40SYehuda Sadeh NULL, 0, 1187602adf40SYehuda Sadeh flags, 1188602adf40SYehuda Sadeh ops, 11891fec7093SYehuda Sadeh coll, coll_index, 119059c2be1eSYehuda Sadeh rbd_req_cb, 0, NULL); 119111f77002SSage Weil 119211f77002SSage Weil rbd_destroy_ops(ops); 1193602adf40SYehuda Sadeh done: 1194602adf40SYehuda Sadeh kfree(seg_name); 1195602adf40SYehuda Sadeh return ret; 1196602adf40SYehuda Sadeh } 1197602adf40SYehuda Sadeh 1198602adf40SYehuda Sadeh /* 1199602adf40SYehuda Sadeh * Request async osd write 1200602adf40SYehuda Sadeh */ 1201602adf40SYehuda Sadeh static int rbd_req_write(struct request *rq, 1202602adf40SYehuda Sadeh struct rbd_device *rbd_dev, 1203602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1204602adf40SYehuda Sadeh u64 ofs, u64 len, 12051fec7093SYehuda Sadeh struct bio *bio, 12061fec7093SYehuda Sadeh struct rbd_req_coll *coll, 12071fec7093SYehuda Sadeh int coll_index) 1208602adf40SYehuda Sadeh { 1209602adf40SYehuda Sadeh return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, 1210602adf40SYehuda Sadeh CEPH_OSD_OP_WRITE, 1211602adf40SYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 12121fec7093SYehuda Sadeh ofs, len, bio, coll, coll_index); 1213602adf40SYehuda Sadeh } 1214602adf40SYehuda Sadeh 1215602adf40SYehuda Sadeh /* 1216602adf40SYehuda Sadeh * Request async osd read 1217602adf40SYehuda Sadeh */ 1218602adf40SYehuda Sadeh static int rbd_req_read(struct request *rq, 1219602adf40SYehuda Sadeh struct rbd_device *rbd_dev, 1220602adf40SYehuda Sadeh u64 snapid, 1221602adf40SYehuda Sadeh u64 ofs, u64 len, 12221fec7093SYehuda Sadeh struct bio *bio, 12231fec7093SYehuda Sadeh struct rbd_req_coll *coll, 12241fec7093SYehuda Sadeh int coll_index) 1225602adf40SYehuda Sadeh { 1226602adf40SYehuda Sadeh return rbd_do_op(rq, rbd_dev, NULL, 1227b06e6a6bSJosh Durgin snapid, 1228602adf40SYehuda Sadeh CEPH_OSD_OP_READ, 1229602adf40SYehuda Sadeh CEPH_OSD_FLAG_READ, 12301fec7093SYehuda Sadeh ofs, len, bio, coll, coll_index); 1231602adf40SYehuda Sadeh } 1232602adf40SYehuda Sadeh 1233602adf40SYehuda Sadeh /* 1234602adf40SYehuda Sadeh * Request sync osd read 1235602adf40SYehuda Sadeh */ 12360ce1a794SAlex Elder static int rbd_req_sync_read(struct rbd_device *rbd_dev, 1237602adf40SYehuda Sadeh u64 snapid, 1238aded07eaSAlex Elder const char *object_name, 1239602adf40SYehuda Sadeh u64 ofs, u64 len, 124059c2be1eSYehuda Sadeh char *buf, 124159c2be1eSYehuda Sadeh u64 *ver) 1242602adf40SYehuda Sadeh { 1243913d2fdcSAlex Elder struct ceph_osd_req_op *ops; 1244913d2fdcSAlex Elder int ret; 1245913d2fdcSAlex Elder 1246913d2fdcSAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_READ, 0); 1247913d2fdcSAlex Elder if (!ops) 1248913d2fdcSAlex Elder return -ENOMEM; 1249913d2fdcSAlex Elder 1250913d2fdcSAlex Elder ret = rbd_req_sync_op(rbd_dev, NULL, 1251b06e6a6bSJosh Durgin snapid, 1252602adf40SYehuda Sadeh CEPH_OSD_FLAG_READ, 1253913d2fdcSAlex Elder ops, object_name, ofs, len, buf, NULL, ver); 1254913d2fdcSAlex Elder rbd_destroy_ops(ops); 1255913d2fdcSAlex Elder 1256913d2fdcSAlex Elder return ret; 1257602adf40SYehuda Sadeh } 1258602adf40SYehuda Sadeh 1259602adf40SYehuda Sadeh /* 126059c2be1eSYehuda Sadeh * Request sync osd watch 126159c2be1eSYehuda Sadeh */ 12620ce1a794SAlex Elder static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev, 126359c2be1eSYehuda Sadeh u64 ver, 12647f0a24d8SAlex Elder u64 notify_id) 126559c2be1eSYehuda Sadeh { 126659c2be1eSYehuda Sadeh struct ceph_osd_req_op *ops; 126711f77002SSage Weil int ret; 126811f77002SSage Weil 126957cfc106SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY_ACK, 0); 127057cfc106SAlex Elder if (!ops) 127157cfc106SAlex Elder return -ENOMEM; 127259c2be1eSYehuda Sadeh 1273a71b891bSJosh Durgin ops[0].watch.ver = cpu_to_le64(ver); 127459c2be1eSYehuda Sadeh ops[0].watch.cookie = notify_id; 127559c2be1eSYehuda Sadeh ops[0].watch.flag = 0; 127659c2be1eSYehuda Sadeh 12770ce1a794SAlex Elder ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP, 12787f0a24d8SAlex Elder rbd_dev->header_name, 0, 0, NULL, 1279ad4f232fSAlex Elder NULL, 0, 128059c2be1eSYehuda Sadeh CEPH_OSD_FLAG_READ, 128159c2be1eSYehuda Sadeh ops, 12821fec7093SYehuda Sadeh NULL, 0, 128359c2be1eSYehuda Sadeh rbd_simple_req_cb, 0, NULL); 128459c2be1eSYehuda Sadeh 128559c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 128659c2be1eSYehuda Sadeh return ret; 128759c2be1eSYehuda Sadeh } 128859c2be1eSYehuda Sadeh 128959c2be1eSYehuda Sadeh static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) 129059c2be1eSYehuda Sadeh { 12910ce1a794SAlex Elder struct rbd_device *rbd_dev = (struct rbd_device *)data; 1292a71b891bSJosh Durgin u64 hver; 129313143d2dSSage Weil int rc; 129413143d2dSSage Weil 12950ce1a794SAlex Elder if (!rbd_dev) 129659c2be1eSYehuda Sadeh return; 129759c2be1eSYehuda Sadeh 1298bd919d45SAlex Elder dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n", 1299bd919d45SAlex Elder rbd_dev->header_name, (unsigned long long) notify_id, 1300bd919d45SAlex Elder (unsigned int) opcode); 13011fe5e993SAlex Elder rc = rbd_refresh_header(rbd_dev, &hver); 130213143d2dSSage Weil if (rc) 1303f0f8cef5SAlex Elder pr_warning(RBD_DRV_NAME "%d got notification but failed to " 13040ce1a794SAlex Elder " update snaps: %d\n", rbd_dev->major, rc); 130559c2be1eSYehuda Sadeh 13067f0a24d8SAlex Elder rbd_req_sync_notify_ack(rbd_dev, hver, notify_id); 130759c2be1eSYehuda Sadeh } 130859c2be1eSYehuda Sadeh 130959c2be1eSYehuda Sadeh /* 131059c2be1eSYehuda Sadeh * Request sync osd watch 131159c2be1eSYehuda Sadeh */ 13120e6f322dSAlex Elder static int rbd_req_sync_watch(struct rbd_device *rbd_dev) 131359c2be1eSYehuda Sadeh { 131459c2be1eSYehuda Sadeh struct ceph_osd_req_op *ops; 13150ce1a794SAlex Elder struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 131657cfc106SAlex Elder int ret; 131759c2be1eSYehuda Sadeh 131857cfc106SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); 131957cfc106SAlex Elder if (!ops) 132057cfc106SAlex Elder return -ENOMEM; 132159c2be1eSYehuda Sadeh 132259c2be1eSYehuda Sadeh ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, 13230ce1a794SAlex Elder (void *)rbd_dev, &rbd_dev->watch_event); 132459c2be1eSYehuda Sadeh if (ret < 0) 132559c2be1eSYehuda Sadeh goto fail; 132659c2be1eSYehuda Sadeh 13270e6f322dSAlex Elder ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version); 13280ce1a794SAlex Elder ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); 132959c2be1eSYehuda Sadeh ops[0].watch.flag = 1; 133059c2be1eSYehuda Sadeh 13310ce1a794SAlex Elder ret = rbd_req_sync_op(rbd_dev, NULL, 133259c2be1eSYehuda Sadeh CEPH_NOSNAP, 133359c2be1eSYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 133459c2be1eSYehuda Sadeh ops, 13350e6f322dSAlex Elder rbd_dev->header_name, 13360e6f322dSAlex Elder 0, 0, NULL, 13370ce1a794SAlex Elder &rbd_dev->watch_request, NULL); 133859c2be1eSYehuda Sadeh 133959c2be1eSYehuda Sadeh if (ret < 0) 134059c2be1eSYehuda Sadeh goto fail_event; 134159c2be1eSYehuda Sadeh 134259c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 134359c2be1eSYehuda Sadeh return 0; 134459c2be1eSYehuda Sadeh 134559c2be1eSYehuda Sadeh fail_event: 13460ce1a794SAlex Elder ceph_osdc_cancel_event(rbd_dev->watch_event); 13470ce1a794SAlex Elder rbd_dev->watch_event = NULL; 134859c2be1eSYehuda Sadeh fail: 134959c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 135059c2be1eSYehuda Sadeh return ret; 135159c2be1eSYehuda Sadeh } 135259c2be1eSYehuda Sadeh 135379e3057cSYehuda Sadeh /* 135479e3057cSYehuda Sadeh * Request sync osd unwatch 135579e3057cSYehuda Sadeh */ 1356070c633fSAlex Elder static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev) 135779e3057cSYehuda Sadeh { 135879e3057cSYehuda Sadeh struct ceph_osd_req_op *ops; 135957cfc106SAlex Elder int ret; 136079e3057cSYehuda Sadeh 136157cfc106SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); 136257cfc106SAlex Elder if (!ops) 136357cfc106SAlex Elder return -ENOMEM; 136479e3057cSYehuda Sadeh 136579e3057cSYehuda Sadeh ops[0].watch.ver = 0; 13660ce1a794SAlex Elder ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); 136779e3057cSYehuda Sadeh ops[0].watch.flag = 0; 136879e3057cSYehuda Sadeh 13690ce1a794SAlex Elder ret = rbd_req_sync_op(rbd_dev, NULL, 137079e3057cSYehuda Sadeh CEPH_NOSNAP, 137179e3057cSYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 137279e3057cSYehuda Sadeh ops, 1373070c633fSAlex Elder rbd_dev->header_name, 1374070c633fSAlex Elder 0, 0, NULL, NULL, NULL); 1375070c633fSAlex Elder 137679e3057cSYehuda Sadeh 137779e3057cSYehuda Sadeh rbd_destroy_ops(ops); 13780ce1a794SAlex Elder ceph_osdc_cancel_event(rbd_dev->watch_event); 13790ce1a794SAlex Elder rbd_dev->watch_event = NULL; 138079e3057cSYehuda Sadeh return ret; 138179e3057cSYehuda Sadeh } 138279e3057cSYehuda Sadeh 138359c2be1eSYehuda Sadeh struct rbd_notify_info { 13840ce1a794SAlex Elder struct rbd_device *rbd_dev; 138559c2be1eSYehuda Sadeh }; 138659c2be1eSYehuda Sadeh 138759c2be1eSYehuda Sadeh static void rbd_notify_cb(u64 ver, u64 notify_id, u8 opcode, void *data) 138859c2be1eSYehuda Sadeh { 13890ce1a794SAlex Elder struct rbd_device *rbd_dev = (struct rbd_device *)data; 13900ce1a794SAlex Elder if (!rbd_dev) 139159c2be1eSYehuda Sadeh return; 139259c2be1eSYehuda Sadeh 1393bd919d45SAlex Elder dout("rbd_notify_cb %s notify_id=%llu opcode=%u\n", 1394bd919d45SAlex Elder rbd_dev->header_name, (unsigned long long) notify_id, 1395bd919d45SAlex Elder (unsigned int) opcode); 139659c2be1eSYehuda Sadeh } 139759c2be1eSYehuda Sadeh 139859c2be1eSYehuda Sadeh /* 139959c2be1eSYehuda Sadeh * Request sync osd notify 140059c2be1eSYehuda Sadeh */ 14014cb16250SAlex Elder static int rbd_req_sync_notify(struct rbd_device *rbd_dev) 140259c2be1eSYehuda Sadeh { 140359c2be1eSYehuda Sadeh struct ceph_osd_req_op *ops; 14040ce1a794SAlex Elder struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 140559c2be1eSYehuda Sadeh struct ceph_osd_event *event; 140659c2be1eSYehuda Sadeh struct rbd_notify_info info; 140759c2be1eSYehuda Sadeh int payload_len = sizeof(u32) + sizeof(u32); 140859c2be1eSYehuda Sadeh int ret; 140959c2be1eSYehuda Sadeh 141057cfc106SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY, payload_len); 141157cfc106SAlex Elder if (!ops) 141257cfc106SAlex Elder return -ENOMEM; 141359c2be1eSYehuda Sadeh 14140ce1a794SAlex Elder info.rbd_dev = rbd_dev; 141559c2be1eSYehuda Sadeh 141659c2be1eSYehuda Sadeh ret = ceph_osdc_create_event(osdc, rbd_notify_cb, 1, 141759c2be1eSYehuda Sadeh (void *)&info, &event); 141859c2be1eSYehuda Sadeh if (ret < 0) 141959c2be1eSYehuda Sadeh goto fail; 142059c2be1eSYehuda Sadeh 142159c2be1eSYehuda Sadeh ops[0].watch.ver = 1; 142259c2be1eSYehuda Sadeh ops[0].watch.flag = 1; 142359c2be1eSYehuda Sadeh ops[0].watch.cookie = event->cookie; 142459c2be1eSYehuda Sadeh ops[0].watch.prot_ver = RADOS_NOTIFY_VER; 142559c2be1eSYehuda Sadeh ops[0].watch.timeout = 12; 142659c2be1eSYehuda Sadeh 14270ce1a794SAlex Elder ret = rbd_req_sync_op(rbd_dev, NULL, 142859c2be1eSYehuda Sadeh CEPH_NOSNAP, 142959c2be1eSYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 143059c2be1eSYehuda Sadeh ops, 14314cb16250SAlex Elder rbd_dev->header_name, 14324cb16250SAlex Elder 0, 0, NULL, NULL, NULL); 143359c2be1eSYehuda Sadeh if (ret < 0) 143459c2be1eSYehuda Sadeh goto fail_event; 143559c2be1eSYehuda Sadeh 143659c2be1eSYehuda Sadeh ret = ceph_osdc_wait_event(event, CEPH_OSD_TIMEOUT_DEFAULT); 143759c2be1eSYehuda Sadeh dout("ceph_osdc_wait_event returned %d\n", ret); 143859c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 143959c2be1eSYehuda Sadeh return 0; 144059c2be1eSYehuda Sadeh 144159c2be1eSYehuda Sadeh fail_event: 144259c2be1eSYehuda Sadeh ceph_osdc_cancel_event(event); 144359c2be1eSYehuda Sadeh fail: 144459c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 144559c2be1eSYehuda Sadeh return ret; 144659c2be1eSYehuda Sadeh } 144759c2be1eSYehuda Sadeh 144859c2be1eSYehuda Sadeh /* 1449602adf40SYehuda Sadeh * Request sync osd read 1450602adf40SYehuda Sadeh */ 14510ce1a794SAlex Elder static int rbd_req_sync_exec(struct rbd_device *rbd_dev, 1452aded07eaSAlex Elder const char *object_name, 1453aded07eaSAlex Elder const char *class_name, 1454aded07eaSAlex Elder const char *method_name, 1455602adf40SYehuda Sadeh const char *data, 145659c2be1eSYehuda Sadeh int len, 145759c2be1eSYehuda Sadeh u64 *ver) 1458602adf40SYehuda Sadeh { 1459602adf40SYehuda Sadeh struct ceph_osd_req_op *ops; 1460aded07eaSAlex Elder int class_name_len = strlen(class_name); 1461aded07eaSAlex Elder int method_name_len = strlen(method_name); 146257cfc106SAlex Elder int ret; 146357cfc106SAlex Elder 146457cfc106SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, 1465aded07eaSAlex Elder class_name_len + method_name_len + len); 146657cfc106SAlex Elder if (!ops) 146757cfc106SAlex Elder return -ENOMEM; 1468602adf40SYehuda Sadeh 1469aded07eaSAlex Elder ops[0].cls.class_name = class_name; 1470aded07eaSAlex Elder ops[0].cls.class_len = (__u8) class_name_len; 1471aded07eaSAlex Elder ops[0].cls.method_name = method_name; 1472aded07eaSAlex Elder ops[0].cls.method_len = (__u8) method_name_len; 1473602adf40SYehuda Sadeh ops[0].cls.argc = 0; 1474602adf40SYehuda Sadeh ops[0].cls.indata = data; 1475602adf40SYehuda Sadeh ops[0].cls.indata_len = len; 1476602adf40SYehuda Sadeh 14770ce1a794SAlex Elder ret = rbd_req_sync_op(rbd_dev, NULL, 1478602adf40SYehuda Sadeh CEPH_NOSNAP, 1479602adf40SYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 1480602adf40SYehuda Sadeh ops, 1481d1f57ea6SAlex Elder object_name, 0, 0, NULL, NULL, ver); 1482602adf40SYehuda Sadeh 1483602adf40SYehuda Sadeh rbd_destroy_ops(ops); 1484602adf40SYehuda Sadeh 1485602adf40SYehuda Sadeh dout("cls_exec returned %d\n", ret); 1486602adf40SYehuda Sadeh return ret; 1487602adf40SYehuda Sadeh } 1488602adf40SYehuda Sadeh 14891fec7093SYehuda Sadeh static struct rbd_req_coll *rbd_alloc_coll(int num_reqs) 14901fec7093SYehuda Sadeh { 14911fec7093SYehuda Sadeh struct rbd_req_coll *coll = 14921fec7093SYehuda Sadeh kzalloc(sizeof(struct rbd_req_coll) + 14931fec7093SYehuda Sadeh sizeof(struct rbd_req_status) * num_reqs, 14941fec7093SYehuda Sadeh GFP_ATOMIC); 14951fec7093SYehuda Sadeh 14961fec7093SYehuda Sadeh if (!coll) 14971fec7093SYehuda Sadeh return NULL; 14981fec7093SYehuda Sadeh coll->total = num_reqs; 14991fec7093SYehuda Sadeh kref_init(&coll->kref); 15001fec7093SYehuda Sadeh return coll; 15011fec7093SYehuda Sadeh } 15021fec7093SYehuda Sadeh 1503602adf40SYehuda Sadeh /* 1504602adf40SYehuda Sadeh * block device queue callback 1505602adf40SYehuda Sadeh */ 1506602adf40SYehuda Sadeh static void rbd_rq_fn(struct request_queue *q) 1507602adf40SYehuda Sadeh { 1508602adf40SYehuda Sadeh struct rbd_device *rbd_dev = q->queuedata; 1509602adf40SYehuda Sadeh struct request *rq; 1510602adf40SYehuda Sadeh struct bio_pair *bp = NULL; 1511602adf40SYehuda Sadeh 151200f1f36fSAlex Elder while ((rq = blk_fetch_request(q))) { 1513602adf40SYehuda Sadeh struct bio *bio; 1514602adf40SYehuda Sadeh struct bio *rq_bio, *next_bio = NULL; 1515602adf40SYehuda Sadeh bool do_write; 1516bd919d45SAlex Elder unsigned int size; 1517bd919d45SAlex Elder u64 op_size = 0; 1518602adf40SYehuda Sadeh u64 ofs; 15191fec7093SYehuda Sadeh int num_segs, cur_seg = 0; 15201fec7093SYehuda Sadeh struct rbd_req_coll *coll; 1521d1d25646SJosh Durgin struct ceph_snap_context *snapc; 1522602adf40SYehuda Sadeh 1523602adf40SYehuda Sadeh dout("fetched request\n"); 1524602adf40SYehuda Sadeh 1525602adf40SYehuda Sadeh /* filter out block requests we don't understand */ 1526602adf40SYehuda Sadeh if ((rq->cmd_type != REQ_TYPE_FS)) { 1527602adf40SYehuda Sadeh __blk_end_request_all(rq, 0); 152800f1f36fSAlex Elder continue; 1529602adf40SYehuda Sadeh } 1530602adf40SYehuda Sadeh 1531602adf40SYehuda Sadeh /* deduce our operation (read, write) */ 1532602adf40SYehuda Sadeh do_write = (rq_data_dir(rq) == WRITE); 1533602adf40SYehuda Sadeh 1534602adf40SYehuda Sadeh size = blk_rq_bytes(rq); 1535593a9e7bSAlex Elder ofs = blk_rq_pos(rq) * SECTOR_SIZE; 1536602adf40SYehuda Sadeh rq_bio = rq->bio; 1537602adf40SYehuda Sadeh if (do_write && rbd_dev->read_only) { 1538602adf40SYehuda Sadeh __blk_end_request_all(rq, -EROFS); 153900f1f36fSAlex Elder continue; 1540602adf40SYehuda Sadeh } 1541602adf40SYehuda Sadeh 1542602adf40SYehuda Sadeh spin_unlock_irq(q->queue_lock); 1543602adf40SYehuda Sadeh 1544e88a36ecSJosh Durgin down_read(&rbd_dev->header_rwsem); 1545e88a36ecSJosh Durgin 1546d1d25646SJosh Durgin if (rbd_dev->snap_id != CEPH_NOSNAP && !rbd_dev->snap_exists) { 1547d1d25646SJosh Durgin up_read(&rbd_dev->header_rwsem); 1548e88a36ecSJosh Durgin dout("request for non-existent snapshot"); 1549e88a36ecSJosh Durgin spin_lock_irq(q->queue_lock); 1550e88a36ecSJosh Durgin __blk_end_request_all(rq, -ENXIO); 1551e88a36ecSJosh Durgin continue; 1552e88a36ecSJosh Durgin } 1553d1d25646SJosh Durgin 1554d1d25646SJosh Durgin snapc = ceph_get_snap_context(rbd_dev->header.snapc); 1555d1d25646SJosh Durgin 1556d1d25646SJosh Durgin up_read(&rbd_dev->header_rwsem); 1557e88a36ecSJosh Durgin 1558602adf40SYehuda Sadeh dout("%s 0x%x bytes at 0x%llx\n", 1559602adf40SYehuda Sadeh do_write ? "write" : "read", 1560bd919d45SAlex Elder size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); 1561602adf40SYehuda Sadeh 15621fec7093SYehuda Sadeh num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); 1563df111be6SAlex Elder if (num_segs <= 0) { 1564df111be6SAlex Elder spin_lock_irq(q->queue_lock); 1565df111be6SAlex Elder __blk_end_request_all(rq, num_segs); 1566df111be6SAlex Elder ceph_put_snap_context(snapc); 1567df111be6SAlex Elder continue; 1568df111be6SAlex Elder } 15691fec7093SYehuda Sadeh coll = rbd_alloc_coll(num_segs); 15701fec7093SYehuda Sadeh if (!coll) { 15711fec7093SYehuda Sadeh spin_lock_irq(q->queue_lock); 15721fec7093SYehuda Sadeh __blk_end_request_all(rq, -ENOMEM); 1573d1d25646SJosh Durgin ceph_put_snap_context(snapc); 157400f1f36fSAlex Elder continue; 15751fec7093SYehuda Sadeh } 15761fec7093SYehuda Sadeh 1577602adf40SYehuda Sadeh do { 1578602adf40SYehuda Sadeh /* a bio clone to be passed down to OSD req */ 1579bd919d45SAlex Elder dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); 158065ccfe21SAlex Elder op_size = rbd_segment_length(rbd_dev, ofs, size); 15811fec7093SYehuda Sadeh kref_get(&coll->kref); 1582602adf40SYehuda Sadeh bio = bio_chain_clone(&rq_bio, &next_bio, &bp, 1583602adf40SYehuda Sadeh op_size, GFP_ATOMIC); 1584602adf40SYehuda Sadeh if (!bio) { 15851fec7093SYehuda Sadeh rbd_coll_end_req_index(rq, coll, cur_seg, 15861fec7093SYehuda Sadeh -ENOMEM, op_size); 15871fec7093SYehuda Sadeh goto next_seg; 1588602adf40SYehuda Sadeh } 1589602adf40SYehuda Sadeh 15901fec7093SYehuda Sadeh 1591602adf40SYehuda Sadeh /* init OSD command: write or read */ 1592602adf40SYehuda Sadeh if (do_write) 1593602adf40SYehuda Sadeh rbd_req_write(rq, rbd_dev, 1594d1d25646SJosh Durgin snapc, 1595602adf40SYehuda Sadeh ofs, 15961fec7093SYehuda Sadeh op_size, bio, 15971fec7093SYehuda Sadeh coll, cur_seg); 1598602adf40SYehuda Sadeh else 1599602adf40SYehuda Sadeh rbd_req_read(rq, rbd_dev, 160077dfe99fSJosh Durgin rbd_dev->snap_id, 1601602adf40SYehuda Sadeh ofs, 16021fec7093SYehuda Sadeh op_size, bio, 16031fec7093SYehuda Sadeh coll, cur_seg); 1604602adf40SYehuda Sadeh 16051fec7093SYehuda Sadeh next_seg: 1606602adf40SYehuda Sadeh size -= op_size; 1607602adf40SYehuda Sadeh ofs += op_size; 1608602adf40SYehuda Sadeh 16091fec7093SYehuda Sadeh cur_seg++; 1610602adf40SYehuda Sadeh rq_bio = next_bio; 1611602adf40SYehuda Sadeh } while (size > 0); 16121fec7093SYehuda Sadeh kref_put(&coll->kref, rbd_coll_release); 1613602adf40SYehuda Sadeh 1614602adf40SYehuda Sadeh if (bp) 1615602adf40SYehuda Sadeh bio_pair_release(bp); 1616602adf40SYehuda Sadeh spin_lock_irq(q->queue_lock); 1617d1d25646SJosh Durgin 1618d1d25646SJosh Durgin ceph_put_snap_context(snapc); 1619602adf40SYehuda Sadeh } 1620602adf40SYehuda Sadeh } 1621602adf40SYehuda Sadeh 1622602adf40SYehuda Sadeh /* 1623602adf40SYehuda Sadeh * a queue callback. Makes sure that we don't create a bio that spans across 1624602adf40SYehuda Sadeh * multiple osd objects. One exception would be with a single page bios, 1625602adf40SYehuda Sadeh * which we handle later at bio_chain_clone 1626602adf40SYehuda Sadeh */ 1627602adf40SYehuda Sadeh static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, 1628602adf40SYehuda Sadeh struct bio_vec *bvec) 1629602adf40SYehuda Sadeh { 1630602adf40SYehuda Sadeh struct rbd_device *rbd_dev = q->queuedata; 1631593a9e7bSAlex Elder unsigned int chunk_sectors; 1632593a9e7bSAlex Elder sector_t sector; 1633593a9e7bSAlex Elder unsigned int bio_sectors; 1634602adf40SYehuda Sadeh int max; 1635602adf40SYehuda Sadeh 1636593a9e7bSAlex Elder chunk_sectors = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT); 1637593a9e7bSAlex Elder sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); 1638593a9e7bSAlex Elder bio_sectors = bmd->bi_size >> SECTOR_SHIFT; 1639593a9e7bSAlex Elder 1640602adf40SYehuda Sadeh max = (chunk_sectors - ((sector & (chunk_sectors - 1)) 1641593a9e7bSAlex Elder + bio_sectors)) << SECTOR_SHIFT; 1642602adf40SYehuda Sadeh if (max < 0) 1643602adf40SYehuda Sadeh max = 0; /* bio_add cannot handle a negative return */ 1644602adf40SYehuda Sadeh if (max <= bvec->bv_len && bio_sectors == 0) 1645602adf40SYehuda Sadeh return bvec->bv_len; 1646602adf40SYehuda Sadeh return max; 1647602adf40SYehuda Sadeh } 1648602adf40SYehuda Sadeh 1649602adf40SYehuda Sadeh static void rbd_free_disk(struct rbd_device *rbd_dev) 1650602adf40SYehuda Sadeh { 1651602adf40SYehuda Sadeh struct gendisk *disk = rbd_dev->disk; 1652602adf40SYehuda Sadeh 1653602adf40SYehuda Sadeh if (!disk) 1654602adf40SYehuda Sadeh return; 1655602adf40SYehuda Sadeh 1656602adf40SYehuda Sadeh rbd_header_free(&rbd_dev->header); 1657602adf40SYehuda Sadeh 1658602adf40SYehuda Sadeh if (disk->flags & GENHD_FL_UP) 1659602adf40SYehuda Sadeh del_gendisk(disk); 1660602adf40SYehuda Sadeh if (disk->queue) 1661602adf40SYehuda Sadeh blk_cleanup_queue(disk->queue); 1662602adf40SYehuda Sadeh put_disk(disk); 1663602adf40SYehuda Sadeh } 1664602adf40SYehuda Sadeh 1665602adf40SYehuda Sadeh /* 16664156d998SAlex Elder * Read the complete header for the given rbd device. 16674156d998SAlex Elder * 16684156d998SAlex Elder * Returns a pointer to a dynamically-allocated buffer containing 16694156d998SAlex Elder * the complete and validated header. Caller can pass the address 16704156d998SAlex Elder * of a variable that will be filled in with the version of the 16714156d998SAlex Elder * header object at the time it was read. 16724156d998SAlex Elder * 16734156d998SAlex Elder * Returns a pointer-coded errno if a failure occurs. 16744156d998SAlex Elder */ 16754156d998SAlex Elder static struct rbd_image_header_ondisk * 16764156d998SAlex Elder rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) 16774156d998SAlex Elder { 16784156d998SAlex Elder struct rbd_image_header_ondisk *ondisk = NULL; 16794156d998SAlex Elder u32 snap_count = 0; 16804156d998SAlex Elder u64 names_size = 0; 16814156d998SAlex Elder u32 want_count; 16824156d998SAlex Elder int ret; 16834156d998SAlex Elder 16844156d998SAlex Elder /* 16854156d998SAlex Elder * The complete header will include an array of its 64-bit 16864156d998SAlex Elder * snapshot ids, followed by the names of those snapshots as 16874156d998SAlex Elder * a contiguous block of NUL-terminated strings. Note that 16884156d998SAlex Elder * the number of snapshots could change by the time we read 16894156d998SAlex Elder * it in, in which case we re-read it. 16904156d998SAlex Elder */ 16914156d998SAlex Elder do { 16924156d998SAlex Elder size_t size; 16934156d998SAlex Elder 16944156d998SAlex Elder kfree(ondisk); 16954156d998SAlex Elder 16964156d998SAlex Elder size = sizeof (*ondisk); 16974156d998SAlex Elder size += snap_count * sizeof (struct rbd_image_snap_ondisk); 16984156d998SAlex Elder size += names_size; 16994156d998SAlex Elder ondisk = kmalloc(size, GFP_KERNEL); 17004156d998SAlex Elder if (!ondisk) 17014156d998SAlex Elder return ERR_PTR(-ENOMEM); 17024156d998SAlex Elder 17034156d998SAlex Elder ret = rbd_req_sync_read(rbd_dev, CEPH_NOSNAP, 17044156d998SAlex Elder rbd_dev->header_name, 17054156d998SAlex Elder 0, size, 17064156d998SAlex Elder (char *) ondisk, version); 17074156d998SAlex Elder 17084156d998SAlex Elder if (ret < 0) 17094156d998SAlex Elder goto out_err; 17104156d998SAlex Elder if (WARN_ON((size_t) ret < size)) { 17114156d998SAlex Elder ret = -ENXIO; 17124156d998SAlex Elder pr_warning("short header read for image %s" 17134156d998SAlex Elder " (want %zd got %d)\n", 17144156d998SAlex Elder rbd_dev->image_name, size, ret); 17154156d998SAlex Elder goto out_err; 17164156d998SAlex Elder } 17174156d998SAlex Elder if (!rbd_dev_ondisk_valid(ondisk)) { 17184156d998SAlex Elder ret = -ENXIO; 17194156d998SAlex Elder pr_warning("invalid header for image %s\n", 17204156d998SAlex Elder rbd_dev->image_name); 17214156d998SAlex Elder goto out_err; 17224156d998SAlex Elder } 17234156d998SAlex Elder 17244156d998SAlex Elder names_size = le64_to_cpu(ondisk->snap_names_len); 17254156d998SAlex Elder want_count = snap_count; 17264156d998SAlex Elder snap_count = le32_to_cpu(ondisk->snap_count); 17274156d998SAlex Elder } while (snap_count != want_count); 17284156d998SAlex Elder 17294156d998SAlex Elder return ondisk; 17304156d998SAlex Elder 17314156d998SAlex Elder out_err: 17324156d998SAlex Elder kfree(ondisk); 17334156d998SAlex Elder 17344156d998SAlex Elder return ERR_PTR(ret); 17354156d998SAlex Elder } 17364156d998SAlex Elder 17374156d998SAlex Elder /* 1738602adf40SYehuda Sadeh * reload the ondisk the header 1739602adf40SYehuda Sadeh */ 1740602adf40SYehuda Sadeh static int rbd_read_header(struct rbd_device *rbd_dev, 1741602adf40SYehuda Sadeh struct rbd_image_header *header) 1742602adf40SYehuda Sadeh { 17434156d998SAlex Elder struct rbd_image_header_ondisk *ondisk; 17444156d998SAlex Elder u64 ver = 0; 17454156d998SAlex Elder int ret; 1746602adf40SYehuda Sadeh 17474156d998SAlex Elder ondisk = rbd_dev_v1_header_read(rbd_dev, &ver); 17484156d998SAlex Elder if (IS_ERR(ondisk)) 17494156d998SAlex Elder return PTR_ERR(ondisk); 17504156d998SAlex Elder ret = rbd_header_from_disk(header, ondisk); 17514156d998SAlex Elder if (ret >= 0) 175259c2be1eSYehuda Sadeh header->obj_version = ver; 17534156d998SAlex Elder kfree(ondisk); 1754602adf40SYehuda Sadeh 17554156d998SAlex Elder return ret; 1756602adf40SYehuda Sadeh } 1757602adf40SYehuda Sadeh 1758602adf40SYehuda Sadeh /* 1759602adf40SYehuda Sadeh * create a snapshot 1760602adf40SYehuda Sadeh */ 17610ce1a794SAlex Elder static int rbd_header_add_snap(struct rbd_device *rbd_dev, 1762602adf40SYehuda Sadeh const char *snap_name, 1763602adf40SYehuda Sadeh gfp_t gfp_flags) 1764602adf40SYehuda Sadeh { 1765602adf40SYehuda Sadeh int name_len = strlen(snap_name); 1766602adf40SYehuda Sadeh u64 new_snapid; 1767602adf40SYehuda Sadeh int ret; 1768916d4d67SSage Weil void *data, *p, *e; 17691dbb4399SAlex Elder struct ceph_mon_client *monc; 1770602adf40SYehuda Sadeh 1771602adf40SYehuda Sadeh /* we should create a snapshot only if we're pointing at the head */ 17720ce1a794SAlex Elder if (rbd_dev->snap_id != CEPH_NOSNAP) 1773602adf40SYehuda Sadeh return -EINVAL; 1774602adf40SYehuda Sadeh 17750ce1a794SAlex Elder monc = &rbd_dev->rbd_client->client->monc; 17760ce1a794SAlex Elder ret = ceph_monc_create_snapid(monc, rbd_dev->pool_id, &new_snapid); 1777bd919d45SAlex Elder dout("created snapid=%llu\n", (unsigned long long) new_snapid); 1778602adf40SYehuda Sadeh if (ret < 0) 1779602adf40SYehuda Sadeh return ret; 1780602adf40SYehuda Sadeh 1781602adf40SYehuda Sadeh data = kmalloc(name_len + 16, gfp_flags); 1782602adf40SYehuda Sadeh if (!data) 1783602adf40SYehuda Sadeh return -ENOMEM; 1784602adf40SYehuda Sadeh 1785916d4d67SSage Weil p = data; 1786916d4d67SSage Weil e = data + name_len + 16; 1787602adf40SYehuda Sadeh 1788916d4d67SSage Weil ceph_encode_string_safe(&p, e, snap_name, name_len, bad); 1789916d4d67SSage Weil ceph_encode_64_safe(&p, e, new_snapid, bad); 1790602adf40SYehuda Sadeh 17910bed54dcSAlex Elder ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, 17920ce1a794SAlex Elder "rbd", "snap_add", 1793d67d4be5SAlex Elder data, p - data, NULL); 1794602adf40SYehuda Sadeh 1795916d4d67SSage Weil kfree(data); 1796602adf40SYehuda Sadeh 1797505cbb9bSAlex Elder return ret < 0 ? ret : 0; 1798602adf40SYehuda Sadeh bad: 1799602adf40SYehuda Sadeh return -ERANGE; 1800602adf40SYehuda Sadeh } 1801602adf40SYehuda Sadeh 1802dfc5606dSYehuda Sadeh static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev) 1803dfc5606dSYehuda Sadeh { 1804dfc5606dSYehuda Sadeh struct rbd_snap *snap; 1805a0593290SAlex Elder struct rbd_snap *next; 1806dfc5606dSYehuda Sadeh 1807a0593290SAlex Elder list_for_each_entry_safe(snap, next, &rbd_dev->snaps, node) 180814e7085dSAlex Elder __rbd_remove_snap_dev(snap); 1809dfc5606dSYehuda Sadeh } 1810dfc5606dSYehuda Sadeh 1811602adf40SYehuda Sadeh /* 1812602adf40SYehuda Sadeh * only read the first part of the ondisk header, without the snaps info 1813602adf40SYehuda Sadeh */ 1814b813623aSAlex Elder static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) 1815602adf40SYehuda Sadeh { 1816602adf40SYehuda Sadeh int ret; 1817602adf40SYehuda Sadeh struct rbd_image_header h; 1818602adf40SYehuda Sadeh 1819602adf40SYehuda Sadeh ret = rbd_read_header(rbd_dev, &h); 1820602adf40SYehuda Sadeh if (ret < 0) 1821602adf40SYehuda Sadeh return ret; 1822602adf40SYehuda Sadeh 1823a51aa0c0SJosh Durgin down_write(&rbd_dev->header_rwsem); 1824a51aa0c0SJosh Durgin 18259db4b3e3SSage Weil /* resized? */ 1826474ef7ceSJosh Durgin if (rbd_dev->snap_id == CEPH_NOSNAP) { 1827474ef7ceSJosh Durgin sector_t size = (sector_t) h.image_size / SECTOR_SIZE; 1828474ef7ceSJosh Durgin 1829474ef7ceSJosh Durgin dout("setting size to %llu sectors", (unsigned long long) size); 1830474ef7ceSJosh Durgin set_capacity(rbd_dev->disk, size); 1831474ef7ceSJosh Durgin } 18329db4b3e3SSage Weil 1833849b4260SAlex Elder /* rbd_dev->header.object_prefix shouldn't change */ 1834602adf40SYehuda Sadeh kfree(rbd_dev->header.snap_sizes); 1835849b4260SAlex Elder kfree(rbd_dev->header.snap_names); 1836d1d25646SJosh Durgin /* osd requests may still refer to snapc */ 1837d1d25646SJosh Durgin ceph_put_snap_context(rbd_dev->header.snapc); 1838602adf40SYehuda Sadeh 1839b813623aSAlex Elder if (hver) 1840b813623aSAlex Elder *hver = h.obj_version; 1841a71b891bSJosh Durgin rbd_dev->header.obj_version = h.obj_version; 184293a24e08SJosh Durgin rbd_dev->header.image_size = h.image_size; 1843602adf40SYehuda Sadeh rbd_dev->header.total_snaps = h.total_snaps; 1844602adf40SYehuda Sadeh rbd_dev->header.snapc = h.snapc; 1845602adf40SYehuda Sadeh rbd_dev->header.snap_names = h.snap_names; 1846602adf40SYehuda Sadeh rbd_dev->header.snap_sizes = h.snap_sizes; 1847849b4260SAlex Elder /* Free the extra copy of the object prefix */ 1848849b4260SAlex Elder WARN_ON(strcmp(rbd_dev->header.object_prefix, h.object_prefix)); 1849849b4260SAlex Elder kfree(h.object_prefix); 1850849b4260SAlex Elder 18519fcbb800SAlex Elder ret = rbd_dev_snap_devs_update(rbd_dev); 1852dfc5606dSYehuda Sadeh 1853c666601aSJosh Durgin up_write(&rbd_dev->header_rwsem); 1854602adf40SYehuda Sadeh 1855dfc5606dSYehuda Sadeh return ret; 1856602adf40SYehuda Sadeh } 1857602adf40SYehuda Sadeh 18581fe5e993SAlex Elder static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) 18591fe5e993SAlex Elder { 18601fe5e993SAlex Elder int ret; 18611fe5e993SAlex Elder 18621fe5e993SAlex Elder mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 18631fe5e993SAlex Elder ret = __rbd_refresh_header(rbd_dev, hver); 18641fe5e993SAlex Elder mutex_unlock(&ctl_mutex); 18651fe5e993SAlex Elder 18661fe5e993SAlex Elder return ret; 18671fe5e993SAlex Elder } 18681fe5e993SAlex Elder 1869602adf40SYehuda Sadeh static int rbd_init_disk(struct rbd_device *rbd_dev) 1870602adf40SYehuda Sadeh { 1871602adf40SYehuda Sadeh struct gendisk *disk; 1872602adf40SYehuda Sadeh struct request_queue *q; 1873602adf40SYehuda Sadeh int rc; 1874593a9e7bSAlex Elder u64 segment_size; 1875602adf40SYehuda Sadeh u64 total_size = 0; 1876602adf40SYehuda Sadeh 1877602adf40SYehuda Sadeh /* contact OSD, request size info about the object being mapped */ 1878602adf40SYehuda Sadeh rc = rbd_read_header(rbd_dev, &rbd_dev->header); 1879602adf40SYehuda Sadeh if (rc) 1880602adf40SYehuda Sadeh return rc; 1881602adf40SYehuda Sadeh 1882dfc5606dSYehuda Sadeh /* no need to lock here, as rbd_dev is not registered yet */ 18839fcbb800SAlex Elder rc = rbd_dev_snap_devs_update(rbd_dev); 1884dfc5606dSYehuda Sadeh if (rc) 1885dfc5606dSYehuda Sadeh return rc; 1886dfc5606dSYehuda Sadeh 1887cc9d734cSJosh Durgin rc = rbd_header_set_snap(rbd_dev, &total_size); 1888602adf40SYehuda Sadeh if (rc) 1889602adf40SYehuda Sadeh return rc; 1890602adf40SYehuda Sadeh 1891602adf40SYehuda Sadeh /* create gendisk info */ 1892602adf40SYehuda Sadeh rc = -ENOMEM; 1893602adf40SYehuda Sadeh disk = alloc_disk(RBD_MINORS_PER_MAJOR); 1894602adf40SYehuda Sadeh if (!disk) 1895602adf40SYehuda Sadeh goto out; 1896602adf40SYehuda Sadeh 1897f0f8cef5SAlex Elder snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", 1898de71a297SAlex Elder rbd_dev->dev_id); 1899602adf40SYehuda Sadeh disk->major = rbd_dev->major; 1900602adf40SYehuda Sadeh disk->first_minor = 0; 1901602adf40SYehuda Sadeh disk->fops = &rbd_bd_ops; 1902602adf40SYehuda Sadeh disk->private_data = rbd_dev; 1903602adf40SYehuda Sadeh 1904602adf40SYehuda Sadeh /* init rq */ 1905602adf40SYehuda Sadeh rc = -ENOMEM; 1906602adf40SYehuda Sadeh q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); 1907602adf40SYehuda Sadeh if (!q) 1908602adf40SYehuda Sadeh goto out_disk; 1909029bcbd8SJosh Durgin 1910593a9e7bSAlex Elder /* We use the default size, but let's be explicit about it. */ 1911593a9e7bSAlex Elder blk_queue_physical_block_size(q, SECTOR_SIZE); 1912593a9e7bSAlex Elder 1913029bcbd8SJosh Durgin /* set io sizes to object size */ 1914593a9e7bSAlex Elder segment_size = rbd_obj_bytes(&rbd_dev->header); 1915593a9e7bSAlex Elder blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); 1916593a9e7bSAlex Elder blk_queue_max_segment_size(q, segment_size); 1917593a9e7bSAlex Elder blk_queue_io_min(q, segment_size); 1918593a9e7bSAlex Elder blk_queue_io_opt(q, segment_size); 1919029bcbd8SJosh Durgin 1920602adf40SYehuda Sadeh blk_queue_merge_bvec(q, rbd_merge_bvec); 1921602adf40SYehuda Sadeh disk->queue = q; 1922602adf40SYehuda Sadeh 1923602adf40SYehuda Sadeh q->queuedata = rbd_dev; 1924602adf40SYehuda Sadeh 1925602adf40SYehuda Sadeh rbd_dev->disk = disk; 1926602adf40SYehuda Sadeh rbd_dev->q = q; 1927602adf40SYehuda Sadeh 1928602adf40SYehuda Sadeh /* finally, announce the disk to the world */ 1929593a9e7bSAlex Elder set_capacity(disk, total_size / SECTOR_SIZE); 1930602adf40SYehuda Sadeh add_disk(disk); 1931602adf40SYehuda Sadeh 1932602adf40SYehuda Sadeh pr_info("%s: added with size 0x%llx\n", 1933602adf40SYehuda Sadeh disk->disk_name, (unsigned long long)total_size); 1934602adf40SYehuda Sadeh return 0; 1935602adf40SYehuda Sadeh 1936602adf40SYehuda Sadeh out_disk: 1937602adf40SYehuda Sadeh put_disk(disk); 1938602adf40SYehuda Sadeh out: 1939602adf40SYehuda Sadeh return rc; 1940602adf40SYehuda Sadeh } 1941602adf40SYehuda Sadeh 1942dfc5606dSYehuda Sadeh /* 1943dfc5606dSYehuda Sadeh sysfs 1944dfc5606dSYehuda Sadeh */ 1945602adf40SYehuda Sadeh 1946593a9e7bSAlex Elder static struct rbd_device *dev_to_rbd_dev(struct device *dev) 1947593a9e7bSAlex Elder { 1948593a9e7bSAlex Elder return container_of(dev, struct rbd_device, dev); 1949593a9e7bSAlex Elder } 1950593a9e7bSAlex Elder 1951dfc5606dSYehuda Sadeh static ssize_t rbd_size_show(struct device *dev, 1952dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1953602adf40SYehuda Sadeh { 1954593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1955a51aa0c0SJosh Durgin sector_t size; 1956dfc5606dSYehuda Sadeh 1957a51aa0c0SJosh Durgin down_read(&rbd_dev->header_rwsem); 1958a51aa0c0SJosh Durgin size = get_capacity(rbd_dev->disk); 1959a51aa0c0SJosh Durgin up_read(&rbd_dev->header_rwsem); 1960a51aa0c0SJosh Durgin 1961a51aa0c0SJosh Durgin return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE); 1962602adf40SYehuda Sadeh } 1963602adf40SYehuda Sadeh 1964dfc5606dSYehuda Sadeh static ssize_t rbd_major_show(struct device *dev, 1965dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1966602adf40SYehuda Sadeh { 1967593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1968dfc5606dSYehuda Sadeh 1969dfc5606dSYehuda Sadeh return sprintf(buf, "%d\n", rbd_dev->major); 1970dfc5606dSYehuda Sadeh } 1971dfc5606dSYehuda Sadeh 1972dfc5606dSYehuda Sadeh static ssize_t rbd_client_id_show(struct device *dev, 1973dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1974dfc5606dSYehuda Sadeh { 1975593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1976dfc5606dSYehuda Sadeh 19771dbb4399SAlex Elder return sprintf(buf, "client%lld\n", 19781dbb4399SAlex Elder ceph_client_id(rbd_dev->rbd_client->client)); 1979dfc5606dSYehuda Sadeh } 1980dfc5606dSYehuda Sadeh 1981dfc5606dSYehuda Sadeh static ssize_t rbd_pool_show(struct device *dev, 1982dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1983dfc5606dSYehuda Sadeh { 1984593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1985dfc5606dSYehuda Sadeh 1986dfc5606dSYehuda Sadeh return sprintf(buf, "%s\n", rbd_dev->pool_name); 1987dfc5606dSYehuda Sadeh } 1988dfc5606dSYehuda Sadeh 19899bb2f334SAlex Elder static ssize_t rbd_pool_id_show(struct device *dev, 19909bb2f334SAlex Elder struct device_attribute *attr, char *buf) 19919bb2f334SAlex Elder { 19929bb2f334SAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 19939bb2f334SAlex Elder 19949bb2f334SAlex Elder return sprintf(buf, "%d\n", rbd_dev->pool_id); 19959bb2f334SAlex Elder } 19969bb2f334SAlex Elder 1997dfc5606dSYehuda Sadeh static ssize_t rbd_name_show(struct device *dev, 1998dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1999dfc5606dSYehuda Sadeh { 2000593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2001dfc5606dSYehuda Sadeh 20020bed54dcSAlex Elder return sprintf(buf, "%s\n", rbd_dev->image_name); 2003dfc5606dSYehuda Sadeh } 2004dfc5606dSYehuda Sadeh 2005dfc5606dSYehuda Sadeh static ssize_t rbd_snap_show(struct device *dev, 2006dfc5606dSYehuda Sadeh struct device_attribute *attr, 2007dfc5606dSYehuda Sadeh char *buf) 2008dfc5606dSYehuda Sadeh { 2009593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2010dfc5606dSYehuda Sadeh 2011dfc5606dSYehuda Sadeh return sprintf(buf, "%s\n", rbd_dev->snap_name); 2012dfc5606dSYehuda Sadeh } 2013dfc5606dSYehuda Sadeh 2014dfc5606dSYehuda Sadeh static ssize_t rbd_image_refresh(struct device *dev, 2015dfc5606dSYehuda Sadeh struct device_attribute *attr, 2016dfc5606dSYehuda Sadeh const char *buf, 2017dfc5606dSYehuda Sadeh size_t size) 2018dfc5606dSYehuda Sadeh { 2019593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2020b813623aSAlex Elder int ret; 2021602adf40SYehuda Sadeh 20221fe5e993SAlex Elder ret = rbd_refresh_header(rbd_dev, NULL); 2023b813623aSAlex Elder 2024b813623aSAlex Elder return ret < 0 ? ret : size; 2025dfc5606dSYehuda Sadeh } 2026602adf40SYehuda Sadeh 2027dfc5606dSYehuda Sadeh static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL); 2028dfc5606dSYehuda Sadeh static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL); 2029dfc5606dSYehuda Sadeh static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL); 2030dfc5606dSYehuda Sadeh static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL); 20319bb2f334SAlex Elder static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL); 2032dfc5606dSYehuda Sadeh static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL); 2033dfc5606dSYehuda Sadeh static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); 2034dfc5606dSYehuda Sadeh static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); 2035dfc5606dSYehuda Sadeh static DEVICE_ATTR(create_snap, S_IWUSR, NULL, rbd_snap_add); 2036dfc5606dSYehuda Sadeh 2037dfc5606dSYehuda Sadeh static struct attribute *rbd_attrs[] = { 2038dfc5606dSYehuda Sadeh &dev_attr_size.attr, 2039dfc5606dSYehuda Sadeh &dev_attr_major.attr, 2040dfc5606dSYehuda Sadeh &dev_attr_client_id.attr, 2041dfc5606dSYehuda Sadeh &dev_attr_pool.attr, 20429bb2f334SAlex Elder &dev_attr_pool_id.attr, 2043dfc5606dSYehuda Sadeh &dev_attr_name.attr, 2044dfc5606dSYehuda Sadeh &dev_attr_current_snap.attr, 2045dfc5606dSYehuda Sadeh &dev_attr_refresh.attr, 2046dfc5606dSYehuda Sadeh &dev_attr_create_snap.attr, 2047dfc5606dSYehuda Sadeh NULL 2048dfc5606dSYehuda Sadeh }; 2049dfc5606dSYehuda Sadeh 2050dfc5606dSYehuda Sadeh static struct attribute_group rbd_attr_group = { 2051dfc5606dSYehuda Sadeh .attrs = rbd_attrs, 2052dfc5606dSYehuda Sadeh }; 2053dfc5606dSYehuda Sadeh 2054dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_attr_groups[] = { 2055dfc5606dSYehuda Sadeh &rbd_attr_group, 2056dfc5606dSYehuda Sadeh NULL 2057dfc5606dSYehuda Sadeh }; 2058dfc5606dSYehuda Sadeh 2059dfc5606dSYehuda Sadeh static void rbd_sysfs_dev_release(struct device *dev) 2060dfc5606dSYehuda Sadeh { 2061dfc5606dSYehuda Sadeh } 2062dfc5606dSYehuda Sadeh 2063dfc5606dSYehuda Sadeh static struct device_type rbd_device_type = { 2064dfc5606dSYehuda Sadeh .name = "rbd", 2065dfc5606dSYehuda Sadeh .groups = rbd_attr_groups, 2066dfc5606dSYehuda Sadeh .release = rbd_sysfs_dev_release, 2067dfc5606dSYehuda Sadeh }; 2068dfc5606dSYehuda Sadeh 2069dfc5606dSYehuda Sadeh 2070dfc5606dSYehuda Sadeh /* 2071dfc5606dSYehuda Sadeh sysfs - snapshots 2072dfc5606dSYehuda Sadeh */ 2073dfc5606dSYehuda Sadeh 2074dfc5606dSYehuda Sadeh static ssize_t rbd_snap_size_show(struct device *dev, 2075dfc5606dSYehuda Sadeh struct device_attribute *attr, 2076dfc5606dSYehuda Sadeh char *buf) 2077dfc5606dSYehuda Sadeh { 2078dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 2079dfc5606dSYehuda Sadeh 20803591538fSJosh Durgin return sprintf(buf, "%llu\n", (unsigned long long)snap->size); 2081dfc5606dSYehuda Sadeh } 2082dfc5606dSYehuda Sadeh 2083dfc5606dSYehuda Sadeh static ssize_t rbd_snap_id_show(struct device *dev, 2084dfc5606dSYehuda Sadeh struct device_attribute *attr, 2085dfc5606dSYehuda Sadeh char *buf) 2086dfc5606dSYehuda Sadeh { 2087dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 2088dfc5606dSYehuda Sadeh 2089593a9e7bSAlex Elder return sprintf(buf, "%llu\n", (unsigned long long)snap->id); 2090dfc5606dSYehuda Sadeh } 2091dfc5606dSYehuda Sadeh 2092dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); 2093dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL); 2094dfc5606dSYehuda Sadeh 2095dfc5606dSYehuda Sadeh static struct attribute *rbd_snap_attrs[] = { 2096dfc5606dSYehuda Sadeh &dev_attr_snap_size.attr, 2097dfc5606dSYehuda Sadeh &dev_attr_snap_id.attr, 2098dfc5606dSYehuda Sadeh NULL, 2099dfc5606dSYehuda Sadeh }; 2100dfc5606dSYehuda Sadeh 2101dfc5606dSYehuda Sadeh static struct attribute_group rbd_snap_attr_group = { 2102dfc5606dSYehuda Sadeh .attrs = rbd_snap_attrs, 2103dfc5606dSYehuda Sadeh }; 2104dfc5606dSYehuda Sadeh 2105dfc5606dSYehuda Sadeh static void rbd_snap_dev_release(struct device *dev) 2106dfc5606dSYehuda Sadeh { 2107dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 2108dfc5606dSYehuda Sadeh kfree(snap->name); 2109dfc5606dSYehuda Sadeh kfree(snap); 2110dfc5606dSYehuda Sadeh } 2111dfc5606dSYehuda Sadeh 2112dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_snap_attr_groups[] = { 2113dfc5606dSYehuda Sadeh &rbd_snap_attr_group, 2114dfc5606dSYehuda Sadeh NULL 2115dfc5606dSYehuda Sadeh }; 2116dfc5606dSYehuda Sadeh 2117dfc5606dSYehuda Sadeh static struct device_type rbd_snap_device_type = { 2118dfc5606dSYehuda Sadeh .groups = rbd_snap_attr_groups, 2119dfc5606dSYehuda Sadeh .release = rbd_snap_dev_release, 2120dfc5606dSYehuda Sadeh }; 2121dfc5606dSYehuda Sadeh 212214e7085dSAlex Elder static void __rbd_remove_snap_dev(struct rbd_snap *snap) 2123dfc5606dSYehuda Sadeh { 2124dfc5606dSYehuda Sadeh list_del(&snap->node); 2125dfc5606dSYehuda Sadeh device_unregister(&snap->dev); 2126dfc5606dSYehuda Sadeh } 2127dfc5606dSYehuda Sadeh 212814e7085dSAlex Elder static int rbd_register_snap_dev(struct rbd_snap *snap, 2129dfc5606dSYehuda Sadeh struct device *parent) 2130dfc5606dSYehuda Sadeh { 2131dfc5606dSYehuda Sadeh struct device *dev = &snap->dev; 2132dfc5606dSYehuda Sadeh int ret; 2133dfc5606dSYehuda Sadeh 2134dfc5606dSYehuda Sadeh dev->type = &rbd_snap_device_type; 2135dfc5606dSYehuda Sadeh dev->parent = parent; 2136dfc5606dSYehuda Sadeh dev->release = rbd_snap_dev_release; 2137dfc5606dSYehuda Sadeh dev_set_name(dev, "snap_%s", snap->name); 2138dfc5606dSYehuda Sadeh ret = device_register(dev); 2139dfc5606dSYehuda Sadeh 2140dfc5606dSYehuda Sadeh return ret; 2141dfc5606dSYehuda Sadeh } 2142dfc5606dSYehuda Sadeh 21434e891e0aSAlex Elder static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev, 21444e891e0aSAlex Elder int i, const char *name) 2145dfc5606dSYehuda Sadeh { 21464e891e0aSAlex Elder struct rbd_snap *snap; 2147dfc5606dSYehuda Sadeh int ret; 21484e891e0aSAlex Elder 21494e891e0aSAlex Elder snap = kzalloc(sizeof (*snap), GFP_KERNEL); 2150dfc5606dSYehuda Sadeh if (!snap) 21514e891e0aSAlex Elder return ERR_PTR(-ENOMEM); 21524e891e0aSAlex Elder 21534e891e0aSAlex Elder ret = -ENOMEM; 2154dfc5606dSYehuda Sadeh snap->name = kstrdup(name, GFP_KERNEL); 21554e891e0aSAlex Elder if (!snap->name) 21564e891e0aSAlex Elder goto err; 21574e891e0aSAlex Elder 2158dfc5606dSYehuda Sadeh snap->size = rbd_dev->header.snap_sizes[i]; 2159dfc5606dSYehuda Sadeh snap->id = rbd_dev->header.snapc->snaps[i]; 2160dfc5606dSYehuda Sadeh if (device_is_registered(&rbd_dev->dev)) { 216114e7085dSAlex Elder ret = rbd_register_snap_dev(snap, &rbd_dev->dev); 2162dfc5606dSYehuda Sadeh if (ret < 0) 2163dfc5606dSYehuda Sadeh goto err; 2164dfc5606dSYehuda Sadeh } 21654e891e0aSAlex Elder 21664e891e0aSAlex Elder return snap; 21674e891e0aSAlex Elder 2168dfc5606dSYehuda Sadeh err: 2169dfc5606dSYehuda Sadeh kfree(snap->name); 2170dfc5606dSYehuda Sadeh kfree(snap); 21714e891e0aSAlex Elder 21724e891e0aSAlex Elder return ERR_PTR(ret); 2173dfc5606dSYehuda Sadeh } 2174dfc5606dSYehuda Sadeh 2175dfc5606dSYehuda Sadeh /* 217635938150SAlex Elder * Scan the rbd device's current snapshot list and compare it to the 217735938150SAlex Elder * newly-received snapshot context. Remove any existing snapshots 217835938150SAlex Elder * not present in the new snapshot context. Add a new snapshot for 217935938150SAlex Elder * any snaphots in the snapshot context not in the current list. 218035938150SAlex Elder * And verify there are no changes to snapshots we already know 218135938150SAlex Elder * about. 218235938150SAlex Elder * 218335938150SAlex Elder * Assumes the snapshots in the snapshot context are sorted by 218435938150SAlex Elder * snapshot id, highest id first. (Snapshots in the rbd_dev's list 218535938150SAlex Elder * are also maintained in that order.) 2186dfc5606dSYehuda Sadeh */ 21879fcbb800SAlex Elder static int rbd_dev_snap_devs_update(struct rbd_device *rbd_dev) 2188dfc5606dSYehuda Sadeh { 218935938150SAlex Elder struct ceph_snap_context *snapc = rbd_dev->header.snapc; 219035938150SAlex Elder const u32 snap_count = snapc->num_snaps; 219135938150SAlex Elder char *snap_name = rbd_dev->header.snap_names; 219235938150SAlex Elder struct list_head *head = &rbd_dev->snaps; 219335938150SAlex Elder struct list_head *links = head->next; 219435938150SAlex Elder u32 index = 0; 2195dfc5606dSYehuda Sadeh 21969fcbb800SAlex Elder dout("%s: snap count is %u\n", __func__, (unsigned int) snap_count); 219735938150SAlex Elder while (index < snap_count || links != head) { 219835938150SAlex Elder u64 snap_id; 219935938150SAlex Elder struct rbd_snap *snap; 2200dfc5606dSYehuda Sadeh 220135938150SAlex Elder snap_id = index < snap_count ? snapc->snaps[index] 220235938150SAlex Elder : CEPH_NOSNAP; 220335938150SAlex Elder snap = links != head ? list_entry(links, struct rbd_snap, node) 220435938150SAlex Elder : NULL; 2205aafb230eSAlex Elder rbd_assert(!snap || snap->id != CEPH_NOSNAP); 2206dfc5606dSYehuda Sadeh 220735938150SAlex Elder if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) { 220835938150SAlex Elder struct list_head *next = links->next; 2209dfc5606dSYehuda Sadeh 221035938150SAlex Elder /* Existing snapshot not in the new snap context */ 2211dfc5606dSYehuda Sadeh 221235938150SAlex Elder if (rbd_dev->snap_id == snap->id) 2213e88a36ecSJosh Durgin rbd_dev->snap_exists = false; 221435938150SAlex Elder __rbd_remove_snap_dev(snap); 22159fcbb800SAlex Elder dout("%ssnap id %llu has been removed\n", 22169fcbb800SAlex Elder rbd_dev->snap_id == snap->id ? "mapped " : "", 22179fcbb800SAlex Elder (unsigned long long) snap->id); 2218dfc5606dSYehuda Sadeh 221935938150SAlex Elder /* Done with this list entry; advance */ 222035938150SAlex Elder 222135938150SAlex Elder links = next; 222235938150SAlex Elder continue; 2223dfc5606dSYehuda Sadeh } 222435938150SAlex Elder 22259fcbb800SAlex Elder dout("entry %u: snap_id = %llu\n", (unsigned int) snap_count, 22269fcbb800SAlex Elder (unsigned long long) snap_id); 222735938150SAlex Elder if (!snap || (snap_id != CEPH_NOSNAP && snap->id < snap_id)) { 222835938150SAlex Elder struct rbd_snap *new_snap; 222935938150SAlex Elder 223035938150SAlex Elder /* We haven't seen this snapshot before */ 223135938150SAlex Elder 223235938150SAlex Elder new_snap = __rbd_add_snap_dev(rbd_dev, index, 223335938150SAlex Elder snap_name); 22349fcbb800SAlex Elder if (IS_ERR(new_snap)) { 22359fcbb800SAlex Elder int err = PTR_ERR(new_snap); 22369fcbb800SAlex Elder 22379fcbb800SAlex Elder dout(" failed to add dev, error %d\n", err); 22389fcbb800SAlex Elder 22399fcbb800SAlex Elder return err; 22409fcbb800SAlex Elder } 224135938150SAlex Elder 224235938150SAlex Elder /* New goes before existing, or at end of list */ 224335938150SAlex Elder 22449fcbb800SAlex Elder dout(" added dev%s\n", snap ? "" : " at end\n"); 224535938150SAlex Elder if (snap) 224635938150SAlex Elder list_add_tail(&new_snap->node, &snap->node); 224735938150SAlex Elder else 2248523f3258SAlex Elder list_add_tail(&new_snap->node, head); 224935938150SAlex Elder } else { 225035938150SAlex Elder /* Already have this one */ 225135938150SAlex Elder 22529fcbb800SAlex Elder dout(" already present\n"); 22539fcbb800SAlex Elder 2254aafb230eSAlex Elder rbd_assert(snap->size == 2255aafb230eSAlex Elder rbd_dev->header.snap_sizes[index]); 2256aafb230eSAlex Elder rbd_assert(!strcmp(snap->name, snap_name)); 225735938150SAlex Elder 225835938150SAlex Elder /* Done with this list entry; advance */ 225935938150SAlex Elder 226035938150SAlex Elder links = links->next; 2261dfc5606dSYehuda Sadeh } 226235938150SAlex Elder 226335938150SAlex Elder /* Advance to the next entry in the snapshot context */ 226435938150SAlex Elder 226535938150SAlex Elder index++; 226635938150SAlex Elder snap_name += strlen(snap_name) + 1; 2267dfc5606dSYehuda Sadeh } 22689fcbb800SAlex Elder dout("%s: done\n", __func__); 2269dfc5606dSYehuda Sadeh 2270dfc5606dSYehuda Sadeh return 0; 2271dfc5606dSYehuda Sadeh } 2272dfc5606dSYehuda Sadeh 2273dfc5606dSYehuda Sadeh static int rbd_bus_add_dev(struct rbd_device *rbd_dev) 2274dfc5606dSYehuda Sadeh { 2275f0f8cef5SAlex Elder int ret; 2276dfc5606dSYehuda Sadeh struct device *dev; 2277dfc5606dSYehuda Sadeh struct rbd_snap *snap; 2278dfc5606dSYehuda Sadeh 2279dfc5606dSYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2280dfc5606dSYehuda Sadeh dev = &rbd_dev->dev; 2281dfc5606dSYehuda Sadeh 2282dfc5606dSYehuda Sadeh dev->bus = &rbd_bus_type; 2283dfc5606dSYehuda Sadeh dev->type = &rbd_device_type; 2284dfc5606dSYehuda Sadeh dev->parent = &rbd_root_dev; 2285dfc5606dSYehuda Sadeh dev->release = rbd_dev_release; 2286de71a297SAlex Elder dev_set_name(dev, "%d", rbd_dev->dev_id); 2287dfc5606dSYehuda Sadeh ret = device_register(dev); 2288dfc5606dSYehuda Sadeh if (ret < 0) 2289f0f8cef5SAlex Elder goto out; 2290dfc5606dSYehuda Sadeh 2291dfc5606dSYehuda Sadeh list_for_each_entry(snap, &rbd_dev->snaps, node) { 229214e7085dSAlex Elder ret = rbd_register_snap_dev(snap, &rbd_dev->dev); 2293dfc5606dSYehuda Sadeh if (ret < 0) 2294602adf40SYehuda Sadeh break; 2295602adf40SYehuda Sadeh } 2296f0f8cef5SAlex Elder out: 2297dfc5606dSYehuda Sadeh mutex_unlock(&ctl_mutex); 2298dfc5606dSYehuda Sadeh return ret; 2299602adf40SYehuda Sadeh } 2300602adf40SYehuda Sadeh 2301dfc5606dSYehuda Sadeh static void rbd_bus_del_dev(struct rbd_device *rbd_dev) 2302dfc5606dSYehuda Sadeh { 2303dfc5606dSYehuda Sadeh device_unregister(&rbd_dev->dev); 2304dfc5606dSYehuda Sadeh } 2305dfc5606dSYehuda Sadeh 230659c2be1eSYehuda Sadeh static int rbd_init_watch_dev(struct rbd_device *rbd_dev) 230759c2be1eSYehuda Sadeh { 230859c2be1eSYehuda Sadeh int ret, rc; 230959c2be1eSYehuda Sadeh 231059c2be1eSYehuda Sadeh do { 23110e6f322dSAlex Elder ret = rbd_req_sync_watch(rbd_dev); 231259c2be1eSYehuda Sadeh if (ret == -ERANGE) { 23131fe5e993SAlex Elder rc = rbd_refresh_header(rbd_dev, NULL); 231459c2be1eSYehuda Sadeh if (rc < 0) 231559c2be1eSYehuda Sadeh return rc; 231659c2be1eSYehuda Sadeh } 231759c2be1eSYehuda Sadeh } while (ret == -ERANGE); 231859c2be1eSYehuda Sadeh 231959c2be1eSYehuda Sadeh return ret; 232059c2be1eSYehuda Sadeh } 232159c2be1eSYehuda Sadeh 2322e2839308SAlex Elder static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0); 23231ddbe94eSAlex Elder 23241ddbe94eSAlex Elder /* 2325499afd5bSAlex Elder * Get a unique rbd identifier for the given new rbd_dev, and add 2326499afd5bSAlex Elder * the rbd_dev to the global list. The minimum rbd id is 1. 23271ddbe94eSAlex Elder */ 2328e2839308SAlex Elder static void rbd_dev_id_get(struct rbd_device *rbd_dev) 2329b7f23c36SAlex Elder { 2330e2839308SAlex Elder rbd_dev->dev_id = atomic64_inc_return(&rbd_dev_id_max); 2331499afd5bSAlex Elder 2332499afd5bSAlex Elder spin_lock(&rbd_dev_list_lock); 2333499afd5bSAlex Elder list_add_tail(&rbd_dev->node, &rbd_dev_list); 2334499afd5bSAlex Elder spin_unlock(&rbd_dev_list_lock); 2335e2839308SAlex Elder dout("rbd_dev %p given dev id %llu\n", rbd_dev, 2336e2839308SAlex Elder (unsigned long long) rbd_dev->dev_id); 2337b7f23c36SAlex Elder } 2338b7f23c36SAlex Elder 23391ddbe94eSAlex Elder /* 2340499afd5bSAlex Elder * Remove an rbd_dev from the global list, and record that its 2341499afd5bSAlex Elder * identifier is no longer in use. 23421ddbe94eSAlex Elder */ 2343e2839308SAlex Elder static void rbd_dev_id_put(struct rbd_device *rbd_dev) 23441ddbe94eSAlex Elder { 2345d184f6bfSAlex Elder struct list_head *tmp; 2346de71a297SAlex Elder int rbd_id = rbd_dev->dev_id; 2347d184f6bfSAlex Elder int max_id; 2348d184f6bfSAlex Elder 2349aafb230eSAlex Elder rbd_assert(rbd_id > 0); 2350499afd5bSAlex Elder 2351e2839308SAlex Elder dout("rbd_dev %p released dev id %llu\n", rbd_dev, 2352e2839308SAlex Elder (unsigned long long) rbd_dev->dev_id); 2353499afd5bSAlex Elder spin_lock(&rbd_dev_list_lock); 2354499afd5bSAlex Elder list_del_init(&rbd_dev->node); 2355d184f6bfSAlex Elder 2356d184f6bfSAlex Elder /* 2357d184f6bfSAlex Elder * If the id being "put" is not the current maximum, there 2358d184f6bfSAlex Elder * is nothing special we need to do. 2359d184f6bfSAlex Elder */ 2360e2839308SAlex Elder if (rbd_id != atomic64_read(&rbd_dev_id_max)) { 2361d184f6bfSAlex Elder spin_unlock(&rbd_dev_list_lock); 2362d184f6bfSAlex Elder return; 2363d184f6bfSAlex Elder } 2364d184f6bfSAlex Elder 2365d184f6bfSAlex Elder /* 2366d184f6bfSAlex Elder * We need to update the current maximum id. Search the 2367d184f6bfSAlex Elder * list to find out what it is. We're more likely to find 2368d184f6bfSAlex Elder * the maximum at the end, so search the list backward. 2369d184f6bfSAlex Elder */ 2370d184f6bfSAlex Elder max_id = 0; 2371d184f6bfSAlex Elder list_for_each_prev(tmp, &rbd_dev_list) { 2372d184f6bfSAlex Elder struct rbd_device *rbd_dev; 2373d184f6bfSAlex Elder 2374d184f6bfSAlex Elder rbd_dev = list_entry(tmp, struct rbd_device, node); 2375d184f6bfSAlex Elder if (rbd_id > max_id) 2376d184f6bfSAlex Elder max_id = rbd_id; 2377d184f6bfSAlex Elder } 2378499afd5bSAlex Elder spin_unlock(&rbd_dev_list_lock); 23791ddbe94eSAlex Elder 23801ddbe94eSAlex Elder /* 2381e2839308SAlex Elder * The max id could have been updated by rbd_dev_id_get(), in 2382d184f6bfSAlex Elder * which case it now accurately reflects the new maximum. 2383d184f6bfSAlex Elder * Be careful not to overwrite the maximum value in that 2384d184f6bfSAlex Elder * case. 23851ddbe94eSAlex Elder */ 2386e2839308SAlex Elder atomic64_cmpxchg(&rbd_dev_id_max, rbd_id, max_id); 2387e2839308SAlex Elder dout(" max dev id has been reset\n"); 2388b7f23c36SAlex Elder } 2389b7f23c36SAlex Elder 2390a725f65eSAlex Elder /* 2391e28fff26SAlex Elder * Skips over white space at *buf, and updates *buf to point to the 2392e28fff26SAlex Elder * first found non-space character (if any). Returns the length of 2393593a9e7bSAlex Elder * the token (string of non-white space characters) found. Note 2394593a9e7bSAlex Elder * that *buf must be terminated with '\0'. 2395e28fff26SAlex Elder */ 2396e28fff26SAlex Elder static inline size_t next_token(const char **buf) 2397e28fff26SAlex Elder { 2398e28fff26SAlex Elder /* 2399e28fff26SAlex Elder * These are the characters that produce nonzero for 2400e28fff26SAlex Elder * isspace() in the "C" and "POSIX" locales. 2401e28fff26SAlex Elder */ 2402e28fff26SAlex Elder const char *spaces = " \f\n\r\t\v"; 2403e28fff26SAlex Elder 2404e28fff26SAlex Elder *buf += strspn(*buf, spaces); /* Find start of token */ 2405e28fff26SAlex Elder 2406e28fff26SAlex Elder return strcspn(*buf, spaces); /* Return token length */ 2407e28fff26SAlex Elder } 2408e28fff26SAlex Elder 2409e28fff26SAlex Elder /* 2410e28fff26SAlex Elder * Finds the next token in *buf, and if the provided token buffer is 2411e28fff26SAlex Elder * big enough, copies the found token into it. The result, if 2412593a9e7bSAlex Elder * copied, is guaranteed to be terminated with '\0'. Note that *buf 2413593a9e7bSAlex Elder * must be terminated with '\0' on entry. 2414e28fff26SAlex Elder * 2415e28fff26SAlex Elder * Returns the length of the token found (not including the '\0'). 2416e28fff26SAlex Elder * Return value will be 0 if no token is found, and it will be >= 2417e28fff26SAlex Elder * token_size if the token would not fit. 2418e28fff26SAlex Elder * 2419593a9e7bSAlex Elder * The *buf pointer will be updated to point beyond the end of the 2420e28fff26SAlex Elder * found token. Note that this occurs even if the token buffer is 2421e28fff26SAlex Elder * too small to hold it. 2422e28fff26SAlex Elder */ 2423e28fff26SAlex Elder static inline size_t copy_token(const char **buf, 2424e28fff26SAlex Elder char *token, 2425e28fff26SAlex Elder size_t token_size) 2426e28fff26SAlex Elder { 2427e28fff26SAlex Elder size_t len; 2428e28fff26SAlex Elder 2429e28fff26SAlex Elder len = next_token(buf); 2430e28fff26SAlex Elder if (len < token_size) { 2431e28fff26SAlex Elder memcpy(token, *buf, len); 2432e28fff26SAlex Elder *(token + len) = '\0'; 2433e28fff26SAlex Elder } 2434e28fff26SAlex Elder *buf += len; 2435e28fff26SAlex Elder 2436e28fff26SAlex Elder return len; 2437e28fff26SAlex Elder } 2438e28fff26SAlex Elder 2439e28fff26SAlex Elder /* 2440ea3352f4SAlex Elder * Finds the next token in *buf, dynamically allocates a buffer big 2441ea3352f4SAlex Elder * enough to hold a copy of it, and copies the token into the new 2442ea3352f4SAlex Elder * buffer. The copy is guaranteed to be terminated with '\0'. Note 2443ea3352f4SAlex Elder * that a duplicate buffer is created even for a zero-length token. 2444ea3352f4SAlex Elder * 2445ea3352f4SAlex Elder * Returns a pointer to the newly-allocated duplicate, or a null 2446ea3352f4SAlex Elder * pointer if memory for the duplicate was not available. If 2447ea3352f4SAlex Elder * the lenp argument is a non-null pointer, the length of the token 2448ea3352f4SAlex Elder * (not including the '\0') is returned in *lenp. 2449ea3352f4SAlex Elder * 2450ea3352f4SAlex Elder * If successful, the *buf pointer will be updated to point beyond 2451ea3352f4SAlex Elder * the end of the found token. 2452ea3352f4SAlex Elder * 2453ea3352f4SAlex Elder * Note: uses GFP_KERNEL for allocation. 2454ea3352f4SAlex Elder */ 2455ea3352f4SAlex Elder static inline char *dup_token(const char **buf, size_t *lenp) 2456ea3352f4SAlex Elder { 2457ea3352f4SAlex Elder char *dup; 2458ea3352f4SAlex Elder size_t len; 2459ea3352f4SAlex Elder 2460ea3352f4SAlex Elder len = next_token(buf); 2461ea3352f4SAlex Elder dup = kmalloc(len + 1, GFP_KERNEL); 2462ea3352f4SAlex Elder if (!dup) 2463ea3352f4SAlex Elder return NULL; 2464ea3352f4SAlex Elder 2465ea3352f4SAlex Elder memcpy(dup, *buf, len); 2466ea3352f4SAlex Elder *(dup + len) = '\0'; 2467ea3352f4SAlex Elder *buf += len; 2468ea3352f4SAlex Elder 2469ea3352f4SAlex Elder if (lenp) 2470ea3352f4SAlex Elder *lenp = len; 2471ea3352f4SAlex Elder 2472ea3352f4SAlex Elder return dup; 2473ea3352f4SAlex Elder } 2474ea3352f4SAlex Elder 2475ea3352f4SAlex Elder /* 24760bed54dcSAlex Elder * This fills in the pool_name, image_name, image_name_len, snap_name, 2477a725f65eSAlex Elder * rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based 2478a725f65eSAlex Elder * on the list of monitor addresses and other options provided via 2479a725f65eSAlex Elder * /sys/bus/rbd/add. 2480d22f76e7SAlex Elder * 2481d22f76e7SAlex Elder * Note: rbd_dev is assumed to have been initially zero-filled. 2482a725f65eSAlex Elder */ 2483a725f65eSAlex Elder static int rbd_add_parse_args(struct rbd_device *rbd_dev, 2484a725f65eSAlex Elder const char *buf, 24857ef3214aSAlex Elder const char **mon_addrs, 24865214ecc4SAlex Elder size_t *mon_addrs_size, 2487e28fff26SAlex Elder char *options, 2488e28fff26SAlex Elder size_t options_size) 2489a725f65eSAlex Elder { 2490e28fff26SAlex Elder size_t len; 2491d22f76e7SAlex Elder int ret; 2492e28fff26SAlex Elder 2493e28fff26SAlex Elder /* The first four tokens are required */ 2494e28fff26SAlex Elder 24957ef3214aSAlex Elder len = next_token(&buf); 24967ef3214aSAlex Elder if (!len) 2497a725f65eSAlex Elder return -EINVAL; 24985214ecc4SAlex Elder *mon_addrs_size = len + 1; 24997ef3214aSAlex Elder *mon_addrs = buf; 25007ef3214aSAlex Elder 25017ef3214aSAlex Elder buf += len; 2502a725f65eSAlex Elder 2503e28fff26SAlex Elder len = copy_token(&buf, options, options_size); 2504e28fff26SAlex Elder if (!len || len >= options_size) 2505e28fff26SAlex Elder return -EINVAL; 2506a725f65eSAlex Elder 2507bf3e5ae1SAlex Elder ret = -ENOMEM; 2508d22f76e7SAlex Elder rbd_dev->pool_name = dup_token(&buf, NULL); 2509d22f76e7SAlex Elder if (!rbd_dev->pool_name) 2510d22f76e7SAlex Elder goto out_err; 2511e28fff26SAlex Elder 25120bed54dcSAlex Elder rbd_dev->image_name = dup_token(&buf, &rbd_dev->image_name_len); 25130bed54dcSAlex Elder if (!rbd_dev->image_name) 2514bf3e5ae1SAlex Elder goto out_err; 2515e28fff26SAlex Elder 2516cb8627c7SAlex Elder /* Create the name of the header object */ 2517cb8627c7SAlex Elder 25180bed54dcSAlex Elder rbd_dev->header_name = kmalloc(rbd_dev->image_name_len 2519bf3e5ae1SAlex Elder + sizeof (RBD_SUFFIX), 2520bf3e5ae1SAlex Elder GFP_KERNEL); 25210bed54dcSAlex Elder if (!rbd_dev->header_name) 2522cb8627c7SAlex Elder goto out_err; 25230bed54dcSAlex Elder sprintf(rbd_dev->header_name, "%s%s", rbd_dev->image_name, RBD_SUFFIX); 2524a725f65eSAlex Elder 2525e28fff26SAlex Elder /* 2526820a5f3eSAlex Elder * The snapshot name is optional. If none is is supplied, 2527820a5f3eSAlex Elder * we use the default value. 2528e28fff26SAlex Elder */ 2529820a5f3eSAlex Elder rbd_dev->snap_name = dup_token(&buf, &len); 2530820a5f3eSAlex Elder if (!rbd_dev->snap_name) 2531820a5f3eSAlex Elder goto out_err; 2532820a5f3eSAlex Elder if (!len) { 2533820a5f3eSAlex Elder /* Replace the empty name with the default */ 2534820a5f3eSAlex Elder kfree(rbd_dev->snap_name); 2535820a5f3eSAlex Elder rbd_dev->snap_name 2536820a5f3eSAlex Elder = kmalloc(sizeof (RBD_SNAP_HEAD_NAME), GFP_KERNEL); 2537820a5f3eSAlex Elder if (!rbd_dev->snap_name) 2538820a5f3eSAlex Elder goto out_err; 2539820a5f3eSAlex Elder 2540e28fff26SAlex Elder memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME, 2541e28fff26SAlex Elder sizeof (RBD_SNAP_HEAD_NAME)); 2542849b4260SAlex Elder } 2543e28fff26SAlex Elder 2544a725f65eSAlex Elder return 0; 2545d22f76e7SAlex Elder 2546d22f76e7SAlex Elder out_err: 25470bed54dcSAlex Elder kfree(rbd_dev->header_name); 2548d78fd7aeSAlex Elder rbd_dev->header_name = NULL; 25490bed54dcSAlex Elder kfree(rbd_dev->image_name); 2550d78fd7aeSAlex Elder rbd_dev->image_name = NULL; 2551d78fd7aeSAlex Elder rbd_dev->image_name_len = 0; 2552d22f76e7SAlex Elder kfree(rbd_dev->pool_name); 2553d22f76e7SAlex Elder rbd_dev->pool_name = NULL; 2554d22f76e7SAlex Elder 2555d22f76e7SAlex Elder return ret; 2556a725f65eSAlex Elder } 2557a725f65eSAlex Elder 255859c2be1eSYehuda Sadeh static ssize_t rbd_add(struct bus_type *bus, 255959c2be1eSYehuda Sadeh const char *buf, 256059c2be1eSYehuda Sadeh size_t count) 2561602adf40SYehuda Sadeh { 2562cb8627c7SAlex Elder char *options; 2563cb8627c7SAlex Elder struct rbd_device *rbd_dev = NULL; 25647ef3214aSAlex Elder const char *mon_addrs = NULL; 25657ef3214aSAlex Elder size_t mon_addrs_size = 0; 256627cc2594SAlex Elder struct ceph_osd_client *osdc; 256727cc2594SAlex Elder int rc = -ENOMEM; 2568602adf40SYehuda Sadeh 2569602adf40SYehuda Sadeh if (!try_module_get(THIS_MODULE)) 2570602adf40SYehuda Sadeh return -ENODEV; 2571602adf40SYehuda Sadeh 257227cc2594SAlex Elder options = kmalloc(count, GFP_KERNEL); 257327cc2594SAlex Elder if (!options) 257427cc2594SAlex Elder goto err_nomem; 2575cb8627c7SAlex Elder rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); 2576cb8627c7SAlex Elder if (!rbd_dev) 2577cb8627c7SAlex Elder goto err_nomem; 2578602adf40SYehuda Sadeh 2579602adf40SYehuda Sadeh /* static rbd_device initialization */ 2580602adf40SYehuda Sadeh spin_lock_init(&rbd_dev->lock); 2581602adf40SYehuda Sadeh INIT_LIST_HEAD(&rbd_dev->node); 2582dfc5606dSYehuda Sadeh INIT_LIST_HEAD(&rbd_dev->snaps); 2583c666601aSJosh Durgin init_rwsem(&rbd_dev->header_rwsem); 2584602adf40SYehuda Sadeh 2585d184f6bfSAlex Elder /* generate unique id: find highest unique id, add one */ 2586e2839308SAlex Elder rbd_dev_id_get(rbd_dev); 2587602adf40SYehuda Sadeh 2588a725f65eSAlex Elder /* Fill in the device name, now that we have its id. */ 258981a89793SAlex Elder BUILD_BUG_ON(DEV_NAME_LEN 259081a89793SAlex Elder < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH); 2591de71a297SAlex Elder sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id); 2592e124a82fSAlex Elder 2593a725f65eSAlex Elder /* parse add command */ 25947ef3214aSAlex Elder rc = rbd_add_parse_args(rbd_dev, buf, &mon_addrs, &mon_addrs_size, 2595e28fff26SAlex Elder options, count); 2596a725f65eSAlex Elder if (rc) 2597a725f65eSAlex Elder goto err_put_id; 2598a725f65eSAlex Elder 2599f8c38929SAlex Elder rc = rbd_get_client(rbd_dev, mon_addrs, mon_addrs_size - 1, options); 2600f8c38929SAlex Elder if (rc < 0) 2601f0f8cef5SAlex Elder goto err_put_id; 2602602adf40SYehuda Sadeh 2603602adf40SYehuda Sadeh /* pick the pool */ 26041dbb4399SAlex Elder osdc = &rbd_dev->rbd_client->client->osdc; 2605602adf40SYehuda Sadeh rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); 2606602adf40SYehuda Sadeh if (rc < 0) 2607602adf40SYehuda Sadeh goto err_out_client; 26089bb2f334SAlex Elder rbd_dev->pool_id = rc; 2609602adf40SYehuda Sadeh 2610602adf40SYehuda Sadeh /* register our block device */ 261127cc2594SAlex Elder rc = register_blkdev(0, rbd_dev->name); 261227cc2594SAlex Elder if (rc < 0) 2613602adf40SYehuda Sadeh goto err_out_client; 261427cc2594SAlex Elder rbd_dev->major = rc; 2615602adf40SYehuda Sadeh 2616dfc5606dSYehuda Sadeh rc = rbd_bus_add_dev(rbd_dev); 2617dfc5606dSYehuda Sadeh if (rc) 2618766fc439SYehuda Sadeh goto err_out_blkdev; 2619766fc439SYehuda Sadeh 262032eec68dSAlex Elder /* 262132eec68dSAlex Elder * At this point cleanup in the event of an error is the job 262232eec68dSAlex Elder * of the sysfs code (initiated by rbd_bus_del_dev()). 262332eec68dSAlex Elder * 262432eec68dSAlex Elder * Set up and announce blkdev mapping. 262532eec68dSAlex Elder */ 2626602adf40SYehuda Sadeh rc = rbd_init_disk(rbd_dev); 2627602adf40SYehuda Sadeh if (rc) 2628766fc439SYehuda Sadeh goto err_out_bus; 2629602adf40SYehuda Sadeh 263059c2be1eSYehuda Sadeh rc = rbd_init_watch_dev(rbd_dev); 263159c2be1eSYehuda Sadeh if (rc) 263259c2be1eSYehuda Sadeh goto err_out_bus; 263359c2be1eSYehuda Sadeh 2634602adf40SYehuda Sadeh return count; 2635602adf40SYehuda Sadeh 2636766fc439SYehuda Sadeh err_out_bus: 2637766fc439SYehuda Sadeh /* this will also clean up rest of rbd_dev stuff */ 2638766fc439SYehuda Sadeh 2639766fc439SYehuda Sadeh rbd_bus_del_dev(rbd_dev); 2640766fc439SYehuda Sadeh kfree(options); 2641766fc439SYehuda Sadeh return rc; 2642766fc439SYehuda Sadeh 2643602adf40SYehuda Sadeh err_out_blkdev: 2644602adf40SYehuda Sadeh unregister_blkdev(rbd_dev->major, rbd_dev->name); 2645602adf40SYehuda Sadeh err_out_client: 2646602adf40SYehuda Sadeh rbd_put_client(rbd_dev); 2647f0f8cef5SAlex Elder err_put_id: 2648cb8627c7SAlex Elder if (rbd_dev->pool_name) { 2649820a5f3eSAlex Elder kfree(rbd_dev->snap_name); 26500bed54dcSAlex Elder kfree(rbd_dev->header_name); 26510bed54dcSAlex Elder kfree(rbd_dev->image_name); 2652d22f76e7SAlex Elder kfree(rbd_dev->pool_name); 2653cb8627c7SAlex Elder } 2654e2839308SAlex Elder rbd_dev_id_put(rbd_dev); 265527cc2594SAlex Elder err_nomem: 265627cc2594SAlex Elder kfree(rbd_dev); 2657cb8627c7SAlex Elder kfree(options); 265827cc2594SAlex Elder 2659602adf40SYehuda Sadeh dout("Error adding device %s\n", buf); 2660602adf40SYehuda Sadeh module_put(THIS_MODULE); 266127cc2594SAlex Elder 266227cc2594SAlex Elder return (ssize_t) rc; 2663602adf40SYehuda Sadeh } 2664602adf40SYehuda Sadeh 2665de71a297SAlex Elder static struct rbd_device *__rbd_get_dev(unsigned long dev_id) 2666602adf40SYehuda Sadeh { 2667602adf40SYehuda Sadeh struct list_head *tmp; 2668602adf40SYehuda Sadeh struct rbd_device *rbd_dev; 2669602adf40SYehuda Sadeh 2670e124a82fSAlex Elder spin_lock(&rbd_dev_list_lock); 2671602adf40SYehuda Sadeh list_for_each(tmp, &rbd_dev_list) { 2672602adf40SYehuda Sadeh rbd_dev = list_entry(tmp, struct rbd_device, node); 2673de71a297SAlex Elder if (rbd_dev->dev_id == dev_id) { 2674e124a82fSAlex Elder spin_unlock(&rbd_dev_list_lock); 2675602adf40SYehuda Sadeh return rbd_dev; 2676602adf40SYehuda Sadeh } 2677e124a82fSAlex Elder } 2678e124a82fSAlex Elder spin_unlock(&rbd_dev_list_lock); 2679602adf40SYehuda Sadeh return NULL; 2680602adf40SYehuda Sadeh } 2681602adf40SYehuda Sadeh 2682dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev) 2683602adf40SYehuda Sadeh { 2684593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2685602adf40SYehuda Sadeh 26861dbb4399SAlex Elder if (rbd_dev->watch_request) { 26871dbb4399SAlex Elder struct ceph_client *client = rbd_dev->rbd_client->client; 26881dbb4399SAlex Elder 26891dbb4399SAlex Elder ceph_osdc_unregister_linger_request(&client->osdc, 269059c2be1eSYehuda Sadeh rbd_dev->watch_request); 26911dbb4399SAlex Elder } 269259c2be1eSYehuda Sadeh if (rbd_dev->watch_event) 2693070c633fSAlex Elder rbd_req_sync_unwatch(rbd_dev); 269459c2be1eSYehuda Sadeh 2695602adf40SYehuda Sadeh rbd_put_client(rbd_dev); 2696602adf40SYehuda Sadeh 2697602adf40SYehuda Sadeh /* clean up and free blkdev */ 2698602adf40SYehuda Sadeh rbd_free_disk(rbd_dev); 2699602adf40SYehuda Sadeh unregister_blkdev(rbd_dev->major, rbd_dev->name); 270032eec68dSAlex Elder 270132eec68dSAlex Elder /* done with the id, and with the rbd_dev */ 2702820a5f3eSAlex Elder kfree(rbd_dev->snap_name); 27030bed54dcSAlex Elder kfree(rbd_dev->header_name); 2704d22f76e7SAlex Elder kfree(rbd_dev->pool_name); 27050bed54dcSAlex Elder kfree(rbd_dev->image_name); 2706e2839308SAlex Elder rbd_dev_id_put(rbd_dev); 2707602adf40SYehuda Sadeh kfree(rbd_dev); 2708602adf40SYehuda Sadeh 2709602adf40SYehuda Sadeh /* release module ref */ 2710602adf40SYehuda Sadeh module_put(THIS_MODULE); 2711602adf40SYehuda Sadeh } 2712602adf40SYehuda Sadeh 2713dfc5606dSYehuda Sadeh static ssize_t rbd_remove(struct bus_type *bus, 2714602adf40SYehuda Sadeh const char *buf, 2715602adf40SYehuda Sadeh size_t count) 2716602adf40SYehuda Sadeh { 2717602adf40SYehuda Sadeh struct rbd_device *rbd_dev = NULL; 2718602adf40SYehuda Sadeh int target_id, rc; 2719602adf40SYehuda Sadeh unsigned long ul; 2720602adf40SYehuda Sadeh int ret = count; 2721602adf40SYehuda Sadeh 2722602adf40SYehuda Sadeh rc = strict_strtoul(buf, 10, &ul); 2723602adf40SYehuda Sadeh if (rc) 2724602adf40SYehuda Sadeh return rc; 2725602adf40SYehuda Sadeh 2726602adf40SYehuda Sadeh /* convert to int; abort if we lost anything in the conversion */ 2727602adf40SYehuda Sadeh target_id = (int) ul; 2728602adf40SYehuda Sadeh if (target_id != ul) 2729602adf40SYehuda Sadeh return -EINVAL; 2730602adf40SYehuda Sadeh 2731602adf40SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2732602adf40SYehuda Sadeh 2733602adf40SYehuda Sadeh rbd_dev = __rbd_get_dev(target_id); 2734602adf40SYehuda Sadeh if (!rbd_dev) { 2735602adf40SYehuda Sadeh ret = -ENOENT; 2736602adf40SYehuda Sadeh goto done; 2737602adf40SYehuda Sadeh } 2738602adf40SYehuda Sadeh 2739dfc5606dSYehuda Sadeh __rbd_remove_all_snaps(rbd_dev); 2740dfc5606dSYehuda Sadeh rbd_bus_del_dev(rbd_dev); 2741602adf40SYehuda Sadeh 2742602adf40SYehuda Sadeh done: 2743602adf40SYehuda Sadeh mutex_unlock(&ctl_mutex); 2744aafb230eSAlex Elder 2745602adf40SYehuda Sadeh return ret; 2746602adf40SYehuda Sadeh } 2747602adf40SYehuda Sadeh 2748dfc5606dSYehuda Sadeh static ssize_t rbd_snap_add(struct device *dev, 2749dfc5606dSYehuda Sadeh struct device_attribute *attr, 2750602adf40SYehuda Sadeh const char *buf, 2751602adf40SYehuda Sadeh size_t count) 2752602adf40SYehuda Sadeh { 2753593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2754dfc5606dSYehuda Sadeh int ret; 2755dfc5606dSYehuda Sadeh char *name = kmalloc(count + 1, GFP_KERNEL); 2756602adf40SYehuda Sadeh if (!name) 2757602adf40SYehuda Sadeh return -ENOMEM; 2758602adf40SYehuda Sadeh 2759dfc5606dSYehuda Sadeh snprintf(name, count, "%s", buf); 2760602adf40SYehuda Sadeh 2761602adf40SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2762602adf40SYehuda Sadeh 2763602adf40SYehuda Sadeh ret = rbd_header_add_snap(rbd_dev, 2764602adf40SYehuda Sadeh name, GFP_KERNEL); 2765602adf40SYehuda Sadeh if (ret < 0) 276659c2be1eSYehuda Sadeh goto err_unlock; 2767602adf40SYehuda Sadeh 2768b813623aSAlex Elder ret = __rbd_refresh_header(rbd_dev, NULL); 2769602adf40SYehuda Sadeh if (ret < 0) 277059c2be1eSYehuda Sadeh goto err_unlock; 277159c2be1eSYehuda Sadeh 277259c2be1eSYehuda Sadeh /* shouldn't hold ctl_mutex when notifying.. notify might 277359c2be1eSYehuda Sadeh trigger a watch callback that would need to get that mutex */ 277459c2be1eSYehuda Sadeh mutex_unlock(&ctl_mutex); 277559c2be1eSYehuda Sadeh 277659c2be1eSYehuda Sadeh /* make a best effort, don't error if failed */ 27774cb16250SAlex Elder rbd_req_sync_notify(rbd_dev); 2778602adf40SYehuda Sadeh 2779602adf40SYehuda Sadeh ret = count; 278059c2be1eSYehuda Sadeh kfree(name); 278159c2be1eSYehuda Sadeh return ret; 278259c2be1eSYehuda Sadeh 278359c2be1eSYehuda Sadeh err_unlock: 2784602adf40SYehuda Sadeh mutex_unlock(&ctl_mutex); 2785602adf40SYehuda Sadeh kfree(name); 2786602adf40SYehuda Sadeh return ret; 2787602adf40SYehuda Sadeh } 2788602adf40SYehuda Sadeh 2789602adf40SYehuda Sadeh /* 2790602adf40SYehuda Sadeh * create control files in sysfs 2791dfc5606dSYehuda Sadeh * /sys/bus/rbd/... 2792602adf40SYehuda Sadeh */ 2793602adf40SYehuda Sadeh static int rbd_sysfs_init(void) 2794602adf40SYehuda Sadeh { 2795dfc5606dSYehuda Sadeh int ret; 2796602adf40SYehuda Sadeh 2797fed4c143SAlex Elder ret = device_register(&rbd_root_dev); 2798dfc5606dSYehuda Sadeh if (ret < 0) 2799dfc5606dSYehuda Sadeh return ret; 2800602adf40SYehuda Sadeh 2801fed4c143SAlex Elder ret = bus_register(&rbd_bus_type); 2802fed4c143SAlex Elder if (ret < 0) 2803fed4c143SAlex Elder device_unregister(&rbd_root_dev); 2804602adf40SYehuda Sadeh 2805602adf40SYehuda Sadeh return ret; 2806602adf40SYehuda Sadeh } 2807602adf40SYehuda Sadeh 2808602adf40SYehuda Sadeh static void rbd_sysfs_cleanup(void) 2809602adf40SYehuda Sadeh { 2810dfc5606dSYehuda Sadeh bus_unregister(&rbd_bus_type); 2811fed4c143SAlex Elder device_unregister(&rbd_root_dev); 2812602adf40SYehuda Sadeh } 2813602adf40SYehuda Sadeh 2814602adf40SYehuda Sadeh int __init rbd_init(void) 2815602adf40SYehuda Sadeh { 2816602adf40SYehuda Sadeh int rc; 2817602adf40SYehuda Sadeh 2818602adf40SYehuda Sadeh rc = rbd_sysfs_init(); 2819602adf40SYehuda Sadeh if (rc) 2820602adf40SYehuda Sadeh return rc; 2821f0f8cef5SAlex Elder pr_info("loaded " RBD_DRV_NAME_LONG "\n"); 2822602adf40SYehuda Sadeh return 0; 2823602adf40SYehuda Sadeh } 2824602adf40SYehuda Sadeh 2825602adf40SYehuda Sadeh void __exit rbd_exit(void) 2826602adf40SYehuda Sadeh { 2827602adf40SYehuda Sadeh rbd_sysfs_cleanup(); 2828602adf40SYehuda Sadeh } 2829602adf40SYehuda Sadeh 2830602adf40SYehuda Sadeh module_init(rbd_init); 2831602adf40SYehuda Sadeh module_exit(rbd_exit); 2832602adf40SYehuda Sadeh 2833602adf40SYehuda Sadeh MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 2834602adf40SYehuda Sadeh MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 2835602adf40SYehuda Sadeh MODULE_DESCRIPTION("rados block device"); 2836602adf40SYehuda Sadeh 2837602adf40SYehuda Sadeh /* following authorship retained from original osdblk.c */ 2838602adf40SYehuda Sadeh MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); 2839602adf40SYehuda Sadeh 2840602adf40SYehuda Sadeh MODULE_LICENSE("GPL"); 2841