1602adf40SYehuda Sadeh /* 2602adf40SYehuda Sadeh rbd.c -- Export ceph rados objects as a Linux block device 3602adf40SYehuda Sadeh 4602adf40SYehuda Sadeh 5602adf40SYehuda Sadeh based on drivers/block/osdblk.c: 6602adf40SYehuda Sadeh 7602adf40SYehuda Sadeh Copyright 2009 Red Hat, Inc. 8602adf40SYehuda Sadeh 9602adf40SYehuda Sadeh This program is free software; you can redistribute it and/or modify 10602adf40SYehuda Sadeh it under the terms of the GNU General Public License as published by 11602adf40SYehuda Sadeh the Free Software Foundation. 12602adf40SYehuda Sadeh 13602adf40SYehuda Sadeh This program is distributed in the hope that it will be useful, 14602adf40SYehuda Sadeh but WITHOUT ANY WARRANTY; without even the implied warranty of 15602adf40SYehuda Sadeh MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16602adf40SYehuda Sadeh GNU General Public License for more details. 17602adf40SYehuda Sadeh 18602adf40SYehuda Sadeh You should have received a copy of the GNU General Public License 19602adf40SYehuda Sadeh along with this program; see the file COPYING. If not, write to 20602adf40SYehuda Sadeh the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 21602adf40SYehuda Sadeh 22602adf40SYehuda Sadeh 23602adf40SYehuda Sadeh 24dfc5606dSYehuda Sadeh For usage instructions, please refer to: 25602adf40SYehuda Sadeh 26dfc5606dSYehuda Sadeh Documentation/ABI/testing/sysfs-bus-rbd 27602adf40SYehuda Sadeh 28602adf40SYehuda Sadeh */ 29602adf40SYehuda Sadeh 30602adf40SYehuda Sadeh #include <linux/ceph/libceph.h> 31602adf40SYehuda Sadeh #include <linux/ceph/osd_client.h> 32602adf40SYehuda Sadeh #include <linux/ceph/mon_client.h> 33602adf40SYehuda Sadeh #include <linux/ceph/decode.h> 3459c2be1eSYehuda Sadeh #include <linux/parser.h> 35602adf40SYehuda Sadeh 36602adf40SYehuda Sadeh #include <linux/kernel.h> 37602adf40SYehuda Sadeh #include <linux/device.h> 38602adf40SYehuda Sadeh #include <linux/module.h> 39602adf40SYehuda Sadeh #include <linux/fs.h> 40602adf40SYehuda Sadeh #include <linux/blkdev.h> 41602adf40SYehuda Sadeh 42602adf40SYehuda Sadeh #include "rbd_types.h" 43602adf40SYehuda Sadeh 44593a9e7bSAlex Elder /* 45593a9e7bSAlex Elder * The basic unit of block I/O is a sector. It is interpreted in a 46593a9e7bSAlex Elder * number of contexts in Linux (blk, bio, genhd), but the default is 47593a9e7bSAlex Elder * universally 512 bytes. These symbols are just slightly more 48593a9e7bSAlex Elder * meaningful than the bare numbers they represent. 49593a9e7bSAlex Elder */ 50593a9e7bSAlex Elder #define SECTOR_SHIFT 9 51593a9e7bSAlex Elder #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) 52593a9e7bSAlex Elder 53f0f8cef5SAlex Elder #define RBD_DRV_NAME "rbd" 54f0f8cef5SAlex Elder #define RBD_DRV_NAME_LONG "rbd (rados block device)" 55602adf40SYehuda Sadeh 56602adf40SYehuda Sadeh #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ 57602adf40SYehuda Sadeh 58602adf40SYehuda Sadeh #define RBD_MAX_SNAP_NAME_LEN 32 59602adf40SYehuda Sadeh #define RBD_MAX_OPT_LEN 1024 60602adf40SYehuda Sadeh 61602adf40SYehuda Sadeh #define RBD_SNAP_HEAD_NAME "-" 62602adf40SYehuda Sadeh 6381a89793SAlex Elder /* 6481a89793SAlex Elder * An RBD device name will be "rbd#", where the "rbd" comes from 6581a89793SAlex Elder * RBD_DRV_NAME above, and # is a unique integer identifier. 6681a89793SAlex Elder * MAX_INT_FORMAT_WIDTH is used in ensuring DEV_NAME_LEN is big 6781a89793SAlex Elder * enough to hold all possible device names. 6881a89793SAlex Elder */ 69602adf40SYehuda Sadeh #define DEV_NAME_LEN 32 7081a89793SAlex Elder #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) 71602adf40SYehuda Sadeh 7259c2be1eSYehuda Sadeh #define RBD_NOTIFY_TIMEOUT_DEFAULT 10 7359c2be1eSYehuda Sadeh 74602adf40SYehuda Sadeh /* 75602adf40SYehuda Sadeh * block device image metadata (in-memory version) 76602adf40SYehuda Sadeh */ 77602adf40SYehuda Sadeh struct rbd_image_header { 78602adf40SYehuda Sadeh u64 image_size; 79849b4260SAlex Elder char *object_prefix; 80602adf40SYehuda Sadeh __u8 obj_order; 81602adf40SYehuda Sadeh __u8 crypt_type; 82602adf40SYehuda Sadeh __u8 comp_type; 83602adf40SYehuda Sadeh struct ceph_snap_context *snapc; 84602adf40SYehuda Sadeh size_t snap_names_len; 85602adf40SYehuda Sadeh u32 total_snaps; 86602adf40SYehuda Sadeh 87602adf40SYehuda Sadeh char *snap_names; 88602adf40SYehuda Sadeh u64 *snap_sizes; 8959c2be1eSYehuda Sadeh 9059c2be1eSYehuda Sadeh u64 obj_version; 9159c2be1eSYehuda Sadeh }; 9259c2be1eSYehuda Sadeh 9359c2be1eSYehuda Sadeh struct rbd_options { 9459c2be1eSYehuda Sadeh int notify_timeout; 95602adf40SYehuda Sadeh }; 96602adf40SYehuda Sadeh 97602adf40SYehuda Sadeh /* 98f0f8cef5SAlex Elder * an instance of the client. multiple devices may share an rbd client. 99602adf40SYehuda Sadeh */ 100602adf40SYehuda Sadeh struct rbd_client { 101602adf40SYehuda Sadeh struct ceph_client *client; 10259c2be1eSYehuda Sadeh struct rbd_options *rbd_opts; 103602adf40SYehuda Sadeh struct kref kref; 104602adf40SYehuda Sadeh struct list_head node; 105602adf40SYehuda Sadeh }; 106602adf40SYehuda Sadeh 107602adf40SYehuda Sadeh /* 108f0f8cef5SAlex Elder * a request completion status 109602adf40SYehuda Sadeh */ 1101fec7093SYehuda Sadeh struct rbd_req_status { 1111fec7093SYehuda Sadeh int done; 1121fec7093SYehuda Sadeh int rc; 1131fec7093SYehuda Sadeh u64 bytes; 1141fec7093SYehuda Sadeh }; 1151fec7093SYehuda Sadeh 1161fec7093SYehuda Sadeh /* 1171fec7093SYehuda Sadeh * a collection of requests 1181fec7093SYehuda Sadeh */ 1191fec7093SYehuda Sadeh struct rbd_req_coll { 1201fec7093SYehuda Sadeh int total; 1211fec7093SYehuda Sadeh int num_done; 1221fec7093SYehuda Sadeh struct kref kref; 1231fec7093SYehuda Sadeh struct rbd_req_status status[0]; 124602adf40SYehuda Sadeh }; 125602adf40SYehuda Sadeh 126f0f8cef5SAlex Elder /* 127f0f8cef5SAlex Elder * a single io request 128f0f8cef5SAlex Elder */ 129f0f8cef5SAlex Elder struct rbd_request { 130f0f8cef5SAlex Elder struct request *rq; /* blk layer request */ 131f0f8cef5SAlex Elder struct bio *bio; /* cloned bio */ 132f0f8cef5SAlex Elder struct page **pages; /* list of used pages */ 133f0f8cef5SAlex Elder u64 len; 134f0f8cef5SAlex Elder int coll_index; 135f0f8cef5SAlex Elder struct rbd_req_coll *coll; 136f0f8cef5SAlex Elder }; 137f0f8cef5SAlex Elder 138dfc5606dSYehuda Sadeh struct rbd_snap { 139dfc5606dSYehuda Sadeh struct device dev; 140dfc5606dSYehuda Sadeh const char *name; 1413591538fSJosh Durgin u64 size; 142dfc5606dSYehuda Sadeh struct list_head node; 143dfc5606dSYehuda Sadeh u64 id; 144dfc5606dSYehuda Sadeh }; 145dfc5606dSYehuda Sadeh 146602adf40SYehuda Sadeh /* 147602adf40SYehuda Sadeh * a single device 148602adf40SYehuda Sadeh */ 149602adf40SYehuda Sadeh struct rbd_device { 150de71a297SAlex Elder int dev_id; /* blkdev unique id */ 151602adf40SYehuda Sadeh 152602adf40SYehuda Sadeh int major; /* blkdev assigned major */ 153602adf40SYehuda Sadeh struct gendisk *disk; /* blkdev's gendisk and rq */ 154602adf40SYehuda Sadeh struct request_queue *q; 155602adf40SYehuda Sadeh 156602adf40SYehuda Sadeh struct rbd_client *rbd_client; 157602adf40SYehuda Sadeh 158602adf40SYehuda Sadeh char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ 159602adf40SYehuda Sadeh 160602adf40SYehuda Sadeh spinlock_t lock; /* queue lock */ 161602adf40SYehuda Sadeh 162602adf40SYehuda Sadeh struct rbd_image_header header; 1630bed54dcSAlex Elder char *image_name; 1640bed54dcSAlex Elder size_t image_name_len; 1650bed54dcSAlex Elder char *header_name; 166d22f76e7SAlex Elder char *pool_name; 1679bb2f334SAlex Elder int pool_id; 168602adf40SYehuda Sadeh 16959c2be1eSYehuda Sadeh struct ceph_osd_event *watch_event; 17059c2be1eSYehuda Sadeh struct ceph_osd_request *watch_request; 17159c2be1eSYehuda Sadeh 172c666601aSJosh Durgin /* protects updating the header */ 173c666601aSJosh Durgin struct rw_semaphore header_rwsem; 174e88a36ecSJosh Durgin /* name of the snapshot this device reads from */ 175820a5f3eSAlex Elder char *snap_name; 176e88a36ecSJosh Durgin /* id of the snapshot this device reads from */ 17777dfe99fSJosh Durgin u64 snap_id; /* current snapshot id */ 178e88a36ecSJosh Durgin /* whether the snap_id this device reads from still exists */ 179e88a36ecSJosh Durgin bool snap_exists; 180602adf40SYehuda Sadeh int read_only; 181602adf40SYehuda Sadeh 182602adf40SYehuda Sadeh struct list_head node; 183dfc5606dSYehuda Sadeh 184dfc5606dSYehuda Sadeh /* list of snapshots */ 185dfc5606dSYehuda Sadeh struct list_head snaps; 186dfc5606dSYehuda Sadeh 187dfc5606dSYehuda Sadeh /* sysfs related */ 188dfc5606dSYehuda Sadeh struct device dev; 189dfc5606dSYehuda Sadeh }; 190dfc5606dSYehuda Sadeh 191602adf40SYehuda Sadeh static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ 192e124a82fSAlex Elder 193602adf40SYehuda Sadeh static LIST_HEAD(rbd_dev_list); /* devices */ 194e124a82fSAlex Elder static DEFINE_SPINLOCK(rbd_dev_list_lock); 195e124a82fSAlex Elder 196602adf40SYehuda Sadeh static LIST_HEAD(rbd_client_list); /* clients */ 197432b8587SAlex Elder static DEFINE_SPINLOCK(rbd_client_list_lock); 198602adf40SYehuda Sadeh 199dfc5606dSYehuda Sadeh static int __rbd_init_snaps_header(struct rbd_device *rbd_dev); 200dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev); 201dfc5606dSYehuda Sadeh static ssize_t rbd_snap_add(struct device *dev, 202dfc5606dSYehuda Sadeh struct device_attribute *attr, 203dfc5606dSYehuda Sadeh const char *buf, 204dfc5606dSYehuda Sadeh size_t count); 20514e7085dSAlex Elder static void __rbd_remove_snap_dev(struct rbd_snap *snap); 206dfc5606dSYehuda Sadeh 207f0f8cef5SAlex Elder static ssize_t rbd_add(struct bus_type *bus, const char *buf, 208f0f8cef5SAlex Elder size_t count); 209f0f8cef5SAlex Elder static ssize_t rbd_remove(struct bus_type *bus, const char *buf, 210f0f8cef5SAlex Elder size_t count); 211f0f8cef5SAlex Elder 212f0f8cef5SAlex Elder static struct bus_attribute rbd_bus_attrs[] = { 213f0f8cef5SAlex Elder __ATTR(add, S_IWUSR, NULL, rbd_add), 214f0f8cef5SAlex Elder __ATTR(remove, S_IWUSR, NULL, rbd_remove), 215f0f8cef5SAlex Elder __ATTR_NULL 216f0f8cef5SAlex Elder }; 217f0f8cef5SAlex Elder 218f0f8cef5SAlex Elder static struct bus_type rbd_bus_type = { 219f0f8cef5SAlex Elder .name = "rbd", 220f0f8cef5SAlex Elder .bus_attrs = rbd_bus_attrs, 221f0f8cef5SAlex Elder }; 222f0f8cef5SAlex Elder 223f0f8cef5SAlex Elder static void rbd_root_dev_release(struct device *dev) 224f0f8cef5SAlex Elder { 225f0f8cef5SAlex Elder } 226f0f8cef5SAlex Elder 227f0f8cef5SAlex Elder static struct device rbd_root_dev = { 228f0f8cef5SAlex Elder .init_name = "rbd", 229f0f8cef5SAlex Elder .release = rbd_root_dev_release, 230f0f8cef5SAlex Elder }; 231f0f8cef5SAlex Elder 232dfc5606dSYehuda Sadeh 233dfc5606dSYehuda Sadeh static struct device *rbd_get_dev(struct rbd_device *rbd_dev) 234dfc5606dSYehuda Sadeh { 235dfc5606dSYehuda Sadeh return get_device(&rbd_dev->dev); 236dfc5606dSYehuda Sadeh } 237dfc5606dSYehuda Sadeh 238dfc5606dSYehuda Sadeh static void rbd_put_dev(struct rbd_device *rbd_dev) 239dfc5606dSYehuda Sadeh { 240dfc5606dSYehuda Sadeh put_device(&rbd_dev->dev); 241dfc5606dSYehuda Sadeh } 242602adf40SYehuda Sadeh 243263c6ca0SJosh Durgin static int __rbd_refresh_header(struct rbd_device *rbd_dev); 24459c2be1eSYehuda Sadeh 245602adf40SYehuda Sadeh static int rbd_open(struct block_device *bdev, fmode_t mode) 246602adf40SYehuda Sadeh { 247f0f8cef5SAlex Elder struct rbd_device *rbd_dev = bdev->bd_disk->private_data; 248602adf40SYehuda Sadeh 249dfc5606dSYehuda Sadeh rbd_get_dev(rbd_dev); 250dfc5606dSYehuda Sadeh 251602adf40SYehuda Sadeh set_device_ro(bdev, rbd_dev->read_only); 252602adf40SYehuda Sadeh 253602adf40SYehuda Sadeh if ((mode & FMODE_WRITE) && rbd_dev->read_only) 254602adf40SYehuda Sadeh return -EROFS; 255602adf40SYehuda Sadeh 256602adf40SYehuda Sadeh return 0; 257602adf40SYehuda Sadeh } 258602adf40SYehuda Sadeh 259dfc5606dSYehuda Sadeh static int rbd_release(struct gendisk *disk, fmode_t mode) 260dfc5606dSYehuda Sadeh { 261dfc5606dSYehuda Sadeh struct rbd_device *rbd_dev = disk->private_data; 262dfc5606dSYehuda Sadeh 263dfc5606dSYehuda Sadeh rbd_put_dev(rbd_dev); 264dfc5606dSYehuda Sadeh 265dfc5606dSYehuda Sadeh return 0; 266dfc5606dSYehuda Sadeh } 267dfc5606dSYehuda Sadeh 268602adf40SYehuda Sadeh static const struct block_device_operations rbd_bd_ops = { 269602adf40SYehuda Sadeh .owner = THIS_MODULE, 270602adf40SYehuda Sadeh .open = rbd_open, 271dfc5606dSYehuda Sadeh .release = rbd_release, 272602adf40SYehuda Sadeh }; 273602adf40SYehuda Sadeh 274602adf40SYehuda Sadeh /* 275602adf40SYehuda Sadeh * Initialize an rbd client instance. 27643ae4701SAlex Elder * We own *ceph_opts. 277602adf40SYehuda Sadeh */ 27843ae4701SAlex Elder static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts, 27959c2be1eSYehuda Sadeh struct rbd_options *rbd_opts) 280602adf40SYehuda Sadeh { 281602adf40SYehuda Sadeh struct rbd_client *rbdc; 282602adf40SYehuda Sadeh int ret = -ENOMEM; 283602adf40SYehuda Sadeh 284602adf40SYehuda Sadeh dout("rbd_client_create\n"); 285602adf40SYehuda Sadeh rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); 286602adf40SYehuda Sadeh if (!rbdc) 287602adf40SYehuda Sadeh goto out_opt; 288602adf40SYehuda Sadeh 289602adf40SYehuda Sadeh kref_init(&rbdc->kref); 290602adf40SYehuda Sadeh INIT_LIST_HEAD(&rbdc->node); 291602adf40SYehuda Sadeh 292bc534d86SAlex Elder mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 293bc534d86SAlex Elder 29443ae4701SAlex Elder rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0); 295602adf40SYehuda Sadeh if (IS_ERR(rbdc->client)) 296bc534d86SAlex Elder goto out_mutex; 29743ae4701SAlex Elder ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */ 298602adf40SYehuda Sadeh 299602adf40SYehuda Sadeh ret = ceph_open_session(rbdc->client); 300602adf40SYehuda Sadeh if (ret < 0) 301602adf40SYehuda Sadeh goto out_err; 302602adf40SYehuda Sadeh 30359c2be1eSYehuda Sadeh rbdc->rbd_opts = rbd_opts; 30459c2be1eSYehuda Sadeh 305432b8587SAlex Elder spin_lock(&rbd_client_list_lock); 306602adf40SYehuda Sadeh list_add_tail(&rbdc->node, &rbd_client_list); 307432b8587SAlex Elder spin_unlock(&rbd_client_list_lock); 308602adf40SYehuda Sadeh 309bc534d86SAlex Elder mutex_unlock(&ctl_mutex); 310bc534d86SAlex Elder 311602adf40SYehuda Sadeh dout("rbd_client_create created %p\n", rbdc); 312602adf40SYehuda Sadeh return rbdc; 313602adf40SYehuda Sadeh 314602adf40SYehuda Sadeh out_err: 315602adf40SYehuda Sadeh ceph_destroy_client(rbdc->client); 316bc534d86SAlex Elder out_mutex: 317bc534d86SAlex Elder mutex_unlock(&ctl_mutex); 318602adf40SYehuda Sadeh kfree(rbdc); 319602adf40SYehuda Sadeh out_opt: 32043ae4701SAlex Elder if (ceph_opts) 32143ae4701SAlex Elder ceph_destroy_options(ceph_opts); 32228f259b7SVasiliy Kulikov return ERR_PTR(ret); 323602adf40SYehuda Sadeh } 324602adf40SYehuda Sadeh 325602adf40SYehuda Sadeh /* 326602adf40SYehuda Sadeh * Find a ceph client with specific addr and configuration. 327602adf40SYehuda Sadeh */ 32843ae4701SAlex Elder static struct rbd_client *__rbd_client_find(struct ceph_options *ceph_opts) 329602adf40SYehuda Sadeh { 330602adf40SYehuda Sadeh struct rbd_client *client_node; 331602adf40SYehuda Sadeh 33243ae4701SAlex Elder if (ceph_opts->flags & CEPH_OPT_NOSHARE) 333602adf40SYehuda Sadeh return NULL; 334602adf40SYehuda Sadeh 335602adf40SYehuda Sadeh list_for_each_entry(client_node, &rbd_client_list, node) 33643ae4701SAlex Elder if (!ceph_compare_options(ceph_opts, client_node->client)) 337602adf40SYehuda Sadeh return client_node; 338602adf40SYehuda Sadeh return NULL; 339602adf40SYehuda Sadeh } 340602adf40SYehuda Sadeh 341602adf40SYehuda Sadeh /* 34259c2be1eSYehuda Sadeh * mount options 34359c2be1eSYehuda Sadeh */ 34459c2be1eSYehuda Sadeh enum { 34559c2be1eSYehuda Sadeh Opt_notify_timeout, 34659c2be1eSYehuda Sadeh Opt_last_int, 34759c2be1eSYehuda Sadeh /* int args above */ 34859c2be1eSYehuda Sadeh Opt_last_string, 34959c2be1eSYehuda Sadeh /* string args above */ 35059c2be1eSYehuda Sadeh }; 35159c2be1eSYehuda Sadeh 35243ae4701SAlex Elder static match_table_t rbd_opts_tokens = { 35359c2be1eSYehuda Sadeh {Opt_notify_timeout, "notify_timeout=%d"}, 35459c2be1eSYehuda Sadeh /* int args above */ 35559c2be1eSYehuda Sadeh /* string args above */ 35659c2be1eSYehuda Sadeh {-1, NULL} 35759c2be1eSYehuda Sadeh }; 35859c2be1eSYehuda Sadeh 35959c2be1eSYehuda Sadeh static int parse_rbd_opts_token(char *c, void *private) 36059c2be1eSYehuda Sadeh { 36143ae4701SAlex Elder struct rbd_options *rbd_opts = private; 36259c2be1eSYehuda Sadeh substring_t argstr[MAX_OPT_ARGS]; 36359c2be1eSYehuda Sadeh int token, intval, ret; 36459c2be1eSYehuda Sadeh 36543ae4701SAlex Elder token = match_token(c, rbd_opts_tokens, argstr); 36659c2be1eSYehuda Sadeh if (token < 0) 36759c2be1eSYehuda Sadeh return -EINVAL; 36859c2be1eSYehuda Sadeh 36959c2be1eSYehuda Sadeh if (token < Opt_last_int) { 37059c2be1eSYehuda Sadeh ret = match_int(&argstr[0], &intval); 37159c2be1eSYehuda Sadeh if (ret < 0) { 37259c2be1eSYehuda Sadeh pr_err("bad mount option arg (not int) " 37359c2be1eSYehuda Sadeh "at '%s'\n", c); 37459c2be1eSYehuda Sadeh return ret; 37559c2be1eSYehuda Sadeh } 37659c2be1eSYehuda Sadeh dout("got int token %d val %d\n", token, intval); 37759c2be1eSYehuda Sadeh } else if (token > Opt_last_int && token < Opt_last_string) { 37859c2be1eSYehuda Sadeh dout("got string token %d val %s\n", token, 37959c2be1eSYehuda Sadeh argstr[0].from); 38059c2be1eSYehuda Sadeh } else { 38159c2be1eSYehuda Sadeh dout("got token %d\n", token); 38259c2be1eSYehuda Sadeh } 38359c2be1eSYehuda Sadeh 38459c2be1eSYehuda Sadeh switch (token) { 38559c2be1eSYehuda Sadeh case Opt_notify_timeout: 38643ae4701SAlex Elder rbd_opts->notify_timeout = intval; 38759c2be1eSYehuda Sadeh break; 38859c2be1eSYehuda Sadeh default: 38959c2be1eSYehuda Sadeh BUG_ON(token); 39059c2be1eSYehuda Sadeh } 39159c2be1eSYehuda Sadeh return 0; 39259c2be1eSYehuda Sadeh } 39359c2be1eSYehuda Sadeh 39459c2be1eSYehuda Sadeh /* 395602adf40SYehuda Sadeh * Get a ceph client with specific addr and configuration, if one does 396602adf40SYehuda Sadeh * not exist create it. 397602adf40SYehuda Sadeh */ 3985214ecc4SAlex Elder static struct rbd_client *rbd_get_client(const char *mon_addr, 3995214ecc4SAlex Elder size_t mon_addr_len, 4005214ecc4SAlex Elder char *options) 401602adf40SYehuda Sadeh { 402602adf40SYehuda Sadeh struct rbd_client *rbdc; 40343ae4701SAlex Elder struct ceph_options *ceph_opts; 40459c2be1eSYehuda Sadeh struct rbd_options *rbd_opts; 40559c2be1eSYehuda Sadeh 40659c2be1eSYehuda Sadeh rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL); 40759c2be1eSYehuda Sadeh if (!rbd_opts) 408d720bcb0SAlex Elder return ERR_PTR(-ENOMEM); 40959c2be1eSYehuda Sadeh 41059c2be1eSYehuda Sadeh rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT; 411602adf40SYehuda Sadeh 41243ae4701SAlex Elder ceph_opts = ceph_parse_options(options, mon_addr, 4135214ecc4SAlex Elder mon_addr + mon_addr_len, 41421079786SAlex Elder parse_rbd_opts_token, rbd_opts); 41543ae4701SAlex Elder if (IS_ERR(ceph_opts)) { 416d720bcb0SAlex Elder kfree(rbd_opts); 41743ae4701SAlex Elder return ERR_CAST(ceph_opts); 418ee57741cSAlex Elder } 419602adf40SYehuda Sadeh 420432b8587SAlex Elder spin_lock(&rbd_client_list_lock); 42143ae4701SAlex Elder rbdc = __rbd_client_find(ceph_opts); 422602adf40SYehuda Sadeh if (rbdc) { 423e6994d3dSAlex Elder /* using an existing client */ 424e6994d3dSAlex Elder kref_get(&rbdc->kref); 425432b8587SAlex Elder spin_unlock(&rbd_client_list_lock); 426e6994d3dSAlex Elder 42743ae4701SAlex Elder ceph_destroy_options(ceph_opts); 42897bb59a0SAlex Elder kfree(rbd_opts); 429602adf40SYehuda Sadeh 430d720bcb0SAlex Elder return rbdc; 431602adf40SYehuda Sadeh } 432432b8587SAlex Elder spin_unlock(&rbd_client_list_lock); 433602adf40SYehuda Sadeh 43443ae4701SAlex Elder rbdc = rbd_client_create(ceph_opts, rbd_opts); 435d97081b0SAlex Elder 436d720bcb0SAlex Elder if (IS_ERR(rbdc)) 43759c2be1eSYehuda Sadeh kfree(rbd_opts); 438d720bcb0SAlex Elder 439d720bcb0SAlex Elder return rbdc; 440602adf40SYehuda Sadeh } 441602adf40SYehuda Sadeh 442602adf40SYehuda Sadeh /* 443602adf40SYehuda Sadeh * Destroy ceph client 444d23a4b3fSAlex Elder * 445432b8587SAlex Elder * Caller must hold rbd_client_list_lock. 446602adf40SYehuda Sadeh */ 447602adf40SYehuda Sadeh static void rbd_client_release(struct kref *kref) 448602adf40SYehuda Sadeh { 449602adf40SYehuda Sadeh struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); 450602adf40SYehuda Sadeh 451602adf40SYehuda Sadeh dout("rbd_release_client %p\n", rbdc); 452cd9d9f5dSAlex Elder spin_lock(&rbd_client_list_lock); 453602adf40SYehuda Sadeh list_del(&rbdc->node); 454cd9d9f5dSAlex Elder spin_unlock(&rbd_client_list_lock); 455602adf40SYehuda Sadeh 456602adf40SYehuda Sadeh ceph_destroy_client(rbdc->client); 45759c2be1eSYehuda Sadeh kfree(rbdc->rbd_opts); 458602adf40SYehuda Sadeh kfree(rbdc); 459602adf40SYehuda Sadeh } 460602adf40SYehuda Sadeh 461602adf40SYehuda Sadeh /* 462602adf40SYehuda Sadeh * Drop reference to ceph client node. If it's not referenced anymore, release 463602adf40SYehuda Sadeh * it. 464602adf40SYehuda Sadeh */ 465602adf40SYehuda Sadeh static void rbd_put_client(struct rbd_device *rbd_dev) 466602adf40SYehuda Sadeh { 467602adf40SYehuda Sadeh kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); 468602adf40SYehuda Sadeh rbd_dev->rbd_client = NULL; 469602adf40SYehuda Sadeh } 470602adf40SYehuda Sadeh 4711fec7093SYehuda Sadeh /* 4721fec7093SYehuda Sadeh * Destroy requests collection 4731fec7093SYehuda Sadeh */ 4741fec7093SYehuda Sadeh static void rbd_coll_release(struct kref *kref) 4751fec7093SYehuda Sadeh { 4761fec7093SYehuda Sadeh struct rbd_req_coll *coll = 4771fec7093SYehuda Sadeh container_of(kref, struct rbd_req_coll, kref); 4781fec7093SYehuda Sadeh 4791fec7093SYehuda Sadeh dout("rbd_coll_release %p\n", coll); 4801fec7093SYehuda Sadeh kfree(coll); 4811fec7093SYehuda Sadeh } 482602adf40SYehuda Sadeh 4838e94af8eSAlex Elder static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) 4848e94af8eSAlex Elder { 4858e94af8eSAlex Elder return !memcmp(&ondisk->text, 4868e94af8eSAlex Elder RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT)); 4878e94af8eSAlex Elder } 4888e94af8eSAlex Elder 489602adf40SYehuda Sadeh /* 490602adf40SYehuda Sadeh * Create a new header structure, translate header format from the on-disk 491602adf40SYehuda Sadeh * header. 492602adf40SYehuda Sadeh */ 493602adf40SYehuda Sadeh static int rbd_header_from_disk(struct rbd_image_header *header, 494602adf40SYehuda Sadeh struct rbd_image_header_ondisk *ondisk, 495ed63f4fdSAlex Elder u32 allocated_snaps) 496602adf40SYehuda Sadeh { 49750f7c4c9SXi Wang u32 i, snap_count; 498602adf40SYehuda Sadeh 4998e94af8eSAlex Elder if (!rbd_dev_ondisk_valid(ondisk)) 50081e759fbSJosh Durgin return -ENXIO; 50181e759fbSJosh Durgin 50200f1f36fSAlex Elder snap_count = le32_to_cpu(ondisk->snap_count); 50350f7c4c9SXi Wang if (snap_count > (UINT_MAX - sizeof(struct ceph_snap_context)) 50450f7c4c9SXi Wang / sizeof (*ondisk)) 50550f7c4c9SXi Wang return -EINVAL; 506602adf40SYehuda Sadeh header->snapc = kmalloc(sizeof(struct ceph_snap_context) + 507f9f9a190SYan, Zheng snap_count * sizeof(u64), 508ed63f4fdSAlex Elder GFP_KERNEL); 509602adf40SYehuda Sadeh if (!header->snapc) 510602adf40SYehuda Sadeh return -ENOMEM; 51100f1f36fSAlex Elder 51200f1f36fSAlex Elder header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); 513602adf40SYehuda Sadeh if (snap_count) { 514602adf40SYehuda Sadeh header->snap_names = kmalloc(header->snap_names_len, 515ed63f4fdSAlex Elder GFP_KERNEL); 516602adf40SYehuda Sadeh if (!header->snap_names) 517602adf40SYehuda Sadeh goto err_snapc; 518602adf40SYehuda Sadeh header->snap_sizes = kmalloc(snap_count * sizeof(u64), 519ed63f4fdSAlex Elder GFP_KERNEL); 520602adf40SYehuda Sadeh if (!header->snap_sizes) 521602adf40SYehuda Sadeh goto err_names; 522602adf40SYehuda Sadeh } else { 523602adf40SYehuda Sadeh header->snap_names = NULL; 524602adf40SYehuda Sadeh header->snap_sizes = NULL; 525602adf40SYehuda Sadeh } 526849b4260SAlex Elder 527849b4260SAlex Elder header->object_prefix = kmalloc(sizeof (ondisk->block_name) + 1, 528ed63f4fdSAlex Elder GFP_KERNEL); 529849b4260SAlex Elder if (!header->object_prefix) 530849b4260SAlex Elder goto err_sizes; 531849b4260SAlex Elder 532ca1e49a6SAlex Elder memcpy(header->object_prefix, ondisk->block_name, 533602adf40SYehuda Sadeh sizeof(ondisk->block_name)); 534849b4260SAlex Elder header->object_prefix[sizeof (ondisk->block_name)] = '\0'; 535602adf40SYehuda Sadeh 536602adf40SYehuda Sadeh header->image_size = le64_to_cpu(ondisk->image_size); 537602adf40SYehuda Sadeh header->obj_order = ondisk->options.order; 538602adf40SYehuda Sadeh header->crypt_type = ondisk->options.crypt_type; 539602adf40SYehuda Sadeh header->comp_type = ondisk->options.comp_type; 540602adf40SYehuda Sadeh 541602adf40SYehuda Sadeh atomic_set(&header->snapc->nref, 1); 542505cbb9bSAlex Elder header->snapc->seq = le64_to_cpu(ondisk->snap_seq); 543602adf40SYehuda Sadeh header->snapc->num_snaps = snap_count; 544602adf40SYehuda Sadeh header->total_snaps = snap_count; 545602adf40SYehuda Sadeh 54621079786SAlex Elder if (snap_count && allocated_snaps == snap_count) { 547602adf40SYehuda Sadeh for (i = 0; i < snap_count; i++) { 548602adf40SYehuda Sadeh header->snapc->snaps[i] = 549602adf40SYehuda Sadeh le64_to_cpu(ondisk->snaps[i].id); 550602adf40SYehuda Sadeh header->snap_sizes[i] = 551602adf40SYehuda Sadeh le64_to_cpu(ondisk->snaps[i].image_size); 552602adf40SYehuda Sadeh } 553602adf40SYehuda Sadeh 554602adf40SYehuda Sadeh /* copy snapshot names */ 555602adf40SYehuda Sadeh memcpy(header->snap_names, &ondisk->snaps[i], 556602adf40SYehuda Sadeh header->snap_names_len); 557602adf40SYehuda Sadeh } 558602adf40SYehuda Sadeh 559602adf40SYehuda Sadeh return 0; 560602adf40SYehuda Sadeh 561849b4260SAlex Elder err_sizes: 562849b4260SAlex Elder kfree(header->snap_sizes); 563602adf40SYehuda Sadeh err_names: 564602adf40SYehuda Sadeh kfree(header->snap_names); 565602adf40SYehuda Sadeh err_snapc: 566602adf40SYehuda Sadeh kfree(header->snapc); 56700f1f36fSAlex Elder return -ENOMEM; 568602adf40SYehuda Sadeh } 569602adf40SYehuda Sadeh 570602adf40SYehuda Sadeh static int snap_by_name(struct rbd_image_header *header, const char *snap_name, 571602adf40SYehuda Sadeh u64 *seq, u64 *size) 572602adf40SYehuda Sadeh { 573602adf40SYehuda Sadeh int i; 574602adf40SYehuda Sadeh char *p = header->snap_names; 575602adf40SYehuda Sadeh 57600f1f36fSAlex Elder for (i = 0; i < header->total_snaps; i++) { 57700f1f36fSAlex Elder if (!strcmp(snap_name, p)) { 57800f1f36fSAlex Elder 57900f1f36fSAlex Elder /* Found it. Pass back its id and/or size */ 58000f1f36fSAlex Elder 581602adf40SYehuda Sadeh if (seq) 582602adf40SYehuda Sadeh *seq = header->snapc->snaps[i]; 583602adf40SYehuda Sadeh if (size) 584602adf40SYehuda Sadeh *size = header->snap_sizes[i]; 585602adf40SYehuda Sadeh return i; 586602adf40SYehuda Sadeh } 58700f1f36fSAlex Elder p += strlen(p) + 1; /* Skip ahead to the next name */ 58800f1f36fSAlex Elder } 58900f1f36fSAlex Elder return -ENOENT; 59000f1f36fSAlex Elder } 591602adf40SYehuda Sadeh 5920ce1a794SAlex Elder static int rbd_header_set_snap(struct rbd_device *rbd_dev, u64 *size) 593602adf40SYehuda Sadeh { 59478dc447dSAlex Elder int ret; 595602adf40SYehuda Sadeh 5960ce1a794SAlex Elder down_write(&rbd_dev->header_rwsem); 597602adf40SYehuda Sadeh 5980ce1a794SAlex Elder if (!memcmp(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME, 599cc9d734cSJosh Durgin sizeof (RBD_SNAP_HEAD_NAME))) { 6000ce1a794SAlex Elder rbd_dev->snap_id = CEPH_NOSNAP; 601e88a36ecSJosh Durgin rbd_dev->snap_exists = false; 6020ce1a794SAlex Elder rbd_dev->read_only = 0; 603602adf40SYehuda Sadeh if (size) 60478dc447dSAlex Elder *size = rbd_dev->header.image_size; 605602adf40SYehuda Sadeh } else { 60678dc447dSAlex Elder u64 snap_id = 0; 60778dc447dSAlex Elder 60878dc447dSAlex Elder ret = snap_by_name(&rbd_dev->header, rbd_dev->snap_name, 60978dc447dSAlex Elder &snap_id, size); 610602adf40SYehuda Sadeh if (ret < 0) 611602adf40SYehuda Sadeh goto done; 61278dc447dSAlex Elder rbd_dev->snap_id = snap_id; 613e88a36ecSJosh Durgin rbd_dev->snap_exists = true; 6140ce1a794SAlex Elder rbd_dev->read_only = 1; 615602adf40SYehuda Sadeh } 616602adf40SYehuda Sadeh 617602adf40SYehuda Sadeh ret = 0; 618602adf40SYehuda Sadeh done: 6190ce1a794SAlex Elder up_write(&rbd_dev->header_rwsem); 620602adf40SYehuda Sadeh return ret; 621602adf40SYehuda Sadeh } 622602adf40SYehuda Sadeh 623602adf40SYehuda Sadeh static void rbd_header_free(struct rbd_image_header *header) 624602adf40SYehuda Sadeh { 625849b4260SAlex Elder kfree(header->object_prefix); 626602adf40SYehuda Sadeh kfree(header->snap_sizes); 627849b4260SAlex Elder kfree(header->snap_names); 628d1d25646SJosh Durgin ceph_put_snap_context(header->snapc); 629602adf40SYehuda Sadeh } 630602adf40SYehuda Sadeh 631602adf40SYehuda Sadeh /* 632602adf40SYehuda Sadeh * get the actual striped segment name, offset and length 633602adf40SYehuda Sadeh */ 634602adf40SYehuda Sadeh static u64 rbd_get_segment(struct rbd_image_header *header, 635ca1e49a6SAlex Elder const char *object_prefix, 636602adf40SYehuda Sadeh u64 ofs, u64 len, 637602adf40SYehuda Sadeh char *seg_name, u64 *segofs) 638602adf40SYehuda Sadeh { 639602adf40SYehuda Sadeh u64 seg = ofs >> header->obj_order; 640602adf40SYehuda Sadeh 641602adf40SYehuda Sadeh if (seg_name) 642602adf40SYehuda Sadeh snprintf(seg_name, RBD_MAX_SEG_NAME_LEN, 643ca1e49a6SAlex Elder "%s.%012llx", object_prefix, seg); 644602adf40SYehuda Sadeh 645602adf40SYehuda Sadeh ofs = ofs & ((1 << header->obj_order) - 1); 646602adf40SYehuda Sadeh len = min_t(u64, len, (1 << header->obj_order) - ofs); 647602adf40SYehuda Sadeh 648602adf40SYehuda Sadeh if (segofs) 649602adf40SYehuda Sadeh *segofs = ofs; 650602adf40SYehuda Sadeh 651602adf40SYehuda Sadeh return len; 652602adf40SYehuda Sadeh } 653602adf40SYehuda Sadeh 6541fec7093SYehuda Sadeh static int rbd_get_num_segments(struct rbd_image_header *header, 6551fec7093SYehuda Sadeh u64 ofs, u64 len) 6561fec7093SYehuda Sadeh { 6571fec7093SYehuda Sadeh u64 start_seg = ofs >> header->obj_order; 6581fec7093SYehuda Sadeh u64 end_seg = (ofs + len - 1) >> header->obj_order; 6591fec7093SYehuda Sadeh return end_seg - start_seg + 1; 6601fec7093SYehuda Sadeh } 6611fec7093SYehuda Sadeh 662602adf40SYehuda Sadeh /* 663029bcbd8SJosh Durgin * returns the size of an object in the image 664029bcbd8SJosh Durgin */ 665029bcbd8SJosh Durgin static u64 rbd_obj_bytes(struct rbd_image_header *header) 666029bcbd8SJosh Durgin { 667029bcbd8SJosh Durgin return 1 << header->obj_order; 668029bcbd8SJosh Durgin } 669029bcbd8SJosh Durgin 670029bcbd8SJosh Durgin /* 671602adf40SYehuda Sadeh * bio helpers 672602adf40SYehuda Sadeh */ 673602adf40SYehuda Sadeh 674602adf40SYehuda Sadeh static void bio_chain_put(struct bio *chain) 675602adf40SYehuda Sadeh { 676602adf40SYehuda Sadeh struct bio *tmp; 677602adf40SYehuda Sadeh 678602adf40SYehuda Sadeh while (chain) { 679602adf40SYehuda Sadeh tmp = chain; 680602adf40SYehuda Sadeh chain = chain->bi_next; 681602adf40SYehuda Sadeh bio_put(tmp); 682602adf40SYehuda Sadeh } 683602adf40SYehuda Sadeh } 684602adf40SYehuda Sadeh 685602adf40SYehuda Sadeh /* 686602adf40SYehuda Sadeh * zeros a bio chain, starting at specific offset 687602adf40SYehuda Sadeh */ 688602adf40SYehuda Sadeh static void zero_bio_chain(struct bio *chain, int start_ofs) 689602adf40SYehuda Sadeh { 690602adf40SYehuda Sadeh struct bio_vec *bv; 691602adf40SYehuda Sadeh unsigned long flags; 692602adf40SYehuda Sadeh void *buf; 693602adf40SYehuda Sadeh int i; 694602adf40SYehuda Sadeh int pos = 0; 695602adf40SYehuda Sadeh 696602adf40SYehuda Sadeh while (chain) { 697602adf40SYehuda Sadeh bio_for_each_segment(bv, chain, i) { 698602adf40SYehuda Sadeh if (pos + bv->bv_len > start_ofs) { 699602adf40SYehuda Sadeh int remainder = max(start_ofs - pos, 0); 700602adf40SYehuda Sadeh buf = bvec_kmap_irq(bv, &flags); 701602adf40SYehuda Sadeh memset(buf + remainder, 0, 702602adf40SYehuda Sadeh bv->bv_len - remainder); 70385b5aaa6SDan Carpenter bvec_kunmap_irq(buf, &flags); 704602adf40SYehuda Sadeh } 705602adf40SYehuda Sadeh pos += bv->bv_len; 706602adf40SYehuda Sadeh } 707602adf40SYehuda Sadeh 708602adf40SYehuda Sadeh chain = chain->bi_next; 709602adf40SYehuda Sadeh } 710602adf40SYehuda Sadeh } 711602adf40SYehuda Sadeh 712602adf40SYehuda Sadeh /* 713602adf40SYehuda Sadeh * bio_chain_clone - clone a chain of bios up to a certain length. 714602adf40SYehuda Sadeh * might return a bio_pair that will need to be released. 715602adf40SYehuda Sadeh */ 716602adf40SYehuda Sadeh static struct bio *bio_chain_clone(struct bio **old, struct bio **next, 717602adf40SYehuda Sadeh struct bio_pair **bp, 718602adf40SYehuda Sadeh int len, gfp_t gfpmask) 719602adf40SYehuda Sadeh { 720602adf40SYehuda Sadeh struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL; 721602adf40SYehuda Sadeh int total = 0; 722602adf40SYehuda Sadeh 723602adf40SYehuda Sadeh if (*bp) { 724602adf40SYehuda Sadeh bio_pair_release(*bp); 725602adf40SYehuda Sadeh *bp = NULL; 726602adf40SYehuda Sadeh } 727602adf40SYehuda Sadeh 728602adf40SYehuda Sadeh while (old_chain && (total < len)) { 729602adf40SYehuda Sadeh tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); 730602adf40SYehuda Sadeh if (!tmp) 731602adf40SYehuda Sadeh goto err_out; 732602adf40SYehuda Sadeh 733602adf40SYehuda Sadeh if (total + old_chain->bi_size > len) { 734602adf40SYehuda Sadeh struct bio_pair *bp; 735602adf40SYehuda Sadeh 736602adf40SYehuda Sadeh /* 737602adf40SYehuda Sadeh * this split can only happen with a single paged bio, 738602adf40SYehuda Sadeh * split_bio will BUG_ON if this is not the case 739602adf40SYehuda Sadeh */ 740602adf40SYehuda Sadeh dout("bio_chain_clone split! total=%d remaining=%d" 741bd919d45SAlex Elder "bi_size=%u\n", 742bd919d45SAlex Elder total, len - total, old_chain->bi_size); 743602adf40SYehuda Sadeh 744602adf40SYehuda Sadeh /* split the bio. We'll release it either in the next 745602adf40SYehuda Sadeh call, or it will have to be released outside */ 746593a9e7bSAlex Elder bp = bio_split(old_chain, (len - total) / SECTOR_SIZE); 747602adf40SYehuda Sadeh if (!bp) 748602adf40SYehuda Sadeh goto err_out; 749602adf40SYehuda Sadeh 750602adf40SYehuda Sadeh __bio_clone(tmp, &bp->bio1); 751602adf40SYehuda Sadeh 752602adf40SYehuda Sadeh *next = &bp->bio2; 753602adf40SYehuda Sadeh } else { 754602adf40SYehuda Sadeh __bio_clone(tmp, old_chain); 755602adf40SYehuda Sadeh *next = old_chain->bi_next; 756602adf40SYehuda Sadeh } 757602adf40SYehuda Sadeh 758602adf40SYehuda Sadeh tmp->bi_bdev = NULL; 759602adf40SYehuda Sadeh gfpmask &= ~__GFP_WAIT; 760602adf40SYehuda Sadeh tmp->bi_next = NULL; 761602adf40SYehuda Sadeh 762602adf40SYehuda Sadeh if (!new_chain) { 763602adf40SYehuda Sadeh new_chain = tail = tmp; 764602adf40SYehuda Sadeh } else { 765602adf40SYehuda Sadeh tail->bi_next = tmp; 766602adf40SYehuda Sadeh tail = tmp; 767602adf40SYehuda Sadeh } 768602adf40SYehuda Sadeh old_chain = old_chain->bi_next; 769602adf40SYehuda Sadeh 770602adf40SYehuda Sadeh total += tmp->bi_size; 771602adf40SYehuda Sadeh } 772602adf40SYehuda Sadeh 773602adf40SYehuda Sadeh BUG_ON(total < len); 774602adf40SYehuda Sadeh 775602adf40SYehuda Sadeh if (tail) 776602adf40SYehuda Sadeh tail->bi_next = NULL; 777602adf40SYehuda Sadeh 778602adf40SYehuda Sadeh *old = old_chain; 779602adf40SYehuda Sadeh 780602adf40SYehuda Sadeh return new_chain; 781602adf40SYehuda Sadeh 782602adf40SYehuda Sadeh err_out: 783602adf40SYehuda Sadeh dout("bio_chain_clone with err\n"); 784602adf40SYehuda Sadeh bio_chain_put(new_chain); 785602adf40SYehuda Sadeh return NULL; 786602adf40SYehuda Sadeh } 787602adf40SYehuda Sadeh 788602adf40SYehuda Sadeh /* 789602adf40SYehuda Sadeh * helpers for osd request op vectors. 790602adf40SYehuda Sadeh */ 79157cfc106SAlex Elder static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops, 79257cfc106SAlex Elder int opcode, u32 payload_len) 793602adf40SYehuda Sadeh { 79457cfc106SAlex Elder struct ceph_osd_req_op *ops; 79557cfc106SAlex Elder 79657cfc106SAlex Elder ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO); 79757cfc106SAlex Elder if (!ops) 79857cfc106SAlex Elder return NULL; 79957cfc106SAlex Elder 80057cfc106SAlex Elder ops[0].op = opcode; 80157cfc106SAlex Elder 802602adf40SYehuda Sadeh /* 803602adf40SYehuda Sadeh * op extent offset and length will be set later on 804602adf40SYehuda Sadeh * in calc_raw_layout() 805602adf40SYehuda Sadeh */ 80657cfc106SAlex Elder ops[0].payload_len = payload_len; 80757cfc106SAlex Elder 80857cfc106SAlex Elder return ops; 809602adf40SYehuda Sadeh } 810602adf40SYehuda Sadeh 811602adf40SYehuda Sadeh static void rbd_destroy_ops(struct ceph_osd_req_op *ops) 812602adf40SYehuda Sadeh { 813602adf40SYehuda Sadeh kfree(ops); 814602adf40SYehuda Sadeh } 815602adf40SYehuda Sadeh 8161fec7093SYehuda Sadeh static void rbd_coll_end_req_index(struct request *rq, 8171fec7093SYehuda Sadeh struct rbd_req_coll *coll, 8181fec7093SYehuda Sadeh int index, 8191fec7093SYehuda Sadeh int ret, u64 len) 8201fec7093SYehuda Sadeh { 8211fec7093SYehuda Sadeh struct request_queue *q; 8221fec7093SYehuda Sadeh int min, max, i; 8231fec7093SYehuda Sadeh 824bd919d45SAlex Elder dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n", 825bd919d45SAlex Elder coll, index, ret, (unsigned long long) len); 8261fec7093SYehuda Sadeh 8271fec7093SYehuda Sadeh if (!rq) 8281fec7093SYehuda Sadeh return; 8291fec7093SYehuda Sadeh 8301fec7093SYehuda Sadeh if (!coll) { 8311fec7093SYehuda Sadeh blk_end_request(rq, ret, len); 8321fec7093SYehuda Sadeh return; 8331fec7093SYehuda Sadeh } 8341fec7093SYehuda Sadeh 8351fec7093SYehuda Sadeh q = rq->q; 8361fec7093SYehuda Sadeh 8371fec7093SYehuda Sadeh spin_lock_irq(q->queue_lock); 8381fec7093SYehuda Sadeh coll->status[index].done = 1; 8391fec7093SYehuda Sadeh coll->status[index].rc = ret; 8401fec7093SYehuda Sadeh coll->status[index].bytes = len; 8411fec7093SYehuda Sadeh max = min = coll->num_done; 8421fec7093SYehuda Sadeh while (max < coll->total && coll->status[max].done) 8431fec7093SYehuda Sadeh max++; 8441fec7093SYehuda Sadeh 8451fec7093SYehuda Sadeh for (i = min; i<max; i++) { 8461fec7093SYehuda Sadeh __blk_end_request(rq, coll->status[i].rc, 8471fec7093SYehuda Sadeh coll->status[i].bytes); 8481fec7093SYehuda Sadeh coll->num_done++; 8491fec7093SYehuda Sadeh kref_put(&coll->kref, rbd_coll_release); 8501fec7093SYehuda Sadeh } 8511fec7093SYehuda Sadeh spin_unlock_irq(q->queue_lock); 8521fec7093SYehuda Sadeh } 8531fec7093SYehuda Sadeh 8541fec7093SYehuda Sadeh static void rbd_coll_end_req(struct rbd_request *req, 8551fec7093SYehuda Sadeh int ret, u64 len) 8561fec7093SYehuda Sadeh { 8571fec7093SYehuda Sadeh rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len); 8581fec7093SYehuda Sadeh } 8591fec7093SYehuda Sadeh 860602adf40SYehuda Sadeh /* 861602adf40SYehuda Sadeh * Send ceph osd request 862602adf40SYehuda Sadeh */ 863602adf40SYehuda Sadeh static int rbd_do_request(struct request *rq, 8640ce1a794SAlex Elder struct rbd_device *rbd_dev, 865602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 866602adf40SYehuda Sadeh u64 snapid, 867aded07eaSAlex Elder const char *object_name, u64 ofs, u64 len, 868602adf40SYehuda Sadeh struct bio *bio, 869602adf40SYehuda Sadeh struct page **pages, 870602adf40SYehuda Sadeh int num_pages, 871602adf40SYehuda Sadeh int flags, 872602adf40SYehuda Sadeh struct ceph_osd_req_op *ops, 8731fec7093SYehuda Sadeh struct rbd_req_coll *coll, 8741fec7093SYehuda Sadeh int coll_index, 875602adf40SYehuda Sadeh void (*rbd_cb)(struct ceph_osd_request *req, 87659c2be1eSYehuda Sadeh struct ceph_msg *msg), 87759c2be1eSYehuda Sadeh struct ceph_osd_request **linger_req, 87859c2be1eSYehuda Sadeh u64 *ver) 879602adf40SYehuda Sadeh { 880602adf40SYehuda Sadeh struct ceph_osd_request *req; 881602adf40SYehuda Sadeh struct ceph_file_layout *layout; 882602adf40SYehuda Sadeh int ret; 883602adf40SYehuda Sadeh u64 bno; 884602adf40SYehuda Sadeh struct timespec mtime = CURRENT_TIME; 885602adf40SYehuda Sadeh struct rbd_request *req_data; 886602adf40SYehuda Sadeh struct ceph_osd_request_head *reqhead; 8871dbb4399SAlex Elder struct ceph_osd_client *osdc; 888602adf40SYehuda Sadeh 889602adf40SYehuda Sadeh req_data = kzalloc(sizeof(*req_data), GFP_NOIO); 8901fec7093SYehuda Sadeh if (!req_data) { 8911fec7093SYehuda Sadeh if (coll) 8921fec7093SYehuda Sadeh rbd_coll_end_req_index(rq, coll, coll_index, 8931fec7093SYehuda Sadeh -ENOMEM, len); 8941fec7093SYehuda Sadeh return -ENOMEM; 8951fec7093SYehuda Sadeh } 896602adf40SYehuda Sadeh 8971fec7093SYehuda Sadeh if (coll) { 8981fec7093SYehuda Sadeh req_data->coll = coll; 8991fec7093SYehuda Sadeh req_data->coll_index = coll_index; 9001fec7093SYehuda Sadeh } 9011fec7093SYehuda Sadeh 902bd919d45SAlex Elder dout("rbd_do_request object_name=%s ofs=%llu len=%llu\n", object_name, 903bd919d45SAlex Elder (unsigned long long) ofs, (unsigned long long) len); 904602adf40SYehuda Sadeh 9050ce1a794SAlex Elder osdc = &rbd_dev->rbd_client->client->osdc; 9061dbb4399SAlex Elder req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, 9071dbb4399SAlex Elder false, GFP_NOIO, pages, bio); 9084ad12621SSage Weil if (!req) { 9094ad12621SSage Weil ret = -ENOMEM; 910602adf40SYehuda Sadeh goto done_pages; 911602adf40SYehuda Sadeh } 912602adf40SYehuda Sadeh 913602adf40SYehuda Sadeh req->r_callback = rbd_cb; 914602adf40SYehuda Sadeh 915602adf40SYehuda Sadeh req_data->rq = rq; 916602adf40SYehuda Sadeh req_data->bio = bio; 917602adf40SYehuda Sadeh req_data->pages = pages; 918602adf40SYehuda Sadeh req_data->len = len; 919602adf40SYehuda Sadeh 920602adf40SYehuda Sadeh req->r_priv = req_data; 921602adf40SYehuda Sadeh 922602adf40SYehuda Sadeh reqhead = req->r_request->front.iov_base; 923602adf40SYehuda Sadeh reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); 924602adf40SYehuda Sadeh 925aded07eaSAlex Elder strncpy(req->r_oid, object_name, sizeof(req->r_oid)); 926602adf40SYehuda Sadeh req->r_oid_len = strlen(req->r_oid); 927602adf40SYehuda Sadeh 928602adf40SYehuda Sadeh layout = &req->r_file_layout; 929602adf40SYehuda Sadeh memset(layout, 0, sizeof(*layout)); 930602adf40SYehuda Sadeh layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); 931602adf40SYehuda Sadeh layout->fl_stripe_count = cpu_to_le32(1); 932602adf40SYehuda Sadeh layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); 9330ce1a794SAlex Elder layout->fl_pg_pool = cpu_to_le32(rbd_dev->pool_id); 9341dbb4399SAlex Elder ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, 9351dbb4399SAlex Elder req, ops); 936602adf40SYehuda Sadeh 937602adf40SYehuda Sadeh ceph_osdc_build_request(req, ofs, &len, 938602adf40SYehuda Sadeh ops, 939602adf40SYehuda Sadeh snapc, 940602adf40SYehuda Sadeh &mtime, 941602adf40SYehuda Sadeh req->r_oid, req->r_oid_len); 942602adf40SYehuda Sadeh 94359c2be1eSYehuda Sadeh if (linger_req) { 9441dbb4399SAlex Elder ceph_osdc_set_request_linger(osdc, req); 94559c2be1eSYehuda Sadeh *linger_req = req; 94659c2be1eSYehuda Sadeh } 94759c2be1eSYehuda Sadeh 9481dbb4399SAlex Elder ret = ceph_osdc_start_request(osdc, req, false); 949602adf40SYehuda Sadeh if (ret < 0) 950602adf40SYehuda Sadeh goto done_err; 951602adf40SYehuda Sadeh 952602adf40SYehuda Sadeh if (!rbd_cb) { 9531dbb4399SAlex Elder ret = ceph_osdc_wait_request(osdc, req); 95459c2be1eSYehuda Sadeh if (ver) 95559c2be1eSYehuda Sadeh *ver = le64_to_cpu(req->r_reassert_version.version); 956bd919d45SAlex Elder dout("reassert_ver=%llu\n", 957bd919d45SAlex Elder (unsigned long long) 9581fec7093SYehuda Sadeh le64_to_cpu(req->r_reassert_version.version)); 959602adf40SYehuda Sadeh ceph_osdc_put_request(req); 960602adf40SYehuda Sadeh } 961602adf40SYehuda Sadeh return ret; 962602adf40SYehuda Sadeh 963602adf40SYehuda Sadeh done_err: 964602adf40SYehuda Sadeh bio_chain_put(req_data->bio); 965602adf40SYehuda Sadeh ceph_osdc_put_request(req); 966602adf40SYehuda Sadeh done_pages: 9671fec7093SYehuda Sadeh rbd_coll_end_req(req_data, ret, len); 968602adf40SYehuda Sadeh kfree(req_data); 969602adf40SYehuda Sadeh return ret; 970602adf40SYehuda Sadeh } 971602adf40SYehuda Sadeh 972602adf40SYehuda Sadeh /* 973602adf40SYehuda Sadeh * Ceph osd op callback 974602adf40SYehuda Sadeh */ 975602adf40SYehuda Sadeh static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) 976602adf40SYehuda Sadeh { 977602adf40SYehuda Sadeh struct rbd_request *req_data = req->r_priv; 978602adf40SYehuda Sadeh struct ceph_osd_reply_head *replyhead; 979602adf40SYehuda Sadeh struct ceph_osd_op *op; 980602adf40SYehuda Sadeh __s32 rc; 981602adf40SYehuda Sadeh u64 bytes; 982602adf40SYehuda Sadeh int read_op; 983602adf40SYehuda Sadeh 984602adf40SYehuda Sadeh /* parse reply */ 985602adf40SYehuda Sadeh replyhead = msg->front.iov_base; 986602adf40SYehuda Sadeh WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); 987602adf40SYehuda Sadeh op = (void *)(replyhead + 1); 988602adf40SYehuda Sadeh rc = le32_to_cpu(replyhead->result); 989602adf40SYehuda Sadeh bytes = le64_to_cpu(op->extent.length); 990895cfcc8SDan Carpenter read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ); 991602adf40SYehuda Sadeh 992bd919d45SAlex Elder dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n", 993bd919d45SAlex Elder (unsigned long long) bytes, read_op, (int) rc); 994602adf40SYehuda Sadeh 995602adf40SYehuda Sadeh if (rc == -ENOENT && read_op) { 996602adf40SYehuda Sadeh zero_bio_chain(req_data->bio, 0); 997602adf40SYehuda Sadeh rc = 0; 998602adf40SYehuda Sadeh } else if (rc == 0 && read_op && bytes < req_data->len) { 999602adf40SYehuda Sadeh zero_bio_chain(req_data->bio, bytes); 1000602adf40SYehuda Sadeh bytes = req_data->len; 1001602adf40SYehuda Sadeh } 1002602adf40SYehuda Sadeh 10031fec7093SYehuda Sadeh rbd_coll_end_req(req_data, rc, bytes); 1004602adf40SYehuda Sadeh 1005602adf40SYehuda Sadeh if (req_data->bio) 1006602adf40SYehuda Sadeh bio_chain_put(req_data->bio); 1007602adf40SYehuda Sadeh 1008602adf40SYehuda Sadeh ceph_osdc_put_request(req); 1009602adf40SYehuda Sadeh kfree(req_data); 1010602adf40SYehuda Sadeh } 1011602adf40SYehuda Sadeh 101259c2be1eSYehuda Sadeh static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) 101359c2be1eSYehuda Sadeh { 101459c2be1eSYehuda Sadeh ceph_osdc_put_request(req); 101559c2be1eSYehuda Sadeh } 101659c2be1eSYehuda Sadeh 1017602adf40SYehuda Sadeh /* 1018602adf40SYehuda Sadeh * Do a synchronous ceph osd operation 1019602adf40SYehuda Sadeh */ 10200ce1a794SAlex Elder static int rbd_req_sync_op(struct rbd_device *rbd_dev, 1021602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1022602adf40SYehuda Sadeh u64 snapid, 1023602adf40SYehuda Sadeh int opcode, 1024602adf40SYehuda Sadeh int flags, 1025602adf40SYehuda Sadeh struct ceph_osd_req_op *orig_ops, 1026aded07eaSAlex Elder const char *object_name, 1027602adf40SYehuda Sadeh u64 ofs, u64 len, 102859c2be1eSYehuda Sadeh char *buf, 102959c2be1eSYehuda Sadeh struct ceph_osd_request **linger_req, 103059c2be1eSYehuda Sadeh u64 *ver) 1031602adf40SYehuda Sadeh { 1032602adf40SYehuda Sadeh int ret; 1033602adf40SYehuda Sadeh struct page **pages; 1034602adf40SYehuda Sadeh int num_pages; 1035602adf40SYehuda Sadeh struct ceph_osd_req_op *ops = orig_ops; 1036602adf40SYehuda Sadeh u32 payload_len; 1037602adf40SYehuda Sadeh 1038602adf40SYehuda Sadeh num_pages = calc_pages_for(ofs , len); 1039602adf40SYehuda Sadeh pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); 1040b8d0638aSDan Carpenter if (IS_ERR(pages)) 1041b8d0638aSDan Carpenter return PTR_ERR(pages); 1042602adf40SYehuda Sadeh 1043602adf40SYehuda Sadeh if (!orig_ops) { 1044602adf40SYehuda Sadeh payload_len = (flags & CEPH_OSD_FLAG_WRITE ? len : 0); 104557cfc106SAlex Elder ret = -ENOMEM; 104657cfc106SAlex Elder ops = rbd_create_rw_ops(1, opcode, payload_len); 104757cfc106SAlex Elder if (!ops) 1048602adf40SYehuda Sadeh goto done; 1049602adf40SYehuda Sadeh 1050602adf40SYehuda Sadeh if ((flags & CEPH_OSD_FLAG_WRITE) && buf) { 1051602adf40SYehuda Sadeh ret = ceph_copy_to_page_vector(pages, buf, ofs, len); 1052602adf40SYehuda Sadeh if (ret < 0) 1053602adf40SYehuda Sadeh goto done_ops; 1054602adf40SYehuda Sadeh } 1055602adf40SYehuda Sadeh } 1056602adf40SYehuda Sadeh 10570ce1a794SAlex Elder ret = rbd_do_request(NULL, rbd_dev, snapc, snapid, 1058aded07eaSAlex Elder object_name, ofs, len, NULL, 1059602adf40SYehuda Sadeh pages, num_pages, 1060602adf40SYehuda Sadeh flags, 1061602adf40SYehuda Sadeh ops, 10621fec7093SYehuda Sadeh NULL, 0, 106359c2be1eSYehuda Sadeh NULL, 106459c2be1eSYehuda Sadeh linger_req, ver); 1065602adf40SYehuda Sadeh if (ret < 0) 1066602adf40SYehuda Sadeh goto done_ops; 1067602adf40SYehuda Sadeh 1068602adf40SYehuda Sadeh if ((flags & CEPH_OSD_FLAG_READ) && buf) 1069602adf40SYehuda Sadeh ret = ceph_copy_from_page_vector(pages, buf, ofs, ret); 1070602adf40SYehuda Sadeh 1071602adf40SYehuda Sadeh done_ops: 1072602adf40SYehuda Sadeh if (!orig_ops) 1073602adf40SYehuda Sadeh rbd_destroy_ops(ops); 1074602adf40SYehuda Sadeh done: 1075602adf40SYehuda Sadeh ceph_release_page_vector(pages, num_pages); 1076602adf40SYehuda Sadeh return ret; 1077602adf40SYehuda Sadeh } 1078602adf40SYehuda Sadeh 1079602adf40SYehuda Sadeh /* 1080602adf40SYehuda Sadeh * Do an asynchronous ceph osd operation 1081602adf40SYehuda Sadeh */ 1082602adf40SYehuda Sadeh static int rbd_do_op(struct request *rq, 1083602adf40SYehuda Sadeh struct rbd_device *rbd_dev, 1084602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1085602adf40SYehuda Sadeh u64 snapid, 1086d1f57ea6SAlex Elder int opcode, int flags, 1087602adf40SYehuda Sadeh u64 ofs, u64 len, 10881fec7093SYehuda Sadeh struct bio *bio, 10891fec7093SYehuda Sadeh struct rbd_req_coll *coll, 10901fec7093SYehuda Sadeh int coll_index) 1091602adf40SYehuda Sadeh { 1092602adf40SYehuda Sadeh char *seg_name; 1093602adf40SYehuda Sadeh u64 seg_ofs; 1094602adf40SYehuda Sadeh u64 seg_len; 1095602adf40SYehuda Sadeh int ret; 1096602adf40SYehuda Sadeh struct ceph_osd_req_op *ops; 1097602adf40SYehuda Sadeh u32 payload_len; 1098602adf40SYehuda Sadeh 1099602adf40SYehuda Sadeh seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); 1100602adf40SYehuda Sadeh if (!seg_name) 1101602adf40SYehuda Sadeh return -ENOMEM; 1102602adf40SYehuda Sadeh 1103602adf40SYehuda Sadeh seg_len = rbd_get_segment(&rbd_dev->header, 1104ca1e49a6SAlex Elder rbd_dev->header.object_prefix, 1105602adf40SYehuda Sadeh ofs, len, 1106602adf40SYehuda Sadeh seg_name, &seg_ofs); 1107602adf40SYehuda Sadeh 1108602adf40SYehuda Sadeh payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); 1109602adf40SYehuda Sadeh 111057cfc106SAlex Elder ret = -ENOMEM; 111157cfc106SAlex Elder ops = rbd_create_rw_ops(1, opcode, payload_len); 111257cfc106SAlex Elder if (!ops) 1113602adf40SYehuda Sadeh goto done; 1114602adf40SYehuda Sadeh 1115602adf40SYehuda Sadeh /* we've taken care of segment sizes earlier when we 1116602adf40SYehuda Sadeh cloned the bios. We should never have a segment 1117602adf40SYehuda Sadeh truncated at this point */ 1118602adf40SYehuda Sadeh BUG_ON(seg_len < len); 1119602adf40SYehuda Sadeh 1120602adf40SYehuda Sadeh ret = rbd_do_request(rq, rbd_dev, snapc, snapid, 1121602adf40SYehuda Sadeh seg_name, seg_ofs, seg_len, 1122602adf40SYehuda Sadeh bio, 1123602adf40SYehuda Sadeh NULL, 0, 1124602adf40SYehuda Sadeh flags, 1125602adf40SYehuda Sadeh ops, 11261fec7093SYehuda Sadeh coll, coll_index, 112759c2be1eSYehuda Sadeh rbd_req_cb, 0, NULL); 112811f77002SSage Weil 112911f77002SSage Weil rbd_destroy_ops(ops); 1130602adf40SYehuda Sadeh done: 1131602adf40SYehuda Sadeh kfree(seg_name); 1132602adf40SYehuda Sadeh return ret; 1133602adf40SYehuda Sadeh } 1134602adf40SYehuda Sadeh 1135602adf40SYehuda Sadeh /* 1136602adf40SYehuda Sadeh * Request async osd write 1137602adf40SYehuda Sadeh */ 1138602adf40SYehuda Sadeh static int rbd_req_write(struct request *rq, 1139602adf40SYehuda Sadeh struct rbd_device *rbd_dev, 1140602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1141602adf40SYehuda Sadeh u64 ofs, u64 len, 11421fec7093SYehuda Sadeh struct bio *bio, 11431fec7093SYehuda Sadeh struct rbd_req_coll *coll, 11441fec7093SYehuda Sadeh int coll_index) 1145602adf40SYehuda Sadeh { 1146602adf40SYehuda Sadeh return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, 1147602adf40SYehuda Sadeh CEPH_OSD_OP_WRITE, 1148602adf40SYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 11491fec7093SYehuda Sadeh ofs, len, bio, coll, coll_index); 1150602adf40SYehuda Sadeh } 1151602adf40SYehuda Sadeh 1152602adf40SYehuda Sadeh /* 1153602adf40SYehuda Sadeh * Request async osd read 1154602adf40SYehuda Sadeh */ 1155602adf40SYehuda Sadeh static int rbd_req_read(struct request *rq, 1156602adf40SYehuda Sadeh struct rbd_device *rbd_dev, 1157602adf40SYehuda Sadeh u64 snapid, 1158602adf40SYehuda Sadeh u64 ofs, u64 len, 11591fec7093SYehuda Sadeh struct bio *bio, 11601fec7093SYehuda Sadeh struct rbd_req_coll *coll, 11611fec7093SYehuda Sadeh int coll_index) 1162602adf40SYehuda Sadeh { 1163602adf40SYehuda Sadeh return rbd_do_op(rq, rbd_dev, NULL, 1164b06e6a6bSJosh Durgin snapid, 1165602adf40SYehuda Sadeh CEPH_OSD_OP_READ, 1166602adf40SYehuda Sadeh CEPH_OSD_FLAG_READ, 11671fec7093SYehuda Sadeh ofs, len, bio, coll, coll_index); 1168602adf40SYehuda Sadeh } 1169602adf40SYehuda Sadeh 1170602adf40SYehuda Sadeh /* 1171602adf40SYehuda Sadeh * Request sync osd read 1172602adf40SYehuda Sadeh */ 11730ce1a794SAlex Elder static int rbd_req_sync_read(struct rbd_device *rbd_dev, 1174602adf40SYehuda Sadeh u64 snapid, 1175aded07eaSAlex Elder const char *object_name, 1176602adf40SYehuda Sadeh u64 ofs, u64 len, 117759c2be1eSYehuda Sadeh char *buf, 117859c2be1eSYehuda Sadeh u64 *ver) 1179602adf40SYehuda Sadeh { 11800ce1a794SAlex Elder return rbd_req_sync_op(rbd_dev, NULL, 1181b06e6a6bSJosh Durgin snapid, 1182602adf40SYehuda Sadeh CEPH_OSD_OP_READ, 1183602adf40SYehuda Sadeh CEPH_OSD_FLAG_READ, 1184602adf40SYehuda Sadeh NULL, 1185d1f57ea6SAlex Elder object_name, ofs, len, buf, NULL, ver); 1186602adf40SYehuda Sadeh } 1187602adf40SYehuda Sadeh 1188602adf40SYehuda Sadeh /* 118959c2be1eSYehuda Sadeh * Request sync osd watch 119059c2be1eSYehuda Sadeh */ 11910ce1a794SAlex Elder static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev, 119259c2be1eSYehuda Sadeh u64 ver, 11937f0a24d8SAlex Elder u64 notify_id) 119459c2be1eSYehuda Sadeh { 119559c2be1eSYehuda Sadeh struct ceph_osd_req_op *ops; 119611f77002SSage Weil int ret; 119711f77002SSage Weil 119857cfc106SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY_ACK, 0); 119957cfc106SAlex Elder if (!ops) 120057cfc106SAlex Elder return -ENOMEM; 120159c2be1eSYehuda Sadeh 1202a71b891bSJosh Durgin ops[0].watch.ver = cpu_to_le64(ver); 120359c2be1eSYehuda Sadeh ops[0].watch.cookie = notify_id; 120459c2be1eSYehuda Sadeh ops[0].watch.flag = 0; 120559c2be1eSYehuda Sadeh 12060ce1a794SAlex Elder ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP, 12077f0a24d8SAlex Elder rbd_dev->header_name, 0, 0, NULL, 1208ad4f232fSAlex Elder NULL, 0, 120959c2be1eSYehuda Sadeh CEPH_OSD_FLAG_READ, 121059c2be1eSYehuda Sadeh ops, 12111fec7093SYehuda Sadeh NULL, 0, 121259c2be1eSYehuda Sadeh rbd_simple_req_cb, 0, NULL); 121359c2be1eSYehuda Sadeh 121459c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 121559c2be1eSYehuda Sadeh return ret; 121659c2be1eSYehuda Sadeh } 121759c2be1eSYehuda Sadeh 121859c2be1eSYehuda Sadeh static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) 121959c2be1eSYehuda Sadeh { 12200ce1a794SAlex Elder struct rbd_device *rbd_dev = (struct rbd_device *)data; 1221a71b891bSJosh Durgin u64 hver; 122213143d2dSSage Weil int rc; 122313143d2dSSage Weil 12240ce1a794SAlex Elder if (!rbd_dev) 122559c2be1eSYehuda Sadeh return; 122659c2be1eSYehuda Sadeh 1227bd919d45SAlex Elder dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n", 1228bd919d45SAlex Elder rbd_dev->header_name, (unsigned long long) notify_id, 1229bd919d45SAlex Elder (unsigned int) opcode); 123059c2be1eSYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 12310ce1a794SAlex Elder rc = __rbd_refresh_header(rbd_dev); 1232a71b891bSJosh Durgin hver = rbd_dev->header.obj_version; 123359c2be1eSYehuda Sadeh mutex_unlock(&ctl_mutex); 123413143d2dSSage Weil if (rc) 1235f0f8cef5SAlex Elder pr_warning(RBD_DRV_NAME "%d got notification but failed to " 12360ce1a794SAlex Elder " update snaps: %d\n", rbd_dev->major, rc); 123759c2be1eSYehuda Sadeh 12387f0a24d8SAlex Elder rbd_req_sync_notify_ack(rbd_dev, hver, notify_id); 123959c2be1eSYehuda Sadeh } 124059c2be1eSYehuda Sadeh 124159c2be1eSYehuda Sadeh /* 124259c2be1eSYehuda Sadeh * Request sync osd watch 124359c2be1eSYehuda Sadeh */ 12440e6f322dSAlex Elder static int rbd_req_sync_watch(struct rbd_device *rbd_dev) 124559c2be1eSYehuda Sadeh { 124659c2be1eSYehuda Sadeh struct ceph_osd_req_op *ops; 12470ce1a794SAlex Elder struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 124857cfc106SAlex Elder int ret; 124959c2be1eSYehuda Sadeh 125057cfc106SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); 125157cfc106SAlex Elder if (!ops) 125257cfc106SAlex Elder return -ENOMEM; 125359c2be1eSYehuda Sadeh 125459c2be1eSYehuda Sadeh ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, 12550ce1a794SAlex Elder (void *)rbd_dev, &rbd_dev->watch_event); 125659c2be1eSYehuda Sadeh if (ret < 0) 125759c2be1eSYehuda Sadeh goto fail; 125859c2be1eSYehuda Sadeh 12590e6f322dSAlex Elder ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version); 12600ce1a794SAlex Elder ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); 126159c2be1eSYehuda Sadeh ops[0].watch.flag = 1; 126259c2be1eSYehuda Sadeh 12630ce1a794SAlex Elder ret = rbd_req_sync_op(rbd_dev, NULL, 126459c2be1eSYehuda Sadeh CEPH_NOSNAP, 126559c2be1eSYehuda Sadeh 0, 126659c2be1eSYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 126759c2be1eSYehuda Sadeh ops, 12680e6f322dSAlex Elder rbd_dev->header_name, 12690e6f322dSAlex Elder 0, 0, NULL, 12700ce1a794SAlex Elder &rbd_dev->watch_request, NULL); 127159c2be1eSYehuda Sadeh 127259c2be1eSYehuda Sadeh if (ret < 0) 127359c2be1eSYehuda Sadeh goto fail_event; 127459c2be1eSYehuda Sadeh 127559c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 127659c2be1eSYehuda Sadeh return 0; 127759c2be1eSYehuda Sadeh 127859c2be1eSYehuda Sadeh fail_event: 12790ce1a794SAlex Elder ceph_osdc_cancel_event(rbd_dev->watch_event); 12800ce1a794SAlex Elder rbd_dev->watch_event = NULL; 128159c2be1eSYehuda Sadeh fail: 128259c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 128359c2be1eSYehuda Sadeh return ret; 128459c2be1eSYehuda Sadeh } 128559c2be1eSYehuda Sadeh 128679e3057cSYehuda Sadeh /* 128779e3057cSYehuda Sadeh * Request sync osd unwatch 128879e3057cSYehuda Sadeh */ 1289070c633fSAlex Elder static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev) 129079e3057cSYehuda Sadeh { 129179e3057cSYehuda Sadeh struct ceph_osd_req_op *ops; 129257cfc106SAlex Elder int ret; 129379e3057cSYehuda Sadeh 129457cfc106SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); 129557cfc106SAlex Elder if (!ops) 129657cfc106SAlex Elder return -ENOMEM; 129779e3057cSYehuda Sadeh 129879e3057cSYehuda Sadeh ops[0].watch.ver = 0; 12990ce1a794SAlex Elder ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); 130079e3057cSYehuda Sadeh ops[0].watch.flag = 0; 130179e3057cSYehuda Sadeh 13020ce1a794SAlex Elder ret = rbd_req_sync_op(rbd_dev, NULL, 130379e3057cSYehuda Sadeh CEPH_NOSNAP, 130479e3057cSYehuda Sadeh 0, 130579e3057cSYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 130679e3057cSYehuda Sadeh ops, 1307070c633fSAlex Elder rbd_dev->header_name, 1308070c633fSAlex Elder 0, 0, NULL, NULL, NULL); 1309070c633fSAlex Elder 131079e3057cSYehuda Sadeh 131179e3057cSYehuda Sadeh rbd_destroy_ops(ops); 13120ce1a794SAlex Elder ceph_osdc_cancel_event(rbd_dev->watch_event); 13130ce1a794SAlex Elder rbd_dev->watch_event = NULL; 131479e3057cSYehuda Sadeh return ret; 131579e3057cSYehuda Sadeh } 131679e3057cSYehuda Sadeh 131759c2be1eSYehuda Sadeh struct rbd_notify_info { 13180ce1a794SAlex Elder struct rbd_device *rbd_dev; 131959c2be1eSYehuda Sadeh }; 132059c2be1eSYehuda Sadeh 132159c2be1eSYehuda Sadeh static void rbd_notify_cb(u64 ver, u64 notify_id, u8 opcode, void *data) 132259c2be1eSYehuda Sadeh { 13230ce1a794SAlex Elder struct rbd_device *rbd_dev = (struct rbd_device *)data; 13240ce1a794SAlex Elder if (!rbd_dev) 132559c2be1eSYehuda Sadeh return; 132659c2be1eSYehuda Sadeh 1327bd919d45SAlex Elder dout("rbd_notify_cb %s notify_id=%llu opcode=%u\n", 1328bd919d45SAlex Elder rbd_dev->header_name, (unsigned long long) notify_id, 1329bd919d45SAlex Elder (unsigned int) opcode); 133059c2be1eSYehuda Sadeh } 133159c2be1eSYehuda Sadeh 133259c2be1eSYehuda Sadeh /* 133359c2be1eSYehuda Sadeh * Request sync osd notify 133459c2be1eSYehuda Sadeh */ 13354cb16250SAlex Elder static int rbd_req_sync_notify(struct rbd_device *rbd_dev) 133659c2be1eSYehuda Sadeh { 133759c2be1eSYehuda Sadeh struct ceph_osd_req_op *ops; 13380ce1a794SAlex Elder struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 133959c2be1eSYehuda Sadeh struct ceph_osd_event *event; 134059c2be1eSYehuda Sadeh struct rbd_notify_info info; 134159c2be1eSYehuda Sadeh int payload_len = sizeof(u32) + sizeof(u32); 134259c2be1eSYehuda Sadeh int ret; 134359c2be1eSYehuda Sadeh 134457cfc106SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY, payload_len); 134557cfc106SAlex Elder if (!ops) 134657cfc106SAlex Elder return -ENOMEM; 134759c2be1eSYehuda Sadeh 13480ce1a794SAlex Elder info.rbd_dev = rbd_dev; 134959c2be1eSYehuda Sadeh 135059c2be1eSYehuda Sadeh ret = ceph_osdc_create_event(osdc, rbd_notify_cb, 1, 135159c2be1eSYehuda Sadeh (void *)&info, &event); 135259c2be1eSYehuda Sadeh if (ret < 0) 135359c2be1eSYehuda Sadeh goto fail; 135459c2be1eSYehuda Sadeh 135559c2be1eSYehuda Sadeh ops[0].watch.ver = 1; 135659c2be1eSYehuda Sadeh ops[0].watch.flag = 1; 135759c2be1eSYehuda Sadeh ops[0].watch.cookie = event->cookie; 135859c2be1eSYehuda Sadeh ops[0].watch.prot_ver = RADOS_NOTIFY_VER; 135959c2be1eSYehuda Sadeh ops[0].watch.timeout = 12; 136059c2be1eSYehuda Sadeh 13610ce1a794SAlex Elder ret = rbd_req_sync_op(rbd_dev, NULL, 136259c2be1eSYehuda Sadeh CEPH_NOSNAP, 136359c2be1eSYehuda Sadeh 0, 136459c2be1eSYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 136559c2be1eSYehuda Sadeh ops, 13664cb16250SAlex Elder rbd_dev->header_name, 13674cb16250SAlex Elder 0, 0, NULL, NULL, NULL); 136859c2be1eSYehuda Sadeh if (ret < 0) 136959c2be1eSYehuda Sadeh goto fail_event; 137059c2be1eSYehuda Sadeh 137159c2be1eSYehuda Sadeh ret = ceph_osdc_wait_event(event, CEPH_OSD_TIMEOUT_DEFAULT); 137259c2be1eSYehuda Sadeh dout("ceph_osdc_wait_event returned %d\n", ret); 137359c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 137459c2be1eSYehuda Sadeh return 0; 137559c2be1eSYehuda Sadeh 137659c2be1eSYehuda Sadeh fail_event: 137759c2be1eSYehuda Sadeh ceph_osdc_cancel_event(event); 137859c2be1eSYehuda Sadeh fail: 137959c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 138059c2be1eSYehuda Sadeh return ret; 138159c2be1eSYehuda Sadeh } 138259c2be1eSYehuda Sadeh 138359c2be1eSYehuda Sadeh /* 1384602adf40SYehuda Sadeh * Request sync osd read 1385602adf40SYehuda Sadeh */ 13860ce1a794SAlex Elder static int rbd_req_sync_exec(struct rbd_device *rbd_dev, 1387aded07eaSAlex Elder const char *object_name, 1388aded07eaSAlex Elder const char *class_name, 1389aded07eaSAlex Elder const char *method_name, 1390602adf40SYehuda Sadeh const char *data, 139159c2be1eSYehuda Sadeh int len, 139259c2be1eSYehuda Sadeh u64 *ver) 1393602adf40SYehuda Sadeh { 1394602adf40SYehuda Sadeh struct ceph_osd_req_op *ops; 1395aded07eaSAlex Elder int class_name_len = strlen(class_name); 1396aded07eaSAlex Elder int method_name_len = strlen(method_name); 139757cfc106SAlex Elder int ret; 139857cfc106SAlex Elder 139957cfc106SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, 1400aded07eaSAlex Elder class_name_len + method_name_len + len); 140157cfc106SAlex Elder if (!ops) 140257cfc106SAlex Elder return -ENOMEM; 1403602adf40SYehuda Sadeh 1404aded07eaSAlex Elder ops[0].cls.class_name = class_name; 1405aded07eaSAlex Elder ops[0].cls.class_len = (__u8) class_name_len; 1406aded07eaSAlex Elder ops[0].cls.method_name = method_name; 1407aded07eaSAlex Elder ops[0].cls.method_len = (__u8) method_name_len; 1408602adf40SYehuda Sadeh ops[0].cls.argc = 0; 1409602adf40SYehuda Sadeh ops[0].cls.indata = data; 1410602adf40SYehuda Sadeh ops[0].cls.indata_len = len; 1411602adf40SYehuda Sadeh 14120ce1a794SAlex Elder ret = rbd_req_sync_op(rbd_dev, NULL, 1413602adf40SYehuda Sadeh CEPH_NOSNAP, 1414602adf40SYehuda Sadeh 0, 1415602adf40SYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 1416602adf40SYehuda Sadeh ops, 1417d1f57ea6SAlex Elder object_name, 0, 0, NULL, NULL, ver); 1418602adf40SYehuda Sadeh 1419602adf40SYehuda Sadeh rbd_destroy_ops(ops); 1420602adf40SYehuda Sadeh 1421602adf40SYehuda Sadeh dout("cls_exec returned %d\n", ret); 1422602adf40SYehuda Sadeh return ret; 1423602adf40SYehuda Sadeh } 1424602adf40SYehuda Sadeh 14251fec7093SYehuda Sadeh static struct rbd_req_coll *rbd_alloc_coll(int num_reqs) 14261fec7093SYehuda Sadeh { 14271fec7093SYehuda Sadeh struct rbd_req_coll *coll = 14281fec7093SYehuda Sadeh kzalloc(sizeof(struct rbd_req_coll) + 14291fec7093SYehuda Sadeh sizeof(struct rbd_req_status) * num_reqs, 14301fec7093SYehuda Sadeh GFP_ATOMIC); 14311fec7093SYehuda Sadeh 14321fec7093SYehuda Sadeh if (!coll) 14331fec7093SYehuda Sadeh return NULL; 14341fec7093SYehuda Sadeh coll->total = num_reqs; 14351fec7093SYehuda Sadeh kref_init(&coll->kref); 14361fec7093SYehuda Sadeh return coll; 14371fec7093SYehuda Sadeh } 14381fec7093SYehuda Sadeh 1439602adf40SYehuda Sadeh /* 1440602adf40SYehuda Sadeh * block device queue callback 1441602adf40SYehuda Sadeh */ 1442602adf40SYehuda Sadeh static void rbd_rq_fn(struct request_queue *q) 1443602adf40SYehuda Sadeh { 1444602adf40SYehuda Sadeh struct rbd_device *rbd_dev = q->queuedata; 1445602adf40SYehuda Sadeh struct request *rq; 1446602adf40SYehuda Sadeh struct bio_pair *bp = NULL; 1447602adf40SYehuda Sadeh 144800f1f36fSAlex Elder while ((rq = blk_fetch_request(q))) { 1449602adf40SYehuda Sadeh struct bio *bio; 1450602adf40SYehuda Sadeh struct bio *rq_bio, *next_bio = NULL; 1451602adf40SYehuda Sadeh bool do_write; 1452bd919d45SAlex Elder unsigned int size; 1453bd919d45SAlex Elder u64 op_size = 0; 1454602adf40SYehuda Sadeh u64 ofs; 14551fec7093SYehuda Sadeh int num_segs, cur_seg = 0; 14561fec7093SYehuda Sadeh struct rbd_req_coll *coll; 1457d1d25646SJosh Durgin struct ceph_snap_context *snapc; 1458602adf40SYehuda Sadeh 1459602adf40SYehuda Sadeh /* peek at request from block layer */ 1460602adf40SYehuda Sadeh if (!rq) 1461602adf40SYehuda Sadeh break; 1462602adf40SYehuda Sadeh 1463602adf40SYehuda Sadeh dout("fetched request\n"); 1464602adf40SYehuda Sadeh 1465602adf40SYehuda Sadeh /* filter out block requests we don't understand */ 1466602adf40SYehuda Sadeh if ((rq->cmd_type != REQ_TYPE_FS)) { 1467602adf40SYehuda Sadeh __blk_end_request_all(rq, 0); 146800f1f36fSAlex Elder continue; 1469602adf40SYehuda Sadeh } 1470602adf40SYehuda Sadeh 1471602adf40SYehuda Sadeh /* deduce our operation (read, write) */ 1472602adf40SYehuda Sadeh do_write = (rq_data_dir(rq) == WRITE); 1473602adf40SYehuda Sadeh 1474602adf40SYehuda Sadeh size = blk_rq_bytes(rq); 1475593a9e7bSAlex Elder ofs = blk_rq_pos(rq) * SECTOR_SIZE; 1476602adf40SYehuda Sadeh rq_bio = rq->bio; 1477602adf40SYehuda Sadeh if (do_write && rbd_dev->read_only) { 1478602adf40SYehuda Sadeh __blk_end_request_all(rq, -EROFS); 147900f1f36fSAlex Elder continue; 1480602adf40SYehuda Sadeh } 1481602adf40SYehuda Sadeh 1482602adf40SYehuda Sadeh spin_unlock_irq(q->queue_lock); 1483602adf40SYehuda Sadeh 1484e88a36ecSJosh Durgin down_read(&rbd_dev->header_rwsem); 1485e88a36ecSJosh Durgin 1486d1d25646SJosh Durgin if (rbd_dev->snap_id != CEPH_NOSNAP && !rbd_dev->snap_exists) { 1487d1d25646SJosh Durgin up_read(&rbd_dev->header_rwsem); 1488e88a36ecSJosh Durgin dout("request for non-existent snapshot"); 1489e88a36ecSJosh Durgin spin_lock_irq(q->queue_lock); 1490e88a36ecSJosh Durgin __blk_end_request_all(rq, -ENXIO); 1491e88a36ecSJosh Durgin continue; 1492e88a36ecSJosh Durgin } 1493d1d25646SJosh Durgin 1494d1d25646SJosh Durgin snapc = ceph_get_snap_context(rbd_dev->header.snapc); 1495d1d25646SJosh Durgin 1496d1d25646SJosh Durgin up_read(&rbd_dev->header_rwsem); 1497e88a36ecSJosh Durgin 1498602adf40SYehuda Sadeh dout("%s 0x%x bytes at 0x%llx\n", 1499602adf40SYehuda Sadeh do_write ? "write" : "read", 1500bd919d45SAlex Elder size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); 1501602adf40SYehuda Sadeh 15021fec7093SYehuda Sadeh num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); 15031fec7093SYehuda Sadeh coll = rbd_alloc_coll(num_segs); 15041fec7093SYehuda Sadeh if (!coll) { 15051fec7093SYehuda Sadeh spin_lock_irq(q->queue_lock); 15061fec7093SYehuda Sadeh __blk_end_request_all(rq, -ENOMEM); 1507d1d25646SJosh Durgin ceph_put_snap_context(snapc); 150800f1f36fSAlex Elder continue; 15091fec7093SYehuda Sadeh } 15101fec7093SYehuda Sadeh 1511602adf40SYehuda Sadeh do { 1512602adf40SYehuda Sadeh /* a bio clone to be passed down to OSD req */ 1513bd919d45SAlex Elder dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); 1514602adf40SYehuda Sadeh op_size = rbd_get_segment(&rbd_dev->header, 1515ca1e49a6SAlex Elder rbd_dev->header.object_prefix, 1516602adf40SYehuda Sadeh ofs, size, 1517602adf40SYehuda Sadeh NULL, NULL); 15181fec7093SYehuda Sadeh kref_get(&coll->kref); 1519602adf40SYehuda Sadeh bio = bio_chain_clone(&rq_bio, &next_bio, &bp, 1520602adf40SYehuda Sadeh op_size, GFP_ATOMIC); 1521602adf40SYehuda Sadeh if (!bio) { 15221fec7093SYehuda Sadeh rbd_coll_end_req_index(rq, coll, cur_seg, 15231fec7093SYehuda Sadeh -ENOMEM, op_size); 15241fec7093SYehuda Sadeh goto next_seg; 1525602adf40SYehuda Sadeh } 1526602adf40SYehuda Sadeh 15271fec7093SYehuda Sadeh 1528602adf40SYehuda Sadeh /* init OSD command: write or read */ 1529602adf40SYehuda Sadeh if (do_write) 1530602adf40SYehuda Sadeh rbd_req_write(rq, rbd_dev, 1531d1d25646SJosh Durgin snapc, 1532602adf40SYehuda Sadeh ofs, 15331fec7093SYehuda Sadeh op_size, bio, 15341fec7093SYehuda Sadeh coll, cur_seg); 1535602adf40SYehuda Sadeh else 1536602adf40SYehuda Sadeh rbd_req_read(rq, rbd_dev, 153777dfe99fSJosh Durgin rbd_dev->snap_id, 1538602adf40SYehuda Sadeh ofs, 15391fec7093SYehuda Sadeh op_size, bio, 15401fec7093SYehuda Sadeh coll, cur_seg); 1541602adf40SYehuda Sadeh 15421fec7093SYehuda Sadeh next_seg: 1543602adf40SYehuda Sadeh size -= op_size; 1544602adf40SYehuda Sadeh ofs += op_size; 1545602adf40SYehuda Sadeh 15461fec7093SYehuda Sadeh cur_seg++; 1547602adf40SYehuda Sadeh rq_bio = next_bio; 1548602adf40SYehuda Sadeh } while (size > 0); 15491fec7093SYehuda Sadeh kref_put(&coll->kref, rbd_coll_release); 1550602adf40SYehuda Sadeh 1551602adf40SYehuda Sadeh if (bp) 1552602adf40SYehuda Sadeh bio_pair_release(bp); 1553602adf40SYehuda Sadeh spin_lock_irq(q->queue_lock); 1554d1d25646SJosh Durgin 1555d1d25646SJosh Durgin ceph_put_snap_context(snapc); 1556602adf40SYehuda Sadeh } 1557602adf40SYehuda Sadeh } 1558602adf40SYehuda Sadeh 1559602adf40SYehuda Sadeh /* 1560602adf40SYehuda Sadeh * a queue callback. Makes sure that we don't create a bio that spans across 1561602adf40SYehuda Sadeh * multiple osd objects. One exception would be with a single page bios, 1562602adf40SYehuda Sadeh * which we handle later at bio_chain_clone 1563602adf40SYehuda Sadeh */ 1564602adf40SYehuda Sadeh static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, 1565602adf40SYehuda Sadeh struct bio_vec *bvec) 1566602adf40SYehuda Sadeh { 1567602adf40SYehuda Sadeh struct rbd_device *rbd_dev = q->queuedata; 1568593a9e7bSAlex Elder unsigned int chunk_sectors; 1569593a9e7bSAlex Elder sector_t sector; 1570593a9e7bSAlex Elder unsigned int bio_sectors; 1571602adf40SYehuda Sadeh int max; 1572602adf40SYehuda Sadeh 1573593a9e7bSAlex Elder chunk_sectors = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT); 1574593a9e7bSAlex Elder sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); 1575593a9e7bSAlex Elder bio_sectors = bmd->bi_size >> SECTOR_SHIFT; 1576593a9e7bSAlex Elder 1577602adf40SYehuda Sadeh max = (chunk_sectors - ((sector & (chunk_sectors - 1)) 1578593a9e7bSAlex Elder + bio_sectors)) << SECTOR_SHIFT; 1579602adf40SYehuda Sadeh if (max < 0) 1580602adf40SYehuda Sadeh max = 0; /* bio_add cannot handle a negative return */ 1581602adf40SYehuda Sadeh if (max <= bvec->bv_len && bio_sectors == 0) 1582602adf40SYehuda Sadeh return bvec->bv_len; 1583602adf40SYehuda Sadeh return max; 1584602adf40SYehuda Sadeh } 1585602adf40SYehuda Sadeh 1586602adf40SYehuda Sadeh static void rbd_free_disk(struct rbd_device *rbd_dev) 1587602adf40SYehuda Sadeh { 1588602adf40SYehuda Sadeh struct gendisk *disk = rbd_dev->disk; 1589602adf40SYehuda Sadeh 1590602adf40SYehuda Sadeh if (!disk) 1591602adf40SYehuda Sadeh return; 1592602adf40SYehuda Sadeh 1593602adf40SYehuda Sadeh rbd_header_free(&rbd_dev->header); 1594602adf40SYehuda Sadeh 1595602adf40SYehuda Sadeh if (disk->flags & GENHD_FL_UP) 1596602adf40SYehuda Sadeh del_gendisk(disk); 1597602adf40SYehuda Sadeh if (disk->queue) 1598602adf40SYehuda Sadeh blk_cleanup_queue(disk->queue); 1599602adf40SYehuda Sadeh put_disk(disk); 1600602adf40SYehuda Sadeh } 1601602adf40SYehuda Sadeh 1602602adf40SYehuda Sadeh /* 1603602adf40SYehuda Sadeh * reload the ondisk the header 1604602adf40SYehuda Sadeh */ 1605602adf40SYehuda Sadeh static int rbd_read_header(struct rbd_device *rbd_dev, 1606602adf40SYehuda Sadeh struct rbd_image_header *header) 1607602adf40SYehuda Sadeh { 1608602adf40SYehuda Sadeh ssize_t rc; 1609602adf40SYehuda Sadeh struct rbd_image_header_ondisk *dh; 161050f7c4c9SXi Wang u32 snap_count = 0; 161159c2be1eSYehuda Sadeh u64 ver; 161200f1f36fSAlex Elder size_t len; 1613602adf40SYehuda Sadeh 161400f1f36fSAlex Elder /* 161500f1f36fSAlex Elder * First reads the fixed-size header to determine the number 161600f1f36fSAlex Elder * of snapshots, then re-reads it, along with all snapshot 161700f1f36fSAlex Elder * records as well as their stored names. 161800f1f36fSAlex Elder */ 161900f1f36fSAlex Elder len = sizeof (*dh); 1620602adf40SYehuda Sadeh while (1) { 1621602adf40SYehuda Sadeh dh = kmalloc(len, GFP_KERNEL); 1622602adf40SYehuda Sadeh if (!dh) 1623602adf40SYehuda Sadeh return -ENOMEM; 1624602adf40SYehuda Sadeh 1625602adf40SYehuda Sadeh rc = rbd_req_sync_read(rbd_dev, 16269a5d690bSAlex Elder CEPH_NOSNAP, 16270bed54dcSAlex Elder rbd_dev->header_name, 1628602adf40SYehuda Sadeh 0, len, 162959c2be1eSYehuda Sadeh (char *)dh, &ver); 1630602adf40SYehuda Sadeh if (rc < 0) 1631602adf40SYehuda Sadeh goto out_dh; 1632602adf40SYehuda Sadeh 1633ed63f4fdSAlex Elder rc = rbd_header_from_disk(header, dh, snap_count); 163481e759fbSJosh Durgin if (rc < 0) { 163500f1f36fSAlex Elder if (rc == -ENXIO) 163681e759fbSJosh Durgin pr_warning("unrecognized header format" 16370bed54dcSAlex Elder " for image %s\n", 16380bed54dcSAlex Elder rbd_dev->image_name); 1639602adf40SYehuda Sadeh goto out_dh; 164081e759fbSJosh Durgin } 1641602adf40SYehuda Sadeh 164200f1f36fSAlex Elder if (snap_count == header->total_snaps) 164300f1f36fSAlex Elder break; 164400f1f36fSAlex Elder 1645602adf40SYehuda Sadeh snap_count = header->total_snaps; 164600f1f36fSAlex Elder len = sizeof (*dh) + 164700f1f36fSAlex Elder snap_count * sizeof(struct rbd_image_snap_ondisk) + 164800f1f36fSAlex Elder header->snap_names_len; 164900f1f36fSAlex Elder 1650602adf40SYehuda Sadeh rbd_header_free(header); 1651602adf40SYehuda Sadeh kfree(dh); 1652602adf40SYehuda Sadeh } 165359c2be1eSYehuda Sadeh header->obj_version = ver; 1654602adf40SYehuda Sadeh 1655602adf40SYehuda Sadeh out_dh: 1656602adf40SYehuda Sadeh kfree(dh); 1657602adf40SYehuda Sadeh return rc; 1658602adf40SYehuda Sadeh } 1659602adf40SYehuda Sadeh 1660602adf40SYehuda Sadeh /* 1661602adf40SYehuda Sadeh * create a snapshot 1662602adf40SYehuda Sadeh */ 16630ce1a794SAlex Elder static int rbd_header_add_snap(struct rbd_device *rbd_dev, 1664602adf40SYehuda Sadeh const char *snap_name, 1665602adf40SYehuda Sadeh gfp_t gfp_flags) 1666602adf40SYehuda Sadeh { 1667602adf40SYehuda Sadeh int name_len = strlen(snap_name); 1668602adf40SYehuda Sadeh u64 new_snapid; 1669602adf40SYehuda Sadeh int ret; 1670916d4d67SSage Weil void *data, *p, *e; 167159c2be1eSYehuda Sadeh u64 ver; 16721dbb4399SAlex Elder struct ceph_mon_client *monc; 1673602adf40SYehuda Sadeh 1674602adf40SYehuda Sadeh /* we should create a snapshot only if we're pointing at the head */ 16750ce1a794SAlex Elder if (rbd_dev->snap_id != CEPH_NOSNAP) 1676602adf40SYehuda Sadeh return -EINVAL; 1677602adf40SYehuda Sadeh 16780ce1a794SAlex Elder monc = &rbd_dev->rbd_client->client->monc; 16790ce1a794SAlex Elder ret = ceph_monc_create_snapid(monc, rbd_dev->pool_id, &new_snapid); 1680bd919d45SAlex Elder dout("created snapid=%llu\n", (unsigned long long) new_snapid); 1681602adf40SYehuda Sadeh if (ret < 0) 1682602adf40SYehuda Sadeh return ret; 1683602adf40SYehuda Sadeh 1684602adf40SYehuda Sadeh data = kmalloc(name_len + 16, gfp_flags); 1685602adf40SYehuda Sadeh if (!data) 1686602adf40SYehuda Sadeh return -ENOMEM; 1687602adf40SYehuda Sadeh 1688916d4d67SSage Weil p = data; 1689916d4d67SSage Weil e = data + name_len + 16; 1690602adf40SYehuda Sadeh 1691916d4d67SSage Weil ceph_encode_string_safe(&p, e, snap_name, name_len, bad); 1692916d4d67SSage Weil ceph_encode_64_safe(&p, e, new_snapid, bad); 1693602adf40SYehuda Sadeh 16940bed54dcSAlex Elder ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, 16950ce1a794SAlex Elder "rbd", "snap_add", 1696916d4d67SSage Weil data, p - data, &ver); 1697602adf40SYehuda Sadeh 1698916d4d67SSage Weil kfree(data); 1699602adf40SYehuda Sadeh 1700505cbb9bSAlex Elder return ret < 0 ? ret : 0; 1701602adf40SYehuda Sadeh bad: 1702602adf40SYehuda Sadeh return -ERANGE; 1703602adf40SYehuda Sadeh } 1704602adf40SYehuda Sadeh 1705dfc5606dSYehuda Sadeh static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev) 1706dfc5606dSYehuda Sadeh { 1707dfc5606dSYehuda Sadeh struct rbd_snap *snap; 1708a0593290SAlex Elder struct rbd_snap *next; 1709dfc5606dSYehuda Sadeh 1710a0593290SAlex Elder list_for_each_entry_safe(snap, next, &rbd_dev->snaps, node) 171114e7085dSAlex Elder __rbd_remove_snap_dev(snap); 1712dfc5606dSYehuda Sadeh } 1713dfc5606dSYehuda Sadeh 1714602adf40SYehuda Sadeh /* 1715602adf40SYehuda Sadeh * only read the first part of the ondisk header, without the snaps info 1716602adf40SYehuda Sadeh */ 1717263c6ca0SJosh Durgin static int __rbd_refresh_header(struct rbd_device *rbd_dev) 1718602adf40SYehuda Sadeh { 1719602adf40SYehuda Sadeh int ret; 1720602adf40SYehuda Sadeh struct rbd_image_header h; 1721602adf40SYehuda Sadeh 1722602adf40SYehuda Sadeh ret = rbd_read_header(rbd_dev, &h); 1723602adf40SYehuda Sadeh if (ret < 0) 1724602adf40SYehuda Sadeh return ret; 1725602adf40SYehuda Sadeh 1726a51aa0c0SJosh Durgin down_write(&rbd_dev->header_rwsem); 1727a51aa0c0SJosh Durgin 17289db4b3e3SSage Weil /* resized? */ 1729474ef7ceSJosh Durgin if (rbd_dev->snap_id == CEPH_NOSNAP) { 1730474ef7ceSJosh Durgin sector_t size = (sector_t) h.image_size / SECTOR_SIZE; 1731474ef7ceSJosh Durgin 1732474ef7ceSJosh Durgin dout("setting size to %llu sectors", (unsigned long long) size); 1733474ef7ceSJosh Durgin set_capacity(rbd_dev->disk, size); 1734474ef7ceSJosh Durgin } 17359db4b3e3SSage Weil 1736849b4260SAlex Elder /* rbd_dev->header.object_prefix shouldn't change */ 1737602adf40SYehuda Sadeh kfree(rbd_dev->header.snap_sizes); 1738849b4260SAlex Elder kfree(rbd_dev->header.snap_names); 1739d1d25646SJosh Durgin /* osd requests may still refer to snapc */ 1740d1d25646SJosh Durgin ceph_put_snap_context(rbd_dev->header.snapc); 1741602adf40SYehuda Sadeh 1742a71b891bSJosh Durgin rbd_dev->header.obj_version = h.obj_version; 174393a24e08SJosh Durgin rbd_dev->header.image_size = h.image_size; 1744602adf40SYehuda Sadeh rbd_dev->header.total_snaps = h.total_snaps; 1745602adf40SYehuda Sadeh rbd_dev->header.snapc = h.snapc; 1746602adf40SYehuda Sadeh rbd_dev->header.snap_names = h.snap_names; 1747dfc5606dSYehuda Sadeh rbd_dev->header.snap_names_len = h.snap_names_len; 1748602adf40SYehuda Sadeh rbd_dev->header.snap_sizes = h.snap_sizes; 1749849b4260SAlex Elder /* Free the extra copy of the object prefix */ 1750849b4260SAlex Elder WARN_ON(strcmp(rbd_dev->header.object_prefix, h.object_prefix)); 1751849b4260SAlex Elder kfree(h.object_prefix); 1752849b4260SAlex Elder 1753dfc5606dSYehuda Sadeh ret = __rbd_init_snaps_header(rbd_dev); 1754dfc5606dSYehuda Sadeh 1755c666601aSJosh Durgin up_write(&rbd_dev->header_rwsem); 1756602adf40SYehuda Sadeh 1757dfc5606dSYehuda Sadeh return ret; 1758602adf40SYehuda Sadeh } 1759602adf40SYehuda Sadeh 1760602adf40SYehuda Sadeh static int rbd_init_disk(struct rbd_device *rbd_dev) 1761602adf40SYehuda Sadeh { 1762602adf40SYehuda Sadeh struct gendisk *disk; 1763602adf40SYehuda Sadeh struct request_queue *q; 1764602adf40SYehuda Sadeh int rc; 1765593a9e7bSAlex Elder u64 segment_size; 1766602adf40SYehuda Sadeh u64 total_size = 0; 1767602adf40SYehuda Sadeh 1768602adf40SYehuda Sadeh /* contact OSD, request size info about the object being mapped */ 1769602adf40SYehuda Sadeh rc = rbd_read_header(rbd_dev, &rbd_dev->header); 1770602adf40SYehuda Sadeh if (rc) 1771602adf40SYehuda Sadeh return rc; 1772602adf40SYehuda Sadeh 1773dfc5606dSYehuda Sadeh /* no need to lock here, as rbd_dev is not registered yet */ 1774dfc5606dSYehuda Sadeh rc = __rbd_init_snaps_header(rbd_dev); 1775dfc5606dSYehuda Sadeh if (rc) 1776dfc5606dSYehuda Sadeh return rc; 1777dfc5606dSYehuda Sadeh 1778cc9d734cSJosh Durgin rc = rbd_header_set_snap(rbd_dev, &total_size); 1779602adf40SYehuda Sadeh if (rc) 1780602adf40SYehuda Sadeh return rc; 1781602adf40SYehuda Sadeh 1782602adf40SYehuda Sadeh /* create gendisk info */ 1783602adf40SYehuda Sadeh rc = -ENOMEM; 1784602adf40SYehuda Sadeh disk = alloc_disk(RBD_MINORS_PER_MAJOR); 1785602adf40SYehuda Sadeh if (!disk) 1786602adf40SYehuda Sadeh goto out; 1787602adf40SYehuda Sadeh 1788f0f8cef5SAlex Elder snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", 1789de71a297SAlex Elder rbd_dev->dev_id); 1790602adf40SYehuda Sadeh disk->major = rbd_dev->major; 1791602adf40SYehuda Sadeh disk->first_minor = 0; 1792602adf40SYehuda Sadeh disk->fops = &rbd_bd_ops; 1793602adf40SYehuda Sadeh disk->private_data = rbd_dev; 1794602adf40SYehuda Sadeh 1795602adf40SYehuda Sadeh /* init rq */ 1796602adf40SYehuda Sadeh rc = -ENOMEM; 1797602adf40SYehuda Sadeh q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); 1798602adf40SYehuda Sadeh if (!q) 1799602adf40SYehuda Sadeh goto out_disk; 1800029bcbd8SJosh Durgin 1801593a9e7bSAlex Elder /* We use the default size, but let's be explicit about it. */ 1802593a9e7bSAlex Elder blk_queue_physical_block_size(q, SECTOR_SIZE); 1803593a9e7bSAlex Elder 1804029bcbd8SJosh Durgin /* set io sizes to object size */ 1805593a9e7bSAlex Elder segment_size = rbd_obj_bytes(&rbd_dev->header); 1806593a9e7bSAlex Elder blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); 1807593a9e7bSAlex Elder blk_queue_max_segment_size(q, segment_size); 1808593a9e7bSAlex Elder blk_queue_io_min(q, segment_size); 1809593a9e7bSAlex Elder blk_queue_io_opt(q, segment_size); 1810029bcbd8SJosh Durgin 1811602adf40SYehuda Sadeh blk_queue_merge_bvec(q, rbd_merge_bvec); 1812602adf40SYehuda Sadeh disk->queue = q; 1813602adf40SYehuda Sadeh 1814602adf40SYehuda Sadeh q->queuedata = rbd_dev; 1815602adf40SYehuda Sadeh 1816602adf40SYehuda Sadeh rbd_dev->disk = disk; 1817602adf40SYehuda Sadeh rbd_dev->q = q; 1818602adf40SYehuda Sadeh 1819602adf40SYehuda Sadeh /* finally, announce the disk to the world */ 1820593a9e7bSAlex Elder set_capacity(disk, total_size / SECTOR_SIZE); 1821602adf40SYehuda Sadeh add_disk(disk); 1822602adf40SYehuda Sadeh 1823602adf40SYehuda Sadeh pr_info("%s: added with size 0x%llx\n", 1824602adf40SYehuda Sadeh disk->disk_name, (unsigned long long)total_size); 1825602adf40SYehuda Sadeh return 0; 1826602adf40SYehuda Sadeh 1827602adf40SYehuda Sadeh out_disk: 1828602adf40SYehuda Sadeh put_disk(disk); 1829602adf40SYehuda Sadeh out: 1830602adf40SYehuda Sadeh return rc; 1831602adf40SYehuda Sadeh } 1832602adf40SYehuda Sadeh 1833dfc5606dSYehuda Sadeh /* 1834dfc5606dSYehuda Sadeh sysfs 1835dfc5606dSYehuda Sadeh */ 1836602adf40SYehuda Sadeh 1837593a9e7bSAlex Elder static struct rbd_device *dev_to_rbd_dev(struct device *dev) 1838593a9e7bSAlex Elder { 1839593a9e7bSAlex Elder return container_of(dev, struct rbd_device, dev); 1840593a9e7bSAlex Elder } 1841593a9e7bSAlex Elder 1842dfc5606dSYehuda Sadeh static ssize_t rbd_size_show(struct device *dev, 1843dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1844602adf40SYehuda Sadeh { 1845593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1846a51aa0c0SJosh Durgin sector_t size; 1847dfc5606dSYehuda Sadeh 1848a51aa0c0SJosh Durgin down_read(&rbd_dev->header_rwsem); 1849a51aa0c0SJosh Durgin size = get_capacity(rbd_dev->disk); 1850a51aa0c0SJosh Durgin up_read(&rbd_dev->header_rwsem); 1851a51aa0c0SJosh Durgin 1852a51aa0c0SJosh Durgin return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE); 1853602adf40SYehuda Sadeh } 1854602adf40SYehuda Sadeh 1855dfc5606dSYehuda Sadeh static ssize_t rbd_major_show(struct device *dev, 1856dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1857602adf40SYehuda Sadeh { 1858593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1859dfc5606dSYehuda Sadeh 1860dfc5606dSYehuda Sadeh return sprintf(buf, "%d\n", rbd_dev->major); 1861dfc5606dSYehuda Sadeh } 1862dfc5606dSYehuda Sadeh 1863dfc5606dSYehuda Sadeh static ssize_t rbd_client_id_show(struct device *dev, 1864dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1865dfc5606dSYehuda Sadeh { 1866593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1867dfc5606dSYehuda Sadeh 18681dbb4399SAlex Elder return sprintf(buf, "client%lld\n", 18691dbb4399SAlex Elder ceph_client_id(rbd_dev->rbd_client->client)); 1870dfc5606dSYehuda Sadeh } 1871dfc5606dSYehuda Sadeh 1872dfc5606dSYehuda Sadeh static ssize_t rbd_pool_show(struct device *dev, 1873dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1874dfc5606dSYehuda Sadeh { 1875593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1876dfc5606dSYehuda Sadeh 1877dfc5606dSYehuda Sadeh return sprintf(buf, "%s\n", rbd_dev->pool_name); 1878dfc5606dSYehuda Sadeh } 1879dfc5606dSYehuda Sadeh 18809bb2f334SAlex Elder static ssize_t rbd_pool_id_show(struct device *dev, 18819bb2f334SAlex Elder struct device_attribute *attr, char *buf) 18829bb2f334SAlex Elder { 18839bb2f334SAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 18849bb2f334SAlex Elder 18859bb2f334SAlex Elder return sprintf(buf, "%d\n", rbd_dev->pool_id); 18869bb2f334SAlex Elder } 18879bb2f334SAlex Elder 1888dfc5606dSYehuda Sadeh static ssize_t rbd_name_show(struct device *dev, 1889dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1890dfc5606dSYehuda Sadeh { 1891593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1892dfc5606dSYehuda Sadeh 18930bed54dcSAlex Elder return sprintf(buf, "%s\n", rbd_dev->image_name); 1894dfc5606dSYehuda Sadeh } 1895dfc5606dSYehuda Sadeh 1896dfc5606dSYehuda Sadeh static ssize_t rbd_snap_show(struct device *dev, 1897dfc5606dSYehuda Sadeh struct device_attribute *attr, 1898dfc5606dSYehuda Sadeh char *buf) 1899dfc5606dSYehuda Sadeh { 1900593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1901dfc5606dSYehuda Sadeh 1902dfc5606dSYehuda Sadeh return sprintf(buf, "%s\n", rbd_dev->snap_name); 1903dfc5606dSYehuda Sadeh } 1904dfc5606dSYehuda Sadeh 1905dfc5606dSYehuda Sadeh static ssize_t rbd_image_refresh(struct device *dev, 1906dfc5606dSYehuda Sadeh struct device_attribute *attr, 1907dfc5606dSYehuda Sadeh const char *buf, 1908dfc5606dSYehuda Sadeh size_t size) 1909dfc5606dSYehuda Sadeh { 1910593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1911dfc5606dSYehuda Sadeh int rc; 1912dfc5606dSYehuda Sadeh int ret = size; 1913602adf40SYehuda Sadeh 1914602adf40SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 1915602adf40SYehuda Sadeh 1916263c6ca0SJosh Durgin rc = __rbd_refresh_header(rbd_dev); 1917dfc5606dSYehuda Sadeh if (rc < 0) 1918dfc5606dSYehuda Sadeh ret = rc; 1919602adf40SYehuda Sadeh 1920dfc5606dSYehuda Sadeh mutex_unlock(&ctl_mutex); 1921dfc5606dSYehuda Sadeh return ret; 1922dfc5606dSYehuda Sadeh } 1923602adf40SYehuda Sadeh 1924dfc5606dSYehuda Sadeh static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL); 1925dfc5606dSYehuda Sadeh static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL); 1926dfc5606dSYehuda Sadeh static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL); 1927dfc5606dSYehuda Sadeh static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL); 19289bb2f334SAlex Elder static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL); 1929dfc5606dSYehuda Sadeh static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL); 1930dfc5606dSYehuda Sadeh static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); 1931dfc5606dSYehuda Sadeh static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); 1932dfc5606dSYehuda Sadeh static DEVICE_ATTR(create_snap, S_IWUSR, NULL, rbd_snap_add); 1933dfc5606dSYehuda Sadeh 1934dfc5606dSYehuda Sadeh static struct attribute *rbd_attrs[] = { 1935dfc5606dSYehuda Sadeh &dev_attr_size.attr, 1936dfc5606dSYehuda Sadeh &dev_attr_major.attr, 1937dfc5606dSYehuda Sadeh &dev_attr_client_id.attr, 1938dfc5606dSYehuda Sadeh &dev_attr_pool.attr, 19399bb2f334SAlex Elder &dev_attr_pool_id.attr, 1940dfc5606dSYehuda Sadeh &dev_attr_name.attr, 1941dfc5606dSYehuda Sadeh &dev_attr_current_snap.attr, 1942dfc5606dSYehuda Sadeh &dev_attr_refresh.attr, 1943dfc5606dSYehuda Sadeh &dev_attr_create_snap.attr, 1944dfc5606dSYehuda Sadeh NULL 1945dfc5606dSYehuda Sadeh }; 1946dfc5606dSYehuda Sadeh 1947dfc5606dSYehuda Sadeh static struct attribute_group rbd_attr_group = { 1948dfc5606dSYehuda Sadeh .attrs = rbd_attrs, 1949dfc5606dSYehuda Sadeh }; 1950dfc5606dSYehuda Sadeh 1951dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_attr_groups[] = { 1952dfc5606dSYehuda Sadeh &rbd_attr_group, 1953dfc5606dSYehuda Sadeh NULL 1954dfc5606dSYehuda Sadeh }; 1955dfc5606dSYehuda Sadeh 1956dfc5606dSYehuda Sadeh static void rbd_sysfs_dev_release(struct device *dev) 1957dfc5606dSYehuda Sadeh { 1958dfc5606dSYehuda Sadeh } 1959dfc5606dSYehuda Sadeh 1960dfc5606dSYehuda Sadeh static struct device_type rbd_device_type = { 1961dfc5606dSYehuda Sadeh .name = "rbd", 1962dfc5606dSYehuda Sadeh .groups = rbd_attr_groups, 1963dfc5606dSYehuda Sadeh .release = rbd_sysfs_dev_release, 1964dfc5606dSYehuda Sadeh }; 1965dfc5606dSYehuda Sadeh 1966dfc5606dSYehuda Sadeh 1967dfc5606dSYehuda Sadeh /* 1968dfc5606dSYehuda Sadeh sysfs - snapshots 1969dfc5606dSYehuda Sadeh */ 1970dfc5606dSYehuda Sadeh 1971dfc5606dSYehuda Sadeh static ssize_t rbd_snap_size_show(struct device *dev, 1972dfc5606dSYehuda Sadeh struct device_attribute *attr, 1973dfc5606dSYehuda Sadeh char *buf) 1974dfc5606dSYehuda Sadeh { 1975dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 1976dfc5606dSYehuda Sadeh 19773591538fSJosh Durgin return sprintf(buf, "%llu\n", (unsigned long long)snap->size); 1978dfc5606dSYehuda Sadeh } 1979dfc5606dSYehuda Sadeh 1980dfc5606dSYehuda Sadeh static ssize_t rbd_snap_id_show(struct device *dev, 1981dfc5606dSYehuda Sadeh struct device_attribute *attr, 1982dfc5606dSYehuda Sadeh char *buf) 1983dfc5606dSYehuda Sadeh { 1984dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 1985dfc5606dSYehuda Sadeh 1986593a9e7bSAlex Elder return sprintf(buf, "%llu\n", (unsigned long long)snap->id); 1987dfc5606dSYehuda Sadeh } 1988dfc5606dSYehuda Sadeh 1989dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); 1990dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL); 1991dfc5606dSYehuda Sadeh 1992dfc5606dSYehuda Sadeh static struct attribute *rbd_snap_attrs[] = { 1993dfc5606dSYehuda Sadeh &dev_attr_snap_size.attr, 1994dfc5606dSYehuda Sadeh &dev_attr_snap_id.attr, 1995dfc5606dSYehuda Sadeh NULL, 1996dfc5606dSYehuda Sadeh }; 1997dfc5606dSYehuda Sadeh 1998dfc5606dSYehuda Sadeh static struct attribute_group rbd_snap_attr_group = { 1999dfc5606dSYehuda Sadeh .attrs = rbd_snap_attrs, 2000dfc5606dSYehuda Sadeh }; 2001dfc5606dSYehuda Sadeh 2002dfc5606dSYehuda Sadeh static void rbd_snap_dev_release(struct device *dev) 2003dfc5606dSYehuda Sadeh { 2004dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 2005dfc5606dSYehuda Sadeh kfree(snap->name); 2006dfc5606dSYehuda Sadeh kfree(snap); 2007dfc5606dSYehuda Sadeh } 2008dfc5606dSYehuda Sadeh 2009dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_snap_attr_groups[] = { 2010dfc5606dSYehuda Sadeh &rbd_snap_attr_group, 2011dfc5606dSYehuda Sadeh NULL 2012dfc5606dSYehuda Sadeh }; 2013dfc5606dSYehuda Sadeh 2014dfc5606dSYehuda Sadeh static struct device_type rbd_snap_device_type = { 2015dfc5606dSYehuda Sadeh .groups = rbd_snap_attr_groups, 2016dfc5606dSYehuda Sadeh .release = rbd_snap_dev_release, 2017dfc5606dSYehuda Sadeh }; 2018dfc5606dSYehuda Sadeh 201914e7085dSAlex Elder static void __rbd_remove_snap_dev(struct rbd_snap *snap) 2020dfc5606dSYehuda Sadeh { 2021dfc5606dSYehuda Sadeh list_del(&snap->node); 2022dfc5606dSYehuda Sadeh device_unregister(&snap->dev); 2023dfc5606dSYehuda Sadeh } 2024dfc5606dSYehuda Sadeh 202514e7085dSAlex Elder static int rbd_register_snap_dev(struct rbd_snap *snap, 2026dfc5606dSYehuda Sadeh struct device *parent) 2027dfc5606dSYehuda Sadeh { 2028dfc5606dSYehuda Sadeh struct device *dev = &snap->dev; 2029dfc5606dSYehuda Sadeh int ret; 2030dfc5606dSYehuda Sadeh 2031dfc5606dSYehuda Sadeh dev->type = &rbd_snap_device_type; 2032dfc5606dSYehuda Sadeh dev->parent = parent; 2033dfc5606dSYehuda Sadeh dev->release = rbd_snap_dev_release; 2034dfc5606dSYehuda Sadeh dev_set_name(dev, "snap_%s", snap->name); 2035dfc5606dSYehuda Sadeh ret = device_register(dev); 2036dfc5606dSYehuda Sadeh 2037dfc5606dSYehuda Sadeh return ret; 2038dfc5606dSYehuda Sadeh } 2039dfc5606dSYehuda Sadeh 20404e891e0aSAlex Elder static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev, 20414e891e0aSAlex Elder int i, const char *name) 2042dfc5606dSYehuda Sadeh { 20434e891e0aSAlex Elder struct rbd_snap *snap; 2044dfc5606dSYehuda Sadeh int ret; 20454e891e0aSAlex Elder 20464e891e0aSAlex Elder snap = kzalloc(sizeof (*snap), GFP_KERNEL); 2047dfc5606dSYehuda Sadeh if (!snap) 20484e891e0aSAlex Elder return ERR_PTR(-ENOMEM); 20494e891e0aSAlex Elder 20504e891e0aSAlex Elder ret = -ENOMEM; 2051dfc5606dSYehuda Sadeh snap->name = kstrdup(name, GFP_KERNEL); 20524e891e0aSAlex Elder if (!snap->name) 20534e891e0aSAlex Elder goto err; 20544e891e0aSAlex Elder 2055dfc5606dSYehuda Sadeh snap->size = rbd_dev->header.snap_sizes[i]; 2056dfc5606dSYehuda Sadeh snap->id = rbd_dev->header.snapc->snaps[i]; 2057dfc5606dSYehuda Sadeh if (device_is_registered(&rbd_dev->dev)) { 205814e7085dSAlex Elder ret = rbd_register_snap_dev(snap, &rbd_dev->dev); 2059dfc5606dSYehuda Sadeh if (ret < 0) 2060dfc5606dSYehuda Sadeh goto err; 2061dfc5606dSYehuda Sadeh } 20624e891e0aSAlex Elder 20634e891e0aSAlex Elder return snap; 20644e891e0aSAlex Elder 2065dfc5606dSYehuda Sadeh err: 2066dfc5606dSYehuda Sadeh kfree(snap->name); 2067dfc5606dSYehuda Sadeh kfree(snap); 20684e891e0aSAlex Elder 20694e891e0aSAlex Elder return ERR_PTR(ret); 2070dfc5606dSYehuda Sadeh } 2071dfc5606dSYehuda Sadeh 2072dfc5606dSYehuda Sadeh /* 2073dfc5606dSYehuda Sadeh * search for the previous snap in a null delimited string list 2074dfc5606dSYehuda Sadeh */ 2075dfc5606dSYehuda Sadeh const char *rbd_prev_snap_name(const char *name, const char *start) 2076dfc5606dSYehuda Sadeh { 2077dfc5606dSYehuda Sadeh if (name < start + 2) 2078dfc5606dSYehuda Sadeh return NULL; 2079dfc5606dSYehuda Sadeh 2080dfc5606dSYehuda Sadeh name -= 2; 2081dfc5606dSYehuda Sadeh while (*name) { 2082dfc5606dSYehuda Sadeh if (name == start) 2083dfc5606dSYehuda Sadeh return start; 2084dfc5606dSYehuda Sadeh name--; 2085dfc5606dSYehuda Sadeh } 2086dfc5606dSYehuda Sadeh return name + 1; 2087dfc5606dSYehuda Sadeh } 2088dfc5606dSYehuda Sadeh 2089dfc5606dSYehuda Sadeh /* 2090dfc5606dSYehuda Sadeh * compare the old list of snapshots that we have to what's in the header 2091dfc5606dSYehuda Sadeh * and update it accordingly. Note that the header holds the snapshots 2092dfc5606dSYehuda Sadeh * in a reverse order (from newest to oldest) and we need to go from 2093dfc5606dSYehuda Sadeh * older to new so that we don't get a duplicate snap name when 2094dfc5606dSYehuda Sadeh * doing the process (e.g., removed snapshot and recreated a new 2095dfc5606dSYehuda Sadeh * one with the same name. 2096dfc5606dSYehuda Sadeh */ 2097dfc5606dSYehuda Sadeh static int __rbd_init_snaps_header(struct rbd_device *rbd_dev) 2098dfc5606dSYehuda Sadeh { 2099dfc5606dSYehuda Sadeh const char *name, *first_name; 2100dfc5606dSYehuda Sadeh int i = rbd_dev->header.total_snaps; 2101dfc5606dSYehuda Sadeh struct rbd_snap *snap, *old_snap = NULL; 2102dfc5606dSYehuda Sadeh struct list_head *p, *n; 2103dfc5606dSYehuda Sadeh 2104dfc5606dSYehuda Sadeh first_name = rbd_dev->header.snap_names; 2105dfc5606dSYehuda Sadeh name = first_name + rbd_dev->header.snap_names_len; 2106dfc5606dSYehuda Sadeh 2107dfc5606dSYehuda Sadeh list_for_each_prev_safe(p, n, &rbd_dev->snaps) { 2108dfc5606dSYehuda Sadeh u64 cur_id; 2109dfc5606dSYehuda Sadeh 2110dfc5606dSYehuda Sadeh old_snap = list_entry(p, struct rbd_snap, node); 2111dfc5606dSYehuda Sadeh 2112dfc5606dSYehuda Sadeh if (i) 2113dfc5606dSYehuda Sadeh cur_id = rbd_dev->header.snapc->snaps[i - 1]; 2114dfc5606dSYehuda Sadeh 2115dfc5606dSYehuda Sadeh if (!i || old_snap->id < cur_id) { 2116e88a36ecSJosh Durgin /* 2117e88a36ecSJosh Durgin * old_snap->id was skipped, thus was 2118e88a36ecSJosh Durgin * removed. If this rbd_dev is mapped to 2119e88a36ecSJosh Durgin * the removed snapshot, record that it no 2120e88a36ecSJosh Durgin * longer exists, to prevent further I/O. 2121e88a36ecSJosh Durgin */ 2122e88a36ecSJosh Durgin if (rbd_dev->snap_id == old_snap->id) 2123e88a36ecSJosh Durgin rbd_dev->snap_exists = false; 212414e7085dSAlex Elder __rbd_remove_snap_dev(old_snap); 2125dfc5606dSYehuda Sadeh continue; 2126dfc5606dSYehuda Sadeh } 2127dfc5606dSYehuda Sadeh if (old_snap->id == cur_id) { 2128dfc5606dSYehuda Sadeh /* we have this snapshot already */ 2129dfc5606dSYehuda Sadeh i--; 2130dfc5606dSYehuda Sadeh name = rbd_prev_snap_name(name, first_name); 2131dfc5606dSYehuda Sadeh continue; 2132dfc5606dSYehuda Sadeh } 2133dfc5606dSYehuda Sadeh for (; i > 0; 2134dfc5606dSYehuda Sadeh i--, name = rbd_prev_snap_name(name, first_name)) { 2135dfc5606dSYehuda Sadeh if (!name) { 2136dfc5606dSYehuda Sadeh WARN_ON(1); 2137dfc5606dSYehuda Sadeh return -EINVAL; 2138dfc5606dSYehuda Sadeh } 2139dfc5606dSYehuda Sadeh cur_id = rbd_dev->header.snapc->snaps[i]; 2140dfc5606dSYehuda Sadeh /* snapshot removal? handle it above */ 2141dfc5606dSYehuda Sadeh if (cur_id >= old_snap->id) 2142dfc5606dSYehuda Sadeh break; 2143dfc5606dSYehuda Sadeh /* a new snapshot */ 21444e891e0aSAlex Elder snap = __rbd_add_snap_dev(rbd_dev, i - 1, name); 21454e891e0aSAlex Elder if (IS_ERR(snap)) 21464e891e0aSAlex Elder return PTR_ERR(snap); 2147dfc5606dSYehuda Sadeh 2148dfc5606dSYehuda Sadeh /* note that we add it backward so using n and not p */ 2149dfc5606dSYehuda Sadeh list_add(&snap->node, n); 2150dfc5606dSYehuda Sadeh p = &snap->node; 2151dfc5606dSYehuda Sadeh } 2152dfc5606dSYehuda Sadeh } 2153dfc5606dSYehuda Sadeh /* we're done going over the old snap list, just add what's left */ 2154dfc5606dSYehuda Sadeh for (; i > 0; i--) { 2155dfc5606dSYehuda Sadeh name = rbd_prev_snap_name(name, first_name); 2156dfc5606dSYehuda Sadeh if (!name) { 2157dfc5606dSYehuda Sadeh WARN_ON(1); 2158dfc5606dSYehuda Sadeh return -EINVAL; 2159dfc5606dSYehuda Sadeh } 21604e891e0aSAlex Elder snap = __rbd_add_snap_dev(rbd_dev, i - 1, name); 21614e891e0aSAlex Elder if (IS_ERR(snap)) 21624e891e0aSAlex Elder return PTR_ERR(snap); 2163dfc5606dSYehuda Sadeh list_add(&snap->node, &rbd_dev->snaps); 2164dfc5606dSYehuda Sadeh } 2165dfc5606dSYehuda Sadeh 2166dfc5606dSYehuda Sadeh return 0; 2167dfc5606dSYehuda Sadeh } 2168dfc5606dSYehuda Sadeh 2169dfc5606dSYehuda Sadeh static int rbd_bus_add_dev(struct rbd_device *rbd_dev) 2170dfc5606dSYehuda Sadeh { 2171f0f8cef5SAlex Elder int ret; 2172dfc5606dSYehuda Sadeh struct device *dev; 2173dfc5606dSYehuda Sadeh struct rbd_snap *snap; 2174dfc5606dSYehuda Sadeh 2175dfc5606dSYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2176dfc5606dSYehuda Sadeh dev = &rbd_dev->dev; 2177dfc5606dSYehuda Sadeh 2178dfc5606dSYehuda Sadeh dev->bus = &rbd_bus_type; 2179dfc5606dSYehuda Sadeh dev->type = &rbd_device_type; 2180dfc5606dSYehuda Sadeh dev->parent = &rbd_root_dev; 2181dfc5606dSYehuda Sadeh dev->release = rbd_dev_release; 2182de71a297SAlex Elder dev_set_name(dev, "%d", rbd_dev->dev_id); 2183dfc5606dSYehuda Sadeh ret = device_register(dev); 2184dfc5606dSYehuda Sadeh if (ret < 0) 2185f0f8cef5SAlex Elder goto out; 2186dfc5606dSYehuda Sadeh 2187dfc5606dSYehuda Sadeh list_for_each_entry(snap, &rbd_dev->snaps, node) { 218814e7085dSAlex Elder ret = rbd_register_snap_dev(snap, &rbd_dev->dev); 2189dfc5606dSYehuda Sadeh if (ret < 0) 2190602adf40SYehuda Sadeh break; 2191602adf40SYehuda Sadeh } 2192f0f8cef5SAlex Elder out: 2193dfc5606dSYehuda Sadeh mutex_unlock(&ctl_mutex); 2194dfc5606dSYehuda Sadeh return ret; 2195602adf40SYehuda Sadeh } 2196602adf40SYehuda Sadeh 2197dfc5606dSYehuda Sadeh static void rbd_bus_del_dev(struct rbd_device *rbd_dev) 2198dfc5606dSYehuda Sadeh { 2199dfc5606dSYehuda Sadeh device_unregister(&rbd_dev->dev); 2200dfc5606dSYehuda Sadeh } 2201dfc5606dSYehuda Sadeh 220259c2be1eSYehuda Sadeh static int rbd_init_watch_dev(struct rbd_device *rbd_dev) 220359c2be1eSYehuda Sadeh { 220459c2be1eSYehuda Sadeh int ret, rc; 220559c2be1eSYehuda Sadeh 220659c2be1eSYehuda Sadeh do { 22070e6f322dSAlex Elder ret = rbd_req_sync_watch(rbd_dev); 220859c2be1eSYehuda Sadeh if (ret == -ERANGE) { 220959c2be1eSYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2210263c6ca0SJosh Durgin rc = __rbd_refresh_header(rbd_dev); 221159c2be1eSYehuda Sadeh mutex_unlock(&ctl_mutex); 221259c2be1eSYehuda Sadeh if (rc < 0) 221359c2be1eSYehuda Sadeh return rc; 221459c2be1eSYehuda Sadeh } 221559c2be1eSYehuda Sadeh } while (ret == -ERANGE); 221659c2be1eSYehuda Sadeh 221759c2be1eSYehuda Sadeh return ret; 221859c2be1eSYehuda Sadeh } 221959c2be1eSYehuda Sadeh 22201ddbe94eSAlex Elder static atomic64_t rbd_id_max = ATOMIC64_INIT(0); 22211ddbe94eSAlex Elder 22221ddbe94eSAlex Elder /* 2223499afd5bSAlex Elder * Get a unique rbd identifier for the given new rbd_dev, and add 2224499afd5bSAlex Elder * the rbd_dev to the global list. The minimum rbd id is 1. 22251ddbe94eSAlex Elder */ 2226499afd5bSAlex Elder static void rbd_id_get(struct rbd_device *rbd_dev) 2227b7f23c36SAlex Elder { 2228de71a297SAlex Elder rbd_dev->dev_id = atomic64_inc_return(&rbd_id_max); 2229499afd5bSAlex Elder 2230499afd5bSAlex Elder spin_lock(&rbd_dev_list_lock); 2231499afd5bSAlex Elder list_add_tail(&rbd_dev->node, &rbd_dev_list); 2232499afd5bSAlex Elder spin_unlock(&rbd_dev_list_lock); 2233b7f23c36SAlex Elder } 2234b7f23c36SAlex Elder 22351ddbe94eSAlex Elder /* 2236499afd5bSAlex Elder * Remove an rbd_dev from the global list, and record that its 2237499afd5bSAlex Elder * identifier is no longer in use. 22381ddbe94eSAlex Elder */ 2239499afd5bSAlex Elder static void rbd_id_put(struct rbd_device *rbd_dev) 22401ddbe94eSAlex Elder { 2241d184f6bfSAlex Elder struct list_head *tmp; 2242de71a297SAlex Elder int rbd_id = rbd_dev->dev_id; 2243d184f6bfSAlex Elder int max_id; 2244d184f6bfSAlex Elder 2245d184f6bfSAlex Elder BUG_ON(rbd_id < 1); 2246499afd5bSAlex Elder 2247499afd5bSAlex Elder spin_lock(&rbd_dev_list_lock); 2248499afd5bSAlex Elder list_del_init(&rbd_dev->node); 2249d184f6bfSAlex Elder 2250d184f6bfSAlex Elder /* 2251d184f6bfSAlex Elder * If the id being "put" is not the current maximum, there 2252d184f6bfSAlex Elder * is nothing special we need to do. 2253d184f6bfSAlex Elder */ 2254d184f6bfSAlex Elder if (rbd_id != atomic64_read(&rbd_id_max)) { 2255d184f6bfSAlex Elder spin_unlock(&rbd_dev_list_lock); 2256d184f6bfSAlex Elder return; 2257d184f6bfSAlex Elder } 2258d184f6bfSAlex Elder 2259d184f6bfSAlex Elder /* 2260d184f6bfSAlex Elder * We need to update the current maximum id. Search the 2261d184f6bfSAlex Elder * list to find out what it is. We're more likely to find 2262d184f6bfSAlex Elder * the maximum at the end, so search the list backward. 2263d184f6bfSAlex Elder */ 2264d184f6bfSAlex Elder max_id = 0; 2265d184f6bfSAlex Elder list_for_each_prev(tmp, &rbd_dev_list) { 2266d184f6bfSAlex Elder struct rbd_device *rbd_dev; 2267d184f6bfSAlex Elder 2268d184f6bfSAlex Elder rbd_dev = list_entry(tmp, struct rbd_device, node); 2269d184f6bfSAlex Elder if (rbd_id > max_id) 2270d184f6bfSAlex Elder max_id = rbd_id; 2271d184f6bfSAlex Elder } 2272499afd5bSAlex Elder spin_unlock(&rbd_dev_list_lock); 22731ddbe94eSAlex Elder 22741ddbe94eSAlex Elder /* 2275d184f6bfSAlex Elder * The max id could have been updated by rbd_id_get(), in 2276d184f6bfSAlex Elder * which case it now accurately reflects the new maximum. 2277d184f6bfSAlex Elder * Be careful not to overwrite the maximum value in that 2278d184f6bfSAlex Elder * case. 22791ddbe94eSAlex Elder */ 2280d184f6bfSAlex Elder atomic64_cmpxchg(&rbd_id_max, rbd_id, max_id); 2281b7f23c36SAlex Elder } 2282b7f23c36SAlex Elder 2283a725f65eSAlex Elder /* 2284e28fff26SAlex Elder * Skips over white space at *buf, and updates *buf to point to the 2285e28fff26SAlex Elder * first found non-space character (if any). Returns the length of 2286593a9e7bSAlex Elder * the token (string of non-white space characters) found. Note 2287593a9e7bSAlex Elder * that *buf must be terminated with '\0'. 2288e28fff26SAlex Elder */ 2289e28fff26SAlex Elder static inline size_t next_token(const char **buf) 2290e28fff26SAlex Elder { 2291e28fff26SAlex Elder /* 2292e28fff26SAlex Elder * These are the characters that produce nonzero for 2293e28fff26SAlex Elder * isspace() in the "C" and "POSIX" locales. 2294e28fff26SAlex Elder */ 2295e28fff26SAlex Elder const char *spaces = " \f\n\r\t\v"; 2296e28fff26SAlex Elder 2297e28fff26SAlex Elder *buf += strspn(*buf, spaces); /* Find start of token */ 2298e28fff26SAlex Elder 2299e28fff26SAlex Elder return strcspn(*buf, spaces); /* Return token length */ 2300e28fff26SAlex Elder } 2301e28fff26SAlex Elder 2302e28fff26SAlex Elder /* 2303e28fff26SAlex Elder * Finds the next token in *buf, and if the provided token buffer is 2304e28fff26SAlex Elder * big enough, copies the found token into it. The result, if 2305593a9e7bSAlex Elder * copied, is guaranteed to be terminated with '\0'. Note that *buf 2306593a9e7bSAlex Elder * must be terminated with '\0' on entry. 2307e28fff26SAlex Elder * 2308e28fff26SAlex Elder * Returns the length of the token found (not including the '\0'). 2309e28fff26SAlex Elder * Return value will be 0 if no token is found, and it will be >= 2310e28fff26SAlex Elder * token_size if the token would not fit. 2311e28fff26SAlex Elder * 2312593a9e7bSAlex Elder * The *buf pointer will be updated to point beyond the end of the 2313e28fff26SAlex Elder * found token. Note that this occurs even if the token buffer is 2314e28fff26SAlex Elder * too small to hold it. 2315e28fff26SAlex Elder */ 2316e28fff26SAlex Elder static inline size_t copy_token(const char **buf, 2317e28fff26SAlex Elder char *token, 2318e28fff26SAlex Elder size_t token_size) 2319e28fff26SAlex Elder { 2320e28fff26SAlex Elder size_t len; 2321e28fff26SAlex Elder 2322e28fff26SAlex Elder len = next_token(buf); 2323e28fff26SAlex Elder if (len < token_size) { 2324e28fff26SAlex Elder memcpy(token, *buf, len); 2325e28fff26SAlex Elder *(token + len) = '\0'; 2326e28fff26SAlex Elder } 2327e28fff26SAlex Elder *buf += len; 2328e28fff26SAlex Elder 2329e28fff26SAlex Elder return len; 2330e28fff26SAlex Elder } 2331e28fff26SAlex Elder 2332e28fff26SAlex Elder /* 2333ea3352f4SAlex Elder * Finds the next token in *buf, dynamically allocates a buffer big 2334ea3352f4SAlex Elder * enough to hold a copy of it, and copies the token into the new 2335ea3352f4SAlex Elder * buffer. The copy is guaranteed to be terminated with '\0'. Note 2336ea3352f4SAlex Elder * that a duplicate buffer is created even for a zero-length token. 2337ea3352f4SAlex Elder * 2338ea3352f4SAlex Elder * Returns a pointer to the newly-allocated duplicate, or a null 2339ea3352f4SAlex Elder * pointer if memory for the duplicate was not available. If 2340ea3352f4SAlex Elder * the lenp argument is a non-null pointer, the length of the token 2341ea3352f4SAlex Elder * (not including the '\0') is returned in *lenp. 2342ea3352f4SAlex Elder * 2343ea3352f4SAlex Elder * If successful, the *buf pointer will be updated to point beyond 2344ea3352f4SAlex Elder * the end of the found token. 2345ea3352f4SAlex Elder * 2346ea3352f4SAlex Elder * Note: uses GFP_KERNEL for allocation. 2347ea3352f4SAlex Elder */ 2348ea3352f4SAlex Elder static inline char *dup_token(const char **buf, size_t *lenp) 2349ea3352f4SAlex Elder { 2350ea3352f4SAlex Elder char *dup; 2351ea3352f4SAlex Elder size_t len; 2352ea3352f4SAlex Elder 2353ea3352f4SAlex Elder len = next_token(buf); 2354ea3352f4SAlex Elder dup = kmalloc(len + 1, GFP_KERNEL); 2355ea3352f4SAlex Elder if (!dup) 2356ea3352f4SAlex Elder return NULL; 2357ea3352f4SAlex Elder 2358ea3352f4SAlex Elder memcpy(dup, *buf, len); 2359ea3352f4SAlex Elder *(dup + len) = '\0'; 2360ea3352f4SAlex Elder *buf += len; 2361ea3352f4SAlex Elder 2362ea3352f4SAlex Elder if (lenp) 2363ea3352f4SAlex Elder *lenp = len; 2364ea3352f4SAlex Elder 2365ea3352f4SAlex Elder return dup; 2366ea3352f4SAlex Elder } 2367ea3352f4SAlex Elder 2368ea3352f4SAlex Elder /* 23690bed54dcSAlex Elder * This fills in the pool_name, image_name, image_name_len, snap_name, 2370a725f65eSAlex Elder * rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based 2371a725f65eSAlex Elder * on the list of monitor addresses and other options provided via 2372a725f65eSAlex Elder * /sys/bus/rbd/add. 2373d22f76e7SAlex Elder * 2374d22f76e7SAlex Elder * Note: rbd_dev is assumed to have been initially zero-filled. 2375a725f65eSAlex Elder */ 2376a725f65eSAlex Elder static int rbd_add_parse_args(struct rbd_device *rbd_dev, 2377a725f65eSAlex Elder const char *buf, 23787ef3214aSAlex Elder const char **mon_addrs, 23795214ecc4SAlex Elder size_t *mon_addrs_size, 2380e28fff26SAlex Elder char *options, 2381e28fff26SAlex Elder size_t options_size) 2382a725f65eSAlex Elder { 2383e28fff26SAlex Elder size_t len; 2384d22f76e7SAlex Elder int ret; 2385e28fff26SAlex Elder 2386e28fff26SAlex Elder /* The first four tokens are required */ 2387e28fff26SAlex Elder 23887ef3214aSAlex Elder len = next_token(&buf); 23897ef3214aSAlex Elder if (!len) 2390a725f65eSAlex Elder return -EINVAL; 23915214ecc4SAlex Elder *mon_addrs_size = len + 1; 23927ef3214aSAlex Elder *mon_addrs = buf; 23937ef3214aSAlex Elder 23947ef3214aSAlex Elder buf += len; 2395a725f65eSAlex Elder 2396e28fff26SAlex Elder len = copy_token(&buf, options, options_size); 2397e28fff26SAlex Elder if (!len || len >= options_size) 2398e28fff26SAlex Elder return -EINVAL; 2399a725f65eSAlex Elder 2400bf3e5ae1SAlex Elder ret = -ENOMEM; 2401d22f76e7SAlex Elder rbd_dev->pool_name = dup_token(&buf, NULL); 2402d22f76e7SAlex Elder if (!rbd_dev->pool_name) 2403d22f76e7SAlex Elder goto out_err; 2404e28fff26SAlex Elder 24050bed54dcSAlex Elder rbd_dev->image_name = dup_token(&buf, &rbd_dev->image_name_len); 24060bed54dcSAlex Elder if (!rbd_dev->image_name) 2407bf3e5ae1SAlex Elder goto out_err; 2408e28fff26SAlex Elder 2409cb8627c7SAlex Elder /* Create the name of the header object */ 2410cb8627c7SAlex Elder 24110bed54dcSAlex Elder rbd_dev->header_name = kmalloc(rbd_dev->image_name_len 2412bf3e5ae1SAlex Elder + sizeof (RBD_SUFFIX), 2413bf3e5ae1SAlex Elder GFP_KERNEL); 24140bed54dcSAlex Elder if (!rbd_dev->header_name) 2415cb8627c7SAlex Elder goto out_err; 24160bed54dcSAlex Elder sprintf(rbd_dev->header_name, "%s%s", rbd_dev->image_name, RBD_SUFFIX); 2417a725f65eSAlex Elder 2418e28fff26SAlex Elder /* 2419820a5f3eSAlex Elder * The snapshot name is optional. If none is is supplied, 2420820a5f3eSAlex Elder * we use the default value. 2421e28fff26SAlex Elder */ 2422820a5f3eSAlex Elder rbd_dev->snap_name = dup_token(&buf, &len); 2423820a5f3eSAlex Elder if (!rbd_dev->snap_name) 2424820a5f3eSAlex Elder goto out_err; 2425820a5f3eSAlex Elder if (!len) { 2426820a5f3eSAlex Elder /* Replace the empty name with the default */ 2427820a5f3eSAlex Elder kfree(rbd_dev->snap_name); 2428820a5f3eSAlex Elder rbd_dev->snap_name 2429820a5f3eSAlex Elder = kmalloc(sizeof (RBD_SNAP_HEAD_NAME), GFP_KERNEL); 2430820a5f3eSAlex Elder if (!rbd_dev->snap_name) 2431820a5f3eSAlex Elder goto out_err; 2432820a5f3eSAlex Elder 2433e28fff26SAlex Elder memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME, 2434e28fff26SAlex Elder sizeof (RBD_SNAP_HEAD_NAME)); 2435849b4260SAlex Elder } 2436e28fff26SAlex Elder 2437a725f65eSAlex Elder return 0; 2438d22f76e7SAlex Elder 2439d22f76e7SAlex Elder out_err: 24400bed54dcSAlex Elder kfree(rbd_dev->header_name); 24410bed54dcSAlex Elder kfree(rbd_dev->image_name); 2442d22f76e7SAlex Elder kfree(rbd_dev->pool_name); 2443d22f76e7SAlex Elder rbd_dev->pool_name = NULL; 2444d22f76e7SAlex Elder 2445d22f76e7SAlex Elder return ret; 2446a725f65eSAlex Elder } 2447a725f65eSAlex Elder 244859c2be1eSYehuda Sadeh static ssize_t rbd_add(struct bus_type *bus, 244959c2be1eSYehuda Sadeh const char *buf, 245059c2be1eSYehuda Sadeh size_t count) 2451602adf40SYehuda Sadeh { 2452cb8627c7SAlex Elder char *options; 2453cb8627c7SAlex Elder struct rbd_device *rbd_dev = NULL; 24547ef3214aSAlex Elder const char *mon_addrs = NULL; 24557ef3214aSAlex Elder size_t mon_addrs_size = 0; 245627cc2594SAlex Elder struct ceph_osd_client *osdc; 245727cc2594SAlex Elder int rc = -ENOMEM; 2458602adf40SYehuda Sadeh 2459602adf40SYehuda Sadeh if (!try_module_get(THIS_MODULE)) 2460602adf40SYehuda Sadeh return -ENODEV; 2461602adf40SYehuda Sadeh 246227cc2594SAlex Elder options = kmalloc(count, GFP_KERNEL); 246327cc2594SAlex Elder if (!options) 246427cc2594SAlex Elder goto err_nomem; 2465cb8627c7SAlex Elder rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); 2466cb8627c7SAlex Elder if (!rbd_dev) 2467cb8627c7SAlex Elder goto err_nomem; 2468602adf40SYehuda Sadeh 2469602adf40SYehuda Sadeh /* static rbd_device initialization */ 2470602adf40SYehuda Sadeh spin_lock_init(&rbd_dev->lock); 2471602adf40SYehuda Sadeh INIT_LIST_HEAD(&rbd_dev->node); 2472dfc5606dSYehuda Sadeh INIT_LIST_HEAD(&rbd_dev->snaps); 2473c666601aSJosh Durgin init_rwsem(&rbd_dev->header_rwsem); 2474602adf40SYehuda Sadeh 2475d184f6bfSAlex Elder /* generate unique id: find highest unique id, add one */ 2476499afd5bSAlex Elder rbd_id_get(rbd_dev); 2477602adf40SYehuda Sadeh 2478a725f65eSAlex Elder /* Fill in the device name, now that we have its id. */ 247981a89793SAlex Elder BUILD_BUG_ON(DEV_NAME_LEN 248081a89793SAlex Elder < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH); 2481de71a297SAlex Elder sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id); 2482e124a82fSAlex Elder 2483a725f65eSAlex Elder /* parse add command */ 24847ef3214aSAlex Elder rc = rbd_add_parse_args(rbd_dev, buf, &mon_addrs, &mon_addrs_size, 2485e28fff26SAlex Elder options, count); 2486a725f65eSAlex Elder if (rc) 2487a725f65eSAlex Elder goto err_put_id; 2488a725f65eSAlex Elder 24895214ecc4SAlex Elder rbd_dev->rbd_client = rbd_get_client(mon_addrs, mon_addrs_size - 1, 24905214ecc4SAlex Elder options); 2491d720bcb0SAlex Elder if (IS_ERR(rbd_dev->rbd_client)) { 2492d720bcb0SAlex Elder rc = PTR_ERR(rbd_dev->rbd_client); 2493f0f8cef5SAlex Elder goto err_put_id; 2494d720bcb0SAlex Elder } 2495602adf40SYehuda Sadeh 2496602adf40SYehuda Sadeh /* pick the pool */ 24971dbb4399SAlex Elder osdc = &rbd_dev->rbd_client->client->osdc; 2498602adf40SYehuda Sadeh rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); 2499602adf40SYehuda Sadeh if (rc < 0) 2500602adf40SYehuda Sadeh goto err_out_client; 25019bb2f334SAlex Elder rbd_dev->pool_id = rc; 2502602adf40SYehuda Sadeh 2503602adf40SYehuda Sadeh /* register our block device */ 250427cc2594SAlex Elder rc = register_blkdev(0, rbd_dev->name); 250527cc2594SAlex Elder if (rc < 0) 2506602adf40SYehuda Sadeh goto err_out_client; 250727cc2594SAlex Elder rbd_dev->major = rc; 2508602adf40SYehuda Sadeh 2509dfc5606dSYehuda Sadeh rc = rbd_bus_add_dev(rbd_dev); 2510dfc5606dSYehuda Sadeh if (rc) 2511766fc439SYehuda Sadeh goto err_out_blkdev; 2512766fc439SYehuda Sadeh 251332eec68dSAlex Elder /* 251432eec68dSAlex Elder * At this point cleanup in the event of an error is the job 251532eec68dSAlex Elder * of the sysfs code (initiated by rbd_bus_del_dev()). 251632eec68dSAlex Elder * 251732eec68dSAlex Elder * Set up and announce blkdev mapping. 251832eec68dSAlex Elder */ 2519602adf40SYehuda Sadeh rc = rbd_init_disk(rbd_dev); 2520602adf40SYehuda Sadeh if (rc) 2521766fc439SYehuda Sadeh goto err_out_bus; 2522602adf40SYehuda Sadeh 252359c2be1eSYehuda Sadeh rc = rbd_init_watch_dev(rbd_dev); 252459c2be1eSYehuda Sadeh if (rc) 252559c2be1eSYehuda Sadeh goto err_out_bus; 252659c2be1eSYehuda Sadeh 2527602adf40SYehuda Sadeh return count; 2528602adf40SYehuda Sadeh 2529766fc439SYehuda Sadeh err_out_bus: 2530766fc439SYehuda Sadeh /* this will also clean up rest of rbd_dev stuff */ 2531766fc439SYehuda Sadeh 2532766fc439SYehuda Sadeh rbd_bus_del_dev(rbd_dev); 2533766fc439SYehuda Sadeh kfree(options); 2534766fc439SYehuda Sadeh return rc; 2535766fc439SYehuda Sadeh 2536602adf40SYehuda Sadeh err_out_blkdev: 2537602adf40SYehuda Sadeh unregister_blkdev(rbd_dev->major, rbd_dev->name); 2538602adf40SYehuda Sadeh err_out_client: 2539602adf40SYehuda Sadeh rbd_put_client(rbd_dev); 2540f0f8cef5SAlex Elder err_put_id: 2541cb8627c7SAlex Elder if (rbd_dev->pool_name) { 2542820a5f3eSAlex Elder kfree(rbd_dev->snap_name); 25430bed54dcSAlex Elder kfree(rbd_dev->header_name); 25440bed54dcSAlex Elder kfree(rbd_dev->image_name); 2545d22f76e7SAlex Elder kfree(rbd_dev->pool_name); 2546cb8627c7SAlex Elder } 2547499afd5bSAlex Elder rbd_id_put(rbd_dev); 254827cc2594SAlex Elder err_nomem: 254927cc2594SAlex Elder kfree(rbd_dev); 2550cb8627c7SAlex Elder kfree(options); 255127cc2594SAlex Elder 2552602adf40SYehuda Sadeh dout("Error adding device %s\n", buf); 2553602adf40SYehuda Sadeh module_put(THIS_MODULE); 255427cc2594SAlex Elder 255527cc2594SAlex Elder return (ssize_t) rc; 2556602adf40SYehuda Sadeh } 2557602adf40SYehuda Sadeh 2558de71a297SAlex Elder static struct rbd_device *__rbd_get_dev(unsigned long dev_id) 2559602adf40SYehuda Sadeh { 2560602adf40SYehuda Sadeh struct list_head *tmp; 2561602adf40SYehuda Sadeh struct rbd_device *rbd_dev; 2562602adf40SYehuda Sadeh 2563e124a82fSAlex Elder spin_lock(&rbd_dev_list_lock); 2564602adf40SYehuda Sadeh list_for_each(tmp, &rbd_dev_list) { 2565602adf40SYehuda Sadeh rbd_dev = list_entry(tmp, struct rbd_device, node); 2566de71a297SAlex Elder if (rbd_dev->dev_id == dev_id) { 2567e124a82fSAlex Elder spin_unlock(&rbd_dev_list_lock); 2568602adf40SYehuda Sadeh return rbd_dev; 2569602adf40SYehuda Sadeh } 2570e124a82fSAlex Elder } 2571e124a82fSAlex Elder spin_unlock(&rbd_dev_list_lock); 2572602adf40SYehuda Sadeh return NULL; 2573602adf40SYehuda Sadeh } 2574602adf40SYehuda Sadeh 2575dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev) 2576602adf40SYehuda Sadeh { 2577593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2578602adf40SYehuda Sadeh 25791dbb4399SAlex Elder if (rbd_dev->watch_request) { 25801dbb4399SAlex Elder struct ceph_client *client = rbd_dev->rbd_client->client; 25811dbb4399SAlex Elder 25821dbb4399SAlex Elder ceph_osdc_unregister_linger_request(&client->osdc, 258359c2be1eSYehuda Sadeh rbd_dev->watch_request); 25841dbb4399SAlex Elder } 258559c2be1eSYehuda Sadeh if (rbd_dev->watch_event) 2586070c633fSAlex Elder rbd_req_sync_unwatch(rbd_dev); 258759c2be1eSYehuda Sadeh 2588602adf40SYehuda Sadeh rbd_put_client(rbd_dev); 2589602adf40SYehuda Sadeh 2590602adf40SYehuda Sadeh /* clean up and free blkdev */ 2591602adf40SYehuda Sadeh rbd_free_disk(rbd_dev); 2592602adf40SYehuda Sadeh unregister_blkdev(rbd_dev->major, rbd_dev->name); 259332eec68dSAlex Elder 259432eec68dSAlex Elder /* done with the id, and with the rbd_dev */ 2595820a5f3eSAlex Elder kfree(rbd_dev->snap_name); 25960bed54dcSAlex Elder kfree(rbd_dev->header_name); 2597d22f76e7SAlex Elder kfree(rbd_dev->pool_name); 25980bed54dcSAlex Elder kfree(rbd_dev->image_name); 259932eec68dSAlex Elder rbd_id_put(rbd_dev); 2600602adf40SYehuda Sadeh kfree(rbd_dev); 2601602adf40SYehuda Sadeh 2602602adf40SYehuda Sadeh /* release module ref */ 2603602adf40SYehuda Sadeh module_put(THIS_MODULE); 2604602adf40SYehuda Sadeh } 2605602adf40SYehuda Sadeh 2606dfc5606dSYehuda Sadeh static ssize_t rbd_remove(struct bus_type *bus, 2607602adf40SYehuda Sadeh const char *buf, 2608602adf40SYehuda Sadeh size_t count) 2609602adf40SYehuda Sadeh { 2610602adf40SYehuda Sadeh struct rbd_device *rbd_dev = NULL; 2611602adf40SYehuda Sadeh int target_id, rc; 2612602adf40SYehuda Sadeh unsigned long ul; 2613602adf40SYehuda Sadeh int ret = count; 2614602adf40SYehuda Sadeh 2615602adf40SYehuda Sadeh rc = strict_strtoul(buf, 10, &ul); 2616602adf40SYehuda Sadeh if (rc) 2617602adf40SYehuda Sadeh return rc; 2618602adf40SYehuda Sadeh 2619602adf40SYehuda Sadeh /* convert to int; abort if we lost anything in the conversion */ 2620602adf40SYehuda Sadeh target_id = (int) ul; 2621602adf40SYehuda Sadeh if (target_id != ul) 2622602adf40SYehuda Sadeh return -EINVAL; 2623602adf40SYehuda Sadeh 2624602adf40SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2625602adf40SYehuda Sadeh 2626602adf40SYehuda Sadeh rbd_dev = __rbd_get_dev(target_id); 2627602adf40SYehuda Sadeh if (!rbd_dev) { 2628602adf40SYehuda Sadeh ret = -ENOENT; 2629602adf40SYehuda Sadeh goto done; 2630602adf40SYehuda Sadeh } 2631602adf40SYehuda Sadeh 2632dfc5606dSYehuda Sadeh __rbd_remove_all_snaps(rbd_dev); 2633dfc5606dSYehuda Sadeh rbd_bus_del_dev(rbd_dev); 2634602adf40SYehuda Sadeh 2635602adf40SYehuda Sadeh done: 2636602adf40SYehuda Sadeh mutex_unlock(&ctl_mutex); 2637602adf40SYehuda Sadeh return ret; 2638602adf40SYehuda Sadeh } 2639602adf40SYehuda Sadeh 2640dfc5606dSYehuda Sadeh static ssize_t rbd_snap_add(struct device *dev, 2641dfc5606dSYehuda Sadeh struct device_attribute *attr, 2642602adf40SYehuda Sadeh const char *buf, 2643602adf40SYehuda Sadeh size_t count) 2644602adf40SYehuda Sadeh { 2645593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2646dfc5606dSYehuda Sadeh int ret; 2647dfc5606dSYehuda Sadeh char *name = kmalloc(count + 1, GFP_KERNEL); 2648602adf40SYehuda Sadeh if (!name) 2649602adf40SYehuda Sadeh return -ENOMEM; 2650602adf40SYehuda Sadeh 2651dfc5606dSYehuda Sadeh snprintf(name, count, "%s", buf); 2652602adf40SYehuda Sadeh 2653602adf40SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2654602adf40SYehuda Sadeh 2655602adf40SYehuda Sadeh ret = rbd_header_add_snap(rbd_dev, 2656602adf40SYehuda Sadeh name, GFP_KERNEL); 2657602adf40SYehuda Sadeh if (ret < 0) 265859c2be1eSYehuda Sadeh goto err_unlock; 2659602adf40SYehuda Sadeh 2660263c6ca0SJosh Durgin ret = __rbd_refresh_header(rbd_dev); 2661602adf40SYehuda Sadeh if (ret < 0) 266259c2be1eSYehuda Sadeh goto err_unlock; 266359c2be1eSYehuda Sadeh 266459c2be1eSYehuda Sadeh /* shouldn't hold ctl_mutex when notifying.. notify might 266559c2be1eSYehuda Sadeh trigger a watch callback that would need to get that mutex */ 266659c2be1eSYehuda Sadeh mutex_unlock(&ctl_mutex); 266759c2be1eSYehuda Sadeh 266859c2be1eSYehuda Sadeh /* make a best effort, don't error if failed */ 26694cb16250SAlex Elder rbd_req_sync_notify(rbd_dev); 2670602adf40SYehuda Sadeh 2671602adf40SYehuda Sadeh ret = count; 267259c2be1eSYehuda Sadeh kfree(name); 267359c2be1eSYehuda Sadeh return ret; 267459c2be1eSYehuda Sadeh 267559c2be1eSYehuda Sadeh err_unlock: 2676602adf40SYehuda Sadeh mutex_unlock(&ctl_mutex); 2677602adf40SYehuda Sadeh kfree(name); 2678602adf40SYehuda Sadeh return ret; 2679602adf40SYehuda Sadeh } 2680602adf40SYehuda Sadeh 2681602adf40SYehuda Sadeh /* 2682602adf40SYehuda Sadeh * create control files in sysfs 2683dfc5606dSYehuda Sadeh * /sys/bus/rbd/... 2684602adf40SYehuda Sadeh */ 2685602adf40SYehuda Sadeh static int rbd_sysfs_init(void) 2686602adf40SYehuda Sadeh { 2687dfc5606dSYehuda Sadeh int ret; 2688602adf40SYehuda Sadeh 2689fed4c143SAlex Elder ret = device_register(&rbd_root_dev); 2690dfc5606dSYehuda Sadeh if (ret < 0) 2691dfc5606dSYehuda Sadeh return ret; 2692602adf40SYehuda Sadeh 2693fed4c143SAlex Elder ret = bus_register(&rbd_bus_type); 2694fed4c143SAlex Elder if (ret < 0) 2695fed4c143SAlex Elder device_unregister(&rbd_root_dev); 2696602adf40SYehuda Sadeh 2697602adf40SYehuda Sadeh return ret; 2698602adf40SYehuda Sadeh } 2699602adf40SYehuda Sadeh 2700602adf40SYehuda Sadeh static void rbd_sysfs_cleanup(void) 2701602adf40SYehuda Sadeh { 2702dfc5606dSYehuda Sadeh bus_unregister(&rbd_bus_type); 2703fed4c143SAlex Elder device_unregister(&rbd_root_dev); 2704602adf40SYehuda Sadeh } 2705602adf40SYehuda Sadeh 2706602adf40SYehuda Sadeh int __init rbd_init(void) 2707602adf40SYehuda Sadeh { 2708602adf40SYehuda Sadeh int rc; 2709602adf40SYehuda Sadeh 2710602adf40SYehuda Sadeh rc = rbd_sysfs_init(); 2711602adf40SYehuda Sadeh if (rc) 2712602adf40SYehuda Sadeh return rc; 2713f0f8cef5SAlex Elder pr_info("loaded " RBD_DRV_NAME_LONG "\n"); 2714602adf40SYehuda Sadeh return 0; 2715602adf40SYehuda Sadeh } 2716602adf40SYehuda Sadeh 2717602adf40SYehuda Sadeh void __exit rbd_exit(void) 2718602adf40SYehuda Sadeh { 2719602adf40SYehuda Sadeh rbd_sysfs_cleanup(); 2720602adf40SYehuda Sadeh } 2721602adf40SYehuda Sadeh 2722602adf40SYehuda Sadeh module_init(rbd_init); 2723602adf40SYehuda Sadeh module_exit(rbd_exit); 2724602adf40SYehuda Sadeh 2725602adf40SYehuda Sadeh MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 2726602adf40SYehuda Sadeh MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 2727602adf40SYehuda Sadeh MODULE_DESCRIPTION("rados block device"); 2728602adf40SYehuda Sadeh 2729602adf40SYehuda Sadeh /* following authorship retained from original osdblk.c */ 2730602adf40SYehuda Sadeh MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); 2731602adf40SYehuda Sadeh 2732602adf40SYehuda Sadeh MODULE_LICENSE("GPL"); 2733