/*
   rbd.c -- Export ceph rados objects as a Linux block device


   based on drivers/block/osdblk.c:

   Copyright 2009 Red Hat, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING. If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
21602adf40SYehuda Sadeh 22602adf40SYehuda Sadeh 23602adf40SYehuda Sadeh 24dfc5606dSYehuda Sadeh For usage instructions, please refer to: 25602adf40SYehuda Sadeh 26dfc5606dSYehuda Sadeh Documentation/ABI/testing/sysfs-bus-rbd 27602adf40SYehuda Sadeh 28602adf40SYehuda Sadeh */ 29602adf40SYehuda Sadeh 30602adf40SYehuda Sadeh #include <linux/ceph/libceph.h> 31602adf40SYehuda Sadeh #include <linux/ceph/osd_client.h> 32602adf40SYehuda Sadeh #include <linux/ceph/mon_client.h> 33602adf40SYehuda Sadeh #include <linux/ceph/decode.h> 3459c2be1eSYehuda Sadeh #include <linux/parser.h> 35602adf40SYehuda Sadeh 36602adf40SYehuda Sadeh #include <linux/kernel.h> 37602adf40SYehuda Sadeh #include <linux/device.h> 38602adf40SYehuda Sadeh #include <linux/module.h> 39602adf40SYehuda Sadeh #include <linux/fs.h> 40602adf40SYehuda Sadeh #include <linux/blkdev.h> 41602adf40SYehuda Sadeh 42602adf40SYehuda Sadeh #include "rbd_types.h" 43602adf40SYehuda Sadeh 44602adf40SYehuda Sadeh #define DRV_NAME "rbd" 45602adf40SYehuda Sadeh #define DRV_NAME_LONG "rbd (rados block device)" 46602adf40SYehuda Sadeh 47602adf40SYehuda Sadeh #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ 48602adf40SYehuda Sadeh 4921079786SAlex Elder #define RBD_MAX_MD_NAME_LEN (RBD_MAX_OBJ_NAME_LEN + sizeof(RBD_SUFFIX)) 50602adf40SYehuda Sadeh #define RBD_MAX_POOL_NAME_LEN 64 51602adf40SYehuda Sadeh #define RBD_MAX_SNAP_NAME_LEN 32 52602adf40SYehuda Sadeh #define RBD_MAX_OPT_LEN 1024 53602adf40SYehuda Sadeh 54602adf40SYehuda Sadeh #define RBD_SNAP_HEAD_NAME "-" 55602adf40SYehuda Sadeh 56602adf40SYehuda Sadeh #define DEV_NAME_LEN 32 57602adf40SYehuda Sadeh 5859c2be1eSYehuda Sadeh #define RBD_NOTIFY_TIMEOUT_DEFAULT 10 5959c2be1eSYehuda Sadeh 60602adf40SYehuda Sadeh /* 61602adf40SYehuda Sadeh * block device image metadata (in-memory version) 62602adf40SYehuda Sadeh */ 63602adf40SYehuda Sadeh struct rbd_image_header { 64602adf40SYehuda Sadeh u64 image_size; 65602adf40SYehuda Sadeh char block_name[32]; 66602adf40SYehuda 
Sadeh __u8 obj_order; 67602adf40SYehuda Sadeh __u8 crypt_type; 68602adf40SYehuda Sadeh __u8 comp_type; 69602adf40SYehuda Sadeh struct rw_semaphore snap_rwsem; 70602adf40SYehuda Sadeh struct ceph_snap_context *snapc; 71602adf40SYehuda Sadeh size_t snap_names_len; 72602adf40SYehuda Sadeh u64 snap_seq; 73602adf40SYehuda Sadeh u32 total_snaps; 74602adf40SYehuda Sadeh 75602adf40SYehuda Sadeh char *snap_names; 76602adf40SYehuda Sadeh u64 *snap_sizes; 7759c2be1eSYehuda Sadeh 7859c2be1eSYehuda Sadeh u64 obj_version; 7959c2be1eSYehuda Sadeh }; 8059c2be1eSYehuda Sadeh 8159c2be1eSYehuda Sadeh struct rbd_options { 8259c2be1eSYehuda Sadeh int notify_timeout; 83602adf40SYehuda Sadeh }; 84602adf40SYehuda Sadeh 85602adf40SYehuda Sadeh /* 86602adf40SYehuda Sadeh * an instance of the client. multiple devices may share a client. 87602adf40SYehuda Sadeh */ 88602adf40SYehuda Sadeh struct rbd_client { 89602adf40SYehuda Sadeh struct ceph_client *client; 9059c2be1eSYehuda Sadeh struct rbd_options *rbd_opts; 91602adf40SYehuda Sadeh struct kref kref; 92602adf40SYehuda Sadeh struct list_head node; 93602adf40SYehuda Sadeh }; 94602adf40SYehuda Sadeh 951fec7093SYehuda Sadeh struct rbd_req_coll; 961fec7093SYehuda Sadeh 97602adf40SYehuda Sadeh /* 98602adf40SYehuda Sadeh * a single io request 99602adf40SYehuda Sadeh */ 100602adf40SYehuda Sadeh struct rbd_request { 101602adf40SYehuda Sadeh struct request *rq; /* blk layer request */ 102602adf40SYehuda Sadeh struct bio *bio; /* cloned bio */ 103602adf40SYehuda Sadeh struct page **pages; /* list of used pages */ 104602adf40SYehuda Sadeh u64 len; 1051fec7093SYehuda Sadeh int coll_index; 1061fec7093SYehuda Sadeh struct rbd_req_coll *coll; 1071fec7093SYehuda Sadeh }; 1081fec7093SYehuda Sadeh 1091fec7093SYehuda Sadeh struct rbd_req_status { 1101fec7093SYehuda Sadeh int done; 1111fec7093SYehuda Sadeh int rc; 1121fec7093SYehuda Sadeh u64 bytes; 1131fec7093SYehuda Sadeh }; 1141fec7093SYehuda Sadeh 1151fec7093SYehuda Sadeh /* 1161fec7093SYehuda Sadeh * a 
collection of requests 1171fec7093SYehuda Sadeh */ 1181fec7093SYehuda Sadeh struct rbd_req_coll { 1191fec7093SYehuda Sadeh int total; 1201fec7093SYehuda Sadeh int num_done; 1211fec7093SYehuda Sadeh struct kref kref; 1221fec7093SYehuda Sadeh struct rbd_req_status status[0]; 123602adf40SYehuda Sadeh }; 124602adf40SYehuda Sadeh 125dfc5606dSYehuda Sadeh struct rbd_snap { 126dfc5606dSYehuda Sadeh struct device dev; 127dfc5606dSYehuda Sadeh const char *name; 128dfc5606dSYehuda Sadeh size_t size; 129dfc5606dSYehuda Sadeh struct list_head node; 130dfc5606dSYehuda Sadeh u64 id; 131dfc5606dSYehuda Sadeh }; 132dfc5606dSYehuda Sadeh 133602adf40SYehuda Sadeh /* 134602adf40SYehuda Sadeh * a single device 135602adf40SYehuda Sadeh */ 136602adf40SYehuda Sadeh struct rbd_device { 137602adf40SYehuda Sadeh int id; /* blkdev unique id */ 138602adf40SYehuda Sadeh 139602adf40SYehuda Sadeh int major; /* blkdev assigned major */ 140602adf40SYehuda Sadeh struct gendisk *disk; /* blkdev's gendisk and rq */ 141602adf40SYehuda Sadeh struct request_queue *q; 142602adf40SYehuda Sadeh 143602adf40SYehuda Sadeh struct ceph_client *client; 144602adf40SYehuda Sadeh struct rbd_client *rbd_client; 145602adf40SYehuda Sadeh 146602adf40SYehuda Sadeh char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ 147602adf40SYehuda Sadeh 148602adf40SYehuda Sadeh spinlock_t lock; /* queue lock */ 149602adf40SYehuda Sadeh 150602adf40SYehuda Sadeh struct rbd_image_header header; 151602adf40SYehuda Sadeh char obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */ 152602adf40SYehuda Sadeh int obj_len; 153602adf40SYehuda Sadeh char obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. 
*/ 154602adf40SYehuda Sadeh char pool_name[RBD_MAX_POOL_NAME_LEN]; 155602adf40SYehuda Sadeh int poolid; 156602adf40SYehuda Sadeh 15759c2be1eSYehuda Sadeh struct ceph_osd_event *watch_event; 15859c2be1eSYehuda Sadeh struct ceph_osd_request *watch_request; 15959c2be1eSYehuda Sadeh 160602adf40SYehuda Sadeh char snap_name[RBD_MAX_SNAP_NAME_LEN]; 161602adf40SYehuda Sadeh u32 cur_snap; /* index+1 of current snapshot within snap context 162602adf40SYehuda Sadeh 0 - for the head */ 163602adf40SYehuda Sadeh int read_only; 164602adf40SYehuda Sadeh 165602adf40SYehuda Sadeh struct list_head node; 166dfc5606dSYehuda Sadeh 167dfc5606dSYehuda Sadeh /* list of snapshots */ 168dfc5606dSYehuda Sadeh struct list_head snaps; 169dfc5606dSYehuda Sadeh 170dfc5606dSYehuda Sadeh /* sysfs related */ 171dfc5606dSYehuda Sadeh struct device dev; 172dfc5606dSYehuda Sadeh }; 173dfc5606dSYehuda Sadeh 174dfc5606dSYehuda Sadeh static struct bus_type rbd_bus_type = { 175dfc5606dSYehuda Sadeh .name = "rbd", 176602adf40SYehuda Sadeh }; 177602adf40SYehuda Sadeh 17821079786SAlex Elder static DEFINE_SPINLOCK(node_lock); /* protects client get/put */ 179602adf40SYehuda Sadeh 180602adf40SYehuda Sadeh static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ 181602adf40SYehuda Sadeh static LIST_HEAD(rbd_dev_list); /* devices */ 182602adf40SYehuda Sadeh static LIST_HEAD(rbd_client_list); /* clients */ 183602adf40SYehuda Sadeh 184dfc5606dSYehuda Sadeh static int __rbd_init_snaps_header(struct rbd_device *rbd_dev); 185dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev); 186dfc5606dSYehuda Sadeh static ssize_t rbd_snap_add(struct device *dev, 187dfc5606dSYehuda Sadeh struct device_attribute *attr, 188dfc5606dSYehuda Sadeh const char *buf, 189dfc5606dSYehuda Sadeh size_t count); 190dfc5606dSYehuda Sadeh static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev, 19169932487SJustin P. 
Mattock struct rbd_snap *snap); 192dfc5606dSYehuda Sadeh 193dfc5606dSYehuda Sadeh 194dfc5606dSYehuda Sadeh static struct rbd_device *dev_to_rbd(struct device *dev) 195dfc5606dSYehuda Sadeh { 196dfc5606dSYehuda Sadeh return container_of(dev, struct rbd_device, dev); 197dfc5606dSYehuda Sadeh } 198dfc5606dSYehuda Sadeh 199dfc5606dSYehuda Sadeh static struct device *rbd_get_dev(struct rbd_device *rbd_dev) 200dfc5606dSYehuda Sadeh { 201dfc5606dSYehuda Sadeh return get_device(&rbd_dev->dev); 202dfc5606dSYehuda Sadeh } 203dfc5606dSYehuda Sadeh 204dfc5606dSYehuda Sadeh static void rbd_put_dev(struct rbd_device *rbd_dev) 205dfc5606dSYehuda Sadeh { 206dfc5606dSYehuda Sadeh put_device(&rbd_dev->dev); 207dfc5606dSYehuda Sadeh } 208602adf40SYehuda Sadeh 20959c2be1eSYehuda Sadeh static int __rbd_update_snaps(struct rbd_device *rbd_dev); 21059c2be1eSYehuda Sadeh 211602adf40SYehuda Sadeh static int rbd_open(struct block_device *bdev, fmode_t mode) 212602adf40SYehuda Sadeh { 213602adf40SYehuda Sadeh struct gendisk *disk = bdev->bd_disk; 214602adf40SYehuda Sadeh struct rbd_device *rbd_dev = disk->private_data; 215602adf40SYehuda Sadeh 216dfc5606dSYehuda Sadeh rbd_get_dev(rbd_dev); 217dfc5606dSYehuda Sadeh 218602adf40SYehuda Sadeh set_device_ro(bdev, rbd_dev->read_only); 219602adf40SYehuda Sadeh 220602adf40SYehuda Sadeh if ((mode & FMODE_WRITE) && rbd_dev->read_only) 221602adf40SYehuda Sadeh return -EROFS; 222602adf40SYehuda Sadeh 223602adf40SYehuda Sadeh return 0; 224602adf40SYehuda Sadeh } 225602adf40SYehuda Sadeh 226dfc5606dSYehuda Sadeh static int rbd_release(struct gendisk *disk, fmode_t mode) 227dfc5606dSYehuda Sadeh { 228dfc5606dSYehuda Sadeh struct rbd_device *rbd_dev = disk->private_data; 229dfc5606dSYehuda Sadeh 230dfc5606dSYehuda Sadeh rbd_put_dev(rbd_dev); 231dfc5606dSYehuda Sadeh 232dfc5606dSYehuda Sadeh return 0; 233dfc5606dSYehuda Sadeh } 234dfc5606dSYehuda Sadeh 235602adf40SYehuda Sadeh static const struct block_device_operations rbd_bd_ops = { 236602adf40SYehuda 
Sadeh .owner = THIS_MODULE, 237602adf40SYehuda Sadeh .open = rbd_open, 238dfc5606dSYehuda Sadeh .release = rbd_release, 239602adf40SYehuda Sadeh }; 240602adf40SYehuda Sadeh 241602adf40SYehuda Sadeh /* 242602adf40SYehuda Sadeh * Initialize an rbd client instance. 243602adf40SYehuda Sadeh * We own *opt. 244602adf40SYehuda Sadeh */ 24559c2be1eSYehuda Sadeh static struct rbd_client *rbd_client_create(struct ceph_options *opt, 24659c2be1eSYehuda Sadeh struct rbd_options *rbd_opts) 247602adf40SYehuda Sadeh { 248602adf40SYehuda Sadeh struct rbd_client *rbdc; 249602adf40SYehuda Sadeh int ret = -ENOMEM; 250602adf40SYehuda Sadeh 251602adf40SYehuda Sadeh dout("rbd_client_create\n"); 252602adf40SYehuda Sadeh rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); 253602adf40SYehuda Sadeh if (!rbdc) 254602adf40SYehuda Sadeh goto out_opt; 255602adf40SYehuda Sadeh 256602adf40SYehuda Sadeh kref_init(&rbdc->kref); 257602adf40SYehuda Sadeh INIT_LIST_HEAD(&rbdc->node); 258602adf40SYehuda Sadeh 2596ab00d46SSage Weil rbdc->client = ceph_create_client(opt, rbdc, 0, 0); 260602adf40SYehuda Sadeh if (IS_ERR(rbdc->client)) 261602adf40SYehuda Sadeh goto out_rbdc; 26228f259b7SVasiliy Kulikov opt = NULL; /* Now rbdc->client is responsible for opt */ 263602adf40SYehuda Sadeh 264602adf40SYehuda Sadeh ret = ceph_open_session(rbdc->client); 265602adf40SYehuda Sadeh if (ret < 0) 266602adf40SYehuda Sadeh goto out_err; 267602adf40SYehuda Sadeh 26859c2be1eSYehuda Sadeh rbdc->rbd_opts = rbd_opts; 26959c2be1eSYehuda Sadeh 270602adf40SYehuda Sadeh spin_lock(&node_lock); 271602adf40SYehuda Sadeh list_add_tail(&rbdc->node, &rbd_client_list); 272602adf40SYehuda Sadeh spin_unlock(&node_lock); 273602adf40SYehuda Sadeh 274602adf40SYehuda Sadeh dout("rbd_client_create created %p\n", rbdc); 275602adf40SYehuda Sadeh return rbdc; 276602adf40SYehuda Sadeh 277602adf40SYehuda Sadeh out_err: 278602adf40SYehuda Sadeh ceph_destroy_client(rbdc->client); 279602adf40SYehuda Sadeh out_rbdc: 280602adf40SYehuda Sadeh 
kfree(rbdc); 281602adf40SYehuda Sadeh out_opt: 28228f259b7SVasiliy Kulikov if (opt) 283602adf40SYehuda Sadeh ceph_destroy_options(opt); 28428f259b7SVasiliy Kulikov return ERR_PTR(ret); 285602adf40SYehuda Sadeh } 286602adf40SYehuda Sadeh 287602adf40SYehuda Sadeh /* 288602adf40SYehuda Sadeh * Find a ceph client with specific addr and configuration. 289602adf40SYehuda Sadeh */ 290602adf40SYehuda Sadeh static struct rbd_client *__rbd_client_find(struct ceph_options *opt) 291602adf40SYehuda Sadeh { 292602adf40SYehuda Sadeh struct rbd_client *client_node; 293602adf40SYehuda Sadeh 294602adf40SYehuda Sadeh if (opt->flags & CEPH_OPT_NOSHARE) 295602adf40SYehuda Sadeh return NULL; 296602adf40SYehuda Sadeh 297602adf40SYehuda Sadeh list_for_each_entry(client_node, &rbd_client_list, node) 298602adf40SYehuda Sadeh if (ceph_compare_options(opt, client_node->client) == 0) 299602adf40SYehuda Sadeh return client_node; 300602adf40SYehuda Sadeh return NULL; 301602adf40SYehuda Sadeh } 302602adf40SYehuda Sadeh 303602adf40SYehuda Sadeh /* 30459c2be1eSYehuda Sadeh * mount options 30559c2be1eSYehuda Sadeh */ 30659c2be1eSYehuda Sadeh enum { 30759c2be1eSYehuda Sadeh Opt_notify_timeout, 30859c2be1eSYehuda Sadeh Opt_last_int, 30959c2be1eSYehuda Sadeh /* int args above */ 31059c2be1eSYehuda Sadeh Opt_last_string, 31159c2be1eSYehuda Sadeh /* string args above */ 31259c2be1eSYehuda Sadeh }; 31359c2be1eSYehuda Sadeh 31459c2be1eSYehuda Sadeh static match_table_t rbdopt_tokens = { 31559c2be1eSYehuda Sadeh {Opt_notify_timeout, "notify_timeout=%d"}, 31659c2be1eSYehuda Sadeh /* int args above */ 31759c2be1eSYehuda Sadeh /* string args above */ 31859c2be1eSYehuda Sadeh {-1, NULL} 31959c2be1eSYehuda Sadeh }; 32059c2be1eSYehuda Sadeh 32159c2be1eSYehuda Sadeh static int parse_rbd_opts_token(char *c, void *private) 32259c2be1eSYehuda Sadeh { 32359c2be1eSYehuda Sadeh struct rbd_options *rbdopt = private; 32459c2be1eSYehuda Sadeh substring_t argstr[MAX_OPT_ARGS]; 32559c2be1eSYehuda Sadeh int token, intval, 
ret; 32659c2be1eSYehuda Sadeh 32721079786SAlex Elder token = match_token(c, rbdopt_tokens, argstr); 32859c2be1eSYehuda Sadeh if (token < 0) 32959c2be1eSYehuda Sadeh return -EINVAL; 33059c2be1eSYehuda Sadeh 33159c2be1eSYehuda Sadeh if (token < Opt_last_int) { 33259c2be1eSYehuda Sadeh ret = match_int(&argstr[0], &intval); 33359c2be1eSYehuda Sadeh if (ret < 0) { 33459c2be1eSYehuda Sadeh pr_err("bad mount option arg (not int) " 33559c2be1eSYehuda Sadeh "at '%s'\n", c); 33659c2be1eSYehuda Sadeh return ret; 33759c2be1eSYehuda Sadeh } 33859c2be1eSYehuda Sadeh dout("got int token %d val %d\n", token, intval); 33959c2be1eSYehuda Sadeh } else if (token > Opt_last_int && token < Opt_last_string) { 34059c2be1eSYehuda Sadeh dout("got string token %d val %s\n", token, 34159c2be1eSYehuda Sadeh argstr[0].from); 34259c2be1eSYehuda Sadeh } else { 34359c2be1eSYehuda Sadeh dout("got token %d\n", token); 34459c2be1eSYehuda Sadeh } 34559c2be1eSYehuda Sadeh 34659c2be1eSYehuda Sadeh switch (token) { 34759c2be1eSYehuda Sadeh case Opt_notify_timeout: 34859c2be1eSYehuda Sadeh rbdopt->notify_timeout = intval; 34959c2be1eSYehuda Sadeh break; 35059c2be1eSYehuda Sadeh default: 35159c2be1eSYehuda Sadeh BUG_ON(token); 35259c2be1eSYehuda Sadeh } 35359c2be1eSYehuda Sadeh return 0; 35459c2be1eSYehuda Sadeh } 35559c2be1eSYehuda Sadeh 35659c2be1eSYehuda Sadeh /* 357602adf40SYehuda Sadeh * Get a ceph client with specific addr and configuration, if one does 358602adf40SYehuda Sadeh * not exist create it. 
359602adf40SYehuda Sadeh */ 360602adf40SYehuda Sadeh static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, 361602adf40SYehuda Sadeh char *options) 362602adf40SYehuda Sadeh { 363602adf40SYehuda Sadeh struct rbd_client *rbdc; 364602adf40SYehuda Sadeh struct ceph_options *opt; 365602adf40SYehuda Sadeh int ret; 36659c2be1eSYehuda Sadeh struct rbd_options *rbd_opts; 36759c2be1eSYehuda Sadeh 36859c2be1eSYehuda Sadeh rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL); 36959c2be1eSYehuda Sadeh if (!rbd_opts) 37059c2be1eSYehuda Sadeh return -ENOMEM; 37159c2be1eSYehuda Sadeh 37259c2be1eSYehuda Sadeh rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT; 373602adf40SYehuda Sadeh 374602adf40SYehuda Sadeh ret = ceph_parse_options(&opt, options, mon_addr, 37521079786SAlex Elder mon_addr + strlen(mon_addr), 37621079786SAlex Elder parse_rbd_opts_token, rbd_opts); 377602adf40SYehuda Sadeh if (ret < 0) 37859c2be1eSYehuda Sadeh goto done_err; 379602adf40SYehuda Sadeh 380602adf40SYehuda Sadeh spin_lock(&node_lock); 381602adf40SYehuda Sadeh rbdc = __rbd_client_find(opt); 382602adf40SYehuda Sadeh if (rbdc) { 383602adf40SYehuda Sadeh ceph_destroy_options(opt); 38497bb59a0SAlex Elder kfree(rbd_opts); 385602adf40SYehuda Sadeh 386602adf40SYehuda Sadeh /* using an existing client */ 387602adf40SYehuda Sadeh kref_get(&rbdc->kref); 388602adf40SYehuda Sadeh rbd_dev->rbd_client = rbdc; 389602adf40SYehuda Sadeh rbd_dev->client = rbdc->client; 390602adf40SYehuda Sadeh spin_unlock(&node_lock); 391602adf40SYehuda Sadeh return 0; 392602adf40SYehuda Sadeh } 393602adf40SYehuda Sadeh spin_unlock(&node_lock); 394602adf40SYehuda Sadeh 39559c2be1eSYehuda Sadeh rbdc = rbd_client_create(opt, rbd_opts); 39659c2be1eSYehuda Sadeh if (IS_ERR(rbdc)) { 39759c2be1eSYehuda Sadeh ret = PTR_ERR(rbdc); 39859c2be1eSYehuda Sadeh goto done_err; 39959c2be1eSYehuda Sadeh } 400602adf40SYehuda Sadeh 401602adf40SYehuda Sadeh rbd_dev->rbd_client = rbdc; 402602adf40SYehuda Sadeh rbd_dev->client = 
rbdc->client; 403602adf40SYehuda Sadeh return 0; 40459c2be1eSYehuda Sadeh done_err: 40559c2be1eSYehuda Sadeh kfree(rbd_opts); 40659c2be1eSYehuda Sadeh return ret; 407602adf40SYehuda Sadeh } 408602adf40SYehuda Sadeh 409602adf40SYehuda Sadeh /* 410602adf40SYehuda Sadeh * Destroy ceph client 411d23a4b3fSAlex Elder * 412d23a4b3fSAlex Elder * Caller must hold node_lock. 413602adf40SYehuda Sadeh */ 414602adf40SYehuda Sadeh static void rbd_client_release(struct kref *kref) 415602adf40SYehuda Sadeh { 416602adf40SYehuda Sadeh struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); 417602adf40SYehuda Sadeh 418602adf40SYehuda Sadeh dout("rbd_release_client %p\n", rbdc); 419602adf40SYehuda Sadeh list_del(&rbdc->node); 420602adf40SYehuda Sadeh 421602adf40SYehuda Sadeh ceph_destroy_client(rbdc->client); 42259c2be1eSYehuda Sadeh kfree(rbdc->rbd_opts); 423602adf40SYehuda Sadeh kfree(rbdc); 424602adf40SYehuda Sadeh } 425602adf40SYehuda Sadeh 426602adf40SYehuda Sadeh /* 427602adf40SYehuda Sadeh * Drop reference to ceph client node. If it's not referenced anymore, release 428602adf40SYehuda Sadeh * it. 
429602adf40SYehuda Sadeh */ 430602adf40SYehuda Sadeh static void rbd_put_client(struct rbd_device *rbd_dev) 431602adf40SYehuda Sadeh { 432d23a4b3fSAlex Elder spin_lock(&node_lock); 433602adf40SYehuda Sadeh kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); 434d23a4b3fSAlex Elder spin_unlock(&node_lock); 435602adf40SYehuda Sadeh rbd_dev->rbd_client = NULL; 436602adf40SYehuda Sadeh rbd_dev->client = NULL; 437602adf40SYehuda Sadeh } 438602adf40SYehuda Sadeh 4391fec7093SYehuda Sadeh /* 4401fec7093SYehuda Sadeh * Destroy requests collection 4411fec7093SYehuda Sadeh */ 4421fec7093SYehuda Sadeh static void rbd_coll_release(struct kref *kref) 4431fec7093SYehuda Sadeh { 4441fec7093SYehuda Sadeh struct rbd_req_coll *coll = 4451fec7093SYehuda Sadeh container_of(kref, struct rbd_req_coll, kref); 4461fec7093SYehuda Sadeh 4471fec7093SYehuda Sadeh dout("rbd_coll_release %p\n", coll); 4481fec7093SYehuda Sadeh kfree(coll); 4491fec7093SYehuda Sadeh } 450602adf40SYehuda Sadeh 451602adf40SYehuda Sadeh /* 452602adf40SYehuda Sadeh * Create a new header structure, translate header format from the on-disk 453602adf40SYehuda Sadeh * header. 
454602adf40SYehuda Sadeh */ 455602adf40SYehuda Sadeh static int rbd_header_from_disk(struct rbd_image_header *header, 456602adf40SYehuda Sadeh struct rbd_image_header_ondisk *ondisk, 457602adf40SYehuda Sadeh int allocated_snaps, 458602adf40SYehuda Sadeh gfp_t gfp_flags) 459602adf40SYehuda Sadeh { 460602adf40SYehuda Sadeh int i; 461602adf40SYehuda Sadeh u32 snap_count = le32_to_cpu(ondisk->snap_count); 462602adf40SYehuda Sadeh int ret = -ENOMEM; 463602adf40SYehuda Sadeh 46421079786SAlex Elder if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT))) 46581e759fbSJosh Durgin return -ENXIO; 46681e759fbSJosh Durgin 467602adf40SYehuda Sadeh init_rwsem(&header->snap_rwsem); 468602adf40SYehuda Sadeh header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); 469602adf40SYehuda Sadeh header->snapc = kmalloc(sizeof(struct ceph_snap_context) + 47021079786SAlex Elder snap_count * sizeof (*ondisk), 471602adf40SYehuda Sadeh gfp_flags); 472602adf40SYehuda Sadeh if (!header->snapc) 473602adf40SYehuda Sadeh return -ENOMEM; 474602adf40SYehuda Sadeh if (snap_count) { 475602adf40SYehuda Sadeh header->snap_names = kmalloc(header->snap_names_len, 476602adf40SYehuda Sadeh GFP_KERNEL); 477602adf40SYehuda Sadeh if (!header->snap_names) 478602adf40SYehuda Sadeh goto err_snapc; 479602adf40SYehuda Sadeh header->snap_sizes = kmalloc(snap_count * sizeof(u64), 480602adf40SYehuda Sadeh GFP_KERNEL); 481602adf40SYehuda Sadeh if (!header->snap_sizes) 482602adf40SYehuda Sadeh goto err_names; 483602adf40SYehuda Sadeh } else { 484602adf40SYehuda Sadeh header->snap_names = NULL; 485602adf40SYehuda Sadeh header->snap_sizes = NULL; 486602adf40SYehuda Sadeh } 487602adf40SYehuda Sadeh memcpy(header->block_name, ondisk->block_name, 488602adf40SYehuda Sadeh sizeof(ondisk->block_name)); 489602adf40SYehuda Sadeh 490602adf40SYehuda Sadeh header->image_size = le64_to_cpu(ondisk->image_size); 491602adf40SYehuda Sadeh header->obj_order = ondisk->options.order; 492602adf40SYehuda Sadeh header->crypt_type = 
ondisk->options.crypt_type; 493602adf40SYehuda Sadeh header->comp_type = ondisk->options.comp_type; 494602adf40SYehuda Sadeh 495602adf40SYehuda Sadeh atomic_set(&header->snapc->nref, 1); 496602adf40SYehuda Sadeh header->snap_seq = le64_to_cpu(ondisk->snap_seq); 497602adf40SYehuda Sadeh header->snapc->num_snaps = snap_count; 498602adf40SYehuda Sadeh header->total_snaps = snap_count; 499602adf40SYehuda Sadeh 50021079786SAlex Elder if (snap_count && allocated_snaps == snap_count) { 501602adf40SYehuda Sadeh for (i = 0; i < snap_count; i++) { 502602adf40SYehuda Sadeh header->snapc->snaps[i] = 503602adf40SYehuda Sadeh le64_to_cpu(ondisk->snaps[i].id); 504602adf40SYehuda Sadeh header->snap_sizes[i] = 505602adf40SYehuda Sadeh le64_to_cpu(ondisk->snaps[i].image_size); 506602adf40SYehuda Sadeh } 507602adf40SYehuda Sadeh 508602adf40SYehuda Sadeh /* copy snapshot names */ 509602adf40SYehuda Sadeh memcpy(header->snap_names, &ondisk->snaps[i], 510602adf40SYehuda Sadeh header->snap_names_len); 511602adf40SYehuda Sadeh } 512602adf40SYehuda Sadeh 513602adf40SYehuda Sadeh return 0; 514602adf40SYehuda Sadeh 515602adf40SYehuda Sadeh err_names: 516602adf40SYehuda Sadeh kfree(header->snap_names); 517602adf40SYehuda Sadeh err_snapc: 518602adf40SYehuda Sadeh kfree(header->snapc); 519602adf40SYehuda Sadeh return ret; 520602adf40SYehuda Sadeh } 521602adf40SYehuda Sadeh 522602adf40SYehuda Sadeh static int snap_index(struct rbd_image_header *header, int snap_num) 523602adf40SYehuda Sadeh { 524602adf40SYehuda Sadeh return header->total_snaps - snap_num; 525602adf40SYehuda Sadeh } 526602adf40SYehuda Sadeh 527602adf40SYehuda Sadeh static u64 cur_snap_id(struct rbd_device *rbd_dev) 528602adf40SYehuda Sadeh { 529602adf40SYehuda Sadeh struct rbd_image_header *header = &rbd_dev->header; 530602adf40SYehuda Sadeh 531602adf40SYehuda Sadeh if (!rbd_dev->cur_snap) 532602adf40SYehuda Sadeh return 0; 533602adf40SYehuda Sadeh 534602adf40SYehuda Sadeh return header->snapc->snaps[snap_index(header, 
rbd_dev->cur_snap)]; 535602adf40SYehuda Sadeh } 536602adf40SYehuda Sadeh 537602adf40SYehuda Sadeh static int snap_by_name(struct rbd_image_header *header, const char *snap_name, 538602adf40SYehuda Sadeh u64 *seq, u64 *size) 539602adf40SYehuda Sadeh { 540602adf40SYehuda Sadeh int i; 541602adf40SYehuda Sadeh char *p = header->snap_names; 542602adf40SYehuda Sadeh 543602adf40SYehuda Sadeh for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) { 544602adf40SYehuda Sadeh if (strcmp(snap_name, p) == 0) 545602adf40SYehuda Sadeh break; 546602adf40SYehuda Sadeh } 547602adf40SYehuda Sadeh if (i == header->total_snaps) 548602adf40SYehuda Sadeh return -ENOENT; 549602adf40SYehuda Sadeh if (seq) 550602adf40SYehuda Sadeh *seq = header->snapc->snaps[i]; 551602adf40SYehuda Sadeh 552602adf40SYehuda Sadeh if (size) 553602adf40SYehuda Sadeh *size = header->snap_sizes[i]; 554602adf40SYehuda Sadeh 555602adf40SYehuda Sadeh return i; 556602adf40SYehuda Sadeh } 557602adf40SYehuda Sadeh 558602adf40SYehuda Sadeh static int rbd_header_set_snap(struct rbd_device *dev, 559602adf40SYehuda Sadeh const char *snap_name, 560602adf40SYehuda Sadeh u64 *size) 561602adf40SYehuda Sadeh { 562602adf40SYehuda Sadeh struct rbd_image_header *header = &dev->header; 563602adf40SYehuda Sadeh struct ceph_snap_context *snapc = header->snapc; 564602adf40SYehuda Sadeh int ret = -ENOENT; 565602adf40SYehuda Sadeh 566602adf40SYehuda Sadeh down_write(&header->snap_rwsem); 567602adf40SYehuda Sadeh 568602adf40SYehuda Sadeh if (!snap_name || 569602adf40SYehuda Sadeh !*snap_name || 570602adf40SYehuda Sadeh strcmp(snap_name, "-") == 0 || 571602adf40SYehuda Sadeh strcmp(snap_name, RBD_SNAP_HEAD_NAME) == 0) { 572602adf40SYehuda Sadeh if (header->total_snaps) 573602adf40SYehuda Sadeh snapc->seq = header->snap_seq; 574602adf40SYehuda Sadeh else 575602adf40SYehuda Sadeh snapc->seq = 0; 576602adf40SYehuda Sadeh dev->cur_snap = 0; 577602adf40SYehuda Sadeh dev->read_only = 0; 578602adf40SYehuda Sadeh if (size) 
579602adf40SYehuda Sadeh *size = header->image_size; 580602adf40SYehuda Sadeh } else { 581602adf40SYehuda Sadeh ret = snap_by_name(header, snap_name, &snapc->seq, size); 582602adf40SYehuda Sadeh if (ret < 0) 583602adf40SYehuda Sadeh goto done; 584602adf40SYehuda Sadeh 585602adf40SYehuda Sadeh dev->cur_snap = header->total_snaps - ret; 586602adf40SYehuda Sadeh dev->read_only = 1; 587602adf40SYehuda Sadeh } 588602adf40SYehuda Sadeh 589602adf40SYehuda Sadeh ret = 0; 590602adf40SYehuda Sadeh done: 591602adf40SYehuda Sadeh up_write(&header->snap_rwsem); 592602adf40SYehuda Sadeh return ret; 593602adf40SYehuda Sadeh } 594602adf40SYehuda Sadeh 595602adf40SYehuda Sadeh static void rbd_header_free(struct rbd_image_header *header) 596602adf40SYehuda Sadeh { 597602adf40SYehuda Sadeh kfree(header->snapc); 598602adf40SYehuda Sadeh kfree(header->snap_names); 599602adf40SYehuda Sadeh kfree(header->snap_sizes); 600602adf40SYehuda Sadeh } 601602adf40SYehuda Sadeh 602602adf40SYehuda Sadeh /* 603602adf40SYehuda Sadeh * get the actual striped segment name, offset and length 604602adf40SYehuda Sadeh */ 605602adf40SYehuda Sadeh static u64 rbd_get_segment(struct rbd_image_header *header, 606602adf40SYehuda Sadeh const char *block_name, 607602adf40SYehuda Sadeh u64 ofs, u64 len, 608602adf40SYehuda Sadeh char *seg_name, u64 *segofs) 609602adf40SYehuda Sadeh { 610602adf40SYehuda Sadeh u64 seg = ofs >> header->obj_order; 611602adf40SYehuda Sadeh 612602adf40SYehuda Sadeh if (seg_name) 613602adf40SYehuda Sadeh snprintf(seg_name, RBD_MAX_SEG_NAME_LEN, 614602adf40SYehuda Sadeh "%s.%012llx", block_name, seg); 615602adf40SYehuda Sadeh 616602adf40SYehuda Sadeh ofs = ofs & ((1 << header->obj_order) - 1); 617602adf40SYehuda Sadeh len = min_t(u64, len, (1 << header->obj_order) - ofs); 618602adf40SYehuda Sadeh 619602adf40SYehuda Sadeh if (segofs) 620602adf40SYehuda Sadeh *segofs = ofs; 621602adf40SYehuda Sadeh 622602adf40SYehuda Sadeh return len; 623602adf40SYehuda Sadeh } 624602adf40SYehuda Sadeh 
6251fec7093SYehuda Sadeh static int rbd_get_num_segments(struct rbd_image_header *header, 6261fec7093SYehuda Sadeh u64 ofs, u64 len) 6271fec7093SYehuda Sadeh { 6281fec7093SYehuda Sadeh u64 start_seg = ofs >> header->obj_order; 6291fec7093SYehuda Sadeh u64 end_seg = (ofs + len - 1) >> header->obj_order; 6301fec7093SYehuda Sadeh return end_seg - start_seg + 1; 6311fec7093SYehuda Sadeh } 6321fec7093SYehuda Sadeh 633602adf40SYehuda Sadeh /* 634029bcbd8SJosh Durgin * returns the size of an object in the image 635029bcbd8SJosh Durgin */ 636029bcbd8SJosh Durgin static u64 rbd_obj_bytes(struct rbd_image_header *header) 637029bcbd8SJosh Durgin { 638029bcbd8SJosh Durgin return 1 << header->obj_order; 639029bcbd8SJosh Durgin } 640029bcbd8SJosh Durgin 641029bcbd8SJosh Durgin /* 642602adf40SYehuda Sadeh * bio helpers 643602adf40SYehuda Sadeh */ 644602adf40SYehuda Sadeh 645602adf40SYehuda Sadeh static void bio_chain_put(struct bio *chain) 646602adf40SYehuda Sadeh { 647602adf40SYehuda Sadeh struct bio *tmp; 648602adf40SYehuda Sadeh 649602adf40SYehuda Sadeh while (chain) { 650602adf40SYehuda Sadeh tmp = chain; 651602adf40SYehuda Sadeh chain = chain->bi_next; 652602adf40SYehuda Sadeh bio_put(tmp); 653602adf40SYehuda Sadeh } 654602adf40SYehuda Sadeh } 655602adf40SYehuda Sadeh 656602adf40SYehuda Sadeh /* 657602adf40SYehuda Sadeh * zeros a bio chain, starting at specific offset 658602adf40SYehuda Sadeh */ 659602adf40SYehuda Sadeh static void zero_bio_chain(struct bio *chain, int start_ofs) 660602adf40SYehuda Sadeh { 661602adf40SYehuda Sadeh struct bio_vec *bv; 662602adf40SYehuda Sadeh unsigned long flags; 663602adf40SYehuda Sadeh void *buf; 664602adf40SYehuda Sadeh int i; 665602adf40SYehuda Sadeh int pos = 0; 666602adf40SYehuda Sadeh 667602adf40SYehuda Sadeh while (chain) { 668602adf40SYehuda Sadeh bio_for_each_segment(bv, chain, i) { 669602adf40SYehuda Sadeh if (pos + bv->bv_len > start_ofs) { 670602adf40SYehuda Sadeh int remainder = max(start_ofs - pos, 0); 671602adf40SYehuda Sadeh 
buf = bvec_kmap_irq(bv, &flags); 672602adf40SYehuda Sadeh memset(buf + remainder, 0, 673602adf40SYehuda Sadeh bv->bv_len - remainder); 67485b5aaa6SDan Carpenter bvec_kunmap_irq(buf, &flags); 675602adf40SYehuda Sadeh } 676602adf40SYehuda Sadeh pos += bv->bv_len; 677602adf40SYehuda Sadeh } 678602adf40SYehuda Sadeh 679602adf40SYehuda Sadeh chain = chain->bi_next; 680602adf40SYehuda Sadeh } 681602adf40SYehuda Sadeh } 682602adf40SYehuda Sadeh 683602adf40SYehuda Sadeh /* 684602adf40SYehuda Sadeh * bio_chain_clone - clone a chain of bios up to a certain length. 685602adf40SYehuda Sadeh * might return a bio_pair that will need to be released. 686602adf40SYehuda Sadeh */ 687602adf40SYehuda Sadeh static struct bio *bio_chain_clone(struct bio **old, struct bio **next, 688602adf40SYehuda Sadeh struct bio_pair **bp, 689602adf40SYehuda Sadeh int len, gfp_t gfpmask) 690602adf40SYehuda Sadeh { 691602adf40SYehuda Sadeh struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL; 692602adf40SYehuda Sadeh int total = 0; 693602adf40SYehuda Sadeh 694602adf40SYehuda Sadeh if (*bp) { 695602adf40SYehuda Sadeh bio_pair_release(*bp); 696602adf40SYehuda Sadeh *bp = NULL; 697602adf40SYehuda Sadeh } 698602adf40SYehuda Sadeh 699602adf40SYehuda Sadeh while (old_chain && (total < len)) { 700602adf40SYehuda Sadeh tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); 701602adf40SYehuda Sadeh if (!tmp) 702602adf40SYehuda Sadeh goto err_out; 703602adf40SYehuda Sadeh 704602adf40SYehuda Sadeh if (total + old_chain->bi_size > len) { 705602adf40SYehuda Sadeh struct bio_pair *bp; 706602adf40SYehuda Sadeh 707602adf40SYehuda Sadeh /* 708602adf40SYehuda Sadeh * this split can only happen with a single paged bio, 709602adf40SYehuda Sadeh * split_bio will BUG_ON if this is not the case 710602adf40SYehuda Sadeh */ 711602adf40SYehuda Sadeh dout("bio_chain_clone split! 
total=%d remaining=%d" 712602adf40SYehuda Sadeh "bi_size=%d\n", 713602adf40SYehuda Sadeh (int)total, (int)len-total, 714602adf40SYehuda Sadeh (int)old_chain->bi_size); 715602adf40SYehuda Sadeh 716602adf40SYehuda Sadeh /* split the bio. We'll release it either in the next 717602adf40SYehuda Sadeh call, or it will have to be released outside */ 718602adf40SYehuda Sadeh bp = bio_split(old_chain, (len - total) / 512ULL); 719602adf40SYehuda Sadeh if (!bp) 720602adf40SYehuda Sadeh goto err_out; 721602adf40SYehuda Sadeh 722602adf40SYehuda Sadeh __bio_clone(tmp, &bp->bio1); 723602adf40SYehuda Sadeh 724602adf40SYehuda Sadeh *next = &bp->bio2; 725602adf40SYehuda Sadeh } else { 726602adf40SYehuda Sadeh __bio_clone(tmp, old_chain); 727602adf40SYehuda Sadeh *next = old_chain->bi_next; 728602adf40SYehuda Sadeh } 729602adf40SYehuda Sadeh 730602adf40SYehuda Sadeh tmp->bi_bdev = NULL; 731602adf40SYehuda Sadeh gfpmask &= ~__GFP_WAIT; 732602adf40SYehuda Sadeh tmp->bi_next = NULL; 733602adf40SYehuda Sadeh 734602adf40SYehuda Sadeh if (!new_chain) { 735602adf40SYehuda Sadeh new_chain = tail = tmp; 736602adf40SYehuda Sadeh } else { 737602adf40SYehuda Sadeh tail->bi_next = tmp; 738602adf40SYehuda Sadeh tail = tmp; 739602adf40SYehuda Sadeh } 740602adf40SYehuda Sadeh old_chain = old_chain->bi_next; 741602adf40SYehuda Sadeh 742602adf40SYehuda Sadeh total += tmp->bi_size; 743602adf40SYehuda Sadeh } 744602adf40SYehuda Sadeh 745602adf40SYehuda Sadeh BUG_ON(total < len); 746602adf40SYehuda Sadeh 747602adf40SYehuda Sadeh if (tail) 748602adf40SYehuda Sadeh tail->bi_next = NULL; 749602adf40SYehuda Sadeh 750602adf40SYehuda Sadeh *old = old_chain; 751602adf40SYehuda Sadeh 752602adf40SYehuda Sadeh return new_chain; 753602adf40SYehuda Sadeh 754602adf40SYehuda Sadeh err_out: 755602adf40SYehuda Sadeh dout("bio_chain_clone with err\n"); 756602adf40SYehuda Sadeh bio_chain_put(new_chain); 757602adf40SYehuda Sadeh return NULL; 758602adf40SYehuda Sadeh } 759602adf40SYehuda Sadeh 760602adf40SYehuda Sadeh /* 
761602adf40SYehuda Sadeh * helpers for osd request op vectors. 762602adf40SYehuda Sadeh */ 763602adf40SYehuda Sadeh static int rbd_create_rw_ops(struct ceph_osd_req_op **ops, 764602adf40SYehuda Sadeh int num_ops, 765602adf40SYehuda Sadeh int opcode, 766602adf40SYehuda Sadeh u32 payload_len) 767602adf40SYehuda Sadeh { 768602adf40SYehuda Sadeh *ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1), 769602adf40SYehuda Sadeh GFP_NOIO); 770602adf40SYehuda Sadeh if (!*ops) 771602adf40SYehuda Sadeh return -ENOMEM; 772602adf40SYehuda Sadeh (*ops)[0].op = opcode; 773602adf40SYehuda Sadeh /* 774602adf40SYehuda Sadeh * op extent offset and length will be set later on 775602adf40SYehuda Sadeh * in calc_raw_layout() 776602adf40SYehuda Sadeh */ 777602adf40SYehuda Sadeh (*ops)[0].payload_len = payload_len; 778602adf40SYehuda Sadeh return 0; 779602adf40SYehuda Sadeh } 780602adf40SYehuda Sadeh 781602adf40SYehuda Sadeh static void rbd_destroy_ops(struct ceph_osd_req_op *ops) 782602adf40SYehuda Sadeh { 783602adf40SYehuda Sadeh kfree(ops); 784602adf40SYehuda Sadeh } 785602adf40SYehuda Sadeh 7861fec7093SYehuda Sadeh static void rbd_coll_end_req_index(struct request *rq, 7871fec7093SYehuda Sadeh struct rbd_req_coll *coll, 7881fec7093SYehuda Sadeh int index, 7891fec7093SYehuda Sadeh int ret, u64 len) 7901fec7093SYehuda Sadeh { 7911fec7093SYehuda Sadeh struct request_queue *q; 7921fec7093SYehuda Sadeh int min, max, i; 7931fec7093SYehuda Sadeh 7941fec7093SYehuda Sadeh dout("rbd_coll_end_req_index %p index %d ret %d len %lld\n", 7951fec7093SYehuda Sadeh coll, index, ret, len); 7961fec7093SYehuda Sadeh 7971fec7093SYehuda Sadeh if (!rq) 7981fec7093SYehuda Sadeh return; 7991fec7093SYehuda Sadeh 8001fec7093SYehuda Sadeh if (!coll) { 8011fec7093SYehuda Sadeh blk_end_request(rq, ret, len); 8021fec7093SYehuda Sadeh return; 8031fec7093SYehuda Sadeh } 8041fec7093SYehuda Sadeh 8051fec7093SYehuda Sadeh q = rq->q; 8061fec7093SYehuda Sadeh 8071fec7093SYehuda Sadeh spin_lock_irq(q->queue_lock); 
8081fec7093SYehuda Sadeh coll->status[index].done = 1; 8091fec7093SYehuda Sadeh coll->status[index].rc = ret; 8101fec7093SYehuda Sadeh coll->status[index].bytes = len; 8111fec7093SYehuda Sadeh max = min = coll->num_done; 8121fec7093SYehuda Sadeh while (max < coll->total && coll->status[max].done) 8131fec7093SYehuda Sadeh max++; 8141fec7093SYehuda Sadeh 8151fec7093SYehuda Sadeh for (i = min; i<max; i++) { 8161fec7093SYehuda Sadeh __blk_end_request(rq, coll->status[i].rc, 8171fec7093SYehuda Sadeh coll->status[i].bytes); 8181fec7093SYehuda Sadeh coll->num_done++; 8191fec7093SYehuda Sadeh kref_put(&coll->kref, rbd_coll_release); 8201fec7093SYehuda Sadeh } 8211fec7093SYehuda Sadeh spin_unlock_irq(q->queue_lock); 8221fec7093SYehuda Sadeh } 8231fec7093SYehuda Sadeh 8241fec7093SYehuda Sadeh static void rbd_coll_end_req(struct rbd_request *req, 8251fec7093SYehuda Sadeh int ret, u64 len) 8261fec7093SYehuda Sadeh { 8271fec7093SYehuda Sadeh rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len); 8281fec7093SYehuda Sadeh } 8291fec7093SYehuda Sadeh 830602adf40SYehuda Sadeh /* 831602adf40SYehuda Sadeh * Send ceph osd request 832602adf40SYehuda Sadeh */ 833602adf40SYehuda Sadeh static int rbd_do_request(struct request *rq, 834602adf40SYehuda Sadeh struct rbd_device *dev, 835602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 836602adf40SYehuda Sadeh u64 snapid, 837602adf40SYehuda Sadeh const char *obj, u64 ofs, u64 len, 838602adf40SYehuda Sadeh struct bio *bio, 839602adf40SYehuda Sadeh struct page **pages, 840602adf40SYehuda Sadeh int num_pages, 841602adf40SYehuda Sadeh int flags, 842602adf40SYehuda Sadeh struct ceph_osd_req_op *ops, 843602adf40SYehuda Sadeh int num_reply, 8441fec7093SYehuda Sadeh struct rbd_req_coll *coll, 8451fec7093SYehuda Sadeh int coll_index, 846602adf40SYehuda Sadeh void (*rbd_cb)(struct ceph_osd_request *req, 84759c2be1eSYehuda Sadeh struct ceph_msg *msg), 84859c2be1eSYehuda Sadeh struct ceph_osd_request **linger_req, 84959c2be1eSYehuda 
Sadeh u64 *ver) 850602adf40SYehuda Sadeh { 851602adf40SYehuda Sadeh struct ceph_osd_request *req; 852602adf40SYehuda Sadeh struct ceph_file_layout *layout; 853602adf40SYehuda Sadeh int ret; 854602adf40SYehuda Sadeh u64 bno; 855602adf40SYehuda Sadeh struct timespec mtime = CURRENT_TIME; 856602adf40SYehuda Sadeh struct rbd_request *req_data; 857602adf40SYehuda Sadeh struct ceph_osd_request_head *reqhead; 858602adf40SYehuda Sadeh struct rbd_image_header *header = &dev->header; 859602adf40SYehuda Sadeh 860602adf40SYehuda Sadeh req_data = kzalloc(sizeof(*req_data), GFP_NOIO); 8611fec7093SYehuda Sadeh if (!req_data) { 8621fec7093SYehuda Sadeh if (coll) 8631fec7093SYehuda Sadeh rbd_coll_end_req_index(rq, coll, coll_index, 8641fec7093SYehuda Sadeh -ENOMEM, len); 8651fec7093SYehuda Sadeh return -ENOMEM; 8661fec7093SYehuda Sadeh } 867602adf40SYehuda Sadeh 8681fec7093SYehuda Sadeh if (coll) { 8691fec7093SYehuda Sadeh req_data->coll = coll; 8701fec7093SYehuda Sadeh req_data->coll_index = coll_index; 8711fec7093SYehuda Sadeh } 8721fec7093SYehuda Sadeh 8731fec7093SYehuda Sadeh dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs); 874602adf40SYehuda Sadeh 875602adf40SYehuda Sadeh down_read(&header->snap_rwsem); 876602adf40SYehuda Sadeh 877602adf40SYehuda Sadeh req = ceph_osdc_alloc_request(&dev->client->osdc, flags, 878602adf40SYehuda Sadeh snapc, 879602adf40SYehuda Sadeh ops, 880602adf40SYehuda Sadeh false, 881602adf40SYehuda Sadeh GFP_NOIO, pages, bio); 8824ad12621SSage Weil if (!req) { 883602adf40SYehuda Sadeh up_read(&header->snap_rwsem); 8844ad12621SSage Weil ret = -ENOMEM; 885602adf40SYehuda Sadeh goto done_pages; 886602adf40SYehuda Sadeh } 887602adf40SYehuda Sadeh 888602adf40SYehuda Sadeh req->r_callback = rbd_cb; 889602adf40SYehuda Sadeh 890602adf40SYehuda Sadeh req_data->rq = rq; 891602adf40SYehuda Sadeh req_data->bio = bio; 892602adf40SYehuda Sadeh req_data->pages = pages; 893602adf40SYehuda Sadeh req_data->len = len; 894602adf40SYehuda Sadeh 
895602adf40SYehuda Sadeh req->r_priv = req_data; 896602adf40SYehuda Sadeh 897602adf40SYehuda Sadeh reqhead = req->r_request->front.iov_base; 898602adf40SYehuda Sadeh reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); 899602adf40SYehuda Sadeh 900602adf40SYehuda Sadeh strncpy(req->r_oid, obj, sizeof(req->r_oid)); 901602adf40SYehuda Sadeh req->r_oid_len = strlen(req->r_oid); 902602adf40SYehuda Sadeh 903602adf40SYehuda Sadeh layout = &req->r_file_layout; 904602adf40SYehuda Sadeh memset(layout, 0, sizeof(*layout)); 905602adf40SYehuda Sadeh layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); 906602adf40SYehuda Sadeh layout->fl_stripe_count = cpu_to_le32(1); 907602adf40SYehuda Sadeh layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); 908602adf40SYehuda Sadeh layout->fl_pg_preferred = cpu_to_le32(-1); 909602adf40SYehuda Sadeh layout->fl_pg_pool = cpu_to_le32(dev->poolid); 910602adf40SYehuda Sadeh ceph_calc_raw_layout(&dev->client->osdc, layout, snapid, 911602adf40SYehuda Sadeh ofs, &len, &bno, req, ops); 912602adf40SYehuda Sadeh 913602adf40SYehuda Sadeh ceph_osdc_build_request(req, ofs, &len, 914602adf40SYehuda Sadeh ops, 915602adf40SYehuda Sadeh snapc, 916602adf40SYehuda Sadeh &mtime, 917602adf40SYehuda Sadeh req->r_oid, req->r_oid_len); 918602adf40SYehuda Sadeh up_read(&header->snap_rwsem); 919602adf40SYehuda Sadeh 92059c2be1eSYehuda Sadeh if (linger_req) { 92159c2be1eSYehuda Sadeh ceph_osdc_set_request_linger(&dev->client->osdc, req); 92259c2be1eSYehuda Sadeh *linger_req = req; 92359c2be1eSYehuda Sadeh } 92459c2be1eSYehuda Sadeh 925602adf40SYehuda Sadeh ret = ceph_osdc_start_request(&dev->client->osdc, req, false); 926602adf40SYehuda Sadeh if (ret < 0) 927602adf40SYehuda Sadeh goto done_err; 928602adf40SYehuda Sadeh 929602adf40SYehuda Sadeh if (!rbd_cb) { 930602adf40SYehuda Sadeh ret = ceph_osdc_wait_request(&dev->client->osdc, req); 93159c2be1eSYehuda Sadeh if (ver) 93259c2be1eSYehuda Sadeh *ver = le64_to_cpu(req->r_reassert_version.version); 
9331fec7093SYehuda Sadeh dout("reassert_ver=%lld\n", 9341fec7093SYehuda Sadeh le64_to_cpu(req->r_reassert_version.version)); 935602adf40SYehuda Sadeh ceph_osdc_put_request(req); 936602adf40SYehuda Sadeh } 937602adf40SYehuda Sadeh return ret; 938602adf40SYehuda Sadeh 939602adf40SYehuda Sadeh done_err: 940602adf40SYehuda Sadeh bio_chain_put(req_data->bio); 941602adf40SYehuda Sadeh ceph_osdc_put_request(req); 942602adf40SYehuda Sadeh done_pages: 9431fec7093SYehuda Sadeh rbd_coll_end_req(req_data, ret, len); 944602adf40SYehuda Sadeh kfree(req_data); 945602adf40SYehuda Sadeh return ret; 946602adf40SYehuda Sadeh } 947602adf40SYehuda Sadeh 948602adf40SYehuda Sadeh /* 949602adf40SYehuda Sadeh * Ceph osd op callback 950602adf40SYehuda Sadeh */ 951602adf40SYehuda Sadeh static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) 952602adf40SYehuda Sadeh { 953602adf40SYehuda Sadeh struct rbd_request *req_data = req->r_priv; 954602adf40SYehuda Sadeh struct ceph_osd_reply_head *replyhead; 955602adf40SYehuda Sadeh struct ceph_osd_op *op; 956602adf40SYehuda Sadeh __s32 rc; 957602adf40SYehuda Sadeh u64 bytes; 958602adf40SYehuda Sadeh int read_op; 959602adf40SYehuda Sadeh 960602adf40SYehuda Sadeh /* parse reply */ 961602adf40SYehuda Sadeh replyhead = msg->front.iov_base; 962602adf40SYehuda Sadeh WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); 963602adf40SYehuda Sadeh op = (void *)(replyhead + 1); 964602adf40SYehuda Sadeh rc = le32_to_cpu(replyhead->result); 965602adf40SYehuda Sadeh bytes = le64_to_cpu(op->extent.length); 966602adf40SYehuda Sadeh read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ); 967602adf40SYehuda Sadeh 968602adf40SYehuda Sadeh dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc); 969602adf40SYehuda Sadeh 970602adf40SYehuda Sadeh if (rc == -ENOENT && read_op) { 971602adf40SYehuda Sadeh zero_bio_chain(req_data->bio, 0); 972602adf40SYehuda Sadeh rc = 0; 973602adf40SYehuda Sadeh } else if (rc == 0 && read_op && bytes < req_data->len) { 
974602adf40SYehuda Sadeh zero_bio_chain(req_data->bio, bytes); 975602adf40SYehuda Sadeh bytes = req_data->len; 976602adf40SYehuda Sadeh } 977602adf40SYehuda Sadeh 9781fec7093SYehuda Sadeh rbd_coll_end_req(req_data, rc, bytes); 979602adf40SYehuda Sadeh 980602adf40SYehuda Sadeh if (req_data->bio) 981602adf40SYehuda Sadeh bio_chain_put(req_data->bio); 982602adf40SYehuda Sadeh 983602adf40SYehuda Sadeh ceph_osdc_put_request(req); 984602adf40SYehuda Sadeh kfree(req_data); 985602adf40SYehuda Sadeh } 986602adf40SYehuda Sadeh 98759c2be1eSYehuda Sadeh static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) 98859c2be1eSYehuda Sadeh { 98959c2be1eSYehuda Sadeh ceph_osdc_put_request(req); 99059c2be1eSYehuda Sadeh } 99159c2be1eSYehuda Sadeh 992602adf40SYehuda Sadeh /* 993602adf40SYehuda Sadeh * Do a synchronous ceph osd operation 994602adf40SYehuda Sadeh */ 995602adf40SYehuda Sadeh static int rbd_req_sync_op(struct rbd_device *dev, 996602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 997602adf40SYehuda Sadeh u64 snapid, 998602adf40SYehuda Sadeh int opcode, 999602adf40SYehuda Sadeh int flags, 1000602adf40SYehuda Sadeh struct ceph_osd_req_op *orig_ops, 1001602adf40SYehuda Sadeh int num_reply, 1002602adf40SYehuda Sadeh const char *obj, 1003602adf40SYehuda Sadeh u64 ofs, u64 len, 100459c2be1eSYehuda Sadeh char *buf, 100559c2be1eSYehuda Sadeh struct ceph_osd_request **linger_req, 100659c2be1eSYehuda Sadeh u64 *ver) 1007602adf40SYehuda Sadeh { 1008602adf40SYehuda Sadeh int ret; 1009602adf40SYehuda Sadeh struct page **pages; 1010602adf40SYehuda Sadeh int num_pages; 1011602adf40SYehuda Sadeh struct ceph_osd_req_op *ops = orig_ops; 1012602adf40SYehuda Sadeh u32 payload_len; 1013602adf40SYehuda Sadeh 1014602adf40SYehuda Sadeh num_pages = calc_pages_for(ofs , len); 1015602adf40SYehuda Sadeh pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); 1016b8d0638aSDan Carpenter if (IS_ERR(pages)) 1017b8d0638aSDan Carpenter return PTR_ERR(pages); 1018602adf40SYehuda 
Sadeh 1019602adf40SYehuda Sadeh if (!orig_ops) { 1020602adf40SYehuda Sadeh payload_len = (flags & CEPH_OSD_FLAG_WRITE ? len : 0); 1021602adf40SYehuda Sadeh ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); 1022602adf40SYehuda Sadeh if (ret < 0) 1023602adf40SYehuda Sadeh goto done; 1024602adf40SYehuda Sadeh 1025602adf40SYehuda Sadeh if ((flags & CEPH_OSD_FLAG_WRITE) && buf) { 1026602adf40SYehuda Sadeh ret = ceph_copy_to_page_vector(pages, buf, ofs, len); 1027602adf40SYehuda Sadeh if (ret < 0) 1028602adf40SYehuda Sadeh goto done_ops; 1029602adf40SYehuda Sadeh } 1030602adf40SYehuda Sadeh } 1031602adf40SYehuda Sadeh 1032602adf40SYehuda Sadeh ret = rbd_do_request(NULL, dev, snapc, snapid, 1033602adf40SYehuda Sadeh obj, ofs, len, NULL, 1034602adf40SYehuda Sadeh pages, num_pages, 1035602adf40SYehuda Sadeh flags, 1036602adf40SYehuda Sadeh ops, 1037602adf40SYehuda Sadeh 2, 10381fec7093SYehuda Sadeh NULL, 0, 103959c2be1eSYehuda Sadeh NULL, 104059c2be1eSYehuda Sadeh linger_req, ver); 1041602adf40SYehuda Sadeh if (ret < 0) 1042602adf40SYehuda Sadeh goto done_ops; 1043602adf40SYehuda Sadeh 1044602adf40SYehuda Sadeh if ((flags & CEPH_OSD_FLAG_READ) && buf) 1045602adf40SYehuda Sadeh ret = ceph_copy_from_page_vector(pages, buf, ofs, ret); 1046602adf40SYehuda Sadeh 1047602adf40SYehuda Sadeh done_ops: 1048602adf40SYehuda Sadeh if (!orig_ops) 1049602adf40SYehuda Sadeh rbd_destroy_ops(ops); 1050602adf40SYehuda Sadeh done: 1051602adf40SYehuda Sadeh ceph_release_page_vector(pages, num_pages); 1052602adf40SYehuda Sadeh return ret; 1053602adf40SYehuda Sadeh } 1054602adf40SYehuda Sadeh 1055602adf40SYehuda Sadeh /* 1056602adf40SYehuda Sadeh * Do an asynchronous ceph osd operation 1057602adf40SYehuda Sadeh */ 1058602adf40SYehuda Sadeh static int rbd_do_op(struct request *rq, 1059602adf40SYehuda Sadeh struct rbd_device *rbd_dev , 1060602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1061602adf40SYehuda Sadeh u64 snapid, 1062602adf40SYehuda Sadeh int opcode, int flags, int num_reply, 
1063602adf40SYehuda Sadeh u64 ofs, u64 len, 10641fec7093SYehuda Sadeh struct bio *bio, 10651fec7093SYehuda Sadeh struct rbd_req_coll *coll, 10661fec7093SYehuda Sadeh int coll_index) 1067602adf40SYehuda Sadeh { 1068602adf40SYehuda Sadeh char *seg_name; 1069602adf40SYehuda Sadeh u64 seg_ofs; 1070602adf40SYehuda Sadeh u64 seg_len; 1071602adf40SYehuda Sadeh int ret; 1072602adf40SYehuda Sadeh struct ceph_osd_req_op *ops; 1073602adf40SYehuda Sadeh u32 payload_len; 1074602adf40SYehuda Sadeh 1075602adf40SYehuda Sadeh seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); 1076602adf40SYehuda Sadeh if (!seg_name) 1077602adf40SYehuda Sadeh return -ENOMEM; 1078602adf40SYehuda Sadeh 1079602adf40SYehuda Sadeh seg_len = rbd_get_segment(&rbd_dev->header, 1080602adf40SYehuda Sadeh rbd_dev->header.block_name, 1081602adf40SYehuda Sadeh ofs, len, 1082602adf40SYehuda Sadeh seg_name, &seg_ofs); 1083602adf40SYehuda Sadeh 1084602adf40SYehuda Sadeh payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); 1085602adf40SYehuda Sadeh 1086602adf40SYehuda Sadeh ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); 1087602adf40SYehuda Sadeh if (ret < 0) 1088602adf40SYehuda Sadeh goto done; 1089602adf40SYehuda Sadeh 1090602adf40SYehuda Sadeh /* we've taken care of segment sizes earlier when we 1091602adf40SYehuda Sadeh cloned the bios. 
We should never have a segment 1092602adf40SYehuda Sadeh truncated at this point */ 1093602adf40SYehuda Sadeh BUG_ON(seg_len < len); 1094602adf40SYehuda Sadeh 1095602adf40SYehuda Sadeh ret = rbd_do_request(rq, rbd_dev, snapc, snapid, 1096602adf40SYehuda Sadeh seg_name, seg_ofs, seg_len, 1097602adf40SYehuda Sadeh bio, 1098602adf40SYehuda Sadeh NULL, 0, 1099602adf40SYehuda Sadeh flags, 1100602adf40SYehuda Sadeh ops, 1101602adf40SYehuda Sadeh num_reply, 11021fec7093SYehuda Sadeh coll, coll_index, 110359c2be1eSYehuda Sadeh rbd_req_cb, 0, NULL); 110411f77002SSage Weil 110511f77002SSage Weil rbd_destroy_ops(ops); 1106602adf40SYehuda Sadeh done: 1107602adf40SYehuda Sadeh kfree(seg_name); 1108602adf40SYehuda Sadeh return ret; 1109602adf40SYehuda Sadeh } 1110602adf40SYehuda Sadeh 1111602adf40SYehuda Sadeh /* 1112602adf40SYehuda Sadeh * Request async osd write 1113602adf40SYehuda Sadeh */ 1114602adf40SYehuda Sadeh static int rbd_req_write(struct request *rq, 1115602adf40SYehuda Sadeh struct rbd_device *rbd_dev, 1116602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1117602adf40SYehuda Sadeh u64 ofs, u64 len, 11181fec7093SYehuda Sadeh struct bio *bio, 11191fec7093SYehuda Sadeh struct rbd_req_coll *coll, 11201fec7093SYehuda Sadeh int coll_index) 1121602adf40SYehuda Sadeh { 1122602adf40SYehuda Sadeh return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, 1123602adf40SYehuda Sadeh CEPH_OSD_OP_WRITE, 1124602adf40SYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 1125602adf40SYehuda Sadeh 2, 11261fec7093SYehuda Sadeh ofs, len, bio, coll, coll_index); 1127602adf40SYehuda Sadeh } 1128602adf40SYehuda Sadeh 1129602adf40SYehuda Sadeh /* 1130602adf40SYehuda Sadeh * Request async osd read 1131602adf40SYehuda Sadeh */ 1132602adf40SYehuda Sadeh static int rbd_req_read(struct request *rq, 1133602adf40SYehuda Sadeh struct rbd_device *rbd_dev, 1134602adf40SYehuda Sadeh u64 snapid, 1135602adf40SYehuda Sadeh u64 ofs, u64 len, 11361fec7093SYehuda Sadeh struct bio *bio, 11371fec7093SYehuda 
Sadeh struct rbd_req_coll *coll, 11381fec7093SYehuda Sadeh int coll_index) 1139602adf40SYehuda Sadeh { 1140602adf40SYehuda Sadeh return rbd_do_op(rq, rbd_dev, NULL, 1141602adf40SYehuda Sadeh (snapid ? snapid : CEPH_NOSNAP), 1142602adf40SYehuda Sadeh CEPH_OSD_OP_READ, 1143602adf40SYehuda Sadeh CEPH_OSD_FLAG_READ, 1144602adf40SYehuda Sadeh 2, 11451fec7093SYehuda Sadeh ofs, len, bio, coll, coll_index); 1146602adf40SYehuda Sadeh } 1147602adf40SYehuda Sadeh 1148602adf40SYehuda Sadeh /* 1149602adf40SYehuda Sadeh * Request sync osd read 1150602adf40SYehuda Sadeh */ 1151602adf40SYehuda Sadeh static int rbd_req_sync_read(struct rbd_device *dev, 1152602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1153602adf40SYehuda Sadeh u64 snapid, 1154602adf40SYehuda Sadeh const char *obj, 1155602adf40SYehuda Sadeh u64 ofs, u64 len, 115659c2be1eSYehuda Sadeh char *buf, 115759c2be1eSYehuda Sadeh u64 *ver) 1158602adf40SYehuda Sadeh { 1159602adf40SYehuda Sadeh return rbd_req_sync_op(dev, NULL, 1160602adf40SYehuda Sadeh (snapid ? 
snapid : CEPH_NOSNAP), 1161602adf40SYehuda Sadeh CEPH_OSD_OP_READ, 1162602adf40SYehuda Sadeh CEPH_OSD_FLAG_READ, 1163602adf40SYehuda Sadeh NULL, 116459c2be1eSYehuda Sadeh 1, obj, ofs, len, buf, NULL, ver); 1165602adf40SYehuda Sadeh } 1166602adf40SYehuda Sadeh 1167602adf40SYehuda Sadeh /* 116859c2be1eSYehuda Sadeh * Request sync osd watch 116959c2be1eSYehuda Sadeh */ 117059c2be1eSYehuda Sadeh static int rbd_req_sync_notify_ack(struct rbd_device *dev, 117159c2be1eSYehuda Sadeh u64 ver, 117259c2be1eSYehuda Sadeh u64 notify_id, 117359c2be1eSYehuda Sadeh const char *obj) 117459c2be1eSYehuda Sadeh { 117559c2be1eSYehuda Sadeh struct ceph_osd_req_op *ops; 117659c2be1eSYehuda Sadeh struct page **pages = NULL; 117711f77002SSage Weil int ret; 117811f77002SSage Weil 117911f77002SSage Weil ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY_ACK, 0); 118059c2be1eSYehuda Sadeh if (ret < 0) 118159c2be1eSYehuda Sadeh return ret; 118259c2be1eSYehuda Sadeh 118359c2be1eSYehuda Sadeh ops[0].watch.ver = cpu_to_le64(dev->header.obj_version); 118459c2be1eSYehuda Sadeh ops[0].watch.cookie = notify_id; 118559c2be1eSYehuda Sadeh ops[0].watch.flag = 0; 118659c2be1eSYehuda Sadeh 118759c2be1eSYehuda Sadeh ret = rbd_do_request(NULL, dev, NULL, CEPH_NOSNAP, 118859c2be1eSYehuda Sadeh obj, 0, 0, NULL, 118959c2be1eSYehuda Sadeh pages, 0, 119059c2be1eSYehuda Sadeh CEPH_OSD_FLAG_READ, 119159c2be1eSYehuda Sadeh ops, 119259c2be1eSYehuda Sadeh 1, 11931fec7093SYehuda Sadeh NULL, 0, 119459c2be1eSYehuda Sadeh rbd_simple_req_cb, 0, NULL); 119559c2be1eSYehuda Sadeh 119659c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 119759c2be1eSYehuda Sadeh return ret; 119859c2be1eSYehuda Sadeh } 119959c2be1eSYehuda Sadeh 120059c2be1eSYehuda Sadeh static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) 120159c2be1eSYehuda Sadeh { 120259c2be1eSYehuda Sadeh struct rbd_device *dev = (struct rbd_device *)data; 120313143d2dSSage Weil int rc; 120413143d2dSSage Weil 120559c2be1eSYehuda Sadeh if (!dev) 
120659c2be1eSYehuda Sadeh return; 120759c2be1eSYehuda Sadeh 120859c2be1eSYehuda Sadeh dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name, 120959c2be1eSYehuda Sadeh notify_id, (int)opcode); 121059c2be1eSYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 121113143d2dSSage Weil rc = __rbd_update_snaps(dev); 121259c2be1eSYehuda Sadeh mutex_unlock(&ctl_mutex); 121313143d2dSSage Weil if (rc) 121413143d2dSSage Weil pr_warning(DRV_NAME "%d got notification but failed to update" 121513143d2dSSage Weil " snaps: %d\n", dev->major, rc); 121659c2be1eSYehuda Sadeh 121759c2be1eSYehuda Sadeh rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name); 121859c2be1eSYehuda Sadeh } 121959c2be1eSYehuda Sadeh 122059c2be1eSYehuda Sadeh /* 122159c2be1eSYehuda Sadeh * Request sync osd watch 122259c2be1eSYehuda Sadeh */ 122359c2be1eSYehuda Sadeh static int rbd_req_sync_watch(struct rbd_device *dev, 122459c2be1eSYehuda Sadeh const char *obj, 122559c2be1eSYehuda Sadeh u64 ver) 122659c2be1eSYehuda Sadeh { 122759c2be1eSYehuda Sadeh struct ceph_osd_req_op *ops; 122859c2be1eSYehuda Sadeh struct ceph_osd_client *osdc = &dev->client->osdc; 122959c2be1eSYehuda Sadeh 123059c2be1eSYehuda Sadeh int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0); 123159c2be1eSYehuda Sadeh if (ret < 0) 123259c2be1eSYehuda Sadeh return ret; 123359c2be1eSYehuda Sadeh 123459c2be1eSYehuda Sadeh ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, 123559c2be1eSYehuda Sadeh (void *)dev, &dev->watch_event); 123659c2be1eSYehuda Sadeh if (ret < 0) 123759c2be1eSYehuda Sadeh goto fail; 123859c2be1eSYehuda Sadeh 123959c2be1eSYehuda Sadeh ops[0].watch.ver = cpu_to_le64(ver); 124059c2be1eSYehuda Sadeh ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie); 124159c2be1eSYehuda Sadeh ops[0].watch.flag = 1; 124259c2be1eSYehuda Sadeh 124359c2be1eSYehuda Sadeh ret = rbd_req_sync_op(dev, NULL, 124459c2be1eSYehuda Sadeh CEPH_NOSNAP, 124559c2be1eSYehuda Sadeh 0, 124659c2be1eSYehuda Sadeh 
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 124759c2be1eSYehuda Sadeh ops, 124859c2be1eSYehuda Sadeh 1, obj, 0, 0, NULL, 124959c2be1eSYehuda Sadeh &dev->watch_request, NULL); 125059c2be1eSYehuda Sadeh 125159c2be1eSYehuda Sadeh if (ret < 0) 125259c2be1eSYehuda Sadeh goto fail_event; 125359c2be1eSYehuda Sadeh 125459c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 125559c2be1eSYehuda Sadeh return 0; 125659c2be1eSYehuda Sadeh 125759c2be1eSYehuda Sadeh fail_event: 125859c2be1eSYehuda Sadeh ceph_osdc_cancel_event(dev->watch_event); 125959c2be1eSYehuda Sadeh dev->watch_event = NULL; 126059c2be1eSYehuda Sadeh fail: 126159c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 126259c2be1eSYehuda Sadeh return ret; 126359c2be1eSYehuda Sadeh } 126459c2be1eSYehuda Sadeh 126579e3057cSYehuda Sadeh /* 126679e3057cSYehuda Sadeh * Request sync osd unwatch 126779e3057cSYehuda Sadeh */ 126879e3057cSYehuda Sadeh static int rbd_req_sync_unwatch(struct rbd_device *dev, 126979e3057cSYehuda Sadeh const char *obj) 127079e3057cSYehuda Sadeh { 127179e3057cSYehuda Sadeh struct ceph_osd_req_op *ops; 127279e3057cSYehuda Sadeh 127379e3057cSYehuda Sadeh int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0); 127479e3057cSYehuda Sadeh if (ret < 0) 127579e3057cSYehuda Sadeh return ret; 127679e3057cSYehuda Sadeh 127779e3057cSYehuda Sadeh ops[0].watch.ver = 0; 127879e3057cSYehuda Sadeh ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie); 127979e3057cSYehuda Sadeh ops[0].watch.flag = 0; 128079e3057cSYehuda Sadeh 128179e3057cSYehuda Sadeh ret = rbd_req_sync_op(dev, NULL, 128279e3057cSYehuda Sadeh CEPH_NOSNAP, 128379e3057cSYehuda Sadeh 0, 128479e3057cSYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 128579e3057cSYehuda Sadeh ops, 128679e3057cSYehuda Sadeh 1, obj, 0, 0, NULL, NULL, NULL); 128779e3057cSYehuda Sadeh 128879e3057cSYehuda Sadeh rbd_destroy_ops(ops); 128979e3057cSYehuda Sadeh ceph_osdc_cancel_event(dev->watch_event); 129079e3057cSYehuda Sadeh dev->watch_event = NULL; 129179e3057cSYehuda Sadeh 
return ret; 129279e3057cSYehuda Sadeh } 129379e3057cSYehuda Sadeh 129459c2be1eSYehuda Sadeh struct rbd_notify_info { 129559c2be1eSYehuda Sadeh struct rbd_device *dev; 129659c2be1eSYehuda Sadeh }; 129759c2be1eSYehuda Sadeh 129859c2be1eSYehuda Sadeh static void rbd_notify_cb(u64 ver, u64 notify_id, u8 opcode, void *data) 129959c2be1eSYehuda Sadeh { 130059c2be1eSYehuda Sadeh struct rbd_device *dev = (struct rbd_device *)data; 130159c2be1eSYehuda Sadeh if (!dev) 130259c2be1eSYehuda Sadeh return; 130359c2be1eSYehuda Sadeh 130459c2be1eSYehuda Sadeh dout("rbd_notify_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name, 130559c2be1eSYehuda Sadeh notify_id, (int)opcode); 130659c2be1eSYehuda Sadeh } 130759c2be1eSYehuda Sadeh 130859c2be1eSYehuda Sadeh /* 130959c2be1eSYehuda Sadeh * Request sync osd notify 131059c2be1eSYehuda Sadeh */ 131159c2be1eSYehuda Sadeh static int rbd_req_sync_notify(struct rbd_device *dev, 131259c2be1eSYehuda Sadeh const char *obj) 131359c2be1eSYehuda Sadeh { 131459c2be1eSYehuda Sadeh struct ceph_osd_req_op *ops; 131559c2be1eSYehuda Sadeh struct ceph_osd_client *osdc = &dev->client->osdc; 131659c2be1eSYehuda Sadeh struct ceph_osd_event *event; 131759c2be1eSYehuda Sadeh struct rbd_notify_info info; 131859c2be1eSYehuda Sadeh int payload_len = sizeof(u32) + sizeof(u32); 131959c2be1eSYehuda Sadeh int ret; 132059c2be1eSYehuda Sadeh 132159c2be1eSYehuda Sadeh ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY, payload_len); 132259c2be1eSYehuda Sadeh if (ret < 0) 132359c2be1eSYehuda Sadeh return ret; 132459c2be1eSYehuda Sadeh 132559c2be1eSYehuda Sadeh info.dev = dev; 132659c2be1eSYehuda Sadeh 132759c2be1eSYehuda Sadeh ret = ceph_osdc_create_event(osdc, rbd_notify_cb, 1, 132859c2be1eSYehuda Sadeh (void *)&info, &event); 132959c2be1eSYehuda Sadeh if (ret < 0) 133059c2be1eSYehuda Sadeh goto fail; 133159c2be1eSYehuda Sadeh 133259c2be1eSYehuda Sadeh ops[0].watch.ver = 1; 133359c2be1eSYehuda Sadeh ops[0].watch.flag = 1; 133459c2be1eSYehuda Sadeh ops[0].watch.cookie 
= event->cookie; 133559c2be1eSYehuda Sadeh ops[0].watch.prot_ver = RADOS_NOTIFY_VER; 133659c2be1eSYehuda Sadeh ops[0].watch.timeout = 12; 133759c2be1eSYehuda Sadeh 133859c2be1eSYehuda Sadeh ret = rbd_req_sync_op(dev, NULL, 133959c2be1eSYehuda Sadeh CEPH_NOSNAP, 134059c2be1eSYehuda Sadeh 0, 134159c2be1eSYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 134259c2be1eSYehuda Sadeh ops, 134359c2be1eSYehuda Sadeh 1, obj, 0, 0, NULL, NULL, NULL); 134459c2be1eSYehuda Sadeh if (ret < 0) 134559c2be1eSYehuda Sadeh goto fail_event; 134659c2be1eSYehuda Sadeh 134759c2be1eSYehuda Sadeh ret = ceph_osdc_wait_event(event, CEPH_OSD_TIMEOUT_DEFAULT); 134859c2be1eSYehuda Sadeh dout("ceph_osdc_wait_event returned %d\n", ret); 134959c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 135059c2be1eSYehuda Sadeh return 0; 135159c2be1eSYehuda Sadeh 135259c2be1eSYehuda Sadeh fail_event: 135359c2be1eSYehuda Sadeh ceph_osdc_cancel_event(event); 135459c2be1eSYehuda Sadeh fail: 135559c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 135659c2be1eSYehuda Sadeh return ret; 135759c2be1eSYehuda Sadeh } 135859c2be1eSYehuda Sadeh 135959c2be1eSYehuda Sadeh /* 1360602adf40SYehuda Sadeh * Request sync osd read 1361602adf40SYehuda Sadeh */ 1362602adf40SYehuda Sadeh static int rbd_req_sync_exec(struct rbd_device *dev, 1363602adf40SYehuda Sadeh const char *obj, 1364602adf40SYehuda Sadeh const char *cls, 1365602adf40SYehuda Sadeh const char *method, 1366602adf40SYehuda Sadeh const char *data, 136759c2be1eSYehuda Sadeh int len, 136859c2be1eSYehuda Sadeh u64 *ver) 1369602adf40SYehuda Sadeh { 1370602adf40SYehuda Sadeh struct ceph_osd_req_op *ops; 1371602adf40SYehuda Sadeh int cls_len = strlen(cls); 1372602adf40SYehuda Sadeh int method_len = strlen(method); 1373602adf40SYehuda Sadeh int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL, 1374602adf40SYehuda Sadeh cls_len + method_len + len); 1375602adf40SYehuda Sadeh if (ret < 0) 1376602adf40SYehuda Sadeh return ret; 1377602adf40SYehuda Sadeh 1378602adf40SYehuda Sadeh 
ops[0].cls.class_name = cls; 1379602adf40SYehuda Sadeh ops[0].cls.class_len = (__u8)cls_len; 1380602adf40SYehuda Sadeh ops[0].cls.method_name = method; 1381602adf40SYehuda Sadeh ops[0].cls.method_len = (__u8)method_len; 1382602adf40SYehuda Sadeh ops[0].cls.argc = 0; 1383602adf40SYehuda Sadeh ops[0].cls.indata = data; 1384602adf40SYehuda Sadeh ops[0].cls.indata_len = len; 1385602adf40SYehuda Sadeh 1386602adf40SYehuda Sadeh ret = rbd_req_sync_op(dev, NULL, 1387602adf40SYehuda Sadeh CEPH_NOSNAP, 1388602adf40SYehuda Sadeh 0, 1389602adf40SYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 1390602adf40SYehuda Sadeh ops, 139159c2be1eSYehuda Sadeh 1, obj, 0, 0, NULL, NULL, ver); 1392602adf40SYehuda Sadeh 1393602adf40SYehuda Sadeh rbd_destroy_ops(ops); 1394602adf40SYehuda Sadeh 1395602adf40SYehuda Sadeh dout("cls_exec returned %d\n", ret); 1396602adf40SYehuda Sadeh return ret; 1397602adf40SYehuda Sadeh } 1398602adf40SYehuda Sadeh 13991fec7093SYehuda Sadeh static struct rbd_req_coll *rbd_alloc_coll(int num_reqs) 14001fec7093SYehuda Sadeh { 14011fec7093SYehuda Sadeh struct rbd_req_coll *coll = 14021fec7093SYehuda Sadeh kzalloc(sizeof(struct rbd_req_coll) + 14031fec7093SYehuda Sadeh sizeof(struct rbd_req_status) * num_reqs, 14041fec7093SYehuda Sadeh GFP_ATOMIC); 14051fec7093SYehuda Sadeh 14061fec7093SYehuda Sadeh if (!coll) 14071fec7093SYehuda Sadeh return NULL; 14081fec7093SYehuda Sadeh coll->total = num_reqs; 14091fec7093SYehuda Sadeh kref_init(&coll->kref); 14101fec7093SYehuda Sadeh return coll; 14111fec7093SYehuda Sadeh } 14121fec7093SYehuda Sadeh 1413602adf40SYehuda Sadeh /* 1414602adf40SYehuda Sadeh * block device queue callback 1415602adf40SYehuda Sadeh */ 1416602adf40SYehuda Sadeh static void rbd_rq_fn(struct request_queue *q) 1417602adf40SYehuda Sadeh { 1418602adf40SYehuda Sadeh struct rbd_device *rbd_dev = q->queuedata; 1419602adf40SYehuda Sadeh struct request *rq; 1420602adf40SYehuda Sadeh struct bio_pair *bp = NULL; 1421602adf40SYehuda Sadeh 
1422602adf40SYehuda Sadeh rq = blk_fetch_request(q); 1423602adf40SYehuda Sadeh 1424602adf40SYehuda Sadeh while (1) { 1425602adf40SYehuda Sadeh struct bio *bio; 1426602adf40SYehuda Sadeh struct bio *rq_bio, *next_bio = NULL; 1427602adf40SYehuda Sadeh bool do_write; 1428602adf40SYehuda Sadeh int size, op_size = 0; 1429602adf40SYehuda Sadeh u64 ofs; 14301fec7093SYehuda Sadeh int num_segs, cur_seg = 0; 14311fec7093SYehuda Sadeh struct rbd_req_coll *coll; 1432602adf40SYehuda Sadeh 1433602adf40SYehuda Sadeh /* peek at request from block layer */ 1434602adf40SYehuda Sadeh if (!rq) 1435602adf40SYehuda Sadeh break; 1436602adf40SYehuda Sadeh 1437602adf40SYehuda Sadeh dout("fetched request\n"); 1438602adf40SYehuda Sadeh 1439602adf40SYehuda Sadeh /* filter out block requests we don't understand */ 1440602adf40SYehuda Sadeh if ((rq->cmd_type != REQ_TYPE_FS)) { 1441602adf40SYehuda Sadeh __blk_end_request_all(rq, 0); 1442602adf40SYehuda Sadeh goto next; 1443602adf40SYehuda Sadeh } 1444602adf40SYehuda Sadeh 1445602adf40SYehuda Sadeh /* deduce our operation (read, write) */ 1446602adf40SYehuda Sadeh do_write = (rq_data_dir(rq) == WRITE); 1447602adf40SYehuda Sadeh 1448602adf40SYehuda Sadeh size = blk_rq_bytes(rq); 1449602adf40SYehuda Sadeh ofs = blk_rq_pos(rq) * 512ULL; 1450602adf40SYehuda Sadeh rq_bio = rq->bio; 1451602adf40SYehuda Sadeh if (do_write && rbd_dev->read_only) { 1452602adf40SYehuda Sadeh __blk_end_request_all(rq, -EROFS); 1453602adf40SYehuda Sadeh goto next; 1454602adf40SYehuda Sadeh } 1455602adf40SYehuda Sadeh 1456602adf40SYehuda Sadeh spin_unlock_irq(q->queue_lock); 1457602adf40SYehuda Sadeh 1458602adf40SYehuda Sadeh dout("%s 0x%x bytes at 0x%llx\n", 1459602adf40SYehuda Sadeh do_write ? 
"write" : "read", 1460602adf40SYehuda Sadeh size, blk_rq_pos(rq) * 512ULL); 1461602adf40SYehuda Sadeh 14621fec7093SYehuda Sadeh num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); 14631fec7093SYehuda Sadeh coll = rbd_alloc_coll(num_segs); 14641fec7093SYehuda Sadeh if (!coll) { 14651fec7093SYehuda Sadeh spin_lock_irq(q->queue_lock); 14661fec7093SYehuda Sadeh __blk_end_request_all(rq, -ENOMEM); 14671fec7093SYehuda Sadeh goto next; 14681fec7093SYehuda Sadeh } 14691fec7093SYehuda Sadeh 1470602adf40SYehuda Sadeh do { 1471602adf40SYehuda Sadeh /* a bio clone to be passed down to OSD req */ 1472602adf40SYehuda Sadeh dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt); 1473602adf40SYehuda Sadeh op_size = rbd_get_segment(&rbd_dev->header, 1474602adf40SYehuda Sadeh rbd_dev->header.block_name, 1475602adf40SYehuda Sadeh ofs, size, 1476602adf40SYehuda Sadeh NULL, NULL); 14771fec7093SYehuda Sadeh kref_get(&coll->kref); 1478602adf40SYehuda Sadeh bio = bio_chain_clone(&rq_bio, &next_bio, &bp, 1479602adf40SYehuda Sadeh op_size, GFP_ATOMIC); 1480602adf40SYehuda Sadeh if (!bio) { 14811fec7093SYehuda Sadeh rbd_coll_end_req_index(rq, coll, cur_seg, 14821fec7093SYehuda Sadeh -ENOMEM, op_size); 14831fec7093SYehuda Sadeh goto next_seg; 1484602adf40SYehuda Sadeh } 1485602adf40SYehuda Sadeh 14861fec7093SYehuda Sadeh 1487602adf40SYehuda Sadeh /* init OSD command: write or read */ 1488602adf40SYehuda Sadeh if (do_write) 1489602adf40SYehuda Sadeh rbd_req_write(rq, rbd_dev, 1490602adf40SYehuda Sadeh rbd_dev->header.snapc, 1491602adf40SYehuda Sadeh ofs, 14921fec7093SYehuda Sadeh op_size, bio, 14931fec7093SYehuda Sadeh coll, cur_seg); 1494602adf40SYehuda Sadeh else 1495602adf40SYehuda Sadeh rbd_req_read(rq, rbd_dev, 1496602adf40SYehuda Sadeh cur_snap_id(rbd_dev), 1497602adf40SYehuda Sadeh ofs, 14981fec7093SYehuda Sadeh op_size, bio, 14991fec7093SYehuda Sadeh coll, cur_seg); 1500602adf40SYehuda Sadeh 15011fec7093SYehuda Sadeh next_seg: 1502602adf40SYehuda Sadeh size -= op_size; 
1503602adf40SYehuda Sadeh ofs += op_size; 1504602adf40SYehuda Sadeh 15051fec7093SYehuda Sadeh cur_seg++; 1506602adf40SYehuda Sadeh rq_bio = next_bio; 1507602adf40SYehuda Sadeh } while (size > 0); 15081fec7093SYehuda Sadeh kref_put(&coll->kref, rbd_coll_release); 1509602adf40SYehuda Sadeh 1510602adf40SYehuda Sadeh if (bp) 1511602adf40SYehuda Sadeh bio_pair_release(bp); 1512602adf40SYehuda Sadeh spin_lock_irq(q->queue_lock); 1513602adf40SYehuda Sadeh next: 1514602adf40SYehuda Sadeh rq = blk_fetch_request(q); 1515602adf40SYehuda Sadeh } 1516602adf40SYehuda Sadeh } 1517602adf40SYehuda Sadeh 1518602adf40SYehuda Sadeh /* 1519602adf40SYehuda Sadeh * a queue callback. Makes sure that we don't create a bio that spans across 1520602adf40SYehuda Sadeh * multiple osd objects. One exception would be with a single page bios, 1521602adf40SYehuda Sadeh * which we handle later at bio_chain_clone 1522602adf40SYehuda Sadeh */ 1523602adf40SYehuda Sadeh static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, 1524602adf40SYehuda Sadeh struct bio_vec *bvec) 1525602adf40SYehuda Sadeh { 1526602adf40SYehuda Sadeh struct rbd_device *rbd_dev = q->queuedata; 1527602adf40SYehuda Sadeh unsigned int chunk_sectors = 1 << (rbd_dev->header.obj_order - 9); 1528602adf40SYehuda Sadeh sector_t sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); 1529602adf40SYehuda Sadeh unsigned int bio_sectors = bmd->bi_size >> 9; 1530602adf40SYehuda Sadeh int max; 1531602adf40SYehuda Sadeh 1532602adf40SYehuda Sadeh max = (chunk_sectors - ((sector & (chunk_sectors - 1)) 1533602adf40SYehuda Sadeh + bio_sectors)) << 9; 1534602adf40SYehuda Sadeh if (max < 0) 1535602adf40SYehuda Sadeh max = 0; /* bio_add cannot handle a negative return */ 1536602adf40SYehuda Sadeh if (max <= bvec->bv_len && bio_sectors == 0) 1537602adf40SYehuda Sadeh return bvec->bv_len; 1538602adf40SYehuda Sadeh return max; 1539602adf40SYehuda Sadeh } 1540602adf40SYehuda Sadeh 1541602adf40SYehuda Sadeh static void 
rbd_free_disk(struct rbd_device *rbd_dev) 1542602adf40SYehuda Sadeh { 1543602adf40SYehuda Sadeh struct gendisk *disk = rbd_dev->disk; 1544602adf40SYehuda Sadeh 1545602adf40SYehuda Sadeh if (!disk) 1546602adf40SYehuda Sadeh return; 1547602adf40SYehuda Sadeh 1548602adf40SYehuda Sadeh rbd_header_free(&rbd_dev->header); 1549602adf40SYehuda Sadeh 1550602adf40SYehuda Sadeh if (disk->flags & GENHD_FL_UP) 1551602adf40SYehuda Sadeh del_gendisk(disk); 1552602adf40SYehuda Sadeh if (disk->queue) 1553602adf40SYehuda Sadeh blk_cleanup_queue(disk->queue); 1554602adf40SYehuda Sadeh put_disk(disk); 1555602adf40SYehuda Sadeh } 1556602adf40SYehuda Sadeh 1557602adf40SYehuda Sadeh /* 1558602adf40SYehuda Sadeh * reload the ondisk the header 1559602adf40SYehuda Sadeh */ 1560602adf40SYehuda Sadeh static int rbd_read_header(struct rbd_device *rbd_dev, 1561602adf40SYehuda Sadeh struct rbd_image_header *header) 1562602adf40SYehuda Sadeh { 1563602adf40SYehuda Sadeh ssize_t rc; 1564602adf40SYehuda Sadeh struct rbd_image_header_ondisk *dh; 1565602adf40SYehuda Sadeh int snap_count = 0; 1566602adf40SYehuda Sadeh u64 snap_names_len = 0; 156759c2be1eSYehuda Sadeh u64 ver; 1568602adf40SYehuda Sadeh 1569602adf40SYehuda Sadeh while (1) { 1570602adf40SYehuda Sadeh int len = sizeof(*dh) + 1571602adf40SYehuda Sadeh snap_count * sizeof(struct rbd_image_snap_ondisk) + 1572602adf40SYehuda Sadeh snap_names_len; 1573602adf40SYehuda Sadeh 1574602adf40SYehuda Sadeh rc = -ENOMEM; 1575602adf40SYehuda Sadeh dh = kmalloc(len, GFP_KERNEL); 1576602adf40SYehuda Sadeh if (!dh) 1577602adf40SYehuda Sadeh return -ENOMEM; 1578602adf40SYehuda Sadeh 1579602adf40SYehuda Sadeh rc = rbd_req_sync_read(rbd_dev, 1580602adf40SYehuda Sadeh NULL, CEPH_NOSNAP, 1581602adf40SYehuda Sadeh rbd_dev->obj_md_name, 1582602adf40SYehuda Sadeh 0, len, 158359c2be1eSYehuda Sadeh (char *)dh, &ver); 1584602adf40SYehuda Sadeh if (rc < 0) 1585602adf40SYehuda Sadeh goto out_dh; 1586602adf40SYehuda Sadeh 1587602adf40SYehuda Sadeh rc = 
rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL); 158881e759fbSJosh Durgin if (rc < 0) { 158981e759fbSJosh Durgin if (rc == -ENXIO) { 159081e759fbSJosh Durgin pr_warning("unrecognized header format" 159181e759fbSJosh Durgin " for image %s", rbd_dev->obj); 159281e759fbSJosh Durgin } 1593602adf40SYehuda Sadeh goto out_dh; 159481e759fbSJosh Durgin } 1595602adf40SYehuda Sadeh 1596602adf40SYehuda Sadeh if (snap_count != header->total_snaps) { 1597602adf40SYehuda Sadeh snap_count = header->total_snaps; 1598602adf40SYehuda Sadeh snap_names_len = header->snap_names_len; 1599602adf40SYehuda Sadeh rbd_header_free(header); 1600602adf40SYehuda Sadeh kfree(dh); 1601602adf40SYehuda Sadeh continue; 1602602adf40SYehuda Sadeh } 1603602adf40SYehuda Sadeh break; 1604602adf40SYehuda Sadeh } 160559c2be1eSYehuda Sadeh header->obj_version = ver; 1606602adf40SYehuda Sadeh 1607602adf40SYehuda Sadeh out_dh: 1608602adf40SYehuda Sadeh kfree(dh); 1609602adf40SYehuda Sadeh return rc; 1610602adf40SYehuda Sadeh } 1611602adf40SYehuda Sadeh 1612602adf40SYehuda Sadeh /* 1613602adf40SYehuda Sadeh * create a snapshot 1614602adf40SYehuda Sadeh */ 1615602adf40SYehuda Sadeh static int rbd_header_add_snap(struct rbd_device *dev, 1616602adf40SYehuda Sadeh const char *snap_name, 1617602adf40SYehuda Sadeh gfp_t gfp_flags) 1618602adf40SYehuda Sadeh { 1619602adf40SYehuda Sadeh int name_len = strlen(snap_name); 1620602adf40SYehuda Sadeh u64 new_snapid; 1621602adf40SYehuda Sadeh int ret; 1622916d4d67SSage Weil void *data, *p, *e; 162359c2be1eSYehuda Sadeh u64 ver; 1624602adf40SYehuda Sadeh 1625602adf40SYehuda Sadeh /* we should create a snapshot only if we're pointing at the head */ 1626602adf40SYehuda Sadeh if (dev->cur_snap) 1627602adf40SYehuda Sadeh return -EINVAL; 1628602adf40SYehuda Sadeh 1629602adf40SYehuda Sadeh ret = ceph_monc_create_snapid(&dev->client->monc, dev->poolid, 1630602adf40SYehuda Sadeh &new_snapid); 1631602adf40SYehuda Sadeh dout("created snapid=%lld\n", new_snapid); 
1632602adf40SYehuda Sadeh if (ret < 0) 1633602adf40SYehuda Sadeh return ret; 1634602adf40SYehuda Sadeh 1635602adf40SYehuda Sadeh data = kmalloc(name_len + 16, gfp_flags); 1636602adf40SYehuda Sadeh if (!data) 1637602adf40SYehuda Sadeh return -ENOMEM; 1638602adf40SYehuda Sadeh 1639916d4d67SSage Weil p = data; 1640916d4d67SSage Weil e = data + name_len + 16; 1641602adf40SYehuda Sadeh 1642916d4d67SSage Weil ceph_encode_string_safe(&p, e, snap_name, name_len, bad); 1643916d4d67SSage Weil ceph_encode_64_safe(&p, e, new_snapid, bad); 1644602adf40SYehuda Sadeh 1645602adf40SYehuda Sadeh ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add", 1646916d4d67SSage Weil data, p - data, &ver); 1647602adf40SYehuda Sadeh 1648916d4d67SSage Weil kfree(data); 1649602adf40SYehuda Sadeh 1650602adf40SYehuda Sadeh if (ret < 0) 1651602adf40SYehuda Sadeh return ret; 1652602adf40SYehuda Sadeh 1653602adf40SYehuda Sadeh dev->header.snapc->seq = new_snapid; 1654602adf40SYehuda Sadeh 1655602adf40SYehuda Sadeh return 0; 1656602adf40SYehuda Sadeh bad: 1657602adf40SYehuda Sadeh return -ERANGE; 1658602adf40SYehuda Sadeh } 1659602adf40SYehuda Sadeh 1660dfc5606dSYehuda Sadeh static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev) 1661dfc5606dSYehuda Sadeh { 1662dfc5606dSYehuda Sadeh struct rbd_snap *snap; 1663dfc5606dSYehuda Sadeh 1664dfc5606dSYehuda Sadeh while (!list_empty(&rbd_dev->snaps)) { 1665dfc5606dSYehuda Sadeh snap = list_first_entry(&rbd_dev->snaps, struct rbd_snap, node); 1666dfc5606dSYehuda Sadeh __rbd_remove_snap_dev(rbd_dev, snap); 1667dfc5606dSYehuda Sadeh } 1668dfc5606dSYehuda Sadeh } 1669dfc5606dSYehuda Sadeh 1670602adf40SYehuda Sadeh /* 1671602adf40SYehuda Sadeh * only read the first part of the ondisk header, without the snaps info 1672602adf40SYehuda Sadeh */ 1673dfc5606dSYehuda Sadeh static int __rbd_update_snaps(struct rbd_device *rbd_dev) 1674602adf40SYehuda Sadeh { 1675602adf40SYehuda Sadeh int ret; 1676602adf40SYehuda Sadeh struct rbd_image_header h; 
1677602adf40SYehuda Sadeh u64 snap_seq; 167859c2be1eSYehuda Sadeh int follow_seq = 0; 1679602adf40SYehuda Sadeh 1680602adf40SYehuda Sadeh ret = rbd_read_header(rbd_dev, &h); 1681602adf40SYehuda Sadeh if (ret < 0) 1682602adf40SYehuda Sadeh return ret; 1683602adf40SYehuda Sadeh 16849db4b3e3SSage Weil /* resized? */ 16859db4b3e3SSage Weil set_capacity(rbd_dev->disk, h.image_size / 512ULL); 16869db4b3e3SSage Weil 1687602adf40SYehuda Sadeh down_write(&rbd_dev->header.snap_rwsem); 1688602adf40SYehuda Sadeh 1689602adf40SYehuda Sadeh snap_seq = rbd_dev->header.snapc->seq; 169059c2be1eSYehuda Sadeh if (rbd_dev->header.total_snaps && 169159c2be1eSYehuda Sadeh rbd_dev->header.snapc->snaps[0] == snap_seq) 169259c2be1eSYehuda Sadeh /* pointing at the head, will need to follow that 169359c2be1eSYehuda Sadeh if head moves */ 169459c2be1eSYehuda Sadeh follow_seq = 1; 1695602adf40SYehuda Sadeh 1696602adf40SYehuda Sadeh kfree(rbd_dev->header.snapc); 1697602adf40SYehuda Sadeh kfree(rbd_dev->header.snap_names); 1698602adf40SYehuda Sadeh kfree(rbd_dev->header.snap_sizes); 1699602adf40SYehuda Sadeh 1700602adf40SYehuda Sadeh rbd_dev->header.total_snaps = h.total_snaps; 1701602adf40SYehuda Sadeh rbd_dev->header.snapc = h.snapc; 1702602adf40SYehuda Sadeh rbd_dev->header.snap_names = h.snap_names; 1703dfc5606dSYehuda Sadeh rbd_dev->header.snap_names_len = h.snap_names_len; 1704602adf40SYehuda Sadeh rbd_dev->header.snap_sizes = h.snap_sizes; 170559c2be1eSYehuda Sadeh if (follow_seq) 170659c2be1eSYehuda Sadeh rbd_dev->header.snapc->seq = rbd_dev->header.snapc->snaps[0]; 170759c2be1eSYehuda Sadeh else 1708602adf40SYehuda Sadeh rbd_dev->header.snapc->seq = snap_seq; 1709602adf40SYehuda Sadeh 1710dfc5606dSYehuda Sadeh ret = __rbd_init_snaps_header(rbd_dev); 1711dfc5606dSYehuda Sadeh 1712602adf40SYehuda Sadeh up_write(&rbd_dev->header.snap_rwsem); 1713602adf40SYehuda Sadeh 1714dfc5606dSYehuda Sadeh return ret; 1715602adf40SYehuda Sadeh } 1716602adf40SYehuda Sadeh 1717602adf40SYehuda Sadeh static 
int rbd_init_disk(struct rbd_device *rbd_dev) 1718602adf40SYehuda Sadeh { 1719602adf40SYehuda Sadeh struct gendisk *disk; 1720602adf40SYehuda Sadeh struct request_queue *q; 1721602adf40SYehuda Sadeh int rc; 1722602adf40SYehuda Sadeh u64 total_size = 0; 1723602adf40SYehuda Sadeh 1724602adf40SYehuda Sadeh /* contact OSD, request size info about the object being mapped */ 1725602adf40SYehuda Sadeh rc = rbd_read_header(rbd_dev, &rbd_dev->header); 1726602adf40SYehuda Sadeh if (rc) 1727602adf40SYehuda Sadeh return rc; 1728602adf40SYehuda Sadeh 1729dfc5606dSYehuda Sadeh /* no need to lock here, as rbd_dev is not registered yet */ 1730dfc5606dSYehuda Sadeh rc = __rbd_init_snaps_header(rbd_dev); 1731dfc5606dSYehuda Sadeh if (rc) 1732dfc5606dSYehuda Sadeh return rc; 1733dfc5606dSYehuda Sadeh 1734602adf40SYehuda Sadeh rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size); 1735602adf40SYehuda Sadeh if (rc) 1736602adf40SYehuda Sadeh return rc; 1737602adf40SYehuda Sadeh 1738602adf40SYehuda Sadeh /* create gendisk info */ 1739602adf40SYehuda Sadeh rc = -ENOMEM; 1740602adf40SYehuda Sadeh disk = alloc_disk(RBD_MINORS_PER_MAJOR); 1741602adf40SYehuda Sadeh if (!disk) 1742602adf40SYehuda Sadeh goto out; 1743602adf40SYehuda Sadeh 1744aedfec59SSage Weil snprintf(disk->disk_name, sizeof(disk->disk_name), DRV_NAME "%d", 1745aedfec59SSage Weil rbd_dev->id); 1746602adf40SYehuda Sadeh disk->major = rbd_dev->major; 1747602adf40SYehuda Sadeh disk->first_minor = 0; 1748602adf40SYehuda Sadeh disk->fops = &rbd_bd_ops; 1749602adf40SYehuda Sadeh disk->private_data = rbd_dev; 1750602adf40SYehuda Sadeh 1751602adf40SYehuda Sadeh /* init rq */ 1752602adf40SYehuda Sadeh rc = -ENOMEM; 1753602adf40SYehuda Sadeh q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); 1754602adf40SYehuda Sadeh if (!q) 1755602adf40SYehuda Sadeh goto out_disk; 1756029bcbd8SJosh Durgin 1757029bcbd8SJosh Durgin /* set io sizes to object size */ 1758029bcbd8SJosh Durgin blk_queue_max_hw_sectors(q, 
rbd_obj_bytes(&rbd_dev->header) / 512ULL); 1759029bcbd8SJosh Durgin blk_queue_max_segment_size(q, rbd_obj_bytes(&rbd_dev->header)); 1760029bcbd8SJosh Durgin blk_queue_io_min(q, rbd_obj_bytes(&rbd_dev->header)); 1761029bcbd8SJosh Durgin blk_queue_io_opt(q, rbd_obj_bytes(&rbd_dev->header)); 1762029bcbd8SJosh Durgin 1763602adf40SYehuda Sadeh blk_queue_merge_bvec(q, rbd_merge_bvec); 1764602adf40SYehuda Sadeh disk->queue = q; 1765602adf40SYehuda Sadeh 1766602adf40SYehuda Sadeh q->queuedata = rbd_dev; 1767602adf40SYehuda Sadeh 1768602adf40SYehuda Sadeh rbd_dev->disk = disk; 1769602adf40SYehuda Sadeh rbd_dev->q = q; 1770602adf40SYehuda Sadeh 1771602adf40SYehuda Sadeh /* finally, announce the disk to the world */ 1772602adf40SYehuda Sadeh set_capacity(disk, total_size / 512ULL); 1773602adf40SYehuda Sadeh add_disk(disk); 1774602adf40SYehuda Sadeh 1775602adf40SYehuda Sadeh pr_info("%s: added with size 0x%llx\n", 1776602adf40SYehuda Sadeh disk->disk_name, (unsigned long long)total_size); 1777602adf40SYehuda Sadeh return 0; 1778602adf40SYehuda Sadeh 1779602adf40SYehuda Sadeh out_disk: 1780602adf40SYehuda Sadeh put_disk(disk); 1781602adf40SYehuda Sadeh out: 1782602adf40SYehuda Sadeh return rc; 1783602adf40SYehuda Sadeh } 1784602adf40SYehuda Sadeh 1785dfc5606dSYehuda Sadeh /* 1786dfc5606dSYehuda Sadeh sysfs 1787dfc5606dSYehuda Sadeh */ 1788602adf40SYehuda Sadeh 1789dfc5606dSYehuda Sadeh static ssize_t rbd_size_show(struct device *dev, 1790dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1791602adf40SYehuda Sadeh { 1792dfc5606dSYehuda Sadeh struct rbd_device *rbd_dev = dev_to_rbd(dev); 1793dfc5606dSYehuda Sadeh 1794dfc5606dSYehuda Sadeh return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size); 1795602adf40SYehuda Sadeh } 1796602adf40SYehuda Sadeh 1797dfc5606dSYehuda Sadeh static ssize_t rbd_major_show(struct device *dev, 1798dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1799602adf40SYehuda Sadeh { 1800dfc5606dSYehuda Sadeh 
struct rbd_device *rbd_dev = dev_to_rbd(dev); 1801dfc5606dSYehuda Sadeh 1802dfc5606dSYehuda Sadeh return sprintf(buf, "%d\n", rbd_dev->major); 1803dfc5606dSYehuda Sadeh } 1804dfc5606dSYehuda Sadeh 1805dfc5606dSYehuda Sadeh static ssize_t rbd_client_id_show(struct device *dev, 1806dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1807dfc5606dSYehuda Sadeh { 1808dfc5606dSYehuda Sadeh struct rbd_device *rbd_dev = dev_to_rbd(dev); 1809dfc5606dSYehuda Sadeh 1810dfc5606dSYehuda Sadeh return sprintf(buf, "client%lld\n", ceph_client_id(rbd_dev->client)); 1811dfc5606dSYehuda Sadeh } 1812dfc5606dSYehuda Sadeh 1813dfc5606dSYehuda Sadeh static ssize_t rbd_pool_show(struct device *dev, 1814dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1815dfc5606dSYehuda Sadeh { 1816dfc5606dSYehuda Sadeh struct rbd_device *rbd_dev = dev_to_rbd(dev); 1817dfc5606dSYehuda Sadeh 1818dfc5606dSYehuda Sadeh return sprintf(buf, "%s\n", rbd_dev->pool_name); 1819dfc5606dSYehuda Sadeh } 1820dfc5606dSYehuda Sadeh 1821dfc5606dSYehuda Sadeh static ssize_t rbd_name_show(struct device *dev, 1822dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1823dfc5606dSYehuda Sadeh { 1824dfc5606dSYehuda Sadeh struct rbd_device *rbd_dev = dev_to_rbd(dev); 1825dfc5606dSYehuda Sadeh 1826dfc5606dSYehuda Sadeh return sprintf(buf, "%s\n", rbd_dev->obj); 1827dfc5606dSYehuda Sadeh } 1828dfc5606dSYehuda Sadeh 1829dfc5606dSYehuda Sadeh static ssize_t rbd_snap_show(struct device *dev, 1830dfc5606dSYehuda Sadeh struct device_attribute *attr, 1831dfc5606dSYehuda Sadeh char *buf) 1832dfc5606dSYehuda Sadeh { 1833dfc5606dSYehuda Sadeh struct rbd_device *rbd_dev = dev_to_rbd(dev); 1834dfc5606dSYehuda Sadeh 1835dfc5606dSYehuda Sadeh return sprintf(buf, "%s\n", rbd_dev->snap_name); 1836dfc5606dSYehuda Sadeh } 1837dfc5606dSYehuda Sadeh 1838dfc5606dSYehuda Sadeh static ssize_t rbd_image_refresh(struct device *dev, 1839dfc5606dSYehuda Sadeh struct device_attribute *attr, 1840dfc5606dSYehuda Sadeh 
const char *buf, 1841dfc5606dSYehuda Sadeh size_t size) 1842dfc5606dSYehuda Sadeh { 1843dfc5606dSYehuda Sadeh struct rbd_device *rbd_dev = dev_to_rbd(dev); 1844dfc5606dSYehuda Sadeh int rc; 1845dfc5606dSYehuda Sadeh int ret = size; 1846602adf40SYehuda Sadeh 1847602adf40SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 1848602adf40SYehuda Sadeh 1849dfc5606dSYehuda Sadeh rc = __rbd_update_snaps(rbd_dev); 1850dfc5606dSYehuda Sadeh if (rc < 0) 1851dfc5606dSYehuda Sadeh ret = rc; 1852602adf40SYehuda Sadeh 1853dfc5606dSYehuda Sadeh mutex_unlock(&ctl_mutex); 1854dfc5606dSYehuda Sadeh return ret; 1855dfc5606dSYehuda Sadeh } 1856602adf40SYehuda Sadeh 1857dfc5606dSYehuda Sadeh static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL); 1858dfc5606dSYehuda Sadeh static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL); 1859dfc5606dSYehuda Sadeh static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL); 1860dfc5606dSYehuda Sadeh static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL); 1861dfc5606dSYehuda Sadeh static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL); 1862dfc5606dSYehuda Sadeh static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); 1863dfc5606dSYehuda Sadeh static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); 1864dfc5606dSYehuda Sadeh static DEVICE_ATTR(create_snap, S_IWUSR, NULL, rbd_snap_add); 1865dfc5606dSYehuda Sadeh 1866dfc5606dSYehuda Sadeh static struct attribute *rbd_attrs[] = { 1867dfc5606dSYehuda Sadeh &dev_attr_size.attr, 1868dfc5606dSYehuda Sadeh &dev_attr_major.attr, 1869dfc5606dSYehuda Sadeh &dev_attr_client_id.attr, 1870dfc5606dSYehuda Sadeh &dev_attr_pool.attr, 1871dfc5606dSYehuda Sadeh &dev_attr_name.attr, 1872dfc5606dSYehuda Sadeh &dev_attr_current_snap.attr, 1873dfc5606dSYehuda Sadeh &dev_attr_refresh.attr, 1874dfc5606dSYehuda Sadeh &dev_attr_create_snap.attr, 1875dfc5606dSYehuda Sadeh NULL 1876dfc5606dSYehuda Sadeh }; 1877dfc5606dSYehuda Sadeh 1878dfc5606dSYehuda Sadeh static struct attribute_group 
rbd_attr_group = { 1879dfc5606dSYehuda Sadeh .attrs = rbd_attrs, 1880dfc5606dSYehuda Sadeh }; 1881dfc5606dSYehuda Sadeh 1882dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_attr_groups[] = { 1883dfc5606dSYehuda Sadeh &rbd_attr_group, 1884dfc5606dSYehuda Sadeh NULL 1885dfc5606dSYehuda Sadeh }; 1886dfc5606dSYehuda Sadeh 1887dfc5606dSYehuda Sadeh static void rbd_sysfs_dev_release(struct device *dev) 1888dfc5606dSYehuda Sadeh { 1889dfc5606dSYehuda Sadeh } 1890dfc5606dSYehuda Sadeh 1891dfc5606dSYehuda Sadeh static struct device_type rbd_device_type = { 1892dfc5606dSYehuda Sadeh .name = "rbd", 1893dfc5606dSYehuda Sadeh .groups = rbd_attr_groups, 1894dfc5606dSYehuda Sadeh .release = rbd_sysfs_dev_release, 1895dfc5606dSYehuda Sadeh }; 1896dfc5606dSYehuda Sadeh 1897dfc5606dSYehuda Sadeh 1898dfc5606dSYehuda Sadeh /* 1899dfc5606dSYehuda Sadeh sysfs - snapshots 1900dfc5606dSYehuda Sadeh */ 1901dfc5606dSYehuda Sadeh 1902dfc5606dSYehuda Sadeh static ssize_t rbd_snap_size_show(struct device *dev, 1903dfc5606dSYehuda Sadeh struct device_attribute *attr, 1904dfc5606dSYehuda Sadeh char *buf) 1905dfc5606dSYehuda Sadeh { 1906dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 1907dfc5606dSYehuda Sadeh 1908dfc5606dSYehuda Sadeh return sprintf(buf, "%lld\n", (long long)snap->size); 1909dfc5606dSYehuda Sadeh } 1910dfc5606dSYehuda Sadeh 1911dfc5606dSYehuda Sadeh static ssize_t rbd_snap_id_show(struct device *dev, 1912dfc5606dSYehuda Sadeh struct device_attribute *attr, 1913dfc5606dSYehuda Sadeh char *buf) 1914dfc5606dSYehuda Sadeh { 1915dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 1916dfc5606dSYehuda Sadeh 1917dfc5606dSYehuda Sadeh return sprintf(buf, "%lld\n", (long long)snap->id); 1918dfc5606dSYehuda Sadeh } 1919dfc5606dSYehuda Sadeh 1920dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); 1921dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_id, S_IRUGO, 
rbd_snap_id_show, NULL); 1922dfc5606dSYehuda Sadeh 1923dfc5606dSYehuda Sadeh static struct attribute *rbd_snap_attrs[] = { 1924dfc5606dSYehuda Sadeh &dev_attr_snap_size.attr, 1925dfc5606dSYehuda Sadeh &dev_attr_snap_id.attr, 1926dfc5606dSYehuda Sadeh NULL, 1927dfc5606dSYehuda Sadeh }; 1928dfc5606dSYehuda Sadeh 1929dfc5606dSYehuda Sadeh static struct attribute_group rbd_snap_attr_group = { 1930dfc5606dSYehuda Sadeh .attrs = rbd_snap_attrs, 1931dfc5606dSYehuda Sadeh }; 1932dfc5606dSYehuda Sadeh 1933dfc5606dSYehuda Sadeh static void rbd_snap_dev_release(struct device *dev) 1934dfc5606dSYehuda Sadeh { 1935dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 1936dfc5606dSYehuda Sadeh kfree(snap->name); 1937dfc5606dSYehuda Sadeh kfree(snap); 1938dfc5606dSYehuda Sadeh } 1939dfc5606dSYehuda Sadeh 1940dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_snap_attr_groups[] = { 1941dfc5606dSYehuda Sadeh &rbd_snap_attr_group, 1942dfc5606dSYehuda Sadeh NULL 1943dfc5606dSYehuda Sadeh }; 1944dfc5606dSYehuda Sadeh 1945dfc5606dSYehuda Sadeh static struct device_type rbd_snap_device_type = { 1946dfc5606dSYehuda Sadeh .groups = rbd_snap_attr_groups, 1947dfc5606dSYehuda Sadeh .release = rbd_snap_dev_release, 1948dfc5606dSYehuda Sadeh }; 1949dfc5606dSYehuda Sadeh 1950dfc5606dSYehuda Sadeh static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev, 1951dfc5606dSYehuda Sadeh struct rbd_snap *snap) 1952dfc5606dSYehuda Sadeh { 1953dfc5606dSYehuda Sadeh list_del(&snap->node); 1954dfc5606dSYehuda Sadeh device_unregister(&snap->dev); 1955dfc5606dSYehuda Sadeh } 1956dfc5606dSYehuda Sadeh 1957dfc5606dSYehuda Sadeh static int rbd_register_snap_dev(struct rbd_device *rbd_dev, 1958dfc5606dSYehuda Sadeh struct rbd_snap *snap, 1959dfc5606dSYehuda Sadeh struct device *parent) 1960dfc5606dSYehuda Sadeh { 1961dfc5606dSYehuda Sadeh struct device *dev = &snap->dev; 1962dfc5606dSYehuda Sadeh int ret; 1963dfc5606dSYehuda Sadeh 1964dfc5606dSYehuda Sadeh 
dev->type = &rbd_snap_device_type; 1965dfc5606dSYehuda Sadeh dev->parent = parent; 1966dfc5606dSYehuda Sadeh dev->release = rbd_snap_dev_release; 1967dfc5606dSYehuda Sadeh dev_set_name(dev, "snap_%s", snap->name); 1968dfc5606dSYehuda Sadeh ret = device_register(dev); 1969dfc5606dSYehuda Sadeh 1970dfc5606dSYehuda Sadeh return ret; 1971dfc5606dSYehuda Sadeh } 1972dfc5606dSYehuda Sadeh 1973dfc5606dSYehuda Sadeh static int __rbd_add_snap_dev(struct rbd_device *rbd_dev, 1974dfc5606dSYehuda Sadeh int i, const char *name, 1975dfc5606dSYehuda Sadeh struct rbd_snap **snapp) 1976dfc5606dSYehuda Sadeh { 1977dfc5606dSYehuda Sadeh int ret; 1978dfc5606dSYehuda Sadeh struct rbd_snap *snap = kzalloc(sizeof(*snap), GFP_KERNEL); 1979dfc5606dSYehuda Sadeh if (!snap) 1980dfc5606dSYehuda Sadeh return -ENOMEM; 1981dfc5606dSYehuda Sadeh snap->name = kstrdup(name, GFP_KERNEL); 1982dfc5606dSYehuda Sadeh snap->size = rbd_dev->header.snap_sizes[i]; 1983dfc5606dSYehuda Sadeh snap->id = rbd_dev->header.snapc->snaps[i]; 1984dfc5606dSYehuda Sadeh if (device_is_registered(&rbd_dev->dev)) { 1985dfc5606dSYehuda Sadeh ret = rbd_register_snap_dev(rbd_dev, snap, 1986dfc5606dSYehuda Sadeh &rbd_dev->dev); 1987dfc5606dSYehuda Sadeh if (ret < 0) 1988dfc5606dSYehuda Sadeh goto err; 1989dfc5606dSYehuda Sadeh } 1990dfc5606dSYehuda Sadeh *snapp = snap; 1991dfc5606dSYehuda Sadeh return 0; 1992dfc5606dSYehuda Sadeh err: 1993dfc5606dSYehuda Sadeh kfree(snap->name); 1994dfc5606dSYehuda Sadeh kfree(snap); 1995dfc5606dSYehuda Sadeh return ret; 1996dfc5606dSYehuda Sadeh } 1997dfc5606dSYehuda Sadeh 1998dfc5606dSYehuda Sadeh /* 1999dfc5606dSYehuda Sadeh * search for the previous snap in a null delimited string list 2000dfc5606dSYehuda Sadeh */ 2001dfc5606dSYehuda Sadeh const char *rbd_prev_snap_name(const char *name, const char *start) 2002dfc5606dSYehuda Sadeh { 2003dfc5606dSYehuda Sadeh if (name < start + 2) 2004dfc5606dSYehuda Sadeh return NULL; 2005dfc5606dSYehuda Sadeh 2006dfc5606dSYehuda Sadeh name -= 2; 
2007dfc5606dSYehuda Sadeh while (*name) { 2008dfc5606dSYehuda Sadeh if (name == start) 2009dfc5606dSYehuda Sadeh return start; 2010dfc5606dSYehuda Sadeh name--; 2011dfc5606dSYehuda Sadeh } 2012dfc5606dSYehuda Sadeh return name + 1; 2013dfc5606dSYehuda Sadeh } 2014dfc5606dSYehuda Sadeh 2015dfc5606dSYehuda Sadeh /* 2016dfc5606dSYehuda Sadeh * compare the old list of snapshots that we have to what's in the header 2017dfc5606dSYehuda Sadeh * and update it accordingly. Note that the header holds the snapshots 2018dfc5606dSYehuda Sadeh * in a reverse order (from newest to oldest) and we need to go from 2019dfc5606dSYehuda Sadeh * older to new so that we don't get a duplicate snap name when 2020dfc5606dSYehuda Sadeh * doing the process (e.g., removed snapshot and recreated a new 2021dfc5606dSYehuda Sadeh * one with the same name. 2022dfc5606dSYehuda Sadeh */ 2023dfc5606dSYehuda Sadeh static int __rbd_init_snaps_header(struct rbd_device *rbd_dev) 2024dfc5606dSYehuda Sadeh { 2025dfc5606dSYehuda Sadeh const char *name, *first_name; 2026dfc5606dSYehuda Sadeh int i = rbd_dev->header.total_snaps; 2027dfc5606dSYehuda Sadeh struct rbd_snap *snap, *old_snap = NULL; 2028dfc5606dSYehuda Sadeh int ret; 2029dfc5606dSYehuda Sadeh struct list_head *p, *n; 2030dfc5606dSYehuda Sadeh 2031dfc5606dSYehuda Sadeh first_name = rbd_dev->header.snap_names; 2032dfc5606dSYehuda Sadeh name = first_name + rbd_dev->header.snap_names_len; 2033dfc5606dSYehuda Sadeh 2034dfc5606dSYehuda Sadeh list_for_each_prev_safe(p, n, &rbd_dev->snaps) { 2035dfc5606dSYehuda Sadeh u64 cur_id; 2036dfc5606dSYehuda Sadeh 2037dfc5606dSYehuda Sadeh old_snap = list_entry(p, struct rbd_snap, node); 2038dfc5606dSYehuda Sadeh 2039dfc5606dSYehuda Sadeh if (i) 2040dfc5606dSYehuda Sadeh cur_id = rbd_dev->header.snapc->snaps[i - 1]; 2041dfc5606dSYehuda Sadeh 2042dfc5606dSYehuda Sadeh if (!i || old_snap->id < cur_id) { 2043dfc5606dSYehuda Sadeh /* old_snap->id was skipped, thus was removed */ 2044dfc5606dSYehuda Sadeh 
__rbd_remove_snap_dev(rbd_dev, old_snap); 2045dfc5606dSYehuda Sadeh continue; 2046dfc5606dSYehuda Sadeh } 2047dfc5606dSYehuda Sadeh if (old_snap->id == cur_id) { 2048dfc5606dSYehuda Sadeh /* we have this snapshot already */ 2049dfc5606dSYehuda Sadeh i--; 2050dfc5606dSYehuda Sadeh name = rbd_prev_snap_name(name, first_name); 2051dfc5606dSYehuda Sadeh continue; 2052dfc5606dSYehuda Sadeh } 2053dfc5606dSYehuda Sadeh for (; i > 0; 2054dfc5606dSYehuda Sadeh i--, name = rbd_prev_snap_name(name, first_name)) { 2055dfc5606dSYehuda Sadeh if (!name) { 2056dfc5606dSYehuda Sadeh WARN_ON(1); 2057dfc5606dSYehuda Sadeh return -EINVAL; 2058dfc5606dSYehuda Sadeh } 2059dfc5606dSYehuda Sadeh cur_id = rbd_dev->header.snapc->snaps[i]; 2060dfc5606dSYehuda Sadeh /* snapshot removal? handle it above */ 2061dfc5606dSYehuda Sadeh if (cur_id >= old_snap->id) 2062dfc5606dSYehuda Sadeh break; 2063dfc5606dSYehuda Sadeh /* a new snapshot */ 2064dfc5606dSYehuda Sadeh ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap); 2065dfc5606dSYehuda Sadeh if (ret < 0) 2066dfc5606dSYehuda Sadeh return ret; 2067dfc5606dSYehuda Sadeh 2068dfc5606dSYehuda Sadeh /* note that we add it backward so using n and not p */ 2069dfc5606dSYehuda Sadeh list_add(&snap->node, n); 2070dfc5606dSYehuda Sadeh p = &snap->node; 2071dfc5606dSYehuda Sadeh } 2072dfc5606dSYehuda Sadeh } 2073dfc5606dSYehuda Sadeh /* we're done going over the old snap list, just add what's left */ 2074dfc5606dSYehuda Sadeh for (; i > 0; i--) { 2075dfc5606dSYehuda Sadeh name = rbd_prev_snap_name(name, first_name); 2076dfc5606dSYehuda Sadeh if (!name) { 2077dfc5606dSYehuda Sadeh WARN_ON(1); 2078dfc5606dSYehuda Sadeh return -EINVAL; 2079dfc5606dSYehuda Sadeh } 2080dfc5606dSYehuda Sadeh ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap); 2081dfc5606dSYehuda Sadeh if (ret < 0) 2082dfc5606dSYehuda Sadeh return ret; 2083dfc5606dSYehuda Sadeh list_add(&snap->node, &rbd_dev->snaps); 2084dfc5606dSYehuda Sadeh } 2085dfc5606dSYehuda Sadeh 2086dfc5606dSYehuda 
Sadeh return 0; 2087dfc5606dSYehuda Sadeh } 2088dfc5606dSYehuda Sadeh 2089dfc5606dSYehuda Sadeh 2090dfc5606dSYehuda Sadeh static void rbd_root_dev_release(struct device *dev) 2091dfc5606dSYehuda Sadeh { 2092dfc5606dSYehuda Sadeh } 2093dfc5606dSYehuda Sadeh 2094dfc5606dSYehuda Sadeh static struct device rbd_root_dev = { 2095dfc5606dSYehuda Sadeh .init_name = "rbd", 2096dfc5606dSYehuda Sadeh .release = rbd_root_dev_release, 2097dfc5606dSYehuda Sadeh }; 2098dfc5606dSYehuda Sadeh 2099dfc5606dSYehuda Sadeh static int rbd_bus_add_dev(struct rbd_device *rbd_dev) 2100dfc5606dSYehuda Sadeh { 2101dfc5606dSYehuda Sadeh int ret = -ENOMEM; 2102dfc5606dSYehuda Sadeh struct device *dev; 2103dfc5606dSYehuda Sadeh struct rbd_snap *snap; 2104dfc5606dSYehuda Sadeh 2105dfc5606dSYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2106dfc5606dSYehuda Sadeh dev = &rbd_dev->dev; 2107dfc5606dSYehuda Sadeh 2108dfc5606dSYehuda Sadeh dev->bus = &rbd_bus_type; 2109dfc5606dSYehuda Sadeh dev->type = &rbd_device_type; 2110dfc5606dSYehuda Sadeh dev->parent = &rbd_root_dev; 2111dfc5606dSYehuda Sadeh dev->release = rbd_dev_release; 2112dfc5606dSYehuda Sadeh dev_set_name(dev, "%d", rbd_dev->id); 2113dfc5606dSYehuda Sadeh ret = device_register(dev); 2114dfc5606dSYehuda Sadeh if (ret < 0) 2115dfc5606dSYehuda Sadeh goto done_free; 2116dfc5606dSYehuda Sadeh 2117dfc5606dSYehuda Sadeh list_for_each_entry(snap, &rbd_dev->snaps, node) { 2118dfc5606dSYehuda Sadeh ret = rbd_register_snap_dev(rbd_dev, snap, 2119dfc5606dSYehuda Sadeh &rbd_dev->dev); 2120dfc5606dSYehuda Sadeh if (ret < 0) 2121602adf40SYehuda Sadeh break; 2122602adf40SYehuda Sadeh } 2123602adf40SYehuda Sadeh 2124602adf40SYehuda Sadeh mutex_unlock(&ctl_mutex); 2125dfc5606dSYehuda Sadeh return 0; 2126dfc5606dSYehuda Sadeh done_free: 2127dfc5606dSYehuda Sadeh mutex_unlock(&ctl_mutex); 2128dfc5606dSYehuda Sadeh return ret; 2129602adf40SYehuda Sadeh } 2130602adf40SYehuda Sadeh 2131dfc5606dSYehuda Sadeh static void rbd_bus_del_dev(struct 
rbd_device *rbd_dev) 2132dfc5606dSYehuda Sadeh { 2133dfc5606dSYehuda Sadeh device_unregister(&rbd_dev->dev); 2134dfc5606dSYehuda Sadeh } 2135dfc5606dSYehuda Sadeh 213659c2be1eSYehuda Sadeh static int rbd_init_watch_dev(struct rbd_device *rbd_dev) 213759c2be1eSYehuda Sadeh { 213859c2be1eSYehuda Sadeh int ret, rc; 213959c2be1eSYehuda Sadeh 214059c2be1eSYehuda Sadeh do { 214159c2be1eSYehuda Sadeh ret = rbd_req_sync_watch(rbd_dev, rbd_dev->obj_md_name, 214259c2be1eSYehuda Sadeh rbd_dev->header.obj_version); 214359c2be1eSYehuda Sadeh if (ret == -ERANGE) { 214459c2be1eSYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 214559c2be1eSYehuda Sadeh rc = __rbd_update_snaps(rbd_dev); 214659c2be1eSYehuda Sadeh mutex_unlock(&ctl_mutex); 214759c2be1eSYehuda Sadeh if (rc < 0) 214859c2be1eSYehuda Sadeh return rc; 214959c2be1eSYehuda Sadeh } 215059c2be1eSYehuda Sadeh } while (ret == -ERANGE); 215159c2be1eSYehuda Sadeh 215259c2be1eSYehuda Sadeh return ret; 215359c2be1eSYehuda Sadeh } 215459c2be1eSYehuda Sadeh 215559c2be1eSYehuda Sadeh static ssize_t rbd_add(struct bus_type *bus, 215659c2be1eSYehuda Sadeh const char *buf, 215759c2be1eSYehuda Sadeh size_t count) 2158602adf40SYehuda Sadeh { 2159602adf40SYehuda Sadeh struct ceph_osd_client *osdc; 2160602adf40SYehuda Sadeh struct rbd_device *rbd_dev; 2161602adf40SYehuda Sadeh ssize_t rc = -ENOMEM; 2162602adf40SYehuda Sadeh int irc, new_id = 0; 2163602adf40SYehuda Sadeh struct list_head *tmp; 2164602adf40SYehuda Sadeh char *mon_dev_name; 2165602adf40SYehuda Sadeh char *options; 2166602adf40SYehuda Sadeh 2167602adf40SYehuda Sadeh if (!try_module_get(THIS_MODULE)) 2168602adf40SYehuda Sadeh return -ENODEV; 2169602adf40SYehuda Sadeh 2170602adf40SYehuda Sadeh mon_dev_name = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL); 2171602adf40SYehuda Sadeh if (!mon_dev_name) 2172602adf40SYehuda Sadeh goto err_out_mod; 2173602adf40SYehuda Sadeh 2174602adf40SYehuda Sadeh options = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL); 2175602adf40SYehuda Sadeh if 
(!options) 2176602adf40SYehuda Sadeh goto err_mon_dev; 2177602adf40SYehuda Sadeh 2178602adf40SYehuda Sadeh /* new rbd_device object */ 2179602adf40SYehuda Sadeh rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); 2180602adf40SYehuda Sadeh if (!rbd_dev) 2181602adf40SYehuda Sadeh goto err_out_opt; 2182602adf40SYehuda Sadeh 2183602adf40SYehuda Sadeh /* static rbd_device initialization */ 2184602adf40SYehuda Sadeh spin_lock_init(&rbd_dev->lock); 2185602adf40SYehuda Sadeh INIT_LIST_HEAD(&rbd_dev->node); 2186dfc5606dSYehuda Sadeh INIT_LIST_HEAD(&rbd_dev->snaps); 2187602adf40SYehuda Sadeh 21880e805a1dSAlex Elder init_rwsem(&rbd_dev->header.snap_rwsem); 21890e805a1dSAlex Elder 2190602adf40SYehuda Sadeh /* generate unique id: find highest unique id, add one */ 2191602adf40SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2192602adf40SYehuda Sadeh 2193602adf40SYehuda Sadeh list_for_each(tmp, &rbd_dev_list) { 2194602adf40SYehuda Sadeh struct rbd_device *rbd_dev; 2195602adf40SYehuda Sadeh 2196602adf40SYehuda Sadeh rbd_dev = list_entry(tmp, struct rbd_device, node); 2197602adf40SYehuda Sadeh if (rbd_dev->id >= new_id) 2198602adf40SYehuda Sadeh new_id = rbd_dev->id + 1; 2199602adf40SYehuda Sadeh } 2200602adf40SYehuda Sadeh 2201602adf40SYehuda Sadeh rbd_dev->id = new_id; 2202602adf40SYehuda Sadeh 2203602adf40SYehuda Sadeh /* add to global list */ 2204602adf40SYehuda Sadeh list_add_tail(&rbd_dev->node, &rbd_dev_list); 2205602adf40SYehuda Sadeh 2206602adf40SYehuda Sadeh /* parse add command */ 2207602adf40SYehuda Sadeh if (sscanf(buf, "%" __stringify(RBD_MAX_OPT_LEN) "s " 2208602adf40SYehuda Sadeh "%" __stringify(RBD_MAX_OPT_LEN) "s " 2209602adf40SYehuda Sadeh "%" __stringify(RBD_MAX_POOL_NAME_LEN) "s " 2210602adf40SYehuda Sadeh "%" __stringify(RBD_MAX_OBJ_NAME_LEN) "s" 2211602adf40SYehuda Sadeh "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", 2212602adf40SYehuda Sadeh mon_dev_name, options, rbd_dev->pool_name, 2213602adf40SYehuda Sadeh rbd_dev->obj, rbd_dev->snap_name) < 
4) { 2214602adf40SYehuda Sadeh rc = -EINVAL; 2215602adf40SYehuda Sadeh goto err_out_slot; 2216602adf40SYehuda Sadeh } 2217602adf40SYehuda Sadeh 2218602adf40SYehuda Sadeh if (rbd_dev->snap_name[0] == 0) 2219602adf40SYehuda Sadeh rbd_dev->snap_name[0] = '-'; 2220602adf40SYehuda Sadeh 2221602adf40SYehuda Sadeh rbd_dev->obj_len = strlen(rbd_dev->obj); 2222602adf40SYehuda Sadeh snprintf(rbd_dev->obj_md_name, sizeof(rbd_dev->obj_md_name), "%s%s", 2223602adf40SYehuda Sadeh rbd_dev->obj, RBD_SUFFIX); 2224602adf40SYehuda Sadeh 2225602adf40SYehuda Sadeh /* initialize rest of new object */ 2226602adf40SYehuda Sadeh snprintf(rbd_dev->name, DEV_NAME_LEN, DRV_NAME "%d", rbd_dev->id); 2227602adf40SYehuda Sadeh rc = rbd_get_client(rbd_dev, mon_dev_name, options); 2228602adf40SYehuda Sadeh if (rc < 0) 2229602adf40SYehuda Sadeh goto err_out_slot; 2230602adf40SYehuda Sadeh 2231602adf40SYehuda Sadeh mutex_unlock(&ctl_mutex); 2232602adf40SYehuda Sadeh 2233602adf40SYehuda Sadeh /* pick the pool */ 2234602adf40SYehuda Sadeh osdc = &rbd_dev->client->osdc; 2235602adf40SYehuda Sadeh rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); 2236602adf40SYehuda Sadeh if (rc < 0) 2237602adf40SYehuda Sadeh goto err_out_client; 2238602adf40SYehuda Sadeh rbd_dev->poolid = rc; 2239602adf40SYehuda Sadeh 2240602adf40SYehuda Sadeh /* register our block device */ 2241602adf40SYehuda Sadeh irc = register_blkdev(0, rbd_dev->name); 2242602adf40SYehuda Sadeh if (irc < 0) { 2243602adf40SYehuda Sadeh rc = irc; 2244602adf40SYehuda Sadeh goto err_out_client; 2245602adf40SYehuda Sadeh } 2246602adf40SYehuda Sadeh rbd_dev->major = irc; 2247602adf40SYehuda Sadeh 2248dfc5606dSYehuda Sadeh rc = rbd_bus_add_dev(rbd_dev); 2249dfc5606dSYehuda Sadeh if (rc) 2250766fc439SYehuda Sadeh goto err_out_blkdev; 2251766fc439SYehuda Sadeh 2252602adf40SYehuda Sadeh /* set up and announce blkdev mapping */ 2253602adf40SYehuda Sadeh rc = rbd_init_disk(rbd_dev); 2254602adf40SYehuda Sadeh if (rc) 2255766fc439SYehuda Sadeh goto 
err_out_bus; 2256602adf40SYehuda Sadeh 225759c2be1eSYehuda Sadeh rc = rbd_init_watch_dev(rbd_dev); 225859c2be1eSYehuda Sadeh if (rc) 225959c2be1eSYehuda Sadeh goto err_out_bus; 226059c2be1eSYehuda Sadeh 2261602adf40SYehuda Sadeh return count; 2262602adf40SYehuda Sadeh 2263766fc439SYehuda Sadeh err_out_bus: 2264766fc439SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2265766fc439SYehuda Sadeh list_del_init(&rbd_dev->node); 2266766fc439SYehuda Sadeh mutex_unlock(&ctl_mutex); 2267766fc439SYehuda Sadeh 2268766fc439SYehuda Sadeh /* this will also clean up rest of rbd_dev stuff */ 2269766fc439SYehuda Sadeh 2270766fc439SYehuda Sadeh rbd_bus_del_dev(rbd_dev); 2271766fc439SYehuda Sadeh kfree(options); 2272766fc439SYehuda Sadeh kfree(mon_dev_name); 2273766fc439SYehuda Sadeh return rc; 2274766fc439SYehuda Sadeh 2275602adf40SYehuda Sadeh err_out_blkdev: 2276602adf40SYehuda Sadeh unregister_blkdev(rbd_dev->major, rbd_dev->name); 2277602adf40SYehuda Sadeh err_out_client: 2278602adf40SYehuda Sadeh rbd_put_client(rbd_dev); 2279602adf40SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2280602adf40SYehuda Sadeh err_out_slot: 2281602adf40SYehuda Sadeh list_del_init(&rbd_dev->node); 2282602adf40SYehuda Sadeh mutex_unlock(&ctl_mutex); 2283602adf40SYehuda Sadeh 2284602adf40SYehuda Sadeh kfree(rbd_dev); 2285602adf40SYehuda Sadeh err_out_opt: 2286602adf40SYehuda Sadeh kfree(options); 2287602adf40SYehuda Sadeh err_mon_dev: 2288602adf40SYehuda Sadeh kfree(mon_dev_name); 2289602adf40SYehuda Sadeh err_out_mod: 2290602adf40SYehuda Sadeh dout("Error adding device %s\n", buf); 2291602adf40SYehuda Sadeh module_put(THIS_MODULE); 2292602adf40SYehuda Sadeh return rc; 2293602adf40SYehuda Sadeh } 2294602adf40SYehuda Sadeh 2295602adf40SYehuda Sadeh static struct rbd_device *__rbd_get_dev(unsigned long id) 2296602adf40SYehuda Sadeh { 2297602adf40SYehuda Sadeh struct list_head *tmp; 2298602adf40SYehuda Sadeh struct rbd_device *rbd_dev; 2299602adf40SYehuda Sadeh 
2300602adf40SYehuda Sadeh list_for_each(tmp, &rbd_dev_list) { 2301602adf40SYehuda Sadeh rbd_dev = list_entry(tmp, struct rbd_device, node); 2302602adf40SYehuda Sadeh if (rbd_dev->id == id) 2303602adf40SYehuda Sadeh return rbd_dev; 2304602adf40SYehuda Sadeh } 2305602adf40SYehuda Sadeh return NULL; 2306602adf40SYehuda Sadeh } 2307602adf40SYehuda Sadeh 2308dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev) 2309602adf40SYehuda Sadeh { 2310dfc5606dSYehuda Sadeh struct rbd_device *rbd_dev = 2311dfc5606dSYehuda Sadeh container_of(dev, struct rbd_device, dev); 2312602adf40SYehuda Sadeh 231359c2be1eSYehuda Sadeh if (rbd_dev->watch_request) 231459c2be1eSYehuda Sadeh ceph_osdc_unregister_linger_request(&rbd_dev->client->osdc, 231559c2be1eSYehuda Sadeh rbd_dev->watch_request); 231659c2be1eSYehuda Sadeh if (rbd_dev->watch_event) 231779e3057cSYehuda Sadeh rbd_req_sync_unwatch(rbd_dev, rbd_dev->obj_md_name); 231859c2be1eSYehuda Sadeh 2319602adf40SYehuda Sadeh rbd_put_client(rbd_dev); 2320602adf40SYehuda Sadeh 2321602adf40SYehuda Sadeh /* clean up and free blkdev */ 2322602adf40SYehuda Sadeh rbd_free_disk(rbd_dev); 2323602adf40SYehuda Sadeh unregister_blkdev(rbd_dev->major, rbd_dev->name); 2324602adf40SYehuda Sadeh kfree(rbd_dev); 2325602adf40SYehuda Sadeh 2326602adf40SYehuda Sadeh /* release module ref */ 2327602adf40SYehuda Sadeh module_put(THIS_MODULE); 2328602adf40SYehuda Sadeh } 2329602adf40SYehuda Sadeh 2330dfc5606dSYehuda Sadeh static ssize_t rbd_remove(struct bus_type *bus, 2331602adf40SYehuda Sadeh const char *buf, 2332602adf40SYehuda Sadeh size_t count) 2333602adf40SYehuda Sadeh { 2334602adf40SYehuda Sadeh struct rbd_device *rbd_dev = NULL; 2335602adf40SYehuda Sadeh int target_id, rc; 2336602adf40SYehuda Sadeh unsigned long ul; 2337602adf40SYehuda Sadeh int ret = count; 2338602adf40SYehuda Sadeh 2339602adf40SYehuda Sadeh rc = strict_strtoul(buf, 10, &ul); 2340602adf40SYehuda Sadeh if (rc) 2341602adf40SYehuda Sadeh return rc; 2342602adf40SYehuda Sadeh 
2343602adf40SYehuda Sadeh /* convert to int; abort if we lost anything in the conversion */ 2344602adf40SYehuda Sadeh target_id = (int) ul; 2345602adf40SYehuda Sadeh if (target_id != ul) 2346602adf40SYehuda Sadeh return -EINVAL; 2347602adf40SYehuda Sadeh 2348602adf40SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2349602adf40SYehuda Sadeh 2350602adf40SYehuda Sadeh rbd_dev = __rbd_get_dev(target_id); 2351602adf40SYehuda Sadeh if (!rbd_dev) { 2352602adf40SYehuda Sadeh ret = -ENOENT; 2353602adf40SYehuda Sadeh goto done; 2354602adf40SYehuda Sadeh } 2355602adf40SYehuda Sadeh 2356dfc5606dSYehuda Sadeh list_del_init(&rbd_dev->node); 2357dfc5606dSYehuda Sadeh 2358dfc5606dSYehuda Sadeh __rbd_remove_all_snaps(rbd_dev); 2359dfc5606dSYehuda Sadeh rbd_bus_del_dev(rbd_dev); 2360602adf40SYehuda Sadeh 2361602adf40SYehuda Sadeh done: 2362602adf40SYehuda Sadeh mutex_unlock(&ctl_mutex); 2363602adf40SYehuda Sadeh return ret; 2364602adf40SYehuda Sadeh } 2365602adf40SYehuda Sadeh 2366dfc5606dSYehuda Sadeh static ssize_t rbd_snap_add(struct device *dev, 2367dfc5606dSYehuda Sadeh struct device_attribute *attr, 2368602adf40SYehuda Sadeh const char *buf, 2369602adf40SYehuda Sadeh size_t count) 2370602adf40SYehuda Sadeh { 2371dfc5606dSYehuda Sadeh struct rbd_device *rbd_dev = dev_to_rbd(dev); 2372dfc5606dSYehuda Sadeh int ret; 2373dfc5606dSYehuda Sadeh char *name = kmalloc(count + 1, GFP_KERNEL); 2374602adf40SYehuda Sadeh if (!name) 2375602adf40SYehuda Sadeh return -ENOMEM; 2376602adf40SYehuda Sadeh 2377dfc5606dSYehuda Sadeh snprintf(name, count, "%s", buf); 2378602adf40SYehuda Sadeh 2379602adf40SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2380602adf40SYehuda Sadeh 2381602adf40SYehuda Sadeh ret = rbd_header_add_snap(rbd_dev, 2382602adf40SYehuda Sadeh name, GFP_KERNEL); 2383602adf40SYehuda Sadeh if (ret < 0) 238459c2be1eSYehuda Sadeh goto err_unlock; 2385602adf40SYehuda Sadeh 2386dfc5606dSYehuda Sadeh ret = __rbd_update_snaps(rbd_dev); 
2387602adf40SYehuda Sadeh if (ret < 0) 238859c2be1eSYehuda Sadeh goto err_unlock; 238959c2be1eSYehuda Sadeh 239059c2be1eSYehuda Sadeh /* shouldn't hold ctl_mutex when notifying.. notify might 239159c2be1eSYehuda Sadeh trigger a watch callback that would need to get that mutex */ 239259c2be1eSYehuda Sadeh mutex_unlock(&ctl_mutex); 239359c2be1eSYehuda Sadeh 239459c2be1eSYehuda Sadeh /* make a best effort, don't error if failed */ 239559c2be1eSYehuda Sadeh rbd_req_sync_notify(rbd_dev, rbd_dev->obj_md_name); 2396602adf40SYehuda Sadeh 2397602adf40SYehuda Sadeh ret = count; 239859c2be1eSYehuda Sadeh kfree(name); 239959c2be1eSYehuda Sadeh return ret; 240059c2be1eSYehuda Sadeh 240159c2be1eSYehuda Sadeh err_unlock: 2402602adf40SYehuda Sadeh mutex_unlock(&ctl_mutex); 2403602adf40SYehuda Sadeh kfree(name); 2404602adf40SYehuda Sadeh return ret; 2405602adf40SYehuda Sadeh } 2406602adf40SYehuda Sadeh 2407dfc5606dSYehuda Sadeh static struct bus_attribute rbd_bus_attrs[] = { 2408dfc5606dSYehuda Sadeh __ATTR(add, S_IWUSR, NULL, rbd_add), 2409dfc5606dSYehuda Sadeh __ATTR(remove, S_IWUSR, NULL, rbd_remove), 2410602adf40SYehuda Sadeh __ATTR_NULL 2411602adf40SYehuda Sadeh }; 2412602adf40SYehuda Sadeh 2413602adf40SYehuda Sadeh /* 2414602adf40SYehuda Sadeh * create control files in sysfs 2415dfc5606dSYehuda Sadeh * /sys/bus/rbd/... 
2416602adf40SYehuda Sadeh */ 2417602adf40SYehuda Sadeh static int rbd_sysfs_init(void) 2418602adf40SYehuda Sadeh { 2419dfc5606dSYehuda Sadeh int ret; 2420602adf40SYehuda Sadeh 2421dfc5606dSYehuda Sadeh rbd_bus_type.bus_attrs = rbd_bus_attrs; 2422602adf40SYehuda Sadeh 2423dfc5606dSYehuda Sadeh ret = bus_register(&rbd_bus_type); 2424dfc5606dSYehuda Sadeh if (ret < 0) 2425dfc5606dSYehuda Sadeh return ret; 2426602adf40SYehuda Sadeh 2427dfc5606dSYehuda Sadeh ret = device_register(&rbd_root_dev); 2428602adf40SYehuda Sadeh 2429602adf40SYehuda Sadeh return ret; 2430602adf40SYehuda Sadeh } 2431602adf40SYehuda Sadeh 2432602adf40SYehuda Sadeh static void rbd_sysfs_cleanup(void) 2433602adf40SYehuda Sadeh { 2434dfc5606dSYehuda Sadeh device_unregister(&rbd_root_dev); 2435dfc5606dSYehuda Sadeh bus_unregister(&rbd_bus_type); 2436602adf40SYehuda Sadeh } 2437602adf40SYehuda Sadeh 2438602adf40SYehuda Sadeh int __init rbd_init(void) 2439602adf40SYehuda Sadeh { 2440602adf40SYehuda Sadeh int rc; 2441602adf40SYehuda Sadeh 2442602adf40SYehuda Sadeh rc = rbd_sysfs_init(); 2443602adf40SYehuda Sadeh if (rc) 2444602adf40SYehuda Sadeh return rc; 2445602adf40SYehuda Sadeh pr_info("loaded " DRV_NAME_LONG "\n"); 2446602adf40SYehuda Sadeh return 0; 2447602adf40SYehuda Sadeh } 2448602adf40SYehuda Sadeh 2449602adf40SYehuda Sadeh void __exit rbd_exit(void) 2450602adf40SYehuda Sadeh { 2451602adf40SYehuda Sadeh rbd_sysfs_cleanup(); 2452602adf40SYehuda Sadeh } 2453602adf40SYehuda Sadeh 2454602adf40SYehuda Sadeh module_init(rbd_init); 2455602adf40SYehuda Sadeh module_exit(rbd_exit); 2456602adf40SYehuda Sadeh 2457602adf40SYehuda Sadeh MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 2458602adf40SYehuda Sadeh MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 2459602adf40SYehuda Sadeh MODULE_DESCRIPTION("rados block device"); 2460602adf40SYehuda Sadeh 2461602adf40SYehuda Sadeh /* following authorship retained from original osdblk.c */ 2462602adf40SYehuda Sadeh MODULE_AUTHOR("Jeff Garzik 
<jeff@garzik.org>"); 2463602adf40SYehuda Sadeh 2464602adf40SYehuda Sadeh MODULE_LICENSE("GPL"); 2465