1602adf40SYehuda Sadeh /* 2602adf40SYehuda Sadeh rbd.c -- Export ceph rados objects as a Linux block device 3602adf40SYehuda Sadeh 4602adf40SYehuda Sadeh 5602adf40SYehuda Sadeh based on drivers/block/osdblk.c: 6602adf40SYehuda Sadeh 7602adf40SYehuda Sadeh Copyright 2009 Red Hat, Inc. 8602adf40SYehuda Sadeh 9602adf40SYehuda Sadeh This program is free software; you can redistribute it and/or modify 10602adf40SYehuda Sadeh it under the terms of the GNU General Public License as published by 11602adf40SYehuda Sadeh the Free Software Foundation. 12602adf40SYehuda Sadeh 13602adf40SYehuda Sadeh This program is distributed in the hope that it will be useful, 14602adf40SYehuda Sadeh but WITHOUT ANY WARRANTY; without even the implied warranty of 15602adf40SYehuda Sadeh MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16602adf40SYehuda Sadeh GNU General Public License for more details. 17602adf40SYehuda Sadeh 18602adf40SYehuda Sadeh You should have received a copy of the GNU General Public License 19602adf40SYehuda Sadeh along with this program; see the file COPYING. If not, write to 20602adf40SYehuda Sadeh the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 21602adf40SYehuda Sadeh 22602adf40SYehuda Sadeh 23602adf40SYehuda Sadeh 24dfc5606dSYehuda Sadeh For usage instructions, please refer to: 25602adf40SYehuda Sadeh 26dfc5606dSYehuda Sadeh Documentation/ABI/testing/sysfs-bus-rbd 27602adf40SYehuda Sadeh 28602adf40SYehuda Sadeh */ 29602adf40SYehuda Sadeh 30602adf40SYehuda Sadeh #include <linux/ceph/libceph.h> 31602adf40SYehuda Sadeh #include <linux/ceph/osd_client.h> 32602adf40SYehuda Sadeh #include <linux/ceph/mon_client.h> 33602adf40SYehuda Sadeh #include <linux/ceph/decode.h> 3459c2be1eSYehuda Sadeh #include <linux/parser.h> 35602adf40SYehuda Sadeh 36602adf40SYehuda Sadeh #include <linux/kernel.h> 37602adf40SYehuda Sadeh #include <linux/device.h> 38602adf40SYehuda Sadeh #include <linux/module.h> 39602adf40SYehuda Sadeh #include <linux/fs.h> 40602adf40SYehuda Sadeh #include <linux/blkdev.h> 41602adf40SYehuda Sadeh 42602adf40SYehuda Sadeh #include "rbd_types.h" 43602adf40SYehuda Sadeh 44aafb230eSAlex Elder #define RBD_DEBUG /* Activate rbd_assert() calls */ 45aafb230eSAlex Elder 46593a9e7bSAlex Elder /* 47593a9e7bSAlex Elder * The basic unit of block I/O is a sector. It is interpreted in a 48593a9e7bSAlex Elder * number of contexts in Linux (blk, bio, genhd), but the default is 49593a9e7bSAlex Elder * universally 512 bytes. These symbols are just slightly more 50593a9e7bSAlex Elder * meaningful than the bare numbers they represent. 51593a9e7bSAlex Elder */ 52593a9e7bSAlex Elder #define SECTOR_SHIFT 9 53593a9e7bSAlex Elder #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) 54593a9e7bSAlex Elder 55df111be6SAlex Elder /* It might be useful to have this defined elsewhere too */ 56df111be6SAlex Elder 57df111be6SAlex Elder #define U64_MAX ((u64) (~0ULL)) 58df111be6SAlex Elder 59f0f8cef5SAlex Elder #define RBD_DRV_NAME "rbd" 60f0f8cef5SAlex Elder #define RBD_DRV_NAME_LONG "rbd (rados block device)" 61602adf40SYehuda Sadeh 62602adf40SYehuda Sadeh #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ 63602adf40SYehuda Sadeh 64d4b125e9SAlex Elder #define RBD_SNAP_DEV_NAME_PREFIX "snap_" 65d4b125e9SAlex Elder #define RBD_MAX_SNAP_NAME_LEN \ 66d4b125e9SAlex Elder (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) 67d4b125e9SAlex Elder 6835d489f9SAlex Elder #define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */ 69602adf40SYehuda Sadeh #define RBD_MAX_OPT_LEN 1024 70602adf40SYehuda Sadeh 71602adf40SYehuda Sadeh #define RBD_SNAP_HEAD_NAME "-" 72602adf40SYehuda Sadeh 73589d30e0SAlex Elder #define RBD_IMAGE_ID_LEN_MAX 64 741e130199SAlex Elder #define RBD_OBJ_PREFIX_LEN_MAX 64 75589d30e0SAlex Elder 76d889140cSAlex Elder /* Feature bits */ 77d889140cSAlex Elder 78d889140cSAlex Elder #define RBD_FEATURE_LAYERING 1 79d889140cSAlex Elder 80d889140cSAlex Elder /* Features supported by this (client software) implementation. */ 81d889140cSAlex Elder 82d889140cSAlex Elder #define RBD_FEATURES_ALL (0) 83d889140cSAlex Elder 8481a89793SAlex Elder /* 8581a89793SAlex Elder * An RBD device name will be "rbd#", where the "rbd" comes from 8681a89793SAlex Elder * RBD_DRV_NAME above, and # is a unique integer identifier. 8781a89793SAlex Elder * MAX_INT_FORMAT_WIDTH is used in ensuring DEV_NAME_LEN is big 8881a89793SAlex Elder * enough to hold all possible device names. 8981a89793SAlex Elder */ 90602adf40SYehuda Sadeh #define DEV_NAME_LEN 32 9181a89793SAlex Elder #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) 92602adf40SYehuda Sadeh 93cc0538b6SAlex Elder #define RBD_READ_ONLY_DEFAULT false 9459c2be1eSYehuda Sadeh 95602adf40SYehuda Sadeh /* 96602adf40SYehuda Sadeh * block device image metadata (in-memory version) 97602adf40SYehuda Sadeh */ 98602adf40SYehuda Sadeh struct rbd_image_header { 99f84344f3SAlex Elder /* These four fields never change for a given rbd image */ 100849b4260SAlex Elder char *object_prefix; 10134b13184SAlex Elder u64 features; 102602adf40SYehuda Sadeh __u8 obj_order; 103602adf40SYehuda Sadeh __u8 crypt_type; 104602adf40SYehuda Sadeh __u8 comp_type; 105602adf40SYehuda Sadeh 106f84344f3SAlex Elder /* The remaining fields need to be updated occasionally */ 107f84344f3SAlex Elder u64 image_size; 108f84344f3SAlex Elder struct ceph_snap_context *snapc; 109602adf40SYehuda Sadeh char *snap_names; 110602adf40SYehuda Sadeh u64 *snap_sizes; 11159c2be1eSYehuda Sadeh 11259c2be1eSYehuda Sadeh u64 obj_version; 11359c2be1eSYehuda Sadeh }; 11459c2be1eSYehuda Sadeh 1150d7dbfceSAlex Elder /* 1160d7dbfceSAlex Elder * An rbd image specification. 1170d7dbfceSAlex Elder * 1180d7dbfceSAlex Elder * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely 1190d7dbfceSAlex Elder * identify an image. 1200d7dbfceSAlex Elder */ 1210d7dbfceSAlex Elder struct rbd_spec { 1220d7dbfceSAlex Elder u64 pool_id; 1230d7dbfceSAlex Elder char *pool_name; 1240d7dbfceSAlex Elder 1250d7dbfceSAlex Elder char *image_id; 1260d7dbfceSAlex Elder size_t image_id_len; 1270d7dbfceSAlex Elder char *image_name; 1280d7dbfceSAlex Elder size_t image_name_len; 1290d7dbfceSAlex Elder 1300d7dbfceSAlex Elder u64 snap_id; 1310d7dbfceSAlex Elder char *snap_name; 1320d7dbfceSAlex Elder 1330d7dbfceSAlex Elder struct kref kref; 1340d7dbfceSAlex Elder }; 1350d7dbfceSAlex Elder 13659c2be1eSYehuda Sadeh struct rbd_options { 137cc0538b6SAlex Elder bool read_only; 138602adf40SYehuda Sadeh }; 139602adf40SYehuda Sadeh 140602adf40SYehuda Sadeh /* 141f0f8cef5SAlex Elder * an instance of the client. multiple devices may share an rbd client. 142602adf40SYehuda Sadeh */ 143602adf40SYehuda Sadeh struct rbd_client { 144602adf40SYehuda Sadeh struct ceph_client *client; 145602adf40SYehuda Sadeh struct kref kref; 146602adf40SYehuda Sadeh struct list_head node; 147602adf40SYehuda Sadeh }; 148602adf40SYehuda Sadeh 149602adf40SYehuda Sadeh /* 150f0f8cef5SAlex Elder * a request completion status 151602adf40SYehuda Sadeh */ 1521fec7093SYehuda Sadeh struct rbd_req_status { 1531fec7093SYehuda Sadeh int done; 1541fec7093SYehuda Sadeh int rc; 1551fec7093SYehuda Sadeh u64 bytes; 1561fec7093SYehuda Sadeh }; 1571fec7093SYehuda Sadeh 1581fec7093SYehuda Sadeh /* 1591fec7093SYehuda Sadeh * a collection of requests 1601fec7093SYehuda Sadeh */ 1611fec7093SYehuda Sadeh struct rbd_req_coll { 1621fec7093SYehuda Sadeh int total; 1631fec7093SYehuda Sadeh int num_done; 1641fec7093SYehuda Sadeh struct kref kref; 1651fec7093SYehuda Sadeh struct rbd_req_status status[0]; 166602adf40SYehuda Sadeh }; 167602adf40SYehuda Sadeh 168f0f8cef5SAlex Elder /* 169f0f8cef5SAlex Elder * a single io request 170f0f8cef5SAlex Elder */ 171f0f8cef5SAlex Elder struct rbd_request { 172f0f8cef5SAlex Elder struct request *rq; /* blk layer request */ 173f0f8cef5SAlex Elder struct bio *bio; /* cloned bio */ 174f0f8cef5SAlex Elder struct page **pages; /* list of used pages */ 175f0f8cef5SAlex Elder u64 len; 176f0f8cef5SAlex Elder int coll_index; 177f0f8cef5SAlex Elder struct rbd_req_coll *coll; 178f0f8cef5SAlex Elder }; 179f0f8cef5SAlex Elder 180dfc5606dSYehuda Sadeh struct rbd_snap { 181dfc5606dSYehuda Sadeh struct device dev; 182dfc5606dSYehuda Sadeh const char *name; 1833591538fSJosh Durgin u64 size; 184dfc5606dSYehuda Sadeh struct list_head node; 185dfc5606dSYehuda Sadeh u64 id; 18634b13184SAlex Elder u64 features; 187dfc5606dSYehuda Sadeh }; 188dfc5606dSYehuda Sadeh 189f84344f3SAlex Elder struct rbd_mapping { 19099c1f08fSAlex Elder u64 size; 19134b13184SAlex Elder u64 features; 192f84344f3SAlex Elder bool read_only; 193f84344f3SAlex Elder }; 194f84344f3SAlex Elder 195602adf40SYehuda Sadeh /* 196602adf40SYehuda Sadeh * a single device 197602adf40SYehuda Sadeh */ 198602adf40SYehuda Sadeh struct rbd_device { 199de71a297SAlex Elder int dev_id; /* blkdev unique id */ 200602adf40SYehuda Sadeh 201602adf40SYehuda Sadeh int major; /* blkdev assigned major */ 202602adf40SYehuda Sadeh struct gendisk *disk; /* blkdev's gendisk and rq */ 203602adf40SYehuda Sadeh 204a30b71b9SAlex Elder u32 image_format; /* Either 1 or 2 */ 205602adf40SYehuda Sadeh struct rbd_client *rbd_client; 206602adf40SYehuda Sadeh 207602adf40SYehuda Sadeh char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ 208602adf40SYehuda Sadeh 209602adf40SYehuda Sadeh spinlock_t lock; /* queue lock */ 210602adf40SYehuda Sadeh 211602adf40SYehuda Sadeh struct rbd_image_header header; 212daba5fdbSAlex Elder bool exists; 2130d7dbfceSAlex Elder struct rbd_spec *spec; 214602adf40SYehuda Sadeh 2150d7dbfceSAlex Elder char *header_name; 216971f839aSAlex Elder 21759c2be1eSYehuda Sadeh struct ceph_osd_event *watch_event; 21859c2be1eSYehuda Sadeh struct ceph_osd_request *watch_request; 21959c2be1eSYehuda Sadeh 220c666601aSJosh Durgin /* protects updating the header */ 221c666601aSJosh Durgin struct rw_semaphore header_rwsem; 222f84344f3SAlex Elder 223f84344f3SAlex Elder struct rbd_mapping mapping; 224602adf40SYehuda Sadeh 225602adf40SYehuda Sadeh struct list_head node; 226dfc5606dSYehuda Sadeh 227dfc5606dSYehuda Sadeh /* list of snapshots */ 228dfc5606dSYehuda Sadeh struct list_head snaps; 229dfc5606dSYehuda Sadeh 230dfc5606dSYehuda Sadeh /* sysfs related */ 231dfc5606dSYehuda Sadeh struct device dev; 232dfc5606dSYehuda Sadeh }; 233dfc5606dSYehuda Sadeh 234602adf40SYehuda Sadeh static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ 235e124a82fSAlex Elder 236602adf40SYehuda Sadeh static LIST_HEAD(rbd_dev_list); /* devices */ 237e124a82fSAlex Elder static DEFINE_SPINLOCK(rbd_dev_list_lock); 238e124a82fSAlex Elder 239602adf40SYehuda Sadeh static LIST_HEAD(rbd_client_list); /* clients */ 240432b8587SAlex Elder static DEFINE_SPINLOCK(rbd_client_list_lock); 241602adf40SYehuda Sadeh 242304f6808SAlex Elder static int rbd_dev_snaps_update(struct rbd_device *rbd_dev); 243304f6808SAlex Elder static int rbd_dev_snaps_register(struct rbd_device *rbd_dev); 244304f6808SAlex Elder 245dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev); 24641f38c2bSAlex Elder static void rbd_remove_snap_dev(struct rbd_snap *snap); 247dfc5606dSYehuda Sadeh 248f0f8cef5SAlex Elder static ssize_t rbd_add(struct bus_type *bus, const char *buf, 249f0f8cef5SAlex Elder size_t count); 250f0f8cef5SAlex Elder static ssize_t rbd_remove(struct bus_type *bus, const char *buf, 251f0f8cef5SAlex Elder size_t count); 252f0f8cef5SAlex Elder 253f0f8cef5SAlex Elder static struct bus_attribute rbd_bus_attrs[] = { 254f0f8cef5SAlex Elder __ATTR(add, S_IWUSR, NULL, rbd_add), 255f0f8cef5SAlex Elder __ATTR(remove, S_IWUSR, NULL, rbd_remove), 256f0f8cef5SAlex Elder __ATTR_NULL 257f0f8cef5SAlex Elder }; 258f0f8cef5SAlex Elder 259f0f8cef5SAlex Elder static struct bus_type rbd_bus_type = { 260f0f8cef5SAlex Elder .name = "rbd", 261f0f8cef5SAlex Elder .bus_attrs = rbd_bus_attrs, 262f0f8cef5SAlex Elder }; 263f0f8cef5SAlex Elder 264f0f8cef5SAlex Elder static void rbd_root_dev_release(struct device *dev) 265f0f8cef5SAlex Elder { 266f0f8cef5SAlex Elder } 267f0f8cef5SAlex Elder 268f0f8cef5SAlex Elder static struct device rbd_root_dev = { 269f0f8cef5SAlex Elder .init_name = "rbd", 270f0f8cef5SAlex Elder .release = rbd_root_dev_release, 271f0f8cef5SAlex Elder }; 272f0f8cef5SAlex Elder 273aafb230eSAlex Elder #ifdef RBD_DEBUG 274aafb230eSAlex Elder #define rbd_assert(expr) \ 275aafb230eSAlex Elder if (unlikely(!(expr))) { \ 276aafb230eSAlex Elder printk(KERN_ERR "\nAssertion failure in %s() " \ 277aafb230eSAlex Elder "at line %d:\n\n" \ 278aafb230eSAlex Elder "\trbd_assert(%s);\n\n", \ 279aafb230eSAlex Elder __func__, __LINE__, #expr); \ 280aafb230eSAlex Elder BUG(); \ 281aafb230eSAlex Elder } 282aafb230eSAlex Elder #else /* !RBD_DEBUG */ 283aafb230eSAlex Elder # define rbd_assert(expr) ((void) 0) 284aafb230eSAlex Elder #endif /* !RBD_DEBUG */ 285dfc5606dSYehuda Sadeh 286dfc5606dSYehuda Sadeh static struct device *rbd_get_dev(struct rbd_device *rbd_dev) 287dfc5606dSYehuda Sadeh { 288dfc5606dSYehuda Sadeh return get_device(&rbd_dev->dev); 289dfc5606dSYehuda Sadeh } 290dfc5606dSYehuda Sadeh 291dfc5606dSYehuda Sadeh static void rbd_put_dev(struct rbd_device *rbd_dev) 292dfc5606dSYehuda Sadeh { 293dfc5606dSYehuda Sadeh put_device(&rbd_dev->dev); 294dfc5606dSYehuda Sadeh } 295602adf40SYehuda Sadeh 296117973fbSAlex Elder static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver); 297117973fbSAlex Elder static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver); 29859c2be1eSYehuda Sadeh 299602adf40SYehuda Sadeh static int rbd_open(struct block_device *bdev, fmode_t mode) 300602adf40SYehuda Sadeh { 301f0f8cef5SAlex Elder struct rbd_device *rbd_dev = bdev->bd_disk->private_data; 302602adf40SYehuda Sadeh 303f84344f3SAlex Elder if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only) 304602adf40SYehuda Sadeh return -EROFS; 305602adf40SYehuda Sadeh 306340c7a2bSAlex Elder rbd_get_dev(rbd_dev); 307f84344f3SAlex Elder set_device_ro(bdev, rbd_dev->mapping.read_only); 308340c7a2bSAlex Elder 309602adf40SYehuda Sadeh return 0; 310602adf40SYehuda Sadeh } 311602adf40SYehuda Sadeh 312dfc5606dSYehuda Sadeh static int rbd_release(struct gendisk *disk, fmode_t mode) 313dfc5606dSYehuda Sadeh { 314dfc5606dSYehuda Sadeh struct rbd_device *rbd_dev = disk->private_data; 315dfc5606dSYehuda Sadeh 316dfc5606dSYehuda Sadeh rbd_put_dev(rbd_dev); 317dfc5606dSYehuda Sadeh 318dfc5606dSYehuda Sadeh return 0; 319dfc5606dSYehuda Sadeh } 320dfc5606dSYehuda Sadeh 321602adf40SYehuda Sadeh static const struct block_device_operations rbd_bd_ops = { 322602adf40SYehuda Sadeh .owner = THIS_MODULE, 323602adf40SYehuda Sadeh .open = rbd_open, 324dfc5606dSYehuda Sadeh .release = rbd_release, 325602adf40SYehuda Sadeh }; 326602adf40SYehuda Sadeh 327602adf40SYehuda Sadeh /* 328602adf40SYehuda Sadeh * Initialize an rbd client instance. 32943ae4701SAlex Elder * We own *ceph_opts. 330602adf40SYehuda Sadeh */ 331f8c38929SAlex Elder static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) 332602adf40SYehuda Sadeh { 333602adf40SYehuda Sadeh struct rbd_client *rbdc; 334602adf40SYehuda Sadeh int ret = -ENOMEM; 335602adf40SYehuda Sadeh 336602adf40SYehuda Sadeh dout("rbd_client_create\n"); 337602adf40SYehuda Sadeh rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); 338602adf40SYehuda Sadeh if (!rbdc) 339602adf40SYehuda Sadeh goto out_opt; 340602adf40SYehuda Sadeh 341602adf40SYehuda Sadeh kref_init(&rbdc->kref); 342602adf40SYehuda Sadeh INIT_LIST_HEAD(&rbdc->node); 343602adf40SYehuda Sadeh 344bc534d86SAlex Elder mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 345bc534d86SAlex Elder 34643ae4701SAlex Elder rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0); 347602adf40SYehuda Sadeh if (IS_ERR(rbdc->client)) 348bc534d86SAlex Elder goto out_mutex; 34943ae4701SAlex Elder ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */ 350602adf40SYehuda Sadeh 351602adf40SYehuda Sadeh ret = ceph_open_session(rbdc->client); 352602adf40SYehuda Sadeh if (ret < 0) 353602adf40SYehuda Sadeh goto out_err; 354602adf40SYehuda Sadeh 355432b8587SAlex Elder spin_lock(&rbd_client_list_lock); 356602adf40SYehuda Sadeh list_add_tail(&rbdc->node, &rbd_client_list); 357432b8587SAlex Elder spin_unlock(&rbd_client_list_lock); 358602adf40SYehuda Sadeh 359bc534d86SAlex Elder mutex_unlock(&ctl_mutex); 360bc534d86SAlex Elder 361602adf40SYehuda Sadeh dout("rbd_client_create created %p\n", rbdc); 362602adf40SYehuda Sadeh return rbdc; 363602adf40SYehuda Sadeh 364602adf40SYehuda Sadeh out_err: 365602adf40SYehuda Sadeh ceph_destroy_client(rbdc->client); 366bc534d86SAlex Elder out_mutex: 367bc534d86SAlex Elder mutex_unlock(&ctl_mutex); 368602adf40SYehuda Sadeh kfree(rbdc); 369602adf40SYehuda Sadeh out_opt: 37043ae4701SAlex Elder if (ceph_opts) 37143ae4701SAlex Elder ceph_destroy_options(ceph_opts); 37228f259b7SVasiliy Kulikov return ERR_PTR(ret); 373602adf40SYehuda Sadeh } 374602adf40SYehuda Sadeh 375602adf40SYehuda Sadeh /* 3761f7ba331SAlex Elder * Find a ceph client with specific addr and configuration. If 3771f7ba331SAlex Elder * found, bump its reference count. 378602adf40SYehuda Sadeh */ 3791f7ba331SAlex Elder static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts) 380602adf40SYehuda Sadeh { 381602adf40SYehuda Sadeh struct rbd_client *client_node; 3821f7ba331SAlex Elder bool found = false; 383602adf40SYehuda Sadeh 38443ae4701SAlex Elder if (ceph_opts->flags & CEPH_OPT_NOSHARE) 385602adf40SYehuda Sadeh return NULL; 386602adf40SYehuda Sadeh 3871f7ba331SAlex Elder spin_lock(&rbd_client_list_lock); 3881f7ba331SAlex Elder list_for_each_entry(client_node, &rbd_client_list, node) { 3891f7ba331SAlex Elder if (!ceph_compare_options(ceph_opts, client_node->client)) { 3901f7ba331SAlex Elder kref_get(&client_node->kref); 3911f7ba331SAlex Elder found = true; 3921f7ba331SAlex Elder break; 3931f7ba331SAlex Elder } 3941f7ba331SAlex Elder } 3951f7ba331SAlex Elder spin_unlock(&rbd_client_list_lock); 3961f7ba331SAlex Elder 3971f7ba331SAlex Elder return found ? client_node : NULL; 398602adf40SYehuda Sadeh } 399602adf40SYehuda Sadeh 400602adf40SYehuda Sadeh /* 40159c2be1eSYehuda Sadeh * mount options 40259c2be1eSYehuda Sadeh */ 40359c2be1eSYehuda Sadeh enum { 40459c2be1eSYehuda Sadeh Opt_last_int, 40559c2be1eSYehuda Sadeh /* int args above */ 40659c2be1eSYehuda Sadeh Opt_last_string, 40759c2be1eSYehuda Sadeh /* string args above */ 408cc0538b6SAlex Elder Opt_read_only, 409cc0538b6SAlex Elder Opt_read_write, 410cc0538b6SAlex Elder /* Boolean args above */ 411cc0538b6SAlex Elder Opt_last_bool, 41259c2be1eSYehuda Sadeh }; 41359c2be1eSYehuda Sadeh 41443ae4701SAlex Elder static match_table_t rbd_opts_tokens = { 41559c2be1eSYehuda Sadeh /* int args above */ 41659c2be1eSYehuda Sadeh /* string args above */ 417be466c1cSAlex Elder {Opt_read_only, "read_only"}, 418cc0538b6SAlex Elder {Opt_read_only, "ro"}, /* Alternate spelling */ 419cc0538b6SAlex Elder {Opt_read_write, "read_write"}, 420cc0538b6SAlex Elder {Opt_read_write, "rw"}, /* Alternate spelling */ 421cc0538b6SAlex Elder /* Boolean args above */ 42259c2be1eSYehuda Sadeh {-1, NULL} 42359c2be1eSYehuda Sadeh }; 42459c2be1eSYehuda Sadeh 42559c2be1eSYehuda Sadeh static int parse_rbd_opts_token(char *c, void *private) 42659c2be1eSYehuda Sadeh { 42743ae4701SAlex Elder struct rbd_options *rbd_opts = private; 42859c2be1eSYehuda Sadeh substring_t argstr[MAX_OPT_ARGS]; 42959c2be1eSYehuda Sadeh int token, intval, ret; 43059c2be1eSYehuda Sadeh 43143ae4701SAlex Elder token = match_token(c, rbd_opts_tokens, argstr); 43259c2be1eSYehuda Sadeh if (token < 0) 43359c2be1eSYehuda Sadeh return -EINVAL; 43459c2be1eSYehuda Sadeh 43559c2be1eSYehuda Sadeh if (token < Opt_last_int) { 43659c2be1eSYehuda Sadeh ret = match_int(&argstr[0], &intval); 43759c2be1eSYehuda Sadeh if (ret < 0) { 43859c2be1eSYehuda Sadeh pr_err("bad mount option arg (not int) " 43959c2be1eSYehuda Sadeh "at '%s'\n", c); 44059c2be1eSYehuda Sadeh return ret; 44159c2be1eSYehuda Sadeh } 44259c2be1eSYehuda Sadeh dout("got int token %d val %d\n", token, intval); 44359c2be1eSYehuda Sadeh } else if (token > Opt_last_int && token < Opt_last_string) { 44459c2be1eSYehuda Sadeh dout("got string token %d val %s\n", token, 44559c2be1eSYehuda Sadeh argstr[0].from); 446cc0538b6SAlex Elder } else if (token > Opt_last_string && token < Opt_last_bool) { 447cc0538b6SAlex Elder dout("got Boolean token %d\n", token); 44859c2be1eSYehuda Sadeh } else { 44959c2be1eSYehuda Sadeh dout("got token %d\n", token); 45059c2be1eSYehuda Sadeh } 45159c2be1eSYehuda Sadeh 45259c2be1eSYehuda Sadeh switch (token) { 453cc0538b6SAlex Elder case Opt_read_only: 454cc0538b6SAlex Elder rbd_opts->read_only = true; 455cc0538b6SAlex Elder break; 456cc0538b6SAlex Elder case Opt_read_write: 457cc0538b6SAlex Elder rbd_opts->read_only = false; 458cc0538b6SAlex Elder break; 45959c2be1eSYehuda Sadeh default: 460aafb230eSAlex Elder rbd_assert(false); 461aafb230eSAlex Elder break; 46259c2be1eSYehuda Sadeh } 46359c2be1eSYehuda Sadeh return 0; 46459c2be1eSYehuda Sadeh } 46559c2be1eSYehuda Sadeh 46659c2be1eSYehuda Sadeh /* 467602adf40SYehuda Sadeh * Get a ceph client with specific addr and configuration, if one does 468602adf40SYehuda Sadeh * not exist create it. 469602adf40SYehuda Sadeh */ 4709d3997fdSAlex Elder static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts) 471602adf40SYehuda Sadeh { 472f8c38929SAlex Elder struct rbd_client *rbdc; 47359c2be1eSYehuda Sadeh 4741f7ba331SAlex Elder rbdc = rbd_client_find(ceph_opts); 4759d3997fdSAlex Elder if (rbdc) /* using an existing client */ 47643ae4701SAlex Elder ceph_destroy_options(ceph_opts); 4779d3997fdSAlex Elder else 478f8c38929SAlex Elder rbdc = rbd_client_create(ceph_opts); 479d720bcb0SAlex Elder 4809d3997fdSAlex Elder return rbdc; 481602adf40SYehuda Sadeh } 482602adf40SYehuda Sadeh 483602adf40SYehuda Sadeh /* 484602adf40SYehuda Sadeh * Destroy ceph client 485d23a4b3fSAlex Elder * 486432b8587SAlex Elder * Caller must hold rbd_client_list_lock. 487602adf40SYehuda Sadeh */ 488602adf40SYehuda Sadeh static void rbd_client_release(struct kref *kref) 489602adf40SYehuda Sadeh { 490602adf40SYehuda Sadeh struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); 491602adf40SYehuda Sadeh 492602adf40SYehuda Sadeh dout("rbd_release_client %p\n", rbdc); 493cd9d9f5dSAlex Elder spin_lock(&rbd_client_list_lock); 494602adf40SYehuda Sadeh list_del(&rbdc->node); 495cd9d9f5dSAlex Elder spin_unlock(&rbd_client_list_lock); 496602adf40SYehuda Sadeh 497602adf40SYehuda Sadeh ceph_destroy_client(rbdc->client); 498602adf40SYehuda Sadeh kfree(rbdc); 499602adf40SYehuda Sadeh } 500602adf40SYehuda Sadeh 501602adf40SYehuda Sadeh /* 502602adf40SYehuda Sadeh * Drop reference to ceph client node. If it's not referenced anymore, release 503602adf40SYehuda Sadeh * it. 504602adf40SYehuda Sadeh */ 5059d3997fdSAlex Elder static void rbd_put_client(struct rbd_client *rbdc) 506602adf40SYehuda Sadeh { 5079d3997fdSAlex Elder kref_put(&rbdc->kref, rbd_client_release); 508602adf40SYehuda Sadeh } 509602adf40SYehuda Sadeh 5101fec7093SYehuda Sadeh /* 5111fec7093SYehuda Sadeh * Destroy requests collection 5121fec7093SYehuda Sadeh */ 5131fec7093SYehuda Sadeh static void rbd_coll_release(struct kref *kref) 5141fec7093SYehuda Sadeh { 5151fec7093SYehuda Sadeh struct rbd_req_coll *coll = 5161fec7093SYehuda Sadeh container_of(kref, struct rbd_req_coll, kref); 5171fec7093SYehuda Sadeh 5181fec7093SYehuda Sadeh dout("rbd_coll_release %p\n", coll); 5191fec7093SYehuda Sadeh kfree(coll); 5201fec7093SYehuda Sadeh } 521602adf40SYehuda Sadeh 522a30b71b9SAlex Elder static bool rbd_image_format_valid(u32 image_format) 523a30b71b9SAlex Elder { 524a30b71b9SAlex Elder return image_format == 1 || image_format == 2; 525a30b71b9SAlex Elder } 526a30b71b9SAlex Elder 5278e94af8eSAlex Elder static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) 5288e94af8eSAlex Elder { 529103a150fSAlex Elder size_t size; 530103a150fSAlex Elder u32 snap_count; 531103a150fSAlex Elder 532103a150fSAlex Elder /* The header has to start with the magic rbd header text */ 533103a150fSAlex Elder if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT))) 534103a150fSAlex Elder return false; 535103a150fSAlex Elder 536db2388b6SAlex Elder /* The bio layer requires at least sector-sized I/O */ 537db2388b6SAlex Elder 538db2388b6SAlex Elder if (ondisk->options.order < SECTOR_SHIFT) 539db2388b6SAlex Elder return false; 540db2388b6SAlex Elder 541db2388b6SAlex Elder /* If we use u64 in a few spots we may be able to loosen this */ 542db2388b6SAlex Elder 543db2388b6SAlex Elder if (ondisk->options.order > 8 * sizeof (int) - 1) 544db2388b6SAlex Elder return false; 545db2388b6SAlex Elder 546103a150fSAlex Elder /* 547103a150fSAlex Elder * The size of a snapshot header has to fit in a size_t, and 548103a150fSAlex Elder * that limits the number of snapshots. 549103a150fSAlex Elder */ 550103a150fSAlex Elder snap_count = le32_to_cpu(ondisk->snap_count); 551103a150fSAlex Elder size = SIZE_MAX - sizeof (struct ceph_snap_context); 552103a150fSAlex Elder if (snap_count > size / sizeof (__le64)) 553103a150fSAlex Elder return false; 554103a150fSAlex Elder 555103a150fSAlex Elder /* 556103a150fSAlex Elder * Not only that, but the size of the entire the snapshot 557103a150fSAlex Elder * header must also be representable in a size_t. 558103a150fSAlex Elder */ 559103a150fSAlex Elder size -= snap_count * sizeof (__le64); 560103a150fSAlex Elder if ((u64) size < le64_to_cpu(ondisk->snap_names_len)) 561103a150fSAlex Elder return false; 562103a150fSAlex Elder 563103a150fSAlex Elder return true; 5648e94af8eSAlex Elder } 5658e94af8eSAlex Elder 566602adf40SYehuda Sadeh /* 567602adf40SYehuda Sadeh * Create a new header structure, translate header format from the on-disk 568602adf40SYehuda Sadeh * header. 569602adf40SYehuda Sadeh */ 570602adf40SYehuda Sadeh static int rbd_header_from_disk(struct rbd_image_header *header, 5714156d998SAlex Elder struct rbd_image_header_ondisk *ondisk) 572602adf40SYehuda Sadeh { 573ccece235SAlex Elder u32 snap_count; 57458c17b0eSAlex Elder size_t len; 575d2bb24e5SAlex Elder size_t size; 576621901d6SAlex Elder u32 i; 577602adf40SYehuda Sadeh 5786a52325fSAlex Elder memset(header, 0, sizeof (*header)); 5796a52325fSAlex Elder 580103a150fSAlex Elder snap_count = le32_to_cpu(ondisk->snap_count); 581103a150fSAlex Elder 58258c17b0eSAlex Elder len = strnlen(ondisk->object_prefix, sizeof (ondisk->object_prefix)); 58358c17b0eSAlex Elder header->object_prefix = kmalloc(len + 1, GFP_KERNEL); 5846a52325fSAlex Elder if (!header->object_prefix) 585602adf40SYehuda Sadeh return -ENOMEM; 58658c17b0eSAlex Elder memcpy(header->object_prefix, ondisk->object_prefix, len); 58758c17b0eSAlex Elder header->object_prefix[len] = '\0'; 58800f1f36fSAlex Elder 589602adf40SYehuda Sadeh if (snap_count) { 590f785cc1dSAlex Elder u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len); 591f785cc1dSAlex Elder 592621901d6SAlex Elder /* Save a copy of the snapshot names */ 593621901d6SAlex Elder 594f785cc1dSAlex Elder if (snap_names_len > (u64) SIZE_MAX) 595f785cc1dSAlex Elder return -EIO; 596f785cc1dSAlex Elder header->snap_names = kmalloc(snap_names_len, GFP_KERNEL); 597602adf40SYehuda Sadeh if (!header->snap_names) 5986a52325fSAlex Elder goto out_err; 599f785cc1dSAlex Elder /* 600f785cc1dSAlex Elder * Note that rbd_dev_v1_header_read() guarantees 601f785cc1dSAlex Elder * the ondisk buffer we're working with has 602f785cc1dSAlex Elder * snap_names_len bytes beyond the end of the 603f785cc1dSAlex Elder * snapshot id array, this memcpy() is safe. 604f785cc1dSAlex Elder */ 605f785cc1dSAlex Elder memcpy(header->snap_names, &ondisk->snaps[snap_count], 606f785cc1dSAlex Elder snap_names_len); 6076a52325fSAlex Elder 608621901d6SAlex Elder /* Record each snapshot's size */ 609621901d6SAlex Elder 610d2bb24e5SAlex Elder size = snap_count * sizeof (*header->snap_sizes); 611d2bb24e5SAlex Elder header->snap_sizes = kmalloc(size, GFP_KERNEL); 612602adf40SYehuda Sadeh if (!header->snap_sizes) 6136a52325fSAlex Elder goto out_err; 614621901d6SAlex Elder for (i = 0; i < snap_count; i++) 615621901d6SAlex Elder header->snap_sizes[i] = 616621901d6SAlex Elder le64_to_cpu(ondisk->snaps[i].image_size); 617602adf40SYehuda Sadeh } else { 618ccece235SAlex Elder WARN_ON(ondisk->snap_names_len); 619602adf40SYehuda Sadeh header->snap_names = NULL; 620602adf40SYehuda Sadeh header->snap_sizes = NULL; 621602adf40SYehuda Sadeh } 622849b4260SAlex Elder 62334b13184SAlex Elder header->features = 0; /* No features support in v1 images */ 624602adf40SYehuda Sadeh header->obj_order = ondisk->options.order; 625602adf40SYehuda Sadeh header->crypt_type = ondisk->options.crypt_type; 626602adf40SYehuda Sadeh header->comp_type = ondisk->options.comp_type; 6276a52325fSAlex Elder 628621901d6SAlex Elder /* Allocate and fill in the snapshot context */ 629621901d6SAlex Elder 630f84344f3SAlex Elder header->image_size = le64_to_cpu(ondisk->image_size); 6316a52325fSAlex Elder size = sizeof (struct ceph_snap_context); 6326a52325fSAlex Elder size += snap_count * sizeof (header->snapc->snaps[0]); 6336a52325fSAlex Elder header->snapc = kzalloc(size, GFP_KERNEL); 6346a52325fSAlex Elder if (!header->snapc) 6356a52325fSAlex Elder goto out_err; 636602adf40SYehuda Sadeh 637602adf40SYehuda Sadeh atomic_set(&header->snapc->nref, 1); 638505cbb9bSAlex Elder header->snapc->seq = le64_to_cpu(ondisk->snap_seq); 639602adf40SYehuda Sadeh header->snapc->num_snaps = snap_count; 640621901d6SAlex Elder for (i = 0; i < snap_count; i++) 641602adf40SYehuda Sadeh header->snapc->snaps[i] = 642602adf40SYehuda Sadeh le64_to_cpu(ondisk->snaps[i].id); 643602adf40SYehuda Sadeh 644602adf40SYehuda Sadeh return 0; 645602adf40SYehuda Sadeh 6466a52325fSAlex Elder out_err: 647849b4260SAlex Elder kfree(header->snap_sizes); 648ccece235SAlex Elder header->snap_sizes = NULL; 649602adf40SYehuda Sadeh kfree(header->snap_names); 650ccece235SAlex Elder header->snap_names = NULL; 6516a52325fSAlex Elder kfree(header->object_prefix); 6526a52325fSAlex Elder header->object_prefix = NULL; 653ccece235SAlex Elder 65400f1f36fSAlex Elder return -ENOMEM; 655602adf40SYehuda Sadeh } 656602adf40SYehuda Sadeh 6578836b995SAlex Elder static int snap_by_name(struct rbd_device *rbd_dev, const char *snap_name) 658602adf40SYehuda Sadeh { 659602adf40SYehuda Sadeh 660e86924a8SAlex Elder struct rbd_snap *snap; 66100f1f36fSAlex Elder 662e86924a8SAlex Elder list_for_each_entry(snap, &rbd_dev->snaps, node) { 663e86924a8SAlex Elder if (!strcmp(snap_name, snap->name)) { 6640d7dbfceSAlex Elder rbd_dev->spec->snap_id = snap->id; 665e86924a8SAlex Elder rbd_dev->mapping.size = snap->size; 66634b13184SAlex Elder rbd_dev->mapping.features = snap->features; 66700f1f36fSAlex Elder 668e86924a8SAlex Elder return 0; 669602adf40SYehuda Sadeh } 67000f1f36fSAlex Elder } 671e86924a8SAlex Elder 67200f1f36fSAlex Elder return -ENOENT; 67300f1f36fSAlex Elder } 674602adf40SYehuda Sadeh 675819d52bfSAlex Elder static int rbd_dev_set_mapping(struct rbd_device *rbd_dev) 676602adf40SYehuda Sadeh { 67778dc447dSAlex Elder int ret; 678602adf40SYehuda Sadeh 6790d7dbfceSAlex Elder if (!memcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME, 680cc9d734cSJosh Durgin sizeof (RBD_SNAP_HEAD_NAME))) { 6810d7dbfceSAlex Elder rbd_dev->spec->snap_id = CEPH_NOSNAP; 68299c1f08fSAlex Elder rbd_dev->mapping.size = rbd_dev->header.image_size; 68334b13184SAlex Elder rbd_dev->mapping.features = rbd_dev->header.features; 684e86924a8SAlex Elder ret = 0; 685602adf40SYehuda Sadeh } else { 6860d7dbfceSAlex Elder ret = snap_by_name(rbd_dev, rbd_dev->spec->snap_name); 687602adf40SYehuda Sadeh if (ret < 0) 688602adf40SYehuda Sadeh goto done; 689f84344f3SAlex Elder rbd_dev->mapping.read_only = true; 690602adf40SYehuda Sadeh } 691daba5fdbSAlex Elder rbd_dev->exists = true; 692602adf40SYehuda Sadeh done: 693602adf40SYehuda Sadeh return ret; 694602adf40SYehuda Sadeh } 695602adf40SYehuda Sadeh 696602adf40SYehuda Sadeh static void rbd_header_free(struct rbd_image_header *header) 697602adf40SYehuda Sadeh { 698849b4260SAlex Elder kfree(header->object_prefix); 699d78fd7aeSAlex Elder header->object_prefix = NULL; 700602adf40SYehuda Sadeh kfree(header->snap_sizes); 701d78fd7aeSAlex Elder header->snap_sizes = NULL; 702849b4260SAlex Elder kfree(header->snap_names); 703d78fd7aeSAlex Elder header->snap_names = NULL; 704d1d25646SJosh Durgin ceph_put_snap_context(header->snapc); 705d78fd7aeSAlex Elder header->snapc = NULL; 706602adf40SYehuda Sadeh } 707602adf40SYehuda Sadeh 70865ccfe21SAlex Elder static char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) 709602adf40SYehuda Sadeh { 71065ccfe21SAlex Elder char *name; 71165ccfe21SAlex Elder u64 segment; 71265ccfe21SAlex Elder int ret; 713602adf40SYehuda Sadeh 71465ccfe21SAlex Elder name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); 71565ccfe21SAlex Elder if (!name) 71665ccfe21SAlex Elder return NULL; 71765ccfe21SAlex Elder segment = offset >> rbd_dev->header.obj_order; 71865ccfe21SAlex Elder ret = snprintf(name, RBD_MAX_SEG_NAME_LEN, "%s.%012llx", 71965ccfe21SAlex Elder rbd_dev->header.object_prefix, segment); 72065ccfe21SAlex Elder if (ret < 0 || ret >= RBD_MAX_SEG_NAME_LEN) { 72165ccfe21SAlex Elder pr_err("error formatting segment name for #%llu (%d)\n", 72265ccfe21SAlex Elder segment, ret); 72365ccfe21SAlex Elder kfree(name); 72465ccfe21SAlex Elder name = NULL; 72565ccfe21SAlex Elder } 726602adf40SYehuda Sadeh 72765ccfe21SAlex Elder return name; 72865ccfe21SAlex Elder } 729602adf40SYehuda Sadeh 73065ccfe21SAlex Elder static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset) 73165ccfe21SAlex Elder { 73265ccfe21SAlex Elder u64 segment_size = (u64) 1 << rbd_dev->header.obj_order; 733602adf40SYehuda Sadeh 73465ccfe21SAlex Elder return offset & (segment_size - 1); 73565ccfe21SAlex Elder } 73665ccfe21SAlex Elder 73765ccfe21SAlex Elder static u64 rbd_segment_length(struct rbd_device *rbd_dev, 73865ccfe21SAlex Elder u64 offset, u64 length) 73965ccfe21SAlex Elder { 74065ccfe21SAlex Elder u64 segment_size = (u64) 1 << rbd_dev->header.obj_order; 74165ccfe21SAlex Elder 74265ccfe21SAlex Elder offset &= segment_size - 1; 74365ccfe21SAlex Elder 744aafb230eSAlex Elder rbd_assert(length <= U64_MAX - offset); 74565ccfe21SAlex Elder if (offset + length > segment_size) 74665ccfe21SAlex Elder length = segment_size - offset; 74765ccfe21SAlex Elder 74865ccfe21SAlex Elder return length; 749602adf40SYehuda Sadeh } 750602adf40SYehuda Sadeh 7511fec7093SYehuda Sadeh static int rbd_get_num_segments(struct rbd_image_header *header, 7521fec7093SYehuda Sadeh u64 ofs, u64 len) 7531fec7093SYehuda Sadeh { 754df111be6SAlex Elder u64 start_seg; 755df111be6SAlex Elder u64 end_seg; 756df111be6SAlex Elder 757df111be6SAlex Elder if (!len) 758df111be6SAlex Elder return 0; 759df111be6SAlex Elder if (len - 1 > U64_MAX - ofs) 760df111be6SAlex Elder return -ERANGE; 761df111be6SAlex Elder 762df111be6SAlex Elder start_seg = ofs >> header->obj_order; 763df111be6SAlex Elder end_seg = (ofs + len - 1) >> header->obj_order; 764df111be6SAlex Elder 7651fec7093SYehuda Sadeh return end_seg - start_seg + 1; 7661fec7093SYehuda Sadeh } 7671fec7093SYehuda Sadeh 768602adf40SYehuda Sadeh /* 769029bcbd8SJosh Durgin * returns the size of an object in the image 770029bcbd8SJosh Durgin */ 771029bcbd8SJosh Durgin static u64 rbd_obj_bytes(struct rbd_image_header *header) 772029bcbd8SJosh Durgin { 773029bcbd8SJosh Durgin return 1 << header->obj_order; 774029bcbd8SJosh Durgin } 775029bcbd8SJosh Durgin 776029bcbd8SJosh Durgin /* 777602adf40SYehuda Sadeh * bio helpers 778602adf40SYehuda Sadeh */ 779602adf40SYehuda Sadeh 780602adf40SYehuda Sadeh static void bio_chain_put(struct bio *chain) 781602adf40SYehuda Sadeh { 782602adf40SYehuda Sadeh struct bio *tmp; 783602adf40SYehuda Sadeh 784602adf40SYehuda Sadeh while (chain) { 785602adf40SYehuda Sadeh tmp = chain; 786602adf40SYehuda Sadeh chain = chain->bi_next; 787602adf40SYehuda Sadeh bio_put(tmp); 788602adf40SYehuda Sadeh } 789602adf40SYehuda Sadeh } 790602adf40SYehuda Sadeh 791602adf40SYehuda Sadeh /* 792602adf40SYehuda Sadeh * zeros a bio chain, starting at specific offset 793602adf40SYehuda Sadeh */ 794602adf40SYehuda Sadeh static void zero_bio_chain(struct bio *chain, int start_ofs) 795602adf40SYehuda Sadeh { 796602adf40SYehuda Sadeh struct bio_vec *bv; 797602adf40SYehuda Sadeh unsigned long flags; 798602adf40SYehuda Sadeh void *buf; 799602adf40SYehuda Sadeh int i; 800602adf40SYehuda Sadeh int pos = 0; 801602adf40SYehuda Sadeh 802602adf40SYehuda Sadeh while (chain) { 803602adf40SYehuda Sadeh bio_for_each_segment(bv, chain, i) { 804602adf40SYehuda Sadeh if (pos + bv->bv_len > start_ofs) { 805602adf40SYehuda Sadeh int remainder = max(start_ofs - pos, 0); 806602adf40SYehuda Sadeh buf = bvec_kmap_irq(bv, &flags); 807602adf40SYehuda Sadeh memset(buf + remainder, 0, 808602adf40SYehuda Sadeh bv->bv_len - remainder); 80985b5aaa6SDan Carpenter bvec_kunmap_irq(buf, &flags); 810602adf40SYehuda Sadeh } 811602adf40SYehuda Sadeh pos += bv->bv_len; 812602adf40SYehuda Sadeh } 813602adf40SYehuda Sadeh 814602adf40SYehuda Sadeh chain = chain->bi_next; 815602adf40SYehuda Sadeh } 816602adf40SYehuda Sadeh } 817602adf40SYehuda Sadeh 818602adf40SYehuda Sadeh /* 819f7760dadSAlex Elder * Clone a portion of a bio, starting at the given byte offset 820f7760dadSAlex Elder * and continuing for the number of bytes indicated. 821602adf40SYehuda Sadeh */ 822f7760dadSAlex Elder static struct bio *bio_clone_range(struct bio *bio_src, 823f7760dadSAlex Elder unsigned int offset, 824f7760dadSAlex Elder unsigned int len, 825f7760dadSAlex Elder gfp_t gfpmask) 826602adf40SYehuda Sadeh { 827f7760dadSAlex Elder struct bio_vec *bv; 828f7760dadSAlex Elder unsigned int resid; 829f7760dadSAlex Elder unsigned short idx; 830f7760dadSAlex Elder unsigned int voff; 831f7760dadSAlex Elder unsigned short end_idx; 832f7760dadSAlex Elder unsigned short vcnt; 833f7760dadSAlex Elder struct bio *bio; 834602adf40SYehuda Sadeh 835f7760dadSAlex Elder /* Handle the easy case for the caller */ 836f7760dadSAlex Elder 837f7760dadSAlex Elder if (!offset && len == bio_src->bi_size) 838f7760dadSAlex Elder return bio_clone(bio_src, gfpmask); 839f7760dadSAlex Elder 840f7760dadSAlex Elder if (WARN_ON_ONCE(!len)) 841f7760dadSAlex Elder return NULL; 842f7760dadSAlex Elder if (WARN_ON_ONCE(len > bio_src->bi_size)) 843f7760dadSAlex Elder return NULL; 844f7760dadSAlex Elder if (WARN_ON_ONCE(offset > bio_src->bi_size - len)) 845f7760dadSAlex Elder return NULL; 846f7760dadSAlex Elder 847f7760dadSAlex Elder /* Find first affected segment... */ 848f7760dadSAlex Elder 849f7760dadSAlex Elder resid = offset; 850f7760dadSAlex Elder __bio_for_each_segment(bv, bio_src, idx, 0) { 851f7760dadSAlex Elder if (resid < bv->bv_len) 852f7760dadSAlex Elder break; 853f7760dadSAlex Elder resid -= bv->bv_len; 854602adf40SYehuda Sadeh } 855f7760dadSAlex Elder voff = resid; 856602adf40SYehuda Sadeh 857f7760dadSAlex Elder /* ...and the last affected segment */ 858542582fcSAlex Elder 859f7760dadSAlex Elder resid += len; 860f7760dadSAlex Elder __bio_for_each_segment(bv, bio_src, end_idx, idx) { 861f7760dadSAlex Elder if (resid <= bv->bv_len) 862f7760dadSAlex Elder break; 863f7760dadSAlex Elder resid -= bv->bv_len; 864f7760dadSAlex Elder } 865f7760dadSAlex Elder vcnt = end_idx - idx + 1; 866602adf40SYehuda Sadeh 867f7760dadSAlex Elder /* Build the clone */ 868f7760dadSAlex Elder 869f7760dadSAlex Elder bio = bio_alloc(gfpmask, (unsigned int) vcnt); 870f7760dadSAlex Elder if (!bio) 871f7760dadSAlex Elder return NULL; /* ENOMEM */ 872f7760dadSAlex Elder 873f7760dadSAlex Elder bio->bi_bdev = bio_src->bi_bdev; 874f7760dadSAlex Elder bio->bi_sector = bio_src->bi_sector + (offset >> SECTOR_SHIFT); 875f7760dadSAlex Elder bio->bi_rw = bio_src->bi_rw; 876f7760dadSAlex Elder bio->bi_flags |= 1 << BIO_CLONED; 877602adf40SYehuda Sadeh 878602adf40SYehuda Sadeh /* 879f7760dadSAlex Elder * Copy over our part of the bio_vec, then update the first 880f7760dadSAlex Elder * and last (or only) entries. 881602adf40SYehuda Sadeh */ 882f7760dadSAlex Elder memcpy(&bio->bi_io_vec[0], &bio_src->bi_io_vec[idx], 883f7760dadSAlex Elder vcnt * sizeof (struct bio_vec)); 884f7760dadSAlex Elder bio->bi_io_vec[0].bv_offset += voff; 885f7760dadSAlex Elder if (vcnt > 1) { 886f7760dadSAlex Elder bio->bi_io_vec[0].bv_len -= voff; 887f7760dadSAlex Elder bio->bi_io_vec[vcnt - 1].bv_len = resid; 888602adf40SYehuda Sadeh } else { 889f7760dadSAlex Elder bio->bi_io_vec[0].bv_len = len; 890602adf40SYehuda Sadeh } 891602adf40SYehuda Sadeh 892f7760dadSAlex Elder bio->bi_vcnt = vcnt; 893f7760dadSAlex Elder bio->bi_size = len; 894f7760dadSAlex Elder bio->bi_idx = 0; 895602adf40SYehuda Sadeh 896f7760dadSAlex Elder return bio; 897602adf40SYehuda Sadeh } 898602adf40SYehuda Sadeh 899f7760dadSAlex Elder /* 900f7760dadSAlex Elder * Clone a portion of a bio chain, starting at the given byte offset 901f7760dadSAlex Elder * into the first bio in the source chain and continuing for the 902f7760dadSAlex Elder * number of bytes indicated. The result is another bio chain of 903f7760dadSAlex Elder * exactly the given length, or a null pointer on error. 904f7760dadSAlex Elder * 905f7760dadSAlex Elder * The bio_src and offset parameters are both in-out. On entry they 906f7760dadSAlex Elder * refer to the first source bio and the offset into that bio where 907f7760dadSAlex Elder * the start of data to be cloned is located. 908f7760dadSAlex Elder * 909f7760dadSAlex Elder * On return, bio_src is updated to refer to the bio in the source 910f7760dadSAlex Elder * chain that contains first un-cloned byte, and *offset will 911f7760dadSAlex Elder * contain the offset of that byte within that bio. 912f7760dadSAlex Elder */ 913f7760dadSAlex Elder static struct bio *bio_chain_clone_range(struct bio **bio_src, 914f7760dadSAlex Elder unsigned int *offset, 915f7760dadSAlex Elder unsigned int len, 916f7760dadSAlex Elder gfp_t gfpmask) 917f7760dadSAlex Elder { 918f7760dadSAlex Elder struct bio *bi = *bio_src; 919f7760dadSAlex Elder unsigned int off = *offset; 920f7760dadSAlex Elder struct bio *chain = NULL; 921f7760dadSAlex Elder struct bio **end; 922602adf40SYehuda Sadeh 923f7760dadSAlex Elder /* Build up a chain of clone bios up to the limit */ 924602adf40SYehuda Sadeh 925f7760dadSAlex Elder if (!bi || off >= bi->bi_size || !len) 926f7760dadSAlex Elder return NULL; /* Nothing to clone */ 927602adf40SYehuda Sadeh 928f7760dadSAlex Elder end = &chain; 929f7760dadSAlex Elder while (len) { 930f7760dadSAlex Elder unsigned int bi_size; 931f7760dadSAlex Elder struct bio *bio; 932f7760dadSAlex Elder 933f7760dadSAlex Elder if (!bi) 934f7760dadSAlex Elder goto out_err; /* EINVAL; ran out of bio's */ 935f7760dadSAlex Elder bi_size = min_t(unsigned int, bi->bi_size - off, len); 936f7760dadSAlex Elder bio = bio_clone_range(bi, off, bi_size, gfpmask); 937f7760dadSAlex Elder if (!bio) 938f7760dadSAlex Elder goto out_err; /* ENOMEM */ 939f7760dadSAlex Elder 940f7760dadSAlex Elder *end = bio; 941f7760dadSAlex Elder end = &bio->bi_next; 942f7760dadSAlex Elder 943f7760dadSAlex Elder off += bi_size; 944f7760dadSAlex Elder if (off == bi->bi_size) { 945f7760dadSAlex Elder bi = bi->bi_next; 946f7760dadSAlex Elder off = 0; 947f7760dadSAlex Elder } 948f7760dadSAlex Elder len -= bi_size; 949f7760dadSAlex Elder } 950f7760dadSAlex Elder *bio_src = bi; 951f7760dadSAlex Elder *offset = off; 952f7760dadSAlex Elder 953f7760dadSAlex Elder return chain; 954f7760dadSAlex Elder out_err: 955f7760dadSAlex Elder bio_chain_put(chain); 956f7760dadSAlex Elder 957602adf40SYehuda Sadeh return NULL; 958602adf40SYehuda Sadeh } 959602adf40SYehuda Sadeh 960602adf40SYehuda Sadeh /* 961602adf40SYehuda Sadeh * helpers for osd request op vectors. 962602adf40SYehuda Sadeh */ 96357cfc106SAlex Elder static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops, 96457cfc106SAlex Elder int opcode, u32 payload_len) 965602adf40SYehuda Sadeh { 96657cfc106SAlex Elder struct ceph_osd_req_op *ops; 96757cfc106SAlex Elder 96857cfc106SAlex Elder ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO); 96957cfc106SAlex Elder if (!ops) 97057cfc106SAlex Elder return NULL; 97157cfc106SAlex Elder 97257cfc106SAlex Elder ops[0].op = opcode; 97357cfc106SAlex Elder 974602adf40SYehuda Sadeh /* 975602adf40SYehuda Sadeh * op extent offset and length will be set later on 976602adf40SYehuda Sadeh * in calc_raw_layout() 977602adf40SYehuda Sadeh */ 97857cfc106SAlex Elder ops[0].payload_len = payload_len; 97957cfc106SAlex Elder 98057cfc106SAlex Elder return ops; 981602adf40SYehuda Sadeh } 982602adf40SYehuda Sadeh 983602adf40SYehuda Sadeh static void rbd_destroy_ops(struct ceph_osd_req_op *ops) 984602adf40SYehuda Sadeh { 985602adf40SYehuda Sadeh kfree(ops); 986602adf40SYehuda Sadeh } 987602adf40SYehuda Sadeh 9881fec7093SYehuda Sadeh static void rbd_coll_end_req_index(struct request *rq, 9891fec7093SYehuda Sadeh struct rbd_req_coll *coll, 9901fec7093SYehuda Sadeh int index, 9911fec7093SYehuda Sadeh int ret, u64 len) 9921fec7093SYehuda Sadeh { 9931fec7093SYehuda Sadeh struct request_queue *q; 9941fec7093SYehuda Sadeh int min, max, i; 9951fec7093SYehuda Sadeh 996bd919d45SAlex Elder dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n", 997bd919d45SAlex Elder coll, index, ret, (unsigned long long) len); 9981fec7093SYehuda Sadeh 9991fec7093SYehuda Sadeh if (!rq) 10001fec7093SYehuda Sadeh return; 10011fec7093SYehuda Sadeh 10021fec7093SYehuda Sadeh if (!coll) { 10031fec7093SYehuda Sadeh blk_end_request(rq, ret, len); 10041fec7093SYehuda Sadeh return; 10051fec7093SYehuda Sadeh } 10061fec7093SYehuda Sadeh 10071fec7093SYehuda Sadeh q = rq->q; 10081fec7093SYehuda Sadeh 10091fec7093SYehuda Sadeh spin_lock_irq(q->queue_lock); 10101fec7093SYehuda Sadeh coll->status[index].done = 1; 10111fec7093SYehuda Sadeh coll->status[index].rc = ret; 10121fec7093SYehuda Sadeh coll->status[index].bytes = len; 10131fec7093SYehuda Sadeh max = min = coll->num_done; 10141fec7093SYehuda Sadeh while (max < coll->total && coll->status[max].done) 10151fec7093SYehuda Sadeh max++; 10161fec7093SYehuda Sadeh 10171fec7093SYehuda Sadeh for (i = min; i<max; i++) { 10181fec7093SYehuda Sadeh __blk_end_request(rq, coll->status[i].rc, 10191fec7093SYehuda Sadeh coll->status[i].bytes); 10201fec7093SYehuda Sadeh coll->num_done++; 10211fec7093SYehuda Sadeh kref_put(&coll->kref, rbd_coll_release); 10221fec7093SYehuda Sadeh } 10231fec7093SYehuda Sadeh spin_unlock_irq(q->queue_lock); 10241fec7093SYehuda Sadeh } 10251fec7093SYehuda Sadeh 10261fec7093SYehuda Sadeh static void rbd_coll_end_req(struct rbd_request *req, 10271fec7093SYehuda Sadeh int ret, u64 len) 10281fec7093SYehuda Sadeh { 10291fec7093SYehuda Sadeh rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len); 10301fec7093SYehuda Sadeh } 10311fec7093SYehuda Sadeh 1032602adf40SYehuda Sadeh /* 1033602adf40SYehuda Sadeh * Send ceph osd request 1034602adf40SYehuda Sadeh */ 1035602adf40SYehuda Sadeh static int rbd_do_request(struct request *rq, 10360ce1a794SAlex Elder struct rbd_device *rbd_dev, 1037602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1038602adf40SYehuda Sadeh u64 snapid, 1039aded07eaSAlex Elder const char *object_name, u64 ofs, u64 len, 1040602adf40SYehuda Sadeh struct bio *bio, 1041602adf40SYehuda Sadeh struct page **pages, 1042602adf40SYehuda Sadeh int num_pages, 1043602adf40SYehuda Sadeh int flags, 1044602adf40SYehuda Sadeh struct ceph_osd_req_op *ops, 10451fec7093SYehuda Sadeh struct rbd_req_coll *coll, 10461fec7093SYehuda Sadeh int coll_index, 1047602adf40SYehuda Sadeh void (*rbd_cb)(struct ceph_osd_request *req, 104859c2be1eSYehuda Sadeh struct ceph_msg *msg), 104959c2be1eSYehuda Sadeh struct ceph_osd_request **linger_req, 105059c2be1eSYehuda Sadeh u64 *ver) 1051602adf40SYehuda Sadeh { 1052602adf40SYehuda Sadeh struct ceph_osd_request *req; 1053602adf40SYehuda Sadeh struct ceph_file_layout *layout; 1054602adf40SYehuda Sadeh int ret; 1055602adf40SYehuda Sadeh u64 bno; 1056602adf40SYehuda Sadeh struct timespec mtime = CURRENT_TIME; 1057602adf40SYehuda Sadeh struct rbd_request *req_data; 1058602adf40SYehuda Sadeh struct ceph_osd_request_head *reqhead; 10591dbb4399SAlex Elder struct ceph_osd_client *osdc; 1060602adf40SYehuda Sadeh 1061602adf40SYehuda Sadeh req_data = kzalloc(sizeof(*req_data), GFP_NOIO); 10621fec7093SYehuda Sadeh if (!req_data) { 10631fec7093SYehuda Sadeh if (coll) 10641fec7093SYehuda Sadeh rbd_coll_end_req_index(rq, coll, coll_index, 10651fec7093SYehuda Sadeh -ENOMEM, len); 10661fec7093SYehuda Sadeh return -ENOMEM; 10671fec7093SYehuda Sadeh } 1068602adf40SYehuda Sadeh 10691fec7093SYehuda Sadeh if (coll) { 10701fec7093SYehuda Sadeh req_data->coll = coll; 10711fec7093SYehuda Sadeh req_data->coll_index = coll_index; 10721fec7093SYehuda Sadeh } 10731fec7093SYehuda Sadeh 1074f7760dadSAlex Elder dout("rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]\n", 1075f7760dadSAlex Elder object_name, (unsigned long long) ofs, 1076f7760dadSAlex Elder (unsigned long long) len, coll, coll_index); 1077602adf40SYehuda Sadeh 10780ce1a794SAlex Elder osdc = &rbd_dev->rbd_client->client->osdc; 10791dbb4399SAlex Elder req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, 10801dbb4399SAlex Elder false, GFP_NOIO, pages, bio); 10814ad12621SSage Weil if (!req) { 10824ad12621SSage Weil ret = -ENOMEM; 1083602adf40SYehuda Sadeh goto done_pages; 1084602adf40SYehuda Sadeh } 1085602adf40SYehuda Sadeh 1086602adf40SYehuda Sadeh req->r_callback = rbd_cb; 1087602adf40SYehuda Sadeh 1088602adf40SYehuda Sadeh req_data->rq = rq; 1089602adf40SYehuda Sadeh req_data->bio = bio; 1090602adf40SYehuda Sadeh req_data->pages = pages; 1091602adf40SYehuda Sadeh req_data->len = len; 1092602adf40SYehuda Sadeh 1093602adf40SYehuda Sadeh req->r_priv = req_data; 1094602adf40SYehuda Sadeh 1095602adf40SYehuda Sadeh reqhead = req->r_request->front.iov_base; 1096602adf40SYehuda Sadeh reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); 1097602adf40SYehuda Sadeh 1098aded07eaSAlex Elder strncpy(req->r_oid, object_name, sizeof(req->r_oid)); 1099602adf40SYehuda Sadeh req->r_oid_len = strlen(req->r_oid); 1100602adf40SYehuda Sadeh 1101602adf40SYehuda Sadeh layout = &req->r_file_layout; 1102602adf40SYehuda Sadeh memset(layout, 0, sizeof(*layout)); 1103602adf40SYehuda Sadeh layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); 1104602adf40SYehuda Sadeh layout->fl_stripe_count = cpu_to_le32(1); 1105602adf40SYehuda Sadeh layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); 11060d7dbfceSAlex Elder layout->fl_pg_pool = cpu_to_le32((int) rbd_dev->spec->pool_id); 11076cae3717SSage Weil ret = ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, 11081dbb4399SAlex Elder req, ops); 11096cae3717SSage Weil rbd_assert(ret == 0); 1110602adf40SYehuda Sadeh 1111602adf40SYehuda Sadeh ceph_osdc_build_request(req, ofs, &len, 1112602adf40SYehuda Sadeh ops, 1113602adf40SYehuda Sadeh snapc, 1114602adf40SYehuda Sadeh &mtime, 1115602adf40SYehuda Sadeh req->r_oid, req->r_oid_len); 1116602adf40SYehuda Sadeh 111759c2be1eSYehuda Sadeh if (linger_req) { 11181dbb4399SAlex Elder ceph_osdc_set_request_linger(osdc, req); 111959c2be1eSYehuda Sadeh *linger_req = req; 112059c2be1eSYehuda Sadeh } 112159c2be1eSYehuda Sadeh 11221dbb4399SAlex Elder ret = ceph_osdc_start_request(osdc, req, false); 1123602adf40SYehuda Sadeh if (ret < 0) 1124602adf40SYehuda Sadeh goto done_err; 1125602adf40SYehuda Sadeh 1126602adf40SYehuda Sadeh if (!rbd_cb) { 11271dbb4399SAlex Elder ret = ceph_osdc_wait_request(osdc, req); 112859c2be1eSYehuda Sadeh if (ver) 112959c2be1eSYehuda Sadeh *ver = le64_to_cpu(req->r_reassert_version.version); 1130bd919d45SAlex Elder dout("reassert_ver=%llu\n", 1131bd919d45SAlex Elder (unsigned long long) 11321fec7093SYehuda Sadeh le64_to_cpu(req->r_reassert_version.version)); 1133602adf40SYehuda Sadeh ceph_osdc_put_request(req); 1134602adf40SYehuda Sadeh } 1135602adf40SYehuda Sadeh return ret; 1136602adf40SYehuda Sadeh 1137602adf40SYehuda Sadeh done_err: 1138602adf40SYehuda Sadeh bio_chain_put(req_data->bio); 1139602adf40SYehuda Sadeh ceph_osdc_put_request(req); 1140602adf40SYehuda Sadeh done_pages: 11411fec7093SYehuda Sadeh rbd_coll_end_req(req_data, ret, len); 1142602adf40SYehuda Sadeh kfree(req_data); 1143602adf40SYehuda Sadeh return ret; 1144602adf40SYehuda Sadeh } 1145602adf40SYehuda Sadeh 1146602adf40SYehuda Sadeh /* 1147602adf40SYehuda Sadeh * Ceph osd op callback 1148602adf40SYehuda Sadeh */ 1149602adf40SYehuda Sadeh static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) 1150602adf40SYehuda Sadeh { 1151602adf40SYehuda Sadeh struct rbd_request *req_data = req->r_priv; 1152602adf40SYehuda Sadeh struct ceph_osd_reply_head *replyhead; 1153602adf40SYehuda Sadeh struct ceph_osd_op *op; 1154602adf40SYehuda Sadeh __s32 rc; 1155602adf40SYehuda Sadeh u64 bytes; 1156602adf40SYehuda Sadeh int read_op; 1157602adf40SYehuda Sadeh 1158602adf40SYehuda Sadeh /* parse reply */ 1159602adf40SYehuda Sadeh replyhead = msg->front.iov_base; 1160602adf40SYehuda Sadeh WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); 1161602adf40SYehuda Sadeh op = (void *)(replyhead + 1); 1162602adf40SYehuda Sadeh rc = le32_to_cpu(replyhead->result); 1163602adf40SYehuda Sadeh bytes = le64_to_cpu(op->extent.length); 1164895cfcc8SDan Carpenter read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ); 1165602adf40SYehuda Sadeh 1166bd919d45SAlex Elder dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n", 1167bd919d45SAlex Elder (unsigned long long) bytes, read_op, (int) rc); 1168602adf40SYehuda Sadeh 1169602adf40SYehuda Sadeh if (rc == -ENOENT && read_op) { 1170602adf40SYehuda Sadeh zero_bio_chain(req_data->bio, 0); 1171602adf40SYehuda Sadeh rc = 0; 1172602adf40SYehuda Sadeh } else if (rc == 0 && read_op && bytes < req_data->len) { 1173602adf40SYehuda Sadeh zero_bio_chain(req_data->bio, bytes); 1174602adf40SYehuda Sadeh bytes = req_data->len; 1175602adf40SYehuda Sadeh } 1176602adf40SYehuda Sadeh 11771fec7093SYehuda Sadeh rbd_coll_end_req(req_data, rc, bytes); 1178602adf40SYehuda Sadeh 1179602adf40SYehuda Sadeh if (req_data->bio) 1180602adf40SYehuda Sadeh bio_chain_put(req_data->bio); 1181602adf40SYehuda Sadeh 1182602adf40SYehuda Sadeh ceph_osdc_put_request(req); 1183602adf40SYehuda Sadeh kfree(req_data); 1184602adf40SYehuda Sadeh } 1185602adf40SYehuda Sadeh 118659c2be1eSYehuda Sadeh static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) 118759c2be1eSYehuda Sadeh { 118859c2be1eSYehuda Sadeh ceph_osdc_put_request(req); 118959c2be1eSYehuda Sadeh } 119059c2be1eSYehuda Sadeh 1191602adf40SYehuda Sadeh /* 1192602adf40SYehuda Sadeh * Do a synchronous ceph osd operation 1193602adf40SYehuda Sadeh */ 11940ce1a794SAlex Elder static int rbd_req_sync_op(struct rbd_device *rbd_dev, 1195602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1196602adf40SYehuda Sadeh u64 snapid, 1197602adf40SYehuda Sadeh int flags, 1198913d2fdcSAlex Elder struct ceph_osd_req_op *ops, 1199aded07eaSAlex Elder const char *object_name, 1200f8d4de6eSAlex Elder u64 ofs, u64 inbound_size, 1201f8d4de6eSAlex Elder char *inbound, 120259c2be1eSYehuda Sadeh struct ceph_osd_request **linger_req, 120359c2be1eSYehuda Sadeh u64 *ver) 1204602adf40SYehuda Sadeh { 1205602adf40SYehuda Sadeh int ret; 1206602adf40SYehuda Sadeh struct page **pages; 1207602adf40SYehuda Sadeh int num_pages; 1208913d2fdcSAlex Elder 1209aafb230eSAlex Elder rbd_assert(ops != NULL); 1210602adf40SYehuda Sadeh 1211f8d4de6eSAlex Elder num_pages = calc_pages_for(ofs, inbound_size); 1212602adf40SYehuda Sadeh pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); 1213b8d0638aSDan Carpenter if (IS_ERR(pages)) 1214b8d0638aSDan Carpenter return PTR_ERR(pages); 1215602adf40SYehuda Sadeh 12160ce1a794SAlex Elder ret = rbd_do_request(NULL, rbd_dev, snapc, snapid, 1217f8d4de6eSAlex Elder object_name, ofs, inbound_size, NULL, 1218602adf40SYehuda Sadeh pages, num_pages, 1219602adf40SYehuda Sadeh flags, 1220602adf40SYehuda Sadeh ops, 12211fec7093SYehuda Sadeh NULL, 0, 122259c2be1eSYehuda Sadeh NULL, 122359c2be1eSYehuda Sadeh linger_req, ver); 1224602adf40SYehuda Sadeh if (ret < 0) 1225913d2fdcSAlex Elder goto done; 1226602adf40SYehuda Sadeh 1227f8d4de6eSAlex Elder if ((flags & CEPH_OSD_FLAG_READ) && inbound) 1228f8d4de6eSAlex Elder ret = ceph_copy_from_page_vector(pages, inbound, ofs, ret); 1229602adf40SYehuda Sadeh 1230602adf40SYehuda Sadeh done: 1231602adf40SYehuda Sadeh ceph_release_page_vector(pages, num_pages); 1232602adf40SYehuda Sadeh return ret; 1233602adf40SYehuda Sadeh } 1234602adf40SYehuda Sadeh 1235602adf40SYehuda Sadeh /* 1236602adf40SYehuda Sadeh * Do an asynchronous ceph osd operation 1237602adf40SYehuda Sadeh */ 1238602adf40SYehuda Sadeh static int rbd_do_op(struct request *rq, 1239602adf40SYehuda Sadeh struct rbd_device *rbd_dev, 1240602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1241602adf40SYehuda Sadeh u64 ofs, u64 len, 12421fec7093SYehuda Sadeh struct bio *bio, 12431fec7093SYehuda Sadeh struct rbd_req_coll *coll, 12441fec7093SYehuda Sadeh int coll_index) 1245602adf40SYehuda Sadeh { 1246602adf40SYehuda Sadeh char *seg_name; 1247602adf40SYehuda Sadeh u64 seg_ofs; 1248602adf40SYehuda Sadeh u64 seg_len; 1249602adf40SYehuda Sadeh int ret; 1250602adf40SYehuda Sadeh struct ceph_osd_req_op *ops; 1251602adf40SYehuda Sadeh u32 payload_len; 1252ff2e4bb5SAlex Elder int opcode; 1253ff2e4bb5SAlex Elder int flags; 12544634246dSAlex Elder u64 snapid; 1255602adf40SYehuda Sadeh 125665ccfe21SAlex Elder seg_name = rbd_segment_name(rbd_dev, ofs); 1257602adf40SYehuda Sadeh if (!seg_name) 1258602adf40SYehuda Sadeh return -ENOMEM; 125965ccfe21SAlex Elder seg_len = rbd_segment_length(rbd_dev, ofs, len); 126065ccfe21SAlex Elder seg_ofs = rbd_segment_offset(rbd_dev, ofs); 1261602adf40SYehuda Sadeh 1262ff2e4bb5SAlex Elder if (rq_data_dir(rq) == WRITE) { 1263ff2e4bb5SAlex Elder opcode = CEPH_OSD_OP_WRITE; 1264ff2e4bb5SAlex Elder flags = CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK; 12654634246dSAlex Elder snapid = CEPH_NOSNAP; 1266ff2e4bb5SAlex Elder payload_len = seg_len; 1267ff2e4bb5SAlex Elder } else { 1268ff2e4bb5SAlex Elder opcode = CEPH_OSD_OP_READ; 1269ff2e4bb5SAlex Elder flags = CEPH_OSD_FLAG_READ; 12704634246dSAlex Elder snapc = NULL; 12710d7dbfceSAlex Elder snapid = rbd_dev->spec->snap_id; 1272ff2e4bb5SAlex Elder payload_len = 0; 1273ff2e4bb5SAlex Elder } 1274602adf40SYehuda Sadeh 127557cfc106SAlex Elder ret = -ENOMEM; 127657cfc106SAlex Elder ops = rbd_create_rw_ops(1, opcode, payload_len); 127757cfc106SAlex Elder if (!ops) 1278602adf40SYehuda Sadeh goto done; 1279602adf40SYehuda Sadeh 1280602adf40SYehuda Sadeh /* we've taken care of segment sizes earlier when we 1281602adf40SYehuda Sadeh cloned the bios. We should never have a segment 1282602adf40SYehuda Sadeh truncated at this point */ 1283aafb230eSAlex Elder rbd_assert(seg_len == len); 1284602adf40SYehuda Sadeh 1285602adf40SYehuda Sadeh ret = rbd_do_request(rq, rbd_dev, snapc, snapid, 1286602adf40SYehuda Sadeh seg_name, seg_ofs, seg_len, 1287602adf40SYehuda Sadeh bio, 1288602adf40SYehuda Sadeh NULL, 0, 1289602adf40SYehuda Sadeh flags, 1290602adf40SYehuda Sadeh ops, 12911fec7093SYehuda Sadeh coll, coll_index, 129259c2be1eSYehuda Sadeh rbd_req_cb, 0, NULL); 129311f77002SSage Weil 129411f77002SSage Weil rbd_destroy_ops(ops); 1295602adf40SYehuda Sadeh done: 1296602adf40SYehuda Sadeh kfree(seg_name); 1297602adf40SYehuda Sadeh return ret; 1298602adf40SYehuda Sadeh } 1299602adf40SYehuda Sadeh 1300602adf40SYehuda Sadeh /* 1301602adf40SYehuda Sadeh * Request sync osd read 1302602adf40SYehuda Sadeh */ 13030ce1a794SAlex Elder static int rbd_req_sync_read(struct rbd_device *rbd_dev, 1304602adf40SYehuda Sadeh u64 snapid, 1305aded07eaSAlex Elder const char *object_name, 1306602adf40SYehuda Sadeh u64 ofs, u64 len, 130759c2be1eSYehuda Sadeh char *buf, 130859c2be1eSYehuda Sadeh u64 *ver) 1309602adf40SYehuda Sadeh { 1310913d2fdcSAlex Elder struct ceph_osd_req_op *ops; 1311913d2fdcSAlex Elder int ret; 1312913d2fdcSAlex Elder 1313913d2fdcSAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_READ, 0); 1314913d2fdcSAlex Elder if (!ops) 1315913d2fdcSAlex Elder return -ENOMEM; 1316913d2fdcSAlex Elder 1317913d2fdcSAlex Elder ret = rbd_req_sync_op(rbd_dev, NULL, 1318b06e6a6bSJosh Durgin snapid, 1319602adf40SYehuda Sadeh CEPH_OSD_FLAG_READ, 1320913d2fdcSAlex Elder ops, object_name, ofs, len, buf, NULL, ver); 1321913d2fdcSAlex Elder rbd_destroy_ops(ops); 1322913d2fdcSAlex Elder 1323913d2fdcSAlex Elder return ret; 1324602adf40SYehuda Sadeh } 1325602adf40SYehuda Sadeh 1326602adf40SYehuda Sadeh /* 132759c2be1eSYehuda Sadeh * Request sync osd watch 132859c2be1eSYehuda Sadeh */ 13290ce1a794SAlex Elder static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev, 133059c2be1eSYehuda Sadeh u64 ver, 13317f0a24d8SAlex Elder u64 notify_id) 133259c2be1eSYehuda Sadeh { 133359c2be1eSYehuda Sadeh struct ceph_osd_req_op *ops; 133411f77002SSage Weil int ret; 133511f77002SSage Weil 133657cfc106SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY_ACK, 0); 133757cfc106SAlex Elder if (!ops) 133857cfc106SAlex Elder return -ENOMEM; 133959c2be1eSYehuda Sadeh 1340a71b891bSJosh Durgin ops[0].watch.ver = cpu_to_le64(ver); 134159c2be1eSYehuda Sadeh ops[0].watch.cookie = notify_id; 134259c2be1eSYehuda Sadeh ops[0].watch.flag = 0; 134359c2be1eSYehuda Sadeh 13440ce1a794SAlex Elder ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP, 13457f0a24d8SAlex Elder rbd_dev->header_name, 0, 0, NULL, 1346ad4f232fSAlex Elder NULL, 0, 134759c2be1eSYehuda Sadeh CEPH_OSD_FLAG_READ, 134859c2be1eSYehuda Sadeh ops, 13491fec7093SYehuda Sadeh NULL, 0, 135059c2be1eSYehuda Sadeh rbd_simple_req_cb, 0, NULL); 135159c2be1eSYehuda Sadeh 135259c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 135359c2be1eSYehuda Sadeh return ret; 135459c2be1eSYehuda Sadeh } 135559c2be1eSYehuda Sadeh 135659c2be1eSYehuda Sadeh static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) 135759c2be1eSYehuda Sadeh { 13580ce1a794SAlex Elder struct rbd_device *rbd_dev = (struct rbd_device *)data; 1359a71b891bSJosh Durgin u64 hver; 136013143d2dSSage Weil int rc; 136113143d2dSSage Weil 13620ce1a794SAlex Elder if (!rbd_dev) 136359c2be1eSYehuda Sadeh return; 136459c2be1eSYehuda Sadeh 1365bd919d45SAlex Elder dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n", 1366bd919d45SAlex Elder rbd_dev->header_name, (unsigned long long) notify_id, 1367bd919d45SAlex Elder (unsigned int) opcode); 1368117973fbSAlex Elder rc = rbd_dev_refresh(rbd_dev, &hver); 136913143d2dSSage Weil if (rc) 1370f0f8cef5SAlex Elder pr_warning(RBD_DRV_NAME "%d got notification but failed to " 13710ce1a794SAlex Elder " update snaps: %d\n", rbd_dev->major, rc); 137259c2be1eSYehuda Sadeh 13737f0a24d8SAlex Elder rbd_req_sync_notify_ack(rbd_dev, hver, notify_id); 137459c2be1eSYehuda Sadeh } 137559c2be1eSYehuda Sadeh 137659c2be1eSYehuda Sadeh /* 137759c2be1eSYehuda Sadeh * Request sync osd watch 137859c2be1eSYehuda Sadeh */ 13790e6f322dSAlex Elder static int rbd_req_sync_watch(struct rbd_device *rbd_dev) 138059c2be1eSYehuda Sadeh { 138159c2be1eSYehuda Sadeh struct ceph_osd_req_op *ops; 13820ce1a794SAlex Elder struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 138357cfc106SAlex Elder int ret; 138459c2be1eSYehuda Sadeh 138557cfc106SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); 138657cfc106SAlex Elder if (!ops) 138757cfc106SAlex Elder return -ENOMEM; 138859c2be1eSYehuda Sadeh 138959c2be1eSYehuda Sadeh ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, 13900ce1a794SAlex Elder (void *)rbd_dev, &rbd_dev->watch_event); 139159c2be1eSYehuda Sadeh if (ret < 0) 139259c2be1eSYehuda Sadeh goto fail; 139359c2be1eSYehuda Sadeh 13940e6f322dSAlex Elder ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version); 13950ce1a794SAlex Elder ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); 139659c2be1eSYehuda Sadeh ops[0].watch.flag = 1; 139759c2be1eSYehuda Sadeh 13980ce1a794SAlex Elder ret = rbd_req_sync_op(rbd_dev, NULL, 139959c2be1eSYehuda Sadeh CEPH_NOSNAP, 140059c2be1eSYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 140159c2be1eSYehuda Sadeh ops, 14020e6f322dSAlex Elder rbd_dev->header_name, 14030e6f322dSAlex Elder 0, 0, NULL, 14040ce1a794SAlex Elder &rbd_dev->watch_request, NULL); 140559c2be1eSYehuda Sadeh 140659c2be1eSYehuda Sadeh if (ret < 0) 140759c2be1eSYehuda Sadeh goto fail_event; 140859c2be1eSYehuda Sadeh 140959c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 141059c2be1eSYehuda Sadeh return 0; 141159c2be1eSYehuda Sadeh 141259c2be1eSYehuda Sadeh fail_event: 14130ce1a794SAlex Elder ceph_osdc_cancel_event(rbd_dev->watch_event); 14140ce1a794SAlex Elder rbd_dev->watch_event = NULL; 141559c2be1eSYehuda Sadeh fail: 141659c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 141759c2be1eSYehuda Sadeh return ret; 141859c2be1eSYehuda Sadeh } 141959c2be1eSYehuda Sadeh 142079e3057cSYehuda Sadeh /* 142179e3057cSYehuda Sadeh * Request sync osd unwatch 142279e3057cSYehuda Sadeh */ 1423070c633fSAlex Elder static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev) 142479e3057cSYehuda Sadeh { 142579e3057cSYehuda Sadeh struct ceph_osd_req_op *ops; 142657cfc106SAlex Elder int ret; 142779e3057cSYehuda Sadeh 142857cfc106SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); 142957cfc106SAlex Elder if (!ops) 143057cfc106SAlex Elder return -ENOMEM; 143179e3057cSYehuda Sadeh 143279e3057cSYehuda Sadeh ops[0].watch.ver = 0; 14330ce1a794SAlex Elder ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); 143479e3057cSYehuda Sadeh ops[0].watch.flag = 0; 143579e3057cSYehuda Sadeh 14360ce1a794SAlex Elder ret = rbd_req_sync_op(rbd_dev, NULL, 143779e3057cSYehuda Sadeh CEPH_NOSNAP, 143879e3057cSYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 143979e3057cSYehuda Sadeh ops, 1440070c633fSAlex Elder rbd_dev->header_name, 1441070c633fSAlex Elder 0, 0, NULL, NULL, NULL); 1442070c633fSAlex Elder 144379e3057cSYehuda Sadeh 144479e3057cSYehuda Sadeh rbd_destroy_ops(ops); 14450ce1a794SAlex Elder ceph_osdc_cancel_event(rbd_dev->watch_event); 14460ce1a794SAlex Elder rbd_dev->watch_event = NULL; 144779e3057cSYehuda Sadeh return ret; 144879e3057cSYehuda Sadeh } 144979e3057cSYehuda Sadeh 145059c2be1eSYehuda Sadeh /* 14513cb4a687SAlex Elder * Synchronous osd object method call 1452602adf40SYehuda Sadeh */ 14530ce1a794SAlex Elder static int rbd_req_sync_exec(struct rbd_device *rbd_dev, 1454aded07eaSAlex Elder const char *object_name, 1455aded07eaSAlex Elder const char *class_name, 1456aded07eaSAlex Elder const char *method_name, 14573cb4a687SAlex Elder const char *outbound, 14583cb4a687SAlex Elder size_t outbound_size, 1459f8d4de6eSAlex Elder char *inbound, 1460f8d4de6eSAlex Elder size_t inbound_size, 14613cb4a687SAlex Elder int flags, 146259c2be1eSYehuda Sadeh u64 *ver) 1463602adf40SYehuda Sadeh { 1464602adf40SYehuda Sadeh struct ceph_osd_req_op *ops; 1465aded07eaSAlex Elder int class_name_len = strlen(class_name); 1466aded07eaSAlex Elder int method_name_len = strlen(method_name); 14673cb4a687SAlex Elder int payload_size; 146857cfc106SAlex Elder int ret; 146957cfc106SAlex Elder 14703cb4a687SAlex Elder /* 14713cb4a687SAlex Elder * Any input parameters required by the method we're calling 14723cb4a687SAlex Elder * will be sent along with the class and method names as 14733cb4a687SAlex Elder * part of the message payload. That data and its size are 14743cb4a687SAlex Elder * supplied via the indata and indata_len fields (named from 14753cb4a687SAlex Elder * the perspective of the server side) in the OSD request 14763cb4a687SAlex Elder * operation. 14773cb4a687SAlex Elder */ 14783cb4a687SAlex Elder payload_size = class_name_len + method_name_len + outbound_size; 14793cb4a687SAlex Elder ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, payload_size); 148057cfc106SAlex Elder if (!ops) 148157cfc106SAlex Elder return -ENOMEM; 1482602adf40SYehuda Sadeh 1483aded07eaSAlex Elder ops[0].cls.class_name = class_name; 1484aded07eaSAlex Elder ops[0].cls.class_len = (__u8) class_name_len; 1485aded07eaSAlex Elder ops[0].cls.method_name = method_name; 1486aded07eaSAlex Elder ops[0].cls.method_len = (__u8) method_name_len; 1487602adf40SYehuda Sadeh ops[0].cls.argc = 0; 14883cb4a687SAlex Elder ops[0].cls.indata = outbound; 14893cb4a687SAlex Elder ops[0].cls.indata_len = outbound_size; 1490602adf40SYehuda Sadeh 14910ce1a794SAlex Elder ret = rbd_req_sync_op(rbd_dev, NULL, 1492602adf40SYehuda Sadeh CEPH_NOSNAP, 14933cb4a687SAlex Elder flags, ops, 1494f8d4de6eSAlex Elder object_name, 0, inbound_size, inbound, 1495f8d4de6eSAlex Elder NULL, ver); 1496602adf40SYehuda Sadeh 1497602adf40SYehuda Sadeh rbd_destroy_ops(ops); 1498602adf40SYehuda Sadeh 1499602adf40SYehuda Sadeh dout("cls_exec returned %d\n", ret); 1500602adf40SYehuda Sadeh return ret; 1501602adf40SYehuda Sadeh } 1502602adf40SYehuda Sadeh 15031fec7093SYehuda Sadeh static struct rbd_req_coll *rbd_alloc_coll(int num_reqs) 15041fec7093SYehuda Sadeh { 15051fec7093SYehuda Sadeh struct rbd_req_coll *coll = 15061fec7093SYehuda Sadeh kzalloc(sizeof(struct rbd_req_coll) + 15071fec7093SYehuda Sadeh sizeof(struct rbd_req_status) * num_reqs, 15081fec7093SYehuda Sadeh GFP_ATOMIC); 15091fec7093SYehuda Sadeh 15101fec7093SYehuda Sadeh if (!coll) 15111fec7093SYehuda Sadeh return NULL; 15121fec7093SYehuda Sadeh coll->total = num_reqs; 15131fec7093SYehuda Sadeh kref_init(&coll->kref); 15141fec7093SYehuda Sadeh return coll; 15151fec7093SYehuda Sadeh } 15161fec7093SYehuda Sadeh 1517602adf40SYehuda Sadeh /* 1518602adf40SYehuda Sadeh * block device queue callback 1519602adf40SYehuda Sadeh */ 1520602adf40SYehuda Sadeh static void rbd_rq_fn(struct request_queue *q) 1521602adf40SYehuda Sadeh { 1522602adf40SYehuda Sadeh struct rbd_device *rbd_dev = q->queuedata; 1523602adf40SYehuda Sadeh struct request *rq; 1524602adf40SYehuda Sadeh 152500f1f36fSAlex Elder while ((rq = blk_fetch_request(q))) { 1526602adf40SYehuda Sadeh struct bio *bio; 1527602adf40SYehuda Sadeh bool do_write; 1528bd919d45SAlex Elder unsigned int size; 1529602adf40SYehuda Sadeh u64 ofs; 15301fec7093SYehuda Sadeh int num_segs, cur_seg = 0; 15311fec7093SYehuda Sadeh struct rbd_req_coll *coll; 1532d1d25646SJosh Durgin struct ceph_snap_context *snapc; 1533f7760dadSAlex Elder unsigned int bio_offset; 1534602adf40SYehuda Sadeh 1535602adf40SYehuda Sadeh dout("fetched request\n"); 1536602adf40SYehuda Sadeh 1537602adf40SYehuda Sadeh /* filter out block requests we don't understand */ 1538602adf40SYehuda Sadeh if ((rq->cmd_type != REQ_TYPE_FS)) { 1539602adf40SYehuda Sadeh __blk_end_request_all(rq, 0); 154000f1f36fSAlex Elder continue; 1541602adf40SYehuda Sadeh } 1542602adf40SYehuda Sadeh 1543602adf40SYehuda Sadeh /* deduce our operation (read, write) */ 1544602adf40SYehuda Sadeh do_write = (rq_data_dir(rq) == WRITE); 1545f84344f3SAlex Elder if (do_write && rbd_dev->mapping.read_only) { 1546602adf40SYehuda Sadeh __blk_end_request_all(rq, -EROFS); 154700f1f36fSAlex Elder continue; 1548602adf40SYehuda Sadeh } 1549602adf40SYehuda Sadeh 1550602adf40SYehuda Sadeh spin_unlock_irq(q->queue_lock); 1551602adf40SYehuda Sadeh 1552e88a36ecSJosh Durgin down_read(&rbd_dev->header_rwsem); 1553e88a36ecSJosh Durgin 1554daba5fdbSAlex Elder if (!rbd_dev->exists) { 15550d7dbfceSAlex Elder rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); 1556d1d25646SJosh Durgin up_read(&rbd_dev->header_rwsem); 1557e88a36ecSJosh Durgin dout("request for non-existent snapshot"); 1558e88a36ecSJosh Durgin spin_lock_irq(q->queue_lock); 1559e88a36ecSJosh Durgin __blk_end_request_all(rq, -ENXIO); 1560e88a36ecSJosh Durgin continue; 1561e88a36ecSJosh Durgin } 1562d1d25646SJosh Durgin 1563d1d25646SJosh Durgin snapc = ceph_get_snap_context(rbd_dev->header.snapc); 1564d1d25646SJosh Durgin 1565d1d25646SJosh Durgin up_read(&rbd_dev->header_rwsem); 1566e88a36ecSJosh Durgin 1567f7760dadSAlex Elder size = blk_rq_bytes(rq); 1568f7760dadSAlex Elder ofs = blk_rq_pos(rq) * SECTOR_SIZE; 1569f7760dadSAlex Elder bio = rq->bio; 1570f7760dadSAlex Elder 1571602adf40SYehuda Sadeh dout("%s 0x%x bytes at 0x%llx\n", 1572602adf40SYehuda Sadeh do_write ? "write" : "read", 1573bd919d45SAlex Elder size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); 1574602adf40SYehuda Sadeh 15751fec7093SYehuda Sadeh num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); 1576df111be6SAlex Elder if (num_segs <= 0) { 1577df111be6SAlex Elder spin_lock_irq(q->queue_lock); 1578df111be6SAlex Elder __blk_end_request_all(rq, num_segs); 1579df111be6SAlex Elder ceph_put_snap_context(snapc); 1580df111be6SAlex Elder continue; 1581df111be6SAlex Elder } 15821fec7093SYehuda Sadeh coll = rbd_alloc_coll(num_segs); 15831fec7093SYehuda Sadeh if (!coll) { 15841fec7093SYehuda Sadeh spin_lock_irq(q->queue_lock); 15851fec7093SYehuda Sadeh __blk_end_request_all(rq, -ENOMEM); 1586d1d25646SJosh Durgin ceph_put_snap_context(snapc); 158700f1f36fSAlex Elder continue; 15881fec7093SYehuda Sadeh } 15891fec7093SYehuda Sadeh 1590f7760dadSAlex Elder bio_offset = 0; 1591602adf40SYehuda Sadeh do { 1592f7760dadSAlex Elder u64 limit = rbd_segment_length(rbd_dev, ofs, size); 1593f7760dadSAlex Elder unsigned int chain_size; 1594f7760dadSAlex Elder struct bio *bio_chain; 1595f7760dadSAlex Elder 1596f7760dadSAlex Elder BUG_ON(limit > (u64) UINT_MAX); 1597f7760dadSAlex Elder chain_size = (unsigned int) limit; 1598bd919d45SAlex Elder dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); 1599f7760dadSAlex Elder 16001fec7093SYehuda Sadeh kref_get(&coll->kref); 1601f7760dadSAlex Elder 1602f7760dadSAlex Elder /* Pass a cloned bio chain via an osd request */ 1603f7760dadSAlex Elder 1604f7760dadSAlex Elder bio_chain = bio_chain_clone_range(&bio, 1605f7760dadSAlex Elder &bio_offset, chain_size, 1606f7760dadSAlex Elder GFP_ATOMIC); 1607f7760dadSAlex Elder if (bio_chain) 16084634246dSAlex Elder (void) rbd_do_op(rq, rbd_dev, snapc, 1609f7760dadSAlex Elder ofs, chain_size, 1610f7760dadSAlex Elder bio_chain, coll, cur_seg); 16114634246dSAlex Elder else 16121fec7093SYehuda Sadeh rbd_coll_end_req_index(rq, coll, cur_seg, 1613f7760dadSAlex Elder -ENOMEM, chain_size); 1614f7760dadSAlex Elder size -= chain_size; 1615f7760dadSAlex Elder ofs += chain_size; 1616602adf40SYehuda Sadeh 16171fec7093SYehuda Sadeh cur_seg++; 1618602adf40SYehuda Sadeh } while (size > 0); 16191fec7093SYehuda Sadeh kref_put(&coll->kref, rbd_coll_release); 1620602adf40SYehuda Sadeh 1621602adf40SYehuda Sadeh spin_lock_irq(q->queue_lock); 1622d1d25646SJosh Durgin 1623d1d25646SJosh Durgin ceph_put_snap_context(snapc); 1624602adf40SYehuda Sadeh } 1625602adf40SYehuda Sadeh } 1626602adf40SYehuda Sadeh 1627602adf40SYehuda Sadeh /* 1628602adf40SYehuda Sadeh * a queue callback. Makes sure that we don't create a bio that spans across 1629602adf40SYehuda Sadeh * multiple osd objects. One exception would be with a single page bios, 1630f7760dadSAlex Elder * which we handle later at bio_chain_clone_range() 1631602adf40SYehuda Sadeh */ 1632602adf40SYehuda Sadeh static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, 1633602adf40SYehuda Sadeh struct bio_vec *bvec) 1634602adf40SYehuda Sadeh { 1635602adf40SYehuda Sadeh struct rbd_device *rbd_dev = q->queuedata; 1636e5cfeed2SAlex Elder sector_t sector_offset; 1637e5cfeed2SAlex Elder sector_t sectors_per_obj; 1638e5cfeed2SAlex Elder sector_t obj_sector_offset; 1639e5cfeed2SAlex Elder int ret; 1640602adf40SYehuda Sadeh 1641e5cfeed2SAlex Elder /* 1642e5cfeed2SAlex Elder * Find how far into its rbd object the partition-relative 1643e5cfeed2SAlex Elder * bio start sector is to offset relative to the enclosing 1644e5cfeed2SAlex Elder * device. 1645e5cfeed2SAlex Elder */ 1646e5cfeed2SAlex Elder sector_offset = get_start_sect(bmd->bi_bdev) + bmd->bi_sector; 1647e5cfeed2SAlex Elder sectors_per_obj = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT); 1648e5cfeed2SAlex Elder obj_sector_offset = sector_offset & (sectors_per_obj - 1); 1649593a9e7bSAlex Elder 1650e5cfeed2SAlex Elder /* 1651e5cfeed2SAlex Elder * Compute the number of bytes from that offset to the end 1652e5cfeed2SAlex Elder * of the object. Account for what's already used by the bio. 1653e5cfeed2SAlex Elder */ 1654e5cfeed2SAlex Elder ret = (int) (sectors_per_obj - obj_sector_offset) << SECTOR_SHIFT; 1655e5cfeed2SAlex Elder if (ret > bmd->bi_size) 1656e5cfeed2SAlex Elder ret -= bmd->bi_size; 1657e5cfeed2SAlex Elder else 1658e5cfeed2SAlex Elder ret = 0; 1659e5cfeed2SAlex Elder 1660e5cfeed2SAlex Elder /* 1661e5cfeed2SAlex Elder * Don't send back more than was asked for. And if the bio 1662e5cfeed2SAlex Elder * was empty, let the whole thing through because: "Note 1663e5cfeed2SAlex Elder * that a block device *must* allow a single page to be 1664e5cfeed2SAlex Elder * added to an empty bio." 1665e5cfeed2SAlex Elder */ 1666e5cfeed2SAlex Elder rbd_assert(bvec->bv_len <= PAGE_SIZE); 1667e5cfeed2SAlex Elder if (ret > (int) bvec->bv_len || !bmd->bi_size) 1668e5cfeed2SAlex Elder ret = (int) bvec->bv_len; 1669e5cfeed2SAlex Elder 1670e5cfeed2SAlex Elder return ret; 1671602adf40SYehuda Sadeh } 1672602adf40SYehuda Sadeh 1673602adf40SYehuda Sadeh static void rbd_free_disk(struct rbd_device *rbd_dev) 1674602adf40SYehuda Sadeh { 1675602adf40SYehuda Sadeh struct gendisk *disk = rbd_dev->disk; 1676602adf40SYehuda Sadeh 1677602adf40SYehuda Sadeh if (!disk) 1678602adf40SYehuda Sadeh return; 1679602adf40SYehuda Sadeh 1680602adf40SYehuda Sadeh if (disk->flags & GENHD_FL_UP) 1681602adf40SYehuda Sadeh del_gendisk(disk); 1682602adf40SYehuda Sadeh if (disk->queue) 1683602adf40SYehuda Sadeh blk_cleanup_queue(disk->queue); 1684602adf40SYehuda Sadeh put_disk(disk); 1685602adf40SYehuda Sadeh } 1686602adf40SYehuda Sadeh 1687602adf40SYehuda Sadeh /* 16884156d998SAlex Elder * Read the complete header for the given rbd device. 16894156d998SAlex Elder * 16904156d998SAlex Elder * Returns a pointer to a dynamically-allocated buffer containing 16914156d998SAlex Elder * the complete and validated header. Caller can pass the address 16924156d998SAlex Elder * of a variable that will be filled in with the version of the 16934156d998SAlex Elder * header object at the time it was read. 16944156d998SAlex Elder * 16954156d998SAlex Elder * Returns a pointer-coded errno if a failure occurs. 16964156d998SAlex Elder */ 16974156d998SAlex Elder static struct rbd_image_header_ondisk * 16984156d998SAlex Elder rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) 16994156d998SAlex Elder { 17004156d998SAlex Elder struct rbd_image_header_ondisk *ondisk = NULL; 17014156d998SAlex Elder u32 snap_count = 0; 17024156d998SAlex Elder u64 names_size = 0; 17034156d998SAlex Elder u32 want_count; 17044156d998SAlex Elder int ret; 17054156d998SAlex Elder 17064156d998SAlex Elder /* 17074156d998SAlex Elder * The complete header will include an array of its 64-bit 17084156d998SAlex Elder * snapshot ids, followed by the names of those snapshots as 17094156d998SAlex Elder * a contiguous block of NUL-terminated strings. Note that 17104156d998SAlex Elder * the number of snapshots could change by the time we read 17114156d998SAlex Elder * it in, in which case we re-read it. 17124156d998SAlex Elder */ 17134156d998SAlex Elder do { 17144156d998SAlex Elder size_t size; 17154156d998SAlex Elder 17164156d998SAlex Elder kfree(ondisk); 17174156d998SAlex Elder 17184156d998SAlex Elder size = sizeof (*ondisk); 17194156d998SAlex Elder size += snap_count * sizeof (struct rbd_image_snap_ondisk); 17204156d998SAlex Elder size += names_size; 17214156d998SAlex Elder ondisk = kmalloc(size, GFP_KERNEL); 17224156d998SAlex Elder if (!ondisk) 17234156d998SAlex Elder return ERR_PTR(-ENOMEM); 17244156d998SAlex Elder 17254156d998SAlex Elder ret = rbd_req_sync_read(rbd_dev, CEPH_NOSNAP, 17264156d998SAlex Elder rbd_dev->header_name, 17274156d998SAlex Elder 0, size, 17284156d998SAlex Elder (char *) ondisk, version); 17294156d998SAlex Elder 17304156d998SAlex Elder if (ret < 0) 17314156d998SAlex Elder goto out_err; 17324156d998SAlex Elder if (WARN_ON((size_t) ret < size)) { 17334156d998SAlex Elder ret = -ENXIO; 17344156d998SAlex Elder pr_warning("short header read for image %s" 17354156d998SAlex Elder " (want %zd got %d)\n", 17360d7dbfceSAlex Elder rbd_dev->spec->image_name, size, ret); 17374156d998SAlex Elder goto out_err; 17384156d998SAlex Elder } 17394156d998SAlex Elder if (!rbd_dev_ondisk_valid(ondisk)) { 17404156d998SAlex Elder ret = -ENXIO; 17414156d998SAlex Elder pr_warning("invalid header for image %s\n", 17420d7dbfceSAlex Elder rbd_dev->spec->image_name); 17434156d998SAlex Elder goto out_err; 17444156d998SAlex Elder } 17454156d998SAlex Elder 17464156d998SAlex Elder names_size = le64_to_cpu(ondisk->snap_names_len); 17474156d998SAlex Elder want_count = snap_count; 17484156d998SAlex Elder snap_count = le32_to_cpu(ondisk->snap_count); 17494156d998SAlex Elder } while (snap_count != want_count); 17504156d998SAlex Elder 17514156d998SAlex Elder return ondisk; 17524156d998SAlex Elder 17534156d998SAlex Elder out_err: 17544156d998SAlex Elder kfree(ondisk); 17554156d998SAlex Elder 17564156d998SAlex Elder return ERR_PTR(ret); 17574156d998SAlex Elder } 17584156d998SAlex Elder 17594156d998SAlex Elder /* 1760602adf40SYehuda Sadeh * reload the ondisk the header 1761602adf40SYehuda Sadeh */ 1762602adf40SYehuda Sadeh static int rbd_read_header(struct rbd_device *rbd_dev, 1763602adf40SYehuda Sadeh struct rbd_image_header *header) 1764602adf40SYehuda Sadeh { 17654156d998SAlex Elder struct rbd_image_header_ondisk *ondisk; 17664156d998SAlex Elder u64 ver = 0; 17674156d998SAlex Elder int ret; 1768602adf40SYehuda Sadeh 17694156d998SAlex Elder ondisk = rbd_dev_v1_header_read(rbd_dev, &ver); 17704156d998SAlex Elder if (IS_ERR(ondisk)) 17714156d998SAlex Elder return PTR_ERR(ondisk); 17724156d998SAlex Elder ret = rbd_header_from_disk(header, ondisk); 17734156d998SAlex Elder if (ret >= 0) 177459c2be1eSYehuda Sadeh header->obj_version = ver; 17754156d998SAlex Elder kfree(ondisk); 1776602adf40SYehuda Sadeh 17774156d998SAlex Elder return ret; 1778602adf40SYehuda Sadeh } 1779602adf40SYehuda Sadeh 178041f38c2bSAlex Elder static void rbd_remove_all_snaps(struct rbd_device *rbd_dev) 1781dfc5606dSYehuda Sadeh { 1782dfc5606dSYehuda Sadeh struct rbd_snap *snap; 1783a0593290SAlex Elder struct rbd_snap *next; 1784dfc5606dSYehuda Sadeh 1785a0593290SAlex Elder list_for_each_entry_safe(snap, next, &rbd_dev->snaps, node) 178641f38c2bSAlex Elder rbd_remove_snap_dev(snap); 1787dfc5606dSYehuda Sadeh } 1788dfc5606dSYehuda Sadeh 17899478554aSAlex Elder static void rbd_update_mapping_size(struct rbd_device *rbd_dev) 17909478554aSAlex Elder { 17919478554aSAlex Elder sector_t size; 17929478554aSAlex Elder 17930d7dbfceSAlex Elder if (rbd_dev->spec->snap_id != CEPH_NOSNAP) 17949478554aSAlex Elder return; 17959478554aSAlex Elder 17969478554aSAlex Elder size = (sector_t) rbd_dev->header.image_size / SECTOR_SIZE; 17979478554aSAlex Elder dout("setting size to %llu sectors", (unsigned long long) size); 17989478554aSAlex Elder rbd_dev->mapping.size = (u64) size; 17999478554aSAlex Elder set_capacity(rbd_dev->disk, size); 18009478554aSAlex Elder } 18019478554aSAlex Elder 1802602adf40SYehuda Sadeh /* 1803602adf40SYehuda Sadeh * only read the first part of the ondisk header, without the snaps info 1804602adf40SYehuda Sadeh */ 1805117973fbSAlex Elder static int rbd_dev_v1_refresh(struct rbd_device *rbd_dev, u64 *hver) 1806602adf40SYehuda Sadeh { 1807602adf40SYehuda Sadeh int ret; 1808602adf40SYehuda Sadeh struct rbd_image_header h; 1809602adf40SYehuda Sadeh 1810602adf40SYehuda Sadeh ret = rbd_read_header(rbd_dev, &h); 1811602adf40SYehuda Sadeh if (ret < 0) 1812602adf40SYehuda Sadeh return ret; 1813602adf40SYehuda Sadeh 1814a51aa0c0SJosh Durgin down_write(&rbd_dev->header_rwsem); 1815a51aa0c0SJosh Durgin 18169478554aSAlex Elder /* Update image size, and check for resize of mapped image */ 18179478554aSAlex Elder rbd_dev->header.image_size = h.image_size; 18189478554aSAlex Elder rbd_update_mapping_size(rbd_dev); 18199db4b3e3SSage Weil 1820849b4260SAlex Elder /* rbd_dev->header.object_prefix shouldn't change */ 1821602adf40SYehuda Sadeh kfree(rbd_dev->header.snap_sizes); 1822849b4260SAlex Elder kfree(rbd_dev->header.snap_names); 1823d1d25646SJosh Durgin /* osd requests may still refer to snapc */ 1824d1d25646SJosh Durgin ceph_put_snap_context(rbd_dev->header.snapc); 1825602adf40SYehuda Sadeh 1826b813623aSAlex Elder if (hver) 1827b813623aSAlex Elder *hver = h.obj_version; 1828a71b891bSJosh Durgin rbd_dev->header.obj_version = h.obj_version; 182993a24e08SJosh Durgin rbd_dev->header.image_size = h.image_size; 1830602adf40SYehuda Sadeh rbd_dev->header.snapc = h.snapc; 1831602adf40SYehuda Sadeh rbd_dev->header.snap_names = h.snap_names; 1832602adf40SYehuda Sadeh rbd_dev->header.snap_sizes = h.snap_sizes; 1833849b4260SAlex Elder /* Free the extra copy of the object prefix */ 1834849b4260SAlex Elder WARN_ON(strcmp(rbd_dev->header.object_prefix, h.object_prefix)); 1835849b4260SAlex Elder kfree(h.object_prefix); 1836849b4260SAlex Elder 1837304f6808SAlex Elder ret = rbd_dev_snaps_update(rbd_dev); 1838304f6808SAlex Elder if (!ret) 1839304f6808SAlex Elder ret = rbd_dev_snaps_register(rbd_dev); 1840dfc5606dSYehuda Sadeh 1841c666601aSJosh Durgin up_write(&rbd_dev->header_rwsem); 1842602adf40SYehuda Sadeh 1843dfc5606dSYehuda Sadeh return ret; 1844602adf40SYehuda Sadeh } 1845602adf40SYehuda Sadeh 1846117973fbSAlex Elder static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver) 18471fe5e993SAlex Elder { 18481fe5e993SAlex Elder int ret; 18491fe5e993SAlex Elder 1850117973fbSAlex Elder rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); 18511fe5e993SAlex Elder mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 1852117973fbSAlex Elder if (rbd_dev->image_format == 1) 1853117973fbSAlex Elder ret = rbd_dev_v1_refresh(rbd_dev, hver); 1854117973fbSAlex Elder else 1855117973fbSAlex Elder ret = rbd_dev_v2_refresh(rbd_dev, hver); 18561fe5e993SAlex Elder mutex_unlock(&ctl_mutex); 18571fe5e993SAlex Elder 18581fe5e993SAlex Elder return ret; 18591fe5e993SAlex Elder } 18601fe5e993SAlex Elder 1861602adf40SYehuda Sadeh static int rbd_init_disk(struct rbd_device *rbd_dev) 1862602adf40SYehuda Sadeh { 1863602adf40SYehuda Sadeh struct gendisk *disk; 1864602adf40SYehuda Sadeh struct request_queue *q; 1865593a9e7bSAlex Elder u64 segment_size; 1866602adf40SYehuda Sadeh 1867602adf40SYehuda Sadeh /* create gendisk info */ 1868602adf40SYehuda Sadeh disk = alloc_disk(RBD_MINORS_PER_MAJOR); 1869602adf40SYehuda Sadeh if (!disk) 18701fcdb8aaSAlex Elder return -ENOMEM; 1871602adf40SYehuda Sadeh 1872f0f8cef5SAlex Elder snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", 1873de71a297SAlex Elder rbd_dev->dev_id); 1874602adf40SYehuda Sadeh disk->major = rbd_dev->major; 1875602adf40SYehuda Sadeh disk->first_minor = 0; 1876602adf40SYehuda Sadeh disk->fops = &rbd_bd_ops; 1877602adf40SYehuda Sadeh disk->private_data = rbd_dev; 1878602adf40SYehuda Sadeh 1879602adf40SYehuda Sadeh /* init rq */ 1880602adf40SYehuda Sadeh q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); 1881602adf40SYehuda Sadeh if (!q) 1882602adf40SYehuda Sadeh goto out_disk; 1883029bcbd8SJosh Durgin 1884593a9e7bSAlex Elder /* We use the default size, but let's be explicit about it. */ 1885593a9e7bSAlex Elder blk_queue_physical_block_size(q, SECTOR_SIZE); 1886593a9e7bSAlex Elder 1887029bcbd8SJosh Durgin /* set io sizes to object size */ 1888593a9e7bSAlex Elder segment_size = rbd_obj_bytes(&rbd_dev->header); 1889593a9e7bSAlex Elder blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); 1890593a9e7bSAlex Elder blk_queue_max_segment_size(q, segment_size); 1891593a9e7bSAlex Elder blk_queue_io_min(q, segment_size); 1892593a9e7bSAlex Elder blk_queue_io_opt(q, segment_size); 1893029bcbd8SJosh Durgin 1894602adf40SYehuda Sadeh blk_queue_merge_bvec(q, rbd_merge_bvec); 1895602adf40SYehuda Sadeh disk->queue = q; 1896602adf40SYehuda Sadeh 1897602adf40SYehuda Sadeh q->queuedata = rbd_dev; 1898602adf40SYehuda Sadeh 1899602adf40SYehuda Sadeh rbd_dev->disk = disk; 1900602adf40SYehuda Sadeh 190112f02944SAlex Elder set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); 190212f02944SAlex Elder 1903602adf40SYehuda Sadeh return 0; 1904602adf40SYehuda Sadeh out_disk: 1905602adf40SYehuda Sadeh put_disk(disk); 19061fcdb8aaSAlex Elder 19071fcdb8aaSAlex Elder return -ENOMEM; 1908602adf40SYehuda Sadeh } 1909602adf40SYehuda Sadeh 1910dfc5606dSYehuda Sadeh /* 1911dfc5606dSYehuda Sadeh sysfs 1912dfc5606dSYehuda Sadeh */ 1913602adf40SYehuda Sadeh 1914593a9e7bSAlex Elder static struct rbd_device *dev_to_rbd_dev(struct device *dev) 1915593a9e7bSAlex Elder { 1916593a9e7bSAlex Elder return container_of(dev, struct rbd_device, dev); 1917593a9e7bSAlex Elder } 1918593a9e7bSAlex Elder 1919dfc5606dSYehuda Sadeh static ssize_t rbd_size_show(struct device *dev, 1920dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1921602adf40SYehuda Sadeh { 1922593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1923a51aa0c0SJosh Durgin sector_t size; 1924dfc5606dSYehuda Sadeh 1925a51aa0c0SJosh Durgin down_read(&rbd_dev->header_rwsem); 1926a51aa0c0SJosh Durgin size = get_capacity(rbd_dev->disk); 1927a51aa0c0SJosh Durgin up_read(&rbd_dev->header_rwsem); 1928a51aa0c0SJosh Durgin 1929a51aa0c0SJosh Durgin return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE); 1930602adf40SYehuda Sadeh } 1931602adf40SYehuda Sadeh 193234b13184SAlex Elder /* 193334b13184SAlex Elder * Note this shows the features for whatever's mapped, which is not 193434b13184SAlex Elder * necessarily the base image. 193534b13184SAlex Elder */ 193634b13184SAlex Elder static ssize_t rbd_features_show(struct device *dev, 193734b13184SAlex Elder struct device_attribute *attr, char *buf) 193834b13184SAlex Elder { 193934b13184SAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 194034b13184SAlex Elder 194134b13184SAlex Elder return sprintf(buf, "0x%016llx\n", 194234b13184SAlex Elder (unsigned long long) rbd_dev->mapping.features); 194334b13184SAlex Elder } 194434b13184SAlex Elder 1945dfc5606dSYehuda Sadeh static ssize_t rbd_major_show(struct device *dev, 1946dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1947602adf40SYehuda Sadeh { 1948593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1949dfc5606dSYehuda Sadeh 1950dfc5606dSYehuda Sadeh return sprintf(buf, "%d\n", rbd_dev->major); 1951dfc5606dSYehuda Sadeh } 1952dfc5606dSYehuda Sadeh 1953dfc5606dSYehuda Sadeh static ssize_t rbd_client_id_show(struct device *dev, 1954dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1955dfc5606dSYehuda Sadeh { 1956593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1957dfc5606dSYehuda Sadeh 19581dbb4399SAlex Elder return sprintf(buf, "client%lld\n", 19591dbb4399SAlex Elder ceph_client_id(rbd_dev->rbd_client->client)); 1960dfc5606dSYehuda Sadeh } 1961dfc5606dSYehuda Sadeh 1962dfc5606dSYehuda Sadeh static ssize_t rbd_pool_show(struct device *dev, 1963dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1964dfc5606dSYehuda Sadeh { 1965593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1966dfc5606dSYehuda Sadeh 19670d7dbfceSAlex Elder return sprintf(buf, "%s\n", rbd_dev->spec->pool_name); 1968dfc5606dSYehuda Sadeh } 1969dfc5606dSYehuda Sadeh 19709bb2f334SAlex Elder static ssize_t rbd_pool_id_show(struct device *dev, 19719bb2f334SAlex Elder struct device_attribute *attr, char *buf) 19729bb2f334SAlex Elder { 19739bb2f334SAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 19749bb2f334SAlex Elder 19750d7dbfceSAlex Elder return sprintf(buf, "%llu\n", 19760d7dbfceSAlex Elder (unsigned long long) rbd_dev->spec->pool_id); 19779bb2f334SAlex Elder } 19789bb2f334SAlex Elder 1979dfc5606dSYehuda Sadeh static ssize_t rbd_name_show(struct device *dev, 1980dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1981dfc5606dSYehuda Sadeh { 1982593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1983dfc5606dSYehuda Sadeh 19840d7dbfceSAlex Elder return sprintf(buf, "%s\n", rbd_dev->spec->image_name); 1985dfc5606dSYehuda Sadeh } 1986dfc5606dSYehuda Sadeh 1987589d30e0SAlex Elder static ssize_t rbd_image_id_show(struct device *dev, 1988589d30e0SAlex Elder struct device_attribute *attr, char *buf) 1989589d30e0SAlex Elder { 1990589d30e0SAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1991589d30e0SAlex Elder 19920d7dbfceSAlex Elder return sprintf(buf, "%s\n", rbd_dev->spec->image_id); 1993589d30e0SAlex Elder } 1994589d30e0SAlex Elder 199534b13184SAlex Elder /* 199634b13184SAlex Elder * Shows the name of the currently-mapped snapshot (or 199734b13184SAlex Elder * RBD_SNAP_HEAD_NAME for the base image). 199834b13184SAlex Elder */ 1999dfc5606dSYehuda Sadeh static ssize_t rbd_snap_show(struct device *dev, 2000dfc5606dSYehuda Sadeh struct device_attribute *attr, 2001dfc5606dSYehuda Sadeh char *buf) 2002dfc5606dSYehuda Sadeh { 2003593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2004dfc5606dSYehuda Sadeh 20050d7dbfceSAlex Elder return sprintf(buf, "%s\n", rbd_dev->spec->snap_name); 2006dfc5606dSYehuda Sadeh } 2007dfc5606dSYehuda Sadeh 2008dfc5606dSYehuda Sadeh static ssize_t rbd_image_refresh(struct device *dev, 2009dfc5606dSYehuda Sadeh struct device_attribute *attr, 2010dfc5606dSYehuda Sadeh const char *buf, 2011dfc5606dSYehuda Sadeh size_t size) 2012dfc5606dSYehuda Sadeh { 2013593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2014b813623aSAlex Elder int ret; 2015602adf40SYehuda Sadeh 2016117973fbSAlex Elder ret = rbd_dev_refresh(rbd_dev, NULL); 2017b813623aSAlex Elder 2018b813623aSAlex Elder return ret < 0 ? ret : size; 2019dfc5606dSYehuda Sadeh } 2020602adf40SYehuda Sadeh 2021dfc5606dSYehuda Sadeh static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL); 202234b13184SAlex Elder static DEVICE_ATTR(features, S_IRUGO, rbd_features_show, NULL); 2023dfc5606dSYehuda Sadeh static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL); 2024dfc5606dSYehuda Sadeh static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL); 2025dfc5606dSYehuda Sadeh static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL); 20269bb2f334SAlex Elder static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL); 2027dfc5606dSYehuda Sadeh static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL); 2028589d30e0SAlex Elder static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL); 2029dfc5606dSYehuda Sadeh static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); 2030dfc5606dSYehuda Sadeh static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); 2031dfc5606dSYehuda Sadeh 2032dfc5606dSYehuda Sadeh static struct attribute *rbd_attrs[] = { 2033dfc5606dSYehuda Sadeh &dev_attr_size.attr, 203434b13184SAlex Elder &dev_attr_features.attr, 2035dfc5606dSYehuda Sadeh &dev_attr_major.attr, 2036dfc5606dSYehuda Sadeh &dev_attr_client_id.attr, 2037dfc5606dSYehuda Sadeh &dev_attr_pool.attr, 20389bb2f334SAlex Elder &dev_attr_pool_id.attr, 2039dfc5606dSYehuda Sadeh &dev_attr_name.attr, 2040589d30e0SAlex Elder &dev_attr_image_id.attr, 2041dfc5606dSYehuda Sadeh &dev_attr_current_snap.attr, 2042dfc5606dSYehuda Sadeh &dev_attr_refresh.attr, 2043dfc5606dSYehuda Sadeh NULL 2044dfc5606dSYehuda Sadeh }; 2045dfc5606dSYehuda Sadeh 2046dfc5606dSYehuda Sadeh static struct attribute_group rbd_attr_group = { 2047dfc5606dSYehuda Sadeh .attrs = rbd_attrs, 2048dfc5606dSYehuda Sadeh }; 2049dfc5606dSYehuda Sadeh 2050dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_attr_groups[] = { 2051dfc5606dSYehuda Sadeh &rbd_attr_group, 2052dfc5606dSYehuda Sadeh NULL 2053dfc5606dSYehuda Sadeh }; 2054dfc5606dSYehuda Sadeh 2055dfc5606dSYehuda Sadeh static void rbd_sysfs_dev_release(struct device *dev) 2056dfc5606dSYehuda Sadeh { 2057dfc5606dSYehuda Sadeh } 2058dfc5606dSYehuda Sadeh 2059dfc5606dSYehuda Sadeh static struct device_type rbd_device_type = { 2060dfc5606dSYehuda Sadeh .name = "rbd", 2061dfc5606dSYehuda Sadeh .groups = rbd_attr_groups, 2062dfc5606dSYehuda Sadeh .release = rbd_sysfs_dev_release, 2063dfc5606dSYehuda Sadeh }; 2064dfc5606dSYehuda Sadeh 2065dfc5606dSYehuda Sadeh 2066dfc5606dSYehuda Sadeh /* 2067dfc5606dSYehuda Sadeh sysfs - snapshots 2068dfc5606dSYehuda Sadeh */ 2069dfc5606dSYehuda Sadeh 2070dfc5606dSYehuda Sadeh static ssize_t rbd_snap_size_show(struct device *dev, 2071dfc5606dSYehuda Sadeh struct device_attribute *attr, 2072dfc5606dSYehuda Sadeh char *buf) 2073dfc5606dSYehuda Sadeh { 2074dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 2075dfc5606dSYehuda Sadeh 20763591538fSJosh Durgin return sprintf(buf, "%llu\n", (unsigned long long)snap->size); 2077dfc5606dSYehuda Sadeh } 2078dfc5606dSYehuda Sadeh 2079dfc5606dSYehuda Sadeh static ssize_t rbd_snap_id_show(struct device *dev, 2080dfc5606dSYehuda Sadeh struct device_attribute *attr, 2081dfc5606dSYehuda Sadeh char *buf) 2082dfc5606dSYehuda Sadeh { 2083dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 2084dfc5606dSYehuda Sadeh 2085593a9e7bSAlex Elder return sprintf(buf, "%llu\n", (unsigned long long)snap->id); 2086dfc5606dSYehuda Sadeh } 2087dfc5606dSYehuda Sadeh 208834b13184SAlex Elder static ssize_t rbd_snap_features_show(struct device *dev, 208934b13184SAlex Elder struct device_attribute *attr, 209034b13184SAlex Elder char *buf) 209134b13184SAlex Elder { 209234b13184SAlex Elder struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 209334b13184SAlex Elder 209434b13184SAlex Elder return sprintf(buf, "0x%016llx\n", 209534b13184SAlex Elder (unsigned long long) snap->features); 209634b13184SAlex Elder } 209734b13184SAlex Elder 2098dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); 2099dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL); 210034b13184SAlex Elder static DEVICE_ATTR(snap_features, S_IRUGO, rbd_snap_features_show, NULL); 2101dfc5606dSYehuda Sadeh 2102dfc5606dSYehuda Sadeh static struct attribute *rbd_snap_attrs[] = { 2103dfc5606dSYehuda Sadeh &dev_attr_snap_size.attr, 2104dfc5606dSYehuda Sadeh &dev_attr_snap_id.attr, 210534b13184SAlex Elder &dev_attr_snap_features.attr, 2106dfc5606dSYehuda Sadeh NULL, 2107dfc5606dSYehuda Sadeh }; 2108dfc5606dSYehuda Sadeh 2109dfc5606dSYehuda Sadeh static struct attribute_group rbd_snap_attr_group = { 2110dfc5606dSYehuda Sadeh .attrs = rbd_snap_attrs, 2111dfc5606dSYehuda Sadeh }; 2112dfc5606dSYehuda Sadeh 2113dfc5606dSYehuda Sadeh static void rbd_snap_dev_release(struct device *dev) 2114dfc5606dSYehuda Sadeh { 2115dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 2116dfc5606dSYehuda Sadeh kfree(snap->name); 2117dfc5606dSYehuda Sadeh kfree(snap); 2118dfc5606dSYehuda Sadeh } 2119dfc5606dSYehuda Sadeh 2120dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_snap_attr_groups[] = { 2121dfc5606dSYehuda Sadeh &rbd_snap_attr_group, 2122dfc5606dSYehuda Sadeh NULL 2123dfc5606dSYehuda Sadeh }; 2124dfc5606dSYehuda Sadeh 2125dfc5606dSYehuda Sadeh static struct device_type rbd_snap_device_type = { 2126dfc5606dSYehuda Sadeh .groups = rbd_snap_attr_groups, 2127dfc5606dSYehuda Sadeh .release = rbd_snap_dev_release, 2128dfc5606dSYehuda Sadeh }; 2129dfc5606dSYehuda Sadeh 21308b8fb99cSAlex Elder static struct rbd_spec *rbd_spec_get(struct rbd_spec *spec) 21318b8fb99cSAlex Elder { 21328b8fb99cSAlex Elder kref_get(&spec->kref); 21338b8fb99cSAlex Elder 21348b8fb99cSAlex Elder return spec; 21358b8fb99cSAlex Elder } 21368b8fb99cSAlex Elder 21378b8fb99cSAlex Elder static void rbd_spec_free(struct kref *kref); 21388b8fb99cSAlex Elder static void rbd_spec_put(struct rbd_spec *spec) 21398b8fb99cSAlex Elder { 21408b8fb99cSAlex Elder if (spec) 21418b8fb99cSAlex Elder kref_put(&spec->kref, rbd_spec_free); 21428b8fb99cSAlex Elder } 21438b8fb99cSAlex Elder 21448b8fb99cSAlex Elder static struct rbd_spec *rbd_spec_alloc(void) 21458b8fb99cSAlex Elder { 21468b8fb99cSAlex Elder struct rbd_spec *spec; 21478b8fb99cSAlex Elder 21488b8fb99cSAlex Elder spec = kzalloc(sizeof (*spec), GFP_KERNEL); 21498b8fb99cSAlex Elder if (!spec) 21508b8fb99cSAlex Elder return NULL; 21518b8fb99cSAlex Elder kref_init(&spec->kref); 21528b8fb99cSAlex Elder 21538b8fb99cSAlex Elder rbd_spec_put(rbd_spec_get(spec)); /* TEMPORARY */ 21548b8fb99cSAlex Elder 21558b8fb99cSAlex Elder return spec; 21568b8fb99cSAlex Elder } 21578b8fb99cSAlex Elder 21588b8fb99cSAlex Elder static void rbd_spec_free(struct kref *kref) 21598b8fb99cSAlex Elder { 21608b8fb99cSAlex Elder struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref); 21618b8fb99cSAlex Elder 21628b8fb99cSAlex Elder kfree(spec->pool_name); 21638b8fb99cSAlex Elder kfree(spec->image_id); 21648b8fb99cSAlex Elder kfree(spec->image_name); 21658b8fb99cSAlex Elder kfree(spec->snap_name); 21668b8fb99cSAlex Elder kfree(spec); 21678b8fb99cSAlex Elder } 21688b8fb99cSAlex Elder 2169304f6808SAlex Elder static bool rbd_snap_registered(struct rbd_snap *snap) 2170304f6808SAlex Elder { 2171304f6808SAlex Elder bool ret = snap->dev.type == &rbd_snap_device_type; 2172304f6808SAlex Elder bool reg = device_is_registered(&snap->dev); 2173304f6808SAlex Elder 2174304f6808SAlex Elder rbd_assert(!ret ^ reg); 2175304f6808SAlex Elder 2176304f6808SAlex Elder return ret; 2177304f6808SAlex Elder } 2178304f6808SAlex Elder 217941f38c2bSAlex Elder static void rbd_remove_snap_dev(struct rbd_snap *snap) 2180dfc5606dSYehuda Sadeh { 2181dfc5606dSYehuda Sadeh list_del(&snap->node); 2182304f6808SAlex Elder if (device_is_registered(&snap->dev)) 2183dfc5606dSYehuda Sadeh device_unregister(&snap->dev); 2184dfc5606dSYehuda Sadeh } 2185dfc5606dSYehuda Sadeh 218614e7085dSAlex Elder static int rbd_register_snap_dev(struct rbd_snap *snap, 2187dfc5606dSYehuda Sadeh struct device *parent) 2188dfc5606dSYehuda Sadeh { 2189dfc5606dSYehuda Sadeh struct device *dev = &snap->dev; 2190dfc5606dSYehuda Sadeh int ret; 2191dfc5606dSYehuda Sadeh 2192dfc5606dSYehuda Sadeh dev->type = &rbd_snap_device_type; 2193dfc5606dSYehuda Sadeh dev->parent = parent; 2194dfc5606dSYehuda Sadeh dev->release = rbd_snap_dev_release; 2195d4b125e9SAlex Elder dev_set_name(dev, "%s%s", RBD_SNAP_DEV_NAME_PREFIX, snap->name); 2196304f6808SAlex Elder dout("%s: registering device for snapshot %s\n", __func__, snap->name); 2197304f6808SAlex Elder 2198dfc5606dSYehuda Sadeh ret = device_register(dev); 2199dfc5606dSYehuda Sadeh 2200dfc5606dSYehuda Sadeh return ret; 2201dfc5606dSYehuda Sadeh } 2202dfc5606dSYehuda Sadeh 22034e891e0aSAlex Elder static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev, 2204c8d18425SAlex Elder const char *snap_name, 220534b13184SAlex Elder u64 snap_id, u64 snap_size, 220634b13184SAlex Elder u64 snap_features) 2207dfc5606dSYehuda Sadeh { 22084e891e0aSAlex Elder struct rbd_snap *snap; 2209dfc5606dSYehuda Sadeh int ret; 22104e891e0aSAlex Elder 22114e891e0aSAlex Elder snap = kzalloc(sizeof (*snap), GFP_KERNEL); 2212dfc5606dSYehuda Sadeh if (!snap) 22134e891e0aSAlex Elder return ERR_PTR(-ENOMEM); 22144e891e0aSAlex Elder 22154e891e0aSAlex Elder ret = -ENOMEM; 2216c8d18425SAlex Elder snap->name = kstrdup(snap_name, GFP_KERNEL); 22174e891e0aSAlex Elder if (!snap->name) 22184e891e0aSAlex Elder goto err; 22194e891e0aSAlex Elder 2220c8d18425SAlex Elder snap->id = snap_id; 2221c8d18425SAlex Elder snap->size = snap_size; 222234b13184SAlex Elder snap->features = snap_features; 22234e891e0aSAlex Elder 22244e891e0aSAlex Elder return snap; 22254e891e0aSAlex Elder 2226dfc5606dSYehuda Sadeh err: 2227dfc5606dSYehuda Sadeh kfree(snap->name); 2228dfc5606dSYehuda Sadeh kfree(snap); 22294e891e0aSAlex Elder 22304e891e0aSAlex Elder return ERR_PTR(ret); 2231dfc5606dSYehuda Sadeh } 2232dfc5606dSYehuda Sadeh 2233cd892126SAlex Elder static char *rbd_dev_v1_snap_info(struct rbd_device *rbd_dev, u32 which, 2234cd892126SAlex Elder u64 *snap_size, u64 *snap_features) 2235cd892126SAlex Elder { 2236cd892126SAlex Elder char *snap_name; 2237cd892126SAlex Elder 2238cd892126SAlex Elder rbd_assert(which < rbd_dev->header.snapc->num_snaps); 2239cd892126SAlex Elder 2240cd892126SAlex Elder *snap_size = rbd_dev->header.snap_sizes[which]; 2241cd892126SAlex Elder *snap_features = 0; /* No features for v1 */ 2242cd892126SAlex Elder 2243cd892126SAlex Elder /* Skip over names until we find the one we are looking for */ 2244cd892126SAlex Elder 2245cd892126SAlex Elder snap_name = rbd_dev->header.snap_names; 2246cd892126SAlex Elder while (which--) 2247cd892126SAlex Elder snap_name += strlen(snap_name) + 1; 2248cd892126SAlex Elder 2249cd892126SAlex Elder return snap_name; 2250cd892126SAlex Elder } 2251cd892126SAlex Elder 2252dfc5606dSYehuda Sadeh /* 22539d475de5SAlex Elder * Get the size and object order for an image snapshot, or if 22549d475de5SAlex Elder * snap_id is CEPH_NOSNAP, gets this information for the base 22559d475de5SAlex Elder * image. 22569d475de5SAlex Elder */ 22579d475de5SAlex Elder static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, 22589d475de5SAlex Elder u8 *order, u64 *snap_size) 22599d475de5SAlex Elder { 22609d475de5SAlex Elder __le64 snapid = cpu_to_le64(snap_id); 22619d475de5SAlex Elder int ret; 22629d475de5SAlex Elder struct { 22639d475de5SAlex Elder u8 order; 22649d475de5SAlex Elder __le64 size; 22659d475de5SAlex Elder } __attribute__ ((packed)) size_buf = { 0 }; 22669d475de5SAlex Elder 22679d475de5SAlex Elder ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, 22689d475de5SAlex Elder "rbd", "get_size", 22699d475de5SAlex Elder (char *) &snapid, sizeof (snapid), 22709d475de5SAlex Elder (char *) &size_buf, sizeof (size_buf), 22719d475de5SAlex Elder CEPH_OSD_FLAG_READ, NULL); 22729d475de5SAlex Elder dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); 22739d475de5SAlex Elder if (ret < 0) 22749d475de5SAlex Elder return ret; 22759d475de5SAlex Elder 22769d475de5SAlex Elder *order = size_buf.order; 22779d475de5SAlex Elder *snap_size = le64_to_cpu(size_buf.size); 22789d475de5SAlex Elder 22799d475de5SAlex Elder dout(" snap_id 0x%016llx order = %u, snap_size = %llu\n", 22809d475de5SAlex Elder (unsigned long long) snap_id, (unsigned int) *order, 22819d475de5SAlex Elder (unsigned long long) *snap_size); 22829d475de5SAlex Elder 22839d475de5SAlex Elder return 0; 22849d475de5SAlex Elder } 22859d475de5SAlex Elder 22869d475de5SAlex Elder static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev) 22879d475de5SAlex Elder { 22889d475de5SAlex Elder return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP, 22899d475de5SAlex Elder &rbd_dev->header.obj_order, 22909d475de5SAlex Elder &rbd_dev->header.image_size); 22919d475de5SAlex Elder } 22929d475de5SAlex Elder 22931e130199SAlex Elder static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) 22941e130199SAlex Elder { 22951e130199SAlex Elder void *reply_buf; 22961e130199SAlex Elder int ret; 22971e130199SAlex Elder void *p; 22981e130199SAlex Elder 22991e130199SAlex Elder reply_buf = kzalloc(RBD_OBJ_PREFIX_LEN_MAX, GFP_KERNEL); 23001e130199SAlex Elder if (!reply_buf) 23011e130199SAlex Elder return -ENOMEM; 23021e130199SAlex Elder 23031e130199SAlex Elder ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, 23041e130199SAlex Elder "rbd", "get_object_prefix", 23051e130199SAlex Elder NULL, 0, 23061e130199SAlex Elder reply_buf, RBD_OBJ_PREFIX_LEN_MAX, 23071e130199SAlex Elder CEPH_OSD_FLAG_READ, NULL); 23081e130199SAlex Elder dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); 23091e130199SAlex Elder if (ret < 0) 23101e130199SAlex Elder goto out; 2311a0ea3a40SAlex Elder ret = 0; /* rbd_req_sync_exec() can return positive */ 23121e130199SAlex Elder 23131e130199SAlex Elder p = reply_buf; 23141e130199SAlex Elder rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, 23151e130199SAlex Elder p + RBD_OBJ_PREFIX_LEN_MAX, 23161e130199SAlex Elder NULL, GFP_NOIO); 23171e130199SAlex Elder 23181e130199SAlex Elder if (IS_ERR(rbd_dev->header.object_prefix)) { 23191e130199SAlex Elder ret = PTR_ERR(rbd_dev->header.object_prefix); 23201e130199SAlex Elder rbd_dev->header.object_prefix = NULL; 23211e130199SAlex Elder } else { 23221e130199SAlex Elder dout(" object_prefix = %s\n", rbd_dev->header.object_prefix); 23231e130199SAlex Elder } 23241e130199SAlex Elder 23251e130199SAlex Elder out: 23261e130199SAlex Elder kfree(reply_buf); 23271e130199SAlex Elder 23281e130199SAlex Elder return ret; 23291e130199SAlex Elder } 23301e130199SAlex Elder 2331b1b5402aSAlex Elder static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, 2332b1b5402aSAlex Elder u64 *snap_features) 2333b1b5402aSAlex Elder { 2334b1b5402aSAlex Elder __le64 snapid = cpu_to_le64(snap_id); 2335b1b5402aSAlex Elder struct { 2336b1b5402aSAlex Elder __le64 features; 2337b1b5402aSAlex Elder __le64 incompat; 2338b1b5402aSAlex Elder } features_buf = { 0 }; 2339d889140cSAlex Elder u64 incompat; 2340b1b5402aSAlex Elder int ret; 2341b1b5402aSAlex Elder 2342b1b5402aSAlex Elder ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, 2343b1b5402aSAlex Elder "rbd", "get_features", 2344b1b5402aSAlex Elder (char *) &snapid, sizeof (snapid), 2345b1b5402aSAlex Elder (char *) &features_buf, sizeof (features_buf), 2346b1b5402aSAlex Elder CEPH_OSD_FLAG_READ, NULL); 2347b1b5402aSAlex Elder dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); 2348b1b5402aSAlex Elder if (ret < 0) 2349b1b5402aSAlex Elder return ret; 2350d889140cSAlex Elder 2351d889140cSAlex Elder incompat = le64_to_cpu(features_buf.incompat); 2352d889140cSAlex Elder if (incompat & ~RBD_FEATURES_ALL) 2353d889140cSAlex Elder return -ENOTSUPP; 2354d889140cSAlex Elder 2355b1b5402aSAlex Elder *snap_features = le64_to_cpu(features_buf.features); 2356b1b5402aSAlex Elder 2357b1b5402aSAlex Elder dout(" snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n", 2358b1b5402aSAlex Elder (unsigned long long) snap_id, 2359b1b5402aSAlex Elder (unsigned long long) *snap_features, 2360b1b5402aSAlex Elder (unsigned long long) le64_to_cpu(features_buf.incompat)); 2361b1b5402aSAlex Elder 2362b1b5402aSAlex Elder return 0; 2363b1b5402aSAlex Elder } 2364b1b5402aSAlex Elder 2365b1b5402aSAlex Elder static int rbd_dev_v2_features(struct rbd_device *rbd_dev) 2366b1b5402aSAlex Elder { 2367b1b5402aSAlex Elder return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, 2368b1b5402aSAlex Elder &rbd_dev->header.features); 2369b1b5402aSAlex Elder } 2370b1b5402aSAlex Elder 23716e14b1a6SAlex Elder static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) 237235d489f9SAlex Elder { 237335d489f9SAlex Elder size_t size; 237435d489f9SAlex Elder int ret; 237535d489f9SAlex Elder void *reply_buf; 237635d489f9SAlex Elder void *p; 237735d489f9SAlex Elder void *end; 237835d489f9SAlex Elder u64 seq; 237935d489f9SAlex Elder u32 snap_count; 238035d489f9SAlex Elder struct ceph_snap_context *snapc; 238135d489f9SAlex Elder u32 i; 238235d489f9SAlex Elder 238335d489f9SAlex Elder /* 238435d489f9SAlex Elder * We'll need room for the seq value (maximum snapshot id), 238535d489f9SAlex Elder * snapshot count, and array of that many snapshot ids. 238635d489f9SAlex Elder * For now we have a fixed upper limit on the number we're 238735d489f9SAlex Elder * prepared to receive. 238835d489f9SAlex Elder */ 238935d489f9SAlex Elder size = sizeof (__le64) + sizeof (__le32) + 239035d489f9SAlex Elder RBD_MAX_SNAP_COUNT * sizeof (__le64); 239135d489f9SAlex Elder reply_buf = kzalloc(size, GFP_KERNEL); 239235d489f9SAlex Elder if (!reply_buf) 239335d489f9SAlex Elder return -ENOMEM; 239435d489f9SAlex Elder 239535d489f9SAlex Elder ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, 239635d489f9SAlex Elder "rbd", "get_snapcontext", 239735d489f9SAlex Elder NULL, 0, 239835d489f9SAlex Elder reply_buf, size, 23996e14b1a6SAlex Elder CEPH_OSD_FLAG_READ, ver); 240035d489f9SAlex Elder dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); 240135d489f9SAlex Elder if (ret < 0) 240235d489f9SAlex Elder goto out; 240335d489f9SAlex Elder 240435d489f9SAlex Elder ret = -ERANGE; 240535d489f9SAlex Elder p = reply_buf; 240635d489f9SAlex Elder end = (char *) reply_buf + size; 240735d489f9SAlex Elder ceph_decode_64_safe(&p, end, seq, out); 240835d489f9SAlex Elder ceph_decode_32_safe(&p, end, snap_count, out); 240935d489f9SAlex Elder 241035d489f9SAlex Elder /* 241135d489f9SAlex Elder * Make sure the reported number of snapshot ids wouldn't go 241235d489f9SAlex Elder * beyond the end of our buffer. But before checking that, 241335d489f9SAlex Elder * make sure the computed size of the snapshot context we 241435d489f9SAlex Elder * allocate is representable in a size_t. 241535d489f9SAlex Elder */ 241635d489f9SAlex Elder if (snap_count > (SIZE_MAX - sizeof (struct ceph_snap_context)) 241735d489f9SAlex Elder / sizeof (u64)) { 241835d489f9SAlex Elder ret = -EINVAL; 241935d489f9SAlex Elder goto out; 242035d489f9SAlex Elder } 242135d489f9SAlex Elder if (!ceph_has_room(&p, end, snap_count * sizeof (__le64))) 242235d489f9SAlex Elder goto out; 242335d489f9SAlex Elder 242435d489f9SAlex Elder size = sizeof (struct ceph_snap_context) + 242535d489f9SAlex Elder snap_count * sizeof (snapc->snaps[0]); 242635d489f9SAlex Elder snapc = kmalloc(size, GFP_KERNEL); 242735d489f9SAlex Elder if (!snapc) { 242835d489f9SAlex Elder ret = -ENOMEM; 242935d489f9SAlex Elder goto out; 243035d489f9SAlex Elder } 243135d489f9SAlex Elder 243235d489f9SAlex Elder atomic_set(&snapc->nref, 1); 243335d489f9SAlex Elder snapc->seq = seq; 243435d489f9SAlex Elder snapc->num_snaps = snap_count; 243535d489f9SAlex Elder for (i = 0; i < snap_count; i++) 243635d489f9SAlex Elder snapc->snaps[i] = ceph_decode_64(&p); 243735d489f9SAlex Elder 243835d489f9SAlex Elder rbd_dev->header.snapc = snapc; 243935d489f9SAlex Elder 244035d489f9SAlex Elder dout(" snap context seq = %llu, snap_count = %u\n", 244135d489f9SAlex Elder (unsigned long long) seq, (unsigned int) snap_count); 244235d489f9SAlex Elder 244335d489f9SAlex Elder out: 244435d489f9SAlex Elder kfree(reply_buf); 244535d489f9SAlex Elder 244635d489f9SAlex Elder return 0; 244735d489f9SAlex Elder } 244835d489f9SAlex Elder 2449b8b1e2dbSAlex Elder static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) 2450b8b1e2dbSAlex Elder { 2451b8b1e2dbSAlex Elder size_t size; 2452b8b1e2dbSAlex Elder void *reply_buf; 2453b8b1e2dbSAlex Elder __le64 snap_id; 2454b8b1e2dbSAlex Elder int ret; 2455b8b1e2dbSAlex Elder void *p; 2456b8b1e2dbSAlex Elder void *end; 2457b8b1e2dbSAlex Elder char *snap_name; 2458b8b1e2dbSAlex Elder 2459b8b1e2dbSAlex Elder size = sizeof (__le32) + RBD_MAX_SNAP_NAME_LEN; 2460b8b1e2dbSAlex Elder reply_buf = kmalloc(size, GFP_KERNEL); 2461b8b1e2dbSAlex Elder if (!reply_buf) 2462b8b1e2dbSAlex Elder return ERR_PTR(-ENOMEM); 2463b8b1e2dbSAlex Elder 2464b8b1e2dbSAlex Elder snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]); 2465b8b1e2dbSAlex Elder ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, 2466b8b1e2dbSAlex Elder "rbd", "get_snapshot_name", 2467b8b1e2dbSAlex Elder (char *) &snap_id, sizeof (snap_id), 2468b8b1e2dbSAlex Elder reply_buf, size, 2469b8b1e2dbSAlex Elder CEPH_OSD_FLAG_READ, NULL); 2470b8b1e2dbSAlex Elder dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); 2471b8b1e2dbSAlex Elder if (ret < 0) 2472b8b1e2dbSAlex Elder goto out; 2473b8b1e2dbSAlex Elder 2474b8b1e2dbSAlex Elder p = reply_buf; 2475b8b1e2dbSAlex Elder end = (char *) reply_buf + size; 2476e5c35534SAlex Elder snap_name = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); 2477b8b1e2dbSAlex Elder if (IS_ERR(snap_name)) { 2478b8b1e2dbSAlex Elder ret = PTR_ERR(snap_name); 2479b8b1e2dbSAlex Elder goto out; 2480b8b1e2dbSAlex Elder } else { 2481b8b1e2dbSAlex Elder dout(" snap_id 0x%016llx snap_name = %s\n", 2482b8b1e2dbSAlex Elder (unsigned long long) le64_to_cpu(snap_id), snap_name); 2483b8b1e2dbSAlex Elder } 2484b8b1e2dbSAlex Elder kfree(reply_buf); 2485b8b1e2dbSAlex Elder 2486b8b1e2dbSAlex Elder return snap_name; 2487b8b1e2dbSAlex Elder out: 2488b8b1e2dbSAlex Elder kfree(reply_buf); 2489b8b1e2dbSAlex Elder 2490b8b1e2dbSAlex Elder return ERR_PTR(ret); 2491b8b1e2dbSAlex Elder } 2492b8b1e2dbSAlex Elder 2493b8b1e2dbSAlex Elder static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which, 2494b8b1e2dbSAlex Elder u64 *snap_size, u64 *snap_features) 2495b8b1e2dbSAlex Elder { 2496b8b1e2dbSAlex Elder __le64 snap_id; 2497b8b1e2dbSAlex Elder u8 order; 2498b8b1e2dbSAlex Elder int ret; 2499b8b1e2dbSAlex Elder 2500b8b1e2dbSAlex Elder snap_id = rbd_dev->header.snapc->snaps[which]; 2501b8b1e2dbSAlex Elder ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, &order, snap_size); 2502b8b1e2dbSAlex Elder if (ret) 2503b8b1e2dbSAlex Elder return ERR_PTR(ret); 2504b8b1e2dbSAlex Elder ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, snap_features); 2505b8b1e2dbSAlex Elder if (ret) 2506b8b1e2dbSAlex Elder return ERR_PTR(ret); 2507b8b1e2dbSAlex Elder 2508b8b1e2dbSAlex Elder return rbd_dev_v2_snap_name(rbd_dev, which); 2509b8b1e2dbSAlex Elder } 2510b8b1e2dbSAlex Elder 2511b8b1e2dbSAlex Elder static char *rbd_dev_snap_info(struct rbd_device *rbd_dev, u32 which, 2512b8b1e2dbSAlex Elder u64 *snap_size, u64 *snap_features) 2513b8b1e2dbSAlex Elder { 2514b8b1e2dbSAlex Elder if (rbd_dev->image_format == 1) 2515b8b1e2dbSAlex Elder return rbd_dev_v1_snap_info(rbd_dev, which, 2516b8b1e2dbSAlex Elder snap_size, snap_features); 2517b8b1e2dbSAlex Elder if (rbd_dev->image_format == 2) 2518b8b1e2dbSAlex Elder return rbd_dev_v2_snap_info(rbd_dev, which, 2519b8b1e2dbSAlex Elder snap_size, snap_features); 2520b8b1e2dbSAlex Elder return ERR_PTR(-EINVAL); 2521b8b1e2dbSAlex Elder } 2522b8b1e2dbSAlex Elder 2523117973fbSAlex Elder static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver) 2524117973fbSAlex Elder { 2525117973fbSAlex Elder int ret; 2526117973fbSAlex Elder __u8 obj_order; 2527117973fbSAlex Elder 2528117973fbSAlex Elder down_write(&rbd_dev->header_rwsem); 2529117973fbSAlex Elder 2530117973fbSAlex Elder /* Grab old order first, to see if it changes */ 2531117973fbSAlex Elder 2532117973fbSAlex Elder obj_order = rbd_dev->header.obj_order, 2533117973fbSAlex Elder ret = rbd_dev_v2_image_size(rbd_dev); 2534117973fbSAlex Elder if (ret) 2535117973fbSAlex Elder goto out; 2536117973fbSAlex Elder if (rbd_dev->header.obj_order != obj_order) { 2537117973fbSAlex Elder ret = -EIO; 2538117973fbSAlex Elder goto out; 2539117973fbSAlex Elder } 2540117973fbSAlex Elder rbd_update_mapping_size(rbd_dev); 2541117973fbSAlex Elder 2542117973fbSAlex Elder ret = rbd_dev_v2_snap_context(rbd_dev, hver); 2543117973fbSAlex Elder dout("rbd_dev_v2_snap_context returned %d\n", ret); 2544117973fbSAlex Elder if (ret) 2545117973fbSAlex Elder goto out; 2546117973fbSAlex Elder ret = rbd_dev_snaps_update(rbd_dev); 2547117973fbSAlex Elder dout("rbd_dev_snaps_update returned %d\n", ret); 2548117973fbSAlex Elder if (ret) 2549117973fbSAlex Elder goto out; 2550117973fbSAlex Elder ret = rbd_dev_snaps_register(rbd_dev); 2551117973fbSAlex Elder dout("rbd_dev_snaps_register returned %d\n", ret); 2552117973fbSAlex Elder out: 2553117973fbSAlex Elder up_write(&rbd_dev->header_rwsem); 2554117973fbSAlex Elder 2555117973fbSAlex Elder return ret; 2556117973fbSAlex Elder } 2557117973fbSAlex Elder 25589d475de5SAlex Elder /* 255935938150SAlex Elder * Scan the rbd device's current snapshot list and compare it to the 256035938150SAlex Elder * newly-received snapshot context. Remove any existing snapshots 256135938150SAlex Elder * not present in the new snapshot context. Add a new snapshot for 256235938150SAlex Elder * any snaphots in the snapshot context not in the current list. 256335938150SAlex Elder * And verify there are no changes to snapshots we already know 256435938150SAlex Elder * about. 256535938150SAlex Elder * 256635938150SAlex Elder * Assumes the snapshots in the snapshot context are sorted by 256735938150SAlex Elder * snapshot id, highest id first. (Snapshots in the rbd_dev's list 256835938150SAlex Elder * are also maintained in that order.) 2569dfc5606dSYehuda Sadeh */ 2570304f6808SAlex Elder static int rbd_dev_snaps_update(struct rbd_device *rbd_dev) 2571dfc5606dSYehuda Sadeh { 257235938150SAlex Elder struct ceph_snap_context *snapc = rbd_dev->header.snapc; 257335938150SAlex Elder const u32 snap_count = snapc->num_snaps; 257435938150SAlex Elder struct list_head *head = &rbd_dev->snaps; 257535938150SAlex Elder struct list_head *links = head->next; 257635938150SAlex Elder u32 index = 0; 2577dfc5606dSYehuda Sadeh 25789fcbb800SAlex Elder dout("%s: snap count is %u\n", __func__, (unsigned int) snap_count); 257935938150SAlex Elder while (index < snap_count || links != head) { 258035938150SAlex Elder u64 snap_id; 258135938150SAlex Elder struct rbd_snap *snap; 2582cd892126SAlex Elder char *snap_name; 2583cd892126SAlex Elder u64 snap_size = 0; 2584cd892126SAlex Elder u64 snap_features = 0; 2585dfc5606dSYehuda Sadeh 258635938150SAlex Elder snap_id = index < snap_count ? snapc->snaps[index] 258735938150SAlex Elder : CEPH_NOSNAP; 258835938150SAlex Elder snap = links != head ? list_entry(links, struct rbd_snap, node) 258935938150SAlex Elder : NULL; 2590aafb230eSAlex Elder rbd_assert(!snap || snap->id != CEPH_NOSNAP); 2591dfc5606dSYehuda Sadeh 259235938150SAlex Elder if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) { 259335938150SAlex Elder struct list_head *next = links->next; 2594dfc5606dSYehuda Sadeh 259535938150SAlex Elder /* Existing snapshot not in the new snap context */ 2596dfc5606dSYehuda Sadeh 25970d7dbfceSAlex Elder if (rbd_dev->spec->snap_id == snap->id) 2598daba5fdbSAlex Elder rbd_dev->exists = false; 259941f38c2bSAlex Elder rbd_remove_snap_dev(snap); 26009fcbb800SAlex Elder dout("%ssnap id %llu has been removed\n", 26010d7dbfceSAlex Elder rbd_dev->spec->snap_id == snap->id ? 26020d7dbfceSAlex Elder "mapped " : "", 26039fcbb800SAlex Elder (unsigned long long) snap->id); 2604dfc5606dSYehuda Sadeh 260535938150SAlex Elder /* Done with this list entry; advance */ 260635938150SAlex Elder 260735938150SAlex Elder links = next; 260835938150SAlex Elder continue; 2609dfc5606dSYehuda Sadeh } 261035938150SAlex Elder 2611b8b1e2dbSAlex Elder snap_name = rbd_dev_snap_info(rbd_dev, index, 2612cd892126SAlex Elder &snap_size, &snap_features); 2613cd892126SAlex Elder if (IS_ERR(snap_name)) 2614cd892126SAlex Elder return PTR_ERR(snap_name); 2615cd892126SAlex Elder 26169fcbb800SAlex Elder dout("entry %u: snap_id = %llu\n", (unsigned int) snap_count, 26179fcbb800SAlex Elder (unsigned long long) snap_id); 261835938150SAlex Elder if (!snap || (snap_id != CEPH_NOSNAP && snap->id < snap_id)) { 261935938150SAlex Elder struct rbd_snap *new_snap; 262035938150SAlex Elder 262135938150SAlex Elder /* We haven't seen this snapshot before */ 262235938150SAlex Elder 2623c8d18425SAlex Elder new_snap = __rbd_add_snap_dev(rbd_dev, snap_name, 2624cd892126SAlex Elder snap_id, snap_size, snap_features); 26259fcbb800SAlex Elder if (IS_ERR(new_snap)) { 26269fcbb800SAlex Elder int err = PTR_ERR(new_snap); 26279fcbb800SAlex Elder 26289fcbb800SAlex Elder dout(" failed to add dev, error %d\n", err); 26299fcbb800SAlex Elder 26309fcbb800SAlex Elder return err; 26319fcbb800SAlex Elder } 263235938150SAlex Elder 263335938150SAlex Elder /* New goes before existing, or at end of list */ 263435938150SAlex Elder 26359fcbb800SAlex Elder dout(" added dev%s\n", snap ? "" : " at end\n"); 263635938150SAlex Elder if (snap) 263735938150SAlex Elder list_add_tail(&new_snap->node, &snap->node); 263835938150SAlex Elder else 2639523f3258SAlex Elder list_add_tail(&new_snap->node, head); 264035938150SAlex Elder } else { 264135938150SAlex Elder /* Already have this one */ 264235938150SAlex Elder 26439fcbb800SAlex Elder dout(" already present\n"); 26449fcbb800SAlex Elder 2645cd892126SAlex Elder rbd_assert(snap->size == snap_size); 2646aafb230eSAlex Elder rbd_assert(!strcmp(snap->name, snap_name)); 2647cd892126SAlex Elder rbd_assert(snap->features == snap_features); 264835938150SAlex Elder 264935938150SAlex Elder /* Done with this list entry; advance */ 265035938150SAlex Elder 265135938150SAlex Elder links = links->next; 2652dfc5606dSYehuda Sadeh } 265335938150SAlex Elder 265435938150SAlex Elder /* Advance to the next entry in the snapshot context */ 265535938150SAlex Elder 265635938150SAlex Elder index++; 2657dfc5606dSYehuda Sadeh } 26589fcbb800SAlex Elder dout("%s: done\n", __func__); 2659dfc5606dSYehuda Sadeh 2660dfc5606dSYehuda Sadeh return 0; 2661dfc5606dSYehuda Sadeh } 2662dfc5606dSYehuda Sadeh 2663304f6808SAlex Elder /* 2664304f6808SAlex Elder * Scan the list of snapshots and register the devices for any that 2665304f6808SAlex Elder * have not already been registered. 2666304f6808SAlex Elder */ 2667304f6808SAlex Elder static int rbd_dev_snaps_register(struct rbd_device *rbd_dev) 2668304f6808SAlex Elder { 2669304f6808SAlex Elder struct rbd_snap *snap; 2670304f6808SAlex Elder int ret = 0; 2671304f6808SAlex Elder 2672304f6808SAlex Elder dout("%s called\n", __func__); 267386ff77bbSAlex Elder if (WARN_ON(!device_is_registered(&rbd_dev->dev))) 267486ff77bbSAlex Elder return -EIO; 2675304f6808SAlex Elder 2676304f6808SAlex Elder list_for_each_entry(snap, &rbd_dev->snaps, node) { 2677304f6808SAlex Elder if (!rbd_snap_registered(snap)) { 2678304f6808SAlex Elder ret = rbd_register_snap_dev(snap, &rbd_dev->dev); 2679304f6808SAlex Elder if (ret < 0) 2680304f6808SAlex Elder break; 2681304f6808SAlex Elder } 2682304f6808SAlex Elder } 2683304f6808SAlex Elder dout("%s: returning %d\n", __func__, ret); 2684304f6808SAlex Elder 2685304f6808SAlex Elder return ret; 2686304f6808SAlex Elder } 2687304f6808SAlex Elder 2688dfc5606dSYehuda Sadeh static int rbd_bus_add_dev(struct rbd_device *rbd_dev) 2689dfc5606dSYehuda Sadeh { 2690dfc5606dSYehuda Sadeh struct device *dev; 2691cd789ab9SAlex Elder int ret; 2692dfc5606dSYehuda Sadeh 2693dfc5606dSYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2694dfc5606dSYehuda Sadeh 2695cd789ab9SAlex Elder dev = &rbd_dev->dev; 2696dfc5606dSYehuda Sadeh dev->bus = &rbd_bus_type; 2697dfc5606dSYehuda Sadeh dev->type = &rbd_device_type; 2698dfc5606dSYehuda Sadeh dev->parent = &rbd_root_dev; 2699dfc5606dSYehuda Sadeh dev->release = rbd_dev_release; 2700de71a297SAlex Elder dev_set_name(dev, "%d", rbd_dev->dev_id); 2701dfc5606dSYehuda Sadeh ret = device_register(dev); 2702dfc5606dSYehuda Sadeh 2703dfc5606dSYehuda Sadeh mutex_unlock(&ctl_mutex); 2704cd789ab9SAlex Elder 2705dfc5606dSYehuda Sadeh return ret; 2706602adf40SYehuda Sadeh } 2707602adf40SYehuda Sadeh 2708dfc5606dSYehuda Sadeh static void rbd_bus_del_dev(struct rbd_device *rbd_dev) 2709dfc5606dSYehuda Sadeh { 2710dfc5606dSYehuda Sadeh device_unregister(&rbd_dev->dev); 2711dfc5606dSYehuda Sadeh } 2712dfc5606dSYehuda Sadeh 271359c2be1eSYehuda Sadeh static int rbd_init_watch_dev(struct rbd_device *rbd_dev) 271459c2be1eSYehuda Sadeh { 271559c2be1eSYehuda Sadeh int ret, rc; 271659c2be1eSYehuda Sadeh 271759c2be1eSYehuda Sadeh do { 27180e6f322dSAlex Elder ret = rbd_req_sync_watch(rbd_dev); 271959c2be1eSYehuda Sadeh if (ret == -ERANGE) { 2720117973fbSAlex Elder rc = rbd_dev_refresh(rbd_dev, NULL); 272159c2be1eSYehuda Sadeh if (rc < 0) 272259c2be1eSYehuda Sadeh return rc; 272359c2be1eSYehuda Sadeh } 272459c2be1eSYehuda Sadeh } while (ret == -ERANGE); 272559c2be1eSYehuda Sadeh 272659c2be1eSYehuda Sadeh return ret; 272759c2be1eSYehuda Sadeh } 272859c2be1eSYehuda Sadeh 2729e2839308SAlex Elder static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0); 27301ddbe94eSAlex Elder 27311ddbe94eSAlex Elder /* 2732499afd5bSAlex Elder * Get a unique rbd identifier for the given new rbd_dev, and add 2733499afd5bSAlex Elder * the rbd_dev to the global list. The minimum rbd id is 1. 27341ddbe94eSAlex Elder */ 2735e2839308SAlex Elder static void rbd_dev_id_get(struct rbd_device *rbd_dev) 2736b7f23c36SAlex Elder { 2737e2839308SAlex Elder rbd_dev->dev_id = atomic64_inc_return(&rbd_dev_id_max); 2738499afd5bSAlex Elder 2739499afd5bSAlex Elder spin_lock(&rbd_dev_list_lock); 2740499afd5bSAlex Elder list_add_tail(&rbd_dev->node, &rbd_dev_list); 2741499afd5bSAlex Elder spin_unlock(&rbd_dev_list_lock); 2742e2839308SAlex Elder dout("rbd_dev %p given dev id %llu\n", rbd_dev, 2743e2839308SAlex Elder (unsigned long long) rbd_dev->dev_id); 2744b7f23c36SAlex Elder } 2745b7f23c36SAlex Elder 27461ddbe94eSAlex Elder /* 2747499afd5bSAlex Elder * Remove an rbd_dev from the global list, and record that its 2748499afd5bSAlex Elder * identifier is no longer in use. 27491ddbe94eSAlex Elder */ 2750e2839308SAlex Elder static void rbd_dev_id_put(struct rbd_device *rbd_dev) 27511ddbe94eSAlex Elder { 2752d184f6bfSAlex Elder struct list_head *tmp; 2753de71a297SAlex Elder int rbd_id = rbd_dev->dev_id; 2754d184f6bfSAlex Elder int max_id; 2755d184f6bfSAlex Elder 2756aafb230eSAlex Elder rbd_assert(rbd_id > 0); 2757499afd5bSAlex Elder 2758e2839308SAlex Elder dout("rbd_dev %p released dev id %llu\n", rbd_dev, 2759e2839308SAlex Elder (unsigned long long) rbd_dev->dev_id); 2760499afd5bSAlex Elder spin_lock(&rbd_dev_list_lock); 2761499afd5bSAlex Elder list_del_init(&rbd_dev->node); 2762d184f6bfSAlex Elder 2763d184f6bfSAlex Elder /* 2764d184f6bfSAlex Elder * If the id being "put" is not the current maximum, there 2765d184f6bfSAlex Elder * is nothing special we need to do. 2766d184f6bfSAlex Elder */ 2767e2839308SAlex Elder if (rbd_id != atomic64_read(&rbd_dev_id_max)) { 2768d184f6bfSAlex Elder spin_unlock(&rbd_dev_list_lock); 2769d184f6bfSAlex Elder return; 2770d184f6bfSAlex Elder } 2771d184f6bfSAlex Elder 2772d184f6bfSAlex Elder /* 2773d184f6bfSAlex Elder * We need to update the current maximum id. Search the 2774d184f6bfSAlex Elder * list to find out what it is. We're more likely to find 2775d184f6bfSAlex Elder * the maximum at the end, so search the list backward. 2776d184f6bfSAlex Elder */ 2777d184f6bfSAlex Elder max_id = 0; 2778d184f6bfSAlex Elder list_for_each_prev(tmp, &rbd_dev_list) { 2779d184f6bfSAlex Elder struct rbd_device *rbd_dev; 2780d184f6bfSAlex Elder 2781d184f6bfSAlex Elder rbd_dev = list_entry(tmp, struct rbd_device, node); 2782b213e0b1SAlex Elder if (rbd_dev->dev_id > max_id) 2783b213e0b1SAlex Elder max_id = rbd_dev->dev_id; 2784d184f6bfSAlex Elder } 2785499afd5bSAlex Elder spin_unlock(&rbd_dev_list_lock); 27861ddbe94eSAlex Elder 27871ddbe94eSAlex Elder /* 2788e2839308SAlex Elder * The max id could have been updated by rbd_dev_id_get(), in 2789d184f6bfSAlex Elder * which case it now accurately reflects the new maximum. 2790d184f6bfSAlex Elder * Be careful not to overwrite the maximum value in that 2791d184f6bfSAlex Elder * case. 27921ddbe94eSAlex Elder */ 2793e2839308SAlex Elder atomic64_cmpxchg(&rbd_dev_id_max, rbd_id, max_id); 2794e2839308SAlex Elder dout(" max dev id has been reset\n"); 2795b7f23c36SAlex Elder } 2796b7f23c36SAlex Elder 2797a725f65eSAlex Elder /* 2798e28fff26SAlex Elder * Skips over white space at *buf, and updates *buf to point to the 2799e28fff26SAlex Elder * first found non-space character (if any). Returns the length of 2800593a9e7bSAlex Elder * the token (string of non-white space characters) found. Note 2801593a9e7bSAlex Elder * that *buf must be terminated with '\0'. 2802e28fff26SAlex Elder */ 2803e28fff26SAlex Elder static inline size_t next_token(const char **buf) 2804e28fff26SAlex Elder { 2805e28fff26SAlex Elder /* 2806e28fff26SAlex Elder * These are the characters that produce nonzero for 2807e28fff26SAlex Elder * isspace() in the "C" and "POSIX" locales. 2808e28fff26SAlex Elder */ 2809e28fff26SAlex Elder const char *spaces = " \f\n\r\t\v"; 2810e28fff26SAlex Elder 2811e28fff26SAlex Elder *buf += strspn(*buf, spaces); /* Find start of token */ 2812e28fff26SAlex Elder 2813e28fff26SAlex Elder return strcspn(*buf, spaces); /* Return token length */ 2814e28fff26SAlex Elder } 2815e28fff26SAlex Elder 2816e28fff26SAlex Elder /* 2817e28fff26SAlex Elder * Finds the next token in *buf, and if the provided token buffer is 2818e28fff26SAlex Elder * big enough, copies the found token into it. The result, if 2819593a9e7bSAlex Elder * copied, is guaranteed to be terminated with '\0'. Note that *buf 2820593a9e7bSAlex Elder * must be terminated with '\0' on entry. 2821e28fff26SAlex Elder * 2822e28fff26SAlex Elder * Returns the length of the token found (not including the '\0'). 2823e28fff26SAlex Elder * Return value will be 0 if no token is found, and it will be >= 2824e28fff26SAlex Elder * token_size if the token would not fit. 2825e28fff26SAlex Elder * 2826593a9e7bSAlex Elder * The *buf pointer will be updated to point beyond the end of the 2827e28fff26SAlex Elder * found token. Note that this occurs even if the token buffer is 2828e28fff26SAlex Elder * too small to hold it. 2829e28fff26SAlex Elder */ 2830e28fff26SAlex Elder static inline size_t copy_token(const char **buf, 2831e28fff26SAlex Elder char *token, 2832e28fff26SAlex Elder size_t token_size) 2833e28fff26SAlex Elder { 2834e28fff26SAlex Elder size_t len; 2835e28fff26SAlex Elder 2836e28fff26SAlex Elder len = next_token(buf); 2837e28fff26SAlex Elder if (len < token_size) { 2838e28fff26SAlex Elder memcpy(token, *buf, len); 2839e28fff26SAlex Elder *(token + len) = '\0'; 2840e28fff26SAlex Elder } 2841e28fff26SAlex Elder *buf += len; 2842e28fff26SAlex Elder 2843e28fff26SAlex Elder return len; 2844e28fff26SAlex Elder } 2845e28fff26SAlex Elder 2846e28fff26SAlex Elder /* 2847ea3352f4SAlex Elder * Finds the next token in *buf, dynamically allocates a buffer big 2848ea3352f4SAlex Elder * enough to hold a copy of it, and copies the token into the new 2849ea3352f4SAlex Elder * buffer. The copy is guaranteed to be terminated with '\0'. Note 2850ea3352f4SAlex Elder * that a duplicate buffer is created even for a zero-length token. 2851ea3352f4SAlex Elder * 2852ea3352f4SAlex Elder * Returns a pointer to the newly-allocated duplicate, or a null 2853ea3352f4SAlex Elder * pointer if memory for the duplicate was not available. If 2854ea3352f4SAlex Elder * the lenp argument is a non-null pointer, the length of the token 2855ea3352f4SAlex Elder * (not including the '\0') is returned in *lenp. 2856ea3352f4SAlex Elder * 2857ea3352f4SAlex Elder * If successful, the *buf pointer will be updated to point beyond 2858ea3352f4SAlex Elder * the end of the found token. 2859ea3352f4SAlex Elder * 2860ea3352f4SAlex Elder * Note: uses GFP_KERNEL for allocation. 2861ea3352f4SAlex Elder */ 2862ea3352f4SAlex Elder static inline char *dup_token(const char **buf, size_t *lenp) 2863ea3352f4SAlex Elder { 2864ea3352f4SAlex Elder char *dup; 2865ea3352f4SAlex Elder size_t len; 2866ea3352f4SAlex Elder 2867ea3352f4SAlex Elder len = next_token(buf); 2868ea3352f4SAlex Elder dup = kmalloc(len + 1, GFP_KERNEL); 2869ea3352f4SAlex Elder if (!dup) 2870ea3352f4SAlex Elder return NULL; 2871ea3352f4SAlex Elder 2872ea3352f4SAlex Elder memcpy(dup, *buf, len); 2873ea3352f4SAlex Elder *(dup + len) = '\0'; 2874ea3352f4SAlex Elder *buf += len; 2875ea3352f4SAlex Elder 2876ea3352f4SAlex Elder if (lenp) 2877ea3352f4SAlex Elder *lenp = len; 2878ea3352f4SAlex Elder 2879ea3352f4SAlex Elder return dup; 2880ea3352f4SAlex Elder } 2881ea3352f4SAlex Elder 2882ea3352f4SAlex Elder /* 2883859c31dfSAlex Elder * Parse the options provided for an "rbd add" (i.e., rbd image 2884859c31dfSAlex Elder * mapping) request. These arrive via a write to /sys/bus/rbd/add, 2885859c31dfSAlex Elder * and the data written is passed here via a NUL-terminated buffer. 2886859c31dfSAlex Elder * Returns 0 if successful or an error code otherwise. 2887d22f76e7SAlex Elder * 2888859c31dfSAlex Elder * The information extracted from these options is recorded in 2889859c31dfSAlex Elder * the other parameters which return dynamically-allocated 2890859c31dfSAlex Elder * structures: 2891859c31dfSAlex Elder * ceph_opts 2892859c31dfSAlex Elder * The address of a pointer that will refer to a ceph options 2893859c31dfSAlex Elder * structure. Caller must release the returned pointer using 2894859c31dfSAlex Elder * ceph_destroy_options() when it is no longer needed. 2895859c31dfSAlex Elder * rbd_opts 2896859c31dfSAlex Elder * Address of an rbd options pointer. Fully initialized by 2897859c31dfSAlex Elder * this function; caller must release with kfree(). 2898859c31dfSAlex Elder * spec 2899859c31dfSAlex Elder * Address of an rbd image specification pointer. Fully 2900859c31dfSAlex Elder * initialized by this function based on parsed options. 2901859c31dfSAlex Elder * Caller must release with rbd_spec_put(). 2902859c31dfSAlex Elder * 2903859c31dfSAlex Elder * The options passed take this form: 2904859c31dfSAlex Elder * <mon_addrs> <options> <pool_name> <image_name> [<snap_id>] 2905859c31dfSAlex Elder * where: 2906859c31dfSAlex Elder * <mon_addrs> 2907859c31dfSAlex Elder * A comma-separated list of one or more monitor addresses. 2908859c31dfSAlex Elder * A monitor address is an ip address, optionally followed 2909859c31dfSAlex Elder * by a port number (separated by a colon). 2910859c31dfSAlex Elder * I.e.: ip1[:port1][,ip2[:port2]...] 2911859c31dfSAlex Elder * <options> 2912859c31dfSAlex Elder * A comma-separated list of ceph and/or rbd options. 2913859c31dfSAlex Elder * <pool_name> 2914859c31dfSAlex Elder * The name of the rados pool containing the rbd image. 2915859c31dfSAlex Elder * <image_name> 2916859c31dfSAlex Elder * The name of the image in that pool to map. 2917859c31dfSAlex Elder * <snap_id> 2918859c31dfSAlex Elder * An optional snapshot id. If provided, the mapping will 2919859c31dfSAlex Elder * present data from the image at the time that snapshot was 2920859c31dfSAlex Elder * created. The image head is used if no snapshot id is 2921859c31dfSAlex Elder * provided. Snapshot mappings are always read-only. 2922a725f65eSAlex Elder */ 2923859c31dfSAlex Elder static int rbd_add_parse_args(const char *buf, 2924dc79b113SAlex Elder struct ceph_options **ceph_opts, 2925859c31dfSAlex Elder struct rbd_options **opts, 2926859c31dfSAlex Elder struct rbd_spec **rbd_spec) 2927a725f65eSAlex Elder { 2928e28fff26SAlex Elder size_t len; 2929859c31dfSAlex Elder char *options; 29300ddebc0cSAlex Elder const char *mon_addrs; 29310ddebc0cSAlex Elder size_t mon_addrs_size; 2932859c31dfSAlex Elder struct rbd_spec *spec = NULL; 29334e9afebaSAlex Elder struct rbd_options *rbd_opts = NULL; 2934859c31dfSAlex Elder struct ceph_options *copts; 2935dc79b113SAlex Elder int ret; 2936e28fff26SAlex Elder 2937e28fff26SAlex Elder /* The first four tokens are required */ 2938e28fff26SAlex Elder 29397ef3214aSAlex Elder len = next_token(&buf); 29407ef3214aSAlex Elder if (!len) 2941dc79b113SAlex Elder return -EINVAL; /* Missing monitor address(es) */ 29420ddebc0cSAlex Elder mon_addrs = buf; 2943f28e565aSAlex Elder mon_addrs_size = len + 1; 29447ef3214aSAlex Elder buf += len; 2945a725f65eSAlex Elder 2946dc79b113SAlex Elder ret = -EINVAL; 2947f28e565aSAlex Elder options = dup_token(&buf, NULL); 2948f28e565aSAlex Elder if (!options) 2949dc79b113SAlex Elder return -ENOMEM; 2950f28e565aSAlex Elder if (!*options) 2951f28e565aSAlex Elder goto out_err; /* Missing options */ 2952a725f65eSAlex Elder 2953859c31dfSAlex Elder spec = rbd_spec_alloc(); 2954859c31dfSAlex Elder if (!spec) 2955f28e565aSAlex Elder goto out_mem; 2956859c31dfSAlex Elder 2957859c31dfSAlex Elder spec->pool_name = dup_token(&buf, NULL); 2958859c31dfSAlex Elder if (!spec->pool_name) 2959859c31dfSAlex Elder goto out_mem; 2960859c31dfSAlex Elder if (!*spec->pool_name) 2961f28e565aSAlex Elder goto out_err; /* Missing pool name */ 2962e28fff26SAlex Elder 2963859c31dfSAlex Elder spec->image_name = dup_token(&buf, &spec->image_name_len); 2964859c31dfSAlex Elder if (!spec->image_name) 2965f28e565aSAlex Elder goto out_mem; 2966859c31dfSAlex Elder if (!*spec->image_name) 2967f28e565aSAlex Elder goto out_err; /* Missing image name */ 2968e28fff26SAlex Elder 2969f28e565aSAlex Elder /* 2970f28e565aSAlex Elder * Snapshot name is optional; default is to use "-" 2971f28e565aSAlex Elder * (indicating the head/no snapshot). 2972f28e565aSAlex Elder */ 29733feeb894SAlex Elder len = next_token(&buf); 2974820a5f3eSAlex Elder if (!len) { 29753feeb894SAlex Elder buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */ 29763feeb894SAlex Elder len = sizeof (RBD_SNAP_HEAD_NAME) - 1; 2977f28e565aSAlex Elder } else if (len > RBD_MAX_SNAP_NAME_LEN) { 2978dc79b113SAlex Elder ret = -ENAMETOOLONG; 2979f28e565aSAlex Elder goto out_err; 2980849b4260SAlex Elder } 2981859c31dfSAlex Elder spec->snap_name = kmalloc(len + 1, GFP_KERNEL); 2982859c31dfSAlex Elder if (!spec->snap_name) 2983f28e565aSAlex Elder goto out_mem; 2984859c31dfSAlex Elder memcpy(spec->snap_name, buf, len); 2985859c31dfSAlex Elder *(spec->snap_name + len) = '\0'; 2986e5c35534SAlex Elder 29870ddebc0cSAlex Elder /* Initialize all rbd options to the defaults */ 2988e28fff26SAlex Elder 29894e9afebaSAlex Elder rbd_opts = kzalloc(sizeof (*rbd_opts), GFP_KERNEL); 29904e9afebaSAlex Elder if (!rbd_opts) 29914e9afebaSAlex Elder goto out_mem; 29924e9afebaSAlex Elder 29934e9afebaSAlex Elder rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; 2994d22f76e7SAlex Elder 2995859c31dfSAlex Elder copts = ceph_parse_options(options, mon_addrs, 29960ddebc0cSAlex Elder mon_addrs + mon_addrs_size - 1, 29974e9afebaSAlex Elder parse_rbd_opts_token, rbd_opts); 2998859c31dfSAlex Elder if (IS_ERR(copts)) { 2999859c31dfSAlex Elder ret = PTR_ERR(copts); 3000dc79b113SAlex Elder goto out_err; 3001dc79b113SAlex Elder } 3002859c31dfSAlex Elder kfree(options); 3003859c31dfSAlex Elder 3004859c31dfSAlex Elder *ceph_opts = copts; 30054e9afebaSAlex Elder *opts = rbd_opts; 3006859c31dfSAlex Elder *rbd_spec = spec; 30070ddebc0cSAlex Elder 3008dc79b113SAlex Elder return 0; 3009f28e565aSAlex Elder out_mem: 3010dc79b113SAlex Elder ret = -ENOMEM; 3011d22f76e7SAlex Elder out_err: 3012859c31dfSAlex Elder kfree(rbd_opts); 3013859c31dfSAlex Elder rbd_spec_put(spec); 3014f28e565aSAlex Elder kfree(options); 3015d22f76e7SAlex Elder 3016dc79b113SAlex Elder return ret; 3017a725f65eSAlex Elder } 3018a725f65eSAlex Elder 3019589d30e0SAlex Elder /* 3020589d30e0SAlex Elder * An rbd format 2 image has a unique identifier, distinct from the 3021589d30e0SAlex Elder * name given to it by the user. Internally, that identifier is 3022589d30e0SAlex Elder * what's used to specify the names of objects related to the image. 3023589d30e0SAlex Elder * 3024589d30e0SAlex Elder * A special "rbd id" object is used to map an rbd image name to its 3025589d30e0SAlex Elder * id. If that object doesn't exist, then there is no v2 rbd image 3026589d30e0SAlex Elder * with the supplied name. 3027589d30e0SAlex Elder * 3028589d30e0SAlex Elder * This function will record the given rbd_dev's image_id field if 3029589d30e0SAlex Elder * it can be determined, and in that case will return 0. If any 3030589d30e0SAlex Elder * errors occur a negative errno will be returned and the rbd_dev's 3031589d30e0SAlex Elder * image_id field will be unchanged (and should be NULL). 3032589d30e0SAlex Elder */ 3033589d30e0SAlex Elder static int rbd_dev_image_id(struct rbd_device *rbd_dev) 3034589d30e0SAlex Elder { 3035589d30e0SAlex Elder int ret; 3036589d30e0SAlex Elder size_t size; 3037589d30e0SAlex Elder char *object_name; 3038589d30e0SAlex Elder void *response; 3039589d30e0SAlex Elder void *p; 3040589d30e0SAlex Elder 3041589d30e0SAlex Elder /* 3042589d30e0SAlex Elder * First, see if the format 2 image id file exists, and if 3043589d30e0SAlex Elder * so, get the image's persistent id from it. 3044589d30e0SAlex Elder */ 30450d7dbfceSAlex Elder size = sizeof (RBD_ID_PREFIX) + rbd_dev->spec->image_name_len; 3046589d30e0SAlex Elder object_name = kmalloc(size, GFP_NOIO); 3047589d30e0SAlex Elder if (!object_name) 3048589d30e0SAlex Elder return -ENOMEM; 30490d7dbfceSAlex Elder sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->spec->image_name); 3050589d30e0SAlex Elder dout("rbd id object name is %s\n", object_name); 3051589d30e0SAlex Elder 3052589d30e0SAlex Elder /* Response will be an encoded string, which includes a length */ 3053589d30e0SAlex Elder 3054589d30e0SAlex Elder size = sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX; 3055589d30e0SAlex Elder response = kzalloc(size, GFP_NOIO); 3056589d30e0SAlex Elder if (!response) { 3057589d30e0SAlex Elder ret = -ENOMEM; 3058589d30e0SAlex Elder goto out; 3059589d30e0SAlex Elder } 3060589d30e0SAlex Elder 3061589d30e0SAlex Elder ret = rbd_req_sync_exec(rbd_dev, object_name, 3062589d30e0SAlex Elder "rbd", "get_id", 3063589d30e0SAlex Elder NULL, 0, 3064589d30e0SAlex Elder response, RBD_IMAGE_ID_LEN_MAX, 3065589d30e0SAlex Elder CEPH_OSD_FLAG_READ, NULL); 3066589d30e0SAlex Elder dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); 3067589d30e0SAlex Elder if (ret < 0) 3068589d30e0SAlex Elder goto out; 3069a0ea3a40SAlex Elder ret = 0; /* rbd_req_sync_exec() can return positive */ 3070589d30e0SAlex Elder 3071589d30e0SAlex Elder p = response; 30720d7dbfceSAlex Elder rbd_dev->spec->image_id = ceph_extract_encoded_string(&p, 3073589d30e0SAlex Elder p + RBD_IMAGE_ID_LEN_MAX, 30740d7dbfceSAlex Elder &rbd_dev->spec->image_id_len, 3075589d30e0SAlex Elder GFP_NOIO); 30760d7dbfceSAlex Elder if (IS_ERR(rbd_dev->spec->image_id)) { 30770d7dbfceSAlex Elder ret = PTR_ERR(rbd_dev->spec->image_id); 30780d7dbfceSAlex Elder rbd_dev->spec->image_id = NULL; 3079589d30e0SAlex Elder } else { 30800d7dbfceSAlex Elder dout("image_id is %s\n", rbd_dev->spec->image_id); 3081589d30e0SAlex Elder } 3082589d30e0SAlex Elder out: 3083589d30e0SAlex Elder kfree(response); 3084589d30e0SAlex Elder kfree(object_name); 3085589d30e0SAlex Elder 3086589d30e0SAlex Elder return ret; 3087589d30e0SAlex Elder } 3088589d30e0SAlex Elder 3089a30b71b9SAlex Elder static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) 3090a30b71b9SAlex Elder { 3091a30b71b9SAlex Elder int ret; 3092a30b71b9SAlex Elder size_t size; 3093a30b71b9SAlex Elder 3094a30b71b9SAlex Elder /* Version 1 images have no id; empty string is used */ 3095a30b71b9SAlex Elder 30960d7dbfceSAlex Elder rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL); 30970d7dbfceSAlex Elder if (!rbd_dev->spec->image_id) 3098a30b71b9SAlex Elder return -ENOMEM; 30990d7dbfceSAlex Elder rbd_dev->spec->image_id_len = 0; 3100a30b71b9SAlex Elder 3101a30b71b9SAlex Elder /* Record the header object name for this rbd image. */ 3102a30b71b9SAlex Elder 31030d7dbfceSAlex Elder size = rbd_dev->spec->image_name_len + sizeof (RBD_SUFFIX); 3104a30b71b9SAlex Elder rbd_dev->header_name = kmalloc(size, GFP_KERNEL); 3105a30b71b9SAlex Elder if (!rbd_dev->header_name) { 3106a30b71b9SAlex Elder ret = -ENOMEM; 3107a30b71b9SAlex Elder goto out_err; 3108a30b71b9SAlex Elder } 31090d7dbfceSAlex Elder sprintf(rbd_dev->header_name, "%s%s", 31100d7dbfceSAlex Elder rbd_dev->spec->image_name, RBD_SUFFIX); 3111a30b71b9SAlex Elder 3112a30b71b9SAlex Elder /* Populate rbd image metadata */ 3113a30b71b9SAlex Elder 3114a30b71b9SAlex Elder ret = rbd_read_header(rbd_dev, &rbd_dev->header); 3115a30b71b9SAlex Elder if (ret < 0) 3116a30b71b9SAlex Elder goto out_err; 3117a30b71b9SAlex Elder rbd_dev->image_format = 1; 3118a30b71b9SAlex Elder 3119a30b71b9SAlex Elder dout("discovered version 1 image, header name is %s\n", 3120a30b71b9SAlex Elder rbd_dev->header_name); 3121a30b71b9SAlex Elder 3122a30b71b9SAlex Elder return 0; 3123a30b71b9SAlex Elder 3124a30b71b9SAlex Elder out_err: 3125a30b71b9SAlex Elder kfree(rbd_dev->header_name); 3126a30b71b9SAlex Elder rbd_dev->header_name = NULL; 31270d7dbfceSAlex Elder kfree(rbd_dev->spec->image_id); 31280d7dbfceSAlex Elder rbd_dev->spec->image_id = NULL; 3129a30b71b9SAlex Elder 3130a30b71b9SAlex Elder return ret; 3131a30b71b9SAlex Elder } 3132a30b71b9SAlex Elder 3133a30b71b9SAlex Elder static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) 3134a30b71b9SAlex Elder { 3135a30b71b9SAlex Elder size_t size; 31369d475de5SAlex Elder int ret; 31376e14b1a6SAlex Elder u64 ver = 0; 3138a30b71b9SAlex Elder 3139a30b71b9SAlex Elder /* 3140a30b71b9SAlex Elder * Image id was filled in by the caller. Record the header 3141a30b71b9SAlex Elder * object name for this rbd image. 3142a30b71b9SAlex Elder */ 31430d7dbfceSAlex Elder size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->spec->image_id_len; 3144a30b71b9SAlex Elder rbd_dev->header_name = kmalloc(size, GFP_KERNEL); 3145a30b71b9SAlex Elder if (!rbd_dev->header_name) 3146a30b71b9SAlex Elder return -ENOMEM; 3147a30b71b9SAlex Elder sprintf(rbd_dev->header_name, "%s%s", 31480d7dbfceSAlex Elder RBD_HEADER_PREFIX, rbd_dev->spec->image_id); 31499d475de5SAlex Elder 31509d475de5SAlex Elder /* Get the size and object order for the image */ 31519d475de5SAlex Elder 31529d475de5SAlex Elder ret = rbd_dev_v2_image_size(rbd_dev); 31539d475de5SAlex Elder if (ret < 0) 31549d475de5SAlex Elder goto out_err; 31551e130199SAlex Elder 31561e130199SAlex Elder /* Get the object prefix (a.k.a. block_name) for the image */ 31571e130199SAlex Elder 31581e130199SAlex Elder ret = rbd_dev_v2_object_prefix(rbd_dev); 31591e130199SAlex Elder if (ret < 0) 31601e130199SAlex Elder goto out_err; 3161b1b5402aSAlex Elder 3162d889140cSAlex Elder /* Get the and check features for the image */ 3163b1b5402aSAlex Elder 3164b1b5402aSAlex Elder ret = rbd_dev_v2_features(rbd_dev); 3165b1b5402aSAlex Elder if (ret < 0) 3166b1b5402aSAlex Elder goto out_err; 316735d489f9SAlex Elder 31686e14b1a6SAlex Elder /* crypto and compression type aren't (yet) supported for v2 images */ 316935d489f9SAlex Elder 31706e14b1a6SAlex Elder rbd_dev->header.crypt_type = 0; 31716e14b1a6SAlex Elder rbd_dev->header.comp_type = 0; 31726e14b1a6SAlex Elder 31736e14b1a6SAlex Elder /* Get the snapshot context, plus the header version */ 31746e14b1a6SAlex Elder 31756e14b1a6SAlex Elder ret = rbd_dev_v2_snap_context(rbd_dev, &ver); 317635d489f9SAlex Elder if (ret) 317735d489f9SAlex Elder goto out_err; 31786e14b1a6SAlex Elder rbd_dev->header.obj_version = ver; 31796e14b1a6SAlex Elder 3180a30b71b9SAlex Elder rbd_dev->image_format = 2; 3181a30b71b9SAlex Elder 3182a30b71b9SAlex Elder dout("discovered version 2 image, header name is %s\n", 3183a30b71b9SAlex Elder rbd_dev->header_name); 3184a30b71b9SAlex Elder 318535152979SAlex Elder return 0; 31869d475de5SAlex Elder out_err: 31879d475de5SAlex Elder kfree(rbd_dev->header_name); 31889d475de5SAlex Elder rbd_dev->header_name = NULL; 31891e130199SAlex Elder kfree(rbd_dev->header.object_prefix); 31901e130199SAlex Elder rbd_dev->header.object_prefix = NULL; 31919d475de5SAlex Elder 31929d475de5SAlex Elder return ret; 3193a30b71b9SAlex Elder } 3194a30b71b9SAlex Elder 3195a30b71b9SAlex Elder /* 3196a30b71b9SAlex Elder * Probe for the existence of the header object for the given rbd 3197a30b71b9SAlex Elder * device. For format 2 images this includes determining the image 3198a30b71b9SAlex Elder * id. 3199a30b71b9SAlex Elder */ 3200a30b71b9SAlex Elder static int rbd_dev_probe(struct rbd_device *rbd_dev) 3201a30b71b9SAlex Elder { 3202a30b71b9SAlex Elder int ret; 3203a30b71b9SAlex Elder 3204a30b71b9SAlex Elder /* 3205a30b71b9SAlex Elder * Get the id from the image id object. If it's not a 3206a30b71b9SAlex Elder * format 2 image, we'll get ENOENT back, and we'll assume 3207a30b71b9SAlex Elder * it's a format 1 image. 3208a30b71b9SAlex Elder */ 3209a30b71b9SAlex Elder ret = rbd_dev_image_id(rbd_dev); 3210a30b71b9SAlex Elder if (ret) 3211a30b71b9SAlex Elder ret = rbd_dev_v1_probe(rbd_dev); 3212a30b71b9SAlex Elder else 3213a30b71b9SAlex Elder ret = rbd_dev_v2_probe(rbd_dev); 3214a30b71b9SAlex Elder if (ret) 3215a30b71b9SAlex Elder dout("probe failed, returning %d\n", ret); 3216a30b71b9SAlex Elder 3217a30b71b9SAlex Elder return ret; 3218a30b71b9SAlex Elder } 3219a30b71b9SAlex Elder 322059c2be1eSYehuda Sadeh static ssize_t rbd_add(struct bus_type *bus, 322159c2be1eSYehuda Sadeh const char *buf, 322259c2be1eSYehuda Sadeh size_t count) 3223602adf40SYehuda Sadeh { 3224cb8627c7SAlex Elder struct rbd_device *rbd_dev = NULL; 3225dc79b113SAlex Elder struct ceph_options *ceph_opts = NULL; 32264e9afebaSAlex Elder struct rbd_options *rbd_opts = NULL; 3227859c31dfSAlex Elder struct rbd_spec *spec = NULL; 32289d3997fdSAlex Elder struct rbd_client *rbdc; 322927cc2594SAlex Elder struct ceph_osd_client *osdc; 323027cc2594SAlex Elder int rc = -ENOMEM; 3231602adf40SYehuda Sadeh 3232602adf40SYehuda Sadeh if (!try_module_get(THIS_MODULE)) 3233602adf40SYehuda Sadeh return -ENODEV; 3234602adf40SYehuda Sadeh 3235cb8627c7SAlex Elder rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); 3236cb8627c7SAlex Elder if (!rbd_dev) 32374e9afebaSAlex Elder return -ENOMEM; 3238602adf40SYehuda Sadeh 3239602adf40SYehuda Sadeh /* static rbd_device initialization */ 3240602adf40SYehuda Sadeh spin_lock_init(&rbd_dev->lock); 3241602adf40SYehuda Sadeh INIT_LIST_HEAD(&rbd_dev->node); 3242dfc5606dSYehuda Sadeh INIT_LIST_HEAD(&rbd_dev->snaps); 3243c666601aSJosh Durgin init_rwsem(&rbd_dev->header_rwsem); 3244602adf40SYehuda Sadeh 3245a725f65eSAlex Elder /* parse add command */ 3246859c31dfSAlex Elder rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec); 3247dc79b113SAlex Elder if (rc < 0) 324885ae8926SAlex Elder goto err_out_mem; 3249859c31dfSAlex Elder 32504e9afebaSAlex Elder rbd_dev->mapping.read_only = rbd_opts->read_only; 3251a725f65eSAlex Elder 32529d3997fdSAlex Elder rbdc = rbd_get_client(ceph_opts); 32539d3997fdSAlex Elder if (IS_ERR(rbdc)) { 32549d3997fdSAlex Elder rc = PTR_ERR(rbdc); 32550ddebc0cSAlex Elder goto err_out_args; 32569d3997fdSAlex Elder } 32579d3997fdSAlex Elder rbd_dev->rbd_client = rbdc; 325878cea76eSAlex Elder ceph_opts = NULL; /* ceph_opts now owned by rbd_dev client */ 3259602adf40SYehuda Sadeh 3260602adf40SYehuda Sadeh /* pick the pool */ 32619d3997fdSAlex Elder osdc = &rbdc->client->osdc; 3262859c31dfSAlex Elder rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name); 3263602adf40SYehuda Sadeh if (rc < 0) 3264602adf40SYehuda Sadeh goto err_out_client; 3265859c31dfSAlex Elder spec->pool_id = (u64) rc; 3266859c31dfSAlex Elder 3267859c31dfSAlex Elder rbd_dev->spec = spec; 3268602adf40SYehuda Sadeh 3269a30b71b9SAlex Elder rc = rbd_dev_probe(rbd_dev); 3270a30b71b9SAlex Elder if (rc < 0) 3271589d30e0SAlex Elder goto err_out_client; 327205fd6f6fSAlex Elder 327305fd6f6fSAlex Elder /* no need to lock here, as rbd_dev is not registered yet */ 327405fd6f6fSAlex Elder rc = rbd_dev_snaps_update(rbd_dev); 327505fd6f6fSAlex Elder if (rc) 327641f38c2bSAlex Elder goto err_out_probe; 327705fd6f6fSAlex Elder 3278819d52bfSAlex Elder rc = rbd_dev_set_mapping(rbd_dev); 327905fd6f6fSAlex Elder if (rc) 328041f38c2bSAlex Elder goto err_out_snaps; 328105fd6f6fSAlex Elder 328285ae8926SAlex Elder /* generate unique id: find highest unique id, add one */ 328385ae8926SAlex Elder rbd_dev_id_get(rbd_dev); 328485ae8926SAlex Elder 328585ae8926SAlex Elder /* Fill in the device name, now that we have its id. */ 328685ae8926SAlex Elder BUILD_BUG_ON(DEV_NAME_LEN 328785ae8926SAlex Elder < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH); 328885ae8926SAlex Elder sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id); 328985ae8926SAlex Elder 329085ae8926SAlex Elder /* Get our block major device number. */ 329185ae8926SAlex Elder 329227cc2594SAlex Elder rc = register_blkdev(0, rbd_dev->name); 329327cc2594SAlex Elder if (rc < 0) 329485ae8926SAlex Elder goto err_out_id; 329527cc2594SAlex Elder rbd_dev->major = rc; 3296602adf40SYehuda Sadeh 32970f308a31SAlex Elder /* Set up the blkdev mapping. */ 32980f308a31SAlex Elder 32990f308a31SAlex Elder rc = rbd_init_disk(rbd_dev); 3300dfc5606dSYehuda Sadeh if (rc) 3301766fc439SYehuda Sadeh goto err_out_blkdev; 3302766fc439SYehuda Sadeh 33030f308a31SAlex Elder rc = rbd_bus_add_dev(rbd_dev); 33040f308a31SAlex Elder if (rc) 33050f308a31SAlex Elder goto err_out_disk; 33060f308a31SAlex Elder 330732eec68dSAlex Elder /* 330832eec68dSAlex Elder * At this point cleanup in the event of an error is the job 330932eec68dSAlex Elder * of the sysfs code (initiated by rbd_bus_del_dev()). 331032eec68dSAlex Elder */ 33112ac4e75dSAlex Elder 33124bb1f1edSAlex Elder down_write(&rbd_dev->header_rwsem); 33135ed16177SAlex Elder rc = rbd_dev_snaps_register(rbd_dev); 33144bb1f1edSAlex Elder up_write(&rbd_dev->header_rwsem); 33152ac4e75dSAlex Elder if (rc) 33162ac4e75dSAlex Elder goto err_out_bus; 33172ac4e75dSAlex Elder 331859c2be1eSYehuda Sadeh rc = rbd_init_watch_dev(rbd_dev); 331959c2be1eSYehuda Sadeh if (rc) 332059c2be1eSYehuda Sadeh goto err_out_bus; 332159c2be1eSYehuda Sadeh 33224e9afebaSAlex Elder kfree(rbd_opts); 33234e9afebaSAlex Elder 33243ee4001eSAlex Elder /* Everything's ready. Announce the disk to the world. */ 33253ee4001eSAlex Elder 33263ee4001eSAlex Elder add_disk(rbd_dev->disk); 33273ee4001eSAlex Elder 33283ee4001eSAlex Elder pr_info("%s: added with size 0x%llx\n", rbd_dev->disk->disk_name, 33293ee4001eSAlex Elder (unsigned long long) rbd_dev->mapping.size); 33303ee4001eSAlex Elder 3331602adf40SYehuda Sadeh return count; 3332602adf40SYehuda Sadeh 3333766fc439SYehuda Sadeh err_out_bus: 3334766fc439SYehuda Sadeh /* this will also clean up rest of rbd_dev stuff */ 3335766fc439SYehuda Sadeh 3336766fc439SYehuda Sadeh rbd_bus_del_dev(rbd_dev); 33374e9afebaSAlex Elder kfree(rbd_opts); 33384e9afebaSAlex Elder 3339766fc439SYehuda Sadeh return rc; 3340766fc439SYehuda Sadeh 33410f308a31SAlex Elder err_out_disk: 33420f308a31SAlex Elder rbd_free_disk(rbd_dev); 3343602adf40SYehuda Sadeh err_out_blkdev: 3344602adf40SYehuda Sadeh unregister_blkdev(rbd_dev->major, rbd_dev->name); 334585ae8926SAlex Elder err_out_id: 334685ae8926SAlex Elder rbd_dev_id_put(rbd_dev); 334741f38c2bSAlex Elder err_out_snaps: 334841f38c2bSAlex Elder rbd_remove_all_snaps(rbd_dev); 334941f38c2bSAlex Elder err_out_probe: 335005fd6f6fSAlex Elder rbd_header_free(&rbd_dev->header); 3351602adf40SYehuda Sadeh err_out_client: 33523fcf2581SAlex Elder kfree(rbd_dev->header_name); 33539d3997fdSAlex Elder rbd_put_client(rbdc); 33540ddebc0cSAlex Elder err_out_args: 335578cea76eSAlex Elder if (ceph_opts) 335678cea76eSAlex Elder ceph_destroy_options(ceph_opts); 33574e9afebaSAlex Elder kfree(rbd_opts); 3358859c31dfSAlex Elder rbd_spec_put(spec); 335985ae8926SAlex Elder err_out_mem: 336027cc2594SAlex Elder kfree(rbd_dev); 336127cc2594SAlex Elder 3362602adf40SYehuda Sadeh dout("Error adding device %s\n", buf); 3363602adf40SYehuda Sadeh module_put(THIS_MODULE); 336427cc2594SAlex Elder 336527cc2594SAlex Elder return (ssize_t) rc; 3366602adf40SYehuda Sadeh } 3367602adf40SYehuda Sadeh 3368de71a297SAlex Elder static struct rbd_device *__rbd_get_dev(unsigned long dev_id) 3369602adf40SYehuda Sadeh { 3370602adf40SYehuda Sadeh struct list_head *tmp; 3371602adf40SYehuda Sadeh struct rbd_device *rbd_dev; 3372602adf40SYehuda Sadeh 3373e124a82fSAlex Elder spin_lock(&rbd_dev_list_lock); 3374602adf40SYehuda Sadeh list_for_each(tmp, &rbd_dev_list) { 3375602adf40SYehuda Sadeh rbd_dev = list_entry(tmp, struct rbd_device, node); 3376de71a297SAlex Elder if (rbd_dev->dev_id == dev_id) { 3377e124a82fSAlex Elder spin_unlock(&rbd_dev_list_lock); 3378602adf40SYehuda Sadeh return rbd_dev; 3379602adf40SYehuda Sadeh } 3380e124a82fSAlex Elder } 3381e124a82fSAlex Elder spin_unlock(&rbd_dev_list_lock); 3382602adf40SYehuda Sadeh return NULL; 3383602adf40SYehuda Sadeh } 3384602adf40SYehuda Sadeh 3385dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev) 3386602adf40SYehuda Sadeh { 3387593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 3388602adf40SYehuda Sadeh 33891dbb4399SAlex Elder if (rbd_dev->watch_request) { 33901dbb4399SAlex Elder struct ceph_client *client = rbd_dev->rbd_client->client; 33911dbb4399SAlex Elder 33921dbb4399SAlex Elder ceph_osdc_unregister_linger_request(&client->osdc, 339359c2be1eSYehuda Sadeh rbd_dev->watch_request); 33941dbb4399SAlex Elder } 339559c2be1eSYehuda Sadeh if (rbd_dev->watch_event) 3396070c633fSAlex Elder rbd_req_sync_unwatch(rbd_dev); 339759c2be1eSYehuda Sadeh 33989d3997fdSAlex Elder rbd_put_client(rbd_dev->rbd_client); 3399602adf40SYehuda Sadeh 3400602adf40SYehuda Sadeh /* clean up and free blkdev */ 3401602adf40SYehuda Sadeh rbd_free_disk(rbd_dev); 3402602adf40SYehuda Sadeh unregister_blkdev(rbd_dev->major, rbd_dev->name); 340332eec68dSAlex Elder 34042ac4e75dSAlex Elder /* release allocated disk header fields */ 34052ac4e75dSAlex Elder rbd_header_free(&rbd_dev->header); 34062ac4e75dSAlex Elder 340732eec68dSAlex Elder /* done with the id, and with the rbd_dev */ 34080bed54dcSAlex Elder kfree(rbd_dev->header_name); 3409e2839308SAlex Elder rbd_dev_id_put(rbd_dev); 34108b8fb99cSAlex Elder rbd_spec_put(rbd_dev->spec); 3411602adf40SYehuda Sadeh kfree(rbd_dev); 3412602adf40SYehuda Sadeh 3413602adf40SYehuda Sadeh /* release module ref */ 3414602adf40SYehuda Sadeh module_put(THIS_MODULE); 3415602adf40SYehuda Sadeh } 3416602adf40SYehuda Sadeh 3417dfc5606dSYehuda Sadeh static ssize_t rbd_remove(struct bus_type *bus, 3418602adf40SYehuda Sadeh const char *buf, 3419602adf40SYehuda Sadeh size_t count) 3420602adf40SYehuda Sadeh { 3421602adf40SYehuda Sadeh struct rbd_device *rbd_dev = NULL; 3422602adf40SYehuda Sadeh int target_id, rc; 3423602adf40SYehuda Sadeh unsigned long ul; 3424602adf40SYehuda Sadeh int ret = count; 3425602adf40SYehuda Sadeh 3426602adf40SYehuda Sadeh rc = strict_strtoul(buf, 10, &ul); 3427602adf40SYehuda Sadeh if (rc) 3428602adf40SYehuda Sadeh return rc; 3429602adf40SYehuda Sadeh 3430602adf40SYehuda Sadeh /* convert to int; abort if we lost anything in the conversion */ 3431602adf40SYehuda Sadeh target_id = (int) ul; 3432602adf40SYehuda Sadeh if (target_id != ul) 3433602adf40SYehuda Sadeh return -EINVAL; 3434602adf40SYehuda Sadeh 3435602adf40SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 3436602adf40SYehuda Sadeh 3437602adf40SYehuda Sadeh rbd_dev = __rbd_get_dev(target_id); 3438602adf40SYehuda Sadeh if (!rbd_dev) { 3439602adf40SYehuda Sadeh ret = -ENOENT; 3440602adf40SYehuda Sadeh goto done; 3441602adf40SYehuda Sadeh } 3442602adf40SYehuda Sadeh 344341f38c2bSAlex Elder rbd_remove_all_snaps(rbd_dev); 3444dfc5606dSYehuda Sadeh rbd_bus_del_dev(rbd_dev); 3445602adf40SYehuda Sadeh 3446602adf40SYehuda Sadeh done: 3447602adf40SYehuda Sadeh mutex_unlock(&ctl_mutex); 3448aafb230eSAlex Elder 3449602adf40SYehuda Sadeh return ret; 3450602adf40SYehuda Sadeh } 3451602adf40SYehuda Sadeh 3452602adf40SYehuda Sadeh /* 3453602adf40SYehuda Sadeh * create control files in sysfs 3454dfc5606dSYehuda Sadeh * /sys/bus/rbd/... 3455602adf40SYehuda Sadeh */ 3456602adf40SYehuda Sadeh static int rbd_sysfs_init(void) 3457602adf40SYehuda Sadeh { 3458dfc5606dSYehuda Sadeh int ret; 3459602adf40SYehuda Sadeh 3460fed4c143SAlex Elder ret = device_register(&rbd_root_dev); 3461dfc5606dSYehuda Sadeh if (ret < 0) 3462dfc5606dSYehuda Sadeh return ret; 3463602adf40SYehuda Sadeh 3464fed4c143SAlex Elder ret = bus_register(&rbd_bus_type); 3465fed4c143SAlex Elder if (ret < 0) 3466fed4c143SAlex Elder device_unregister(&rbd_root_dev); 3467602adf40SYehuda Sadeh 3468602adf40SYehuda Sadeh return ret; 3469602adf40SYehuda Sadeh } 3470602adf40SYehuda Sadeh 3471602adf40SYehuda Sadeh static void rbd_sysfs_cleanup(void) 3472602adf40SYehuda Sadeh { 3473dfc5606dSYehuda Sadeh bus_unregister(&rbd_bus_type); 3474fed4c143SAlex Elder device_unregister(&rbd_root_dev); 3475602adf40SYehuda Sadeh } 3476602adf40SYehuda Sadeh 3477602adf40SYehuda Sadeh int __init rbd_init(void) 3478602adf40SYehuda Sadeh { 3479602adf40SYehuda Sadeh int rc; 3480602adf40SYehuda Sadeh 3481602adf40SYehuda Sadeh rc = rbd_sysfs_init(); 3482602adf40SYehuda Sadeh if (rc) 3483602adf40SYehuda Sadeh return rc; 3484f0f8cef5SAlex Elder pr_info("loaded " RBD_DRV_NAME_LONG "\n"); 3485602adf40SYehuda Sadeh return 0; 3486602adf40SYehuda Sadeh } 3487602adf40SYehuda Sadeh 3488602adf40SYehuda Sadeh void __exit rbd_exit(void) 3489602adf40SYehuda Sadeh { 3490602adf40SYehuda Sadeh rbd_sysfs_cleanup(); 3491602adf40SYehuda Sadeh } 3492602adf40SYehuda Sadeh 3493602adf40SYehuda Sadeh module_init(rbd_init); 3494602adf40SYehuda Sadeh module_exit(rbd_exit); 3495602adf40SYehuda Sadeh 3496602adf40SYehuda Sadeh MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 3497602adf40SYehuda Sadeh MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 3498602adf40SYehuda Sadeh MODULE_DESCRIPTION("rados block device"); 3499602adf40SYehuda Sadeh 3500602adf40SYehuda Sadeh /* following authorship retained from original osdblk.c */ 3501602adf40SYehuda Sadeh MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); 3502602adf40SYehuda Sadeh 3503602adf40SYehuda Sadeh MODULE_LICENSE("GPL"); 3504