1602adf40SYehuda Sadeh /* 2602adf40SYehuda Sadeh rbd.c -- Export ceph rados objects as a Linux block device 3602adf40SYehuda Sadeh 4602adf40SYehuda Sadeh 5602adf40SYehuda Sadeh based on drivers/block/osdblk.c: 6602adf40SYehuda Sadeh 7602adf40SYehuda Sadeh Copyright 2009 Red Hat, Inc. 8602adf40SYehuda Sadeh 9602adf40SYehuda Sadeh This program is free software; you can redistribute it and/or modify 10602adf40SYehuda Sadeh it under the terms of the GNU General Public License as published by 11602adf40SYehuda Sadeh the Free Software Foundation. 12602adf40SYehuda Sadeh 13602adf40SYehuda Sadeh This program is distributed in the hope that it will be useful, 14602adf40SYehuda Sadeh but WITHOUT ANY WARRANTY; without even the implied warranty of 15602adf40SYehuda Sadeh MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16602adf40SYehuda Sadeh GNU General Public License for more details. 17602adf40SYehuda Sadeh 18602adf40SYehuda Sadeh You should have received a copy of the GNU General Public License 19602adf40SYehuda Sadeh along with this program; see the file COPYING. If not, write to 20602adf40SYehuda Sadeh the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
21602adf40SYehuda Sadeh 22602adf40SYehuda Sadeh 23602adf40SYehuda Sadeh 24dfc5606dSYehuda Sadeh For usage instructions, please refer to: 25602adf40SYehuda Sadeh 26dfc5606dSYehuda Sadeh Documentation/ABI/testing/sysfs-bus-rbd 27602adf40SYehuda Sadeh 28602adf40SYehuda Sadeh */ 29602adf40SYehuda Sadeh 30602adf40SYehuda Sadeh #include <linux/ceph/libceph.h> 31602adf40SYehuda Sadeh #include <linux/ceph/osd_client.h> 32602adf40SYehuda Sadeh #include <linux/ceph/mon_client.h> 33602adf40SYehuda Sadeh #include <linux/ceph/decode.h> 3459c2be1eSYehuda Sadeh #include <linux/parser.h> 35602adf40SYehuda Sadeh 36602adf40SYehuda Sadeh #include <linux/kernel.h> 37602adf40SYehuda Sadeh #include <linux/device.h> 38602adf40SYehuda Sadeh #include <linux/module.h> 39602adf40SYehuda Sadeh #include <linux/fs.h> 40602adf40SYehuda Sadeh #include <linux/blkdev.h> 41602adf40SYehuda Sadeh 42602adf40SYehuda Sadeh #include "rbd_types.h" 43602adf40SYehuda Sadeh 44aafb230eSAlex Elder #define RBD_DEBUG /* Activate rbd_assert() calls */ 45aafb230eSAlex Elder 46593a9e7bSAlex Elder /* 47593a9e7bSAlex Elder * The basic unit of block I/O is a sector. It is interpreted in a 48593a9e7bSAlex Elder * number of contexts in Linux (blk, bio, genhd), but the default is 49593a9e7bSAlex Elder * universally 512 bytes. These symbols are just slightly more 50593a9e7bSAlex Elder * meaningful than the bare numbers they represent. 
51593a9e7bSAlex Elder */ 52593a9e7bSAlex Elder #define SECTOR_SHIFT 9 53593a9e7bSAlex Elder #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) 54593a9e7bSAlex Elder 552647ba38SAlex Elder /* It might be useful to have these defined elsewhere */ 56df111be6SAlex Elder 572647ba38SAlex Elder #define U8_MAX ((u8) (~0U)) 582647ba38SAlex Elder #define U16_MAX ((u16) (~0U)) 590ec8ce87SAlex Elder #define U32_MAX ((u32) (~0U)) 60df111be6SAlex Elder #define U64_MAX ((u64) (~0ULL)) 61df111be6SAlex Elder 62f0f8cef5SAlex Elder #define RBD_DRV_NAME "rbd" 63f0f8cef5SAlex Elder #define RBD_DRV_NAME_LONG "rbd (rados block device)" 64602adf40SYehuda Sadeh 65602adf40SYehuda Sadeh #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ 66602adf40SYehuda Sadeh 67d4b125e9SAlex Elder #define RBD_SNAP_DEV_NAME_PREFIX "snap_" 68d4b125e9SAlex Elder #define RBD_MAX_SNAP_NAME_LEN \ 69d4b125e9SAlex Elder (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) 70d4b125e9SAlex Elder 7135d489f9SAlex Elder #define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */ 72602adf40SYehuda Sadeh 73602adf40SYehuda Sadeh #define RBD_SNAP_HEAD_NAME "-" 74602adf40SYehuda Sadeh 759e15b77dSAlex Elder /* This allows a single page to hold an image name sent by OSD */ 769e15b77dSAlex Elder #define RBD_IMAGE_NAME_LEN_MAX (PAGE_SIZE - sizeof (__le32) - 1) 77589d30e0SAlex Elder #define RBD_IMAGE_ID_LEN_MAX 64 789e15b77dSAlex Elder 791e130199SAlex Elder #define RBD_OBJ_PREFIX_LEN_MAX 64 80589d30e0SAlex Elder 81d889140cSAlex Elder /* Feature bits */ 82d889140cSAlex Elder 83d889140cSAlex Elder #define RBD_FEATURE_LAYERING 1 84d889140cSAlex Elder 85d889140cSAlex Elder /* Features supported by this (client software) implementation. */ 86d889140cSAlex Elder 87d889140cSAlex Elder #define RBD_FEATURES_ALL (0) 88d889140cSAlex Elder 8981a89793SAlex Elder /* 9081a89793SAlex Elder * An RBD device name will be "rbd#", where the "rbd" comes from 9181a89793SAlex Elder * RBD_DRV_NAME above, and # is a unique integer identifier. 
9281a89793SAlex Elder * MAX_INT_FORMAT_WIDTH is used in ensuring DEV_NAME_LEN is big 9381a89793SAlex Elder * enough to hold all possible device names. 9481a89793SAlex Elder */ 95602adf40SYehuda Sadeh #define DEV_NAME_LEN 32 9681a89793SAlex Elder #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) 97602adf40SYehuda Sadeh 98602adf40SYehuda Sadeh /* 99602adf40SYehuda Sadeh * block device image metadata (in-memory version) 100602adf40SYehuda Sadeh */ 101602adf40SYehuda Sadeh struct rbd_image_header { 102f84344f3SAlex Elder /* These four fields never change for a given rbd image */ 103849b4260SAlex Elder char *object_prefix; 10434b13184SAlex Elder u64 features; 105602adf40SYehuda Sadeh __u8 obj_order; 106602adf40SYehuda Sadeh __u8 crypt_type; 107602adf40SYehuda Sadeh __u8 comp_type; 108602adf40SYehuda Sadeh 109f84344f3SAlex Elder /* The remaining fields need to be updated occasionally */ 110f84344f3SAlex Elder u64 image_size; 111f84344f3SAlex Elder struct ceph_snap_context *snapc; 112602adf40SYehuda Sadeh char *snap_names; 113602adf40SYehuda Sadeh u64 *snap_sizes; 11459c2be1eSYehuda Sadeh 11559c2be1eSYehuda Sadeh u64 obj_version; 11659c2be1eSYehuda Sadeh }; 11759c2be1eSYehuda Sadeh 1180d7dbfceSAlex Elder /* 1190d7dbfceSAlex Elder * An rbd image specification. 1200d7dbfceSAlex Elder * 1210d7dbfceSAlex Elder * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely 122c66c6e0cSAlex Elder * identify an image. Each rbd_dev structure includes a pointer to 123c66c6e0cSAlex Elder * an rbd_spec structure that encapsulates this identity. 124c66c6e0cSAlex Elder * 125c66c6e0cSAlex Elder * Each of the id's in an rbd_spec has an associated name. For a 126c66c6e0cSAlex Elder * user-mapped image, the names are supplied and the id's associated 127c66c6e0cSAlex Elder * with them are looked up. For a layered image, a parent image is 128c66c6e0cSAlex Elder * defined by the tuple, and the names are looked up. 
129c66c6e0cSAlex Elder * 130c66c6e0cSAlex Elder * An rbd_dev structure contains a parent_spec pointer which is 131c66c6e0cSAlex Elder * non-null if the image it represents is a child in a layered 132c66c6e0cSAlex Elder * image. This pointer will refer to the rbd_spec structure used 133c66c6e0cSAlex Elder * by the parent rbd_dev for its own identity (i.e., the structure 134c66c6e0cSAlex Elder * is shared between the parent and child). 135c66c6e0cSAlex Elder * 136c66c6e0cSAlex Elder * Since these structures are populated once, during the discovery 137c66c6e0cSAlex Elder * phase of image construction, they are effectively immutable so 138c66c6e0cSAlex Elder * we make no effort to synchronize access to them. 139c66c6e0cSAlex Elder * 140c66c6e0cSAlex Elder * Note that code herein does not assume the image name is known (it 141c66c6e0cSAlex Elder * could be a null pointer). 1420d7dbfceSAlex Elder */ 1430d7dbfceSAlex Elder struct rbd_spec { 1440d7dbfceSAlex Elder u64 pool_id; 1450d7dbfceSAlex Elder char *pool_name; 1460d7dbfceSAlex Elder 1470d7dbfceSAlex Elder char *image_id; 1480d7dbfceSAlex Elder char *image_name; 1490d7dbfceSAlex Elder 1500d7dbfceSAlex Elder u64 snap_id; 1510d7dbfceSAlex Elder char *snap_name; 1520d7dbfceSAlex Elder 1530d7dbfceSAlex Elder struct kref kref; 1540d7dbfceSAlex Elder }; 1550d7dbfceSAlex Elder 156602adf40SYehuda Sadeh /* 157f0f8cef5SAlex Elder * an instance of the client. multiple devices may share an rbd client. 158602adf40SYehuda Sadeh */ 159602adf40SYehuda Sadeh struct rbd_client { 160602adf40SYehuda Sadeh struct ceph_client *client; 161602adf40SYehuda Sadeh struct kref kref; 162602adf40SYehuda Sadeh struct list_head node; 163602adf40SYehuda Sadeh }; 164602adf40SYehuda Sadeh 165bf0d5f50SAlex Elder struct rbd_img_request; 166bf0d5f50SAlex Elder typedef void (*rbd_img_callback_t)(struct rbd_img_request *); 167bf0d5f50SAlex Elder 168bf0d5f50SAlex Elder #define BAD_WHICH U32_MAX /* Good which or bad which, which? 
*/ 169bf0d5f50SAlex Elder 170bf0d5f50SAlex Elder struct rbd_obj_request; 171bf0d5f50SAlex Elder typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *); 172bf0d5f50SAlex Elder 1739969ebc5SAlex Elder enum obj_request_type { 1749969ebc5SAlex Elder OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES 1759969ebc5SAlex Elder }; 176bf0d5f50SAlex Elder 177bf0d5f50SAlex Elder struct rbd_obj_request { 178bf0d5f50SAlex Elder const char *object_name; 179bf0d5f50SAlex Elder u64 offset; /* object start byte */ 180bf0d5f50SAlex Elder u64 length; /* bytes from offset */ 181bf0d5f50SAlex Elder 182bf0d5f50SAlex Elder struct rbd_img_request *img_request; 183bf0d5f50SAlex Elder struct list_head links; /* img_request->obj_requests */ 184bf0d5f50SAlex Elder u32 which; /* posn image request list */ 185bf0d5f50SAlex Elder 186bf0d5f50SAlex Elder enum obj_request_type type; 187788e2df3SAlex Elder union { 188bf0d5f50SAlex Elder struct bio *bio_list; 189788e2df3SAlex Elder struct { 190788e2df3SAlex Elder struct page **pages; 191788e2df3SAlex Elder u32 page_count; 192788e2df3SAlex Elder }; 193788e2df3SAlex Elder }; 194bf0d5f50SAlex Elder 195bf0d5f50SAlex Elder struct ceph_osd_request *osd_req; 196bf0d5f50SAlex Elder 197bf0d5f50SAlex Elder u64 xferred; /* bytes transferred */ 198bf0d5f50SAlex Elder u64 version; 199bf0d5f50SAlex Elder s32 result; 200bf0d5f50SAlex Elder atomic_t done; 201bf0d5f50SAlex Elder 202bf0d5f50SAlex Elder rbd_obj_callback_t callback; 203788e2df3SAlex Elder struct completion completion; 204bf0d5f50SAlex Elder 205bf0d5f50SAlex Elder struct kref kref; 206bf0d5f50SAlex Elder }; 207bf0d5f50SAlex Elder 208bf0d5f50SAlex Elder struct rbd_img_request { 209bf0d5f50SAlex Elder struct request *rq; 210bf0d5f50SAlex Elder struct rbd_device *rbd_dev; 211bf0d5f50SAlex Elder u64 offset; /* starting image byte offset */ 212bf0d5f50SAlex Elder u64 length; /* byte count from offset */ 213bf0d5f50SAlex Elder bool write_request; /* false for read */ 214bf0d5f50SAlex Elder union { 
215bf0d5f50SAlex Elder struct ceph_snap_context *snapc; /* for writes */ 216bf0d5f50SAlex Elder u64 snap_id; /* for reads */ 217bf0d5f50SAlex Elder }; 218bf0d5f50SAlex Elder spinlock_t completion_lock;/* protects next_completion */ 219bf0d5f50SAlex Elder u32 next_completion; 220bf0d5f50SAlex Elder rbd_img_callback_t callback; 221bf0d5f50SAlex Elder 222bf0d5f50SAlex Elder u32 obj_request_count; 223bf0d5f50SAlex Elder struct list_head obj_requests; /* rbd_obj_request structs */ 224bf0d5f50SAlex Elder 225bf0d5f50SAlex Elder struct kref kref; 226bf0d5f50SAlex Elder }; 227bf0d5f50SAlex Elder 228bf0d5f50SAlex Elder #define for_each_obj_request(ireq, oreq) \ 229ef06f4d3SAlex Elder list_for_each_entry(oreq, &(ireq)->obj_requests, links) 230bf0d5f50SAlex Elder #define for_each_obj_request_from(ireq, oreq) \ 231ef06f4d3SAlex Elder list_for_each_entry_from(oreq, &(ireq)->obj_requests, links) 232bf0d5f50SAlex Elder #define for_each_obj_request_safe(ireq, oreq, n) \ 233ef06f4d3SAlex Elder list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links) 234bf0d5f50SAlex Elder 235dfc5606dSYehuda Sadeh struct rbd_snap { 236dfc5606dSYehuda Sadeh struct device dev; 237dfc5606dSYehuda Sadeh const char *name; 2383591538fSJosh Durgin u64 size; 239dfc5606dSYehuda Sadeh struct list_head node; 240dfc5606dSYehuda Sadeh u64 id; 24134b13184SAlex Elder u64 features; 242dfc5606dSYehuda Sadeh }; 243dfc5606dSYehuda Sadeh 244f84344f3SAlex Elder struct rbd_mapping { 24599c1f08fSAlex Elder u64 size; 24634b13184SAlex Elder u64 features; 247f84344f3SAlex Elder bool read_only; 248f84344f3SAlex Elder }; 249f84344f3SAlex Elder 250602adf40SYehuda Sadeh /* 251602adf40SYehuda Sadeh * a single device 252602adf40SYehuda Sadeh */ 253602adf40SYehuda Sadeh struct rbd_device { 254de71a297SAlex Elder int dev_id; /* blkdev unique id */ 255602adf40SYehuda Sadeh 256602adf40SYehuda Sadeh int major; /* blkdev assigned major */ 257602adf40SYehuda Sadeh struct gendisk *disk; /* blkdev's gendisk and rq */ 
258602adf40SYehuda Sadeh 259a30b71b9SAlex Elder u32 image_format; /* Either 1 or 2 */ 260602adf40SYehuda Sadeh struct rbd_client *rbd_client; 261602adf40SYehuda Sadeh 262602adf40SYehuda Sadeh char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ 263602adf40SYehuda Sadeh 264b82d167bSAlex Elder spinlock_t lock; /* queue, flags, open_count */ 265602adf40SYehuda Sadeh 266602adf40SYehuda Sadeh struct rbd_image_header header; 267b82d167bSAlex Elder unsigned long flags; /* possibly lock protected */ 2680d7dbfceSAlex Elder struct rbd_spec *spec; 269602adf40SYehuda Sadeh 2700d7dbfceSAlex Elder char *header_name; 271971f839aSAlex Elder 2720903e875SAlex Elder struct ceph_file_layout layout; 2730903e875SAlex Elder 27459c2be1eSYehuda Sadeh struct ceph_osd_event *watch_event; 275975241afSAlex Elder struct rbd_obj_request *watch_request; 27659c2be1eSYehuda Sadeh 27786b00e0dSAlex Elder struct rbd_spec *parent_spec; 27886b00e0dSAlex Elder u64 parent_overlap; 27986b00e0dSAlex Elder 280c666601aSJosh Durgin /* protects updating the header */ 281c666601aSJosh Durgin struct rw_semaphore header_rwsem; 282f84344f3SAlex Elder 283f84344f3SAlex Elder struct rbd_mapping mapping; 284602adf40SYehuda Sadeh 285602adf40SYehuda Sadeh struct list_head node; 286dfc5606dSYehuda Sadeh 287dfc5606dSYehuda Sadeh /* list of snapshots */ 288dfc5606dSYehuda Sadeh struct list_head snaps; 289dfc5606dSYehuda Sadeh 290dfc5606dSYehuda Sadeh /* sysfs related */ 291dfc5606dSYehuda Sadeh struct device dev; 292b82d167bSAlex Elder unsigned long open_count; /* protected by lock */ 293dfc5606dSYehuda Sadeh }; 294dfc5606dSYehuda Sadeh 295b82d167bSAlex Elder /* 296b82d167bSAlex Elder * Flag bits for rbd_dev->flags. If atomicity is required, 297b82d167bSAlex Elder * rbd_dev->lock is used to protect access. 298b82d167bSAlex Elder * 299b82d167bSAlex Elder * Currently, only the "removing" flag (which is coupled with the 300b82d167bSAlex Elder * "open_count" field) requires atomic access. 
301b82d167bSAlex Elder */ 3026d292906SAlex Elder enum rbd_dev_flags { 3036d292906SAlex Elder RBD_DEV_FLAG_EXISTS, /* mapped snapshot has not been deleted */ 304b82d167bSAlex Elder RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */ 3056d292906SAlex Elder }; 3066d292906SAlex Elder 307602adf40SYehuda Sadeh static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ 308e124a82fSAlex Elder 309602adf40SYehuda Sadeh static LIST_HEAD(rbd_dev_list); /* devices */ 310e124a82fSAlex Elder static DEFINE_SPINLOCK(rbd_dev_list_lock); 311e124a82fSAlex Elder 312602adf40SYehuda Sadeh static LIST_HEAD(rbd_client_list); /* clients */ 313432b8587SAlex Elder static DEFINE_SPINLOCK(rbd_client_list_lock); 314602adf40SYehuda Sadeh 315304f6808SAlex Elder static int rbd_dev_snaps_update(struct rbd_device *rbd_dev); 316304f6808SAlex Elder static int rbd_dev_snaps_register(struct rbd_device *rbd_dev); 317304f6808SAlex Elder 318dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev); 31941f38c2bSAlex Elder static void rbd_remove_snap_dev(struct rbd_snap *snap); 320dfc5606dSYehuda Sadeh 321f0f8cef5SAlex Elder static ssize_t rbd_add(struct bus_type *bus, const char *buf, 322f0f8cef5SAlex Elder size_t count); 323f0f8cef5SAlex Elder static ssize_t rbd_remove(struct bus_type *bus, const char *buf, 324f0f8cef5SAlex Elder size_t count); 325f0f8cef5SAlex Elder 326f0f8cef5SAlex Elder static struct bus_attribute rbd_bus_attrs[] = { 327f0f8cef5SAlex Elder __ATTR(add, S_IWUSR, NULL, rbd_add), 328f0f8cef5SAlex Elder __ATTR(remove, S_IWUSR, NULL, rbd_remove), 329f0f8cef5SAlex Elder __ATTR_NULL 330f0f8cef5SAlex Elder }; 331f0f8cef5SAlex Elder 332f0f8cef5SAlex Elder static struct bus_type rbd_bus_type = { 333f0f8cef5SAlex Elder .name = "rbd", 334f0f8cef5SAlex Elder .bus_attrs = rbd_bus_attrs, 335f0f8cef5SAlex Elder }; 336f0f8cef5SAlex Elder 337f0f8cef5SAlex Elder static void rbd_root_dev_release(struct device *dev) 338f0f8cef5SAlex Elder { 339f0f8cef5SAlex Elder } 
340f0f8cef5SAlex Elder 341f0f8cef5SAlex Elder static struct device rbd_root_dev = { 342f0f8cef5SAlex Elder .init_name = "rbd", 343f0f8cef5SAlex Elder .release = rbd_root_dev_release, 344f0f8cef5SAlex Elder }; 345f0f8cef5SAlex Elder 34606ecc6cbSAlex Elder static __printf(2, 3) 34706ecc6cbSAlex Elder void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) 34806ecc6cbSAlex Elder { 34906ecc6cbSAlex Elder struct va_format vaf; 35006ecc6cbSAlex Elder va_list args; 35106ecc6cbSAlex Elder 35206ecc6cbSAlex Elder va_start(args, fmt); 35306ecc6cbSAlex Elder vaf.fmt = fmt; 35406ecc6cbSAlex Elder vaf.va = &args; 35506ecc6cbSAlex Elder 35606ecc6cbSAlex Elder if (!rbd_dev) 35706ecc6cbSAlex Elder printk(KERN_WARNING "%s: %pV\n", RBD_DRV_NAME, &vaf); 35806ecc6cbSAlex Elder else if (rbd_dev->disk) 35906ecc6cbSAlex Elder printk(KERN_WARNING "%s: %s: %pV\n", 36006ecc6cbSAlex Elder RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf); 36106ecc6cbSAlex Elder else if (rbd_dev->spec && rbd_dev->spec->image_name) 36206ecc6cbSAlex Elder printk(KERN_WARNING "%s: image %s: %pV\n", 36306ecc6cbSAlex Elder RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf); 36406ecc6cbSAlex Elder else if (rbd_dev->spec && rbd_dev->spec->image_id) 36506ecc6cbSAlex Elder printk(KERN_WARNING "%s: id %s: %pV\n", 36606ecc6cbSAlex Elder RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf); 36706ecc6cbSAlex Elder else /* punt */ 36806ecc6cbSAlex Elder printk(KERN_WARNING "%s: rbd_dev %p: %pV\n", 36906ecc6cbSAlex Elder RBD_DRV_NAME, rbd_dev, &vaf); 37006ecc6cbSAlex Elder va_end(args); 37106ecc6cbSAlex Elder } 37206ecc6cbSAlex Elder 373aafb230eSAlex Elder #ifdef RBD_DEBUG 374aafb230eSAlex Elder #define rbd_assert(expr) \ 375aafb230eSAlex Elder if (unlikely(!(expr))) { \ 376aafb230eSAlex Elder printk(KERN_ERR "\nAssertion failure in %s() " \ 377aafb230eSAlex Elder "at line %d:\n\n" \ 378aafb230eSAlex Elder "\trbd_assert(%s);\n\n", \ 379aafb230eSAlex Elder __func__, __LINE__, #expr); \ 380aafb230eSAlex Elder BUG(); \ 381aafb230eSAlex 
Elder } 382aafb230eSAlex Elder #else /* !RBD_DEBUG */ 383aafb230eSAlex Elder # define rbd_assert(expr) ((void) 0) 384aafb230eSAlex Elder #endif /* !RBD_DEBUG */ 385dfc5606dSYehuda Sadeh 386117973fbSAlex Elder static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver); 387117973fbSAlex Elder static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver); 38859c2be1eSYehuda Sadeh 389602adf40SYehuda Sadeh static int rbd_open(struct block_device *bdev, fmode_t mode) 390602adf40SYehuda Sadeh { 391f0f8cef5SAlex Elder struct rbd_device *rbd_dev = bdev->bd_disk->private_data; 392b82d167bSAlex Elder bool removing = false; 393602adf40SYehuda Sadeh 394f84344f3SAlex Elder if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only) 395602adf40SYehuda Sadeh return -EROFS; 396602adf40SYehuda Sadeh 397a14ea269SAlex Elder spin_lock_irq(&rbd_dev->lock); 398b82d167bSAlex Elder if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) 399b82d167bSAlex Elder removing = true; 400b82d167bSAlex Elder else 401b82d167bSAlex Elder rbd_dev->open_count++; 402a14ea269SAlex Elder spin_unlock_irq(&rbd_dev->lock); 403b82d167bSAlex Elder if (removing) 404b82d167bSAlex Elder return -ENOENT; 405b82d167bSAlex Elder 40642382b70SAlex Elder mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 407c3e946ceSAlex Elder (void) get_device(&rbd_dev->dev); 408f84344f3SAlex Elder set_device_ro(bdev, rbd_dev->mapping.read_only); 40942382b70SAlex Elder mutex_unlock(&ctl_mutex); 410340c7a2bSAlex Elder 411602adf40SYehuda Sadeh return 0; 412602adf40SYehuda Sadeh } 413602adf40SYehuda Sadeh 414dfc5606dSYehuda Sadeh static int rbd_release(struct gendisk *disk, fmode_t mode) 415dfc5606dSYehuda Sadeh { 416dfc5606dSYehuda Sadeh struct rbd_device *rbd_dev = disk->private_data; 417b82d167bSAlex Elder unsigned long open_count_before; 418b82d167bSAlex Elder 419a14ea269SAlex Elder spin_lock_irq(&rbd_dev->lock); 420b82d167bSAlex Elder open_count_before = rbd_dev->open_count--; 421a14ea269SAlex Elder 
spin_unlock_irq(&rbd_dev->lock); 422b82d167bSAlex Elder rbd_assert(open_count_before > 0); 423dfc5606dSYehuda Sadeh 42442382b70SAlex Elder mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 425c3e946ceSAlex Elder put_device(&rbd_dev->dev); 42642382b70SAlex Elder mutex_unlock(&ctl_mutex); 427dfc5606dSYehuda Sadeh 428dfc5606dSYehuda Sadeh return 0; 429dfc5606dSYehuda Sadeh } 430dfc5606dSYehuda Sadeh 431602adf40SYehuda Sadeh static const struct block_device_operations rbd_bd_ops = { 432602adf40SYehuda Sadeh .owner = THIS_MODULE, 433602adf40SYehuda Sadeh .open = rbd_open, 434dfc5606dSYehuda Sadeh .release = rbd_release, 435602adf40SYehuda Sadeh }; 436602adf40SYehuda Sadeh 437602adf40SYehuda Sadeh /* 438602adf40SYehuda Sadeh * Initialize an rbd client instance. 43943ae4701SAlex Elder * We own *ceph_opts. 440602adf40SYehuda Sadeh */ 441f8c38929SAlex Elder static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) 442602adf40SYehuda Sadeh { 443602adf40SYehuda Sadeh struct rbd_client *rbdc; 444602adf40SYehuda Sadeh int ret = -ENOMEM; 445602adf40SYehuda Sadeh 44637206ee5SAlex Elder dout("%s:\n", __func__); 447602adf40SYehuda Sadeh rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); 448602adf40SYehuda Sadeh if (!rbdc) 449602adf40SYehuda Sadeh goto out_opt; 450602adf40SYehuda Sadeh 451602adf40SYehuda Sadeh kref_init(&rbdc->kref); 452602adf40SYehuda Sadeh INIT_LIST_HEAD(&rbdc->node); 453602adf40SYehuda Sadeh 454bc534d86SAlex Elder mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 455bc534d86SAlex Elder 45643ae4701SAlex Elder rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0); 457602adf40SYehuda Sadeh if (IS_ERR(rbdc->client)) 458bc534d86SAlex Elder goto out_mutex; 45943ae4701SAlex Elder ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */ 460602adf40SYehuda Sadeh 461602adf40SYehuda Sadeh ret = ceph_open_session(rbdc->client); 462602adf40SYehuda Sadeh if (ret < 0) 463602adf40SYehuda Sadeh goto out_err; 464602adf40SYehuda Sadeh 
465432b8587SAlex Elder spin_lock(&rbd_client_list_lock); 466602adf40SYehuda Sadeh list_add_tail(&rbdc->node, &rbd_client_list); 467432b8587SAlex Elder spin_unlock(&rbd_client_list_lock); 468602adf40SYehuda Sadeh 469bc534d86SAlex Elder mutex_unlock(&ctl_mutex); 47037206ee5SAlex Elder dout("%s: rbdc %p\n", __func__, rbdc); 471bc534d86SAlex Elder 472602adf40SYehuda Sadeh return rbdc; 473602adf40SYehuda Sadeh 474602adf40SYehuda Sadeh out_err: 475602adf40SYehuda Sadeh ceph_destroy_client(rbdc->client); 476bc534d86SAlex Elder out_mutex: 477bc534d86SAlex Elder mutex_unlock(&ctl_mutex); 478602adf40SYehuda Sadeh kfree(rbdc); 479602adf40SYehuda Sadeh out_opt: 48043ae4701SAlex Elder if (ceph_opts) 48143ae4701SAlex Elder ceph_destroy_options(ceph_opts); 48237206ee5SAlex Elder dout("%s: error %d\n", __func__, ret); 48337206ee5SAlex Elder 48428f259b7SVasiliy Kulikov return ERR_PTR(ret); 485602adf40SYehuda Sadeh } 486602adf40SYehuda Sadeh 487602adf40SYehuda Sadeh /* 4881f7ba331SAlex Elder * Find a ceph client with specific addr and configuration. If 4891f7ba331SAlex Elder * found, bump its reference count. 
490602adf40SYehuda Sadeh */ 4911f7ba331SAlex Elder static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts) 492602adf40SYehuda Sadeh { 493602adf40SYehuda Sadeh struct rbd_client *client_node; 4941f7ba331SAlex Elder bool found = false; 495602adf40SYehuda Sadeh 49643ae4701SAlex Elder if (ceph_opts->flags & CEPH_OPT_NOSHARE) 497602adf40SYehuda Sadeh return NULL; 498602adf40SYehuda Sadeh 4991f7ba331SAlex Elder spin_lock(&rbd_client_list_lock); 5001f7ba331SAlex Elder list_for_each_entry(client_node, &rbd_client_list, node) { 5011f7ba331SAlex Elder if (!ceph_compare_options(ceph_opts, client_node->client)) { 5021f7ba331SAlex Elder kref_get(&client_node->kref); 5031f7ba331SAlex Elder found = true; 5041f7ba331SAlex Elder break; 5051f7ba331SAlex Elder } 5061f7ba331SAlex Elder } 5071f7ba331SAlex Elder spin_unlock(&rbd_client_list_lock); 5081f7ba331SAlex Elder 5091f7ba331SAlex Elder return found ? client_node : NULL; 510602adf40SYehuda Sadeh } 511602adf40SYehuda Sadeh 512602adf40SYehuda Sadeh /* 51359c2be1eSYehuda Sadeh * mount options 51459c2be1eSYehuda Sadeh */ 51559c2be1eSYehuda Sadeh enum { 51659c2be1eSYehuda Sadeh Opt_last_int, 51759c2be1eSYehuda Sadeh /* int args above */ 51859c2be1eSYehuda Sadeh Opt_last_string, 51959c2be1eSYehuda Sadeh /* string args above */ 520cc0538b6SAlex Elder Opt_read_only, 521cc0538b6SAlex Elder Opt_read_write, 522cc0538b6SAlex Elder /* Boolean args above */ 523cc0538b6SAlex Elder Opt_last_bool, 52459c2be1eSYehuda Sadeh }; 52559c2be1eSYehuda Sadeh 52643ae4701SAlex Elder static match_table_t rbd_opts_tokens = { 52759c2be1eSYehuda Sadeh /* int args above */ 52859c2be1eSYehuda Sadeh /* string args above */ 529be466c1cSAlex Elder {Opt_read_only, "read_only"}, 530cc0538b6SAlex Elder {Opt_read_only, "ro"}, /* Alternate spelling */ 531cc0538b6SAlex Elder {Opt_read_write, "read_write"}, 532cc0538b6SAlex Elder {Opt_read_write, "rw"}, /* Alternate spelling */ 533cc0538b6SAlex Elder /* Boolean args above */ 53459c2be1eSYehuda Sadeh {-1, 
NULL} 53559c2be1eSYehuda Sadeh }; 53659c2be1eSYehuda Sadeh 53798571b5aSAlex Elder struct rbd_options { 53898571b5aSAlex Elder bool read_only; 53998571b5aSAlex Elder }; 54098571b5aSAlex Elder 54198571b5aSAlex Elder #define RBD_READ_ONLY_DEFAULT false 54298571b5aSAlex Elder 54359c2be1eSYehuda Sadeh static int parse_rbd_opts_token(char *c, void *private) 54459c2be1eSYehuda Sadeh { 54543ae4701SAlex Elder struct rbd_options *rbd_opts = private; 54659c2be1eSYehuda Sadeh substring_t argstr[MAX_OPT_ARGS]; 54759c2be1eSYehuda Sadeh int token, intval, ret; 54859c2be1eSYehuda Sadeh 54943ae4701SAlex Elder token = match_token(c, rbd_opts_tokens, argstr); 55059c2be1eSYehuda Sadeh if (token < 0) 55159c2be1eSYehuda Sadeh return -EINVAL; 55259c2be1eSYehuda Sadeh 55359c2be1eSYehuda Sadeh if (token < Opt_last_int) { 55459c2be1eSYehuda Sadeh ret = match_int(&argstr[0], &intval); 55559c2be1eSYehuda Sadeh if (ret < 0) { 55659c2be1eSYehuda Sadeh pr_err("bad mount option arg (not int) " 55759c2be1eSYehuda Sadeh "at '%s'\n", c); 55859c2be1eSYehuda Sadeh return ret; 55959c2be1eSYehuda Sadeh } 56059c2be1eSYehuda Sadeh dout("got int token %d val %d\n", token, intval); 56159c2be1eSYehuda Sadeh } else if (token > Opt_last_int && token < Opt_last_string) { 56259c2be1eSYehuda Sadeh dout("got string token %d val %s\n", token, 56359c2be1eSYehuda Sadeh argstr[0].from); 564cc0538b6SAlex Elder } else if (token > Opt_last_string && token < Opt_last_bool) { 565cc0538b6SAlex Elder dout("got Boolean token %d\n", token); 56659c2be1eSYehuda Sadeh } else { 56759c2be1eSYehuda Sadeh dout("got token %d\n", token); 56859c2be1eSYehuda Sadeh } 56959c2be1eSYehuda Sadeh 57059c2be1eSYehuda Sadeh switch (token) { 571cc0538b6SAlex Elder case Opt_read_only: 572cc0538b6SAlex Elder rbd_opts->read_only = true; 573cc0538b6SAlex Elder break; 574cc0538b6SAlex Elder case Opt_read_write: 575cc0538b6SAlex Elder rbd_opts->read_only = false; 576cc0538b6SAlex Elder break; 57759c2be1eSYehuda Sadeh default: 578aafb230eSAlex Elder 
rbd_assert(false); 579aafb230eSAlex Elder break; 58059c2be1eSYehuda Sadeh } 58159c2be1eSYehuda Sadeh return 0; 58259c2be1eSYehuda Sadeh } 58359c2be1eSYehuda Sadeh 58459c2be1eSYehuda Sadeh /* 585602adf40SYehuda Sadeh * Get a ceph client with specific addr and configuration, if one does 586602adf40SYehuda Sadeh * not exist create it. 587602adf40SYehuda Sadeh */ 5889d3997fdSAlex Elder static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts) 589602adf40SYehuda Sadeh { 590f8c38929SAlex Elder struct rbd_client *rbdc; 59159c2be1eSYehuda Sadeh 5921f7ba331SAlex Elder rbdc = rbd_client_find(ceph_opts); 5939d3997fdSAlex Elder if (rbdc) /* using an existing client */ 59443ae4701SAlex Elder ceph_destroy_options(ceph_opts); 5959d3997fdSAlex Elder else 596f8c38929SAlex Elder rbdc = rbd_client_create(ceph_opts); 597d720bcb0SAlex Elder 5989d3997fdSAlex Elder return rbdc; 599602adf40SYehuda Sadeh } 600602adf40SYehuda Sadeh 601602adf40SYehuda Sadeh /* 602602adf40SYehuda Sadeh * Destroy ceph client 603d23a4b3fSAlex Elder * 604432b8587SAlex Elder * Caller must hold rbd_client_list_lock. 605602adf40SYehuda Sadeh */ 606602adf40SYehuda Sadeh static void rbd_client_release(struct kref *kref) 607602adf40SYehuda Sadeh { 608602adf40SYehuda Sadeh struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); 609602adf40SYehuda Sadeh 61037206ee5SAlex Elder dout("%s: rbdc %p\n", __func__, rbdc); 611cd9d9f5dSAlex Elder spin_lock(&rbd_client_list_lock); 612602adf40SYehuda Sadeh list_del(&rbdc->node); 613cd9d9f5dSAlex Elder spin_unlock(&rbd_client_list_lock); 614602adf40SYehuda Sadeh 615602adf40SYehuda Sadeh ceph_destroy_client(rbdc->client); 616602adf40SYehuda Sadeh kfree(rbdc); 617602adf40SYehuda Sadeh } 618602adf40SYehuda Sadeh 619602adf40SYehuda Sadeh /* 620602adf40SYehuda Sadeh * Drop reference to ceph client node. If it's not referenced anymore, release 621602adf40SYehuda Sadeh * it. 
622602adf40SYehuda Sadeh */ 6239d3997fdSAlex Elder static void rbd_put_client(struct rbd_client *rbdc) 624602adf40SYehuda Sadeh { 625c53d5893SAlex Elder if (rbdc) 6269d3997fdSAlex Elder kref_put(&rbdc->kref, rbd_client_release); 627602adf40SYehuda Sadeh } 628602adf40SYehuda Sadeh 629a30b71b9SAlex Elder static bool rbd_image_format_valid(u32 image_format) 630a30b71b9SAlex Elder { 631a30b71b9SAlex Elder return image_format == 1 || image_format == 2; 632a30b71b9SAlex Elder } 633a30b71b9SAlex Elder 6348e94af8eSAlex Elder static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) 6358e94af8eSAlex Elder { 636103a150fSAlex Elder size_t size; 637103a150fSAlex Elder u32 snap_count; 638103a150fSAlex Elder 639103a150fSAlex Elder /* The header has to start with the magic rbd header text */ 640103a150fSAlex Elder if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT))) 641103a150fSAlex Elder return false; 642103a150fSAlex Elder 643db2388b6SAlex Elder /* The bio layer requires at least sector-sized I/O */ 644db2388b6SAlex Elder 645db2388b6SAlex Elder if (ondisk->options.order < SECTOR_SHIFT) 646db2388b6SAlex Elder return false; 647db2388b6SAlex Elder 648db2388b6SAlex Elder /* If we use u64 in a few spots we may be able to loosen this */ 649db2388b6SAlex Elder 650db2388b6SAlex Elder if (ondisk->options.order > 8 * sizeof (int) - 1) 651db2388b6SAlex Elder return false; 652db2388b6SAlex Elder 653103a150fSAlex Elder /* 654103a150fSAlex Elder * The size of a snapshot header has to fit in a size_t, and 655103a150fSAlex Elder * that limits the number of snapshots. 
656103a150fSAlex Elder */ 657103a150fSAlex Elder snap_count = le32_to_cpu(ondisk->snap_count); 658103a150fSAlex Elder size = SIZE_MAX - sizeof (struct ceph_snap_context); 659103a150fSAlex Elder if (snap_count > size / sizeof (__le64)) 660103a150fSAlex Elder return false; 661103a150fSAlex Elder 662103a150fSAlex Elder /* 663103a150fSAlex Elder * Not only that, but the size of the entire the snapshot 664103a150fSAlex Elder * header must also be representable in a size_t. 665103a150fSAlex Elder */ 666103a150fSAlex Elder size -= snap_count * sizeof (__le64); 667103a150fSAlex Elder if ((u64) size < le64_to_cpu(ondisk->snap_names_len)) 668103a150fSAlex Elder return false; 669103a150fSAlex Elder 670103a150fSAlex Elder return true; 6718e94af8eSAlex Elder } 6728e94af8eSAlex Elder 673602adf40SYehuda Sadeh /* 674602adf40SYehuda Sadeh * Create a new header structure, translate header format from the on-disk 675602adf40SYehuda Sadeh * header. 676602adf40SYehuda Sadeh */ 677602adf40SYehuda Sadeh static int rbd_header_from_disk(struct rbd_image_header *header, 6784156d998SAlex Elder struct rbd_image_header_ondisk *ondisk) 679602adf40SYehuda Sadeh { 680ccece235SAlex Elder u32 snap_count; 68158c17b0eSAlex Elder size_t len; 682d2bb24e5SAlex Elder size_t size; 683621901d6SAlex Elder u32 i; 684602adf40SYehuda Sadeh 6856a52325fSAlex Elder memset(header, 0, sizeof (*header)); 6866a52325fSAlex Elder 687103a150fSAlex Elder snap_count = le32_to_cpu(ondisk->snap_count); 688103a150fSAlex Elder 68958c17b0eSAlex Elder len = strnlen(ondisk->object_prefix, sizeof (ondisk->object_prefix)); 69058c17b0eSAlex Elder header->object_prefix = kmalloc(len + 1, GFP_KERNEL); 6916a52325fSAlex Elder if (!header->object_prefix) 692602adf40SYehuda Sadeh return -ENOMEM; 69358c17b0eSAlex Elder memcpy(header->object_prefix, ondisk->object_prefix, len); 69458c17b0eSAlex Elder header->object_prefix[len] = '\0'; 69500f1f36fSAlex Elder 696602adf40SYehuda Sadeh if (snap_count) { 697f785cc1dSAlex Elder u64 
snap_names_len = le64_to_cpu(ondisk->snap_names_len); 698f785cc1dSAlex Elder 699621901d6SAlex Elder /* Save a copy of the snapshot names */ 700621901d6SAlex Elder 701f785cc1dSAlex Elder if (snap_names_len > (u64) SIZE_MAX) 702f785cc1dSAlex Elder return -EIO; 703f785cc1dSAlex Elder header->snap_names = kmalloc(snap_names_len, GFP_KERNEL); 704602adf40SYehuda Sadeh if (!header->snap_names) 7056a52325fSAlex Elder goto out_err; 706f785cc1dSAlex Elder /* 707f785cc1dSAlex Elder * Note that rbd_dev_v1_header_read() guarantees 708f785cc1dSAlex Elder * the ondisk buffer we're working with has 709f785cc1dSAlex Elder * snap_names_len bytes beyond the end of the 710f785cc1dSAlex Elder * snapshot id array, this memcpy() is safe. 711f785cc1dSAlex Elder */ 712f785cc1dSAlex Elder memcpy(header->snap_names, &ondisk->snaps[snap_count], 713f785cc1dSAlex Elder snap_names_len); 7146a52325fSAlex Elder 715621901d6SAlex Elder /* Record each snapshot's size */ 716621901d6SAlex Elder 717d2bb24e5SAlex Elder size = snap_count * sizeof (*header->snap_sizes); 718d2bb24e5SAlex Elder header->snap_sizes = kmalloc(size, GFP_KERNEL); 719602adf40SYehuda Sadeh if (!header->snap_sizes) 7206a52325fSAlex Elder goto out_err; 721621901d6SAlex Elder for (i = 0; i < snap_count; i++) 722621901d6SAlex Elder header->snap_sizes[i] = 723621901d6SAlex Elder le64_to_cpu(ondisk->snaps[i].image_size); 724602adf40SYehuda Sadeh } else { 725ccece235SAlex Elder WARN_ON(ondisk->snap_names_len); 726602adf40SYehuda Sadeh header->snap_names = NULL; 727602adf40SYehuda Sadeh header->snap_sizes = NULL; 728602adf40SYehuda Sadeh } 729849b4260SAlex Elder 73034b13184SAlex Elder header->features = 0; /* No features support in v1 images */ 731602adf40SYehuda Sadeh header->obj_order = ondisk->options.order; 732602adf40SYehuda Sadeh header->crypt_type = ondisk->options.crypt_type; 733602adf40SYehuda Sadeh header->comp_type = ondisk->options.comp_type; 7346a52325fSAlex Elder 735621901d6SAlex Elder /* Allocate and fill in the snapshot 
context */ 736621901d6SAlex Elder 737f84344f3SAlex Elder header->image_size = le64_to_cpu(ondisk->image_size); 7386a52325fSAlex Elder size = sizeof (struct ceph_snap_context); 7396a52325fSAlex Elder size += snap_count * sizeof (header->snapc->snaps[0]); 7406a52325fSAlex Elder header->snapc = kzalloc(size, GFP_KERNEL); 7416a52325fSAlex Elder if (!header->snapc) 7426a52325fSAlex Elder goto out_err; 743602adf40SYehuda Sadeh 744602adf40SYehuda Sadeh atomic_set(&header->snapc->nref, 1); 745505cbb9bSAlex Elder header->snapc->seq = le64_to_cpu(ondisk->snap_seq); 746602adf40SYehuda Sadeh header->snapc->num_snaps = snap_count; 747621901d6SAlex Elder for (i = 0; i < snap_count; i++) 748602adf40SYehuda Sadeh header->snapc->snaps[i] = 749602adf40SYehuda Sadeh le64_to_cpu(ondisk->snaps[i].id); 750602adf40SYehuda Sadeh 751602adf40SYehuda Sadeh return 0; 752602adf40SYehuda Sadeh 7536a52325fSAlex Elder out_err: 754849b4260SAlex Elder kfree(header->snap_sizes); 755ccece235SAlex Elder header->snap_sizes = NULL; 756602adf40SYehuda Sadeh kfree(header->snap_names); 757ccece235SAlex Elder header->snap_names = NULL; 7586a52325fSAlex Elder kfree(header->object_prefix); 7596a52325fSAlex Elder header->object_prefix = NULL; 760ccece235SAlex Elder 76100f1f36fSAlex Elder return -ENOMEM; 762602adf40SYehuda Sadeh } 763602adf40SYehuda Sadeh 7649e15b77dSAlex Elder static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id) 7659e15b77dSAlex Elder { 7669e15b77dSAlex Elder struct rbd_snap *snap; 7679e15b77dSAlex Elder 7689e15b77dSAlex Elder if (snap_id == CEPH_NOSNAP) 7699e15b77dSAlex Elder return RBD_SNAP_HEAD_NAME; 7709e15b77dSAlex Elder 7719e15b77dSAlex Elder list_for_each_entry(snap, &rbd_dev->snaps, node) 7729e15b77dSAlex Elder if (snap_id == snap->id) 7739e15b77dSAlex Elder return snap->name; 7749e15b77dSAlex Elder 7759e15b77dSAlex Elder return NULL; 7769e15b77dSAlex Elder } 7779e15b77dSAlex Elder 7788836b995SAlex Elder static int snap_by_name(struct rbd_device *rbd_dev, const 
char *snap_name) 779602adf40SYehuda Sadeh { 780602adf40SYehuda Sadeh 781e86924a8SAlex Elder struct rbd_snap *snap; 78200f1f36fSAlex Elder 783e86924a8SAlex Elder list_for_each_entry(snap, &rbd_dev->snaps, node) { 784e86924a8SAlex Elder if (!strcmp(snap_name, snap->name)) { 7850d7dbfceSAlex Elder rbd_dev->spec->snap_id = snap->id; 786e86924a8SAlex Elder rbd_dev->mapping.size = snap->size; 78734b13184SAlex Elder rbd_dev->mapping.features = snap->features; 78800f1f36fSAlex Elder 789e86924a8SAlex Elder return 0; 790602adf40SYehuda Sadeh } 79100f1f36fSAlex Elder } 792e86924a8SAlex Elder 79300f1f36fSAlex Elder return -ENOENT; 79400f1f36fSAlex Elder } 795602adf40SYehuda Sadeh 796819d52bfSAlex Elder static int rbd_dev_set_mapping(struct rbd_device *rbd_dev) 797602adf40SYehuda Sadeh { 79878dc447dSAlex Elder int ret; 799602adf40SYehuda Sadeh 8000d7dbfceSAlex Elder if (!memcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME, 801cc9d734cSJosh Durgin sizeof (RBD_SNAP_HEAD_NAME))) { 8020d7dbfceSAlex Elder rbd_dev->spec->snap_id = CEPH_NOSNAP; 80399c1f08fSAlex Elder rbd_dev->mapping.size = rbd_dev->header.image_size; 80434b13184SAlex Elder rbd_dev->mapping.features = rbd_dev->header.features; 805e86924a8SAlex Elder ret = 0; 806602adf40SYehuda Sadeh } else { 8070d7dbfceSAlex Elder ret = snap_by_name(rbd_dev, rbd_dev->spec->snap_name); 808602adf40SYehuda Sadeh if (ret < 0) 809602adf40SYehuda Sadeh goto done; 810f84344f3SAlex Elder rbd_dev->mapping.read_only = true; 811602adf40SYehuda Sadeh } 8126d292906SAlex Elder set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); 8136d292906SAlex Elder 814602adf40SYehuda Sadeh done: 815602adf40SYehuda Sadeh return ret; 816602adf40SYehuda Sadeh } 817602adf40SYehuda Sadeh 818602adf40SYehuda Sadeh static void rbd_header_free(struct rbd_image_header *header) 819602adf40SYehuda Sadeh { 820849b4260SAlex Elder kfree(header->object_prefix); 821d78fd7aeSAlex Elder header->object_prefix = NULL; 822602adf40SYehuda Sadeh kfree(header->snap_sizes); 823d78fd7aeSAlex 
Elder header->snap_sizes = NULL; 824849b4260SAlex Elder kfree(header->snap_names); 825d78fd7aeSAlex Elder header->snap_names = NULL; 826d1d25646SJosh Durgin ceph_put_snap_context(header->snapc); 827d78fd7aeSAlex Elder header->snapc = NULL; 828602adf40SYehuda Sadeh } 829602adf40SYehuda Sadeh 83098571b5aSAlex Elder static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) 831602adf40SYehuda Sadeh { 83265ccfe21SAlex Elder char *name; 83365ccfe21SAlex Elder u64 segment; 83465ccfe21SAlex Elder int ret; 835602adf40SYehuda Sadeh 8362fd82b9eSAlex Elder name = kmalloc(MAX_OBJ_NAME_SIZE + 1, GFP_NOIO); 83765ccfe21SAlex Elder if (!name) 83865ccfe21SAlex Elder return NULL; 83965ccfe21SAlex Elder segment = offset >> rbd_dev->header.obj_order; 8402fd82b9eSAlex Elder ret = snprintf(name, MAX_OBJ_NAME_SIZE + 1, "%s.%012llx", 84165ccfe21SAlex Elder rbd_dev->header.object_prefix, segment); 8422fd82b9eSAlex Elder if (ret < 0 || ret > MAX_OBJ_NAME_SIZE) { 84365ccfe21SAlex Elder pr_err("error formatting segment name for #%llu (%d)\n", 84465ccfe21SAlex Elder segment, ret); 84565ccfe21SAlex Elder kfree(name); 84665ccfe21SAlex Elder name = NULL; 84765ccfe21SAlex Elder } 848602adf40SYehuda Sadeh 84965ccfe21SAlex Elder return name; 85065ccfe21SAlex Elder } 851602adf40SYehuda Sadeh 85265ccfe21SAlex Elder static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset) 85365ccfe21SAlex Elder { 85465ccfe21SAlex Elder u64 segment_size = (u64) 1 << rbd_dev->header.obj_order; 855602adf40SYehuda Sadeh 85665ccfe21SAlex Elder return offset & (segment_size - 1); 85765ccfe21SAlex Elder } 85865ccfe21SAlex Elder 85965ccfe21SAlex Elder static u64 rbd_segment_length(struct rbd_device *rbd_dev, 86065ccfe21SAlex Elder u64 offset, u64 length) 86165ccfe21SAlex Elder { 86265ccfe21SAlex Elder u64 segment_size = (u64) 1 << rbd_dev->header.obj_order; 86365ccfe21SAlex Elder 86465ccfe21SAlex Elder offset &= segment_size - 1; 86565ccfe21SAlex Elder 866aafb230eSAlex Elder rbd_assert(length <= 
U64_MAX - offset); 86765ccfe21SAlex Elder if (offset + length > segment_size) 86865ccfe21SAlex Elder length = segment_size - offset; 86965ccfe21SAlex Elder 87065ccfe21SAlex Elder return length; 871602adf40SYehuda Sadeh } 872602adf40SYehuda Sadeh 873602adf40SYehuda Sadeh /* 874029bcbd8SJosh Durgin * returns the size of an object in the image 875029bcbd8SJosh Durgin */ 876029bcbd8SJosh Durgin static u64 rbd_obj_bytes(struct rbd_image_header *header) 877029bcbd8SJosh Durgin { 878029bcbd8SJosh Durgin return 1 << header->obj_order; 879029bcbd8SJosh Durgin } 880029bcbd8SJosh Durgin 881029bcbd8SJosh Durgin /* 882602adf40SYehuda Sadeh * bio helpers 883602adf40SYehuda Sadeh */ 884602adf40SYehuda Sadeh 885602adf40SYehuda Sadeh static void bio_chain_put(struct bio *chain) 886602adf40SYehuda Sadeh { 887602adf40SYehuda Sadeh struct bio *tmp; 888602adf40SYehuda Sadeh 889602adf40SYehuda Sadeh while (chain) { 890602adf40SYehuda Sadeh tmp = chain; 891602adf40SYehuda Sadeh chain = chain->bi_next; 892602adf40SYehuda Sadeh bio_put(tmp); 893602adf40SYehuda Sadeh } 894602adf40SYehuda Sadeh } 895602adf40SYehuda Sadeh 896602adf40SYehuda Sadeh /* 897602adf40SYehuda Sadeh * zeros a bio chain, starting at specific offset 898602adf40SYehuda Sadeh */ 899602adf40SYehuda Sadeh static void zero_bio_chain(struct bio *chain, int start_ofs) 900602adf40SYehuda Sadeh { 901602adf40SYehuda Sadeh struct bio_vec *bv; 902602adf40SYehuda Sadeh unsigned long flags; 903602adf40SYehuda Sadeh void *buf; 904602adf40SYehuda Sadeh int i; 905602adf40SYehuda Sadeh int pos = 0; 906602adf40SYehuda Sadeh 907602adf40SYehuda Sadeh while (chain) { 908602adf40SYehuda Sadeh bio_for_each_segment(bv, chain, i) { 909602adf40SYehuda Sadeh if (pos + bv->bv_len > start_ofs) { 910602adf40SYehuda Sadeh int remainder = max(start_ofs - pos, 0); 911602adf40SYehuda Sadeh buf = bvec_kmap_irq(bv, &flags); 912602adf40SYehuda Sadeh memset(buf + remainder, 0, 913602adf40SYehuda Sadeh bv->bv_len - remainder); 91485b5aaa6SDan Carpenter 
bvec_kunmap_irq(buf, &flags); 915602adf40SYehuda Sadeh } 916602adf40SYehuda Sadeh pos += bv->bv_len; 917602adf40SYehuda Sadeh } 918602adf40SYehuda Sadeh 919602adf40SYehuda Sadeh chain = chain->bi_next; 920602adf40SYehuda Sadeh } 921602adf40SYehuda Sadeh } 922602adf40SYehuda Sadeh 923602adf40SYehuda Sadeh /* 924f7760dadSAlex Elder * Clone a portion of a bio, starting at the given byte offset 925f7760dadSAlex Elder * and continuing for the number of bytes indicated. 926602adf40SYehuda Sadeh */ 927f7760dadSAlex Elder static struct bio *bio_clone_range(struct bio *bio_src, 928f7760dadSAlex Elder unsigned int offset, 929f7760dadSAlex Elder unsigned int len, 930f7760dadSAlex Elder gfp_t gfpmask) 931602adf40SYehuda Sadeh { 932f7760dadSAlex Elder struct bio_vec *bv; 933f7760dadSAlex Elder unsigned int resid; 934f7760dadSAlex Elder unsigned short idx; 935f7760dadSAlex Elder unsigned int voff; 936f7760dadSAlex Elder unsigned short end_idx; 937f7760dadSAlex Elder unsigned short vcnt; 938f7760dadSAlex Elder struct bio *bio; 939602adf40SYehuda Sadeh 940f7760dadSAlex Elder /* Handle the easy case for the caller */ 941f7760dadSAlex Elder 942f7760dadSAlex Elder if (!offset && len == bio_src->bi_size) 943f7760dadSAlex Elder return bio_clone(bio_src, gfpmask); 944f7760dadSAlex Elder 945f7760dadSAlex Elder if (WARN_ON_ONCE(!len)) 946f7760dadSAlex Elder return NULL; 947f7760dadSAlex Elder if (WARN_ON_ONCE(len > bio_src->bi_size)) 948f7760dadSAlex Elder return NULL; 949f7760dadSAlex Elder if (WARN_ON_ONCE(offset > bio_src->bi_size - len)) 950f7760dadSAlex Elder return NULL; 951f7760dadSAlex Elder 952f7760dadSAlex Elder /* Find first affected segment... 
*/ 953f7760dadSAlex Elder 954f7760dadSAlex Elder resid = offset; 955f7760dadSAlex Elder __bio_for_each_segment(bv, bio_src, idx, 0) { 956f7760dadSAlex Elder if (resid < bv->bv_len) 957f7760dadSAlex Elder break; 958f7760dadSAlex Elder resid -= bv->bv_len; 959602adf40SYehuda Sadeh } 960f7760dadSAlex Elder voff = resid; 961602adf40SYehuda Sadeh 962f7760dadSAlex Elder /* ...and the last affected segment */ 963542582fcSAlex Elder 964f7760dadSAlex Elder resid += len; 965f7760dadSAlex Elder __bio_for_each_segment(bv, bio_src, end_idx, idx) { 966f7760dadSAlex Elder if (resid <= bv->bv_len) 967f7760dadSAlex Elder break; 968f7760dadSAlex Elder resid -= bv->bv_len; 969f7760dadSAlex Elder } 970f7760dadSAlex Elder vcnt = end_idx - idx + 1; 971602adf40SYehuda Sadeh 972f7760dadSAlex Elder /* Build the clone */ 973f7760dadSAlex Elder 974f7760dadSAlex Elder bio = bio_alloc(gfpmask, (unsigned int) vcnt); 975f7760dadSAlex Elder if (!bio) 976f7760dadSAlex Elder return NULL; /* ENOMEM */ 977f7760dadSAlex Elder 978f7760dadSAlex Elder bio->bi_bdev = bio_src->bi_bdev; 979f7760dadSAlex Elder bio->bi_sector = bio_src->bi_sector + (offset >> SECTOR_SHIFT); 980f7760dadSAlex Elder bio->bi_rw = bio_src->bi_rw; 981f7760dadSAlex Elder bio->bi_flags |= 1 << BIO_CLONED; 982602adf40SYehuda Sadeh 983602adf40SYehuda Sadeh /* 984f7760dadSAlex Elder * Copy over our part of the bio_vec, then update the first 985f7760dadSAlex Elder * and last (or only) entries. 
986602adf40SYehuda Sadeh */ 987f7760dadSAlex Elder memcpy(&bio->bi_io_vec[0], &bio_src->bi_io_vec[idx], 988f7760dadSAlex Elder vcnt * sizeof (struct bio_vec)); 989f7760dadSAlex Elder bio->bi_io_vec[0].bv_offset += voff; 990f7760dadSAlex Elder if (vcnt > 1) { 991f7760dadSAlex Elder bio->bi_io_vec[0].bv_len -= voff; 992f7760dadSAlex Elder bio->bi_io_vec[vcnt - 1].bv_len = resid; 993602adf40SYehuda Sadeh } else { 994f7760dadSAlex Elder bio->bi_io_vec[0].bv_len = len; 995602adf40SYehuda Sadeh } 996602adf40SYehuda Sadeh 997f7760dadSAlex Elder bio->bi_vcnt = vcnt; 998f7760dadSAlex Elder bio->bi_size = len; 999f7760dadSAlex Elder bio->bi_idx = 0; 1000602adf40SYehuda Sadeh 1001f7760dadSAlex Elder return bio; 1002602adf40SYehuda Sadeh } 1003602adf40SYehuda Sadeh 1004f7760dadSAlex Elder /* 1005f7760dadSAlex Elder * Clone a portion of a bio chain, starting at the given byte offset 1006f7760dadSAlex Elder * into the first bio in the source chain and continuing for the 1007f7760dadSAlex Elder * number of bytes indicated. The result is another bio chain of 1008f7760dadSAlex Elder * exactly the given length, or a null pointer on error. 1009f7760dadSAlex Elder * 1010f7760dadSAlex Elder * The bio_src and offset parameters are both in-out. On entry they 1011f7760dadSAlex Elder * refer to the first source bio and the offset into that bio where 1012f7760dadSAlex Elder * the start of data to be cloned is located. 1013f7760dadSAlex Elder * 1014f7760dadSAlex Elder * On return, bio_src is updated to refer to the bio in the source 1015f7760dadSAlex Elder * chain that contains first un-cloned byte, and *offset will 1016f7760dadSAlex Elder * contain the offset of that byte within that bio. 
1017f7760dadSAlex Elder */ 1018f7760dadSAlex Elder static struct bio *bio_chain_clone_range(struct bio **bio_src, 1019f7760dadSAlex Elder unsigned int *offset, 1020f7760dadSAlex Elder unsigned int len, 1021f7760dadSAlex Elder gfp_t gfpmask) 1022f7760dadSAlex Elder { 1023f7760dadSAlex Elder struct bio *bi = *bio_src; 1024f7760dadSAlex Elder unsigned int off = *offset; 1025f7760dadSAlex Elder struct bio *chain = NULL; 1026f7760dadSAlex Elder struct bio **end; 1027602adf40SYehuda Sadeh 1028f7760dadSAlex Elder /* Build up a chain of clone bios up to the limit */ 1029602adf40SYehuda Sadeh 1030f7760dadSAlex Elder if (!bi || off >= bi->bi_size || !len) 1031f7760dadSAlex Elder return NULL; /* Nothing to clone */ 1032602adf40SYehuda Sadeh 1033f7760dadSAlex Elder end = &chain; 1034f7760dadSAlex Elder while (len) { 1035f7760dadSAlex Elder unsigned int bi_size; 1036f7760dadSAlex Elder struct bio *bio; 1037f7760dadSAlex Elder 1038f5400b7aSAlex Elder if (!bi) { 1039f5400b7aSAlex Elder rbd_warn(NULL, "bio_chain exhausted with %u left", len); 1040f7760dadSAlex Elder goto out_err; /* EINVAL; ran out of bio's */ 1041f5400b7aSAlex Elder } 1042f7760dadSAlex Elder bi_size = min_t(unsigned int, bi->bi_size - off, len); 1043f7760dadSAlex Elder bio = bio_clone_range(bi, off, bi_size, gfpmask); 1044f7760dadSAlex Elder if (!bio) 1045f7760dadSAlex Elder goto out_err; /* ENOMEM */ 1046f7760dadSAlex Elder 1047f7760dadSAlex Elder *end = bio; 1048f7760dadSAlex Elder end = &bio->bi_next; 1049f7760dadSAlex Elder 1050f7760dadSAlex Elder off += bi_size; 1051f7760dadSAlex Elder if (off == bi->bi_size) { 1052f7760dadSAlex Elder bi = bi->bi_next; 1053f7760dadSAlex Elder off = 0; 1054f7760dadSAlex Elder } 1055f7760dadSAlex Elder len -= bi_size; 1056f7760dadSAlex Elder } 1057f7760dadSAlex Elder *bio_src = bi; 1058f7760dadSAlex Elder *offset = off; 1059f7760dadSAlex Elder 1060f7760dadSAlex Elder return chain; 1061f7760dadSAlex Elder out_err: 1062f7760dadSAlex Elder bio_chain_put(chain); 1063f7760dadSAlex 
Elder 1064602adf40SYehuda Sadeh return NULL; 1065602adf40SYehuda Sadeh } 1066602adf40SYehuda Sadeh 1067bf0d5f50SAlex Elder static void rbd_obj_request_get(struct rbd_obj_request *obj_request) 1068bf0d5f50SAlex Elder { 106937206ee5SAlex Elder dout("%s: obj %p (was %d)\n", __func__, obj_request, 107037206ee5SAlex Elder atomic_read(&obj_request->kref.refcount)); 1071bf0d5f50SAlex Elder kref_get(&obj_request->kref); 1072bf0d5f50SAlex Elder } 1073bf0d5f50SAlex Elder 1074bf0d5f50SAlex Elder static void rbd_obj_request_destroy(struct kref *kref); 1075bf0d5f50SAlex Elder static void rbd_obj_request_put(struct rbd_obj_request *obj_request) 1076bf0d5f50SAlex Elder { 1077bf0d5f50SAlex Elder rbd_assert(obj_request != NULL); 107837206ee5SAlex Elder dout("%s: obj %p (was %d)\n", __func__, obj_request, 107937206ee5SAlex Elder atomic_read(&obj_request->kref.refcount)); 1080bf0d5f50SAlex Elder kref_put(&obj_request->kref, rbd_obj_request_destroy); 1081bf0d5f50SAlex Elder } 1082bf0d5f50SAlex Elder 1083bf0d5f50SAlex Elder static void rbd_img_request_get(struct rbd_img_request *img_request) 1084bf0d5f50SAlex Elder { 108537206ee5SAlex Elder dout("%s: img %p (was %d)\n", __func__, img_request, 108637206ee5SAlex Elder atomic_read(&img_request->kref.refcount)); 1087bf0d5f50SAlex Elder kref_get(&img_request->kref); 1088bf0d5f50SAlex Elder } 1089bf0d5f50SAlex Elder 1090bf0d5f50SAlex Elder static void rbd_img_request_destroy(struct kref *kref); 1091bf0d5f50SAlex Elder static void rbd_img_request_put(struct rbd_img_request *img_request) 1092bf0d5f50SAlex Elder { 1093bf0d5f50SAlex Elder rbd_assert(img_request != NULL); 109437206ee5SAlex Elder dout("%s: img %p (was %d)\n", __func__, img_request, 109537206ee5SAlex Elder atomic_read(&img_request->kref.refcount)); 1096bf0d5f50SAlex Elder kref_put(&img_request->kref, rbd_img_request_destroy); 1097bf0d5f50SAlex Elder } 1098bf0d5f50SAlex Elder 1099bf0d5f50SAlex Elder static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request, 
1100bf0d5f50SAlex Elder struct rbd_obj_request *obj_request) 1101bf0d5f50SAlex Elder { 110225dcf954SAlex Elder rbd_assert(obj_request->img_request == NULL); 110325dcf954SAlex Elder 1104bf0d5f50SAlex Elder rbd_obj_request_get(obj_request); 1105bf0d5f50SAlex Elder obj_request->img_request = img_request; 110625dcf954SAlex Elder obj_request->which = img_request->obj_request_count; 1107bf0d5f50SAlex Elder rbd_assert(obj_request->which != BAD_WHICH); 110825dcf954SAlex Elder img_request->obj_request_count++; 110925dcf954SAlex Elder list_add_tail(&obj_request->links, &img_request->obj_requests); 111037206ee5SAlex Elder dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request, 111137206ee5SAlex Elder obj_request->which); 1112bf0d5f50SAlex Elder } 1113bf0d5f50SAlex Elder 1114bf0d5f50SAlex Elder static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request, 1115bf0d5f50SAlex Elder struct rbd_obj_request *obj_request) 1116bf0d5f50SAlex Elder { 1117bf0d5f50SAlex Elder rbd_assert(obj_request->which != BAD_WHICH); 111825dcf954SAlex Elder 111937206ee5SAlex Elder dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request, 112037206ee5SAlex Elder obj_request->which); 1121bf0d5f50SAlex Elder list_del(&obj_request->links); 112225dcf954SAlex Elder rbd_assert(img_request->obj_request_count > 0); 112325dcf954SAlex Elder img_request->obj_request_count--; 112425dcf954SAlex Elder rbd_assert(obj_request->which == img_request->obj_request_count); 112525dcf954SAlex Elder obj_request->which = BAD_WHICH; 1126bf0d5f50SAlex Elder rbd_assert(obj_request->img_request == img_request); 1127bf0d5f50SAlex Elder obj_request->img_request = NULL; 112825dcf954SAlex Elder obj_request->callback = NULL; 1129bf0d5f50SAlex Elder rbd_obj_request_put(obj_request); 1130bf0d5f50SAlex Elder } 1131bf0d5f50SAlex Elder 1132bf0d5f50SAlex Elder static bool obj_request_type_valid(enum obj_request_type type) 1133bf0d5f50SAlex Elder { 1134bf0d5f50SAlex Elder switch (type) { 
11359969ebc5SAlex Elder case OBJ_REQUEST_NODATA: 1136bf0d5f50SAlex Elder case OBJ_REQUEST_BIO: 1137788e2df3SAlex Elder case OBJ_REQUEST_PAGES: 1138bf0d5f50SAlex Elder return true; 1139bf0d5f50SAlex Elder default: 1140bf0d5f50SAlex Elder return false; 1141bf0d5f50SAlex Elder } 1142bf0d5f50SAlex Elder } 1143bf0d5f50SAlex Elder 1144cc344fa1SAlex Elder static struct ceph_osd_req_op *rbd_osd_req_op_create(u16 opcode, ...) 11458d23bf29SAlex Elder { 11468d23bf29SAlex Elder struct ceph_osd_req_op *op; 11478d23bf29SAlex Elder va_list args; 11482647ba38SAlex Elder size_t size; 11498d23bf29SAlex Elder 11508d23bf29SAlex Elder op = kzalloc(sizeof (*op), GFP_NOIO); 11518d23bf29SAlex Elder if (!op) 11528d23bf29SAlex Elder return NULL; 11538d23bf29SAlex Elder op->op = opcode; 11548d23bf29SAlex Elder va_start(args, opcode); 11558d23bf29SAlex Elder switch (opcode) { 11568d23bf29SAlex Elder case CEPH_OSD_OP_READ: 11578d23bf29SAlex Elder case CEPH_OSD_OP_WRITE: 11588d23bf29SAlex Elder /* rbd_osd_req_op_create(READ, offset, length) */ 11598d23bf29SAlex Elder /* rbd_osd_req_op_create(WRITE, offset, length) */ 11608d23bf29SAlex Elder op->extent.offset = va_arg(args, u64); 11618d23bf29SAlex Elder op->extent.length = va_arg(args, u64); 11628d23bf29SAlex Elder if (opcode == CEPH_OSD_OP_WRITE) 11638d23bf29SAlex Elder op->payload_len = op->extent.length; 11648d23bf29SAlex Elder break; 1165fbfab539SAlex Elder case CEPH_OSD_OP_STAT: 1166fbfab539SAlex Elder break; 11672647ba38SAlex Elder case CEPH_OSD_OP_CALL: 11682647ba38SAlex Elder /* rbd_osd_req_op_create(CALL, class, method, data, datalen) */ 11692647ba38SAlex Elder op->cls.class_name = va_arg(args, char *); 11702647ba38SAlex Elder size = strlen(op->cls.class_name); 11712647ba38SAlex Elder rbd_assert(size <= (size_t) U8_MAX); 11722647ba38SAlex Elder op->cls.class_len = size; 11732647ba38SAlex Elder op->payload_len = size; 11742647ba38SAlex Elder 11752647ba38SAlex Elder op->cls.method_name = va_arg(args, char *); 11762647ba38SAlex Elder size 
= strlen(op->cls.method_name); 11772647ba38SAlex Elder rbd_assert(size <= (size_t) U8_MAX); 11782647ba38SAlex Elder op->cls.method_len = size; 11792647ba38SAlex Elder op->payload_len += size; 11802647ba38SAlex Elder 11812647ba38SAlex Elder op->cls.argc = 0; 11822647ba38SAlex Elder op->cls.indata = va_arg(args, void *); 11832647ba38SAlex Elder size = va_arg(args, size_t); 11842647ba38SAlex Elder rbd_assert(size <= (size_t) U32_MAX); 11852647ba38SAlex Elder op->cls.indata_len = (u32) size; 11862647ba38SAlex Elder op->payload_len += size; 11872647ba38SAlex Elder break; 11885efea49aSAlex Elder case CEPH_OSD_OP_NOTIFY_ACK: 11895efea49aSAlex Elder case CEPH_OSD_OP_WATCH: 11905efea49aSAlex Elder /* rbd_osd_req_op_create(NOTIFY_ACK, cookie, version) */ 11915efea49aSAlex Elder /* rbd_osd_req_op_create(WATCH, cookie, version, flag) */ 11925efea49aSAlex Elder op->watch.cookie = va_arg(args, u64); 11935efea49aSAlex Elder op->watch.ver = va_arg(args, u64); 11945efea49aSAlex Elder op->watch.ver = cpu_to_le64(op->watch.ver); 11955efea49aSAlex Elder if (opcode == CEPH_OSD_OP_WATCH && va_arg(args, int)) 11965efea49aSAlex Elder op->watch.flag = (u8) 1; 11975efea49aSAlex Elder break; 11988d23bf29SAlex Elder default: 11998d23bf29SAlex Elder rbd_warn(NULL, "unsupported opcode %hu\n", opcode); 12008d23bf29SAlex Elder kfree(op); 12018d23bf29SAlex Elder op = NULL; 12028d23bf29SAlex Elder break; 12038d23bf29SAlex Elder } 12048d23bf29SAlex Elder va_end(args); 12058d23bf29SAlex Elder 12068d23bf29SAlex Elder return op; 12078d23bf29SAlex Elder } 12088d23bf29SAlex Elder 12098d23bf29SAlex Elder static void rbd_osd_req_op_destroy(struct ceph_osd_req_op *op) 12108d23bf29SAlex Elder { 12118d23bf29SAlex Elder kfree(op); 12128d23bf29SAlex Elder } 12138d23bf29SAlex Elder 1214bf0d5f50SAlex Elder static int rbd_obj_request_submit(struct ceph_osd_client *osdc, 1215bf0d5f50SAlex Elder struct rbd_obj_request *obj_request) 1216bf0d5f50SAlex Elder { 121737206ee5SAlex Elder dout("%s: osdc %p obj %p\n", 
__func__, osdc, obj_request); 121837206ee5SAlex Elder 1219bf0d5f50SAlex Elder return ceph_osdc_start_request(osdc, obj_request->osd_req, false); 1220bf0d5f50SAlex Elder } 1221bf0d5f50SAlex Elder 1222bf0d5f50SAlex Elder static void rbd_img_request_complete(struct rbd_img_request *img_request) 1223bf0d5f50SAlex Elder { 122437206ee5SAlex Elder dout("%s: img %p\n", __func__, img_request); 1225bf0d5f50SAlex Elder if (img_request->callback) 1226bf0d5f50SAlex Elder img_request->callback(img_request); 1227bf0d5f50SAlex Elder else 1228bf0d5f50SAlex Elder rbd_img_request_put(img_request); 1229bf0d5f50SAlex Elder } 1230bf0d5f50SAlex Elder 1231788e2df3SAlex Elder /* Caller is responsible for rbd_obj_request_destroy(obj_request) */ 1232788e2df3SAlex Elder 1233788e2df3SAlex Elder static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) 1234788e2df3SAlex Elder { 123537206ee5SAlex Elder dout("%s: obj %p\n", __func__, obj_request); 123637206ee5SAlex Elder 1237788e2df3SAlex Elder return wait_for_completion_interruptible(&obj_request->completion); 1238788e2df3SAlex Elder } 1239788e2df3SAlex Elder 124007741308SAlex Elder static void obj_request_done_init(struct rbd_obj_request *obj_request) 124107741308SAlex Elder { 124207741308SAlex Elder atomic_set(&obj_request->done, 0); 124307741308SAlex Elder smp_wmb(); 124407741308SAlex Elder } 124507741308SAlex Elder 124607741308SAlex Elder static void obj_request_done_set(struct rbd_obj_request *obj_request) 124707741308SAlex Elder { 1248632b88caSAlex Elder int done; 1249632b88caSAlex Elder 1250632b88caSAlex Elder done = atomic_inc_return(&obj_request->done); 1251632b88caSAlex Elder if (done > 1) { 1252632b88caSAlex Elder struct rbd_img_request *img_request = obj_request->img_request; 1253632b88caSAlex Elder struct rbd_device *rbd_dev; 1254632b88caSAlex Elder 1255632b88caSAlex Elder rbd_dev = img_request ? 
img_request->rbd_dev : NULL; 1256632b88caSAlex Elder rbd_warn(rbd_dev, "obj_request %p was already done\n", 1257632b88caSAlex Elder obj_request); 1258632b88caSAlex Elder } 125907741308SAlex Elder } 126007741308SAlex Elder 126107741308SAlex Elder static bool obj_request_done_test(struct rbd_obj_request *obj_request) 126207741308SAlex Elder { 1263632b88caSAlex Elder smp_mb(); 126407741308SAlex Elder return atomic_read(&obj_request->done) != 0; 126507741308SAlex Elder } 126607741308SAlex Elder 12679969ebc5SAlex Elder static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request, 12689969ebc5SAlex Elder struct ceph_osd_op *op) 12699969ebc5SAlex Elder { 127037206ee5SAlex Elder dout("%s: obj %p\n", __func__, obj_request); 127107741308SAlex Elder obj_request_done_set(obj_request); 12729969ebc5SAlex Elder } 12739969ebc5SAlex Elder 1274bf0d5f50SAlex Elder static void rbd_obj_request_complete(struct rbd_obj_request *obj_request) 1275bf0d5f50SAlex Elder { 127637206ee5SAlex Elder dout("%s: obj %p cb %p\n", __func__, obj_request, 127737206ee5SAlex Elder obj_request->callback); 1278bf0d5f50SAlex Elder if (obj_request->callback) 1279bf0d5f50SAlex Elder obj_request->callback(obj_request); 1280788e2df3SAlex Elder else 1281788e2df3SAlex Elder complete_all(&obj_request->completion); 1282bf0d5f50SAlex Elder } 1283bf0d5f50SAlex Elder 1284bf0d5f50SAlex Elder static void rbd_osd_read_callback(struct rbd_obj_request *obj_request, 1285bf0d5f50SAlex Elder struct ceph_osd_op *op) 1286bf0d5f50SAlex Elder { 1287bf0d5f50SAlex Elder u64 xferred; 1288bf0d5f50SAlex Elder 1289bf0d5f50SAlex Elder /* 1290bf0d5f50SAlex Elder * We support a 64-bit length, but ultimately it has to be 1291bf0d5f50SAlex Elder * passed to blk_end_request(), which takes an unsigned int. 
1292bf0d5f50SAlex Elder */ 1293bf0d5f50SAlex Elder xferred = le64_to_cpu(op->extent.length); 1294bf0d5f50SAlex Elder rbd_assert(xferred < (u64) UINT_MAX); 129537206ee5SAlex Elder dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request, 129637206ee5SAlex Elder obj_request->result, xferred, obj_request->length); 1297bf0d5f50SAlex Elder if (obj_request->result == (s32) -ENOENT) { 1298bf0d5f50SAlex Elder zero_bio_chain(obj_request->bio_list, 0); 1299bf0d5f50SAlex Elder obj_request->result = 0; 1300bf0d5f50SAlex Elder } else if (xferred < obj_request->length && !obj_request->result) { 1301bf0d5f50SAlex Elder zero_bio_chain(obj_request->bio_list, xferred); 1302bf0d5f50SAlex Elder xferred = obj_request->length; 1303bf0d5f50SAlex Elder } 1304bf0d5f50SAlex Elder obj_request->xferred = xferred; 130507741308SAlex Elder obj_request_done_set(obj_request); 1306bf0d5f50SAlex Elder }

/*
 * Handle the reply to an osd write op.  The byte count reported in
 * the op's extent is recorded as the amount transferred, a short
 * write is reported with a warning, and the object request is marked
 * done.
 */
static void rbd_osd_write_callback(struct rbd_obj_request *obj_request,
				struct ceph_osd_op *op)
{

	obj_request->xferred = le64_to_cpu(op->extent.length);
	dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request,
		obj_request->result, obj_request->xferred, obj_request->length);

	/* A short write really shouldn't occur.  Warn if we see one */

	if (obj_request->xferred != obj_request->length) {
		struct rbd_img_request *img_request = obj_request->img_request;
		struct rbd_device *rbd_dev;

		/* Object requests that aren't part of an image have no device */
		rbd_dev = img_request ? img_request->rbd_dev : NULL;
		rbd_warn(rbd_dev, "wrote %llu want %llu\n",
			obj_request->xferred, obj_request->length);
	}

	obj_request_done_set(obj_request);
}

/*
 * For a simple stat call there's nothing to do.  We'll do more if
 * this is part of a write sequence for a layered image.
 */
static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request,
				struct ceph_osd_op *op)
{
	dout("%s: obj %p\n", __func__, obj_request);
	obj_request_done_set(obj_request);
}

/*
 * Completion callback installed on every osd request built by
 * rbd_osd_req_create().  It copies the transfer count, result and
 * version from the reply into the object request, hands the (single)
 * op to the handler matching its opcode, and completes the object
 * request if that handler marked it done.
 */
static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
				struct ceph_msg *msg)
{
	struct rbd_obj_request *obj_request = osd_req->r_priv;
	struct ceph_osd_reply_head *reply_head;
	struct ceph_osd_op *op;
	u32 num_ops;
	u16 opcode;

	dout("%s: osd_req %p msg %p\n", __func__, osd_req, msg);
	rbd_assert(osd_req == obj_request->osd_req);
	/* Exactly one holds: it belongs to an image, or "which" is BAD_WHICH */
	rbd_assert(!!obj_request->img_request ^
				(obj_request->which == BAD_WHICH));

	obj_request->xferred = le32_to_cpu(msg->hdr.data_len);
	reply_head = msg->front.iov_base;
	obj_request->result = (s32) le32_to_cpu(reply_head->result);
	obj_request->version = le64_to_cpu(osd_req->r_reassert_version.version);

	num_ops = le32_to_cpu(reply_head->num_ops);
	WARN_ON(num_ops != 1);	/* For now */

	op = &reply_head->ops[0];
	opcode = le16_to_cpu(op->op);
	switch (opcode) {
	case CEPH_OSD_OP_READ:
		rbd_osd_read_callback(obj_request, op);
		break;
	case CEPH_OSD_OP_WRITE:
		rbd_osd_write_callback(obj_request, op);
		break;
	case CEPH_OSD_OP_STAT:
		rbd_osd_stat_callback(obj_request, op);
		break;
	case CEPH_OSD_OP_CALL:
	case CEPH_OSD_OP_NOTIFY_ACK:
	case CEPH_OSD_OP_WATCH:
		rbd_osd_trivial_callback(obj_request, op);
		break;
	default:
		/* No handler runs here, so the request is not marked done */
		rbd_warn(NULL, "%s: unsupported op %hu\n",
			obj_request->object_name, (unsigned short) opcode);
		break;
	}

	if (obj_request_done_test(obj_request))
		rbd_obj_request_complete(obj_request);
}

/*
 * Allocate and build a single-op osd request for an object request.
 *
 * For an object request that belongs to an image request, a write
 * uses the image request's snapshot context and a read uses its
 * snapshot id; otherwise no snapshot context and CEPH_NOSNAP are
 * used.  The data (bio list or page vector) is taken from the object
 * request according to its type.
 *
 * Returns the new osd request, or NULL if it could not be allocated.
 */
static struct ceph_osd_request *rbd_osd_req_create(
					struct rbd_device *rbd_dev,
					bool write_request,
					struct rbd_obj_request *obj_request,
					struct ceph_osd_req_op *op)
{
	struct rbd_img_request *img_request = obj_request->img_request;
	struct ceph_snap_context *snapc = NULL;
	struct ceph_osd_client *osdc;
	struct ceph_osd_request *osd_req;
	struct timespec now;
	struct timespec *mtime;
	u64 snap_id = CEPH_NOSNAP;
	u64 offset = obj_request->offset;
	u64 length = obj_request->length;

	if (img_request) {
		rbd_assert(img_request->write_request == write_request);
		if (img_request->write_request)
			snapc = img_request->snapc;
		else
			snap_id = img_request->snap_id;
	}

	/* Allocate and initialize the request, for the single op */

	osdc = &rbd_dev->rbd_client->client->osdc;
	osd_req = ceph_osdc_alloc_request(osdc, snapc, 1, false, GFP_ATOMIC);
	if (!osd_req)
		return NULL;	/* ENOMEM */

	rbd_assert(obj_request_type_valid(obj_request->type));
	switch (obj_request->type) {
	case OBJ_REQUEST_NODATA:
		break;		/* Nothing to do */
	case OBJ_REQUEST_BIO:
		rbd_assert(obj_request->bio_list != NULL);
		osd_req->r_bio = obj_request->bio_list;
		break;
	case OBJ_REQUEST_PAGES:
		osd_req->r_pages = obj_request->pages;
		osd_req->r_num_pages = obj_request->page_count;
		osd_req->r_page_alignment = offset & ~PAGE_MASK;
		break;
	}

	if (write_request) {
		osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
		now = CURRENT_TIME;
		mtime = &now;
	} else {
		osd_req->r_flags = CEPH_OSD_FLAG_READ;
		mtime = NULL;	/* not needed for reads */
		offset = 0;	/* These are not used... */
		length = 0;	/* ...for osd read requests */
	}

	osd_req->r_callback = rbd_osd_req_callback;
	osd_req->r_priv = obj_request;

	/* The name is copied without its terminating NUL */
	osd_req->r_oid_len = strlen(obj_request->object_name);
	rbd_assert(osd_req->r_oid_len < sizeof (osd_req->r_oid));
	memcpy(osd_req->r_oid, obj_request->object_name, osd_req->r_oid_len);

	osd_req->r_file_layout = rbd_dev->layout;	/* struct */

	/* osd_req will get its own reference to snapc (if non-null) */

	ceph_osdc_build_request(osd_req, offset, length, 1, op,
				snapc, snap_id, mtime);

	return osd_req;
}

/* Drop the reference to an osd request obtained from rbd_osd_req_create() */
static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req)
{
	ceph_osdc_put_request(osd_req);
}

/* object_name is assumed to be a non-null pointer and NUL-terminated */

/*
 * Allocate and initialize an object request.  The object name is
 * copied into storage allocated directly after the structure, so the
 * caller keeps ownership of its own string.  The new request is not
 * attached to any image request ("which" is BAD_WHICH).
 *
 * Returns the new object request, or NULL on allocation failure.
 */
static struct rbd_obj_request *rbd_obj_request_create(const char *object_name,
						u64 offset, u64 length,
						enum obj_request_type type)
{
	struct rbd_obj_request *obj_request;
	size_t size;
	char *name;

	rbd_assert(obj_request_type_valid(type));

	size = strlen(object_name) + 1;
	obj_request = kzalloc(sizeof (*obj_request) + size, GFP_KERNEL);
	if (!obj_request)
		return NULL;

	/* The name lives in the extra space following the structure */
	name = (char *)(obj_request + 1);
	obj_request->object_name = memcpy(name, object_name, size);
	obj_request->offset = offset;
	obj_request->length = length;
	obj_request->which = BAD_WHICH;
	obj_request->type = type;
	INIT_LIST_HEAD(&obj_request->links);
	obj_request_done_init(obj_request);
	init_completion(&obj_request->completion);
	kref_init(&obj_request->kref);

	dout("%s: \"%s\" %llu/%llu %d -> obj %p\n", __func__, object_name,
		offset, length, (int)type, obj_request);

	return obj_request;
}

/*
 * Free an object request given its embedded kref.  The request must
 * already be detached from any image request.  Its osd request (if
 * any) is released, along with the bio chain or page vector it holds.
 */
static void rbd_obj_request_destroy(struct kref *kref)
{
	struct rbd_obj_request *obj_request;

	obj_request = container_of(kref, struct rbd_obj_request, kref);

	dout("%s: obj %p\n", __func__, obj_request);

	rbd_assert(obj_request->img_request == NULL);
	rbd_assert(obj_request->which == BAD_WHICH);

	if (obj_request->osd_req)
		rbd_osd_req_destroy(obj_request->osd_req);

	rbd_assert(obj_request_type_valid(obj_request->type));
	switch (obj_request->type) {
	case OBJ_REQUEST_NODATA:
		break;		/* Nothing to do */
	case OBJ_REQUEST_BIO:
		if (obj_request->bio_list)
			bio_chain_put(obj_request->bio_list);
		break;
	case OBJ_REQUEST_PAGES:
		if (obj_request->pages)
			ceph_release_page_vector(obj_request->pages,
						obj_request->page_count);
		break;
	}

	kfree(obj_request);
}

/*
 * Caller is responsible for filling in the list of object requests
 * that comprises the image request, and the Linux request pointer
 * (if there is one).
 *
 * A write request takes a reference to the device's current snapshot
 * context; a read request records the mapped snapshot id instead.
 * Returns the new image request, or NULL on failure.
 */
static struct rbd_img_request *rbd_img_request_create(
					struct rbd_device *rbd_dev,
					u64 offset, u64 length,
					bool write_request)
{
	struct rbd_img_request *img_request;
	struct ceph_snap_context *snapc = NULL;

	img_request = kmalloc(sizeof (*img_request), GFP_ATOMIC);
	if (!img_request)
		return NULL;

	if (write_request) {
		down_read(&rbd_dev->header_rwsem);
		snapc = ceph_get_snap_context(rbd_dev->header.snapc);
		up_read(&rbd_dev->header_rwsem);
		if (WARN_ON(!snapc)) {
			kfree(img_request);
			return NULL;	/* Shouldn't happen */
		}
	}

	img_request->rq = NULL;
	img_request->rbd_dev = rbd_dev;
	img_request->offset = offset;
	img_request->length = length;
	img_request->write_request = write_request;
	if (write_request)
		img_request->snapc = snapc;
	else
		img_request->snap_id = rbd_dev->spec->snap_id;
	spin_lock_init(&img_request->completion_lock);
	img_request->next_completion = 0;
	img_request->callback = NULL;
	img_request->obj_request_count = 0;
	INIT_LIST_HEAD(&img_request->obj_requests);
	kref_init(&img_request->kref);

	rbd_img_request_get(img_request);	/* Avoid a warning */
	rbd_img_request_put(img_request);	/* TEMPORARY */

	dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev,
		write_request ? "write" : "read", offset, length,
		img_request);

	return img_request;
}

/*
 * Free an image request given its embedded kref.  Every object
 * request still on its list is removed first, and a write request
 * drops its snapshot context reference.
 */
static void rbd_img_request_destroy(struct kref *kref)
{
	struct rbd_img_request *img_request;
	struct rbd_obj_request *obj_request;
	struct rbd_obj_request *next_obj_request;

	img_request = container_of(kref, struct rbd_img_request, kref);

	dout("%s: img %p\n", __func__, img_request);

	for_each_obj_request_safe(img_request, obj_request, next_obj_request)
		rbd_img_obj_request_del(img_request, obj_request);
	rbd_assert(img_request->obj_request_count == 0);

	if (img_request->write_request)
		ceph_put_snap_context(img_request->snapc);

	kfree(img_request);
}

/*
 * Populate an image request with the object requests needed to cover
 * its byte range.  The range is walked one rbd segment at a time; for
 * each piece an object request is created, given a clone of the
 * matching part of bio_list and an osd request for the read or write,
 * and added to the image request.
 *
 * Returns 0 on success, or -ENOMEM if anything could not be
 * allocated (in which case the initial reference on every object
 * request created so far is dropped).
 */
static int rbd_img_request_fill_bio(struct rbd_img_request *img_request,
					struct bio *bio_list)
{
	struct rbd_device *rbd_dev = img_request->rbd_dev;
	struct rbd_obj_request *obj_request = NULL;
	struct rbd_obj_request *next_obj_request;
	unsigned int bio_offset;
	u64 image_offset;
	u64 resid;
	u16 opcode;

	dout("%s: img %p bio %p\n", __func__, img_request, bio_list);

	opcode = img_request->write_request ? CEPH_OSD_OP_WRITE
					      : CEPH_OSD_OP_READ;
	bio_offset = 0;
	image_offset = img_request->offset;
	rbd_assert(image_offset == bio_list->bi_sector << SECTOR_SHIFT);
	resid = img_request->length;
	rbd_assert(resid > 0);
	while (resid) {
		const char *object_name;
		unsigned int clone_size;
		struct ceph_osd_req_op *op;
		u64 offset;
		u64 length;

		object_name = rbd_segment_name(rbd_dev, image_offset);
		if (!object_name)
			goto out_unwind;
		offset = rbd_segment_offset(rbd_dev, image_offset);
		length = rbd_segment_length(rbd_dev, image_offset, resid);
		obj_request = rbd_obj_request_create(object_name,
						offset, length,
						OBJ_REQUEST_BIO);
		kfree(object_name);	/* object request has its own copy */
		if (!obj_request)
			goto out_unwind;

		rbd_assert(length <= (u64) UINT_MAX);
		clone_size = (unsigned int) length;
		obj_request->bio_list = bio_chain_clone_range(&bio_list,
						&bio_offset, clone_size,
						GFP_ATOMIC);
		if (!obj_request->bio_list)
			goto out_partial;

		/*
		 * Build up the op to use in building the osd
		 * request.  Note that the contents of the op are
		 * copied by rbd_osd_req_create().
		 */
		op = rbd_osd_req_op_create(opcode, offset, length);
		if (!op)
			goto out_partial;
		obj_request->osd_req = rbd_osd_req_create(rbd_dev,
						img_request->write_request,
						obj_request, op);
		rbd_osd_req_op_destroy(op);
		if (!obj_request->osd_req)
			goto out_partial;
		/* status and version are initially zero-filled */

		rbd_img_obj_request_add(img_request, obj_request);

		image_offset += length;
		resid -= length;
	}

	return 0;

out_partial:
	/* This one was never added to the image request's list */
	rbd_obj_request_put(obj_request);
out_unwind:
	for_each_obj_request_safe(img_request, obj_request, next_obj_request)
		rbd_obj_request_put(obj_request);

	return -ENOMEM;
}

/*
 * Per-object completion callback for object requests that belong to
 * an image request.  Completions are reported to the block layer in
 * the order of the object requests' "which" values: if this request
 * is not the next one expected, nothing is reported now.  Otherwise
 * this request and every consecutive finished request after it are
 * ended, and the image request is completed once the block layer
 * says there is nothing more outstanding.
 */
static void rbd_img_obj_callback(struct rbd_obj_request *obj_request)
{
	struct rbd_img_request *img_request;
	u32 which = obj_request->which;
	bool more = true;

	img_request = obj_request->img_request;

	dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
	rbd_assert(img_request != NULL);
	rbd_assert(img_request->rq != NULL);
	rbd_assert(img_request->obj_request_count > 0);
	rbd_assert(which != BAD_WHICH);
	rbd_assert(which < img_request->obj_request_count);
	rbd_assert(which >= img_request->next_completion);

	spin_lock_irq(&img_request->completion_lock);
	if (which != img_request->next_completion)
		goto out;

	for_each_obj_request_from(img_request, obj_request) {
		unsigned int xferred;
		int result;

		rbd_assert(more);
		rbd_assert(which < img_request->obj_request_count);

		/* Stop at the first request that hasn't finished yet */
		if (!obj_request_done_test(obj_request))
			break;

		rbd_assert(obj_request->xferred <= (u64) UINT_MAX);
		xferred = (unsigned int) obj_request->xferred;
		result = (int) obj_request->result;
		if (result)
			rbd_warn(NULL, "obj_request %s result %d xferred %u\n",
				img_request->write_request ? "write" : "read",
				result, xferred);

		more = blk_end_request(img_request->rq, result, xferred);
		which++;
	}
	rbd_assert(more ^ (which == img_request->obj_request_count));
	img_request->next_completion = which;
out:
	spin_unlock_irq(&img_request->completion_lock);

	if (!more)
		rbd_img_request_complete(img_request);
}

/*
 * Submit every object request of an image request to the osd client,
 * installing rbd_img_obj_callback() as each one's callback.
 *
 * Returns 0 on success.  On the first submission failure the error
 * is returned immediately; requests submitted before it are not
 * recalled here.
 */
static int rbd_img_request_submit(struct rbd_img_request *img_request)
{
	struct rbd_device *rbd_dev = img_request->rbd_dev;
	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
	struct rbd_obj_request *obj_request;

	dout("%s: img %p\n", __func__, img_request);
	for_each_obj_request(img_request, obj_request) {
		int ret;

		obj_request->callback = rbd_img_obj_callback;
		ret = rbd_obj_request_submit(osdc, obj_request);
		if (ret)
			return ret;
		/*
		 * The image request has its own reference to each
		 * of its object requests, so we can safely drop the
		 * initial one here.
		 */
		rbd_obj_request_put(obj_request);
	}

	return 0;
}

/*
 * Send a notify-ack for the given notification on the device's
 * header object.  The request is not waited for: on successful
 * submission its callback (rbd_obj_request_put) drops the reference,
 * otherwise the reference is dropped here.
 *
 * Returns 0 if the request was submitted, or a negative errno.
 */
static int rbd_obj_notify_ack(struct rbd_device *rbd_dev,
				   u64 ver, u64 notify_id)
{
	struct rbd_obj_request *obj_request;
	struct ceph_osd_req_op *op;
	struct ceph_osd_client *osdc;
	int ret;

	obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
							OBJ_REQUEST_NODATA);
	if (!obj_request)
		return -ENOMEM;

	ret = -ENOMEM;
	op = rbd_osd_req_op_create(CEPH_OSD_OP_NOTIFY_ACK, notify_id, ver);
	if (!op)
		goto out;
	obj_request->osd_req = rbd_osd_req_create(rbd_dev, false,
						obj_request, op);
	rbd_osd_req_op_destroy(op);
	if (!obj_request->osd_req)
		goto out;

	osdc = &rbd_dev->rbd_client->client->osdc;
	obj_request->callback = rbd_obj_request_put;
	ret = rbd_obj_request_submit(osdc, obj_request);
out:
	if (ret)
		rbd_obj_request_put(obj_request);

	return ret;
}

/*
 * Watch event callback for the header object (registered in
 * rbd_dev_header_watch_sync()).  Refreshes the device's view of the
 * image and then acknowledges the notification, whether or not the
 * refresh succeeded.
 */
static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
{
	struct rbd_device *rbd_dev = (struct rbd_device *)data;
	/*
	 * What rbd_dev_refresh() stores through its second argument
	 * on failure is not visible here; start from a known value so
	 * the ack below never carries an indeterminate stack value.
	 */
	u64 hver = 0;
	int rc;

	if (!rbd_dev)
		return;

	dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__,
		rbd_dev->header_name, (unsigned long long) notify_id,
		(unsigned int) opcode);
	rc = rbd_dev_refresh(rbd_dev, &hver);
	if (rc)
		rbd_warn(rbd_dev, "got notification but failed to "
			   " update snaps: %d\n", rc);

	rbd_obj_notify_ack(rbd_dev, hver, notify_id);
}

/*
 * Request sync osd watch/unwatch.  The value of "start" determines
 * whether a watch request is being initiated or torn down.
 *
 * Returns 0 on success or a negative errno.  On any error, and
 * always when tearing down, the watch event is cancelled.
 */
static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start)
{
	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
	struct rbd_obj_request *obj_request;
	struct ceph_osd_req_op *op;
	int ret;

	/* Starting requires no existing watch; stopping requires one */
	rbd_assert(start ^ !!rbd_dev->watch_event);
	rbd_assert(start ^ !!rbd_dev->watch_request);

	if (start) {
		ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev,
						&rbd_dev->watch_event);
		if (ret < 0)
			return ret;
		rbd_assert(rbd_dev->watch_event != NULL);
	}

	ret = -ENOMEM;
	obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
							OBJ_REQUEST_NODATA);
	if (!obj_request)
		goto out_cancel;

	op = rbd_osd_req_op_create(CEPH_OSD_OP_WATCH,
				rbd_dev->watch_event->cookie,
				rbd_dev->header.obj_version, start);
	if (!op)
		goto out_cancel;
	obj_request->osd_req = rbd_osd_req_create(rbd_dev, true,
							obj_request, op);
	rbd_osd_req_op_destroy(op);
	if (!obj_request->osd_req)
		goto out_cancel;

	if (start)
		ceph_osdc_set_request_linger(osdc, obj_request->osd_req);
	else
		ceph_osdc_unregister_linger_request(osdc,
					rbd_dev->watch_request->osd_req);
	ret = rbd_obj_request_submit(osdc, obj_request);
	if (ret)
		goto out_cancel;
	ret = rbd_obj_request_wait(obj_request);
	if (ret)
		goto out_cancel;
	ret = obj_request->result;
	if (ret)
		goto out_cancel;

	/*
	 * A watch request is set to linger, so the underlying osd
	 * request won't go away until we unregister it.  We retain
	 * a pointer to the object request during that time (in
	 * rbd_dev->watch_request), so we'll keep a reference to
	 * it.  We'll drop that reference (below) after we've
	 * unregistered it.
	 */
	if (start) {
		rbd_dev->watch_request = obj_request;

		return 0;
	}

	/* We have successfully torn down the watch request */

	rbd_obj_request_put(rbd_dev->watch_request);
	rbd_dev->watch_request = NULL;
out_cancel:
	/* Cancel the event if we're tearing down, or on error */
	ceph_osdc_cancel_event(rbd_dev->watch_event);
	rbd_dev->watch_event = NULL;
	if (obj_request)
		rbd_obj_request_put(obj_request);

	return ret;
}

/*
 * Synchronous osd object method call
 *
 * Invokes class_name.method_name on object_name, passing "outbound"
 * (outbound_size bytes) as the method's input and copying the reply
 * into "inbound".  If "version" is non-null it receives the version
 * recorded from the reply.
 *
 * Returns 0 on success or a negative errno.
 */
static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
			     const char *object_name,
			     const char *class_name,
			     const char *method_name,
			     const char *outbound,
			     size_t outbound_size,
			     char *inbound,
			     size_t inbound_size,
			     u64 *version)
{
	struct rbd_obj_request *obj_request;
	struct ceph_osd_client *osdc;
	struct ceph_osd_req_op *op;
	struct page **pages;
	u32 page_count;
	int ret;

	/*
	 * Method calls are ultimately read operations but they
	 * don't involve object data (so no offset or length).
	 * The result should be placed into the inbound buffer
	 * provided.  They also supply outbound data--parameters for
	 * the object method.  Currently if this is present it will
	 * be a snapshot id.
	 */
	page_count = (u32) calc_pages_for(0, inbound_size);
	pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	ret = -ENOMEM;
	obj_request = rbd_obj_request_create(object_name, 0, 0,
							OBJ_REQUEST_PAGES);
	if (!obj_request)
		goto out;

	/* From here on the object request owns the page vector */
	obj_request->pages = pages;
	obj_request->page_count = page_count;

	op = rbd_osd_req_op_create(CEPH_OSD_OP_CALL, class_name,
					method_name, outbound, outbound_size);
	if (!op)
		goto out;
	obj_request->osd_req = rbd_osd_req_create(rbd_dev, false,
						obj_request, op);
	rbd_osd_req_op_destroy(op);
	if (!obj_request->osd_req)
		goto out;

	osdc = &rbd_dev->rbd_client->client->osdc;
	ret = rbd_obj_request_submit(osdc, obj_request);
	if (ret)
		goto out;
	ret = rbd_obj_request_wait(obj_request);
	if (ret)
		goto out;

	ret = obj_request->result;
	if (ret < 0)
		goto out;
	ret = 0;
	/*
	 * The receive pages are allocated in whole pages, so the
	 * reply could in principle be longer than inbound_size;
	 * never copy past the end of the caller's buffer.
	 */
	ceph_copy_from_page_vector(pages, inbound, 0,
		(size_t) min_t(u64, obj_request->xferred, inbound_size));
	if (version)
		*version = obj_request->version;
out:
	/* Destroying the object request releases the pages it owns */
	if (obj_request)
		rbd_obj_request_put(obj_request);
	else
		ceph_release_page_vector(pages, page_count);

	return ret;
}

/*
 * Block layer request function.  Entered with the queue lock held;
 * the lock is dropped while each request is turned into an image
 * request and submitted, and retaken before the next request is
 * fetched.  Non-FS and zero-length requests are ended successfully
 * without doing anything.  A request that fails before or during
 * submission is ended here with the error.
 */
static void rbd_request_fn(struct request_queue *q)
		__releases(q->queue_lock) __acquires(q->queue_lock)
{
	struct rbd_device *rbd_dev = q->queuedata;
	bool read_only = rbd_dev->mapping.read_only;
	struct request *rq;
	int result;

	while ((rq = blk_fetch_request(q))) {
		bool write_request = rq_data_dir(rq) == WRITE;
		struct rbd_img_request *img_request;
		u64 offset;
		u64 length;

		/* Ignore any non-FS requests that filter through. */

		if (rq->cmd_type != REQ_TYPE_FS) {
			dout("%s: non-fs request type %d\n", __func__,
				(int) rq->cmd_type);
			__blk_end_request_all(rq, 0);
			continue;
		}

		/* Ignore/skip any zero-length requests */

		offset = (u64) blk_rq_pos(rq) << SECTOR_SHIFT;
		length = (u64) blk_rq_bytes(rq);

		if (!length) {
			dout("%s: zero-length request\n", __func__);
			__blk_end_request_all(rq, 0);
			continue;
		}

		spin_unlock_irq(q->queue_lock);

		/* Disallow writes to a read-only device */

		if (write_request) {
			result = -EROFS;
			if (read_only)
				goto end_request;
			rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP);
		}

		/*
		 * Quit early if the mapped snapshot no longer
		 * exists.  It's still possible the snapshot will
		 * have disappeared by the time our request arrives
		 * at the osd, but there's no sense in sending it if
		 * we already know.
		 */
		if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) {
			dout("request for non-existent snapshot\n");
			rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
			result = -ENXIO;
			goto end_request;
		}

		/* Reject a range whose end would wrap past U64_MAX */
		result = -EINVAL;
		if (WARN_ON(offset && length > U64_MAX - offset + 1))
			goto end_request;	/* Shouldn't happen */

		result = -ENOMEM;
		img_request = rbd_img_request_create(rbd_dev, offset, length,
							write_request);
		if (!img_request)
			goto end_request;

		img_request->rq = rq;

		result = rbd_img_request_fill_bio(img_request, rq->bio);
		if (!result)
			result = rbd_img_request_submit(img_request);
		if (result)
			rbd_img_request_put(img_request);
end_request:
		spin_lock_irq(q->queue_lock);
		if (result < 0) {
			rbd_warn(rbd_dev, "obj_request %s result %d\n",
				write_request ? "write" : "read", result);
			__blk_end_request_all(rq, result);
		}
	}
}

2060602adf40SYehuda Sadeh /* 2061602adf40SYehuda Sadeh * a queue callback. Makes sure that we don't create a bio that spans across 2062602adf40SYehuda Sadeh * multiple osd objects. 
One exception would be with a single page bios, 2063f7760dadSAlex Elder * which we handle later at bio_chain_clone_range() 2064602adf40SYehuda Sadeh */ 2065602adf40SYehuda Sadeh static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, 2066602adf40SYehuda Sadeh struct bio_vec *bvec) 2067602adf40SYehuda Sadeh { 2068602adf40SYehuda Sadeh struct rbd_device *rbd_dev = q->queuedata; 2069e5cfeed2SAlex Elder sector_t sector_offset; 2070e5cfeed2SAlex Elder sector_t sectors_per_obj; 2071e5cfeed2SAlex Elder sector_t obj_sector_offset; 2072e5cfeed2SAlex Elder int ret; 2073602adf40SYehuda Sadeh 2074e5cfeed2SAlex Elder /* 2075e5cfeed2SAlex Elder * Find how far into its rbd object the partition-relative 2076e5cfeed2SAlex Elder * bio start sector is to offset relative to the enclosing 2077e5cfeed2SAlex Elder * device. 2078e5cfeed2SAlex Elder */ 2079e5cfeed2SAlex Elder sector_offset = get_start_sect(bmd->bi_bdev) + bmd->bi_sector; 2080e5cfeed2SAlex Elder sectors_per_obj = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT); 2081e5cfeed2SAlex Elder obj_sector_offset = sector_offset & (sectors_per_obj - 1); 2082593a9e7bSAlex Elder 2083e5cfeed2SAlex Elder /* 2084e5cfeed2SAlex Elder * Compute the number of bytes from that offset to the end 2085e5cfeed2SAlex Elder * of the object. Account for what's already used by the bio. 2086e5cfeed2SAlex Elder */ 2087e5cfeed2SAlex Elder ret = (int) (sectors_per_obj - obj_sector_offset) << SECTOR_SHIFT; 2088e5cfeed2SAlex Elder if (ret > bmd->bi_size) 2089e5cfeed2SAlex Elder ret -= bmd->bi_size; 2090e5cfeed2SAlex Elder else 2091e5cfeed2SAlex Elder ret = 0; 2092e5cfeed2SAlex Elder 2093e5cfeed2SAlex Elder /* 2094e5cfeed2SAlex Elder * Don't send back more than was asked for. And if the bio 2095e5cfeed2SAlex Elder * was empty, let the whole thing through because: "Note 2096e5cfeed2SAlex Elder * that a block device *must* allow a single page to be 2097e5cfeed2SAlex Elder * added to an empty bio." 
2098e5cfeed2SAlex Elder */ 2099e5cfeed2SAlex Elder rbd_assert(bvec->bv_len <= PAGE_SIZE); 2100e5cfeed2SAlex Elder if (ret > (int) bvec->bv_len || !bmd->bi_size) 2101e5cfeed2SAlex Elder ret = (int) bvec->bv_len; 2102e5cfeed2SAlex Elder 2103e5cfeed2SAlex Elder return ret; 2104602adf40SYehuda Sadeh } 2105602adf40SYehuda Sadeh 2106602adf40SYehuda Sadeh static void rbd_free_disk(struct rbd_device *rbd_dev) 2107602adf40SYehuda Sadeh { 2108602adf40SYehuda Sadeh struct gendisk *disk = rbd_dev->disk; 2109602adf40SYehuda Sadeh 2110602adf40SYehuda Sadeh if (!disk) 2111602adf40SYehuda Sadeh return; 2112602adf40SYehuda Sadeh 2113602adf40SYehuda Sadeh if (disk->flags & GENHD_FL_UP) 2114602adf40SYehuda Sadeh del_gendisk(disk); 2115602adf40SYehuda Sadeh if (disk->queue) 2116602adf40SYehuda Sadeh blk_cleanup_queue(disk->queue); 2117602adf40SYehuda Sadeh put_disk(disk); 2118602adf40SYehuda Sadeh } 2119602adf40SYehuda Sadeh 2120788e2df3SAlex Elder static int rbd_obj_read_sync(struct rbd_device *rbd_dev, 2121788e2df3SAlex Elder const char *object_name, 2122788e2df3SAlex Elder u64 offset, u64 length, 2123788e2df3SAlex Elder char *buf, u64 *version) 2124788e2df3SAlex Elder 2125788e2df3SAlex Elder { 2126788e2df3SAlex Elder struct ceph_osd_req_op *op; 2127788e2df3SAlex Elder struct rbd_obj_request *obj_request; 2128788e2df3SAlex Elder struct ceph_osd_client *osdc; 2129788e2df3SAlex Elder struct page **pages = NULL; 2130788e2df3SAlex Elder u32 page_count; 21311ceae7efSAlex Elder size_t size; 2132788e2df3SAlex Elder int ret; 2133788e2df3SAlex Elder 2134788e2df3SAlex Elder page_count = (u32) calc_pages_for(offset, length); 2135788e2df3SAlex Elder pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); 2136788e2df3SAlex Elder if (IS_ERR(pages)) 2137788e2df3SAlex Elder ret = PTR_ERR(pages); 2138788e2df3SAlex Elder 2139788e2df3SAlex Elder ret = -ENOMEM; 2140788e2df3SAlex Elder obj_request = rbd_obj_request_create(object_name, offset, length, 2141788e2df3SAlex Elder OBJ_REQUEST_PAGES); 
2142788e2df3SAlex Elder if (!obj_request) 2143788e2df3SAlex Elder goto out; 2144788e2df3SAlex Elder 2145788e2df3SAlex Elder obj_request->pages = pages; 2146788e2df3SAlex Elder obj_request->page_count = page_count; 2147788e2df3SAlex Elder 2148788e2df3SAlex Elder op = rbd_osd_req_op_create(CEPH_OSD_OP_READ, offset, length); 2149788e2df3SAlex Elder if (!op) 2150788e2df3SAlex Elder goto out; 2151788e2df3SAlex Elder obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, 2152788e2df3SAlex Elder obj_request, op); 2153788e2df3SAlex Elder rbd_osd_req_op_destroy(op); 2154788e2df3SAlex Elder if (!obj_request->osd_req) 2155788e2df3SAlex Elder goto out; 2156788e2df3SAlex Elder 2157788e2df3SAlex Elder osdc = &rbd_dev->rbd_client->client->osdc; 2158788e2df3SAlex Elder ret = rbd_obj_request_submit(osdc, obj_request); 2159788e2df3SAlex Elder if (ret) 2160788e2df3SAlex Elder goto out; 2161788e2df3SAlex Elder ret = rbd_obj_request_wait(obj_request); 2162788e2df3SAlex Elder if (ret) 2163788e2df3SAlex Elder goto out; 2164788e2df3SAlex Elder 2165788e2df3SAlex Elder ret = obj_request->result; 2166788e2df3SAlex Elder if (ret < 0) 2167788e2df3SAlex Elder goto out; 21681ceae7efSAlex Elder 21691ceae7efSAlex Elder rbd_assert(obj_request->xferred <= (u64) SIZE_MAX); 21701ceae7efSAlex Elder size = (size_t) obj_request->xferred; 2171903bb32eSAlex Elder ceph_copy_from_page_vector(pages, buf, 0, size); 217223ed6e13SAlex Elder rbd_assert(size <= (size_t) INT_MAX); 217323ed6e13SAlex Elder ret = (int) size; 2174788e2df3SAlex Elder if (version) 2175788e2df3SAlex Elder *version = obj_request->version; 2176788e2df3SAlex Elder out: 2177788e2df3SAlex Elder if (obj_request) 2178788e2df3SAlex Elder rbd_obj_request_put(obj_request); 2179788e2df3SAlex Elder else 2180788e2df3SAlex Elder ceph_release_page_vector(pages, page_count); 2181788e2df3SAlex Elder 2182788e2df3SAlex Elder return ret; 2183788e2df3SAlex Elder } 2184788e2df3SAlex Elder 2185602adf40SYehuda Sadeh /* 21864156d998SAlex Elder * Read the 
complete header for the given rbd device. 21874156d998SAlex Elder * 21884156d998SAlex Elder * Returns a pointer to a dynamically-allocated buffer containing 21894156d998SAlex Elder * the complete and validated header. Caller can pass the address 21904156d998SAlex Elder * of a variable that will be filled in with the version of the 21914156d998SAlex Elder * header object at the time it was read. 21924156d998SAlex Elder * 21934156d998SAlex Elder * Returns a pointer-coded errno if a failure occurs. 21944156d998SAlex Elder */ 21954156d998SAlex Elder static struct rbd_image_header_ondisk * 21964156d998SAlex Elder rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) 21974156d998SAlex Elder { 21984156d998SAlex Elder struct rbd_image_header_ondisk *ondisk = NULL; 21994156d998SAlex Elder u32 snap_count = 0; 22004156d998SAlex Elder u64 names_size = 0; 22014156d998SAlex Elder u32 want_count; 22024156d998SAlex Elder int ret; 22034156d998SAlex Elder 22044156d998SAlex Elder /* 22054156d998SAlex Elder * The complete header will include an array of its 64-bit 22064156d998SAlex Elder * snapshot ids, followed by the names of those snapshots as 22074156d998SAlex Elder * a contiguous block of NUL-terminated strings. Note that 22084156d998SAlex Elder * the number of snapshots could change by the time we read 22094156d998SAlex Elder * it in, in which case we re-read it. 
22104156d998SAlex Elder */ 22114156d998SAlex Elder do { 22124156d998SAlex Elder size_t size; 22134156d998SAlex Elder 22144156d998SAlex Elder kfree(ondisk); 22154156d998SAlex Elder 22164156d998SAlex Elder size = sizeof (*ondisk); 22174156d998SAlex Elder size += snap_count * sizeof (struct rbd_image_snap_ondisk); 22184156d998SAlex Elder size += names_size; 22194156d998SAlex Elder ondisk = kmalloc(size, GFP_KERNEL); 22204156d998SAlex Elder if (!ondisk) 22214156d998SAlex Elder return ERR_PTR(-ENOMEM); 22224156d998SAlex Elder 2223788e2df3SAlex Elder ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name, 22244156d998SAlex Elder 0, size, 22254156d998SAlex Elder (char *) ondisk, version); 22264156d998SAlex Elder if (ret < 0) 22274156d998SAlex Elder goto out_err; 22284156d998SAlex Elder if (WARN_ON((size_t) ret < size)) { 22294156d998SAlex Elder ret = -ENXIO; 223006ecc6cbSAlex Elder rbd_warn(rbd_dev, "short header read (want %zd got %d)", 223106ecc6cbSAlex Elder size, ret); 22324156d998SAlex Elder goto out_err; 22334156d998SAlex Elder } 22344156d998SAlex Elder if (!rbd_dev_ondisk_valid(ondisk)) { 22354156d998SAlex Elder ret = -ENXIO; 223606ecc6cbSAlex Elder rbd_warn(rbd_dev, "invalid header"); 22374156d998SAlex Elder goto out_err; 22384156d998SAlex Elder } 22394156d998SAlex Elder 22404156d998SAlex Elder names_size = le64_to_cpu(ondisk->snap_names_len); 22414156d998SAlex Elder want_count = snap_count; 22424156d998SAlex Elder snap_count = le32_to_cpu(ondisk->snap_count); 22434156d998SAlex Elder } while (snap_count != want_count); 22444156d998SAlex Elder 22454156d998SAlex Elder return ondisk; 22464156d998SAlex Elder 22474156d998SAlex Elder out_err: 22484156d998SAlex Elder kfree(ondisk); 22494156d998SAlex Elder 22504156d998SAlex Elder return ERR_PTR(ret); 22514156d998SAlex Elder } 22524156d998SAlex Elder 22534156d998SAlex Elder /* 2254602adf40SYehuda Sadeh * reload the ondisk the header 2255602adf40SYehuda Sadeh */ 2256602adf40SYehuda Sadeh static int rbd_read_header(struct 
rbd_device *rbd_dev, 2257602adf40SYehuda Sadeh struct rbd_image_header *header) 2258602adf40SYehuda Sadeh { 22594156d998SAlex Elder struct rbd_image_header_ondisk *ondisk; 22604156d998SAlex Elder u64 ver = 0; 22614156d998SAlex Elder int ret; 2262602adf40SYehuda Sadeh 22634156d998SAlex Elder ondisk = rbd_dev_v1_header_read(rbd_dev, &ver); 22644156d998SAlex Elder if (IS_ERR(ondisk)) 22654156d998SAlex Elder return PTR_ERR(ondisk); 22664156d998SAlex Elder ret = rbd_header_from_disk(header, ondisk); 22674156d998SAlex Elder if (ret >= 0) 226859c2be1eSYehuda Sadeh header->obj_version = ver; 22694156d998SAlex Elder kfree(ondisk); 2270602adf40SYehuda Sadeh 22714156d998SAlex Elder return ret; 2272602adf40SYehuda Sadeh } 2273602adf40SYehuda Sadeh 227441f38c2bSAlex Elder static void rbd_remove_all_snaps(struct rbd_device *rbd_dev) 2275dfc5606dSYehuda Sadeh { 2276dfc5606dSYehuda Sadeh struct rbd_snap *snap; 2277a0593290SAlex Elder struct rbd_snap *next; 2278dfc5606dSYehuda Sadeh 2279a0593290SAlex Elder list_for_each_entry_safe(snap, next, &rbd_dev->snaps, node) 228041f38c2bSAlex Elder rbd_remove_snap_dev(snap); 2281dfc5606dSYehuda Sadeh } 2282dfc5606dSYehuda Sadeh 22839478554aSAlex Elder static void rbd_update_mapping_size(struct rbd_device *rbd_dev) 22849478554aSAlex Elder { 22859478554aSAlex Elder sector_t size; 22869478554aSAlex Elder 22870d7dbfceSAlex Elder if (rbd_dev->spec->snap_id != CEPH_NOSNAP) 22889478554aSAlex Elder return; 22899478554aSAlex Elder 22909478554aSAlex Elder size = (sector_t) rbd_dev->header.image_size / SECTOR_SIZE; 22919478554aSAlex Elder dout("setting size to %llu sectors", (unsigned long long) size); 22929478554aSAlex Elder rbd_dev->mapping.size = (u64) size; 22939478554aSAlex Elder set_capacity(rbd_dev->disk, size); 22949478554aSAlex Elder } 22959478554aSAlex Elder 2296602adf40SYehuda Sadeh /* 2297602adf40SYehuda Sadeh * only read the first part of the ondisk header, without the snaps info 2298602adf40SYehuda Sadeh */ 2299117973fbSAlex Elder static 
int rbd_dev_v1_refresh(struct rbd_device *rbd_dev, u64 *hver) 2300602adf40SYehuda Sadeh { 2301602adf40SYehuda Sadeh int ret; 2302602adf40SYehuda Sadeh struct rbd_image_header h; 2303602adf40SYehuda Sadeh 2304602adf40SYehuda Sadeh ret = rbd_read_header(rbd_dev, &h); 2305602adf40SYehuda Sadeh if (ret < 0) 2306602adf40SYehuda Sadeh return ret; 2307602adf40SYehuda Sadeh 2308a51aa0c0SJosh Durgin down_write(&rbd_dev->header_rwsem); 2309a51aa0c0SJosh Durgin 23109478554aSAlex Elder /* Update image size, and check for resize of mapped image */ 23119478554aSAlex Elder rbd_dev->header.image_size = h.image_size; 23129478554aSAlex Elder rbd_update_mapping_size(rbd_dev); 23139db4b3e3SSage Weil 2314849b4260SAlex Elder /* rbd_dev->header.object_prefix shouldn't change */ 2315602adf40SYehuda Sadeh kfree(rbd_dev->header.snap_sizes); 2316849b4260SAlex Elder kfree(rbd_dev->header.snap_names); 2317d1d25646SJosh Durgin /* osd requests may still refer to snapc */ 2318d1d25646SJosh Durgin ceph_put_snap_context(rbd_dev->header.snapc); 2319602adf40SYehuda Sadeh 2320b813623aSAlex Elder if (hver) 2321b813623aSAlex Elder *hver = h.obj_version; 2322a71b891bSJosh Durgin rbd_dev->header.obj_version = h.obj_version; 232393a24e08SJosh Durgin rbd_dev->header.image_size = h.image_size; 2324602adf40SYehuda Sadeh rbd_dev->header.snapc = h.snapc; 2325602adf40SYehuda Sadeh rbd_dev->header.snap_names = h.snap_names; 2326602adf40SYehuda Sadeh rbd_dev->header.snap_sizes = h.snap_sizes; 2327849b4260SAlex Elder /* Free the extra copy of the object prefix */ 2328849b4260SAlex Elder WARN_ON(strcmp(rbd_dev->header.object_prefix, h.object_prefix)); 2329849b4260SAlex Elder kfree(h.object_prefix); 2330849b4260SAlex Elder 2331304f6808SAlex Elder ret = rbd_dev_snaps_update(rbd_dev); 2332304f6808SAlex Elder if (!ret) 2333304f6808SAlex Elder ret = rbd_dev_snaps_register(rbd_dev); 2334dfc5606dSYehuda Sadeh 2335c666601aSJosh Durgin up_write(&rbd_dev->header_rwsem); 2336602adf40SYehuda Sadeh 2337dfc5606dSYehuda Sadeh 
return ret; 2338602adf40SYehuda Sadeh } 2339602adf40SYehuda Sadeh 2340117973fbSAlex Elder static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver) 23411fe5e993SAlex Elder { 23421fe5e993SAlex Elder int ret; 23431fe5e993SAlex Elder 2344117973fbSAlex Elder rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); 23451fe5e993SAlex Elder mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2346117973fbSAlex Elder if (rbd_dev->image_format == 1) 2347117973fbSAlex Elder ret = rbd_dev_v1_refresh(rbd_dev, hver); 2348117973fbSAlex Elder else 2349117973fbSAlex Elder ret = rbd_dev_v2_refresh(rbd_dev, hver); 23501fe5e993SAlex Elder mutex_unlock(&ctl_mutex); 23511fe5e993SAlex Elder 23521fe5e993SAlex Elder return ret; 23531fe5e993SAlex Elder } 23541fe5e993SAlex Elder 2355602adf40SYehuda Sadeh static int rbd_init_disk(struct rbd_device *rbd_dev) 2356602adf40SYehuda Sadeh { 2357602adf40SYehuda Sadeh struct gendisk *disk; 2358602adf40SYehuda Sadeh struct request_queue *q; 2359593a9e7bSAlex Elder u64 segment_size; 2360602adf40SYehuda Sadeh 2361602adf40SYehuda Sadeh /* create gendisk info */ 2362602adf40SYehuda Sadeh disk = alloc_disk(RBD_MINORS_PER_MAJOR); 2363602adf40SYehuda Sadeh if (!disk) 23641fcdb8aaSAlex Elder return -ENOMEM; 2365602adf40SYehuda Sadeh 2366f0f8cef5SAlex Elder snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", 2367de71a297SAlex Elder rbd_dev->dev_id); 2368602adf40SYehuda Sadeh disk->major = rbd_dev->major; 2369602adf40SYehuda Sadeh disk->first_minor = 0; 2370602adf40SYehuda Sadeh disk->fops = &rbd_bd_ops; 2371602adf40SYehuda Sadeh disk->private_data = rbd_dev; 2372602adf40SYehuda Sadeh 2373bf0d5f50SAlex Elder q = blk_init_queue(rbd_request_fn, &rbd_dev->lock); 2374602adf40SYehuda Sadeh if (!q) 2375602adf40SYehuda Sadeh goto out_disk; 2376029bcbd8SJosh Durgin 2377593a9e7bSAlex Elder /* We use the default size, but let's be explicit about it. 
*/ 2378593a9e7bSAlex Elder blk_queue_physical_block_size(q, SECTOR_SIZE); 2379593a9e7bSAlex Elder 2380029bcbd8SJosh Durgin /* set io sizes to object size */ 2381593a9e7bSAlex Elder segment_size = rbd_obj_bytes(&rbd_dev->header); 2382593a9e7bSAlex Elder blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); 2383593a9e7bSAlex Elder blk_queue_max_segment_size(q, segment_size); 2384593a9e7bSAlex Elder blk_queue_io_min(q, segment_size); 2385593a9e7bSAlex Elder blk_queue_io_opt(q, segment_size); 2386029bcbd8SJosh Durgin 2387602adf40SYehuda Sadeh blk_queue_merge_bvec(q, rbd_merge_bvec); 2388602adf40SYehuda Sadeh disk->queue = q; 2389602adf40SYehuda Sadeh 2390602adf40SYehuda Sadeh q->queuedata = rbd_dev; 2391602adf40SYehuda Sadeh 2392602adf40SYehuda Sadeh rbd_dev->disk = disk; 2393602adf40SYehuda Sadeh 239412f02944SAlex Elder set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); 239512f02944SAlex Elder 2396602adf40SYehuda Sadeh return 0; 2397602adf40SYehuda Sadeh out_disk: 2398602adf40SYehuda Sadeh put_disk(disk); 23991fcdb8aaSAlex Elder 24001fcdb8aaSAlex Elder return -ENOMEM; 2401602adf40SYehuda Sadeh } 2402602adf40SYehuda Sadeh 2403dfc5606dSYehuda Sadeh /* 2404dfc5606dSYehuda Sadeh sysfs 2405dfc5606dSYehuda Sadeh */ 2406602adf40SYehuda Sadeh 2407593a9e7bSAlex Elder static struct rbd_device *dev_to_rbd_dev(struct device *dev) 2408593a9e7bSAlex Elder { 2409593a9e7bSAlex Elder return container_of(dev, struct rbd_device, dev); 2410593a9e7bSAlex Elder } 2411593a9e7bSAlex Elder 2412dfc5606dSYehuda Sadeh static ssize_t rbd_size_show(struct device *dev, 2413dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 2414602adf40SYehuda Sadeh { 2415593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2416a51aa0c0SJosh Durgin sector_t size; 2417dfc5606dSYehuda Sadeh 2418a51aa0c0SJosh Durgin down_read(&rbd_dev->header_rwsem); 2419a51aa0c0SJosh Durgin size = get_capacity(rbd_dev->disk); 2420a51aa0c0SJosh Durgin up_read(&rbd_dev->header_rwsem); 
2421a51aa0c0SJosh Durgin 2422a51aa0c0SJosh Durgin return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE); 2423602adf40SYehuda Sadeh } 2424602adf40SYehuda Sadeh 242534b13184SAlex Elder /* 242634b13184SAlex Elder * Note this shows the features for whatever's mapped, which is not 242734b13184SAlex Elder * necessarily the base image. 242834b13184SAlex Elder */ 242934b13184SAlex Elder static ssize_t rbd_features_show(struct device *dev, 243034b13184SAlex Elder struct device_attribute *attr, char *buf) 243134b13184SAlex Elder { 243234b13184SAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 243334b13184SAlex Elder 243434b13184SAlex Elder return sprintf(buf, "0x%016llx\n", 243534b13184SAlex Elder (unsigned long long) rbd_dev->mapping.features); 243634b13184SAlex Elder } 243734b13184SAlex Elder 2438dfc5606dSYehuda Sadeh static ssize_t rbd_major_show(struct device *dev, 2439dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 2440602adf40SYehuda Sadeh { 2441593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2442dfc5606dSYehuda Sadeh 2443dfc5606dSYehuda Sadeh return sprintf(buf, "%d\n", rbd_dev->major); 2444dfc5606dSYehuda Sadeh } 2445dfc5606dSYehuda Sadeh 2446dfc5606dSYehuda Sadeh static ssize_t rbd_client_id_show(struct device *dev, 2447dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 2448dfc5606dSYehuda Sadeh { 2449593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2450dfc5606dSYehuda Sadeh 24511dbb4399SAlex Elder return sprintf(buf, "client%lld\n", 24521dbb4399SAlex Elder ceph_client_id(rbd_dev->rbd_client->client)); 2453dfc5606dSYehuda Sadeh } 2454dfc5606dSYehuda Sadeh 2455dfc5606dSYehuda Sadeh static ssize_t rbd_pool_show(struct device *dev, 2456dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 2457dfc5606dSYehuda Sadeh { 2458593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2459dfc5606dSYehuda Sadeh 24600d7dbfceSAlex Elder return sprintf(buf, 
"%s\n", rbd_dev->spec->pool_name); 2461dfc5606dSYehuda Sadeh } 2462dfc5606dSYehuda Sadeh 24639bb2f334SAlex Elder static ssize_t rbd_pool_id_show(struct device *dev, 24649bb2f334SAlex Elder struct device_attribute *attr, char *buf) 24659bb2f334SAlex Elder { 24669bb2f334SAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 24679bb2f334SAlex Elder 24680d7dbfceSAlex Elder return sprintf(buf, "%llu\n", 24690d7dbfceSAlex Elder (unsigned long long) rbd_dev->spec->pool_id); 24709bb2f334SAlex Elder } 24719bb2f334SAlex Elder 2472dfc5606dSYehuda Sadeh static ssize_t rbd_name_show(struct device *dev, 2473dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 2474dfc5606dSYehuda Sadeh { 2475593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2476dfc5606dSYehuda Sadeh 2477a92ffdf8SAlex Elder if (rbd_dev->spec->image_name) 24780d7dbfceSAlex Elder return sprintf(buf, "%s\n", rbd_dev->spec->image_name); 2479a92ffdf8SAlex Elder 2480a92ffdf8SAlex Elder return sprintf(buf, "(unknown)\n"); 2481dfc5606dSYehuda Sadeh } 2482dfc5606dSYehuda Sadeh 2483589d30e0SAlex Elder static ssize_t rbd_image_id_show(struct device *dev, 2484589d30e0SAlex Elder struct device_attribute *attr, char *buf) 2485589d30e0SAlex Elder { 2486589d30e0SAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2487589d30e0SAlex Elder 24880d7dbfceSAlex Elder return sprintf(buf, "%s\n", rbd_dev->spec->image_id); 2489589d30e0SAlex Elder } 2490589d30e0SAlex Elder 249134b13184SAlex Elder /* 249234b13184SAlex Elder * Shows the name of the currently-mapped snapshot (or 249334b13184SAlex Elder * RBD_SNAP_HEAD_NAME for the base image). 
249434b13184SAlex Elder */ 2495dfc5606dSYehuda Sadeh static ssize_t rbd_snap_show(struct device *dev, 2496dfc5606dSYehuda Sadeh struct device_attribute *attr, 2497dfc5606dSYehuda Sadeh char *buf) 2498dfc5606dSYehuda Sadeh { 2499593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2500dfc5606dSYehuda Sadeh 25010d7dbfceSAlex Elder return sprintf(buf, "%s\n", rbd_dev->spec->snap_name); 2502dfc5606dSYehuda Sadeh } 2503dfc5606dSYehuda Sadeh 250486b00e0dSAlex Elder /* 250586b00e0dSAlex Elder * For an rbd v2 image, shows the pool id, image id, and snapshot id 250686b00e0dSAlex Elder * for the parent image. If there is no parent, simply shows 250786b00e0dSAlex Elder * "(no parent image)". 250886b00e0dSAlex Elder */ 250986b00e0dSAlex Elder static ssize_t rbd_parent_show(struct device *dev, 251086b00e0dSAlex Elder struct device_attribute *attr, 251186b00e0dSAlex Elder char *buf) 251286b00e0dSAlex Elder { 251386b00e0dSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 251486b00e0dSAlex Elder struct rbd_spec *spec = rbd_dev->parent_spec; 251586b00e0dSAlex Elder int count; 251686b00e0dSAlex Elder char *bufp = buf; 251786b00e0dSAlex Elder 251886b00e0dSAlex Elder if (!spec) 251986b00e0dSAlex Elder return sprintf(buf, "(no parent image)\n"); 252086b00e0dSAlex Elder 252186b00e0dSAlex Elder count = sprintf(bufp, "pool_id %llu\npool_name %s\n", 252286b00e0dSAlex Elder (unsigned long long) spec->pool_id, spec->pool_name); 252386b00e0dSAlex Elder if (count < 0) 252486b00e0dSAlex Elder return count; 252586b00e0dSAlex Elder bufp += count; 252686b00e0dSAlex Elder 252786b00e0dSAlex Elder count = sprintf(bufp, "image_id %s\nimage_name %s\n", spec->image_id, 252886b00e0dSAlex Elder spec->image_name ? 
spec->image_name : "(unknown)"); 252986b00e0dSAlex Elder if (count < 0) 253086b00e0dSAlex Elder return count; 253186b00e0dSAlex Elder bufp += count; 253286b00e0dSAlex Elder 253386b00e0dSAlex Elder count = sprintf(bufp, "snap_id %llu\nsnap_name %s\n", 253486b00e0dSAlex Elder (unsigned long long) spec->snap_id, spec->snap_name); 253586b00e0dSAlex Elder if (count < 0) 253686b00e0dSAlex Elder return count; 253786b00e0dSAlex Elder bufp += count; 253886b00e0dSAlex Elder 253986b00e0dSAlex Elder count = sprintf(bufp, "overlap %llu\n", rbd_dev->parent_overlap); 254086b00e0dSAlex Elder if (count < 0) 254186b00e0dSAlex Elder return count; 254286b00e0dSAlex Elder bufp += count; 254386b00e0dSAlex Elder 254486b00e0dSAlex Elder return (ssize_t) (bufp - buf); 254586b00e0dSAlex Elder } 254686b00e0dSAlex Elder 2547dfc5606dSYehuda Sadeh static ssize_t rbd_image_refresh(struct device *dev, 2548dfc5606dSYehuda Sadeh struct device_attribute *attr, 2549dfc5606dSYehuda Sadeh const char *buf, 2550dfc5606dSYehuda Sadeh size_t size) 2551dfc5606dSYehuda Sadeh { 2552593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2553b813623aSAlex Elder int ret; 2554602adf40SYehuda Sadeh 2555117973fbSAlex Elder ret = rbd_dev_refresh(rbd_dev, NULL); 2556b813623aSAlex Elder 2557b813623aSAlex Elder return ret < 0 ? 
ret : size; 2558dfc5606dSYehuda Sadeh } 2559602adf40SYehuda Sadeh 2560dfc5606dSYehuda Sadeh static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL); 256134b13184SAlex Elder static DEVICE_ATTR(features, S_IRUGO, rbd_features_show, NULL); 2562dfc5606dSYehuda Sadeh static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL); 2563dfc5606dSYehuda Sadeh static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL); 2564dfc5606dSYehuda Sadeh static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL); 25659bb2f334SAlex Elder static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL); 2566dfc5606dSYehuda Sadeh static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL); 2567589d30e0SAlex Elder static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL); 2568dfc5606dSYehuda Sadeh static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); 2569dfc5606dSYehuda Sadeh static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); 257086b00e0dSAlex Elder static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL); 2571dfc5606dSYehuda Sadeh 2572dfc5606dSYehuda Sadeh static struct attribute *rbd_attrs[] = { 2573dfc5606dSYehuda Sadeh &dev_attr_size.attr, 257434b13184SAlex Elder &dev_attr_features.attr, 2575dfc5606dSYehuda Sadeh &dev_attr_major.attr, 2576dfc5606dSYehuda Sadeh &dev_attr_client_id.attr, 2577dfc5606dSYehuda Sadeh &dev_attr_pool.attr, 25789bb2f334SAlex Elder &dev_attr_pool_id.attr, 2579dfc5606dSYehuda Sadeh &dev_attr_name.attr, 2580589d30e0SAlex Elder &dev_attr_image_id.attr, 2581dfc5606dSYehuda Sadeh &dev_attr_current_snap.attr, 258286b00e0dSAlex Elder &dev_attr_parent.attr, 2583dfc5606dSYehuda Sadeh &dev_attr_refresh.attr, 2584dfc5606dSYehuda Sadeh NULL 2585dfc5606dSYehuda Sadeh }; 2586dfc5606dSYehuda Sadeh 2587dfc5606dSYehuda Sadeh static struct attribute_group rbd_attr_group = { 2588dfc5606dSYehuda Sadeh .attrs = rbd_attrs, 2589dfc5606dSYehuda Sadeh }; 2590dfc5606dSYehuda Sadeh 2591dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_attr_groups[] = { 
2592dfc5606dSYehuda Sadeh &rbd_attr_group, 2593dfc5606dSYehuda Sadeh NULL 2594dfc5606dSYehuda Sadeh }; 2595dfc5606dSYehuda Sadeh 2596dfc5606dSYehuda Sadeh static void rbd_sysfs_dev_release(struct device *dev) 2597dfc5606dSYehuda Sadeh { 2598dfc5606dSYehuda Sadeh } 2599dfc5606dSYehuda Sadeh 2600dfc5606dSYehuda Sadeh static struct device_type rbd_device_type = { 2601dfc5606dSYehuda Sadeh .name = "rbd", 2602dfc5606dSYehuda Sadeh .groups = rbd_attr_groups, 2603dfc5606dSYehuda Sadeh .release = rbd_sysfs_dev_release, 2604dfc5606dSYehuda Sadeh }; 2605dfc5606dSYehuda Sadeh 2606dfc5606dSYehuda Sadeh 2607dfc5606dSYehuda Sadeh /* 2608dfc5606dSYehuda Sadeh sysfs - snapshots 2609dfc5606dSYehuda Sadeh */ 2610dfc5606dSYehuda Sadeh 2611dfc5606dSYehuda Sadeh static ssize_t rbd_snap_size_show(struct device *dev, 2612dfc5606dSYehuda Sadeh struct device_attribute *attr, 2613dfc5606dSYehuda Sadeh char *buf) 2614dfc5606dSYehuda Sadeh { 2615dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 2616dfc5606dSYehuda Sadeh 26173591538fSJosh Durgin return sprintf(buf, "%llu\n", (unsigned long long)snap->size); 2618dfc5606dSYehuda Sadeh } 2619dfc5606dSYehuda Sadeh 2620dfc5606dSYehuda Sadeh static ssize_t rbd_snap_id_show(struct device *dev, 2621dfc5606dSYehuda Sadeh struct device_attribute *attr, 2622dfc5606dSYehuda Sadeh char *buf) 2623dfc5606dSYehuda Sadeh { 2624dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 2625dfc5606dSYehuda Sadeh 2626593a9e7bSAlex Elder return sprintf(buf, "%llu\n", (unsigned long long)snap->id); 2627dfc5606dSYehuda Sadeh } 2628dfc5606dSYehuda Sadeh 262934b13184SAlex Elder static ssize_t rbd_snap_features_show(struct device *dev, 263034b13184SAlex Elder struct device_attribute *attr, 263134b13184SAlex Elder char *buf) 263234b13184SAlex Elder { 263334b13184SAlex Elder struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 263434b13184SAlex Elder 263534b13184SAlex Elder return sprintf(buf, 
"0x%016llx\n", 263634b13184SAlex Elder (unsigned long long) snap->features); 263734b13184SAlex Elder } 263834b13184SAlex Elder 2639dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); 2640dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL); 264134b13184SAlex Elder static DEVICE_ATTR(snap_features, S_IRUGO, rbd_snap_features_show, NULL); 2642dfc5606dSYehuda Sadeh 2643dfc5606dSYehuda Sadeh static struct attribute *rbd_snap_attrs[] = { 2644dfc5606dSYehuda Sadeh &dev_attr_snap_size.attr, 2645dfc5606dSYehuda Sadeh &dev_attr_snap_id.attr, 264634b13184SAlex Elder &dev_attr_snap_features.attr, 2647dfc5606dSYehuda Sadeh NULL, 2648dfc5606dSYehuda Sadeh }; 2649dfc5606dSYehuda Sadeh 2650dfc5606dSYehuda Sadeh static struct attribute_group rbd_snap_attr_group = { 2651dfc5606dSYehuda Sadeh .attrs = rbd_snap_attrs, 2652dfc5606dSYehuda Sadeh }; 2653dfc5606dSYehuda Sadeh 2654dfc5606dSYehuda Sadeh static void rbd_snap_dev_release(struct device *dev) 2655dfc5606dSYehuda Sadeh { 2656dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 2657dfc5606dSYehuda Sadeh kfree(snap->name); 2658dfc5606dSYehuda Sadeh kfree(snap); 2659dfc5606dSYehuda Sadeh } 2660dfc5606dSYehuda Sadeh 2661dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_snap_attr_groups[] = { 2662dfc5606dSYehuda Sadeh &rbd_snap_attr_group, 2663dfc5606dSYehuda Sadeh NULL 2664dfc5606dSYehuda Sadeh }; 2665dfc5606dSYehuda Sadeh 2666dfc5606dSYehuda Sadeh static struct device_type rbd_snap_device_type = { 2667dfc5606dSYehuda Sadeh .groups = rbd_snap_attr_groups, 2668dfc5606dSYehuda Sadeh .release = rbd_snap_dev_release, 2669dfc5606dSYehuda Sadeh }; 2670dfc5606dSYehuda Sadeh 26718b8fb99cSAlex Elder static struct rbd_spec *rbd_spec_get(struct rbd_spec *spec) 26728b8fb99cSAlex Elder { 26738b8fb99cSAlex Elder kref_get(&spec->kref); 26748b8fb99cSAlex Elder 26758b8fb99cSAlex Elder return spec; 26768b8fb99cSAlex Elder } 
26778b8fb99cSAlex Elder 26788b8fb99cSAlex Elder static void rbd_spec_free(struct kref *kref); 26798b8fb99cSAlex Elder static void rbd_spec_put(struct rbd_spec *spec) 26808b8fb99cSAlex Elder { 26818b8fb99cSAlex Elder if (spec) 26828b8fb99cSAlex Elder kref_put(&spec->kref, rbd_spec_free); 26838b8fb99cSAlex Elder } 26848b8fb99cSAlex Elder 26858b8fb99cSAlex Elder static struct rbd_spec *rbd_spec_alloc(void) 26868b8fb99cSAlex Elder { 26878b8fb99cSAlex Elder struct rbd_spec *spec; 26888b8fb99cSAlex Elder 26898b8fb99cSAlex Elder spec = kzalloc(sizeof (*spec), GFP_KERNEL); 26908b8fb99cSAlex Elder if (!spec) 26918b8fb99cSAlex Elder return NULL; 26928b8fb99cSAlex Elder kref_init(&spec->kref); 26938b8fb99cSAlex Elder 26948b8fb99cSAlex Elder rbd_spec_put(rbd_spec_get(spec)); /* TEMPORARY */ 26958b8fb99cSAlex Elder 26968b8fb99cSAlex Elder return spec; 26978b8fb99cSAlex Elder } 26988b8fb99cSAlex Elder 26998b8fb99cSAlex Elder static void rbd_spec_free(struct kref *kref) 27008b8fb99cSAlex Elder { 27018b8fb99cSAlex Elder struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref); 27028b8fb99cSAlex Elder 27038b8fb99cSAlex Elder kfree(spec->pool_name); 27048b8fb99cSAlex Elder kfree(spec->image_id); 27058b8fb99cSAlex Elder kfree(spec->image_name); 27068b8fb99cSAlex Elder kfree(spec->snap_name); 27078b8fb99cSAlex Elder kfree(spec); 27088b8fb99cSAlex Elder } 27098b8fb99cSAlex Elder 2710cc344fa1SAlex Elder static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, 2711c53d5893SAlex Elder struct rbd_spec *spec) 2712c53d5893SAlex Elder { 2713c53d5893SAlex Elder struct rbd_device *rbd_dev; 2714c53d5893SAlex Elder 2715c53d5893SAlex Elder rbd_dev = kzalloc(sizeof (*rbd_dev), GFP_KERNEL); 2716c53d5893SAlex Elder if (!rbd_dev) 2717c53d5893SAlex Elder return NULL; 2718c53d5893SAlex Elder 2719c53d5893SAlex Elder spin_lock_init(&rbd_dev->lock); 27206d292906SAlex Elder rbd_dev->flags = 0; 2721c53d5893SAlex Elder INIT_LIST_HEAD(&rbd_dev->node); 2722c53d5893SAlex Elder 
INIT_LIST_HEAD(&rbd_dev->snaps); 2723c53d5893SAlex Elder init_rwsem(&rbd_dev->header_rwsem); 2724c53d5893SAlex Elder 2725c53d5893SAlex Elder rbd_dev->spec = spec; 2726c53d5893SAlex Elder rbd_dev->rbd_client = rbdc; 2727c53d5893SAlex Elder 27280903e875SAlex Elder /* Initialize the layout used for all rbd requests */ 27290903e875SAlex Elder 27300903e875SAlex Elder rbd_dev->layout.fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); 27310903e875SAlex Elder rbd_dev->layout.fl_stripe_count = cpu_to_le32(1); 27320903e875SAlex Elder rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); 27330903e875SAlex Elder rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id); 27340903e875SAlex Elder 2735c53d5893SAlex Elder return rbd_dev; 2736c53d5893SAlex Elder } 2737c53d5893SAlex Elder 2738c53d5893SAlex Elder static void rbd_dev_destroy(struct rbd_device *rbd_dev) 2739c53d5893SAlex Elder { 274086b00e0dSAlex Elder rbd_spec_put(rbd_dev->parent_spec); 2741c53d5893SAlex Elder kfree(rbd_dev->header_name); 2742c53d5893SAlex Elder rbd_put_client(rbd_dev->rbd_client); 2743c53d5893SAlex Elder rbd_spec_put(rbd_dev->spec); 2744c53d5893SAlex Elder kfree(rbd_dev); 2745c53d5893SAlex Elder } 2746c53d5893SAlex Elder 2747304f6808SAlex Elder static bool rbd_snap_registered(struct rbd_snap *snap) 2748304f6808SAlex Elder { 2749304f6808SAlex Elder bool ret = snap->dev.type == &rbd_snap_device_type; 2750304f6808SAlex Elder bool reg = device_is_registered(&snap->dev); 2751304f6808SAlex Elder 2752304f6808SAlex Elder rbd_assert(!ret ^ reg); 2753304f6808SAlex Elder 2754304f6808SAlex Elder return ret; 2755304f6808SAlex Elder } 2756304f6808SAlex Elder 275741f38c2bSAlex Elder static void rbd_remove_snap_dev(struct rbd_snap *snap) 2758dfc5606dSYehuda Sadeh { 2759dfc5606dSYehuda Sadeh list_del(&snap->node); 2760304f6808SAlex Elder if (device_is_registered(&snap->dev)) 2761dfc5606dSYehuda Sadeh device_unregister(&snap->dev); 2762dfc5606dSYehuda Sadeh } 2763dfc5606dSYehuda Sadeh 
276414e7085dSAlex Elder static int rbd_register_snap_dev(struct rbd_snap *snap, 2765dfc5606dSYehuda Sadeh struct device *parent) 2766dfc5606dSYehuda Sadeh { 2767dfc5606dSYehuda Sadeh struct device *dev = &snap->dev; 2768dfc5606dSYehuda Sadeh int ret; 2769dfc5606dSYehuda Sadeh 2770dfc5606dSYehuda Sadeh dev->type = &rbd_snap_device_type; 2771dfc5606dSYehuda Sadeh dev->parent = parent; 2772dfc5606dSYehuda Sadeh dev->release = rbd_snap_dev_release; 2773d4b125e9SAlex Elder dev_set_name(dev, "%s%s", RBD_SNAP_DEV_NAME_PREFIX, snap->name); 2774304f6808SAlex Elder dout("%s: registering device for snapshot %s\n", __func__, snap->name); 2775304f6808SAlex Elder 2776dfc5606dSYehuda Sadeh ret = device_register(dev); 2777dfc5606dSYehuda Sadeh 2778dfc5606dSYehuda Sadeh return ret; 2779dfc5606dSYehuda Sadeh } 2780dfc5606dSYehuda Sadeh 27814e891e0aSAlex Elder static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev, 2782c8d18425SAlex Elder const char *snap_name, 278334b13184SAlex Elder u64 snap_id, u64 snap_size, 278434b13184SAlex Elder u64 snap_features) 2785dfc5606dSYehuda Sadeh { 27864e891e0aSAlex Elder struct rbd_snap *snap; 2787dfc5606dSYehuda Sadeh int ret; 27884e891e0aSAlex Elder 27894e891e0aSAlex Elder snap = kzalloc(sizeof (*snap), GFP_KERNEL); 2790dfc5606dSYehuda Sadeh if (!snap) 27914e891e0aSAlex Elder return ERR_PTR(-ENOMEM); 27924e891e0aSAlex Elder 27934e891e0aSAlex Elder ret = -ENOMEM; 2794c8d18425SAlex Elder snap->name = kstrdup(snap_name, GFP_KERNEL); 27954e891e0aSAlex Elder if (!snap->name) 27964e891e0aSAlex Elder goto err; 27974e891e0aSAlex Elder 2798c8d18425SAlex Elder snap->id = snap_id; 2799c8d18425SAlex Elder snap->size = snap_size; 280034b13184SAlex Elder snap->features = snap_features; 28014e891e0aSAlex Elder 28024e891e0aSAlex Elder return snap; 28034e891e0aSAlex Elder 2804dfc5606dSYehuda Sadeh err: 2805dfc5606dSYehuda Sadeh kfree(snap->name); 2806dfc5606dSYehuda Sadeh kfree(snap); 28074e891e0aSAlex Elder 28084e891e0aSAlex Elder return 
ERR_PTR(ret); 2809dfc5606dSYehuda Sadeh } 2810dfc5606dSYehuda Sadeh 2811cd892126SAlex Elder static char *rbd_dev_v1_snap_info(struct rbd_device *rbd_dev, u32 which, 2812cd892126SAlex Elder u64 *snap_size, u64 *snap_features) 2813cd892126SAlex Elder { 2814cd892126SAlex Elder char *snap_name; 2815cd892126SAlex Elder 2816cd892126SAlex Elder rbd_assert(which < rbd_dev->header.snapc->num_snaps); 2817cd892126SAlex Elder 2818cd892126SAlex Elder *snap_size = rbd_dev->header.snap_sizes[which]; 2819cd892126SAlex Elder *snap_features = 0; /* No features for v1 */ 2820cd892126SAlex Elder 2821cd892126SAlex Elder /* Skip over names until we find the one we are looking for */ 2822cd892126SAlex Elder 2823cd892126SAlex Elder snap_name = rbd_dev->header.snap_names; 2824cd892126SAlex Elder while (which--) 2825cd892126SAlex Elder snap_name += strlen(snap_name) + 1; 2826cd892126SAlex Elder 2827cd892126SAlex Elder return snap_name; 2828cd892126SAlex Elder } 2829cd892126SAlex Elder 2830dfc5606dSYehuda Sadeh /* 28319d475de5SAlex Elder * Get the size and object order for an image snapshot, or if 28329d475de5SAlex Elder * snap_id is CEPH_NOSNAP, gets this information for the base 28339d475de5SAlex Elder * image. 
28349d475de5SAlex Elder */ 28359d475de5SAlex Elder static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, 28369d475de5SAlex Elder u8 *order, u64 *snap_size) 28379d475de5SAlex Elder { 28389d475de5SAlex Elder __le64 snapid = cpu_to_le64(snap_id); 28399d475de5SAlex Elder int ret; 28409d475de5SAlex Elder struct { 28419d475de5SAlex Elder u8 order; 28429d475de5SAlex Elder __le64 size; 28439d475de5SAlex Elder } __attribute__ ((packed)) size_buf = { 0 }; 28449d475de5SAlex Elder 284536be9a76SAlex Elder ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, 28469d475de5SAlex Elder "rbd", "get_size", 28479d475de5SAlex Elder (char *) &snapid, sizeof (snapid), 284807b2391fSAlex Elder (char *) &size_buf, sizeof (size_buf), NULL); 284936be9a76SAlex Elder dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 28509d475de5SAlex Elder if (ret < 0) 28519d475de5SAlex Elder return ret; 28529d475de5SAlex Elder 28539d475de5SAlex Elder *order = size_buf.order; 28549d475de5SAlex Elder *snap_size = le64_to_cpu(size_buf.size); 28559d475de5SAlex Elder 28569d475de5SAlex Elder dout(" snap_id 0x%016llx order = %u, snap_size = %llu\n", 28579d475de5SAlex Elder (unsigned long long) snap_id, (unsigned int) *order, 28589d475de5SAlex Elder (unsigned long long) *snap_size); 28599d475de5SAlex Elder 28609d475de5SAlex Elder return 0; 28619d475de5SAlex Elder } 28629d475de5SAlex Elder 28639d475de5SAlex Elder static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev) 28649d475de5SAlex Elder { 28659d475de5SAlex Elder return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP, 28669d475de5SAlex Elder &rbd_dev->header.obj_order, 28679d475de5SAlex Elder &rbd_dev->header.image_size); 28689d475de5SAlex Elder } 28699d475de5SAlex Elder 28701e130199SAlex Elder static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) 28711e130199SAlex Elder { 28721e130199SAlex Elder void *reply_buf; 28731e130199SAlex Elder int ret; 28741e130199SAlex Elder void *p; 28751e130199SAlex Elder 28761e130199SAlex 
Elder reply_buf = kzalloc(RBD_OBJ_PREFIX_LEN_MAX, GFP_KERNEL); 28771e130199SAlex Elder if (!reply_buf) 28781e130199SAlex Elder return -ENOMEM; 28791e130199SAlex Elder 288036be9a76SAlex Elder ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, 28811e130199SAlex Elder "rbd", "get_object_prefix", 28821e130199SAlex Elder NULL, 0, 288307b2391fSAlex Elder reply_buf, RBD_OBJ_PREFIX_LEN_MAX, NULL); 288436be9a76SAlex Elder dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 28851e130199SAlex Elder if (ret < 0) 28861e130199SAlex Elder goto out; 28871e130199SAlex Elder 28881e130199SAlex Elder p = reply_buf; 28891e130199SAlex Elder rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, 28901e130199SAlex Elder p + RBD_OBJ_PREFIX_LEN_MAX, 28911e130199SAlex Elder NULL, GFP_NOIO); 28921e130199SAlex Elder 28931e130199SAlex Elder if (IS_ERR(rbd_dev->header.object_prefix)) { 28941e130199SAlex Elder ret = PTR_ERR(rbd_dev->header.object_prefix); 28951e130199SAlex Elder rbd_dev->header.object_prefix = NULL; 28961e130199SAlex Elder } else { 28971e130199SAlex Elder dout(" object_prefix = %s\n", rbd_dev->header.object_prefix); 28981e130199SAlex Elder } 28991e130199SAlex Elder 29001e130199SAlex Elder out: 29011e130199SAlex Elder kfree(reply_buf); 29021e130199SAlex Elder 29031e130199SAlex Elder return ret; 29041e130199SAlex Elder } 29051e130199SAlex Elder 2906b1b5402aSAlex Elder static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, 2907b1b5402aSAlex Elder u64 *snap_features) 2908b1b5402aSAlex Elder { 2909b1b5402aSAlex Elder __le64 snapid = cpu_to_le64(snap_id); 2910b1b5402aSAlex Elder struct { 2911b1b5402aSAlex Elder __le64 features; 2912b1b5402aSAlex Elder __le64 incompat; 2913b1b5402aSAlex Elder } features_buf = { 0 }; 2914d889140cSAlex Elder u64 incompat; 2915b1b5402aSAlex Elder int ret; 2916b1b5402aSAlex Elder 291736be9a76SAlex Elder ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, 2918b1b5402aSAlex Elder "rbd", "get_features", 
2919b1b5402aSAlex Elder (char *) &snapid, sizeof (snapid), 2920b1b5402aSAlex Elder (char *) &features_buf, sizeof (features_buf), 292107b2391fSAlex Elder NULL); 292236be9a76SAlex Elder dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 2923b1b5402aSAlex Elder if (ret < 0) 2924b1b5402aSAlex Elder return ret; 2925d889140cSAlex Elder 2926d889140cSAlex Elder incompat = le64_to_cpu(features_buf.incompat); 2927d889140cSAlex Elder if (incompat & ~RBD_FEATURES_ALL) 2928b8f5c6edSAlex Elder return -ENXIO; 2929d889140cSAlex Elder 2930b1b5402aSAlex Elder *snap_features = le64_to_cpu(features_buf.features); 2931b1b5402aSAlex Elder 2932b1b5402aSAlex Elder dout(" snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n", 2933b1b5402aSAlex Elder (unsigned long long) snap_id, 2934b1b5402aSAlex Elder (unsigned long long) *snap_features, 2935b1b5402aSAlex Elder (unsigned long long) le64_to_cpu(features_buf.incompat)); 2936b1b5402aSAlex Elder 2937b1b5402aSAlex Elder return 0; 2938b1b5402aSAlex Elder } 2939b1b5402aSAlex Elder 2940b1b5402aSAlex Elder static int rbd_dev_v2_features(struct rbd_device *rbd_dev) 2941b1b5402aSAlex Elder { 2942b1b5402aSAlex Elder return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, 2943b1b5402aSAlex Elder &rbd_dev->header.features); 2944b1b5402aSAlex Elder } 2945b1b5402aSAlex Elder 294686b00e0dSAlex Elder static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) 294786b00e0dSAlex Elder { 294886b00e0dSAlex Elder struct rbd_spec *parent_spec; 294986b00e0dSAlex Elder size_t size; 295086b00e0dSAlex Elder void *reply_buf = NULL; 295186b00e0dSAlex Elder __le64 snapid; 295286b00e0dSAlex Elder void *p; 295386b00e0dSAlex Elder void *end; 295486b00e0dSAlex Elder char *image_id; 295586b00e0dSAlex Elder u64 overlap; 295686b00e0dSAlex Elder int ret; 295786b00e0dSAlex Elder 295886b00e0dSAlex Elder parent_spec = rbd_spec_alloc(); 295986b00e0dSAlex Elder if (!parent_spec) 296086b00e0dSAlex Elder return -ENOMEM; 296186b00e0dSAlex Elder 296286b00e0dSAlex 
Elder size = sizeof (__le64) + /* pool_id */ 296386b00e0dSAlex Elder sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX + /* image_id */ 296486b00e0dSAlex Elder sizeof (__le64) + /* snap_id */ 296586b00e0dSAlex Elder sizeof (__le64); /* overlap */ 296686b00e0dSAlex Elder reply_buf = kmalloc(size, GFP_KERNEL); 296786b00e0dSAlex Elder if (!reply_buf) { 296886b00e0dSAlex Elder ret = -ENOMEM; 296986b00e0dSAlex Elder goto out_err; 297086b00e0dSAlex Elder } 297186b00e0dSAlex Elder 297286b00e0dSAlex Elder snapid = cpu_to_le64(CEPH_NOSNAP); 297336be9a76SAlex Elder ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, 297486b00e0dSAlex Elder "rbd", "get_parent", 297586b00e0dSAlex Elder (char *) &snapid, sizeof (snapid), 297607b2391fSAlex Elder (char *) reply_buf, size, NULL); 297736be9a76SAlex Elder dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 297886b00e0dSAlex Elder if (ret < 0) 297986b00e0dSAlex Elder goto out_err; 298086b00e0dSAlex Elder 298186b00e0dSAlex Elder ret = -ERANGE; 298286b00e0dSAlex Elder p = reply_buf; 298386b00e0dSAlex Elder end = (char *) reply_buf + size; 298486b00e0dSAlex Elder ceph_decode_64_safe(&p, end, parent_spec->pool_id, out_err); 298586b00e0dSAlex Elder if (parent_spec->pool_id == CEPH_NOPOOL) 298686b00e0dSAlex Elder goto out; /* No parent? No problem. 
*/ 298786b00e0dSAlex Elder 29880903e875SAlex Elder /* The ceph file layout needs to fit pool id in 32 bits */ 29890903e875SAlex Elder 29900903e875SAlex Elder ret = -EIO; 29910903e875SAlex Elder if (WARN_ON(parent_spec->pool_id > (u64) U32_MAX)) 29920903e875SAlex Elder goto out; 29930903e875SAlex Elder 2994979ed480SAlex Elder image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); 299586b00e0dSAlex Elder if (IS_ERR(image_id)) { 299686b00e0dSAlex Elder ret = PTR_ERR(image_id); 299786b00e0dSAlex Elder goto out_err; 299886b00e0dSAlex Elder } 299986b00e0dSAlex Elder parent_spec->image_id = image_id; 300086b00e0dSAlex Elder ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); 300186b00e0dSAlex Elder ceph_decode_64_safe(&p, end, overlap, out_err); 300286b00e0dSAlex Elder 300386b00e0dSAlex Elder rbd_dev->parent_overlap = overlap; 300486b00e0dSAlex Elder rbd_dev->parent_spec = parent_spec; 300586b00e0dSAlex Elder parent_spec = NULL; /* rbd_dev now owns this */ 300686b00e0dSAlex Elder out: 300786b00e0dSAlex Elder ret = 0; 300886b00e0dSAlex Elder out_err: 300986b00e0dSAlex Elder kfree(reply_buf); 301086b00e0dSAlex Elder rbd_spec_put(parent_spec); 301186b00e0dSAlex Elder 301286b00e0dSAlex Elder return ret; 301386b00e0dSAlex Elder } 301486b00e0dSAlex Elder 30159e15b77dSAlex Elder static char *rbd_dev_image_name(struct rbd_device *rbd_dev) 30169e15b77dSAlex Elder { 30179e15b77dSAlex Elder size_t image_id_size; 30189e15b77dSAlex Elder char *image_id; 30199e15b77dSAlex Elder void *p; 30209e15b77dSAlex Elder void *end; 30219e15b77dSAlex Elder size_t size; 30229e15b77dSAlex Elder void *reply_buf = NULL; 30239e15b77dSAlex Elder size_t len = 0; 30249e15b77dSAlex Elder char *image_name = NULL; 30259e15b77dSAlex Elder int ret; 30269e15b77dSAlex Elder 30279e15b77dSAlex Elder rbd_assert(!rbd_dev->spec->image_name); 30289e15b77dSAlex Elder 302969e7a02fSAlex Elder len = strlen(rbd_dev->spec->image_id); 303069e7a02fSAlex Elder image_id_size = sizeof (__le32) + len; 
30319e15b77dSAlex Elder image_id = kmalloc(image_id_size, GFP_KERNEL); 30329e15b77dSAlex Elder if (!image_id) 30339e15b77dSAlex Elder return NULL; 30349e15b77dSAlex Elder 30359e15b77dSAlex Elder p = image_id; 30369e15b77dSAlex Elder end = (char *) image_id + image_id_size; 303769e7a02fSAlex Elder ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32) len); 30389e15b77dSAlex Elder 30399e15b77dSAlex Elder size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX; 30409e15b77dSAlex Elder reply_buf = kmalloc(size, GFP_KERNEL); 30419e15b77dSAlex Elder if (!reply_buf) 30429e15b77dSAlex Elder goto out; 30439e15b77dSAlex Elder 304436be9a76SAlex Elder ret = rbd_obj_method_sync(rbd_dev, RBD_DIRECTORY, 30459e15b77dSAlex Elder "rbd", "dir_get_name", 30469e15b77dSAlex Elder image_id, image_id_size, 304707b2391fSAlex Elder (char *) reply_buf, size, NULL); 30489e15b77dSAlex Elder if (ret < 0) 30499e15b77dSAlex Elder goto out; 30509e15b77dSAlex Elder p = reply_buf; 30519e15b77dSAlex Elder end = (char *) reply_buf + size; 30529e15b77dSAlex Elder image_name = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL); 30539e15b77dSAlex Elder if (IS_ERR(image_name)) 30549e15b77dSAlex Elder image_name = NULL; 30559e15b77dSAlex Elder else 30569e15b77dSAlex Elder dout("%s: name is %s len is %zd\n", __func__, image_name, len); 30579e15b77dSAlex Elder out: 30589e15b77dSAlex Elder kfree(reply_buf); 30599e15b77dSAlex Elder kfree(image_id); 30609e15b77dSAlex Elder 30619e15b77dSAlex Elder return image_name; 30629e15b77dSAlex Elder } 30639e15b77dSAlex Elder 30649e15b77dSAlex Elder /* 30659e15b77dSAlex Elder * When a parent image gets probed, we only have the pool, image, 30669e15b77dSAlex Elder * and snapshot ids but not the names of any of them. This call 30679e15b77dSAlex Elder * is made later to fill in those names. 
It has to be done after 30689e15b77dSAlex Elder * rbd_dev_snaps_update() has completed because some of the 30699e15b77dSAlex Elder * information (in particular, snapshot name) is not available 30709e15b77dSAlex Elder * until then. 30719e15b77dSAlex Elder */ 30729e15b77dSAlex Elder static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) 30739e15b77dSAlex Elder { 30749e15b77dSAlex Elder struct ceph_osd_client *osdc; 30759e15b77dSAlex Elder const char *name; 30769e15b77dSAlex Elder void *reply_buf = NULL; 30779e15b77dSAlex Elder int ret; 30789e15b77dSAlex Elder 30799e15b77dSAlex Elder if (rbd_dev->spec->pool_name) 30809e15b77dSAlex Elder return 0; /* Already have the names */ 30819e15b77dSAlex Elder 30829e15b77dSAlex Elder /* Look up the pool name */ 30839e15b77dSAlex Elder 30849e15b77dSAlex Elder osdc = &rbd_dev->rbd_client->client->osdc; 30859e15b77dSAlex Elder name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id); 3086935dc89fSAlex Elder if (!name) { 3087935dc89fSAlex Elder rbd_warn(rbd_dev, "there is no pool with id %llu", 3088935dc89fSAlex Elder rbd_dev->spec->pool_id); /* Really a BUG() */ 3089935dc89fSAlex Elder return -EIO; 3090935dc89fSAlex Elder } 30919e15b77dSAlex Elder 30929e15b77dSAlex Elder rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL); 30939e15b77dSAlex Elder if (!rbd_dev->spec->pool_name) 30949e15b77dSAlex Elder return -ENOMEM; 30959e15b77dSAlex Elder 30969e15b77dSAlex Elder /* Fetch the image name; tolerate failure here */ 30979e15b77dSAlex Elder 30989e15b77dSAlex Elder name = rbd_dev_image_name(rbd_dev); 309969e7a02fSAlex Elder if (name) 31009e15b77dSAlex Elder rbd_dev->spec->image_name = (char *) name; 310169e7a02fSAlex Elder else 310206ecc6cbSAlex Elder rbd_warn(rbd_dev, "unable to get image name"); 31039e15b77dSAlex Elder 31049e15b77dSAlex Elder /* Look up the snapshot name. 
*/ 31059e15b77dSAlex Elder 31069e15b77dSAlex Elder name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id); 31079e15b77dSAlex Elder if (!name) { 3108935dc89fSAlex Elder rbd_warn(rbd_dev, "no snapshot with id %llu", 3109935dc89fSAlex Elder rbd_dev->spec->snap_id); /* Really a BUG() */ 31109e15b77dSAlex Elder ret = -EIO; 31119e15b77dSAlex Elder goto out_err; 31129e15b77dSAlex Elder } 31139e15b77dSAlex Elder rbd_dev->spec->snap_name = kstrdup(name, GFP_KERNEL); 31149e15b77dSAlex Elder if(!rbd_dev->spec->snap_name) 31159e15b77dSAlex Elder goto out_err; 31169e15b77dSAlex Elder 31179e15b77dSAlex Elder return 0; 31189e15b77dSAlex Elder out_err: 31199e15b77dSAlex Elder kfree(reply_buf); 31209e15b77dSAlex Elder kfree(rbd_dev->spec->pool_name); 31219e15b77dSAlex Elder rbd_dev->spec->pool_name = NULL; 31229e15b77dSAlex Elder 31239e15b77dSAlex Elder return ret; 31249e15b77dSAlex Elder } 31259e15b77dSAlex Elder 31266e14b1a6SAlex Elder static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) 312735d489f9SAlex Elder { 312835d489f9SAlex Elder size_t size; 312935d489f9SAlex Elder int ret; 313035d489f9SAlex Elder void *reply_buf; 313135d489f9SAlex Elder void *p; 313235d489f9SAlex Elder void *end; 313335d489f9SAlex Elder u64 seq; 313435d489f9SAlex Elder u32 snap_count; 313535d489f9SAlex Elder struct ceph_snap_context *snapc; 313635d489f9SAlex Elder u32 i; 313735d489f9SAlex Elder 313835d489f9SAlex Elder /* 313935d489f9SAlex Elder * We'll need room for the seq value (maximum snapshot id), 314035d489f9SAlex Elder * snapshot count, and array of that many snapshot ids. 314135d489f9SAlex Elder * For now we have a fixed upper limit on the number we're 314235d489f9SAlex Elder * prepared to receive. 
314335d489f9SAlex Elder */ 314435d489f9SAlex Elder size = sizeof (__le64) + sizeof (__le32) + 314535d489f9SAlex Elder RBD_MAX_SNAP_COUNT * sizeof (__le64); 314635d489f9SAlex Elder reply_buf = kzalloc(size, GFP_KERNEL); 314735d489f9SAlex Elder if (!reply_buf) 314835d489f9SAlex Elder return -ENOMEM; 314935d489f9SAlex Elder 315036be9a76SAlex Elder ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, 315135d489f9SAlex Elder "rbd", "get_snapcontext", 315235d489f9SAlex Elder NULL, 0, 315307b2391fSAlex Elder reply_buf, size, ver); 315436be9a76SAlex Elder dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 315535d489f9SAlex Elder if (ret < 0) 315635d489f9SAlex Elder goto out; 315735d489f9SAlex Elder 315835d489f9SAlex Elder ret = -ERANGE; 315935d489f9SAlex Elder p = reply_buf; 316035d489f9SAlex Elder end = (char *) reply_buf + size; 316135d489f9SAlex Elder ceph_decode_64_safe(&p, end, seq, out); 316235d489f9SAlex Elder ceph_decode_32_safe(&p, end, snap_count, out); 316335d489f9SAlex Elder 316435d489f9SAlex Elder /* 316535d489f9SAlex Elder * Make sure the reported number of snapshot ids wouldn't go 316635d489f9SAlex Elder * beyond the end of our buffer. But before checking that, 316735d489f9SAlex Elder * make sure the computed size of the snapshot context we 316835d489f9SAlex Elder * allocate is representable in a size_t. 
316935d489f9SAlex Elder */ 317035d489f9SAlex Elder if (snap_count > (SIZE_MAX - sizeof (struct ceph_snap_context)) 317135d489f9SAlex Elder / sizeof (u64)) { 317235d489f9SAlex Elder ret = -EINVAL; 317335d489f9SAlex Elder goto out; 317435d489f9SAlex Elder } 317535d489f9SAlex Elder if (!ceph_has_room(&p, end, snap_count * sizeof (__le64))) 317635d489f9SAlex Elder goto out; 317735d489f9SAlex Elder 317835d489f9SAlex Elder size = sizeof (struct ceph_snap_context) + 317935d489f9SAlex Elder snap_count * sizeof (snapc->snaps[0]); 318035d489f9SAlex Elder snapc = kmalloc(size, GFP_KERNEL); 318135d489f9SAlex Elder if (!snapc) { 318235d489f9SAlex Elder ret = -ENOMEM; 318335d489f9SAlex Elder goto out; 318435d489f9SAlex Elder } 318535d489f9SAlex Elder 318635d489f9SAlex Elder atomic_set(&snapc->nref, 1); 318735d489f9SAlex Elder snapc->seq = seq; 318835d489f9SAlex Elder snapc->num_snaps = snap_count; 318935d489f9SAlex Elder for (i = 0; i < snap_count; i++) 319035d489f9SAlex Elder snapc->snaps[i] = ceph_decode_64(&p); 319135d489f9SAlex Elder 319235d489f9SAlex Elder rbd_dev->header.snapc = snapc; 319335d489f9SAlex Elder 319435d489f9SAlex Elder dout(" snap context seq = %llu, snap_count = %u\n", 319535d489f9SAlex Elder (unsigned long long) seq, (unsigned int) snap_count); 319635d489f9SAlex Elder 319735d489f9SAlex Elder out: 319835d489f9SAlex Elder kfree(reply_buf); 319935d489f9SAlex Elder 320035d489f9SAlex Elder return 0; 320135d489f9SAlex Elder } 320235d489f9SAlex Elder 3203b8b1e2dbSAlex Elder static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) 3204b8b1e2dbSAlex Elder { 3205b8b1e2dbSAlex Elder size_t size; 3206b8b1e2dbSAlex Elder void *reply_buf; 3207b8b1e2dbSAlex Elder __le64 snap_id; 3208b8b1e2dbSAlex Elder int ret; 3209b8b1e2dbSAlex Elder void *p; 3210b8b1e2dbSAlex Elder void *end; 3211b8b1e2dbSAlex Elder char *snap_name; 3212b8b1e2dbSAlex Elder 3213b8b1e2dbSAlex Elder size = sizeof (__le32) + RBD_MAX_SNAP_NAME_LEN; 3214b8b1e2dbSAlex Elder reply_buf = 
kmalloc(size, GFP_KERNEL); 3215b8b1e2dbSAlex Elder if (!reply_buf) 3216b8b1e2dbSAlex Elder return ERR_PTR(-ENOMEM); 3217b8b1e2dbSAlex Elder 3218b8b1e2dbSAlex Elder snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]); 321936be9a76SAlex Elder ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, 3220b8b1e2dbSAlex Elder "rbd", "get_snapshot_name", 3221b8b1e2dbSAlex Elder (char *) &snap_id, sizeof (snap_id), 322207b2391fSAlex Elder reply_buf, size, NULL); 322336be9a76SAlex Elder dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 3224b8b1e2dbSAlex Elder if (ret < 0) 3225b8b1e2dbSAlex Elder goto out; 3226b8b1e2dbSAlex Elder 3227b8b1e2dbSAlex Elder p = reply_buf; 3228b8b1e2dbSAlex Elder end = (char *) reply_buf + size; 3229e5c35534SAlex Elder snap_name = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); 3230b8b1e2dbSAlex Elder if (IS_ERR(snap_name)) { 3231b8b1e2dbSAlex Elder ret = PTR_ERR(snap_name); 3232b8b1e2dbSAlex Elder goto out; 3233b8b1e2dbSAlex Elder } else { 3234b8b1e2dbSAlex Elder dout(" snap_id 0x%016llx snap_name = %s\n", 3235b8b1e2dbSAlex Elder (unsigned long long) le64_to_cpu(snap_id), snap_name); 3236b8b1e2dbSAlex Elder } 3237b8b1e2dbSAlex Elder kfree(reply_buf); 3238b8b1e2dbSAlex Elder 3239b8b1e2dbSAlex Elder return snap_name; 3240b8b1e2dbSAlex Elder out: 3241b8b1e2dbSAlex Elder kfree(reply_buf); 3242b8b1e2dbSAlex Elder 3243b8b1e2dbSAlex Elder return ERR_PTR(ret); 3244b8b1e2dbSAlex Elder } 3245b8b1e2dbSAlex Elder 3246b8b1e2dbSAlex Elder static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which, 3247b8b1e2dbSAlex Elder u64 *snap_size, u64 *snap_features) 3248b8b1e2dbSAlex Elder { 3249e0b49868SAlex Elder u64 snap_id; 3250b8b1e2dbSAlex Elder u8 order; 3251b8b1e2dbSAlex Elder int ret; 3252b8b1e2dbSAlex Elder 3253b8b1e2dbSAlex Elder snap_id = rbd_dev->header.snapc->snaps[which]; 3254b8b1e2dbSAlex Elder ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, &order, snap_size); 3255b8b1e2dbSAlex Elder if (ret) 3256b8b1e2dbSAlex 
Elder return ERR_PTR(ret); 3257b8b1e2dbSAlex Elder ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, snap_features); 3258b8b1e2dbSAlex Elder if (ret) 3259b8b1e2dbSAlex Elder return ERR_PTR(ret); 3260b8b1e2dbSAlex Elder 3261b8b1e2dbSAlex Elder return rbd_dev_v2_snap_name(rbd_dev, which); 3262b8b1e2dbSAlex Elder } 3263b8b1e2dbSAlex Elder 3264b8b1e2dbSAlex Elder static char *rbd_dev_snap_info(struct rbd_device *rbd_dev, u32 which, 3265b8b1e2dbSAlex Elder u64 *snap_size, u64 *snap_features) 3266b8b1e2dbSAlex Elder { 3267b8b1e2dbSAlex Elder if (rbd_dev->image_format == 1) 3268b8b1e2dbSAlex Elder return rbd_dev_v1_snap_info(rbd_dev, which, 3269b8b1e2dbSAlex Elder snap_size, snap_features); 3270b8b1e2dbSAlex Elder if (rbd_dev->image_format == 2) 3271b8b1e2dbSAlex Elder return rbd_dev_v2_snap_info(rbd_dev, which, 3272b8b1e2dbSAlex Elder snap_size, snap_features); 3273b8b1e2dbSAlex Elder return ERR_PTR(-EINVAL); 3274b8b1e2dbSAlex Elder } 3275b8b1e2dbSAlex Elder 3276117973fbSAlex Elder static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver) 3277117973fbSAlex Elder { 3278117973fbSAlex Elder int ret; 3279117973fbSAlex Elder __u8 obj_order; 3280117973fbSAlex Elder 3281117973fbSAlex Elder down_write(&rbd_dev->header_rwsem); 3282117973fbSAlex Elder 3283117973fbSAlex Elder /* Grab old order first, to see if it changes */ 3284117973fbSAlex Elder 3285117973fbSAlex Elder obj_order = rbd_dev->header.obj_order, 3286117973fbSAlex Elder ret = rbd_dev_v2_image_size(rbd_dev); 3287117973fbSAlex Elder if (ret) 3288117973fbSAlex Elder goto out; 3289117973fbSAlex Elder if (rbd_dev->header.obj_order != obj_order) { 3290117973fbSAlex Elder ret = -EIO; 3291117973fbSAlex Elder goto out; 3292117973fbSAlex Elder } 3293117973fbSAlex Elder rbd_update_mapping_size(rbd_dev); 3294117973fbSAlex Elder 3295117973fbSAlex Elder ret = rbd_dev_v2_snap_context(rbd_dev, hver); 3296117973fbSAlex Elder dout("rbd_dev_v2_snap_context returned %d\n", ret); 3297117973fbSAlex Elder if (ret) 
3298117973fbSAlex Elder goto out; 3299117973fbSAlex Elder ret = rbd_dev_snaps_update(rbd_dev); 3300117973fbSAlex Elder dout("rbd_dev_snaps_update returned %d\n", ret); 3301117973fbSAlex Elder if (ret) 3302117973fbSAlex Elder goto out; 3303117973fbSAlex Elder ret = rbd_dev_snaps_register(rbd_dev); 3304117973fbSAlex Elder dout("rbd_dev_snaps_register returned %d\n", ret); 3305117973fbSAlex Elder out: 3306117973fbSAlex Elder up_write(&rbd_dev->header_rwsem); 3307117973fbSAlex Elder 3308117973fbSAlex Elder return ret; 3309117973fbSAlex Elder } 3310117973fbSAlex Elder 33119d475de5SAlex Elder /* 331235938150SAlex Elder * Scan the rbd device's current snapshot list and compare it to the 331335938150SAlex Elder * newly-received snapshot context. Remove any existing snapshots 331435938150SAlex Elder * not present in the new snapshot context. Add a new snapshot for 331535938150SAlex Elder * any snaphots in the snapshot context not in the current list. 331635938150SAlex Elder * And verify there are no changes to snapshots we already know 331735938150SAlex Elder * about. 331835938150SAlex Elder * 331935938150SAlex Elder * Assumes the snapshots in the snapshot context are sorted by 332035938150SAlex Elder * snapshot id, highest id first. (Snapshots in the rbd_dev's list 332135938150SAlex Elder * are also maintained in that order.) 
3322dfc5606dSYehuda Sadeh */ 3323304f6808SAlex Elder static int rbd_dev_snaps_update(struct rbd_device *rbd_dev) 3324dfc5606dSYehuda Sadeh { 332535938150SAlex Elder struct ceph_snap_context *snapc = rbd_dev->header.snapc; 332635938150SAlex Elder const u32 snap_count = snapc->num_snaps; 332735938150SAlex Elder struct list_head *head = &rbd_dev->snaps; 332835938150SAlex Elder struct list_head *links = head->next; 332935938150SAlex Elder u32 index = 0; 3330dfc5606dSYehuda Sadeh 33319fcbb800SAlex Elder dout("%s: snap count is %u\n", __func__, (unsigned int) snap_count); 333235938150SAlex Elder while (index < snap_count || links != head) { 333335938150SAlex Elder u64 snap_id; 333435938150SAlex Elder struct rbd_snap *snap; 3335cd892126SAlex Elder char *snap_name; 3336cd892126SAlex Elder u64 snap_size = 0; 3337cd892126SAlex Elder u64 snap_features = 0; 3338dfc5606dSYehuda Sadeh 333935938150SAlex Elder snap_id = index < snap_count ? snapc->snaps[index] 334035938150SAlex Elder : CEPH_NOSNAP; 334135938150SAlex Elder snap = links != head ? list_entry(links, struct rbd_snap, node) 334235938150SAlex Elder : NULL; 3343aafb230eSAlex Elder rbd_assert(!snap || snap->id != CEPH_NOSNAP); 3344dfc5606dSYehuda Sadeh 334535938150SAlex Elder if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) { 334635938150SAlex Elder struct list_head *next = links->next; 3347dfc5606dSYehuda Sadeh 33486d292906SAlex Elder /* 33496d292906SAlex Elder * A previously-existing snapshot is not in 33506d292906SAlex Elder * the new snap context. 33516d292906SAlex Elder * 33526d292906SAlex Elder * If the now missing snapshot is the one the 33536d292906SAlex Elder * image is mapped to, clear its exists flag 33546d292906SAlex Elder * so we can avoid sending any more requests 33556d292906SAlex Elder * to it. 
33566d292906SAlex Elder */ 33570d7dbfceSAlex Elder if (rbd_dev->spec->snap_id == snap->id) 33586d292906SAlex Elder clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); 335941f38c2bSAlex Elder rbd_remove_snap_dev(snap); 33609fcbb800SAlex Elder dout("%ssnap id %llu has been removed\n", 33610d7dbfceSAlex Elder rbd_dev->spec->snap_id == snap->id ? 33620d7dbfceSAlex Elder "mapped " : "", 33639fcbb800SAlex Elder (unsigned long long) snap->id); 3364dfc5606dSYehuda Sadeh 336535938150SAlex Elder /* Done with this list entry; advance */ 336635938150SAlex Elder 336735938150SAlex Elder links = next; 336835938150SAlex Elder continue; 3369dfc5606dSYehuda Sadeh } 337035938150SAlex Elder 3371b8b1e2dbSAlex Elder snap_name = rbd_dev_snap_info(rbd_dev, index, 3372cd892126SAlex Elder &snap_size, &snap_features); 3373cd892126SAlex Elder if (IS_ERR(snap_name)) 3374cd892126SAlex Elder return PTR_ERR(snap_name); 3375cd892126SAlex Elder 33769fcbb800SAlex Elder dout("entry %u: snap_id = %llu\n", (unsigned int) snap_count, 33779fcbb800SAlex Elder (unsigned long long) snap_id); 337835938150SAlex Elder if (!snap || (snap_id != CEPH_NOSNAP && snap->id < snap_id)) { 337935938150SAlex Elder struct rbd_snap *new_snap; 338035938150SAlex Elder 338135938150SAlex Elder /* We haven't seen this snapshot before */ 338235938150SAlex Elder 3383c8d18425SAlex Elder new_snap = __rbd_add_snap_dev(rbd_dev, snap_name, 3384cd892126SAlex Elder snap_id, snap_size, snap_features); 33859fcbb800SAlex Elder if (IS_ERR(new_snap)) { 33869fcbb800SAlex Elder int err = PTR_ERR(new_snap); 33879fcbb800SAlex Elder 33889fcbb800SAlex Elder dout(" failed to add dev, error %d\n", err); 33899fcbb800SAlex Elder 33909fcbb800SAlex Elder return err; 33919fcbb800SAlex Elder } 339235938150SAlex Elder 339335938150SAlex Elder /* New goes before existing, or at end of list */ 339435938150SAlex Elder 33959fcbb800SAlex Elder dout(" added dev%s\n", snap ? 
"" : " at end\n"); 339635938150SAlex Elder if (snap) 339735938150SAlex Elder list_add_tail(&new_snap->node, &snap->node); 339835938150SAlex Elder else 3399523f3258SAlex Elder list_add_tail(&new_snap->node, head); 340035938150SAlex Elder } else { 340135938150SAlex Elder /* Already have this one */ 340235938150SAlex Elder 34039fcbb800SAlex Elder dout(" already present\n"); 34049fcbb800SAlex Elder 3405cd892126SAlex Elder rbd_assert(snap->size == snap_size); 3406aafb230eSAlex Elder rbd_assert(!strcmp(snap->name, snap_name)); 3407cd892126SAlex Elder rbd_assert(snap->features == snap_features); 340835938150SAlex Elder 340935938150SAlex Elder /* Done with this list entry; advance */ 341035938150SAlex Elder 341135938150SAlex Elder links = links->next; 3412dfc5606dSYehuda Sadeh } 341335938150SAlex Elder 341435938150SAlex Elder /* Advance to the next entry in the snapshot context */ 341535938150SAlex Elder 341635938150SAlex Elder index++; 3417dfc5606dSYehuda Sadeh } 34189fcbb800SAlex Elder dout("%s: done\n", __func__); 3419dfc5606dSYehuda Sadeh 3420dfc5606dSYehuda Sadeh return 0; 3421dfc5606dSYehuda Sadeh } 3422dfc5606dSYehuda Sadeh 3423304f6808SAlex Elder /* 3424304f6808SAlex Elder * Scan the list of snapshots and register the devices for any that 3425304f6808SAlex Elder * have not already been registered. 
3426304f6808SAlex Elder */ 3427304f6808SAlex Elder static int rbd_dev_snaps_register(struct rbd_device *rbd_dev) 3428304f6808SAlex Elder { 3429304f6808SAlex Elder struct rbd_snap *snap; 3430304f6808SAlex Elder int ret = 0; 3431304f6808SAlex Elder 343237206ee5SAlex Elder dout("%s:\n", __func__); 343386ff77bbSAlex Elder if (WARN_ON(!device_is_registered(&rbd_dev->dev))) 343486ff77bbSAlex Elder return -EIO; 3435304f6808SAlex Elder 3436304f6808SAlex Elder list_for_each_entry(snap, &rbd_dev->snaps, node) { 3437304f6808SAlex Elder if (!rbd_snap_registered(snap)) { 3438304f6808SAlex Elder ret = rbd_register_snap_dev(snap, &rbd_dev->dev); 3439304f6808SAlex Elder if (ret < 0) 3440304f6808SAlex Elder break; 3441304f6808SAlex Elder } 3442304f6808SAlex Elder } 3443304f6808SAlex Elder dout("%s: returning %d\n", __func__, ret); 3444304f6808SAlex Elder 3445304f6808SAlex Elder return ret; 3446304f6808SAlex Elder } 3447304f6808SAlex Elder 3448dfc5606dSYehuda Sadeh static int rbd_bus_add_dev(struct rbd_device *rbd_dev) 3449dfc5606dSYehuda Sadeh { 3450dfc5606dSYehuda Sadeh struct device *dev; 3451cd789ab9SAlex Elder int ret; 3452dfc5606dSYehuda Sadeh 3453dfc5606dSYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 3454dfc5606dSYehuda Sadeh 3455cd789ab9SAlex Elder dev = &rbd_dev->dev; 3456dfc5606dSYehuda Sadeh dev->bus = &rbd_bus_type; 3457dfc5606dSYehuda Sadeh dev->type = &rbd_device_type; 3458dfc5606dSYehuda Sadeh dev->parent = &rbd_root_dev; 3459dfc5606dSYehuda Sadeh dev->release = rbd_dev_release; 3460de71a297SAlex Elder dev_set_name(dev, "%d", rbd_dev->dev_id); 3461dfc5606dSYehuda Sadeh ret = device_register(dev); 3462dfc5606dSYehuda Sadeh 3463dfc5606dSYehuda Sadeh mutex_unlock(&ctl_mutex); 3464cd789ab9SAlex Elder 3465dfc5606dSYehuda Sadeh return ret; 3466602adf40SYehuda Sadeh } 3467602adf40SYehuda Sadeh 3468dfc5606dSYehuda Sadeh static void rbd_bus_del_dev(struct rbd_device *rbd_dev) 3469dfc5606dSYehuda Sadeh { 3470dfc5606dSYehuda Sadeh 
device_unregister(&rbd_dev->dev); 3471dfc5606dSYehuda Sadeh } 3472dfc5606dSYehuda Sadeh 3473e2839308SAlex Elder static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0); 34741ddbe94eSAlex Elder 34751ddbe94eSAlex Elder /* 3476499afd5bSAlex Elder * Get a unique rbd identifier for the given new rbd_dev, and add 3477499afd5bSAlex Elder * the rbd_dev to the global list. The minimum rbd id is 1. 34781ddbe94eSAlex Elder */ 3479e2839308SAlex Elder static void rbd_dev_id_get(struct rbd_device *rbd_dev) 3480b7f23c36SAlex Elder { 3481e2839308SAlex Elder rbd_dev->dev_id = atomic64_inc_return(&rbd_dev_id_max); 3482499afd5bSAlex Elder 3483499afd5bSAlex Elder spin_lock(&rbd_dev_list_lock); 3484499afd5bSAlex Elder list_add_tail(&rbd_dev->node, &rbd_dev_list); 3485499afd5bSAlex Elder spin_unlock(&rbd_dev_list_lock); 3486e2839308SAlex Elder dout("rbd_dev %p given dev id %llu\n", rbd_dev, 3487e2839308SAlex Elder (unsigned long long) rbd_dev->dev_id); 3488b7f23c36SAlex Elder } 3489b7f23c36SAlex Elder 34901ddbe94eSAlex Elder /* 3491499afd5bSAlex Elder * Remove an rbd_dev from the global list, and record that its 3492499afd5bSAlex Elder * identifier is no longer in use. 34931ddbe94eSAlex Elder */ 3494e2839308SAlex Elder static void rbd_dev_id_put(struct rbd_device *rbd_dev) 34951ddbe94eSAlex Elder { 3496d184f6bfSAlex Elder struct list_head *tmp; 3497de71a297SAlex Elder int rbd_id = rbd_dev->dev_id; 3498d184f6bfSAlex Elder int max_id; 3499d184f6bfSAlex Elder 3500aafb230eSAlex Elder rbd_assert(rbd_id > 0); 3501499afd5bSAlex Elder 3502e2839308SAlex Elder dout("rbd_dev %p released dev id %llu\n", rbd_dev, 3503e2839308SAlex Elder (unsigned long long) rbd_dev->dev_id); 3504499afd5bSAlex Elder spin_lock(&rbd_dev_list_lock); 3505499afd5bSAlex Elder list_del_init(&rbd_dev->node); 3506d184f6bfSAlex Elder 3507d184f6bfSAlex Elder /* 3508d184f6bfSAlex Elder * If the id being "put" is not the current maximum, there 3509d184f6bfSAlex Elder * is nothing special we need to do. 
3510d184f6bfSAlex Elder */ 3511e2839308SAlex Elder if (rbd_id != atomic64_read(&rbd_dev_id_max)) { 3512d184f6bfSAlex Elder spin_unlock(&rbd_dev_list_lock); 3513d184f6bfSAlex Elder return; 3514d184f6bfSAlex Elder } 3515d184f6bfSAlex Elder 3516d184f6bfSAlex Elder /* 3517d184f6bfSAlex Elder * We need to update the current maximum id. Search the 3518d184f6bfSAlex Elder * list to find out what it is. We're more likely to find 3519d184f6bfSAlex Elder * the maximum at the end, so search the list backward. 3520d184f6bfSAlex Elder */ 3521d184f6bfSAlex Elder max_id = 0; 3522d184f6bfSAlex Elder list_for_each_prev(tmp, &rbd_dev_list) { 3523d184f6bfSAlex Elder struct rbd_device *rbd_dev; 3524d184f6bfSAlex Elder 3525d184f6bfSAlex Elder rbd_dev = list_entry(tmp, struct rbd_device, node); 3526b213e0b1SAlex Elder if (rbd_dev->dev_id > max_id) 3527b213e0b1SAlex Elder max_id = rbd_dev->dev_id; 3528d184f6bfSAlex Elder } 3529499afd5bSAlex Elder spin_unlock(&rbd_dev_list_lock); 35301ddbe94eSAlex Elder 35311ddbe94eSAlex Elder /* 3532e2839308SAlex Elder * The max id could have been updated by rbd_dev_id_get(), in 3533d184f6bfSAlex Elder * which case it now accurately reflects the new maximum. 3534d184f6bfSAlex Elder * Be careful not to overwrite the maximum value in that 3535d184f6bfSAlex Elder * case. 35361ddbe94eSAlex Elder */ 3537e2839308SAlex Elder atomic64_cmpxchg(&rbd_dev_id_max, rbd_id, max_id); 3538e2839308SAlex Elder dout(" max dev id has been reset\n"); 3539b7f23c36SAlex Elder } 3540b7f23c36SAlex Elder 3541a725f65eSAlex Elder /* 3542e28fff26SAlex Elder * Skips over white space at *buf, and updates *buf to point to the 3543e28fff26SAlex Elder * first found non-space character (if any). Returns the length of 3544593a9e7bSAlex Elder * the token (string of non-white space characters) found. Note 3545593a9e7bSAlex Elder * that *buf must be terminated with '\0'. 
3546e28fff26SAlex Elder */ 3547e28fff26SAlex Elder static inline size_t next_token(const char **buf) 3548e28fff26SAlex Elder { 3549e28fff26SAlex Elder /* 3550e28fff26SAlex Elder * These are the characters that produce nonzero for 3551e28fff26SAlex Elder * isspace() in the "C" and "POSIX" locales. 3552e28fff26SAlex Elder */ 3553e28fff26SAlex Elder const char *spaces = " \f\n\r\t\v"; 3554e28fff26SAlex Elder 3555e28fff26SAlex Elder *buf += strspn(*buf, spaces); /* Find start of token */ 3556e28fff26SAlex Elder 3557e28fff26SAlex Elder return strcspn(*buf, spaces); /* Return token length */ 3558e28fff26SAlex Elder } 3559e28fff26SAlex Elder 3560e28fff26SAlex Elder /* 3561e28fff26SAlex Elder * Finds the next token in *buf, and if the provided token buffer is 3562e28fff26SAlex Elder * big enough, copies the found token into it. The result, if 3563593a9e7bSAlex Elder * copied, is guaranteed to be terminated with '\0'. Note that *buf 3564593a9e7bSAlex Elder * must be terminated with '\0' on entry. 3565e28fff26SAlex Elder * 3566e28fff26SAlex Elder * Returns the length of the token found (not including the '\0'). 3567e28fff26SAlex Elder * Return value will be 0 if no token is found, and it will be >= 3568e28fff26SAlex Elder * token_size if the token would not fit. 3569e28fff26SAlex Elder * 3570593a9e7bSAlex Elder * The *buf pointer will be updated to point beyond the end of the 3571e28fff26SAlex Elder * found token. Note that this occurs even if the token buffer is 3572e28fff26SAlex Elder * too small to hold it. 
3573e28fff26SAlex Elder */ 3574e28fff26SAlex Elder static inline size_t copy_token(const char **buf, 3575e28fff26SAlex Elder char *token, 3576e28fff26SAlex Elder size_t token_size) 3577e28fff26SAlex Elder { 3578e28fff26SAlex Elder size_t len; 3579e28fff26SAlex Elder 3580e28fff26SAlex Elder len = next_token(buf); 3581e28fff26SAlex Elder if (len < token_size) { 3582e28fff26SAlex Elder memcpy(token, *buf, len); 3583e28fff26SAlex Elder *(token + len) = '\0'; 3584e28fff26SAlex Elder } 3585e28fff26SAlex Elder *buf += len; 3586e28fff26SAlex Elder 3587e28fff26SAlex Elder return len; 3588e28fff26SAlex Elder } 3589e28fff26SAlex Elder 3590e28fff26SAlex Elder /* 3591ea3352f4SAlex Elder * Finds the next token in *buf, dynamically allocates a buffer big 3592ea3352f4SAlex Elder * enough to hold a copy of it, and copies the token into the new 3593ea3352f4SAlex Elder * buffer. The copy is guaranteed to be terminated with '\0'. Note 3594ea3352f4SAlex Elder * that a duplicate buffer is created even for a zero-length token. 3595ea3352f4SAlex Elder * 3596ea3352f4SAlex Elder * Returns a pointer to the newly-allocated duplicate, or a null 3597ea3352f4SAlex Elder * pointer if memory for the duplicate was not available. If 3598ea3352f4SAlex Elder * the lenp argument is a non-null pointer, the length of the token 3599ea3352f4SAlex Elder * (not including the '\0') is returned in *lenp. 3600ea3352f4SAlex Elder * 3601ea3352f4SAlex Elder * If successful, the *buf pointer will be updated to point beyond 3602ea3352f4SAlex Elder * the end of the found token. 3603ea3352f4SAlex Elder * 3604ea3352f4SAlex Elder * Note: uses GFP_KERNEL for allocation. 
3605ea3352f4SAlex Elder */ 3606ea3352f4SAlex Elder static inline char *dup_token(const char **buf, size_t *lenp) 3607ea3352f4SAlex Elder { 3608ea3352f4SAlex Elder char *dup; 3609ea3352f4SAlex Elder size_t len; 3610ea3352f4SAlex Elder 3611ea3352f4SAlex Elder len = next_token(buf); 36124caf35f9SAlex Elder dup = kmemdup(*buf, len + 1, GFP_KERNEL); 3613ea3352f4SAlex Elder if (!dup) 3614ea3352f4SAlex Elder return NULL; 3615ea3352f4SAlex Elder *(dup + len) = '\0'; 3616ea3352f4SAlex Elder *buf += len; 3617ea3352f4SAlex Elder 3618ea3352f4SAlex Elder if (lenp) 3619ea3352f4SAlex Elder *lenp = len; 3620ea3352f4SAlex Elder 3621ea3352f4SAlex Elder return dup; 3622ea3352f4SAlex Elder } 3623ea3352f4SAlex Elder 3624ea3352f4SAlex Elder /* 3625859c31dfSAlex Elder * Parse the options provided for an "rbd add" (i.e., rbd image 3626859c31dfSAlex Elder * mapping) request. These arrive via a write to /sys/bus/rbd/add, 3627859c31dfSAlex Elder * and the data written is passed here via a NUL-terminated buffer. 3628859c31dfSAlex Elder * Returns 0 if successful or an error code otherwise. 3629d22f76e7SAlex Elder * 3630859c31dfSAlex Elder * The information extracted from these options is recorded in 3631859c31dfSAlex Elder * the other parameters which return dynamically-allocated 3632859c31dfSAlex Elder * structures: 3633859c31dfSAlex Elder * ceph_opts 3634859c31dfSAlex Elder * The address of a pointer that will refer to a ceph options 3635859c31dfSAlex Elder * structure. Caller must release the returned pointer using 3636859c31dfSAlex Elder * ceph_destroy_options() when it is no longer needed. 3637859c31dfSAlex Elder * rbd_opts 3638859c31dfSAlex Elder * Address of an rbd options pointer. Fully initialized by 3639859c31dfSAlex Elder * this function; caller must release with kfree(). 3640859c31dfSAlex Elder * spec 3641859c31dfSAlex Elder * Address of an rbd image specification pointer. Fully 3642859c31dfSAlex Elder * initialized by this function based on parsed options. 
3643859c31dfSAlex Elder * Caller must release with rbd_spec_put(). 3644859c31dfSAlex Elder * 3645859c31dfSAlex Elder * The options passed take this form: 3646859c31dfSAlex Elder * <mon_addrs> <options> <pool_name> <image_name> [<snap_id>] 3647859c31dfSAlex Elder * where: 3648859c31dfSAlex Elder * <mon_addrs> 3649859c31dfSAlex Elder * A comma-separated list of one or more monitor addresses. 3650859c31dfSAlex Elder * A monitor address is an ip address, optionally followed 3651859c31dfSAlex Elder * by a port number (separated by a colon). 3652859c31dfSAlex Elder * I.e.: ip1[:port1][,ip2[:port2]...] 3653859c31dfSAlex Elder * <options> 3654859c31dfSAlex Elder * A comma-separated list of ceph and/or rbd options. 3655859c31dfSAlex Elder * <pool_name> 3656859c31dfSAlex Elder * The name of the rados pool containing the rbd image. 3657859c31dfSAlex Elder * <image_name> 3658859c31dfSAlex Elder * The name of the image in that pool to map. 3659859c31dfSAlex Elder * <snap_id> 3660859c31dfSAlex Elder * An optional snapshot id. If provided, the mapping will 3661859c31dfSAlex Elder * present data from the image at the time that snapshot was 3662859c31dfSAlex Elder * created. The image head is used if no snapshot id is 3663859c31dfSAlex Elder * provided. Snapshot mappings are always read-only. 
3664a725f65eSAlex Elder */ 3665859c31dfSAlex Elder static int rbd_add_parse_args(const char *buf, 3666dc79b113SAlex Elder struct ceph_options **ceph_opts, 3667859c31dfSAlex Elder struct rbd_options **opts, 3668859c31dfSAlex Elder struct rbd_spec **rbd_spec) 3669a725f65eSAlex Elder { 3670e28fff26SAlex Elder size_t len; 3671859c31dfSAlex Elder char *options; 36720ddebc0cSAlex Elder const char *mon_addrs; 36730ddebc0cSAlex Elder size_t mon_addrs_size; 3674859c31dfSAlex Elder struct rbd_spec *spec = NULL; 36754e9afebaSAlex Elder struct rbd_options *rbd_opts = NULL; 3676859c31dfSAlex Elder struct ceph_options *copts; 3677dc79b113SAlex Elder int ret; 3678e28fff26SAlex Elder 3679e28fff26SAlex Elder /* The first four tokens are required */ 3680e28fff26SAlex Elder 36817ef3214aSAlex Elder len = next_token(&buf); 36824fb5d671SAlex Elder if (!len) { 36834fb5d671SAlex Elder rbd_warn(NULL, "no monitor address(es) provided"); 36844fb5d671SAlex Elder return -EINVAL; 36854fb5d671SAlex Elder } 36860ddebc0cSAlex Elder mon_addrs = buf; 3687f28e565aSAlex Elder mon_addrs_size = len + 1; 36887ef3214aSAlex Elder buf += len; 3689a725f65eSAlex Elder 3690dc79b113SAlex Elder ret = -EINVAL; 3691f28e565aSAlex Elder options = dup_token(&buf, NULL); 3692f28e565aSAlex Elder if (!options) 3693dc79b113SAlex Elder return -ENOMEM; 36944fb5d671SAlex Elder if (!*options) { 36954fb5d671SAlex Elder rbd_warn(NULL, "no options provided"); 36964fb5d671SAlex Elder goto out_err; 36974fb5d671SAlex Elder } 3698a725f65eSAlex Elder 3699859c31dfSAlex Elder spec = rbd_spec_alloc(); 3700859c31dfSAlex Elder if (!spec) 3701f28e565aSAlex Elder goto out_mem; 3702859c31dfSAlex Elder 3703859c31dfSAlex Elder spec->pool_name = dup_token(&buf, NULL); 3704859c31dfSAlex Elder if (!spec->pool_name) 3705859c31dfSAlex Elder goto out_mem; 37064fb5d671SAlex Elder if (!*spec->pool_name) { 37074fb5d671SAlex Elder rbd_warn(NULL, "no pool name provided"); 37084fb5d671SAlex Elder goto out_err; 37094fb5d671SAlex Elder } 3710e28fff26SAlex 
Elder 371169e7a02fSAlex Elder spec->image_name = dup_token(&buf, NULL); 3712859c31dfSAlex Elder if (!spec->image_name) 3713f28e565aSAlex Elder goto out_mem; 37144fb5d671SAlex Elder if (!*spec->image_name) { 37154fb5d671SAlex Elder rbd_warn(NULL, "no image name provided"); 37164fb5d671SAlex Elder goto out_err; 37174fb5d671SAlex Elder } 3718e28fff26SAlex Elder 3719f28e565aSAlex Elder /* 3720f28e565aSAlex Elder * Snapshot name is optional; default is to use "-" 3721f28e565aSAlex Elder * (indicating the head/no snapshot). 3722f28e565aSAlex Elder */ 37233feeb894SAlex Elder len = next_token(&buf); 3724820a5f3eSAlex Elder if (!len) { 37253feeb894SAlex Elder buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */ 37263feeb894SAlex Elder len = sizeof (RBD_SNAP_HEAD_NAME) - 1; 3727f28e565aSAlex Elder } else if (len > RBD_MAX_SNAP_NAME_LEN) { 3728dc79b113SAlex Elder ret = -ENAMETOOLONG; 3729f28e565aSAlex Elder goto out_err; 3730849b4260SAlex Elder } 37314caf35f9SAlex Elder spec->snap_name = kmemdup(buf, len + 1, GFP_KERNEL); 3732859c31dfSAlex Elder if (!spec->snap_name) 3733f28e565aSAlex Elder goto out_mem; 3734859c31dfSAlex Elder *(spec->snap_name + len) = '\0'; 3735e5c35534SAlex Elder 37360ddebc0cSAlex Elder /* Initialize all rbd options to the defaults */ 3737e28fff26SAlex Elder 37384e9afebaSAlex Elder rbd_opts = kzalloc(sizeof (*rbd_opts), GFP_KERNEL); 37394e9afebaSAlex Elder if (!rbd_opts) 37404e9afebaSAlex Elder goto out_mem; 37414e9afebaSAlex Elder 37424e9afebaSAlex Elder rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; 3743d22f76e7SAlex Elder 3744859c31dfSAlex Elder copts = ceph_parse_options(options, mon_addrs, 37450ddebc0cSAlex Elder mon_addrs + mon_addrs_size - 1, 37464e9afebaSAlex Elder parse_rbd_opts_token, rbd_opts); 3747859c31dfSAlex Elder if (IS_ERR(copts)) { 3748859c31dfSAlex Elder ret = PTR_ERR(copts); 3749dc79b113SAlex Elder goto out_err; 3750dc79b113SAlex Elder } 3751859c31dfSAlex Elder kfree(options); 3752859c31dfSAlex Elder 3753859c31dfSAlex Elder *ceph_opts = 
copts; 37544e9afebaSAlex Elder *opts = rbd_opts; 3755859c31dfSAlex Elder *rbd_spec = spec; 37560ddebc0cSAlex Elder 3757dc79b113SAlex Elder return 0; 3758f28e565aSAlex Elder out_mem: 3759dc79b113SAlex Elder ret = -ENOMEM; 3760d22f76e7SAlex Elder out_err: 3761859c31dfSAlex Elder kfree(rbd_opts); 3762859c31dfSAlex Elder rbd_spec_put(spec); 3763f28e565aSAlex Elder kfree(options); 3764d22f76e7SAlex Elder 3765dc79b113SAlex Elder return ret; 3766a725f65eSAlex Elder } 3767a725f65eSAlex Elder 3768589d30e0SAlex Elder /* 3769589d30e0SAlex Elder * An rbd format 2 image has a unique identifier, distinct from the 3770589d30e0SAlex Elder * name given to it by the user. Internally, that identifier is 3771589d30e0SAlex Elder * what's used to specify the names of objects related to the image. 3772589d30e0SAlex Elder * 3773589d30e0SAlex Elder * A special "rbd id" object is used to map an rbd image name to its 3774589d30e0SAlex Elder * id. If that object doesn't exist, then there is no v2 rbd image 3775589d30e0SAlex Elder * with the supplied name. 3776589d30e0SAlex Elder * 3777589d30e0SAlex Elder * This function will record the given rbd_dev's image_id field if 3778589d30e0SAlex Elder * it can be determined, and in that case will return 0. If any 3779589d30e0SAlex Elder * errors occur a negative errno will be returned and the rbd_dev's 3780589d30e0SAlex Elder * image_id field will be unchanged (and should be NULL). 3781589d30e0SAlex Elder */ 3782589d30e0SAlex Elder static int rbd_dev_image_id(struct rbd_device *rbd_dev) 3783589d30e0SAlex Elder { 3784589d30e0SAlex Elder int ret; 3785589d30e0SAlex Elder size_t size; 3786589d30e0SAlex Elder char *object_name; 3787589d30e0SAlex Elder void *response; 3788589d30e0SAlex Elder void *p; 3789589d30e0SAlex Elder 3790589d30e0SAlex Elder /* 37912c0d0a10SAlex Elder * When probing a parent image, the image id is already 37922c0d0a10SAlex Elder * known (and the image name likely is not). 
There's no 37932c0d0a10SAlex Elder * need to fetch the image id again in this case. 37942c0d0a10SAlex Elder */ 37952c0d0a10SAlex Elder if (rbd_dev->spec->image_id) 37962c0d0a10SAlex Elder return 0; 37972c0d0a10SAlex Elder 37982c0d0a10SAlex Elder /* 3799589d30e0SAlex Elder * First, see if the format 2 image id file exists, and if 3800589d30e0SAlex Elder * so, get the image's persistent id from it. 3801589d30e0SAlex Elder */ 380269e7a02fSAlex Elder size = sizeof (RBD_ID_PREFIX) + strlen(rbd_dev->spec->image_name); 3803589d30e0SAlex Elder object_name = kmalloc(size, GFP_NOIO); 3804589d30e0SAlex Elder if (!object_name) 3805589d30e0SAlex Elder return -ENOMEM; 38060d7dbfceSAlex Elder sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->spec->image_name); 3807589d30e0SAlex Elder dout("rbd id object name is %s\n", object_name); 3808589d30e0SAlex Elder 3809589d30e0SAlex Elder /* Response will be an encoded string, which includes a length */ 3810589d30e0SAlex Elder 3811589d30e0SAlex Elder size = sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX; 3812589d30e0SAlex Elder response = kzalloc(size, GFP_NOIO); 3813589d30e0SAlex Elder if (!response) { 3814589d30e0SAlex Elder ret = -ENOMEM; 3815589d30e0SAlex Elder goto out; 3816589d30e0SAlex Elder } 3817589d30e0SAlex Elder 381836be9a76SAlex Elder ret = rbd_obj_method_sync(rbd_dev, object_name, 3819589d30e0SAlex Elder "rbd", "get_id", 3820589d30e0SAlex Elder NULL, 0, 382107b2391fSAlex Elder response, RBD_IMAGE_ID_LEN_MAX, NULL); 382236be9a76SAlex Elder dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 3823589d30e0SAlex Elder if (ret < 0) 3824589d30e0SAlex Elder goto out; 3825589d30e0SAlex Elder 3826589d30e0SAlex Elder p = response; 38270d7dbfceSAlex Elder rbd_dev->spec->image_id = ceph_extract_encoded_string(&p, 3828589d30e0SAlex Elder p + RBD_IMAGE_ID_LEN_MAX, 3829979ed480SAlex Elder NULL, GFP_NOIO); 38300d7dbfceSAlex Elder if (IS_ERR(rbd_dev->spec->image_id)) { 38310d7dbfceSAlex Elder ret = PTR_ERR(rbd_dev->spec->image_id); 
38320d7dbfceSAlex Elder rbd_dev->spec->image_id = NULL; 3833589d30e0SAlex Elder } else { 38340d7dbfceSAlex Elder dout("image_id is %s\n", rbd_dev->spec->image_id); 3835589d30e0SAlex Elder } 3836589d30e0SAlex Elder out: 3837589d30e0SAlex Elder kfree(response); 3838589d30e0SAlex Elder kfree(object_name); 3839589d30e0SAlex Elder 3840589d30e0SAlex Elder return ret; 3841589d30e0SAlex Elder } 3842589d30e0SAlex Elder 3843a30b71b9SAlex Elder static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) 3844a30b71b9SAlex Elder { 3845a30b71b9SAlex Elder int ret; 3846a30b71b9SAlex Elder size_t size; 3847a30b71b9SAlex Elder 3848a30b71b9SAlex Elder /* Version 1 images have no id; empty string is used */ 3849a30b71b9SAlex Elder 38500d7dbfceSAlex Elder rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL); 38510d7dbfceSAlex Elder if (!rbd_dev->spec->image_id) 3852a30b71b9SAlex Elder return -ENOMEM; 3853a30b71b9SAlex Elder 3854a30b71b9SAlex Elder /* Record the header object name for this rbd image. */ 3855a30b71b9SAlex Elder 385669e7a02fSAlex Elder size = strlen(rbd_dev->spec->image_name) + sizeof (RBD_SUFFIX); 3857a30b71b9SAlex Elder rbd_dev->header_name = kmalloc(size, GFP_KERNEL); 3858a30b71b9SAlex Elder if (!rbd_dev->header_name) { 3859a30b71b9SAlex Elder ret = -ENOMEM; 3860a30b71b9SAlex Elder goto out_err; 3861a30b71b9SAlex Elder } 38620d7dbfceSAlex Elder sprintf(rbd_dev->header_name, "%s%s", 38630d7dbfceSAlex Elder rbd_dev->spec->image_name, RBD_SUFFIX); 3864a30b71b9SAlex Elder 3865a30b71b9SAlex Elder /* Populate rbd image metadata */ 3866a30b71b9SAlex Elder 3867a30b71b9SAlex Elder ret = rbd_read_header(rbd_dev, &rbd_dev->header); 3868a30b71b9SAlex Elder if (ret < 0) 3869a30b71b9SAlex Elder goto out_err; 387086b00e0dSAlex Elder 387186b00e0dSAlex Elder /* Version 1 images have no parent (no layering) */ 387286b00e0dSAlex Elder 387386b00e0dSAlex Elder rbd_dev->parent_spec = NULL; 387486b00e0dSAlex Elder rbd_dev->parent_overlap = 0; 387586b00e0dSAlex Elder 3876a30b71b9SAlex Elder 
rbd_dev->image_format = 1; 3877a30b71b9SAlex Elder 3878a30b71b9SAlex Elder dout("discovered version 1 image, header name is %s\n", 3879a30b71b9SAlex Elder rbd_dev->header_name); 3880a30b71b9SAlex Elder 3881a30b71b9SAlex Elder return 0; 3882a30b71b9SAlex Elder 3883a30b71b9SAlex Elder out_err: 3884a30b71b9SAlex Elder kfree(rbd_dev->header_name); 3885a30b71b9SAlex Elder rbd_dev->header_name = NULL; 38860d7dbfceSAlex Elder kfree(rbd_dev->spec->image_id); 38870d7dbfceSAlex Elder rbd_dev->spec->image_id = NULL; 3888a30b71b9SAlex Elder 3889a30b71b9SAlex Elder return ret; 3890a30b71b9SAlex Elder } 3891a30b71b9SAlex Elder 3892a30b71b9SAlex Elder static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) 3893a30b71b9SAlex Elder { 3894a30b71b9SAlex Elder size_t size; 38959d475de5SAlex Elder int ret; 38966e14b1a6SAlex Elder u64 ver = 0; 3897a30b71b9SAlex Elder 3898a30b71b9SAlex Elder /* 3899a30b71b9SAlex Elder * Image id was filled in by the caller. Record the header 3900a30b71b9SAlex Elder * object name for this rbd image. 3901a30b71b9SAlex Elder */ 3902979ed480SAlex Elder size = sizeof (RBD_HEADER_PREFIX) + strlen(rbd_dev->spec->image_id); 3903a30b71b9SAlex Elder rbd_dev->header_name = kmalloc(size, GFP_KERNEL); 3904a30b71b9SAlex Elder if (!rbd_dev->header_name) 3905a30b71b9SAlex Elder return -ENOMEM; 3906a30b71b9SAlex Elder sprintf(rbd_dev->header_name, "%s%s", 39070d7dbfceSAlex Elder RBD_HEADER_PREFIX, rbd_dev->spec->image_id); 39089d475de5SAlex Elder 39099d475de5SAlex Elder /* Get the size and object order for the image */ 39109d475de5SAlex Elder 39119d475de5SAlex Elder ret = rbd_dev_v2_image_size(rbd_dev); 39129d475de5SAlex Elder if (ret < 0) 39139d475de5SAlex Elder goto out_err; 39141e130199SAlex Elder 39151e130199SAlex Elder /* Get the object prefix (a.k.a. 
block_name) for the image */ 39161e130199SAlex Elder 39171e130199SAlex Elder ret = rbd_dev_v2_object_prefix(rbd_dev); 39181e130199SAlex Elder if (ret < 0) 39191e130199SAlex Elder goto out_err; 3920b1b5402aSAlex Elder 3921d889140cSAlex Elder /* Get the and check features for the image */ 3922b1b5402aSAlex Elder 3923b1b5402aSAlex Elder ret = rbd_dev_v2_features(rbd_dev); 3924b1b5402aSAlex Elder if (ret < 0) 3925b1b5402aSAlex Elder goto out_err; 392635d489f9SAlex Elder 392786b00e0dSAlex Elder /* If the image supports layering, get the parent info */ 392886b00e0dSAlex Elder 392986b00e0dSAlex Elder if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { 393086b00e0dSAlex Elder ret = rbd_dev_v2_parent_info(rbd_dev); 393186b00e0dSAlex Elder if (ret < 0) 393286b00e0dSAlex Elder goto out_err; 393386b00e0dSAlex Elder } 393486b00e0dSAlex Elder 39356e14b1a6SAlex Elder /* crypto and compression type aren't (yet) supported for v2 images */ 393635d489f9SAlex Elder 39376e14b1a6SAlex Elder rbd_dev->header.crypt_type = 0; 39386e14b1a6SAlex Elder rbd_dev->header.comp_type = 0; 39396e14b1a6SAlex Elder 39406e14b1a6SAlex Elder /* Get the snapshot context, plus the header version */ 39416e14b1a6SAlex Elder 39426e14b1a6SAlex Elder ret = rbd_dev_v2_snap_context(rbd_dev, &ver); 394335d489f9SAlex Elder if (ret) 394435d489f9SAlex Elder goto out_err; 39456e14b1a6SAlex Elder rbd_dev->header.obj_version = ver; 39466e14b1a6SAlex Elder 3947a30b71b9SAlex Elder rbd_dev->image_format = 2; 3948a30b71b9SAlex Elder 3949a30b71b9SAlex Elder dout("discovered version 2 image, header name is %s\n", 3950a30b71b9SAlex Elder rbd_dev->header_name); 3951a30b71b9SAlex Elder 395235152979SAlex Elder return 0; 39539d475de5SAlex Elder out_err: 395486b00e0dSAlex Elder rbd_dev->parent_overlap = 0; 395586b00e0dSAlex Elder rbd_spec_put(rbd_dev->parent_spec); 395686b00e0dSAlex Elder rbd_dev->parent_spec = NULL; 39579d475de5SAlex Elder kfree(rbd_dev->header_name); 39589d475de5SAlex Elder rbd_dev->header_name = NULL; 
39591e130199SAlex Elder kfree(rbd_dev->header.object_prefix); 39601e130199SAlex Elder rbd_dev->header.object_prefix = NULL; 39619d475de5SAlex Elder 39629d475de5SAlex Elder return ret; 3963a30b71b9SAlex Elder } 3964a30b71b9SAlex Elder 396583a06263SAlex Elder static int rbd_dev_probe_finish(struct rbd_device *rbd_dev) 396683a06263SAlex Elder { 396783a06263SAlex Elder int ret; 396883a06263SAlex Elder 396983a06263SAlex Elder /* no need to lock here, as rbd_dev is not registered yet */ 397083a06263SAlex Elder ret = rbd_dev_snaps_update(rbd_dev); 397183a06263SAlex Elder if (ret) 397283a06263SAlex Elder return ret; 397383a06263SAlex Elder 39749e15b77dSAlex Elder ret = rbd_dev_probe_update_spec(rbd_dev); 39759e15b77dSAlex Elder if (ret) 39769e15b77dSAlex Elder goto err_out_snaps; 39779e15b77dSAlex Elder 397883a06263SAlex Elder ret = rbd_dev_set_mapping(rbd_dev); 397983a06263SAlex Elder if (ret) 398083a06263SAlex Elder goto err_out_snaps; 398183a06263SAlex Elder 398283a06263SAlex Elder /* generate unique id: find highest unique id, add one */ 398383a06263SAlex Elder rbd_dev_id_get(rbd_dev); 398483a06263SAlex Elder 398583a06263SAlex Elder /* Fill in the device name, now that we have its id. */ 398683a06263SAlex Elder BUILD_BUG_ON(DEV_NAME_LEN 398783a06263SAlex Elder < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH); 398883a06263SAlex Elder sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id); 398983a06263SAlex Elder 399083a06263SAlex Elder /* Get our block major device number. */ 399183a06263SAlex Elder 399283a06263SAlex Elder ret = register_blkdev(0, rbd_dev->name); 399383a06263SAlex Elder if (ret < 0) 399483a06263SAlex Elder goto err_out_id; 399583a06263SAlex Elder rbd_dev->major = ret; 399683a06263SAlex Elder 399783a06263SAlex Elder /* Set up the blkdev mapping. 
*/ 399883a06263SAlex Elder 399983a06263SAlex Elder ret = rbd_init_disk(rbd_dev); 400083a06263SAlex Elder if (ret) 400183a06263SAlex Elder goto err_out_blkdev; 400283a06263SAlex Elder 400383a06263SAlex Elder ret = rbd_bus_add_dev(rbd_dev); 400483a06263SAlex Elder if (ret) 400583a06263SAlex Elder goto err_out_disk; 400683a06263SAlex Elder 400783a06263SAlex Elder /* 400883a06263SAlex Elder * At this point cleanup in the event of an error is the job 400983a06263SAlex Elder * of the sysfs code (initiated by rbd_bus_del_dev()). 401083a06263SAlex Elder */ 401183a06263SAlex Elder down_write(&rbd_dev->header_rwsem); 401283a06263SAlex Elder ret = rbd_dev_snaps_register(rbd_dev); 401383a06263SAlex Elder up_write(&rbd_dev->header_rwsem); 401483a06263SAlex Elder if (ret) 401583a06263SAlex Elder goto err_out_bus; 401683a06263SAlex Elder 40179969ebc5SAlex Elder ret = rbd_dev_header_watch_sync(rbd_dev, 1); 401883a06263SAlex Elder if (ret) 401983a06263SAlex Elder goto err_out_bus; 402083a06263SAlex Elder 402183a06263SAlex Elder /* Everything's ready. Announce the disk to the world. 
*/ 402283a06263SAlex Elder 402383a06263SAlex Elder add_disk(rbd_dev->disk); 402483a06263SAlex Elder 402583a06263SAlex Elder pr_info("%s: added with size 0x%llx\n", rbd_dev->disk->disk_name, 402683a06263SAlex Elder (unsigned long long) rbd_dev->mapping.size); 402783a06263SAlex Elder 402883a06263SAlex Elder return ret; 402983a06263SAlex Elder err_out_bus: 403083a06263SAlex Elder /* this will also clean up rest of rbd_dev stuff */ 403183a06263SAlex Elder 403283a06263SAlex Elder rbd_bus_del_dev(rbd_dev); 403383a06263SAlex Elder 403483a06263SAlex Elder return ret; 403583a06263SAlex Elder err_out_disk: 403683a06263SAlex Elder rbd_free_disk(rbd_dev); 403783a06263SAlex Elder err_out_blkdev: 403883a06263SAlex Elder unregister_blkdev(rbd_dev->major, rbd_dev->name); 403983a06263SAlex Elder err_out_id: 404083a06263SAlex Elder rbd_dev_id_put(rbd_dev); 404183a06263SAlex Elder err_out_snaps: 404283a06263SAlex Elder rbd_remove_all_snaps(rbd_dev); 404383a06263SAlex Elder 404483a06263SAlex Elder return ret; 404583a06263SAlex Elder } 404683a06263SAlex Elder 4047a30b71b9SAlex Elder /* 4048a30b71b9SAlex Elder * Probe for the existence of the header object for the given rbd 4049a30b71b9SAlex Elder * device. For format 2 images this includes determining the image 4050a30b71b9SAlex Elder * id. 4051a30b71b9SAlex Elder */ 4052a30b71b9SAlex Elder static int rbd_dev_probe(struct rbd_device *rbd_dev) 4053a30b71b9SAlex Elder { 4054a30b71b9SAlex Elder int ret; 4055a30b71b9SAlex Elder 4056a30b71b9SAlex Elder /* 4057a30b71b9SAlex Elder * Get the id from the image id object. If it's not a 4058a30b71b9SAlex Elder * format 2 image, we'll get ENOENT back, and we'll assume 4059a30b71b9SAlex Elder * it's a format 1 image. 
4060a30b71b9SAlex Elder */ 4061a30b71b9SAlex Elder ret = rbd_dev_image_id(rbd_dev); 4062a30b71b9SAlex Elder if (ret) 4063a30b71b9SAlex Elder ret = rbd_dev_v1_probe(rbd_dev); 4064a30b71b9SAlex Elder else 4065a30b71b9SAlex Elder ret = rbd_dev_v2_probe(rbd_dev); 406683a06263SAlex Elder if (ret) { 4067a30b71b9SAlex Elder dout("probe failed, returning %d\n", ret); 4068a30b71b9SAlex Elder 4069a30b71b9SAlex Elder return ret; 4070a30b71b9SAlex Elder } 4071a30b71b9SAlex Elder 407283a06263SAlex Elder ret = rbd_dev_probe_finish(rbd_dev); 407383a06263SAlex Elder if (ret) 407483a06263SAlex Elder rbd_header_free(&rbd_dev->header); 407583a06263SAlex Elder 407683a06263SAlex Elder return ret; 407783a06263SAlex Elder } 407883a06263SAlex Elder 407959c2be1eSYehuda Sadeh static ssize_t rbd_add(struct bus_type *bus, 408059c2be1eSYehuda Sadeh const char *buf, 408159c2be1eSYehuda Sadeh size_t count) 4082602adf40SYehuda Sadeh { 4083cb8627c7SAlex Elder struct rbd_device *rbd_dev = NULL; 4084dc79b113SAlex Elder struct ceph_options *ceph_opts = NULL; 40854e9afebaSAlex Elder struct rbd_options *rbd_opts = NULL; 4086859c31dfSAlex Elder struct rbd_spec *spec = NULL; 40879d3997fdSAlex Elder struct rbd_client *rbdc; 408827cc2594SAlex Elder struct ceph_osd_client *osdc; 408927cc2594SAlex Elder int rc = -ENOMEM; 4090602adf40SYehuda Sadeh 4091602adf40SYehuda Sadeh if (!try_module_get(THIS_MODULE)) 4092602adf40SYehuda Sadeh return -ENODEV; 4093602adf40SYehuda Sadeh 4094a725f65eSAlex Elder /* parse add command */ 4095859c31dfSAlex Elder rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec); 4096dc79b113SAlex Elder if (rc < 0) 4097bd4ba655SAlex Elder goto err_out_module; 4098a725f65eSAlex Elder 40999d3997fdSAlex Elder rbdc = rbd_get_client(ceph_opts); 41009d3997fdSAlex Elder if (IS_ERR(rbdc)) { 41019d3997fdSAlex Elder rc = PTR_ERR(rbdc); 41020ddebc0cSAlex Elder goto err_out_args; 41039d3997fdSAlex Elder } 4104c53d5893SAlex Elder ceph_opts = NULL; /* rbd_dev client now owns this */ 
4105602adf40SYehuda Sadeh 4106602adf40SYehuda Sadeh /* pick the pool */ 41079d3997fdSAlex Elder osdc = &rbdc->client->osdc; 4108859c31dfSAlex Elder rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name); 4109602adf40SYehuda Sadeh if (rc < 0) 4110602adf40SYehuda Sadeh goto err_out_client; 4111859c31dfSAlex Elder spec->pool_id = (u64) rc; 4112859c31dfSAlex Elder 41130903e875SAlex Elder /* The ceph file layout needs to fit pool id in 32 bits */ 41140903e875SAlex Elder 41150903e875SAlex Elder if (WARN_ON(spec->pool_id > (u64) U32_MAX)) { 41160903e875SAlex Elder rc = -EIO; 41170903e875SAlex Elder goto err_out_client; 41180903e875SAlex Elder } 41190903e875SAlex Elder 4120c53d5893SAlex Elder rbd_dev = rbd_dev_create(rbdc, spec); 4121bd4ba655SAlex Elder if (!rbd_dev) 4122bd4ba655SAlex Elder goto err_out_client; 4123c53d5893SAlex Elder rbdc = NULL; /* rbd_dev now owns this */ 4124c53d5893SAlex Elder spec = NULL; /* rbd_dev now owns this */ 4125602adf40SYehuda Sadeh 4126bd4ba655SAlex Elder rbd_dev->mapping.read_only = rbd_opts->read_only; 4127c53d5893SAlex Elder kfree(rbd_opts); 4128c53d5893SAlex Elder rbd_opts = NULL; /* done with this */ 4129bd4ba655SAlex Elder 4130a30b71b9SAlex Elder rc = rbd_dev_probe(rbd_dev); 4131a30b71b9SAlex Elder if (rc < 0) 4132c53d5893SAlex Elder goto err_out_rbd_dev; 413305fd6f6fSAlex Elder 4134602adf40SYehuda Sadeh return count; 4135c53d5893SAlex Elder err_out_rbd_dev: 4136c53d5893SAlex Elder rbd_dev_destroy(rbd_dev); 4137bd4ba655SAlex Elder err_out_client: 41389d3997fdSAlex Elder rbd_put_client(rbdc); 41390ddebc0cSAlex Elder err_out_args: 414078cea76eSAlex Elder if (ceph_opts) 414178cea76eSAlex Elder ceph_destroy_options(ceph_opts); 41424e9afebaSAlex Elder kfree(rbd_opts); 4143859c31dfSAlex Elder rbd_spec_put(spec); 4144bd4ba655SAlex Elder err_out_module: 4145bd4ba655SAlex Elder module_put(THIS_MODULE); 414627cc2594SAlex Elder 4147602adf40SYehuda Sadeh dout("Error adding device %s\n", buf); 414827cc2594SAlex Elder 414927cc2594SAlex Elder 
return (ssize_t) rc; 4150602adf40SYehuda Sadeh } 4151602adf40SYehuda Sadeh 4152de71a297SAlex Elder static struct rbd_device *__rbd_get_dev(unsigned long dev_id) 4153602adf40SYehuda Sadeh { 4154602adf40SYehuda Sadeh struct list_head *tmp; 4155602adf40SYehuda Sadeh struct rbd_device *rbd_dev; 4156602adf40SYehuda Sadeh 4157e124a82fSAlex Elder spin_lock(&rbd_dev_list_lock); 4158602adf40SYehuda Sadeh list_for_each(tmp, &rbd_dev_list) { 4159602adf40SYehuda Sadeh rbd_dev = list_entry(tmp, struct rbd_device, node); 4160de71a297SAlex Elder if (rbd_dev->dev_id == dev_id) { 4161e124a82fSAlex Elder spin_unlock(&rbd_dev_list_lock); 4162602adf40SYehuda Sadeh return rbd_dev; 4163602adf40SYehuda Sadeh } 4164e124a82fSAlex Elder } 4165e124a82fSAlex Elder spin_unlock(&rbd_dev_list_lock); 4166602adf40SYehuda Sadeh return NULL; 4167602adf40SYehuda Sadeh } 4168602adf40SYehuda Sadeh 4169dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev) 4170602adf40SYehuda Sadeh { 4171593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 4172602adf40SYehuda Sadeh 417359c2be1eSYehuda Sadeh if (rbd_dev->watch_event) 41749969ebc5SAlex Elder rbd_dev_header_watch_sync(rbd_dev, 0); 4175602adf40SYehuda Sadeh 4176602adf40SYehuda Sadeh /* clean up and free blkdev */ 4177602adf40SYehuda Sadeh rbd_free_disk(rbd_dev); 4178602adf40SYehuda Sadeh unregister_blkdev(rbd_dev->major, rbd_dev->name); 417932eec68dSAlex Elder 41802ac4e75dSAlex Elder /* release allocated disk header fields */ 41812ac4e75dSAlex Elder rbd_header_free(&rbd_dev->header); 41822ac4e75dSAlex Elder 418332eec68dSAlex Elder /* done with the id, and with the rbd_dev */ 4184e2839308SAlex Elder rbd_dev_id_put(rbd_dev); 4185c53d5893SAlex Elder rbd_assert(rbd_dev->rbd_client != NULL); 4186c53d5893SAlex Elder rbd_dev_destroy(rbd_dev); 4187602adf40SYehuda Sadeh 4188602adf40SYehuda Sadeh /* release module ref */ 4189602adf40SYehuda Sadeh module_put(THIS_MODULE); 4190602adf40SYehuda Sadeh } 4191602adf40SYehuda Sadeh 
4192dfc5606dSYehuda Sadeh static ssize_t rbd_remove(struct bus_type *bus, 4193602adf40SYehuda Sadeh const char *buf, 4194602adf40SYehuda Sadeh size_t count) 4195602adf40SYehuda Sadeh { 4196602adf40SYehuda Sadeh struct rbd_device *rbd_dev = NULL; 4197602adf40SYehuda Sadeh int target_id, rc; 4198602adf40SYehuda Sadeh unsigned long ul; 4199602adf40SYehuda Sadeh int ret = count; 4200602adf40SYehuda Sadeh 4201602adf40SYehuda Sadeh rc = strict_strtoul(buf, 10, &ul); 4202602adf40SYehuda Sadeh if (rc) 4203602adf40SYehuda Sadeh return rc; 4204602adf40SYehuda Sadeh 4205602adf40SYehuda Sadeh /* convert to int; abort if we lost anything in the conversion */ 4206602adf40SYehuda Sadeh target_id = (int) ul; 4207602adf40SYehuda Sadeh if (target_id != ul) 4208602adf40SYehuda Sadeh return -EINVAL; 4209602adf40SYehuda Sadeh 4210602adf40SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 4211602adf40SYehuda Sadeh 4212602adf40SYehuda Sadeh rbd_dev = __rbd_get_dev(target_id); 4213602adf40SYehuda Sadeh if (!rbd_dev) { 4214602adf40SYehuda Sadeh ret = -ENOENT; 4215602adf40SYehuda Sadeh goto done; 4216602adf40SYehuda Sadeh } 4217602adf40SYehuda Sadeh 4218a14ea269SAlex Elder spin_lock_irq(&rbd_dev->lock); 4219b82d167bSAlex Elder if (rbd_dev->open_count) 422042382b70SAlex Elder ret = -EBUSY; 4221b82d167bSAlex Elder else 4222b82d167bSAlex Elder set_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags); 4223a14ea269SAlex Elder spin_unlock_irq(&rbd_dev->lock); 4224b82d167bSAlex Elder if (ret < 0) 422542382b70SAlex Elder goto done; 422642382b70SAlex Elder 422741f38c2bSAlex Elder rbd_remove_all_snaps(rbd_dev); 4228dfc5606dSYehuda Sadeh rbd_bus_del_dev(rbd_dev); 4229602adf40SYehuda Sadeh 4230602adf40SYehuda Sadeh done: 4231602adf40SYehuda Sadeh mutex_unlock(&ctl_mutex); 4232aafb230eSAlex Elder 4233602adf40SYehuda Sadeh return ret; 4234602adf40SYehuda Sadeh } 4235602adf40SYehuda Sadeh 4236602adf40SYehuda Sadeh /* 4237602adf40SYehuda Sadeh * create control files in sysfs 4238dfc5606dSYehuda 
Sadeh * /sys/bus/rbd/... 4239602adf40SYehuda Sadeh */ 4240602adf40SYehuda Sadeh static int rbd_sysfs_init(void) 4241602adf40SYehuda Sadeh { 4242dfc5606dSYehuda Sadeh int ret; 4243602adf40SYehuda Sadeh 4244fed4c143SAlex Elder ret = device_register(&rbd_root_dev); 4245dfc5606dSYehuda Sadeh if (ret < 0) 4246dfc5606dSYehuda Sadeh return ret; 4247602adf40SYehuda Sadeh 4248fed4c143SAlex Elder ret = bus_register(&rbd_bus_type); 4249fed4c143SAlex Elder if (ret < 0) 4250fed4c143SAlex Elder device_unregister(&rbd_root_dev); 4251602adf40SYehuda Sadeh 4252602adf40SYehuda Sadeh return ret; 4253602adf40SYehuda Sadeh } 4254602adf40SYehuda Sadeh 4255602adf40SYehuda Sadeh static void rbd_sysfs_cleanup(void) 4256602adf40SYehuda Sadeh { 4257dfc5606dSYehuda Sadeh bus_unregister(&rbd_bus_type); 4258fed4c143SAlex Elder device_unregister(&rbd_root_dev); 4259602adf40SYehuda Sadeh } 4260602adf40SYehuda Sadeh 4261cc344fa1SAlex Elder static int __init rbd_init(void) 4262602adf40SYehuda Sadeh { 4263602adf40SYehuda Sadeh int rc; 4264602adf40SYehuda Sadeh 42651e32d34cSAlex Elder if (!libceph_compatible(NULL)) { 42661e32d34cSAlex Elder rbd_warn(NULL, "libceph incompatibility (quitting)"); 42671e32d34cSAlex Elder 42681e32d34cSAlex Elder return -EINVAL; 42691e32d34cSAlex Elder } 4270602adf40SYehuda Sadeh rc = rbd_sysfs_init(); 4271602adf40SYehuda Sadeh if (rc) 4272602adf40SYehuda Sadeh return rc; 4273f0f8cef5SAlex Elder pr_info("loaded " RBD_DRV_NAME_LONG "\n"); 4274602adf40SYehuda Sadeh return 0; 4275602adf40SYehuda Sadeh } 4276602adf40SYehuda Sadeh 4277cc344fa1SAlex Elder static void __exit rbd_exit(void) 4278602adf40SYehuda Sadeh { 4279602adf40SYehuda Sadeh rbd_sysfs_cleanup(); 4280602adf40SYehuda Sadeh } 4281602adf40SYehuda Sadeh 4282602adf40SYehuda Sadeh module_init(rbd_init); 4283602adf40SYehuda Sadeh module_exit(rbd_exit); 4284602adf40SYehuda Sadeh 4285602adf40SYehuda Sadeh MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 4286602adf40SYehuda Sadeh MODULE_AUTHOR("Yehuda Sadeh 
<yehuda@hq.newdream.net>"); 4287602adf40SYehuda Sadeh MODULE_DESCRIPTION("rados block device"); 4288602adf40SYehuda Sadeh 4289602adf40SYehuda Sadeh /* following authorship retained from original osdblk.c */ 4290602adf40SYehuda Sadeh MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); 4291602adf40SYehuda Sadeh 4292602adf40SYehuda Sadeh MODULE_LICENSE("GPL"); 4293