1602adf40SYehuda Sadeh /* 2602adf40SYehuda Sadeh rbd.c -- Export ceph rados objects as a Linux block device 3602adf40SYehuda Sadeh 4602adf40SYehuda Sadeh 5602adf40SYehuda Sadeh based on drivers/block/osdblk.c: 6602adf40SYehuda Sadeh 7602adf40SYehuda Sadeh Copyright 2009 Red Hat, Inc. 8602adf40SYehuda Sadeh 9602adf40SYehuda Sadeh This program is free software; you can redistribute it and/or modify 10602adf40SYehuda Sadeh it under the terms of the GNU General Public License as published by 11602adf40SYehuda Sadeh the Free Software Foundation. 12602adf40SYehuda Sadeh 13602adf40SYehuda Sadeh This program is distributed in the hope that it will be useful, 14602adf40SYehuda Sadeh but WITHOUT ANY WARRANTY; without even the implied warranty of 15602adf40SYehuda Sadeh MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16602adf40SYehuda Sadeh GNU General Public License for more details. 17602adf40SYehuda Sadeh 18602adf40SYehuda Sadeh You should have received a copy of the GNU General Public License 19602adf40SYehuda Sadeh along with this program; see the file COPYING. If not, write to 20602adf40SYehuda Sadeh the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
21602adf40SYehuda Sadeh 22602adf40SYehuda Sadeh 23602adf40SYehuda Sadeh 24dfc5606dSYehuda Sadeh For usage instructions, please refer to: 25602adf40SYehuda Sadeh 26dfc5606dSYehuda Sadeh Documentation/ABI/testing/sysfs-bus-rbd 27602adf40SYehuda Sadeh 28602adf40SYehuda Sadeh */ 29602adf40SYehuda Sadeh 30602adf40SYehuda Sadeh #include <linux/ceph/libceph.h> 31602adf40SYehuda Sadeh #include <linux/ceph/osd_client.h> 32602adf40SYehuda Sadeh #include <linux/ceph/mon_client.h> 33602adf40SYehuda Sadeh #include <linux/ceph/decode.h> 3459c2be1eSYehuda Sadeh #include <linux/parser.h> 35602adf40SYehuda Sadeh 36602adf40SYehuda Sadeh #include <linux/kernel.h> 37602adf40SYehuda Sadeh #include <linux/device.h> 38602adf40SYehuda Sadeh #include <linux/module.h> 39602adf40SYehuda Sadeh #include <linux/fs.h> 40602adf40SYehuda Sadeh #include <linux/blkdev.h> 41602adf40SYehuda Sadeh 42602adf40SYehuda Sadeh #include "rbd_types.h" 43602adf40SYehuda Sadeh 44aafb230eSAlex Elder #define RBD_DEBUG /* Activate rbd_assert() calls */ 45aafb230eSAlex Elder 46593a9e7bSAlex Elder /* 47593a9e7bSAlex Elder * The basic unit of block I/O is a sector. It is interpreted in a 48593a9e7bSAlex Elder * number of contexts in Linux (blk, bio, genhd), but the default is 49593a9e7bSAlex Elder * universally 512 bytes. These symbols are just slightly more 50593a9e7bSAlex Elder * meaningful than the bare numbers they represent. 
 */
#define	SECTOR_SHIFT	9
#define	SECTOR_SIZE	(1ULL << SECTOR_SHIFT)

/* Short and long driver names; the short one prefixes rbd_warn() output */
#define RBD_DRV_NAME "rbd"
#define RBD_DRV_NAME_LONG "rbd (rados block device)"

#define RBD_MINORS_PER_MAJOR	256		/* max minors per blkdev */

/*
 * A snapshot name is limited so that the prefix followed by the name
 * still fits within NAME_MAX characters (the "- 1" drops the prefix's
 * terminating NUL from the sizeof).
 */
#define RBD_SNAP_DEV_NAME_PREFIX	"snap_"
#define RBD_MAX_SNAP_NAME_LEN	\
			(NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))

#define RBD_MAX_SNAP_COUNT	510	/* allows max snapc to fit in 4KB */

/* Name reported by rbd_snap_name() when the snapshot id is CEPH_NOSNAP */
#define RBD_SNAP_HEAD_NAME	"-"

/* This allows a single page to hold an image name sent by OSD */
#define RBD_IMAGE_NAME_LEN_MAX	(PAGE_SIZE - sizeof (__le32) - 1)
#define RBD_IMAGE_ID_LEN_MAX	64

#define RBD_OBJ_PREFIX_LEN_MAX	64

/* Feature bits */

#define RBD_FEATURE_LAYERING	(1<<0)
#define RBD_FEATURE_STRIPINGV2	(1<<1)
#define RBD_FEATURES_ALL \
	    (RBD_FEATURE_LAYERING | RBD_FEATURE_STRIPINGV2)

/*
 * Features supported by this (client software) implementation.
 * The mask is currently empty: none of the feature bits above is set.
 */

#define RBD_FEATURES_SUPPORTED	(0)

/*
 * An RBD device name will be "rbd#", where the "rbd" comes from
 * RBD_DRV_NAME above, and # is a unique integer identifier.
 * MAX_INT_FORMAT_WIDTH is used in ensuring DEV_NAME_LEN is big
 * enough to hold all possible device names.
 */
#define DEV_NAME_LEN		32
#define MAX_INT_FORMAT_WIDTH	((5 * sizeof (int)) / 2 + 1)

/*
 * block device image metadata (in-memory version)
 *
 * For format 1 images this is filled in from the on-disk header by
 * rbd_header_from_disk().
 */
struct rbd_image_header {
	/* These five fields never change for a given rbd image */
	char *object_prefix;	/* NUL-terminated copy of the on-disk prefix */
	u64 features;		/* always 0 for v1 images */
	__u8 obj_order;		/* NOTE(review): presumably log2 of object size */
	__u8 crypt_type;
	__u8 comp_type;

	/* The remaining fields need to be updated occasionally */
	u64 image_size;
	struct ceph_snap_context *snapc;	/* snapshot seq and ids */
	char *snap_names;	/* copy of the on-disk snapshot name bytes */
	u64 *snap_sizes;	/* one image size per snapshot */

	u64 obj_version;
};

/*
 * An rbd image specification.
 *
 * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely
 * identify an image.  Each rbd_dev structure includes a pointer to
 * an rbd_spec structure that encapsulates this identity.
 *
 * Each of the id's in an rbd_spec has an associated name.  For a
 * user-mapped image, the names are supplied and the id's associated
 * with them are looked up.  For a layered image, a parent image is
 * defined by the tuple, and the names are looked up.
125c66c6e0cSAlex Elder * 126c66c6e0cSAlex Elder * An rbd_dev structure contains a parent_spec pointer which is 127c66c6e0cSAlex Elder * non-null if the image it represents is a child in a layered 128c66c6e0cSAlex Elder * image. This pointer will refer to the rbd_spec structure used 129c66c6e0cSAlex Elder * by the parent rbd_dev for its own identity (i.e., the structure 130c66c6e0cSAlex Elder * is shared between the parent and child). 131c66c6e0cSAlex Elder * 132c66c6e0cSAlex Elder * Since these structures are populated once, during the discovery 133c66c6e0cSAlex Elder * phase of image construction, they are effectively immutable so 134c66c6e0cSAlex Elder * we make no effort to synchronize access to them. 135c66c6e0cSAlex Elder * 136c66c6e0cSAlex Elder * Note that code herein does not assume the image name is known (it 137c66c6e0cSAlex Elder * could be a null pointer). 1380d7dbfceSAlex Elder */ 1390d7dbfceSAlex Elder struct rbd_spec { 1400d7dbfceSAlex Elder u64 pool_id; 1410d7dbfceSAlex Elder char *pool_name; 1420d7dbfceSAlex Elder 1430d7dbfceSAlex Elder char *image_id; 1440d7dbfceSAlex Elder char *image_name; 1450d7dbfceSAlex Elder 1460d7dbfceSAlex Elder u64 snap_id; 1470d7dbfceSAlex Elder char *snap_name; 1480d7dbfceSAlex Elder 1490d7dbfceSAlex Elder struct kref kref; 1500d7dbfceSAlex Elder }; 1510d7dbfceSAlex Elder 152602adf40SYehuda Sadeh /* 153f0f8cef5SAlex Elder * an instance of the client. multiple devices may share an rbd client. 154602adf40SYehuda Sadeh */ 155602adf40SYehuda Sadeh struct rbd_client { 156602adf40SYehuda Sadeh struct ceph_client *client; 157602adf40SYehuda Sadeh struct kref kref; 158602adf40SYehuda Sadeh struct list_head node; 159602adf40SYehuda Sadeh }; 160602adf40SYehuda Sadeh 161bf0d5f50SAlex Elder struct rbd_img_request; 162bf0d5f50SAlex Elder typedef void (*rbd_img_callback_t)(struct rbd_img_request *); 163bf0d5f50SAlex Elder 164bf0d5f50SAlex Elder #define BAD_WHICH U32_MAX /* Good which or bad which, which? 
*/ 165bf0d5f50SAlex Elder 166bf0d5f50SAlex Elder struct rbd_obj_request; 167bf0d5f50SAlex Elder typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *); 168bf0d5f50SAlex Elder 1699969ebc5SAlex Elder enum obj_request_type { 1709969ebc5SAlex Elder OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES 1719969ebc5SAlex Elder }; 172bf0d5f50SAlex Elder 173bf0d5f50SAlex Elder struct rbd_obj_request { 174bf0d5f50SAlex Elder const char *object_name; 175bf0d5f50SAlex Elder u64 offset; /* object start byte */ 176bf0d5f50SAlex Elder u64 length; /* bytes from offset */ 177bf0d5f50SAlex Elder 178bf0d5f50SAlex Elder struct rbd_img_request *img_request; 1797da22d29SAlex Elder u64 img_offset; /* image relative offset */ 180bf0d5f50SAlex Elder struct list_head links; /* img_request->obj_requests */ 181bf0d5f50SAlex Elder u32 which; /* posn image request list */ 182bf0d5f50SAlex Elder 183bf0d5f50SAlex Elder enum obj_request_type type; 184788e2df3SAlex Elder union { 185bf0d5f50SAlex Elder struct bio *bio_list; 186788e2df3SAlex Elder struct { 187788e2df3SAlex Elder struct page **pages; 188788e2df3SAlex Elder u32 page_count; 189788e2df3SAlex Elder }; 190788e2df3SAlex Elder }; 191bf0d5f50SAlex Elder 192bf0d5f50SAlex Elder struct ceph_osd_request *osd_req; 193bf0d5f50SAlex Elder 194bf0d5f50SAlex Elder u64 xferred; /* bytes transferred */ 195bf0d5f50SAlex Elder u64 version; 1961b83bef2SSage Weil int result; 197bf0d5f50SAlex Elder atomic_t done; 198bf0d5f50SAlex Elder 199bf0d5f50SAlex Elder rbd_obj_callback_t callback; 200788e2df3SAlex Elder struct completion completion; 201bf0d5f50SAlex Elder 202bf0d5f50SAlex Elder struct kref kref; 203bf0d5f50SAlex Elder }; 204bf0d5f50SAlex Elder 2050c425248SAlex Elder enum img_req_flags { 2069849e986SAlex Elder IMG_REQ_WRITE, /* I/O direction: read = 0, write = 1 */ 2079849e986SAlex Elder IMG_REQ_CHILD, /* initiator: block = 0, child image = 1 */ 2080c425248SAlex Elder }; 2090c425248SAlex Elder 210bf0d5f50SAlex Elder struct rbd_img_request { 
211bf0d5f50SAlex Elder struct rbd_device *rbd_dev; 212bf0d5f50SAlex Elder u64 offset; /* starting image byte offset */ 213bf0d5f50SAlex Elder u64 length; /* byte count from offset */ 2140c425248SAlex Elder unsigned long flags; 215bf0d5f50SAlex Elder union { 216bf0d5f50SAlex Elder u64 snap_id; /* for reads */ 2179849e986SAlex Elder struct ceph_snap_context *snapc; /* for writes */ 2189849e986SAlex Elder }; 2199849e986SAlex Elder union { 2209849e986SAlex Elder struct request *rq; /* block request */ 2219849e986SAlex Elder struct rbd_obj_request *obj_request; /* obj req initiator */ 222bf0d5f50SAlex Elder }; 223bf0d5f50SAlex Elder spinlock_t completion_lock;/* protects next_completion */ 224bf0d5f50SAlex Elder u32 next_completion; 225bf0d5f50SAlex Elder rbd_img_callback_t callback; 22655f27e09SAlex Elder u64 xferred;/* aggregate bytes transferred */ 227a5a337d4SAlex Elder int result; /* first nonzero obj_request result */ 228bf0d5f50SAlex Elder 229bf0d5f50SAlex Elder u32 obj_request_count; 230bf0d5f50SAlex Elder struct list_head obj_requests; /* rbd_obj_request structs */ 231bf0d5f50SAlex Elder 232bf0d5f50SAlex Elder struct kref kref; 233bf0d5f50SAlex Elder }; 234bf0d5f50SAlex Elder 235bf0d5f50SAlex Elder #define for_each_obj_request(ireq, oreq) \ 236ef06f4d3SAlex Elder list_for_each_entry(oreq, &(ireq)->obj_requests, links) 237bf0d5f50SAlex Elder #define for_each_obj_request_from(ireq, oreq) \ 238ef06f4d3SAlex Elder list_for_each_entry_from(oreq, &(ireq)->obj_requests, links) 239bf0d5f50SAlex Elder #define for_each_obj_request_safe(ireq, oreq, n) \ 240ef06f4d3SAlex Elder list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links) 241bf0d5f50SAlex Elder 242dfc5606dSYehuda Sadeh struct rbd_snap { 243dfc5606dSYehuda Sadeh struct device dev; 244dfc5606dSYehuda Sadeh const char *name; 2453591538fSJosh Durgin u64 size; 246dfc5606dSYehuda Sadeh struct list_head node; 247dfc5606dSYehuda Sadeh u64 id; 24834b13184SAlex Elder u64 features; 249dfc5606dSYehuda Sadeh 
}; 250dfc5606dSYehuda Sadeh 251f84344f3SAlex Elder struct rbd_mapping { 25299c1f08fSAlex Elder u64 size; 25334b13184SAlex Elder u64 features; 254f84344f3SAlex Elder bool read_only; 255f84344f3SAlex Elder }; 256f84344f3SAlex Elder 257602adf40SYehuda Sadeh /* 258602adf40SYehuda Sadeh * a single device 259602adf40SYehuda Sadeh */ 260602adf40SYehuda Sadeh struct rbd_device { 261de71a297SAlex Elder int dev_id; /* blkdev unique id */ 262602adf40SYehuda Sadeh 263602adf40SYehuda Sadeh int major; /* blkdev assigned major */ 264602adf40SYehuda Sadeh struct gendisk *disk; /* blkdev's gendisk and rq */ 265602adf40SYehuda Sadeh 266a30b71b9SAlex Elder u32 image_format; /* Either 1 or 2 */ 267602adf40SYehuda Sadeh struct rbd_client *rbd_client; 268602adf40SYehuda Sadeh 269602adf40SYehuda Sadeh char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ 270602adf40SYehuda Sadeh 271b82d167bSAlex Elder spinlock_t lock; /* queue, flags, open_count */ 272602adf40SYehuda Sadeh 273602adf40SYehuda Sadeh struct rbd_image_header header; 274b82d167bSAlex Elder unsigned long flags; /* possibly lock protected */ 2750d7dbfceSAlex Elder struct rbd_spec *spec; 276602adf40SYehuda Sadeh 2770d7dbfceSAlex Elder char *header_name; 278971f839aSAlex Elder 2790903e875SAlex Elder struct ceph_file_layout layout; 2800903e875SAlex Elder 28159c2be1eSYehuda Sadeh struct ceph_osd_event *watch_event; 282975241afSAlex Elder struct rbd_obj_request *watch_request; 28359c2be1eSYehuda Sadeh 28486b00e0dSAlex Elder struct rbd_spec *parent_spec; 28586b00e0dSAlex Elder u64 parent_overlap; 28686b00e0dSAlex Elder 287c666601aSJosh Durgin /* protects updating the header */ 288c666601aSJosh Durgin struct rw_semaphore header_rwsem; 289f84344f3SAlex Elder 290f84344f3SAlex Elder struct rbd_mapping mapping; 291602adf40SYehuda Sadeh 292602adf40SYehuda Sadeh struct list_head node; 293dfc5606dSYehuda Sadeh 294dfc5606dSYehuda Sadeh /* list of snapshots */ 295dfc5606dSYehuda Sadeh struct list_head snaps; 296dfc5606dSYehuda Sadeh 
297dfc5606dSYehuda Sadeh /* sysfs related */ 298dfc5606dSYehuda Sadeh struct device dev; 299b82d167bSAlex Elder unsigned long open_count; /* protected by lock */ 300dfc5606dSYehuda Sadeh }; 301dfc5606dSYehuda Sadeh 302b82d167bSAlex Elder /* 303b82d167bSAlex Elder * Flag bits for rbd_dev->flags. If atomicity is required, 304b82d167bSAlex Elder * rbd_dev->lock is used to protect access. 305b82d167bSAlex Elder * 306b82d167bSAlex Elder * Currently, only the "removing" flag (which is coupled with the 307b82d167bSAlex Elder * "open_count" field) requires atomic access. 308b82d167bSAlex Elder */ 3096d292906SAlex Elder enum rbd_dev_flags { 3106d292906SAlex Elder RBD_DEV_FLAG_EXISTS, /* mapped snapshot has not been deleted */ 311b82d167bSAlex Elder RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */ 3126d292906SAlex Elder }; 3136d292906SAlex Elder 314602adf40SYehuda Sadeh static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ 315e124a82fSAlex Elder 316602adf40SYehuda Sadeh static LIST_HEAD(rbd_dev_list); /* devices */ 317e124a82fSAlex Elder static DEFINE_SPINLOCK(rbd_dev_list_lock); 318e124a82fSAlex Elder 319602adf40SYehuda Sadeh static LIST_HEAD(rbd_client_list); /* clients */ 320432b8587SAlex Elder static DEFINE_SPINLOCK(rbd_client_list_lock); 321602adf40SYehuda Sadeh 322304f6808SAlex Elder static int rbd_dev_snaps_update(struct rbd_device *rbd_dev); 323304f6808SAlex Elder static int rbd_dev_snaps_register(struct rbd_device *rbd_dev); 324304f6808SAlex Elder 325dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev); 32641f38c2bSAlex Elder static void rbd_remove_snap_dev(struct rbd_snap *snap); 327dfc5606dSYehuda Sadeh 328f0f8cef5SAlex Elder static ssize_t rbd_add(struct bus_type *bus, const char *buf, 329f0f8cef5SAlex Elder size_t count); 330f0f8cef5SAlex Elder static ssize_t rbd_remove(struct bus_type *bus, const char *buf, 331f0f8cef5SAlex Elder size_t count); 332f0f8cef5SAlex Elder 333f0f8cef5SAlex Elder static struct 
bus_attribute rbd_bus_attrs[] = { 334f0f8cef5SAlex Elder __ATTR(add, S_IWUSR, NULL, rbd_add), 335f0f8cef5SAlex Elder __ATTR(remove, S_IWUSR, NULL, rbd_remove), 336f0f8cef5SAlex Elder __ATTR_NULL 337f0f8cef5SAlex Elder }; 338f0f8cef5SAlex Elder 339f0f8cef5SAlex Elder static struct bus_type rbd_bus_type = { 340f0f8cef5SAlex Elder .name = "rbd", 341f0f8cef5SAlex Elder .bus_attrs = rbd_bus_attrs, 342f0f8cef5SAlex Elder }; 343f0f8cef5SAlex Elder 344f0f8cef5SAlex Elder static void rbd_root_dev_release(struct device *dev) 345f0f8cef5SAlex Elder { 346f0f8cef5SAlex Elder } 347f0f8cef5SAlex Elder 348f0f8cef5SAlex Elder static struct device rbd_root_dev = { 349f0f8cef5SAlex Elder .init_name = "rbd", 350f0f8cef5SAlex Elder .release = rbd_root_dev_release, 351f0f8cef5SAlex Elder }; 352f0f8cef5SAlex Elder 35306ecc6cbSAlex Elder static __printf(2, 3) 35406ecc6cbSAlex Elder void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) 35506ecc6cbSAlex Elder { 35606ecc6cbSAlex Elder struct va_format vaf; 35706ecc6cbSAlex Elder va_list args; 35806ecc6cbSAlex Elder 35906ecc6cbSAlex Elder va_start(args, fmt); 36006ecc6cbSAlex Elder vaf.fmt = fmt; 36106ecc6cbSAlex Elder vaf.va = &args; 36206ecc6cbSAlex Elder 36306ecc6cbSAlex Elder if (!rbd_dev) 36406ecc6cbSAlex Elder printk(KERN_WARNING "%s: %pV\n", RBD_DRV_NAME, &vaf); 36506ecc6cbSAlex Elder else if (rbd_dev->disk) 36606ecc6cbSAlex Elder printk(KERN_WARNING "%s: %s: %pV\n", 36706ecc6cbSAlex Elder RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf); 36806ecc6cbSAlex Elder else if (rbd_dev->spec && rbd_dev->spec->image_name) 36906ecc6cbSAlex Elder printk(KERN_WARNING "%s: image %s: %pV\n", 37006ecc6cbSAlex Elder RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf); 37106ecc6cbSAlex Elder else if (rbd_dev->spec && rbd_dev->spec->image_id) 37206ecc6cbSAlex Elder printk(KERN_WARNING "%s: id %s: %pV\n", 37306ecc6cbSAlex Elder RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf); 37406ecc6cbSAlex Elder else /* punt */ 37506ecc6cbSAlex Elder 
printk(KERN_WARNING "%s: rbd_dev %p: %pV\n", 37606ecc6cbSAlex Elder RBD_DRV_NAME, rbd_dev, &vaf); 37706ecc6cbSAlex Elder va_end(args); 37806ecc6cbSAlex Elder } 37906ecc6cbSAlex Elder 380aafb230eSAlex Elder #ifdef RBD_DEBUG 381aafb230eSAlex Elder #define rbd_assert(expr) \ 382aafb230eSAlex Elder if (unlikely(!(expr))) { \ 383aafb230eSAlex Elder printk(KERN_ERR "\nAssertion failure in %s() " \ 384aafb230eSAlex Elder "at line %d:\n\n" \ 385aafb230eSAlex Elder "\trbd_assert(%s);\n\n", \ 386aafb230eSAlex Elder __func__, __LINE__, #expr); \ 387aafb230eSAlex Elder BUG(); \ 388aafb230eSAlex Elder } 389aafb230eSAlex Elder #else /* !RBD_DEBUG */ 390aafb230eSAlex Elder # define rbd_assert(expr) ((void) 0) 391aafb230eSAlex Elder #endif /* !RBD_DEBUG */ 392dfc5606dSYehuda Sadeh 393117973fbSAlex Elder static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver); 394117973fbSAlex Elder static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver); 39559c2be1eSYehuda Sadeh 396602adf40SYehuda Sadeh static int rbd_open(struct block_device *bdev, fmode_t mode) 397602adf40SYehuda Sadeh { 398f0f8cef5SAlex Elder struct rbd_device *rbd_dev = bdev->bd_disk->private_data; 399b82d167bSAlex Elder bool removing = false; 400602adf40SYehuda Sadeh 401f84344f3SAlex Elder if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only) 402602adf40SYehuda Sadeh return -EROFS; 403602adf40SYehuda Sadeh 404a14ea269SAlex Elder spin_lock_irq(&rbd_dev->lock); 405b82d167bSAlex Elder if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) 406b82d167bSAlex Elder removing = true; 407b82d167bSAlex Elder else 408b82d167bSAlex Elder rbd_dev->open_count++; 409a14ea269SAlex Elder spin_unlock_irq(&rbd_dev->lock); 410b82d167bSAlex Elder if (removing) 411b82d167bSAlex Elder return -ENOENT; 412b82d167bSAlex Elder 41342382b70SAlex Elder mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 414c3e946ceSAlex Elder (void) get_device(&rbd_dev->dev); 415f84344f3SAlex Elder set_device_ro(bdev, rbd_dev->mapping.read_only); 
41642382b70SAlex Elder mutex_unlock(&ctl_mutex); 417340c7a2bSAlex Elder 418602adf40SYehuda Sadeh return 0; 419602adf40SYehuda Sadeh } 420602adf40SYehuda Sadeh 421dfc5606dSYehuda Sadeh static int rbd_release(struct gendisk *disk, fmode_t mode) 422dfc5606dSYehuda Sadeh { 423dfc5606dSYehuda Sadeh struct rbd_device *rbd_dev = disk->private_data; 424b82d167bSAlex Elder unsigned long open_count_before; 425b82d167bSAlex Elder 426a14ea269SAlex Elder spin_lock_irq(&rbd_dev->lock); 427b82d167bSAlex Elder open_count_before = rbd_dev->open_count--; 428a14ea269SAlex Elder spin_unlock_irq(&rbd_dev->lock); 429b82d167bSAlex Elder rbd_assert(open_count_before > 0); 430dfc5606dSYehuda Sadeh 43142382b70SAlex Elder mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 432c3e946ceSAlex Elder put_device(&rbd_dev->dev); 43342382b70SAlex Elder mutex_unlock(&ctl_mutex); 434dfc5606dSYehuda Sadeh 435dfc5606dSYehuda Sadeh return 0; 436dfc5606dSYehuda Sadeh } 437dfc5606dSYehuda Sadeh 438602adf40SYehuda Sadeh static const struct block_device_operations rbd_bd_ops = { 439602adf40SYehuda Sadeh .owner = THIS_MODULE, 440602adf40SYehuda Sadeh .open = rbd_open, 441dfc5606dSYehuda Sadeh .release = rbd_release, 442602adf40SYehuda Sadeh }; 443602adf40SYehuda Sadeh 444602adf40SYehuda Sadeh /* 445602adf40SYehuda Sadeh * Initialize an rbd client instance. 44643ae4701SAlex Elder * We own *ceph_opts. 
447602adf40SYehuda Sadeh */ 448f8c38929SAlex Elder static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) 449602adf40SYehuda Sadeh { 450602adf40SYehuda Sadeh struct rbd_client *rbdc; 451602adf40SYehuda Sadeh int ret = -ENOMEM; 452602adf40SYehuda Sadeh 45337206ee5SAlex Elder dout("%s:\n", __func__); 454602adf40SYehuda Sadeh rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); 455602adf40SYehuda Sadeh if (!rbdc) 456602adf40SYehuda Sadeh goto out_opt; 457602adf40SYehuda Sadeh 458602adf40SYehuda Sadeh kref_init(&rbdc->kref); 459602adf40SYehuda Sadeh INIT_LIST_HEAD(&rbdc->node); 460602adf40SYehuda Sadeh 461bc534d86SAlex Elder mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 462bc534d86SAlex Elder 46343ae4701SAlex Elder rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0); 464602adf40SYehuda Sadeh if (IS_ERR(rbdc->client)) 465bc534d86SAlex Elder goto out_mutex; 46643ae4701SAlex Elder ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */ 467602adf40SYehuda Sadeh 468602adf40SYehuda Sadeh ret = ceph_open_session(rbdc->client); 469602adf40SYehuda Sadeh if (ret < 0) 470602adf40SYehuda Sadeh goto out_err; 471602adf40SYehuda Sadeh 472432b8587SAlex Elder spin_lock(&rbd_client_list_lock); 473602adf40SYehuda Sadeh list_add_tail(&rbdc->node, &rbd_client_list); 474432b8587SAlex Elder spin_unlock(&rbd_client_list_lock); 475602adf40SYehuda Sadeh 476bc534d86SAlex Elder mutex_unlock(&ctl_mutex); 47737206ee5SAlex Elder dout("%s: rbdc %p\n", __func__, rbdc); 478bc534d86SAlex Elder 479602adf40SYehuda Sadeh return rbdc; 480602adf40SYehuda Sadeh 481602adf40SYehuda Sadeh out_err: 482602adf40SYehuda Sadeh ceph_destroy_client(rbdc->client); 483bc534d86SAlex Elder out_mutex: 484bc534d86SAlex Elder mutex_unlock(&ctl_mutex); 485602adf40SYehuda Sadeh kfree(rbdc); 486602adf40SYehuda Sadeh out_opt: 48743ae4701SAlex Elder if (ceph_opts) 48843ae4701SAlex Elder ceph_destroy_options(ceph_opts); 48937206ee5SAlex Elder dout("%s: error %d\n", __func__, ret); 
49037206ee5SAlex Elder 49128f259b7SVasiliy Kulikov return ERR_PTR(ret); 492602adf40SYehuda Sadeh } 493602adf40SYehuda Sadeh 494602adf40SYehuda Sadeh /* 4951f7ba331SAlex Elder * Find a ceph client with specific addr and configuration. If 4961f7ba331SAlex Elder * found, bump its reference count. 497602adf40SYehuda Sadeh */ 4981f7ba331SAlex Elder static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts) 499602adf40SYehuda Sadeh { 500602adf40SYehuda Sadeh struct rbd_client *client_node; 5011f7ba331SAlex Elder bool found = false; 502602adf40SYehuda Sadeh 50343ae4701SAlex Elder if (ceph_opts->flags & CEPH_OPT_NOSHARE) 504602adf40SYehuda Sadeh return NULL; 505602adf40SYehuda Sadeh 5061f7ba331SAlex Elder spin_lock(&rbd_client_list_lock); 5071f7ba331SAlex Elder list_for_each_entry(client_node, &rbd_client_list, node) { 5081f7ba331SAlex Elder if (!ceph_compare_options(ceph_opts, client_node->client)) { 5091f7ba331SAlex Elder kref_get(&client_node->kref); 5101f7ba331SAlex Elder found = true; 5111f7ba331SAlex Elder break; 5121f7ba331SAlex Elder } 5131f7ba331SAlex Elder } 5141f7ba331SAlex Elder spin_unlock(&rbd_client_list_lock); 5151f7ba331SAlex Elder 5161f7ba331SAlex Elder return found ? 
client_node : NULL; 517602adf40SYehuda Sadeh } 518602adf40SYehuda Sadeh 519602adf40SYehuda Sadeh /* 52059c2be1eSYehuda Sadeh * mount options 52159c2be1eSYehuda Sadeh */ 52259c2be1eSYehuda Sadeh enum { 52359c2be1eSYehuda Sadeh Opt_last_int, 52459c2be1eSYehuda Sadeh /* int args above */ 52559c2be1eSYehuda Sadeh Opt_last_string, 52659c2be1eSYehuda Sadeh /* string args above */ 527cc0538b6SAlex Elder Opt_read_only, 528cc0538b6SAlex Elder Opt_read_write, 529cc0538b6SAlex Elder /* Boolean args above */ 530cc0538b6SAlex Elder Opt_last_bool, 53159c2be1eSYehuda Sadeh }; 53259c2be1eSYehuda Sadeh 53343ae4701SAlex Elder static match_table_t rbd_opts_tokens = { 53459c2be1eSYehuda Sadeh /* int args above */ 53559c2be1eSYehuda Sadeh /* string args above */ 536be466c1cSAlex Elder {Opt_read_only, "read_only"}, 537cc0538b6SAlex Elder {Opt_read_only, "ro"}, /* Alternate spelling */ 538cc0538b6SAlex Elder {Opt_read_write, "read_write"}, 539cc0538b6SAlex Elder {Opt_read_write, "rw"}, /* Alternate spelling */ 540cc0538b6SAlex Elder /* Boolean args above */ 54159c2be1eSYehuda Sadeh {-1, NULL} 54259c2be1eSYehuda Sadeh }; 54359c2be1eSYehuda Sadeh 54498571b5aSAlex Elder struct rbd_options { 54598571b5aSAlex Elder bool read_only; 54698571b5aSAlex Elder }; 54798571b5aSAlex Elder 54898571b5aSAlex Elder #define RBD_READ_ONLY_DEFAULT false 54998571b5aSAlex Elder 55059c2be1eSYehuda Sadeh static int parse_rbd_opts_token(char *c, void *private) 55159c2be1eSYehuda Sadeh { 55243ae4701SAlex Elder struct rbd_options *rbd_opts = private; 55359c2be1eSYehuda Sadeh substring_t argstr[MAX_OPT_ARGS]; 55459c2be1eSYehuda Sadeh int token, intval, ret; 55559c2be1eSYehuda Sadeh 55643ae4701SAlex Elder token = match_token(c, rbd_opts_tokens, argstr); 55759c2be1eSYehuda Sadeh if (token < 0) 55859c2be1eSYehuda Sadeh return -EINVAL; 55959c2be1eSYehuda Sadeh 56059c2be1eSYehuda Sadeh if (token < Opt_last_int) { 56159c2be1eSYehuda Sadeh ret = match_int(&argstr[0], &intval); 56259c2be1eSYehuda Sadeh if (ret < 0) { 
56359c2be1eSYehuda Sadeh pr_err("bad mount option arg (not int) " 56459c2be1eSYehuda Sadeh "at '%s'\n", c); 56559c2be1eSYehuda Sadeh return ret; 56659c2be1eSYehuda Sadeh } 56759c2be1eSYehuda Sadeh dout("got int token %d val %d\n", token, intval); 56859c2be1eSYehuda Sadeh } else if (token > Opt_last_int && token < Opt_last_string) { 56959c2be1eSYehuda Sadeh dout("got string token %d val %s\n", token, 57059c2be1eSYehuda Sadeh argstr[0].from); 571cc0538b6SAlex Elder } else if (token > Opt_last_string && token < Opt_last_bool) { 572cc0538b6SAlex Elder dout("got Boolean token %d\n", token); 57359c2be1eSYehuda Sadeh } else { 57459c2be1eSYehuda Sadeh dout("got token %d\n", token); 57559c2be1eSYehuda Sadeh } 57659c2be1eSYehuda Sadeh 57759c2be1eSYehuda Sadeh switch (token) { 578cc0538b6SAlex Elder case Opt_read_only: 579cc0538b6SAlex Elder rbd_opts->read_only = true; 580cc0538b6SAlex Elder break; 581cc0538b6SAlex Elder case Opt_read_write: 582cc0538b6SAlex Elder rbd_opts->read_only = false; 583cc0538b6SAlex Elder break; 58459c2be1eSYehuda Sadeh default: 585aafb230eSAlex Elder rbd_assert(false); 586aafb230eSAlex Elder break; 58759c2be1eSYehuda Sadeh } 58859c2be1eSYehuda Sadeh return 0; 58959c2be1eSYehuda Sadeh } 59059c2be1eSYehuda Sadeh 59159c2be1eSYehuda Sadeh /* 592602adf40SYehuda Sadeh * Get a ceph client with specific addr and configuration, if one does 593602adf40SYehuda Sadeh * not exist create it. 
594602adf40SYehuda Sadeh */ 5959d3997fdSAlex Elder static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts) 596602adf40SYehuda Sadeh { 597f8c38929SAlex Elder struct rbd_client *rbdc; 59859c2be1eSYehuda Sadeh 5991f7ba331SAlex Elder rbdc = rbd_client_find(ceph_opts); 6009d3997fdSAlex Elder if (rbdc) /* using an existing client */ 60143ae4701SAlex Elder ceph_destroy_options(ceph_opts); 6029d3997fdSAlex Elder else 603f8c38929SAlex Elder rbdc = rbd_client_create(ceph_opts); 604d720bcb0SAlex Elder 6059d3997fdSAlex Elder return rbdc; 606602adf40SYehuda Sadeh } 607602adf40SYehuda Sadeh 608602adf40SYehuda Sadeh /* 609602adf40SYehuda Sadeh * Destroy ceph client 610d23a4b3fSAlex Elder * 611432b8587SAlex Elder * Caller must hold rbd_client_list_lock. 612602adf40SYehuda Sadeh */ 613602adf40SYehuda Sadeh static void rbd_client_release(struct kref *kref) 614602adf40SYehuda Sadeh { 615602adf40SYehuda Sadeh struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); 616602adf40SYehuda Sadeh 61737206ee5SAlex Elder dout("%s: rbdc %p\n", __func__, rbdc); 618cd9d9f5dSAlex Elder spin_lock(&rbd_client_list_lock); 619602adf40SYehuda Sadeh list_del(&rbdc->node); 620cd9d9f5dSAlex Elder spin_unlock(&rbd_client_list_lock); 621602adf40SYehuda Sadeh 622602adf40SYehuda Sadeh ceph_destroy_client(rbdc->client); 623602adf40SYehuda Sadeh kfree(rbdc); 624602adf40SYehuda Sadeh } 625602adf40SYehuda Sadeh 626602adf40SYehuda Sadeh /* 627602adf40SYehuda Sadeh * Drop reference to ceph client node. If it's not referenced anymore, release 628602adf40SYehuda Sadeh * it. 
629602adf40SYehuda Sadeh */ 6309d3997fdSAlex Elder static void rbd_put_client(struct rbd_client *rbdc) 631602adf40SYehuda Sadeh { 632c53d5893SAlex Elder if (rbdc) 6339d3997fdSAlex Elder kref_put(&rbdc->kref, rbd_client_release); 634602adf40SYehuda Sadeh } 635602adf40SYehuda Sadeh 636a30b71b9SAlex Elder static bool rbd_image_format_valid(u32 image_format) 637a30b71b9SAlex Elder { 638a30b71b9SAlex Elder return image_format == 1 || image_format == 2; 639a30b71b9SAlex Elder } 640a30b71b9SAlex Elder 6418e94af8eSAlex Elder static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) 6428e94af8eSAlex Elder { 643103a150fSAlex Elder size_t size; 644103a150fSAlex Elder u32 snap_count; 645103a150fSAlex Elder 646103a150fSAlex Elder /* The header has to start with the magic rbd header text */ 647103a150fSAlex Elder if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT))) 648103a150fSAlex Elder return false; 649103a150fSAlex Elder 650db2388b6SAlex Elder /* The bio layer requires at least sector-sized I/O */ 651db2388b6SAlex Elder 652db2388b6SAlex Elder if (ondisk->options.order < SECTOR_SHIFT) 653db2388b6SAlex Elder return false; 654db2388b6SAlex Elder 655db2388b6SAlex Elder /* If we use u64 in a few spots we may be able to loosen this */ 656db2388b6SAlex Elder 657db2388b6SAlex Elder if (ondisk->options.order > 8 * sizeof (int) - 1) 658db2388b6SAlex Elder return false; 659db2388b6SAlex Elder 660103a150fSAlex Elder /* 661103a150fSAlex Elder * The size of a snapshot header has to fit in a size_t, and 662103a150fSAlex Elder * that limits the number of snapshots. 
663103a150fSAlex Elder */ 664103a150fSAlex Elder snap_count = le32_to_cpu(ondisk->snap_count); 665103a150fSAlex Elder size = SIZE_MAX - sizeof (struct ceph_snap_context); 666103a150fSAlex Elder if (snap_count > size / sizeof (__le64)) 667103a150fSAlex Elder return false; 668103a150fSAlex Elder 669103a150fSAlex Elder /* 670103a150fSAlex Elder * Not only that, but the size of the entire the snapshot 671103a150fSAlex Elder * header must also be representable in a size_t. 672103a150fSAlex Elder */ 673103a150fSAlex Elder size -= snap_count * sizeof (__le64); 674103a150fSAlex Elder if ((u64) size < le64_to_cpu(ondisk->snap_names_len)) 675103a150fSAlex Elder return false; 676103a150fSAlex Elder 677103a150fSAlex Elder return true; 6788e94af8eSAlex Elder } 6798e94af8eSAlex Elder 680602adf40SYehuda Sadeh /* 681602adf40SYehuda Sadeh * Create a new header structure, translate header format from the on-disk 682602adf40SYehuda Sadeh * header. 683602adf40SYehuda Sadeh */ 684602adf40SYehuda Sadeh static int rbd_header_from_disk(struct rbd_image_header *header, 6854156d998SAlex Elder struct rbd_image_header_ondisk *ondisk) 686602adf40SYehuda Sadeh { 687ccece235SAlex Elder u32 snap_count; 68858c17b0eSAlex Elder size_t len; 689d2bb24e5SAlex Elder size_t size; 690621901d6SAlex Elder u32 i; 691602adf40SYehuda Sadeh 6926a52325fSAlex Elder memset(header, 0, sizeof (*header)); 6936a52325fSAlex Elder 694103a150fSAlex Elder snap_count = le32_to_cpu(ondisk->snap_count); 695103a150fSAlex Elder 69658c17b0eSAlex Elder len = strnlen(ondisk->object_prefix, sizeof (ondisk->object_prefix)); 69758c17b0eSAlex Elder header->object_prefix = kmalloc(len + 1, GFP_KERNEL); 6986a52325fSAlex Elder if (!header->object_prefix) 699602adf40SYehuda Sadeh return -ENOMEM; 70058c17b0eSAlex Elder memcpy(header->object_prefix, ondisk->object_prefix, len); 70158c17b0eSAlex Elder header->object_prefix[len] = '\0'; 70200f1f36fSAlex Elder 703602adf40SYehuda Sadeh if (snap_count) { 704f785cc1dSAlex Elder u64 
snap_names_len = le64_to_cpu(ondisk->snap_names_len); 705f785cc1dSAlex Elder 706621901d6SAlex Elder /* Save a copy of the snapshot names */ 707621901d6SAlex Elder 708f785cc1dSAlex Elder if (snap_names_len > (u64) SIZE_MAX) 709f785cc1dSAlex Elder return -EIO; 710f785cc1dSAlex Elder header->snap_names = kmalloc(snap_names_len, GFP_KERNEL); 711602adf40SYehuda Sadeh if (!header->snap_names) 7126a52325fSAlex Elder goto out_err; 713f785cc1dSAlex Elder /* 714f785cc1dSAlex Elder * Note that rbd_dev_v1_header_read() guarantees 715f785cc1dSAlex Elder * the ondisk buffer we're working with has 716f785cc1dSAlex Elder * snap_names_len bytes beyond the end of the 717f785cc1dSAlex Elder * snapshot id array, this memcpy() is safe. 718f785cc1dSAlex Elder */ 719f785cc1dSAlex Elder memcpy(header->snap_names, &ondisk->snaps[snap_count], 720f785cc1dSAlex Elder snap_names_len); 7216a52325fSAlex Elder 722621901d6SAlex Elder /* Record each snapshot's size */ 723621901d6SAlex Elder 724d2bb24e5SAlex Elder size = snap_count * sizeof (*header->snap_sizes); 725d2bb24e5SAlex Elder header->snap_sizes = kmalloc(size, GFP_KERNEL); 726602adf40SYehuda Sadeh if (!header->snap_sizes) 7276a52325fSAlex Elder goto out_err; 728621901d6SAlex Elder for (i = 0; i < snap_count; i++) 729621901d6SAlex Elder header->snap_sizes[i] = 730621901d6SAlex Elder le64_to_cpu(ondisk->snaps[i].image_size); 731602adf40SYehuda Sadeh } else { 732ccece235SAlex Elder WARN_ON(ondisk->snap_names_len); 733602adf40SYehuda Sadeh header->snap_names = NULL; 734602adf40SYehuda Sadeh header->snap_sizes = NULL; 735602adf40SYehuda Sadeh } 736849b4260SAlex Elder 73734b13184SAlex Elder header->features = 0; /* No features support in v1 images */ 738602adf40SYehuda Sadeh header->obj_order = ondisk->options.order; 739602adf40SYehuda Sadeh header->crypt_type = ondisk->options.crypt_type; 740602adf40SYehuda Sadeh header->comp_type = ondisk->options.comp_type; 7416a52325fSAlex Elder 742621901d6SAlex Elder /* Allocate and fill in the snapshot 
context */ 743621901d6SAlex Elder 744f84344f3SAlex Elder header->image_size = le64_to_cpu(ondisk->image_size); 7456a52325fSAlex Elder size = sizeof (struct ceph_snap_context); 7466a52325fSAlex Elder size += snap_count * sizeof (header->snapc->snaps[0]); 7476a52325fSAlex Elder header->snapc = kzalloc(size, GFP_KERNEL); 7486a52325fSAlex Elder if (!header->snapc) 7496a52325fSAlex Elder goto out_err; 750602adf40SYehuda Sadeh 751602adf40SYehuda Sadeh atomic_set(&header->snapc->nref, 1); 752505cbb9bSAlex Elder header->snapc->seq = le64_to_cpu(ondisk->snap_seq); 753602adf40SYehuda Sadeh header->snapc->num_snaps = snap_count; 754621901d6SAlex Elder for (i = 0; i < snap_count; i++) 755602adf40SYehuda Sadeh header->snapc->snaps[i] = 756602adf40SYehuda Sadeh le64_to_cpu(ondisk->snaps[i].id); 757602adf40SYehuda Sadeh 758602adf40SYehuda Sadeh return 0; 759602adf40SYehuda Sadeh 7606a52325fSAlex Elder out_err: 761849b4260SAlex Elder kfree(header->snap_sizes); 762ccece235SAlex Elder header->snap_sizes = NULL; 763602adf40SYehuda Sadeh kfree(header->snap_names); 764ccece235SAlex Elder header->snap_names = NULL; 7656a52325fSAlex Elder kfree(header->object_prefix); 7666a52325fSAlex Elder header->object_prefix = NULL; 767ccece235SAlex Elder 76800f1f36fSAlex Elder return -ENOMEM; 769602adf40SYehuda Sadeh } 770602adf40SYehuda Sadeh 7719e15b77dSAlex Elder static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id) 7729e15b77dSAlex Elder { 7739e15b77dSAlex Elder struct rbd_snap *snap; 7749e15b77dSAlex Elder 7759e15b77dSAlex Elder if (snap_id == CEPH_NOSNAP) 7769e15b77dSAlex Elder return RBD_SNAP_HEAD_NAME; 7779e15b77dSAlex Elder 7789e15b77dSAlex Elder list_for_each_entry(snap, &rbd_dev->snaps, node) 7799e15b77dSAlex Elder if (snap_id == snap->id) 7809e15b77dSAlex Elder return snap->name; 7819e15b77dSAlex Elder 7829e15b77dSAlex Elder return NULL; 7839e15b77dSAlex Elder } 7849e15b77dSAlex Elder 7858836b995SAlex Elder static int snap_by_name(struct rbd_device *rbd_dev, const 
char *snap_name) 786602adf40SYehuda Sadeh { 787602adf40SYehuda Sadeh 788e86924a8SAlex Elder struct rbd_snap *snap; 78900f1f36fSAlex Elder 790e86924a8SAlex Elder list_for_each_entry(snap, &rbd_dev->snaps, node) { 791e86924a8SAlex Elder if (!strcmp(snap_name, snap->name)) { 7920d7dbfceSAlex Elder rbd_dev->spec->snap_id = snap->id; 793e86924a8SAlex Elder rbd_dev->mapping.size = snap->size; 79434b13184SAlex Elder rbd_dev->mapping.features = snap->features; 79500f1f36fSAlex Elder 796e86924a8SAlex Elder return 0; 797602adf40SYehuda Sadeh } 79800f1f36fSAlex Elder } 799e86924a8SAlex Elder 80000f1f36fSAlex Elder return -ENOENT; 80100f1f36fSAlex Elder } 802602adf40SYehuda Sadeh 803819d52bfSAlex Elder static int rbd_dev_set_mapping(struct rbd_device *rbd_dev) 804602adf40SYehuda Sadeh { 80578dc447dSAlex Elder int ret; 806602adf40SYehuda Sadeh 8070d7dbfceSAlex Elder if (!memcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME, 808cc9d734cSJosh Durgin sizeof (RBD_SNAP_HEAD_NAME))) { 8090d7dbfceSAlex Elder rbd_dev->spec->snap_id = CEPH_NOSNAP; 81099c1f08fSAlex Elder rbd_dev->mapping.size = rbd_dev->header.image_size; 81134b13184SAlex Elder rbd_dev->mapping.features = rbd_dev->header.features; 812e86924a8SAlex Elder ret = 0; 813602adf40SYehuda Sadeh } else { 8140d7dbfceSAlex Elder ret = snap_by_name(rbd_dev, rbd_dev->spec->snap_name); 815602adf40SYehuda Sadeh if (ret < 0) 816602adf40SYehuda Sadeh goto done; 817f84344f3SAlex Elder rbd_dev->mapping.read_only = true; 818602adf40SYehuda Sadeh } 8196d292906SAlex Elder set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); 8206d292906SAlex Elder 821602adf40SYehuda Sadeh done: 822602adf40SYehuda Sadeh return ret; 823602adf40SYehuda Sadeh } 824602adf40SYehuda Sadeh 825602adf40SYehuda Sadeh static void rbd_header_free(struct rbd_image_header *header) 826602adf40SYehuda Sadeh { 827849b4260SAlex Elder kfree(header->object_prefix); 828d78fd7aeSAlex Elder header->object_prefix = NULL; 829602adf40SYehuda Sadeh kfree(header->snap_sizes); 830d78fd7aeSAlex 
Elder header->snap_sizes = NULL; 831849b4260SAlex Elder kfree(header->snap_names); 832d78fd7aeSAlex Elder header->snap_names = NULL; 833d1d25646SJosh Durgin ceph_put_snap_context(header->snapc); 834d78fd7aeSAlex Elder header->snapc = NULL; 835602adf40SYehuda Sadeh } 836602adf40SYehuda Sadeh 83798571b5aSAlex Elder static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) 838602adf40SYehuda Sadeh { 83965ccfe21SAlex Elder char *name; 84065ccfe21SAlex Elder u64 segment; 84165ccfe21SAlex Elder int ret; 842602adf40SYehuda Sadeh 8432fd82b9eSAlex Elder name = kmalloc(MAX_OBJ_NAME_SIZE + 1, GFP_NOIO); 84465ccfe21SAlex Elder if (!name) 84565ccfe21SAlex Elder return NULL; 84665ccfe21SAlex Elder segment = offset >> rbd_dev->header.obj_order; 8472fd82b9eSAlex Elder ret = snprintf(name, MAX_OBJ_NAME_SIZE + 1, "%s.%012llx", 84865ccfe21SAlex Elder rbd_dev->header.object_prefix, segment); 8492fd82b9eSAlex Elder if (ret < 0 || ret > MAX_OBJ_NAME_SIZE) { 85065ccfe21SAlex Elder pr_err("error formatting segment name for #%llu (%d)\n", 85165ccfe21SAlex Elder segment, ret); 85265ccfe21SAlex Elder kfree(name); 85365ccfe21SAlex Elder name = NULL; 85465ccfe21SAlex Elder } 855602adf40SYehuda Sadeh 85665ccfe21SAlex Elder return name; 85765ccfe21SAlex Elder } 858602adf40SYehuda Sadeh 85965ccfe21SAlex Elder static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset) 86065ccfe21SAlex Elder { 86165ccfe21SAlex Elder u64 segment_size = (u64) 1 << rbd_dev->header.obj_order; 862602adf40SYehuda Sadeh 86365ccfe21SAlex Elder return offset & (segment_size - 1); 86465ccfe21SAlex Elder } 86565ccfe21SAlex Elder 86665ccfe21SAlex Elder static u64 rbd_segment_length(struct rbd_device *rbd_dev, 86765ccfe21SAlex Elder u64 offset, u64 length) 86865ccfe21SAlex Elder { 86965ccfe21SAlex Elder u64 segment_size = (u64) 1 << rbd_dev->header.obj_order; 87065ccfe21SAlex Elder 87165ccfe21SAlex Elder offset &= segment_size - 1; 87265ccfe21SAlex Elder 873aafb230eSAlex Elder rbd_assert(length <= 
U64_MAX - offset); 87465ccfe21SAlex Elder if (offset + length > segment_size) 87565ccfe21SAlex Elder length = segment_size - offset; 87665ccfe21SAlex Elder 87765ccfe21SAlex Elder return length; 878602adf40SYehuda Sadeh } 879602adf40SYehuda Sadeh 880602adf40SYehuda Sadeh /* 881029bcbd8SJosh Durgin * returns the size of an object in the image 882029bcbd8SJosh Durgin */ 883029bcbd8SJosh Durgin static u64 rbd_obj_bytes(struct rbd_image_header *header) 884029bcbd8SJosh Durgin { 885029bcbd8SJosh Durgin return 1 << header->obj_order; 886029bcbd8SJosh Durgin } 887029bcbd8SJosh Durgin 888029bcbd8SJosh Durgin /* 889602adf40SYehuda Sadeh * bio helpers 890602adf40SYehuda Sadeh */ 891602adf40SYehuda Sadeh 892602adf40SYehuda Sadeh static void bio_chain_put(struct bio *chain) 893602adf40SYehuda Sadeh { 894602adf40SYehuda Sadeh struct bio *tmp; 895602adf40SYehuda Sadeh 896602adf40SYehuda Sadeh while (chain) { 897602adf40SYehuda Sadeh tmp = chain; 898602adf40SYehuda Sadeh chain = chain->bi_next; 899602adf40SYehuda Sadeh bio_put(tmp); 900602adf40SYehuda Sadeh } 901602adf40SYehuda Sadeh } 902602adf40SYehuda Sadeh 903602adf40SYehuda Sadeh /* 904602adf40SYehuda Sadeh * zeros a bio chain, starting at specific offset 905602adf40SYehuda Sadeh */ 906602adf40SYehuda Sadeh static void zero_bio_chain(struct bio *chain, int start_ofs) 907602adf40SYehuda Sadeh { 908602adf40SYehuda Sadeh struct bio_vec *bv; 909602adf40SYehuda Sadeh unsigned long flags; 910602adf40SYehuda Sadeh void *buf; 911602adf40SYehuda Sadeh int i; 912602adf40SYehuda Sadeh int pos = 0; 913602adf40SYehuda Sadeh 914602adf40SYehuda Sadeh while (chain) { 915602adf40SYehuda Sadeh bio_for_each_segment(bv, chain, i) { 916602adf40SYehuda Sadeh if (pos + bv->bv_len > start_ofs) { 917602adf40SYehuda Sadeh int remainder = max(start_ofs - pos, 0); 918602adf40SYehuda Sadeh buf = bvec_kmap_irq(bv, &flags); 919602adf40SYehuda Sadeh memset(buf + remainder, 0, 920602adf40SYehuda Sadeh bv->bv_len - remainder); 92185b5aaa6SDan Carpenter 
bvec_kunmap_irq(buf, &flags); 922602adf40SYehuda Sadeh } 923602adf40SYehuda Sadeh pos += bv->bv_len; 924602adf40SYehuda Sadeh } 925602adf40SYehuda Sadeh 926602adf40SYehuda Sadeh chain = chain->bi_next; 927602adf40SYehuda Sadeh } 928602adf40SYehuda Sadeh } 929602adf40SYehuda Sadeh 930602adf40SYehuda Sadeh /* 931f7760dadSAlex Elder * Clone a portion of a bio, starting at the given byte offset 932f7760dadSAlex Elder * and continuing for the number of bytes indicated. 933602adf40SYehuda Sadeh */ 934f7760dadSAlex Elder static struct bio *bio_clone_range(struct bio *bio_src, 935f7760dadSAlex Elder unsigned int offset, 936f7760dadSAlex Elder unsigned int len, 937f7760dadSAlex Elder gfp_t gfpmask) 938602adf40SYehuda Sadeh { 939f7760dadSAlex Elder struct bio_vec *bv; 940f7760dadSAlex Elder unsigned int resid; 941f7760dadSAlex Elder unsigned short idx; 942f7760dadSAlex Elder unsigned int voff; 943f7760dadSAlex Elder unsigned short end_idx; 944f7760dadSAlex Elder unsigned short vcnt; 945f7760dadSAlex Elder struct bio *bio; 946602adf40SYehuda Sadeh 947f7760dadSAlex Elder /* Handle the easy case for the caller */ 948f7760dadSAlex Elder 949f7760dadSAlex Elder if (!offset && len == bio_src->bi_size) 950f7760dadSAlex Elder return bio_clone(bio_src, gfpmask); 951f7760dadSAlex Elder 952f7760dadSAlex Elder if (WARN_ON_ONCE(!len)) 953f7760dadSAlex Elder return NULL; 954f7760dadSAlex Elder if (WARN_ON_ONCE(len > bio_src->bi_size)) 955f7760dadSAlex Elder return NULL; 956f7760dadSAlex Elder if (WARN_ON_ONCE(offset > bio_src->bi_size - len)) 957f7760dadSAlex Elder return NULL; 958f7760dadSAlex Elder 959f7760dadSAlex Elder /* Find first affected segment... 
*/ 960f7760dadSAlex Elder 961f7760dadSAlex Elder resid = offset; 962f7760dadSAlex Elder __bio_for_each_segment(bv, bio_src, idx, 0) { 963f7760dadSAlex Elder if (resid < bv->bv_len) 964f7760dadSAlex Elder break; 965f7760dadSAlex Elder resid -= bv->bv_len; 966602adf40SYehuda Sadeh } 967f7760dadSAlex Elder voff = resid; 968602adf40SYehuda Sadeh 969f7760dadSAlex Elder /* ...and the last affected segment */ 970542582fcSAlex Elder 971f7760dadSAlex Elder resid += len; 972f7760dadSAlex Elder __bio_for_each_segment(bv, bio_src, end_idx, idx) { 973f7760dadSAlex Elder if (resid <= bv->bv_len) 974f7760dadSAlex Elder break; 975f7760dadSAlex Elder resid -= bv->bv_len; 976f7760dadSAlex Elder } 977f7760dadSAlex Elder vcnt = end_idx - idx + 1; 978602adf40SYehuda Sadeh 979f7760dadSAlex Elder /* Build the clone */ 980f7760dadSAlex Elder 981f7760dadSAlex Elder bio = bio_alloc(gfpmask, (unsigned int) vcnt); 982f7760dadSAlex Elder if (!bio) 983f7760dadSAlex Elder return NULL; /* ENOMEM */ 984f7760dadSAlex Elder 985f7760dadSAlex Elder bio->bi_bdev = bio_src->bi_bdev; 986f7760dadSAlex Elder bio->bi_sector = bio_src->bi_sector + (offset >> SECTOR_SHIFT); 987f7760dadSAlex Elder bio->bi_rw = bio_src->bi_rw; 988f7760dadSAlex Elder bio->bi_flags |= 1 << BIO_CLONED; 989602adf40SYehuda Sadeh 990602adf40SYehuda Sadeh /* 991f7760dadSAlex Elder * Copy over our part of the bio_vec, then update the first 992f7760dadSAlex Elder * and last (or only) entries. 
993602adf40SYehuda Sadeh */ 994f7760dadSAlex Elder memcpy(&bio->bi_io_vec[0], &bio_src->bi_io_vec[idx], 995f7760dadSAlex Elder vcnt * sizeof (struct bio_vec)); 996f7760dadSAlex Elder bio->bi_io_vec[0].bv_offset += voff; 997f7760dadSAlex Elder if (vcnt > 1) { 998f7760dadSAlex Elder bio->bi_io_vec[0].bv_len -= voff; 999f7760dadSAlex Elder bio->bi_io_vec[vcnt - 1].bv_len = resid; 1000602adf40SYehuda Sadeh } else { 1001f7760dadSAlex Elder bio->bi_io_vec[0].bv_len = len; 1002602adf40SYehuda Sadeh } 1003602adf40SYehuda Sadeh 1004f7760dadSAlex Elder bio->bi_vcnt = vcnt; 1005f7760dadSAlex Elder bio->bi_size = len; 1006f7760dadSAlex Elder bio->bi_idx = 0; 1007602adf40SYehuda Sadeh 1008f7760dadSAlex Elder return bio; 1009602adf40SYehuda Sadeh } 1010602adf40SYehuda Sadeh 1011f7760dadSAlex Elder /* 1012f7760dadSAlex Elder * Clone a portion of a bio chain, starting at the given byte offset 1013f7760dadSAlex Elder * into the first bio in the source chain and continuing for the 1014f7760dadSAlex Elder * number of bytes indicated. The result is another bio chain of 1015f7760dadSAlex Elder * exactly the given length, or a null pointer on error. 1016f7760dadSAlex Elder * 1017f7760dadSAlex Elder * The bio_src and offset parameters are both in-out. On entry they 1018f7760dadSAlex Elder * refer to the first source bio and the offset into that bio where 1019f7760dadSAlex Elder * the start of data to be cloned is located. 1020f7760dadSAlex Elder * 1021f7760dadSAlex Elder * On return, bio_src is updated to refer to the bio in the source 1022f7760dadSAlex Elder * chain that contains first un-cloned byte, and *offset will 1023f7760dadSAlex Elder * contain the offset of that byte within that bio. 
1024f7760dadSAlex Elder */ 1025f7760dadSAlex Elder static struct bio *bio_chain_clone_range(struct bio **bio_src, 1026f7760dadSAlex Elder unsigned int *offset, 1027f7760dadSAlex Elder unsigned int len, 1028f7760dadSAlex Elder gfp_t gfpmask) 1029f7760dadSAlex Elder { 1030f7760dadSAlex Elder struct bio *bi = *bio_src; 1031f7760dadSAlex Elder unsigned int off = *offset; 1032f7760dadSAlex Elder struct bio *chain = NULL; 1033f7760dadSAlex Elder struct bio **end; 1034602adf40SYehuda Sadeh 1035f7760dadSAlex Elder /* Build up a chain of clone bios up to the limit */ 1036602adf40SYehuda Sadeh 1037f7760dadSAlex Elder if (!bi || off >= bi->bi_size || !len) 1038f7760dadSAlex Elder return NULL; /* Nothing to clone */ 1039602adf40SYehuda Sadeh 1040f7760dadSAlex Elder end = &chain; 1041f7760dadSAlex Elder while (len) { 1042f7760dadSAlex Elder unsigned int bi_size; 1043f7760dadSAlex Elder struct bio *bio; 1044f7760dadSAlex Elder 1045f5400b7aSAlex Elder if (!bi) { 1046f5400b7aSAlex Elder rbd_warn(NULL, "bio_chain exhausted with %u left", len); 1047f7760dadSAlex Elder goto out_err; /* EINVAL; ran out of bio's */ 1048f5400b7aSAlex Elder } 1049f7760dadSAlex Elder bi_size = min_t(unsigned int, bi->bi_size - off, len); 1050f7760dadSAlex Elder bio = bio_clone_range(bi, off, bi_size, gfpmask); 1051f7760dadSAlex Elder if (!bio) 1052f7760dadSAlex Elder goto out_err; /* ENOMEM */ 1053f7760dadSAlex Elder 1054f7760dadSAlex Elder *end = bio; 1055f7760dadSAlex Elder end = &bio->bi_next; 1056f7760dadSAlex Elder 1057f7760dadSAlex Elder off += bi_size; 1058f7760dadSAlex Elder if (off == bi->bi_size) { 1059f7760dadSAlex Elder bi = bi->bi_next; 1060f7760dadSAlex Elder off = 0; 1061f7760dadSAlex Elder } 1062f7760dadSAlex Elder len -= bi_size; 1063f7760dadSAlex Elder } 1064f7760dadSAlex Elder *bio_src = bi; 1065f7760dadSAlex Elder *offset = off; 1066f7760dadSAlex Elder 1067f7760dadSAlex Elder return chain; 1068f7760dadSAlex Elder out_err: 1069f7760dadSAlex Elder bio_chain_put(chain); 1070f7760dadSAlex 
Elder 1071602adf40SYehuda Sadeh return NULL; 1072602adf40SYehuda Sadeh } 1073602adf40SYehuda Sadeh 1074bf0d5f50SAlex Elder static void rbd_obj_request_get(struct rbd_obj_request *obj_request) 1075bf0d5f50SAlex Elder { 107637206ee5SAlex Elder dout("%s: obj %p (was %d)\n", __func__, obj_request, 107737206ee5SAlex Elder atomic_read(&obj_request->kref.refcount)); 1078bf0d5f50SAlex Elder kref_get(&obj_request->kref); 1079bf0d5f50SAlex Elder } 1080bf0d5f50SAlex Elder 1081bf0d5f50SAlex Elder static void rbd_obj_request_destroy(struct kref *kref); 1082bf0d5f50SAlex Elder static void rbd_obj_request_put(struct rbd_obj_request *obj_request) 1083bf0d5f50SAlex Elder { 1084bf0d5f50SAlex Elder rbd_assert(obj_request != NULL); 108537206ee5SAlex Elder dout("%s: obj %p (was %d)\n", __func__, obj_request, 108637206ee5SAlex Elder atomic_read(&obj_request->kref.refcount)); 1087bf0d5f50SAlex Elder kref_put(&obj_request->kref, rbd_obj_request_destroy); 1088bf0d5f50SAlex Elder } 1089bf0d5f50SAlex Elder 1090bf0d5f50SAlex Elder static void rbd_img_request_get(struct rbd_img_request *img_request) 1091bf0d5f50SAlex Elder { 109237206ee5SAlex Elder dout("%s: img %p (was %d)\n", __func__, img_request, 109337206ee5SAlex Elder atomic_read(&img_request->kref.refcount)); 1094bf0d5f50SAlex Elder kref_get(&img_request->kref); 1095bf0d5f50SAlex Elder } 1096bf0d5f50SAlex Elder 1097bf0d5f50SAlex Elder static void rbd_img_request_destroy(struct kref *kref); 1098bf0d5f50SAlex Elder static void rbd_img_request_put(struct rbd_img_request *img_request) 1099bf0d5f50SAlex Elder { 1100bf0d5f50SAlex Elder rbd_assert(img_request != NULL); 110137206ee5SAlex Elder dout("%s: img %p (was %d)\n", __func__, img_request, 110237206ee5SAlex Elder atomic_read(&img_request->kref.refcount)); 1103bf0d5f50SAlex Elder kref_put(&img_request->kref, rbd_img_request_destroy); 1104bf0d5f50SAlex Elder } 1105bf0d5f50SAlex Elder 1106bf0d5f50SAlex Elder static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request, 
1107bf0d5f50SAlex Elder struct rbd_obj_request *obj_request) 1108bf0d5f50SAlex Elder { 110925dcf954SAlex Elder rbd_assert(obj_request->img_request == NULL); 111025dcf954SAlex Elder 1111bf0d5f50SAlex Elder rbd_obj_request_get(obj_request); 1112bf0d5f50SAlex Elder obj_request->img_request = img_request; 111325dcf954SAlex Elder obj_request->which = img_request->obj_request_count; 1114bf0d5f50SAlex Elder rbd_assert(obj_request->which != BAD_WHICH); 111525dcf954SAlex Elder img_request->obj_request_count++; 111625dcf954SAlex Elder list_add_tail(&obj_request->links, &img_request->obj_requests); 111737206ee5SAlex Elder dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request, 111837206ee5SAlex Elder obj_request->which); 1119bf0d5f50SAlex Elder } 1120bf0d5f50SAlex Elder 1121bf0d5f50SAlex Elder static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request, 1122bf0d5f50SAlex Elder struct rbd_obj_request *obj_request) 1123bf0d5f50SAlex Elder { 1124bf0d5f50SAlex Elder rbd_assert(obj_request->which != BAD_WHICH); 112525dcf954SAlex Elder 112637206ee5SAlex Elder dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request, 112737206ee5SAlex Elder obj_request->which); 1128bf0d5f50SAlex Elder list_del(&obj_request->links); 112925dcf954SAlex Elder rbd_assert(img_request->obj_request_count > 0); 113025dcf954SAlex Elder img_request->obj_request_count--; 113125dcf954SAlex Elder rbd_assert(obj_request->which == img_request->obj_request_count); 113225dcf954SAlex Elder obj_request->which = BAD_WHICH; 1133bf0d5f50SAlex Elder rbd_assert(obj_request->img_request == img_request); 1134bf0d5f50SAlex Elder obj_request->img_request = NULL; 113525dcf954SAlex Elder obj_request->callback = NULL; 1136bf0d5f50SAlex Elder rbd_obj_request_put(obj_request); 1137bf0d5f50SAlex Elder } 1138bf0d5f50SAlex Elder 1139bf0d5f50SAlex Elder static bool obj_request_type_valid(enum obj_request_type type) 1140bf0d5f50SAlex Elder { 1141bf0d5f50SAlex Elder switch (type) { 
11429969ebc5SAlex Elder case OBJ_REQUEST_NODATA: 1143bf0d5f50SAlex Elder case OBJ_REQUEST_BIO: 1144788e2df3SAlex Elder case OBJ_REQUEST_PAGES: 1145bf0d5f50SAlex Elder return true; 1146bf0d5f50SAlex Elder default: 1147bf0d5f50SAlex Elder return false; 1148bf0d5f50SAlex Elder } 1149bf0d5f50SAlex Elder } 1150bf0d5f50SAlex Elder 1151bf0d5f50SAlex Elder static int rbd_obj_request_submit(struct ceph_osd_client *osdc, 1152bf0d5f50SAlex Elder struct rbd_obj_request *obj_request) 1153bf0d5f50SAlex Elder { 115437206ee5SAlex Elder dout("%s: osdc %p obj %p\n", __func__, osdc, obj_request); 115537206ee5SAlex Elder 1156bf0d5f50SAlex Elder return ceph_osdc_start_request(osdc, obj_request->osd_req, false); 1157bf0d5f50SAlex Elder } 1158bf0d5f50SAlex Elder 1159bf0d5f50SAlex Elder static void rbd_img_request_complete(struct rbd_img_request *img_request) 1160bf0d5f50SAlex Elder { 116155f27e09SAlex Elder 116237206ee5SAlex Elder dout("%s: img %p\n", __func__, img_request); 116355f27e09SAlex Elder 116455f27e09SAlex Elder /* 116555f27e09SAlex Elder * If no error occurred, compute the aggregate transfer 116655f27e09SAlex Elder * count for the image request. We could instead use 116755f27e09SAlex Elder * atomic64_cmpxchg() to update it as each object request 116855f27e09SAlex Elder * completes; not clear which way is better off hand. 
116955f27e09SAlex Elder */ 117055f27e09SAlex Elder if (!img_request->result) { 117155f27e09SAlex Elder struct rbd_obj_request *obj_request; 117255f27e09SAlex Elder u64 xferred = 0; 117355f27e09SAlex Elder 117455f27e09SAlex Elder for_each_obj_request(img_request, obj_request) 117555f27e09SAlex Elder xferred += obj_request->xferred; 117655f27e09SAlex Elder img_request->xferred = xferred; 117755f27e09SAlex Elder } 117855f27e09SAlex Elder 1179bf0d5f50SAlex Elder if (img_request->callback) 1180bf0d5f50SAlex Elder img_request->callback(img_request); 1181bf0d5f50SAlex Elder else 1182bf0d5f50SAlex Elder rbd_img_request_put(img_request); 1183bf0d5f50SAlex Elder } 1184bf0d5f50SAlex Elder 1185788e2df3SAlex Elder /* Caller is responsible for rbd_obj_request_destroy(obj_request) */ 1186788e2df3SAlex Elder 1187788e2df3SAlex Elder static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) 1188788e2df3SAlex Elder { 118937206ee5SAlex Elder dout("%s: obj %p\n", __func__, obj_request); 119037206ee5SAlex Elder 1191788e2df3SAlex Elder return wait_for_completion_interruptible(&obj_request->completion); 1192788e2df3SAlex Elder } 1193788e2df3SAlex Elder 119407741308SAlex Elder static void obj_request_done_init(struct rbd_obj_request *obj_request) 119507741308SAlex Elder { 119607741308SAlex Elder atomic_set(&obj_request->done, 0); 119707741308SAlex Elder smp_wmb(); 119807741308SAlex Elder } 119907741308SAlex Elder 120007741308SAlex Elder static void obj_request_done_set(struct rbd_obj_request *obj_request) 120107741308SAlex Elder { 1202632b88caSAlex Elder int done; 1203632b88caSAlex Elder 1204632b88caSAlex Elder done = atomic_inc_return(&obj_request->done); 1205632b88caSAlex Elder if (done > 1) { 1206632b88caSAlex Elder struct rbd_img_request *img_request = obj_request->img_request; 1207632b88caSAlex Elder struct rbd_device *rbd_dev; 1208632b88caSAlex Elder 1209632b88caSAlex Elder rbd_dev = img_request ? 
img_request->rbd_dev : NULL; 1210632b88caSAlex Elder rbd_warn(rbd_dev, "obj_request %p was already done\n", 1211632b88caSAlex Elder obj_request); 1212632b88caSAlex Elder } 121307741308SAlex Elder } 121407741308SAlex Elder 121507741308SAlex Elder static bool obj_request_done_test(struct rbd_obj_request *obj_request) 121607741308SAlex Elder { 1217632b88caSAlex Elder smp_mb(); 121807741308SAlex Elder return atomic_read(&obj_request->done) != 0; 121907741308SAlex Elder } 122007741308SAlex Elder 12210c425248SAlex Elder /* 12220c425248SAlex Elder * The default/initial value for all image request flags is 0. Each 12230c425248SAlex Elder * is conditionally set to 1 at image request initialization time 12240c425248SAlex Elder * and currently never change thereafter. 12250c425248SAlex Elder */ 12260c425248SAlex Elder static void img_request_write_set(struct rbd_img_request *img_request) 12270c425248SAlex Elder { 12280c425248SAlex Elder set_bit(IMG_REQ_WRITE, &img_request->flags); 12290c425248SAlex Elder smp_mb(); 12300c425248SAlex Elder } 12310c425248SAlex Elder 12320c425248SAlex Elder static bool img_request_write_test(struct rbd_img_request *img_request) 12330c425248SAlex Elder { 12340c425248SAlex Elder smp_mb(); 12350c425248SAlex Elder return test_bit(IMG_REQ_WRITE, &img_request->flags) != 0; 12360c425248SAlex Elder } 12370c425248SAlex Elder 12389849e986SAlex Elder static void img_request_child_set(struct rbd_img_request *img_request) 12399849e986SAlex Elder { 12409849e986SAlex Elder set_bit(IMG_REQ_CHILD, &img_request->flags); 12419849e986SAlex Elder smp_mb(); 12429849e986SAlex Elder } 12439849e986SAlex Elder 12449849e986SAlex Elder static bool img_request_child_test(struct rbd_img_request *img_request) 12459849e986SAlex Elder { 12469849e986SAlex Elder smp_mb(); 12479849e986SAlex Elder return test_bit(IMG_REQ_CHILD, &img_request->flags) != 0; 12489849e986SAlex Elder } 12499849e986SAlex Elder 12506e2a4505SAlex Elder static void 12516e2a4505SAlex Elder 
rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request) 12526e2a4505SAlex Elder { 12536e2a4505SAlex Elder dout("%s: obj %p img %p result %d %llu/%llu\n", __func__, 12546e2a4505SAlex Elder obj_request, obj_request->img_request, obj_request->result, 12556e2a4505SAlex Elder obj_request->xferred, obj_request->length); 12566e2a4505SAlex Elder /* 12576e2a4505SAlex Elder * ENOENT means a hole in the image. We zero-fill the 12586e2a4505SAlex Elder * entire length of the request. A short read also implies 12596e2a4505SAlex Elder * zero-fill to the end of the request. Either way we 12606e2a4505SAlex Elder * update the xferred count to indicate the whole request 12616e2a4505SAlex Elder * was satisfied. 12626e2a4505SAlex Elder */ 12636e2a4505SAlex Elder BUG_ON(obj_request->type != OBJ_REQUEST_BIO); 12646e2a4505SAlex Elder if (obj_request->result == -ENOENT) { 12656e2a4505SAlex Elder zero_bio_chain(obj_request->bio_list, 0); 12666e2a4505SAlex Elder obj_request->result = 0; 12676e2a4505SAlex Elder obj_request->xferred = obj_request->length; 12686e2a4505SAlex Elder } else if (obj_request->xferred < obj_request->length && 12696e2a4505SAlex Elder !obj_request->result) { 12706e2a4505SAlex Elder zero_bio_chain(obj_request->bio_list, obj_request->xferred); 12716e2a4505SAlex Elder obj_request->xferred = obj_request->length; 12726e2a4505SAlex Elder } 12736e2a4505SAlex Elder obj_request_done_set(obj_request); 12746e2a4505SAlex Elder } 12756e2a4505SAlex Elder 1276bf0d5f50SAlex Elder static void rbd_obj_request_complete(struct rbd_obj_request *obj_request) 1277bf0d5f50SAlex Elder { 127837206ee5SAlex Elder dout("%s: obj %p cb %p\n", __func__, obj_request, 127937206ee5SAlex Elder obj_request->callback); 1280bf0d5f50SAlex Elder if (obj_request->callback) 1281bf0d5f50SAlex Elder obj_request->callback(obj_request); 1282788e2df3SAlex Elder else 1283788e2df3SAlex Elder complete_all(&obj_request->completion); 1284bf0d5f50SAlex Elder } 1285bf0d5f50SAlex Elder 1286c47f9371SAlex Elder 
static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request) 128739bf2c5dSAlex Elder { 128839bf2c5dSAlex Elder dout("%s: obj %p\n", __func__, obj_request); 128939bf2c5dSAlex Elder obj_request_done_set(obj_request); 129039bf2c5dSAlex Elder } 129139bf2c5dSAlex Elder 1292c47f9371SAlex Elder static void rbd_osd_read_callback(struct rbd_obj_request *obj_request) 1293bf0d5f50SAlex Elder { 129437206ee5SAlex Elder dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request, 1295c47f9371SAlex Elder obj_request->result, obj_request->xferred, obj_request->length); 12966e2a4505SAlex Elder if (obj_request->img_request) 12976e2a4505SAlex Elder rbd_img_obj_request_read_callback(obj_request); 12986e2a4505SAlex Elder else 129907741308SAlex Elder obj_request_done_set(obj_request); 1300bf0d5f50SAlex Elder } 1301bf0d5f50SAlex Elder 1302c47f9371SAlex Elder static void rbd_osd_write_callback(struct rbd_obj_request *obj_request) 1303bf0d5f50SAlex Elder { 13041b83bef2SSage Weil dout("%s: obj %p result %d %llu\n", __func__, obj_request, 13051b83bef2SSage Weil obj_request->result, obj_request->length); 13061b83bef2SSage Weil /* 13071b83bef2SSage Weil * There is no such thing as a successful short write. 13081b83bef2SSage Weil * Our xferred value is the number of bytes transferred 13091b83bef2SSage Weil * back. Set it to our originally-requested length. 13101b83bef2SSage Weil */ 13111b83bef2SSage Weil obj_request->xferred = obj_request->length; 131207741308SAlex Elder obj_request_done_set(obj_request); 1313bf0d5f50SAlex Elder } 1314bf0d5f50SAlex Elder 1315fbfab539SAlex Elder /* 1316fbfab539SAlex Elder * For a simple stat call there's nothing to do. We'll do more if 1317fbfab539SAlex Elder * this is part of a write sequence for a layered image. 
1318fbfab539SAlex Elder */ 1319c47f9371SAlex Elder static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request) 1320fbfab539SAlex Elder { 132137206ee5SAlex Elder dout("%s: obj %p\n", __func__, obj_request); 1322fbfab539SAlex Elder obj_request_done_set(obj_request); 1323fbfab539SAlex Elder } 1324fbfab539SAlex Elder 1325bf0d5f50SAlex Elder static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, 1326bf0d5f50SAlex Elder struct ceph_msg *msg) 1327bf0d5f50SAlex Elder { 1328bf0d5f50SAlex Elder struct rbd_obj_request *obj_request = osd_req->r_priv; 1329bf0d5f50SAlex Elder u16 opcode; 1330bf0d5f50SAlex Elder 133137206ee5SAlex Elder dout("%s: osd_req %p msg %p\n", __func__, osd_req, msg); 1332bf0d5f50SAlex Elder rbd_assert(osd_req == obj_request->osd_req); 1333bf0d5f50SAlex Elder rbd_assert(!!obj_request->img_request ^ 1334bf0d5f50SAlex Elder (obj_request->which == BAD_WHICH)); 1335bf0d5f50SAlex Elder 13361b83bef2SSage Weil if (osd_req->r_result < 0) 13371b83bef2SSage Weil obj_request->result = osd_req->r_result; 1338bf0d5f50SAlex Elder obj_request->version = le64_to_cpu(osd_req->r_reassert_version.version); 1339bf0d5f50SAlex Elder 13401b83bef2SSage Weil WARN_ON(osd_req->r_num_ops != 1); /* For now */ 1341bf0d5f50SAlex Elder 1342c47f9371SAlex Elder /* 1343c47f9371SAlex Elder * We support a 64-bit length, but ultimately it has to be 1344c47f9371SAlex Elder * passed to blk_end_request(), which takes an unsigned int. 
1345c47f9371SAlex Elder */ 13461b83bef2SSage Weil obj_request->xferred = osd_req->r_reply_op_len[0]; 1347c47f9371SAlex Elder rbd_assert(obj_request->xferred < (u64) UINT_MAX); 134879528734SAlex Elder opcode = osd_req->r_ops[0].op; 1349bf0d5f50SAlex Elder switch (opcode) { 1350bf0d5f50SAlex Elder case CEPH_OSD_OP_READ: 1351c47f9371SAlex Elder rbd_osd_read_callback(obj_request); 1352bf0d5f50SAlex Elder break; 1353bf0d5f50SAlex Elder case CEPH_OSD_OP_WRITE: 1354c47f9371SAlex Elder rbd_osd_write_callback(obj_request); 1355bf0d5f50SAlex Elder break; 1356fbfab539SAlex Elder case CEPH_OSD_OP_STAT: 1357c47f9371SAlex Elder rbd_osd_stat_callback(obj_request); 1358fbfab539SAlex Elder break; 135936be9a76SAlex Elder case CEPH_OSD_OP_CALL: 1360b8d70035SAlex Elder case CEPH_OSD_OP_NOTIFY_ACK: 13619969ebc5SAlex Elder case CEPH_OSD_OP_WATCH: 1362c47f9371SAlex Elder rbd_osd_trivial_callback(obj_request); 13639969ebc5SAlex Elder break; 1364bf0d5f50SAlex Elder default: 1365bf0d5f50SAlex Elder rbd_warn(NULL, "%s: unsupported op %hu\n", 1366bf0d5f50SAlex Elder obj_request->object_name, (unsigned short) opcode); 1367bf0d5f50SAlex Elder break; 1368bf0d5f50SAlex Elder } 1369bf0d5f50SAlex Elder 137007741308SAlex Elder if (obj_request_done_test(obj_request)) 1371bf0d5f50SAlex Elder rbd_obj_request_complete(obj_request); 1372bf0d5f50SAlex Elder } 1373bf0d5f50SAlex Elder 13742fa12320SAlex Elder static void rbd_osd_req_format(struct rbd_obj_request *obj_request, 137579528734SAlex Elder bool write_request) 1376430c28c3SAlex Elder { 1377430c28c3SAlex Elder struct rbd_img_request *img_request = obj_request->img_request; 13788c042b0dSAlex Elder struct ceph_osd_request *osd_req = obj_request->osd_req; 1379430c28c3SAlex Elder struct ceph_snap_context *snapc = NULL; 1380430c28c3SAlex Elder u64 snap_id = CEPH_NOSNAP; 1381430c28c3SAlex Elder struct timespec *mtime = NULL; 1382430c28c3SAlex Elder struct timespec now; 1383430c28c3SAlex Elder 13848c042b0dSAlex Elder rbd_assert(osd_req != NULL); 
1385430c28c3SAlex Elder 1386430c28c3SAlex Elder if (write_request) { 1387430c28c3SAlex Elder now = CURRENT_TIME; 1388430c28c3SAlex Elder mtime = &now; 1389430c28c3SAlex Elder if (img_request) 1390430c28c3SAlex Elder snapc = img_request->snapc; 13912fa12320SAlex Elder } else if (img_request) { 1392430c28c3SAlex Elder snap_id = img_request->snap_id; 1393430c28c3SAlex Elder } 13948c042b0dSAlex Elder ceph_osdc_build_request(osd_req, obj_request->offset, 139579528734SAlex Elder snapc, snap_id, mtime); 1396430c28c3SAlex Elder } 1397430c28c3SAlex Elder 1398bf0d5f50SAlex Elder static struct ceph_osd_request *rbd_osd_req_create( 1399bf0d5f50SAlex Elder struct rbd_device *rbd_dev, 1400bf0d5f50SAlex Elder bool write_request, 1401430c28c3SAlex Elder struct rbd_obj_request *obj_request) 1402bf0d5f50SAlex Elder { 1403bf0d5f50SAlex Elder struct rbd_img_request *img_request = obj_request->img_request; 1404bf0d5f50SAlex Elder struct ceph_snap_context *snapc = NULL; 1405bf0d5f50SAlex Elder struct ceph_osd_client *osdc; 1406bf0d5f50SAlex Elder struct ceph_osd_request *osd_req; 1407bf0d5f50SAlex Elder 1408bf0d5f50SAlex Elder if (img_request) { 14090c425248SAlex Elder rbd_assert(write_request == 14100c425248SAlex Elder img_request_write_test(img_request)); 14110c425248SAlex Elder if (write_request) 1412bf0d5f50SAlex Elder snapc = img_request->snapc; 1413bf0d5f50SAlex Elder } 1414bf0d5f50SAlex Elder 1415bf0d5f50SAlex Elder /* Allocate and initialize the request, for the single op */ 1416bf0d5f50SAlex Elder 1417bf0d5f50SAlex Elder osdc = &rbd_dev->rbd_client->client->osdc; 1418bf0d5f50SAlex Elder osd_req = ceph_osdc_alloc_request(osdc, snapc, 1, false, GFP_ATOMIC); 1419bf0d5f50SAlex Elder if (!osd_req) 1420bf0d5f50SAlex Elder return NULL; /* ENOMEM */ 1421bf0d5f50SAlex Elder 1422430c28c3SAlex Elder if (write_request) 1423bf0d5f50SAlex Elder osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; 1424430c28c3SAlex Elder else 1425bf0d5f50SAlex Elder osd_req->r_flags = 
CEPH_OSD_FLAG_READ; 1426bf0d5f50SAlex Elder 1427bf0d5f50SAlex Elder osd_req->r_callback = rbd_osd_req_callback; 1428bf0d5f50SAlex Elder osd_req->r_priv = obj_request; 1429bf0d5f50SAlex Elder 1430bf0d5f50SAlex Elder osd_req->r_oid_len = strlen(obj_request->object_name); 1431bf0d5f50SAlex Elder rbd_assert(osd_req->r_oid_len < sizeof (osd_req->r_oid)); 1432bf0d5f50SAlex Elder memcpy(osd_req->r_oid, obj_request->object_name, osd_req->r_oid_len); 1433bf0d5f50SAlex Elder 1434bf0d5f50SAlex Elder osd_req->r_file_layout = rbd_dev->layout; /* struct */ 1435bf0d5f50SAlex Elder 1436bf0d5f50SAlex Elder return osd_req; 1437bf0d5f50SAlex Elder } 1438bf0d5f50SAlex Elder 1439bf0d5f50SAlex Elder static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req) 1440bf0d5f50SAlex Elder { 1441bf0d5f50SAlex Elder ceph_osdc_put_request(osd_req); 1442bf0d5f50SAlex Elder } 1443bf0d5f50SAlex Elder 1444bf0d5f50SAlex Elder /* object_name is assumed to be a non-null pointer and NUL-terminated */ 1445bf0d5f50SAlex Elder 1446bf0d5f50SAlex Elder static struct rbd_obj_request *rbd_obj_request_create(const char *object_name, 1447bf0d5f50SAlex Elder u64 offset, u64 length, 1448bf0d5f50SAlex Elder enum obj_request_type type) 1449bf0d5f50SAlex Elder { 1450bf0d5f50SAlex Elder struct rbd_obj_request *obj_request; 1451bf0d5f50SAlex Elder size_t size; 1452bf0d5f50SAlex Elder char *name; 1453bf0d5f50SAlex Elder 1454bf0d5f50SAlex Elder rbd_assert(obj_request_type_valid(type)); 1455bf0d5f50SAlex Elder 1456bf0d5f50SAlex Elder size = strlen(object_name) + 1; 1457bf0d5f50SAlex Elder obj_request = kzalloc(sizeof (*obj_request) + size, GFP_KERNEL); 1458bf0d5f50SAlex Elder if (!obj_request) 1459bf0d5f50SAlex Elder return NULL; 1460bf0d5f50SAlex Elder 1461bf0d5f50SAlex Elder name = (char *)(obj_request + 1); 1462bf0d5f50SAlex Elder obj_request->object_name = memcpy(name, object_name, size); 1463bf0d5f50SAlex Elder obj_request->offset = offset; 1464bf0d5f50SAlex Elder obj_request->length = length; 1465bf0d5f50SAlex 
Elder obj_request->which = BAD_WHICH; 1466bf0d5f50SAlex Elder obj_request->type = type; 1467bf0d5f50SAlex Elder INIT_LIST_HEAD(&obj_request->links); 146807741308SAlex Elder obj_request_done_init(obj_request); 1469788e2df3SAlex Elder init_completion(&obj_request->completion); 1470bf0d5f50SAlex Elder kref_init(&obj_request->kref); 1471bf0d5f50SAlex Elder 147237206ee5SAlex Elder dout("%s: \"%s\" %llu/%llu %d -> obj %p\n", __func__, object_name, 147337206ee5SAlex Elder offset, length, (int)type, obj_request); 147437206ee5SAlex Elder 1475bf0d5f50SAlex Elder return obj_request; 1476bf0d5f50SAlex Elder } 1477bf0d5f50SAlex Elder 1478bf0d5f50SAlex Elder static void rbd_obj_request_destroy(struct kref *kref) 1479bf0d5f50SAlex Elder { 1480bf0d5f50SAlex Elder struct rbd_obj_request *obj_request; 1481bf0d5f50SAlex Elder 1482bf0d5f50SAlex Elder obj_request = container_of(kref, struct rbd_obj_request, kref); 1483bf0d5f50SAlex Elder 148437206ee5SAlex Elder dout("%s: obj %p\n", __func__, obj_request); 148537206ee5SAlex Elder 1486bf0d5f50SAlex Elder rbd_assert(obj_request->img_request == NULL); 1487bf0d5f50SAlex Elder rbd_assert(obj_request->which == BAD_WHICH); 1488bf0d5f50SAlex Elder 1489bf0d5f50SAlex Elder if (obj_request->osd_req) 1490bf0d5f50SAlex Elder rbd_osd_req_destroy(obj_request->osd_req); 1491bf0d5f50SAlex Elder 1492bf0d5f50SAlex Elder rbd_assert(obj_request_type_valid(obj_request->type)); 1493bf0d5f50SAlex Elder switch (obj_request->type) { 14949969ebc5SAlex Elder case OBJ_REQUEST_NODATA: 14959969ebc5SAlex Elder break; /* Nothing to do */ 1496bf0d5f50SAlex Elder case OBJ_REQUEST_BIO: 1497bf0d5f50SAlex Elder if (obj_request->bio_list) 1498bf0d5f50SAlex Elder bio_chain_put(obj_request->bio_list); 1499bf0d5f50SAlex Elder break; 1500788e2df3SAlex Elder case OBJ_REQUEST_PAGES: 1501788e2df3SAlex Elder if (obj_request->pages) 1502788e2df3SAlex Elder ceph_release_page_vector(obj_request->pages, 1503788e2df3SAlex Elder obj_request->page_count); 1504788e2df3SAlex Elder break; 
1505bf0d5f50SAlex Elder } 1506bf0d5f50SAlex Elder 1507bf0d5f50SAlex Elder kfree(obj_request); 1508bf0d5f50SAlex Elder } 1509bf0d5f50SAlex Elder 1510bf0d5f50SAlex Elder /* 1511bf0d5f50SAlex Elder * Caller is responsible for filling in the list of object requests 1512bf0d5f50SAlex Elder * that comprises the image request, and the Linux request pointer 1513bf0d5f50SAlex Elder * (if there is one). 1514bf0d5f50SAlex Elder */ 1515cc344fa1SAlex Elder static struct rbd_img_request *rbd_img_request_create( 1516cc344fa1SAlex Elder struct rbd_device *rbd_dev, 1517bf0d5f50SAlex Elder u64 offset, u64 length, 15189849e986SAlex Elder bool write_request, 15199849e986SAlex Elder bool child_request) 1520bf0d5f50SAlex Elder { 1521bf0d5f50SAlex Elder struct rbd_img_request *img_request; 1522bf0d5f50SAlex Elder struct ceph_snap_context *snapc = NULL; 1523bf0d5f50SAlex Elder 1524bf0d5f50SAlex Elder img_request = kmalloc(sizeof (*img_request), GFP_ATOMIC); 1525bf0d5f50SAlex Elder if (!img_request) 1526bf0d5f50SAlex Elder return NULL; 1527bf0d5f50SAlex Elder 1528bf0d5f50SAlex Elder if (write_request) { 1529bf0d5f50SAlex Elder down_read(&rbd_dev->header_rwsem); 1530bf0d5f50SAlex Elder snapc = ceph_get_snap_context(rbd_dev->header.snapc); 1531bf0d5f50SAlex Elder up_read(&rbd_dev->header_rwsem); 1532bf0d5f50SAlex Elder if (WARN_ON(!snapc)) { 1533bf0d5f50SAlex Elder kfree(img_request); 1534bf0d5f50SAlex Elder return NULL; /* Shouldn't happen */ 1535bf0d5f50SAlex Elder } 15360c425248SAlex Elder 1537bf0d5f50SAlex Elder } 1538bf0d5f50SAlex Elder 1539bf0d5f50SAlex Elder img_request->rq = NULL; 1540bf0d5f50SAlex Elder img_request->rbd_dev = rbd_dev; 1541bf0d5f50SAlex Elder img_request->offset = offset; 1542bf0d5f50SAlex Elder img_request->length = length; 15430c425248SAlex Elder img_request->flags = 0; 15440c425248SAlex Elder if (write_request) { 15450c425248SAlex Elder img_request_write_set(img_request); 1546bf0d5f50SAlex Elder img_request->snapc = snapc; 15470c425248SAlex Elder } else { 
1548bf0d5f50SAlex Elder img_request->snap_id = rbd_dev->spec->snap_id; 15490c425248SAlex Elder } 15509849e986SAlex Elder if (child_request) 15519849e986SAlex Elder img_request_child_set(img_request); 1552bf0d5f50SAlex Elder spin_lock_init(&img_request->completion_lock); 1553bf0d5f50SAlex Elder img_request->next_completion = 0; 1554bf0d5f50SAlex Elder img_request->callback = NULL; 1555a5a337d4SAlex Elder img_request->result = 0; 1556bf0d5f50SAlex Elder img_request->obj_request_count = 0; 1557bf0d5f50SAlex Elder INIT_LIST_HEAD(&img_request->obj_requests); 1558bf0d5f50SAlex Elder kref_init(&img_request->kref); 1559bf0d5f50SAlex Elder 1560bf0d5f50SAlex Elder rbd_img_request_get(img_request); /* Avoid a warning */ 1561bf0d5f50SAlex Elder rbd_img_request_put(img_request); /* TEMPORARY */ 1562bf0d5f50SAlex Elder 156337206ee5SAlex Elder dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev, 156437206ee5SAlex Elder write_request ? "write" : "read", offset, length, 156537206ee5SAlex Elder img_request); 156637206ee5SAlex Elder 1567bf0d5f50SAlex Elder return img_request; 1568bf0d5f50SAlex Elder } 1569bf0d5f50SAlex Elder 1570bf0d5f50SAlex Elder static void rbd_img_request_destroy(struct kref *kref) 1571bf0d5f50SAlex Elder { 1572bf0d5f50SAlex Elder struct rbd_img_request *img_request; 1573bf0d5f50SAlex Elder struct rbd_obj_request *obj_request; 1574bf0d5f50SAlex Elder struct rbd_obj_request *next_obj_request; 1575bf0d5f50SAlex Elder 1576bf0d5f50SAlex Elder img_request = container_of(kref, struct rbd_img_request, kref); 1577bf0d5f50SAlex Elder 157837206ee5SAlex Elder dout("%s: img %p\n", __func__, img_request); 157937206ee5SAlex Elder 1580bf0d5f50SAlex Elder for_each_obj_request_safe(img_request, obj_request, next_obj_request) 1581bf0d5f50SAlex Elder rbd_img_obj_request_del(img_request, obj_request); 158225dcf954SAlex Elder rbd_assert(img_request->obj_request_count == 0); 1583bf0d5f50SAlex Elder 15840c425248SAlex Elder if (img_request_write_test(img_request)) 
1585bf0d5f50SAlex Elder ceph_put_snap_context(img_request->snapc); 1586bf0d5f50SAlex Elder 1587bf0d5f50SAlex Elder kfree(img_request); 1588bf0d5f50SAlex Elder } 1589bf0d5f50SAlex Elder 15902169238dSAlex Elder static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) 15912169238dSAlex Elder { 15922169238dSAlex Elder struct rbd_img_request *img_request; 15932169238dSAlex Elder u32 which = obj_request->which; 15942169238dSAlex Elder bool more = true; 15952169238dSAlex Elder 15962169238dSAlex Elder img_request = obj_request->img_request; 15972169238dSAlex Elder 15982169238dSAlex Elder dout("%s: img %p obj %p\n", __func__, img_request, obj_request); 15992169238dSAlex Elder rbd_assert(img_request != NULL); 16009849e986SAlex Elder rbd_assert(!img_request_child_test(img_request)) 16012169238dSAlex Elder rbd_assert(img_request->rq != NULL); 16029849e986SAlex Elder 16032169238dSAlex Elder rbd_assert(img_request->obj_request_count > 0); 16042169238dSAlex Elder rbd_assert(which != BAD_WHICH); 16052169238dSAlex Elder rbd_assert(which < img_request->obj_request_count); 16062169238dSAlex Elder rbd_assert(which >= img_request->next_completion); 16072169238dSAlex Elder 16082169238dSAlex Elder spin_lock_irq(&img_request->completion_lock); 16092169238dSAlex Elder if (which != img_request->next_completion) 16102169238dSAlex Elder goto out; 16112169238dSAlex Elder 16122169238dSAlex Elder for_each_obj_request_from(img_request, obj_request) { 16132169238dSAlex Elder unsigned int xferred; 16142169238dSAlex Elder int result; 16152169238dSAlex Elder 16162169238dSAlex Elder rbd_assert(more); 16172169238dSAlex Elder rbd_assert(which < img_request->obj_request_count); 16182169238dSAlex Elder 16192169238dSAlex Elder if (!obj_request_done_test(obj_request)) 16202169238dSAlex Elder break; 16212169238dSAlex Elder 16222169238dSAlex Elder rbd_assert(obj_request->xferred <= (u64)UINT_MAX); 16232169238dSAlex Elder xferred = (unsigned int)obj_request->xferred; 1624a5a337d4SAlex Elder result 
= obj_request->result; 1625a5a337d4SAlex Elder if (result) { 16267da22d29SAlex Elder struct rbd_device *rbd_dev = img_request->rbd_dev; 16277da22d29SAlex Elder 16287da22d29SAlex Elder rbd_warn(rbd_dev, "%s %llx at %llx (%llx)\n", 16290c425248SAlex Elder img_request_write_test(img_request) ? "write" 16300c425248SAlex Elder : "read", 16317da22d29SAlex Elder obj_request->length, obj_request->img_offset, 16327da22d29SAlex Elder obj_request->offset); 16337da22d29SAlex Elder rbd_warn(rbd_dev, " result %d xferred %x\n", 16342169238dSAlex Elder result, xferred); 1635a5a337d4SAlex Elder if (!img_request->result) 1636a5a337d4SAlex Elder img_request->result = result; 1637a5a337d4SAlex Elder } 16382169238dSAlex Elder 16392169238dSAlex Elder more = blk_end_request(img_request->rq, result, xferred); 16402169238dSAlex Elder which++; 16412169238dSAlex Elder } 16422169238dSAlex Elder 16432169238dSAlex Elder rbd_assert(more ^ (which == img_request->obj_request_count)); 16442169238dSAlex Elder img_request->next_completion = which; 16452169238dSAlex Elder out: 16462169238dSAlex Elder spin_unlock_irq(&img_request->completion_lock); 16472169238dSAlex Elder 16482169238dSAlex Elder if (!more) 16492169238dSAlex Elder rbd_img_request_complete(img_request); 16502169238dSAlex Elder } 16512169238dSAlex Elder 1652bf0d5f50SAlex Elder static int rbd_img_request_fill_bio(struct rbd_img_request *img_request, 1653bf0d5f50SAlex Elder struct bio *bio_list) 1654bf0d5f50SAlex Elder { 1655bf0d5f50SAlex Elder struct rbd_device *rbd_dev = img_request->rbd_dev; 1656bf0d5f50SAlex Elder struct rbd_obj_request *obj_request = NULL; 1657bf0d5f50SAlex Elder struct rbd_obj_request *next_obj_request; 16580c425248SAlex Elder bool write_request = img_request_write_test(img_request); 1659bf0d5f50SAlex Elder unsigned int bio_offset; 16607da22d29SAlex Elder u64 img_offset; 1661bf0d5f50SAlex Elder u64 resid; 1662bf0d5f50SAlex Elder u16 opcode; 1663bf0d5f50SAlex Elder 166437206ee5SAlex Elder dout("%s: img %p bio %p\n", 
__func__, img_request, bio_list); 166537206ee5SAlex Elder 1666430c28c3SAlex Elder opcode = write_request ? CEPH_OSD_OP_WRITE : CEPH_OSD_OP_READ; 1667bf0d5f50SAlex Elder bio_offset = 0; 16687da22d29SAlex Elder img_offset = img_request->offset; 16697da22d29SAlex Elder rbd_assert(img_offset == bio_list->bi_sector << SECTOR_SHIFT); 1670bf0d5f50SAlex Elder resid = img_request->length; 16714dda41d3SAlex Elder rbd_assert(resid > 0); 1672bf0d5f50SAlex Elder while (resid) { 16732fa12320SAlex Elder struct ceph_osd_request *osd_req; 1674bf0d5f50SAlex Elder const char *object_name; 1675bf0d5f50SAlex Elder unsigned int clone_size; 1676bf0d5f50SAlex Elder u64 offset; 1677bf0d5f50SAlex Elder u64 length; 1678bf0d5f50SAlex Elder 16797da22d29SAlex Elder object_name = rbd_segment_name(rbd_dev, img_offset); 1680bf0d5f50SAlex Elder if (!object_name) 1681bf0d5f50SAlex Elder goto out_unwind; 16827da22d29SAlex Elder offset = rbd_segment_offset(rbd_dev, img_offset); 16837da22d29SAlex Elder length = rbd_segment_length(rbd_dev, img_offset, resid); 1684bf0d5f50SAlex Elder obj_request = rbd_obj_request_create(object_name, 1685bf0d5f50SAlex Elder offset, length, 1686bf0d5f50SAlex Elder OBJ_REQUEST_BIO); 1687bf0d5f50SAlex Elder kfree(object_name); /* object request has its own copy */ 1688bf0d5f50SAlex Elder if (!obj_request) 1689bf0d5f50SAlex Elder goto out_unwind; 1690bf0d5f50SAlex Elder 1691bf0d5f50SAlex Elder rbd_assert(length <= (u64) UINT_MAX); 1692bf0d5f50SAlex Elder clone_size = (unsigned int) length; 1693bf0d5f50SAlex Elder obj_request->bio_list = bio_chain_clone_range(&bio_list, 1694bf0d5f50SAlex Elder &bio_offset, clone_size, 1695bf0d5f50SAlex Elder GFP_ATOMIC); 1696bf0d5f50SAlex Elder if (!obj_request->bio_list) 1697bf0d5f50SAlex Elder goto out_partial; 1698bf0d5f50SAlex Elder 16992fa12320SAlex Elder osd_req = rbd_osd_req_create(rbd_dev, write_request, 17002fa12320SAlex Elder obj_request); 17012fa12320SAlex Elder if (!osd_req) 1702bf0d5f50SAlex Elder goto out_partial; 
17032fa12320SAlex Elder obj_request->osd_req = osd_req; 17042169238dSAlex Elder obj_request->callback = rbd_img_obj_callback; 1705430c28c3SAlex Elder 17062fa12320SAlex Elder osd_req_op_extent_init(osd_req, 0, opcode, offset, length, 17072fa12320SAlex Elder 0, 0); 1708a4ce40a9SAlex Elder osd_req_op_extent_osd_data_bio(osd_req, 0, write_request, 1709a4ce40a9SAlex Elder obj_request->bio_list, obj_request->length); 17102fa12320SAlex Elder rbd_osd_req_format(obj_request, write_request); 1711430c28c3SAlex Elder 17127da22d29SAlex Elder obj_request->img_offset = img_offset; 1713bf0d5f50SAlex Elder rbd_img_obj_request_add(img_request, obj_request); 1714bf0d5f50SAlex Elder 17157da22d29SAlex Elder img_offset += length; 1716bf0d5f50SAlex Elder resid -= length; 1717bf0d5f50SAlex Elder } 1718bf0d5f50SAlex Elder 1719bf0d5f50SAlex Elder return 0; 1720bf0d5f50SAlex Elder 1721bf0d5f50SAlex Elder out_partial: 1722bf0d5f50SAlex Elder rbd_obj_request_put(obj_request); 1723bf0d5f50SAlex Elder out_unwind: 1724bf0d5f50SAlex Elder for_each_obj_request_safe(img_request, obj_request, next_obj_request) 1725bf0d5f50SAlex Elder rbd_obj_request_put(obj_request); 1726bf0d5f50SAlex Elder 1727bf0d5f50SAlex Elder return -ENOMEM; 1728bf0d5f50SAlex Elder } 1729bf0d5f50SAlex Elder 1730bf0d5f50SAlex Elder static int rbd_img_request_submit(struct rbd_img_request *img_request) 1731bf0d5f50SAlex Elder { 1732bf0d5f50SAlex Elder struct rbd_device *rbd_dev = img_request->rbd_dev; 1733bf0d5f50SAlex Elder struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 1734bf0d5f50SAlex Elder struct rbd_obj_request *obj_request; 173546faeed4SAlex Elder struct rbd_obj_request *next_obj_request; 1736bf0d5f50SAlex Elder 173737206ee5SAlex Elder dout("%s: img %p\n", __func__, img_request); 173846faeed4SAlex Elder for_each_obj_request_safe(img_request, obj_request, next_obj_request) { 1739bf0d5f50SAlex Elder int ret; 1740bf0d5f50SAlex Elder 1741bf0d5f50SAlex Elder ret = rbd_obj_request_submit(osdc, obj_request); 
1742bf0d5f50SAlex Elder if (ret) 1743bf0d5f50SAlex Elder return ret; 1744bf0d5f50SAlex Elder /* 1745bf0d5f50SAlex Elder * The image request has its own reference to each 1746bf0d5f50SAlex Elder * of its object requests, so we can safely drop the 1747bf0d5f50SAlex Elder * initial one here. 1748bf0d5f50SAlex Elder */ 1749bf0d5f50SAlex Elder rbd_obj_request_put(obj_request); 1750bf0d5f50SAlex Elder } 1751bf0d5f50SAlex Elder 1752bf0d5f50SAlex Elder return 0; 1753bf0d5f50SAlex Elder } 1754bf0d5f50SAlex Elder 1755cf81b60eSAlex Elder static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, 1756b8d70035SAlex Elder u64 ver, u64 notify_id) 1757b8d70035SAlex Elder { 1758b8d70035SAlex Elder struct rbd_obj_request *obj_request; 17592169238dSAlex Elder struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 1760b8d70035SAlex Elder int ret; 1761b8d70035SAlex Elder 1762b8d70035SAlex Elder obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, 1763b8d70035SAlex Elder OBJ_REQUEST_NODATA); 1764b8d70035SAlex Elder if (!obj_request) 1765b8d70035SAlex Elder return -ENOMEM; 1766b8d70035SAlex Elder 1767b8d70035SAlex Elder ret = -ENOMEM; 1768430c28c3SAlex Elder obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request); 1769b8d70035SAlex Elder if (!obj_request->osd_req) 1770b8d70035SAlex Elder goto out; 17712169238dSAlex Elder obj_request->callback = rbd_obj_request_put; 1772b8d70035SAlex Elder 1773c99d2d4aSAlex Elder osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_NOTIFY_ACK, 1774c99d2d4aSAlex Elder notify_id, ver, 0); 17752fa12320SAlex Elder rbd_osd_req_format(obj_request, false); 1776430c28c3SAlex Elder 1777b8d70035SAlex Elder ret = rbd_obj_request_submit(osdc, obj_request); 1778b8d70035SAlex Elder out: 1779cf81b60eSAlex Elder if (ret) 1780b8d70035SAlex Elder rbd_obj_request_put(obj_request); 1781b8d70035SAlex Elder 1782b8d70035SAlex Elder return ret; 1783b8d70035SAlex Elder } 1784b8d70035SAlex Elder 1785b8d70035SAlex Elder static void 
rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) 1786b8d70035SAlex Elder { 1787b8d70035SAlex Elder struct rbd_device *rbd_dev = (struct rbd_device *)data; 1788b8d70035SAlex Elder u64 hver; 1789b8d70035SAlex Elder int rc; 1790b8d70035SAlex Elder 1791b8d70035SAlex Elder if (!rbd_dev) 1792b8d70035SAlex Elder return; 1793b8d70035SAlex Elder 179437206ee5SAlex Elder dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__, 1795b8d70035SAlex Elder rbd_dev->header_name, (unsigned long long) notify_id, 1796b8d70035SAlex Elder (unsigned int) opcode); 1797b8d70035SAlex Elder rc = rbd_dev_refresh(rbd_dev, &hver); 1798b8d70035SAlex Elder if (rc) 1799b8d70035SAlex Elder rbd_warn(rbd_dev, "got notification but failed to " 1800b8d70035SAlex Elder " update snaps: %d\n", rc); 1801b8d70035SAlex Elder 1802cf81b60eSAlex Elder rbd_obj_notify_ack(rbd_dev, hver, notify_id); 1803b8d70035SAlex Elder } 1804b8d70035SAlex Elder 18059969ebc5SAlex Elder /* 18069969ebc5SAlex Elder * Request sync osd watch/unwatch. The value of "start" determines 18079969ebc5SAlex Elder * whether a watch request is being initiated or torn down. 
18089969ebc5SAlex Elder */ 18099969ebc5SAlex Elder static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start) 18109969ebc5SAlex Elder { 18119969ebc5SAlex Elder struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 18129969ebc5SAlex Elder struct rbd_obj_request *obj_request; 18139969ebc5SAlex Elder int ret; 18149969ebc5SAlex Elder 18159969ebc5SAlex Elder rbd_assert(start ^ !!rbd_dev->watch_event); 18169969ebc5SAlex Elder rbd_assert(start ^ !!rbd_dev->watch_request); 18179969ebc5SAlex Elder 18189969ebc5SAlex Elder if (start) { 18193c663bbdSAlex Elder ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev, 18209969ebc5SAlex Elder &rbd_dev->watch_event); 18219969ebc5SAlex Elder if (ret < 0) 18229969ebc5SAlex Elder return ret; 18238eb87565SAlex Elder rbd_assert(rbd_dev->watch_event != NULL); 18249969ebc5SAlex Elder } 18259969ebc5SAlex Elder 18269969ebc5SAlex Elder ret = -ENOMEM; 18279969ebc5SAlex Elder obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, 18289969ebc5SAlex Elder OBJ_REQUEST_NODATA); 18299969ebc5SAlex Elder if (!obj_request) 18309969ebc5SAlex Elder goto out_cancel; 18319969ebc5SAlex Elder 1832430c28c3SAlex Elder obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, obj_request); 1833430c28c3SAlex Elder if (!obj_request->osd_req) 1834430c28c3SAlex Elder goto out_cancel; 1835430c28c3SAlex Elder 18368eb87565SAlex Elder if (start) 1837975241afSAlex Elder ceph_osdc_set_request_linger(osdc, obj_request->osd_req); 18388eb87565SAlex Elder else 18396977c3f9SAlex Elder ceph_osdc_unregister_linger_request(osdc, 1840975241afSAlex Elder rbd_dev->watch_request->osd_req); 18412169238dSAlex Elder 18422169238dSAlex Elder osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH, 18432169238dSAlex Elder rbd_dev->watch_event->cookie, 18442169238dSAlex Elder rbd_dev->header.obj_version, start); 18452169238dSAlex Elder rbd_osd_req_format(obj_request, true); 18462169238dSAlex Elder 18479969ebc5SAlex Elder ret = 
rbd_obj_request_submit(osdc, obj_request); 18489969ebc5SAlex Elder if (ret) 18499969ebc5SAlex Elder goto out_cancel; 18509969ebc5SAlex Elder ret = rbd_obj_request_wait(obj_request); 18519969ebc5SAlex Elder if (ret) 18529969ebc5SAlex Elder goto out_cancel; 18539969ebc5SAlex Elder ret = obj_request->result; 18549969ebc5SAlex Elder if (ret) 18559969ebc5SAlex Elder goto out_cancel; 18569969ebc5SAlex Elder 18578eb87565SAlex Elder /* 18588eb87565SAlex Elder * A watch request is set to linger, so the underlying osd 18598eb87565SAlex Elder * request won't go away until we unregister it. We retain 18608eb87565SAlex Elder * a pointer to the object request during that time (in 18618eb87565SAlex Elder * rbd_dev->watch_request), so we'll keep a reference to 18628eb87565SAlex Elder * it. We'll drop that reference (below) after we've 18638eb87565SAlex Elder * unregistered it. 18648eb87565SAlex Elder */ 18658eb87565SAlex Elder if (start) { 18668eb87565SAlex Elder rbd_dev->watch_request = obj_request; 18678eb87565SAlex Elder 18688eb87565SAlex Elder return 0; 18698eb87565SAlex Elder } 18708eb87565SAlex Elder 18718eb87565SAlex Elder /* We have successfully torn down the watch request */ 18728eb87565SAlex Elder 18738eb87565SAlex Elder rbd_obj_request_put(rbd_dev->watch_request); 18748eb87565SAlex Elder rbd_dev->watch_request = NULL; 18759969ebc5SAlex Elder out_cancel: 18769969ebc5SAlex Elder /* Cancel the event if we're tearing down, or on error */ 18779969ebc5SAlex Elder ceph_osdc_cancel_event(rbd_dev->watch_event); 18789969ebc5SAlex Elder rbd_dev->watch_event = NULL; 18799969ebc5SAlex Elder if (obj_request) 18809969ebc5SAlex Elder rbd_obj_request_put(obj_request); 18819969ebc5SAlex Elder 18829969ebc5SAlex Elder return ret; 18839969ebc5SAlex Elder } 18849969ebc5SAlex Elder 188536be9a76SAlex Elder /* 188636be9a76SAlex Elder * Synchronous osd object method call 188736be9a76SAlex Elder */ 188836be9a76SAlex Elder static int rbd_obj_method_sync(struct rbd_device *rbd_dev, 
188936be9a76SAlex Elder const char *object_name, 189036be9a76SAlex Elder const char *class_name, 189136be9a76SAlex Elder const char *method_name, 189236be9a76SAlex Elder const char *outbound, 189336be9a76SAlex Elder size_t outbound_size, 189436be9a76SAlex Elder char *inbound, 189536be9a76SAlex Elder size_t inbound_size, 189636be9a76SAlex Elder u64 *version) 189736be9a76SAlex Elder { 18982169238dSAlex Elder struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 189936be9a76SAlex Elder struct rbd_obj_request *obj_request; 190036be9a76SAlex Elder struct page **pages; 190136be9a76SAlex Elder u32 page_count; 190236be9a76SAlex Elder int ret; 190336be9a76SAlex Elder 190436be9a76SAlex Elder /* 19056010a451SAlex Elder * Method calls are ultimately read operations. The result 19066010a451SAlex Elder * should placed into the inbound buffer provided. They 19076010a451SAlex Elder * also supply outbound data--parameters for the object 19086010a451SAlex Elder * method. Currently if this is present it will be a 19096010a451SAlex Elder * snapshot id. 
191036be9a76SAlex Elder */ 191136be9a76SAlex Elder page_count = (u32) calc_pages_for(0, inbound_size); 191236be9a76SAlex Elder pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); 191336be9a76SAlex Elder if (IS_ERR(pages)) 191436be9a76SAlex Elder return PTR_ERR(pages); 191536be9a76SAlex Elder 191636be9a76SAlex Elder ret = -ENOMEM; 19176010a451SAlex Elder obj_request = rbd_obj_request_create(object_name, 0, inbound_size, 191836be9a76SAlex Elder OBJ_REQUEST_PAGES); 191936be9a76SAlex Elder if (!obj_request) 192036be9a76SAlex Elder goto out; 192136be9a76SAlex Elder 192236be9a76SAlex Elder obj_request->pages = pages; 192336be9a76SAlex Elder obj_request->page_count = page_count; 192436be9a76SAlex Elder 1925430c28c3SAlex Elder obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request); 192636be9a76SAlex Elder if (!obj_request->osd_req) 192736be9a76SAlex Elder goto out; 192836be9a76SAlex Elder 1929c99d2d4aSAlex Elder osd_req_op_cls_init(obj_request->osd_req, 0, CEPH_OSD_OP_CALL, 193004017e29SAlex Elder class_name, method_name); 193104017e29SAlex Elder if (outbound_size) { 193204017e29SAlex Elder struct ceph_pagelist *pagelist; 193304017e29SAlex Elder 193404017e29SAlex Elder pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS); 193504017e29SAlex Elder if (!pagelist) 193604017e29SAlex Elder goto out; 193704017e29SAlex Elder 193804017e29SAlex Elder ceph_pagelist_init(pagelist); 193904017e29SAlex Elder ceph_pagelist_append(pagelist, outbound, outbound_size); 194004017e29SAlex Elder osd_req_op_cls_request_data_pagelist(obj_request->osd_req, 0, 194104017e29SAlex Elder pagelist); 194204017e29SAlex Elder } 1943a4ce40a9SAlex Elder osd_req_op_cls_response_data_pages(obj_request->osd_req, 0, 1944a4ce40a9SAlex Elder obj_request->pages, inbound_size, 194544cd188dSAlex Elder 0, false, false); 19462fa12320SAlex Elder rbd_osd_req_format(obj_request, false); 1947430c28c3SAlex Elder 194836be9a76SAlex Elder ret = rbd_obj_request_submit(osdc, obj_request); 194936be9a76SAlex Elder 
if (ret) 195036be9a76SAlex Elder goto out; 195136be9a76SAlex Elder ret = rbd_obj_request_wait(obj_request); 195236be9a76SAlex Elder if (ret) 195336be9a76SAlex Elder goto out; 195436be9a76SAlex Elder 195536be9a76SAlex Elder ret = obj_request->result; 195636be9a76SAlex Elder if (ret < 0) 195736be9a76SAlex Elder goto out; 195823ed6e13SAlex Elder ret = 0; 1959903bb32eSAlex Elder ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred); 196036be9a76SAlex Elder if (version) 196136be9a76SAlex Elder *version = obj_request->version; 196236be9a76SAlex Elder out: 196336be9a76SAlex Elder if (obj_request) 196436be9a76SAlex Elder rbd_obj_request_put(obj_request); 196536be9a76SAlex Elder else 196636be9a76SAlex Elder ceph_release_page_vector(pages, page_count); 196736be9a76SAlex Elder 196836be9a76SAlex Elder return ret; 196936be9a76SAlex Elder } 197036be9a76SAlex Elder 1971bf0d5f50SAlex Elder static void rbd_request_fn(struct request_queue *q) 1972cc344fa1SAlex Elder __releases(q->queue_lock) __acquires(q->queue_lock) 1973bf0d5f50SAlex Elder { 1974bf0d5f50SAlex Elder struct rbd_device *rbd_dev = q->queuedata; 1975bf0d5f50SAlex Elder bool read_only = rbd_dev->mapping.read_only; 1976bf0d5f50SAlex Elder struct request *rq; 1977bf0d5f50SAlex Elder int result; 1978bf0d5f50SAlex Elder 1979bf0d5f50SAlex Elder while ((rq = blk_fetch_request(q))) { 1980bf0d5f50SAlex Elder bool write_request = rq_data_dir(rq) == WRITE; 1981bf0d5f50SAlex Elder struct rbd_img_request *img_request; 1982bf0d5f50SAlex Elder u64 offset; 1983bf0d5f50SAlex Elder u64 length; 1984bf0d5f50SAlex Elder 1985bf0d5f50SAlex Elder /* Ignore any non-FS requests that filter through. 
*/ 1986bf0d5f50SAlex Elder 1987bf0d5f50SAlex Elder if (rq->cmd_type != REQ_TYPE_FS) { 19884dda41d3SAlex Elder dout("%s: non-fs request type %d\n", __func__, 19894dda41d3SAlex Elder (int) rq->cmd_type); 19904dda41d3SAlex Elder __blk_end_request_all(rq, 0); 19914dda41d3SAlex Elder continue; 19924dda41d3SAlex Elder } 19934dda41d3SAlex Elder 19944dda41d3SAlex Elder /* Ignore/skip any zero-length requests */ 19954dda41d3SAlex Elder 19964dda41d3SAlex Elder offset = (u64) blk_rq_pos(rq) << SECTOR_SHIFT; 19974dda41d3SAlex Elder length = (u64) blk_rq_bytes(rq); 19984dda41d3SAlex Elder 19994dda41d3SAlex Elder if (!length) { 20004dda41d3SAlex Elder dout("%s: zero-length request\n", __func__); 2001bf0d5f50SAlex Elder __blk_end_request_all(rq, 0); 2002bf0d5f50SAlex Elder continue; 2003bf0d5f50SAlex Elder } 2004bf0d5f50SAlex Elder 2005bf0d5f50SAlex Elder spin_unlock_irq(q->queue_lock); 2006bf0d5f50SAlex Elder 2007bf0d5f50SAlex Elder /* Disallow writes to a read-only device */ 2008bf0d5f50SAlex Elder 2009bf0d5f50SAlex Elder if (write_request) { 2010bf0d5f50SAlex Elder result = -EROFS; 2011bf0d5f50SAlex Elder if (read_only) 2012bf0d5f50SAlex Elder goto end_request; 2013bf0d5f50SAlex Elder rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP); 2014bf0d5f50SAlex Elder } 2015bf0d5f50SAlex Elder 20166d292906SAlex Elder /* 20176d292906SAlex Elder * Quit early if the mapped snapshot no longer 20186d292906SAlex Elder * exists. It's still possible the snapshot will 20196d292906SAlex Elder * have disappeared by the time our request arrives 20206d292906SAlex Elder * at the osd, but there's no sense in sending it if 20216d292906SAlex Elder * we already know. 
20226d292906SAlex Elder */ 20236d292906SAlex Elder if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) { 2024bf0d5f50SAlex Elder dout("request for non-existent snapshot"); 2025bf0d5f50SAlex Elder rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); 2026bf0d5f50SAlex Elder result = -ENXIO; 2027bf0d5f50SAlex Elder goto end_request; 2028bf0d5f50SAlex Elder } 2029bf0d5f50SAlex Elder 2030bf0d5f50SAlex Elder result = -EINVAL; 2031bf0d5f50SAlex Elder if (WARN_ON(offset && length > U64_MAX - offset + 1)) 2032bf0d5f50SAlex Elder goto end_request; /* Shouldn't happen */ 2033bf0d5f50SAlex Elder 2034bf0d5f50SAlex Elder result = -ENOMEM; 2035bf0d5f50SAlex Elder img_request = rbd_img_request_create(rbd_dev, offset, length, 20369849e986SAlex Elder write_request, false); 2037bf0d5f50SAlex Elder if (!img_request) 2038bf0d5f50SAlex Elder goto end_request; 2039bf0d5f50SAlex Elder 2040bf0d5f50SAlex Elder img_request->rq = rq; 2041bf0d5f50SAlex Elder 2042bf0d5f50SAlex Elder result = rbd_img_request_fill_bio(img_request, rq->bio); 2043bf0d5f50SAlex Elder if (!result) 2044bf0d5f50SAlex Elder result = rbd_img_request_submit(img_request); 2045bf0d5f50SAlex Elder if (result) 2046bf0d5f50SAlex Elder rbd_img_request_put(img_request); 2047bf0d5f50SAlex Elder end_request: 2048bf0d5f50SAlex Elder spin_lock_irq(q->queue_lock); 2049bf0d5f50SAlex Elder if (result < 0) { 20507da22d29SAlex Elder rbd_warn(rbd_dev, "%s %llx at %llx result %d\n", 20517da22d29SAlex Elder write_request ? "write" : "read", 20527da22d29SAlex Elder length, offset, result); 20537da22d29SAlex Elder 2054bf0d5f50SAlex Elder __blk_end_request_all(rq, result); 2055bf0d5f50SAlex Elder } 2056bf0d5f50SAlex Elder } 2057bf0d5f50SAlex Elder } 2058bf0d5f50SAlex Elder 2059602adf40SYehuda Sadeh /* 2060602adf40SYehuda Sadeh * a queue callback. Makes sure that we don't create a bio that spans across 2061602adf40SYehuda Sadeh * multiple osd objects. 
One exception would be with a single page bios, 2062f7760dadSAlex Elder * which we handle later at bio_chain_clone_range() 2063602adf40SYehuda Sadeh */ 2064602adf40SYehuda Sadeh static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, 2065602adf40SYehuda Sadeh struct bio_vec *bvec) 2066602adf40SYehuda Sadeh { 2067602adf40SYehuda Sadeh struct rbd_device *rbd_dev = q->queuedata; 2068e5cfeed2SAlex Elder sector_t sector_offset; 2069e5cfeed2SAlex Elder sector_t sectors_per_obj; 2070e5cfeed2SAlex Elder sector_t obj_sector_offset; 2071e5cfeed2SAlex Elder int ret; 2072602adf40SYehuda Sadeh 2073e5cfeed2SAlex Elder /* 2074e5cfeed2SAlex Elder * Find how far into its rbd object the partition-relative 2075e5cfeed2SAlex Elder * bio start sector is to offset relative to the enclosing 2076e5cfeed2SAlex Elder * device. 2077e5cfeed2SAlex Elder */ 2078e5cfeed2SAlex Elder sector_offset = get_start_sect(bmd->bi_bdev) + bmd->bi_sector; 2079e5cfeed2SAlex Elder sectors_per_obj = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT); 2080e5cfeed2SAlex Elder obj_sector_offset = sector_offset & (sectors_per_obj - 1); 2081593a9e7bSAlex Elder 2082e5cfeed2SAlex Elder /* 2083e5cfeed2SAlex Elder * Compute the number of bytes from that offset to the end 2084e5cfeed2SAlex Elder * of the object. Account for what's already used by the bio. 2085e5cfeed2SAlex Elder */ 2086e5cfeed2SAlex Elder ret = (int) (sectors_per_obj - obj_sector_offset) << SECTOR_SHIFT; 2087e5cfeed2SAlex Elder if (ret > bmd->bi_size) 2088e5cfeed2SAlex Elder ret -= bmd->bi_size; 2089e5cfeed2SAlex Elder else 2090e5cfeed2SAlex Elder ret = 0; 2091e5cfeed2SAlex Elder 2092e5cfeed2SAlex Elder /* 2093e5cfeed2SAlex Elder * Don't send back more than was asked for. And if the bio 2094e5cfeed2SAlex Elder * was empty, let the whole thing through because: "Note 2095e5cfeed2SAlex Elder * that a block device *must* allow a single page to be 2096e5cfeed2SAlex Elder * added to an empty bio." 
2097e5cfeed2SAlex Elder */ 2098e5cfeed2SAlex Elder rbd_assert(bvec->bv_len <= PAGE_SIZE); 2099e5cfeed2SAlex Elder if (ret > (int) bvec->bv_len || !bmd->bi_size) 2100e5cfeed2SAlex Elder ret = (int) bvec->bv_len; 2101e5cfeed2SAlex Elder 2102e5cfeed2SAlex Elder return ret; 2103602adf40SYehuda Sadeh } 2104602adf40SYehuda Sadeh 2105602adf40SYehuda Sadeh static void rbd_free_disk(struct rbd_device *rbd_dev) 2106602adf40SYehuda Sadeh { 2107602adf40SYehuda Sadeh struct gendisk *disk = rbd_dev->disk; 2108602adf40SYehuda Sadeh 2109602adf40SYehuda Sadeh if (!disk) 2110602adf40SYehuda Sadeh return; 2111602adf40SYehuda Sadeh 2112602adf40SYehuda Sadeh if (disk->flags & GENHD_FL_UP) 2113602adf40SYehuda Sadeh del_gendisk(disk); 2114602adf40SYehuda Sadeh if (disk->queue) 2115602adf40SYehuda Sadeh blk_cleanup_queue(disk->queue); 2116602adf40SYehuda Sadeh put_disk(disk); 2117602adf40SYehuda Sadeh } 2118602adf40SYehuda Sadeh 2119788e2df3SAlex Elder static int rbd_obj_read_sync(struct rbd_device *rbd_dev, 2120788e2df3SAlex Elder const char *object_name, 2121788e2df3SAlex Elder u64 offset, u64 length, 2122788e2df3SAlex Elder char *buf, u64 *version) 2123788e2df3SAlex Elder 2124788e2df3SAlex Elder { 21252169238dSAlex Elder struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 2126788e2df3SAlex Elder struct rbd_obj_request *obj_request; 2127788e2df3SAlex Elder struct page **pages = NULL; 2128788e2df3SAlex Elder u32 page_count; 21291ceae7efSAlex Elder size_t size; 2130788e2df3SAlex Elder int ret; 2131788e2df3SAlex Elder 2132788e2df3SAlex Elder page_count = (u32) calc_pages_for(offset, length); 2133788e2df3SAlex Elder pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); 2134788e2df3SAlex Elder if (IS_ERR(pages)) 2135788e2df3SAlex Elder ret = PTR_ERR(pages); 2136788e2df3SAlex Elder 2137788e2df3SAlex Elder ret = -ENOMEM; 2138788e2df3SAlex Elder obj_request = rbd_obj_request_create(object_name, offset, length, 2139788e2df3SAlex Elder OBJ_REQUEST_PAGES); 2140788e2df3SAlex Elder 
if (!obj_request) 2141788e2df3SAlex Elder goto out; 2142788e2df3SAlex Elder 2143788e2df3SAlex Elder obj_request->pages = pages; 2144788e2df3SAlex Elder obj_request->page_count = page_count; 2145788e2df3SAlex Elder 2146430c28c3SAlex Elder obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request); 2147788e2df3SAlex Elder if (!obj_request->osd_req) 2148788e2df3SAlex Elder goto out; 2149788e2df3SAlex Elder 2150c99d2d4aSAlex Elder osd_req_op_extent_init(obj_request->osd_req, 0, CEPH_OSD_OP_READ, 2151c99d2d4aSAlex Elder offset, length, 0, 0); 2152a4ce40a9SAlex Elder osd_req_op_extent_osd_data_pages(obj_request->osd_req, 0, false, 2153a4ce40a9SAlex Elder obj_request->pages, 215444cd188dSAlex Elder obj_request->length, 215544cd188dSAlex Elder obj_request->offset & ~PAGE_MASK, 215644cd188dSAlex Elder false, false); 21572fa12320SAlex Elder rbd_osd_req_format(obj_request, false); 2158430c28c3SAlex Elder 2159788e2df3SAlex Elder ret = rbd_obj_request_submit(osdc, obj_request); 2160788e2df3SAlex Elder if (ret) 2161788e2df3SAlex Elder goto out; 2162788e2df3SAlex Elder ret = rbd_obj_request_wait(obj_request); 2163788e2df3SAlex Elder if (ret) 2164788e2df3SAlex Elder goto out; 2165788e2df3SAlex Elder 2166788e2df3SAlex Elder ret = obj_request->result; 2167788e2df3SAlex Elder if (ret < 0) 2168788e2df3SAlex Elder goto out; 21691ceae7efSAlex Elder 21701ceae7efSAlex Elder rbd_assert(obj_request->xferred <= (u64) SIZE_MAX); 21711ceae7efSAlex Elder size = (size_t) obj_request->xferred; 2172903bb32eSAlex Elder ceph_copy_from_page_vector(pages, buf, 0, size); 217323ed6e13SAlex Elder rbd_assert(size <= (size_t) INT_MAX); 217423ed6e13SAlex Elder ret = (int) size; 2175788e2df3SAlex Elder if (version) 2176788e2df3SAlex Elder *version = obj_request->version; 2177788e2df3SAlex Elder out: 2178788e2df3SAlex Elder if (obj_request) 2179788e2df3SAlex Elder rbd_obj_request_put(obj_request); 2180788e2df3SAlex Elder else 2181788e2df3SAlex Elder ceph_release_page_vector(pages, page_count); 
2182788e2df3SAlex Elder 2183788e2df3SAlex Elder return ret; 2184788e2df3SAlex Elder } 2185788e2df3SAlex Elder 2186602adf40SYehuda Sadeh /* 21874156d998SAlex Elder * Read the complete header for the given rbd device. 21884156d998SAlex Elder * 21894156d998SAlex Elder * Returns a pointer to a dynamically-allocated buffer containing 21904156d998SAlex Elder * the complete and validated header. Caller can pass the address 21914156d998SAlex Elder * of a variable that will be filled in with the version of the 21924156d998SAlex Elder * header object at the time it was read. 21934156d998SAlex Elder * 21944156d998SAlex Elder * Returns a pointer-coded errno if a failure occurs. 21954156d998SAlex Elder */ 21964156d998SAlex Elder static struct rbd_image_header_ondisk * 21974156d998SAlex Elder rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) 21984156d998SAlex Elder { 21994156d998SAlex Elder struct rbd_image_header_ondisk *ondisk = NULL; 22004156d998SAlex Elder u32 snap_count = 0; 22014156d998SAlex Elder u64 names_size = 0; 22024156d998SAlex Elder u32 want_count; 22034156d998SAlex Elder int ret; 22044156d998SAlex Elder 22054156d998SAlex Elder /* 22064156d998SAlex Elder * The complete header will include an array of its 64-bit 22074156d998SAlex Elder * snapshot ids, followed by the names of those snapshots as 22084156d998SAlex Elder * a contiguous block of NUL-terminated strings. Note that 22094156d998SAlex Elder * the number of snapshots could change by the time we read 22104156d998SAlex Elder * it in, in which case we re-read it. 
22114156d998SAlex Elder */ 22124156d998SAlex Elder do { 22134156d998SAlex Elder size_t size; 22144156d998SAlex Elder 22154156d998SAlex Elder kfree(ondisk); 22164156d998SAlex Elder 22174156d998SAlex Elder size = sizeof (*ondisk); 22184156d998SAlex Elder size += snap_count * sizeof (struct rbd_image_snap_ondisk); 22194156d998SAlex Elder size += names_size; 22204156d998SAlex Elder ondisk = kmalloc(size, GFP_KERNEL); 22214156d998SAlex Elder if (!ondisk) 22224156d998SAlex Elder return ERR_PTR(-ENOMEM); 22234156d998SAlex Elder 2224788e2df3SAlex Elder ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name, 22254156d998SAlex Elder 0, size, 22264156d998SAlex Elder (char *) ondisk, version); 22274156d998SAlex Elder if (ret < 0) 22284156d998SAlex Elder goto out_err; 22294156d998SAlex Elder if (WARN_ON((size_t) ret < size)) { 22304156d998SAlex Elder ret = -ENXIO; 223106ecc6cbSAlex Elder rbd_warn(rbd_dev, "short header read (want %zd got %d)", 223206ecc6cbSAlex Elder size, ret); 22334156d998SAlex Elder goto out_err; 22344156d998SAlex Elder } 22354156d998SAlex Elder if (!rbd_dev_ondisk_valid(ondisk)) { 22364156d998SAlex Elder ret = -ENXIO; 223706ecc6cbSAlex Elder rbd_warn(rbd_dev, "invalid header"); 22384156d998SAlex Elder goto out_err; 22394156d998SAlex Elder } 22404156d998SAlex Elder 22414156d998SAlex Elder names_size = le64_to_cpu(ondisk->snap_names_len); 22424156d998SAlex Elder want_count = snap_count; 22434156d998SAlex Elder snap_count = le32_to_cpu(ondisk->snap_count); 22444156d998SAlex Elder } while (snap_count != want_count); 22454156d998SAlex Elder 22464156d998SAlex Elder return ondisk; 22474156d998SAlex Elder 22484156d998SAlex Elder out_err: 22494156d998SAlex Elder kfree(ondisk); 22504156d998SAlex Elder 22514156d998SAlex Elder return ERR_PTR(ret); 22524156d998SAlex Elder } 22534156d998SAlex Elder 22544156d998SAlex Elder /* 2255602adf40SYehuda Sadeh * reload the ondisk the header 2256602adf40SYehuda Sadeh */ 2257602adf40SYehuda Sadeh static int rbd_read_header(struct 
rbd_device *rbd_dev, 2258602adf40SYehuda Sadeh struct rbd_image_header *header) 2259602adf40SYehuda Sadeh { 22604156d998SAlex Elder struct rbd_image_header_ondisk *ondisk; 22614156d998SAlex Elder u64 ver = 0; 22624156d998SAlex Elder int ret; 2263602adf40SYehuda Sadeh 22644156d998SAlex Elder ondisk = rbd_dev_v1_header_read(rbd_dev, &ver); 22654156d998SAlex Elder if (IS_ERR(ondisk)) 22664156d998SAlex Elder return PTR_ERR(ondisk); 22674156d998SAlex Elder ret = rbd_header_from_disk(header, ondisk); 22684156d998SAlex Elder if (ret >= 0) 226959c2be1eSYehuda Sadeh header->obj_version = ver; 22704156d998SAlex Elder kfree(ondisk); 2271602adf40SYehuda Sadeh 22724156d998SAlex Elder return ret; 2273602adf40SYehuda Sadeh } 2274602adf40SYehuda Sadeh 227541f38c2bSAlex Elder static void rbd_remove_all_snaps(struct rbd_device *rbd_dev) 2276dfc5606dSYehuda Sadeh { 2277dfc5606dSYehuda Sadeh struct rbd_snap *snap; 2278a0593290SAlex Elder struct rbd_snap *next; 2279dfc5606dSYehuda Sadeh 2280a0593290SAlex Elder list_for_each_entry_safe(snap, next, &rbd_dev->snaps, node) 228141f38c2bSAlex Elder rbd_remove_snap_dev(snap); 2282dfc5606dSYehuda Sadeh } 2283dfc5606dSYehuda Sadeh 22849478554aSAlex Elder static void rbd_update_mapping_size(struct rbd_device *rbd_dev) 22859478554aSAlex Elder { 22869478554aSAlex Elder sector_t size; 22879478554aSAlex Elder 22880d7dbfceSAlex Elder if (rbd_dev->spec->snap_id != CEPH_NOSNAP) 22899478554aSAlex Elder return; 22909478554aSAlex Elder 22919478554aSAlex Elder size = (sector_t) rbd_dev->header.image_size / SECTOR_SIZE; 22929478554aSAlex Elder dout("setting size to %llu sectors", (unsigned long long) size); 22939478554aSAlex Elder rbd_dev->mapping.size = (u64) size; 22949478554aSAlex Elder set_capacity(rbd_dev->disk, size); 22959478554aSAlex Elder } 22969478554aSAlex Elder 2297602adf40SYehuda Sadeh /* 2298602adf40SYehuda Sadeh * only read the first part of the ondisk header, without the snaps info 2299602adf40SYehuda Sadeh */ 2300117973fbSAlex Elder static 
int rbd_dev_v1_refresh(struct rbd_device *rbd_dev, u64 *hver) 2301602adf40SYehuda Sadeh { 2302602adf40SYehuda Sadeh int ret; 2303602adf40SYehuda Sadeh struct rbd_image_header h; 2304602adf40SYehuda Sadeh 2305602adf40SYehuda Sadeh ret = rbd_read_header(rbd_dev, &h); 2306602adf40SYehuda Sadeh if (ret < 0) 2307602adf40SYehuda Sadeh return ret; 2308602adf40SYehuda Sadeh 2309a51aa0c0SJosh Durgin down_write(&rbd_dev->header_rwsem); 2310a51aa0c0SJosh Durgin 23119478554aSAlex Elder /* Update image size, and check for resize of mapped image */ 23129478554aSAlex Elder rbd_dev->header.image_size = h.image_size; 23139478554aSAlex Elder rbd_update_mapping_size(rbd_dev); 23149db4b3e3SSage Weil 2315849b4260SAlex Elder /* rbd_dev->header.object_prefix shouldn't change */ 2316602adf40SYehuda Sadeh kfree(rbd_dev->header.snap_sizes); 2317849b4260SAlex Elder kfree(rbd_dev->header.snap_names); 2318d1d25646SJosh Durgin /* osd requests may still refer to snapc */ 2319d1d25646SJosh Durgin ceph_put_snap_context(rbd_dev->header.snapc); 2320602adf40SYehuda Sadeh 2321b813623aSAlex Elder if (hver) 2322b813623aSAlex Elder *hver = h.obj_version; 2323a71b891bSJosh Durgin rbd_dev->header.obj_version = h.obj_version; 232493a24e08SJosh Durgin rbd_dev->header.image_size = h.image_size; 2325602adf40SYehuda Sadeh rbd_dev->header.snapc = h.snapc; 2326602adf40SYehuda Sadeh rbd_dev->header.snap_names = h.snap_names; 2327602adf40SYehuda Sadeh rbd_dev->header.snap_sizes = h.snap_sizes; 2328849b4260SAlex Elder /* Free the extra copy of the object prefix */ 2329849b4260SAlex Elder WARN_ON(strcmp(rbd_dev->header.object_prefix, h.object_prefix)); 2330849b4260SAlex Elder kfree(h.object_prefix); 2331849b4260SAlex Elder 2332304f6808SAlex Elder ret = rbd_dev_snaps_update(rbd_dev); 2333304f6808SAlex Elder if (!ret) 2334304f6808SAlex Elder ret = rbd_dev_snaps_register(rbd_dev); 2335dfc5606dSYehuda Sadeh 2336c666601aSJosh Durgin up_write(&rbd_dev->header_rwsem); 2337602adf40SYehuda Sadeh 2338dfc5606dSYehuda Sadeh 
return ret; 2339602adf40SYehuda Sadeh } 2340602adf40SYehuda Sadeh 2341117973fbSAlex Elder static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver) 23421fe5e993SAlex Elder { 23431fe5e993SAlex Elder int ret; 23441fe5e993SAlex Elder 2345117973fbSAlex Elder rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); 23461fe5e993SAlex Elder mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2347117973fbSAlex Elder if (rbd_dev->image_format == 1) 2348117973fbSAlex Elder ret = rbd_dev_v1_refresh(rbd_dev, hver); 2349117973fbSAlex Elder else 2350117973fbSAlex Elder ret = rbd_dev_v2_refresh(rbd_dev, hver); 23511fe5e993SAlex Elder mutex_unlock(&ctl_mutex); 23521fe5e993SAlex Elder 23531fe5e993SAlex Elder return ret; 23541fe5e993SAlex Elder } 23551fe5e993SAlex Elder 2356602adf40SYehuda Sadeh static int rbd_init_disk(struct rbd_device *rbd_dev) 2357602adf40SYehuda Sadeh { 2358602adf40SYehuda Sadeh struct gendisk *disk; 2359602adf40SYehuda Sadeh struct request_queue *q; 2360593a9e7bSAlex Elder u64 segment_size; 2361602adf40SYehuda Sadeh 2362602adf40SYehuda Sadeh /* create gendisk info */ 2363602adf40SYehuda Sadeh disk = alloc_disk(RBD_MINORS_PER_MAJOR); 2364602adf40SYehuda Sadeh if (!disk) 23651fcdb8aaSAlex Elder return -ENOMEM; 2366602adf40SYehuda Sadeh 2367f0f8cef5SAlex Elder snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", 2368de71a297SAlex Elder rbd_dev->dev_id); 2369602adf40SYehuda Sadeh disk->major = rbd_dev->major; 2370602adf40SYehuda Sadeh disk->first_minor = 0; 2371602adf40SYehuda Sadeh disk->fops = &rbd_bd_ops; 2372602adf40SYehuda Sadeh disk->private_data = rbd_dev; 2373602adf40SYehuda Sadeh 2374bf0d5f50SAlex Elder q = blk_init_queue(rbd_request_fn, &rbd_dev->lock); 2375602adf40SYehuda Sadeh if (!q) 2376602adf40SYehuda Sadeh goto out_disk; 2377029bcbd8SJosh Durgin 2378593a9e7bSAlex Elder /* We use the default size, but let's be explicit about it. 
*/ 2379593a9e7bSAlex Elder blk_queue_physical_block_size(q, SECTOR_SIZE); 2380593a9e7bSAlex Elder 2381029bcbd8SJosh Durgin /* set io sizes to object size */ 2382593a9e7bSAlex Elder segment_size = rbd_obj_bytes(&rbd_dev->header); 2383593a9e7bSAlex Elder blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); 2384593a9e7bSAlex Elder blk_queue_max_segment_size(q, segment_size); 2385593a9e7bSAlex Elder blk_queue_io_min(q, segment_size); 2386593a9e7bSAlex Elder blk_queue_io_opt(q, segment_size); 2387029bcbd8SJosh Durgin 2388602adf40SYehuda Sadeh blk_queue_merge_bvec(q, rbd_merge_bvec); 2389602adf40SYehuda Sadeh disk->queue = q; 2390602adf40SYehuda Sadeh 2391602adf40SYehuda Sadeh q->queuedata = rbd_dev; 2392602adf40SYehuda Sadeh 2393602adf40SYehuda Sadeh rbd_dev->disk = disk; 2394602adf40SYehuda Sadeh 239512f02944SAlex Elder set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); 239612f02944SAlex Elder 2397602adf40SYehuda Sadeh return 0; 2398602adf40SYehuda Sadeh out_disk: 2399602adf40SYehuda Sadeh put_disk(disk); 24001fcdb8aaSAlex Elder 24011fcdb8aaSAlex Elder return -ENOMEM; 2402602adf40SYehuda Sadeh } 2403602adf40SYehuda Sadeh 2404dfc5606dSYehuda Sadeh /* 2405dfc5606dSYehuda Sadeh sysfs 2406dfc5606dSYehuda Sadeh */ 2407602adf40SYehuda Sadeh 2408593a9e7bSAlex Elder static struct rbd_device *dev_to_rbd_dev(struct device *dev) 2409593a9e7bSAlex Elder { 2410593a9e7bSAlex Elder return container_of(dev, struct rbd_device, dev); 2411593a9e7bSAlex Elder } 2412593a9e7bSAlex Elder 2413dfc5606dSYehuda Sadeh static ssize_t rbd_size_show(struct device *dev, 2414dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 2415602adf40SYehuda Sadeh { 2416593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2417a51aa0c0SJosh Durgin sector_t size; 2418dfc5606dSYehuda Sadeh 2419a51aa0c0SJosh Durgin down_read(&rbd_dev->header_rwsem); 2420a51aa0c0SJosh Durgin size = get_capacity(rbd_dev->disk); 2421a51aa0c0SJosh Durgin up_read(&rbd_dev->header_rwsem); 
2422a51aa0c0SJosh Durgin 2423a51aa0c0SJosh Durgin return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE); 2424602adf40SYehuda Sadeh } 2425602adf40SYehuda Sadeh 242634b13184SAlex Elder /* 242734b13184SAlex Elder * Note this shows the features for whatever's mapped, which is not 242834b13184SAlex Elder * necessarily the base image. 242934b13184SAlex Elder */ 243034b13184SAlex Elder static ssize_t rbd_features_show(struct device *dev, 243134b13184SAlex Elder struct device_attribute *attr, char *buf) 243234b13184SAlex Elder { 243334b13184SAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 243434b13184SAlex Elder 243534b13184SAlex Elder return sprintf(buf, "0x%016llx\n", 243634b13184SAlex Elder (unsigned long long) rbd_dev->mapping.features); 243734b13184SAlex Elder } 243834b13184SAlex Elder 2439dfc5606dSYehuda Sadeh static ssize_t rbd_major_show(struct device *dev, 2440dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 2441602adf40SYehuda Sadeh { 2442593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2443dfc5606dSYehuda Sadeh 2444dfc5606dSYehuda Sadeh return sprintf(buf, "%d\n", rbd_dev->major); 2445dfc5606dSYehuda Sadeh } 2446dfc5606dSYehuda Sadeh 2447dfc5606dSYehuda Sadeh static ssize_t rbd_client_id_show(struct device *dev, 2448dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 2449dfc5606dSYehuda Sadeh { 2450593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2451dfc5606dSYehuda Sadeh 24521dbb4399SAlex Elder return sprintf(buf, "client%lld\n", 24531dbb4399SAlex Elder ceph_client_id(rbd_dev->rbd_client->client)); 2454dfc5606dSYehuda Sadeh } 2455dfc5606dSYehuda Sadeh 2456dfc5606dSYehuda Sadeh static ssize_t rbd_pool_show(struct device *dev, 2457dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 2458dfc5606dSYehuda Sadeh { 2459593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2460dfc5606dSYehuda Sadeh 24610d7dbfceSAlex Elder return sprintf(buf, 
"%s\n", rbd_dev->spec->pool_name); 2462dfc5606dSYehuda Sadeh } 2463dfc5606dSYehuda Sadeh 24649bb2f334SAlex Elder static ssize_t rbd_pool_id_show(struct device *dev, 24659bb2f334SAlex Elder struct device_attribute *attr, char *buf) 24669bb2f334SAlex Elder { 24679bb2f334SAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 24689bb2f334SAlex Elder 24690d7dbfceSAlex Elder return sprintf(buf, "%llu\n", 24700d7dbfceSAlex Elder (unsigned long long) rbd_dev->spec->pool_id); 24719bb2f334SAlex Elder } 24729bb2f334SAlex Elder 2473dfc5606dSYehuda Sadeh static ssize_t rbd_name_show(struct device *dev, 2474dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 2475dfc5606dSYehuda Sadeh { 2476593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2477dfc5606dSYehuda Sadeh 2478a92ffdf8SAlex Elder if (rbd_dev->spec->image_name) 24790d7dbfceSAlex Elder return sprintf(buf, "%s\n", rbd_dev->spec->image_name); 2480a92ffdf8SAlex Elder 2481a92ffdf8SAlex Elder return sprintf(buf, "(unknown)\n"); 2482dfc5606dSYehuda Sadeh } 2483dfc5606dSYehuda Sadeh 2484589d30e0SAlex Elder static ssize_t rbd_image_id_show(struct device *dev, 2485589d30e0SAlex Elder struct device_attribute *attr, char *buf) 2486589d30e0SAlex Elder { 2487589d30e0SAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2488589d30e0SAlex Elder 24890d7dbfceSAlex Elder return sprintf(buf, "%s\n", rbd_dev->spec->image_id); 2490589d30e0SAlex Elder } 2491589d30e0SAlex Elder 249234b13184SAlex Elder /* 249334b13184SAlex Elder * Shows the name of the currently-mapped snapshot (or 249434b13184SAlex Elder * RBD_SNAP_HEAD_NAME for the base image). 
249534b13184SAlex Elder */ 2496dfc5606dSYehuda Sadeh static ssize_t rbd_snap_show(struct device *dev, 2497dfc5606dSYehuda Sadeh struct device_attribute *attr, 2498dfc5606dSYehuda Sadeh char *buf) 2499dfc5606dSYehuda Sadeh { 2500593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2501dfc5606dSYehuda Sadeh 25020d7dbfceSAlex Elder return sprintf(buf, "%s\n", rbd_dev->spec->snap_name); 2503dfc5606dSYehuda Sadeh } 2504dfc5606dSYehuda Sadeh 250586b00e0dSAlex Elder /* 250686b00e0dSAlex Elder * For an rbd v2 image, shows the pool id, image id, and snapshot id 250786b00e0dSAlex Elder * for the parent image. If there is no parent, simply shows 250886b00e0dSAlex Elder * "(no parent image)". 250986b00e0dSAlex Elder */ 251086b00e0dSAlex Elder static ssize_t rbd_parent_show(struct device *dev, 251186b00e0dSAlex Elder struct device_attribute *attr, 251286b00e0dSAlex Elder char *buf) 251386b00e0dSAlex Elder { 251486b00e0dSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 251586b00e0dSAlex Elder struct rbd_spec *spec = rbd_dev->parent_spec; 251686b00e0dSAlex Elder int count; 251786b00e0dSAlex Elder char *bufp = buf; 251886b00e0dSAlex Elder 251986b00e0dSAlex Elder if (!spec) 252086b00e0dSAlex Elder return sprintf(buf, "(no parent image)\n"); 252186b00e0dSAlex Elder 252286b00e0dSAlex Elder count = sprintf(bufp, "pool_id %llu\npool_name %s\n", 252386b00e0dSAlex Elder (unsigned long long) spec->pool_id, spec->pool_name); 252486b00e0dSAlex Elder if (count < 0) 252586b00e0dSAlex Elder return count; 252686b00e0dSAlex Elder bufp += count; 252786b00e0dSAlex Elder 252886b00e0dSAlex Elder count = sprintf(bufp, "image_id %s\nimage_name %s\n", spec->image_id, 252986b00e0dSAlex Elder spec->image_name ? 
spec->image_name : "(unknown)"); 253086b00e0dSAlex Elder if (count < 0) 253186b00e0dSAlex Elder return count; 253286b00e0dSAlex Elder bufp += count; 253386b00e0dSAlex Elder 253486b00e0dSAlex Elder count = sprintf(bufp, "snap_id %llu\nsnap_name %s\n", 253586b00e0dSAlex Elder (unsigned long long) spec->snap_id, spec->snap_name); 253686b00e0dSAlex Elder if (count < 0) 253786b00e0dSAlex Elder return count; 253886b00e0dSAlex Elder bufp += count; 253986b00e0dSAlex Elder 254086b00e0dSAlex Elder count = sprintf(bufp, "overlap %llu\n", rbd_dev->parent_overlap); 254186b00e0dSAlex Elder if (count < 0) 254286b00e0dSAlex Elder return count; 254386b00e0dSAlex Elder bufp += count; 254486b00e0dSAlex Elder 254586b00e0dSAlex Elder return (ssize_t) (bufp - buf); 254686b00e0dSAlex Elder } 254786b00e0dSAlex Elder 2548dfc5606dSYehuda Sadeh static ssize_t rbd_image_refresh(struct device *dev, 2549dfc5606dSYehuda Sadeh struct device_attribute *attr, 2550dfc5606dSYehuda Sadeh const char *buf, 2551dfc5606dSYehuda Sadeh size_t size) 2552dfc5606dSYehuda Sadeh { 2553593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2554b813623aSAlex Elder int ret; 2555602adf40SYehuda Sadeh 2556117973fbSAlex Elder ret = rbd_dev_refresh(rbd_dev, NULL); 2557b813623aSAlex Elder 2558b813623aSAlex Elder return ret < 0 ? 
ret : size; 2559dfc5606dSYehuda Sadeh } 2560602adf40SYehuda Sadeh 2561dfc5606dSYehuda Sadeh static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL); 256234b13184SAlex Elder static DEVICE_ATTR(features, S_IRUGO, rbd_features_show, NULL); 2563dfc5606dSYehuda Sadeh static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL); 2564dfc5606dSYehuda Sadeh static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL); 2565dfc5606dSYehuda Sadeh static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL); 25669bb2f334SAlex Elder static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL); 2567dfc5606dSYehuda Sadeh static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL); 2568589d30e0SAlex Elder static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL); 2569dfc5606dSYehuda Sadeh static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); 2570dfc5606dSYehuda Sadeh static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); 257186b00e0dSAlex Elder static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL); 2572dfc5606dSYehuda Sadeh 2573dfc5606dSYehuda Sadeh static struct attribute *rbd_attrs[] = { 2574dfc5606dSYehuda Sadeh &dev_attr_size.attr, 257534b13184SAlex Elder &dev_attr_features.attr, 2576dfc5606dSYehuda Sadeh &dev_attr_major.attr, 2577dfc5606dSYehuda Sadeh &dev_attr_client_id.attr, 2578dfc5606dSYehuda Sadeh &dev_attr_pool.attr, 25799bb2f334SAlex Elder &dev_attr_pool_id.attr, 2580dfc5606dSYehuda Sadeh &dev_attr_name.attr, 2581589d30e0SAlex Elder &dev_attr_image_id.attr, 2582dfc5606dSYehuda Sadeh &dev_attr_current_snap.attr, 258386b00e0dSAlex Elder &dev_attr_parent.attr, 2584dfc5606dSYehuda Sadeh &dev_attr_refresh.attr, 2585dfc5606dSYehuda Sadeh NULL 2586dfc5606dSYehuda Sadeh }; 2587dfc5606dSYehuda Sadeh 2588dfc5606dSYehuda Sadeh static struct attribute_group rbd_attr_group = { 2589dfc5606dSYehuda Sadeh .attrs = rbd_attrs, 2590dfc5606dSYehuda Sadeh }; 2591dfc5606dSYehuda Sadeh 2592dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_attr_groups[] = { 
2593dfc5606dSYehuda Sadeh &rbd_attr_group, 2594dfc5606dSYehuda Sadeh NULL 2595dfc5606dSYehuda Sadeh }; 2596dfc5606dSYehuda Sadeh 2597dfc5606dSYehuda Sadeh static void rbd_sysfs_dev_release(struct device *dev) 2598dfc5606dSYehuda Sadeh { 2599dfc5606dSYehuda Sadeh } 2600dfc5606dSYehuda Sadeh 2601dfc5606dSYehuda Sadeh static struct device_type rbd_device_type = { 2602dfc5606dSYehuda Sadeh .name = "rbd", 2603dfc5606dSYehuda Sadeh .groups = rbd_attr_groups, 2604dfc5606dSYehuda Sadeh .release = rbd_sysfs_dev_release, 2605dfc5606dSYehuda Sadeh }; 2606dfc5606dSYehuda Sadeh 2607dfc5606dSYehuda Sadeh 2608dfc5606dSYehuda Sadeh /* 2609dfc5606dSYehuda Sadeh sysfs - snapshots 2610dfc5606dSYehuda Sadeh */ 2611dfc5606dSYehuda Sadeh 2612dfc5606dSYehuda Sadeh static ssize_t rbd_snap_size_show(struct device *dev, 2613dfc5606dSYehuda Sadeh struct device_attribute *attr, 2614dfc5606dSYehuda Sadeh char *buf) 2615dfc5606dSYehuda Sadeh { 2616dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 2617dfc5606dSYehuda Sadeh 26183591538fSJosh Durgin return sprintf(buf, "%llu\n", (unsigned long long)snap->size); 2619dfc5606dSYehuda Sadeh } 2620dfc5606dSYehuda Sadeh 2621dfc5606dSYehuda Sadeh static ssize_t rbd_snap_id_show(struct device *dev, 2622dfc5606dSYehuda Sadeh struct device_attribute *attr, 2623dfc5606dSYehuda Sadeh char *buf) 2624dfc5606dSYehuda Sadeh { 2625dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 2626dfc5606dSYehuda Sadeh 2627593a9e7bSAlex Elder return sprintf(buf, "%llu\n", (unsigned long long)snap->id); 2628dfc5606dSYehuda Sadeh } 2629dfc5606dSYehuda Sadeh 263034b13184SAlex Elder static ssize_t rbd_snap_features_show(struct device *dev, 263134b13184SAlex Elder struct device_attribute *attr, 263234b13184SAlex Elder char *buf) 263334b13184SAlex Elder { 263434b13184SAlex Elder struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 263534b13184SAlex Elder 263634b13184SAlex Elder return sprintf(buf, 
"0x%016llx\n", 263734b13184SAlex Elder (unsigned long long) snap->features); 263834b13184SAlex Elder } 263934b13184SAlex Elder 2640dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); 2641dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL); 264234b13184SAlex Elder static DEVICE_ATTR(snap_features, S_IRUGO, rbd_snap_features_show, NULL); 2643dfc5606dSYehuda Sadeh 2644dfc5606dSYehuda Sadeh static struct attribute *rbd_snap_attrs[] = { 2645dfc5606dSYehuda Sadeh &dev_attr_snap_size.attr, 2646dfc5606dSYehuda Sadeh &dev_attr_snap_id.attr, 264734b13184SAlex Elder &dev_attr_snap_features.attr, 2648dfc5606dSYehuda Sadeh NULL, 2649dfc5606dSYehuda Sadeh }; 2650dfc5606dSYehuda Sadeh 2651dfc5606dSYehuda Sadeh static struct attribute_group rbd_snap_attr_group = { 2652dfc5606dSYehuda Sadeh .attrs = rbd_snap_attrs, 2653dfc5606dSYehuda Sadeh }; 2654dfc5606dSYehuda Sadeh 2655dfc5606dSYehuda Sadeh static void rbd_snap_dev_release(struct device *dev) 2656dfc5606dSYehuda Sadeh { 2657dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 2658dfc5606dSYehuda Sadeh kfree(snap->name); 2659dfc5606dSYehuda Sadeh kfree(snap); 2660dfc5606dSYehuda Sadeh } 2661dfc5606dSYehuda Sadeh 2662dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_snap_attr_groups[] = { 2663dfc5606dSYehuda Sadeh &rbd_snap_attr_group, 2664dfc5606dSYehuda Sadeh NULL 2665dfc5606dSYehuda Sadeh }; 2666dfc5606dSYehuda Sadeh 2667dfc5606dSYehuda Sadeh static struct device_type rbd_snap_device_type = { 2668dfc5606dSYehuda Sadeh .groups = rbd_snap_attr_groups, 2669dfc5606dSYehuda Sadeh .release = rbd_snap_dev_release, 2670dfc5606dSYehuda Sadeh }; 2671dfc5606dSYehuda Sadeh 26728b8fb99cSAlex Elder static struct rbd_spec *rbd_spec_get(struct rbd_spec *spec) 26738b8fb99cSAlex Elder { 26748b8fb99cSAlex Elder kref_get(&spec->kref); 26758b8fb99cSAlex Elder 26768b8fb99cSAlex Elder return spec; 26778b8fb99cSAlex Elder } 
26788b8fb99cSAlex Elder 26798b8fb99cSAlex Elder static void rbd_spec_free(struct kref *kref); 26808b8fb99cSAlex Elder static void rbd_spec_put(struct rbd_spec *spec) 26818b8fb99cSAlex Elder { 26828b8fb99cSAlex Elder if (spec) 26838b8fb99cSAlex Elder kref_put(&spec->kref, rbd_spec_free); 26848b8fb99cSAlex Elder } 26858b8fb99cSAlex Elder 26868b8fb99cSAlex Elder static struct rbd_spec *rbd_spec_alloc(void) 26878b8fb99cSAlex Elder { 26888b8fb99cSAlex Elder struct rbd_spec *spec; 26898b8fb99cSAlex Elder 26908b8fb99cSAlex Elder spec = kzalloc(sizeof (*spec), GFP_KERNEL); 26918b8fb99cSAlex Elder if (!spec) 26928b8fb99cSAlex Elder return NULL; 26938b8fb99cSAlex Elder kref_init(&spec->kref); 26948b8fb99cSAlex Elder 26958b8fb99cSAlex Elder rbd_spec_put(rbd_spec_get(spec)); /* TEMPORARY */ 26968b8fb99cSAlex Elder 26978b8fb99cSAlex Elder return spec; 26988b8fb99cSAlex Elder } 26998b8fb99cSAlex Elder 27008b8fb99cSAlex Elder static void rbd_spec_free(struct kref *kref) 27018b8fb99cSAlex Elder { 27028b8fb99cSAlex Elder struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref); 27038b8fb99cSAlex Elder 27048b8fb99cSAlex Elder kfree(spec->pool_name); 27058b8fb99cSAlex Elder kfree(spec->image_id); 27068b8fb99cSAlex Elder kfree(spec->image_name); 27078b8fb99cSAlex Elder kfree(spec->snap_name); 27088b8fb99cSAlex Elder kfree(spec); 27098b8fb99cSAlex Elder } 27108b8fb99cSAlex Elder 2711cc344fa1SAlex Elder static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, 2712c53d5893SAlex Elder struct rbd_spec *spec) 2713c53d5893SAlex Elder { 2714c53d5893SAlex Elder struct rbd_device *rbd_dev; 2715c53d5893SAlex Elder 2716c53d5893SAlex Elder rbd_dev = kzalloc(sizeof (*rbd_dev), GFP_KERNEL); 2717c53d5893SAlex Elder if (!rbd_dev) 2718c53d5893SAlex Elder return NULL; 2719c53d5893SAlex Elder 2720c53d5893SAlex Elder spin_lock_init(&rbd_dev->lock); 27216d292906SAlex Elder rbd_dev->flags = 0; 2722c53d5893SAlex Elder INIT_LIST_HEAD(&rbd_dev->node); 2723c53d5893SAlex Elder 
INIT_LIST_HEAD(&rbd_dev->snaps); 2724c53d5893SAlex Elder init_rwsem(&rbd_dev->header_rwsem); 2725c53d5893SAlex Elder 2726c53d5893SAlex Elder rbd_dev->spec = spec; 2727c53d5893SAlex Elder rbd_dev->rbd_client = rbdc; 2728c53d5893SAlex Elder 27290903e875SAlex Elder /* Initialize the layout used for all rbd requests */ 27300903e875SAlex Elder 27310903e875SAlex Elder rbd_dev->layout.fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); 27320903e875SAlex Elder rbd_dev->layout.fl_stripe_count = cpu_to_le32(1); 27330903e875SAlex Elder rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); 27340903e875SAlex Elder rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id); 27350903e875SAlex Elder 2736c53d5893SAlex Elder return rbd_dev; 2737c53d5893SAlex Elder } 2738c53d5893SAlex Elder 2739c53d5893SAlex Elder static void rbd_dev_destroy(struct rbd_device *rbd_dev) 2740c53d5893SAlex Elder { 274186b00e0dSAlex Elder rbd_spec_put(rbd_dev->parent_spec); 2742c53d5893SAlex Elder kfree(rbd_dev->header_name); 2743c53d5893SAlex Elder rbd_put_client(rbd_dev->rbd_client); 2744c53d5893SAlex Elder rbd_spec_put(rbd_dev->spec); 2745c53d5893SAlex Elder kfree(rbd_dev); 2746c53d5893SAlex Elder } 2747c53d5893SAlex Elder 2748304f6808SAlex Elder static bool rbd_snap_registered(struct rbd_snap *snap) 2749304f6808SAlex Elder { 2750304f6808SAlex Elder bool ret = snap->dev.type == &rbd_snap_device_type; 2751304f6808SAlex Elder bool reg = device_is_registered(&snap->dev); 2752304f6808SAlex Elder 2753304f6808SAlex Elder rbd_assert(!ret ^ reg); 2754304f6808SAlex Elder 2755304f6808SAlex Elder return ret; 2756304f6808SAlex Elder } 2757304f6808SAlex Elder 275841f38c2bSAlex Elder static void rbd_remove_snap_dev(struct rbd_snap *snap) 2759dfc5606dSYehuda Sadeh { 2760dfc5606dSYehuda Sadeh list_del(&snap->node); 2761304f6808SAlex Elder if (device_is_registered(&snap->dev)) 2762dfc5606dSYehuda Sadeh device_unregister(&snap->dev); 2763dfc5606dSYehuda Sadeh } 2764dfc5606dSYehuda Sadeh 
276514e7085dSAlex Elder static int rbd_register_snap_dev(struct rbd_snap *snap, 2766dfc5606dSYehuda Sadeh struct device *parent) 2767dfc5606dSYehuda Sadeh { 2768dfc5606dSYehuda Sadeh struct device *dev = &snap->dev; 2769dfc5606dSYehuda Sadeh int ret; 2770dfc5606dSYehuda Sadeh 2771dfc5606dSYehuda Sadeh dev->type = &rbd_snap_device_type; 2772dfc5606dSYehuda Sadeh dev->parent = parent; 2773dfc5606dSYehuda Sadeh dev->release = rbd_snap_dev_release; 2774d4b125e9SAlex Elder dev_set_name(dev, "%s%s", RBD_SNAP_DEV_NAME_PREFIX, snap->name); 2775304f6808SAlex Elder dout("%s: registering device for snapshot %s\n", __func__, snap->name); 2776304f6808SAlex Elder 2777dfc5606dSYehuda Sadeh ret = device_register(dev); 2778dfc5606dSYehuda Sadeh 2779dfc5606dSYehuda Sadeh return ret; 2780dfc5606dSYehuda Sadeh } 2781dfc5606dSYehuda Sadeh 27824e891e0aSAlex Elder static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev, 2783c8d18425SAlex Elder const char *snap_name, 278434b13184SAlex Elder u64 snap_id, u64 snap_size, 278534b13184SAlex Elder u64 snap_features) 2786dfc5606dSYehuda Sadeh { 27874e891e0aSAlex Elder struct rbd_snap *snap; 2788dfc5606dSYehuda Sadeh int ret; 27894e891e0aSAlex Elder 27904e891e0aSAlex Elder snap = kzalloc(sizeof (*snap), GFP_KERNEL); 2791dfc5606dSYehuda Sadeh if (!snap) 27924e891e0aSAlex Elder return ERR_PTR(-ENOMEM); 27934e891e0aSAlex Elder 27944e891e0aSAlex Elder ret = -ENOMEM; 2795c8d18425SAlex Elder snap->name = kstrdup(snap_name, GFP_KERNEL); 27964e891e0aSAlex Elder if (!snap->name) 27974e891e0aSAlex Elder goto err; 27984e891e0aSAlex Elder 2799c8d18425SAlex Elder snap->id = snap_id; 2800c8d18425SAlex Elder snap->size = snap_size; 280134b13184SAlex Elder snap->features = snap_features; 28024e891e0aSAlex Elder 28034e891e0aSAlex Elder return snap; 28044e891e0aSAlex Elder 2805dfc5606dSYehuda Sadeh err: 2806dfc5606dSYehuda Sadeh kfree(snap->name); 2807dfc5606dSYehuda Sadeh kfree(snap); 28084e891e0aSAlex Elder 28094e891e0aSAlex Elder return 
ERR_PTR(ret); 2810dfc5606dSYehuda Sadeh } 2811dfc5606dSYehuda Sadeh 2812cd892126SAlex Elder static char *rbd_dev_v1_snap_info(struct rbd_device *rbd_dev, u32 which, 2813cd892126SAlex Elder u64 *snap_size, u64 *snap_features) 2814cd892126SAlex Elder { 2815cd892126SAlex Elder char *snap_name; 2816cd892126SAlex Elder 2817cd892126SAlex Elder rbd_assert(which < rbd_dev->header.snapc->num_snaps); 2818cd892126SAlex Elder 2819cd892126SAlex Elder *snap_size = rbd_dev->header.snap_sizes[which]; 2820cd892126SAlex Elder *snap_features = 0; /* No features for v1 */ 2821cd892126SAlex Elder 2822cd892126SAlex Elder /* Skip over names until we find the one we are looking for */ 2823cd892126SAlex Elder 2824cd892126SAlex Elder snap_name = rbd_dev->header.snap_names; 2825cd892126SAlex Elder while (which--) 2826cd892126SAlex Elder snap_name += strlen(snap_name) + 1; 2827cd892126SAlex Elder 2828cd892126SAlex Elder return snap_name; 2829cd892126SAlex Elder } 2830cd892126SAlex Elder 2831dfc5606dSYehuda Sadeh /* 28329d475de5SAlex Elder * Get the size and object order for an image snapshot, or if 28339d475de5SAlex Elder * snap_id is CEPH_NOSNAP, gets this information for the base 28349d475de5SAlex Elder * image. 
28359d475de5SAlex Elder */ 28369d475de5SAlex Elder static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, 28379d475de5SAlex Elder u8 *order, u64 *snap_size) 28389d475de5SAlex Elder { 28399d475de5SAlex Elder __le64 snapid = cpu_to_le64(snap_id); 28409d475de5SAlex Elder int ret; 28419d475de5SAlex Elder struct { 28429d475de5SAlex Elder u8 order; 28439d475de5SAlex Elder __le64 size; 28449d475de5SAlex Elder } __attribute__ ((packed)) size_buf = { 0 }; 28459d475de5SAlex Elder 284636be9a76SAlex Elder ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, 28479d475de5SAlex Elder "rbd", "get_size", 28489d475de5SAlex Elder (char *) &snapid, sizeof (snapid), 284907b2391fSAlex Elder (char *) &size_buf, sizeof (size_buf), NULL); 285036be9a76SAlex Elder dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 28519d475de5SAlex Elder if (ret < 0) 28529d475de5SAlex Elder return ret; 28539d475de5SAlex Elder 28549d475de5SAlex Elder *order = size_buf.order; 28559d475de5SAlex Elder *snap_size = le64_to_cpu(size_buf.size); 28569d475de5SAlex Elder 28579d475de5SAlex Elder dout(" snap_id 0x%016llx order = %u, snap_size = %llu\n", 28589d475de5SAlex Elder (unsigned long long) snap_id, (unsigned int) *order, 28599d475de5SAlex Elder (unsigned long long) *snap_size); 28609d475de5SAlex Elder 28619d475de5SAlex Elder return 0; 28629d475de5SAlex Elder } 28639d475de5SAlex Elder 28649d475de5SAlex Elder static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev) 28659d475de5SAlex Elder { 28669d475de5SAlex Elder return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP, 28679d475de5SAlex Elder &rbd_dev->header.obj_order, 28689d475de5SAlex Elder &rbd_dev->header.image_size); 28699d475de5SAlex Elder } 28709d475de5SAlex Elder 28711e130199SAlex Elder static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) 28721e130199SAlex Elder { 28731e130199SAlex Elder void *reply_buf; 28741e130199SAlex Elder int ret; 28751e130199SAlex Elder void *p; 28761e130199SAlex Elder 28771e130199SAlex 
Elder reply_buf = kzalloc(RBD_OBJ_PREFIX_LEN_MAX, GFP_KERNEL); 28781e130199SAlex Elder if (!reply_buf) 28791e130199SAlex Elder return -ENOMEM; 28801e130199SAlex Elder 288136be9a76SAlex Elder ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, 28821e130199SAlex Elder "rbd", "get_object_prefix", 28831e130199SAlex Elder NULL, 0, 288407b2391fSAlex Elder reply_buf, RBD_OBJ_PREFIX_LEN_MAX, NULL); 288536be9a76SAlex Elder dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 28861e130199SAlex Elder if (ret < 0) 28871e130199SAlex Elder goto out; 28881e130199SAlex Elder 28891e130199SAlex Elder p = reply_buf; 28901e130199SAlex Elder rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, 28911e130199SAlex Elder p + RBD_OBJ_PREFIX_LEN_MAX, 28921e130199SAlex Elder NULL, GFP_NOIO); 28931e130199SAlex Elder 28941e130199SAlex Elder if (IS_ERR(rbd_dev->header.object_prefix)) { 28951e130199SAlex Elder ret = PTR_ERR(rbd_dev->header.object_prefix); 28961e130199SAlex Elder rbd_dev->header.object_prefix = NULL; 28971e130199SAlex Elder } else { 28981e130199SAlex Elder dout(" object_prefix = %s\n", rbd_dev->header.object_prefix); 28991e130199SAlex Elder } 29001e130199SAlex Elder 29011e130199SAlex Elder out: 29021e130199SAlex Elder kfree(reply_buf); 29031e130199SAlex Elder 29041e130199SAlex Elder return ret; 29051e130199SAlex Elder } 29061e130199SAlex Elder 2907b1b5402aSAlex Elder static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, 2908b1b5402aSAlex Elder u64 *snap_features) 2909b1b5402aSAlex Elder { 2910b1b5402aSAlex Elder __le64 snapid = cpu_to_le64(snap_id); 2911b1b5402aSAlex Elder struct { 2912b1b5402aSAlex Elder __le64 features; 2913b1b5402aSAlex Elder __le64 incompat; 2914b1b5402aSAlex Elder } features_buf = { 0 }; 2915d889140cSAlex Elder u64 incompat; 2916b1b5402aSAlex Elder int ret; 2917b1b5402aSAlex Elder 291836be9a76SAlex Elder ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, 2919b1b5402aSAlex Elder "rbd", "get_features", 
2920b1b5402aSAlex Elder (char *) &snapid, sizeof (snapid), 2921b1b5402aSAlex Elder (char *) &features_buf, sizeof (features_buf), 292207b2391fSAlex Elder NULL); 292336be9a76SAlex Elder dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 2924b1b5402aSAlex Elder if (ret < 0) 2925b1b5402aSAlex Elder return ret; 2926d889140cSAlex Elder 2927d889140cSAlex Elder incompat = le64_to_cpu(features_buf.incompat); 29285cbf6f12SAlex Elder if (incompat & ~RBD_FEATURES_SUPPORTED) 2929b8f5c6edSAlex Elder return -ENXIO; 2930d889140cSAlex Elder 2931b1b5402aSAlex Elder *snap_features = le64_to_cpu(features_buf.features); 2932b1b5402aSAlex Elder 2933b1b5402aSAlex Elder dout(" snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n", 2934b1b5402aSAlex Elder (unsigned long long) snap_id, 2935b1b5402aSAlex Elder (unsigned long long) *snap_features, 2936b1b5402aSAlex Elder (unsigned long long) le64_to_cpu(features_buf.incompat)); 2937b1b5402aSAlex Elder 2938b1b5402aSAlex Elder return 0; 2939b1b5402aSAlex Elder } 2940b1b5402aSAlex Elder 2941b1b5402aSAlex Elder static int rbd_dev_v2_features(struct rbd_device *rbd_dev) 2942b1b5402aSAlex Elder { 2943b1b5402aSAlex Elder return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, 2944b1b5402aSAlex Elder &rbd_dev->header.features); 2945b1b5402aSAlex Elder } 2946b1b5402aSAlex Elder 294786b00e0dSAlex Elder static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) 294886b00e0dSAlex Elder { 294986b00e0dSAlex Elder struct rbd_spec *parent_spec; 295086b00e0dSAlex Elder size_t size; 295186b00e0dSAlex Elder void *reply_buf = NULL; 295286b00e0dSAlex Elder __le64 snapid; 295386b00e0dSAlex Elder void *p; 295486b00e0dSAlex Elder void *end; 295586b00e0dSAlex Elder char *image_id; 295686b00e0dSAlex Elder u64 overlap; 295786b00e0dSAlex Elder int ret; 295886b00e0dSAlex Elder 295986b00e0dSAlex Elder parent_spec = rbd_spec_alloc(); 296086b00e0dSAlex Elder if (!parent_spec) 296186b00e0dSAlex Elder return -ENOMEM; 296286b00e0dSAlex Elder 
296386b00e0dSAlex Elder size = sizeof (__le64) + /* pool_id */ 296486b00e0dSAlex Elder sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX + /* image_id */ 296586b00e0dSAlex Elder sizeof (__le64) + /* snap_id */ 296686b00e0dSAlex Elder sizeof (__le64); /* overlap */ 296786b00e0dSAlex Elder reply_buf = kmalloc(size, GFP_KERNEL); 296886b00e0dSAlex Elder if (!reply_buf) { 296986b00e0dSAlex Elder ret = -ENOMEM; 297086b00e0dSAlex Elder goto out_err; 297186b00e0dSAlex Elder } 297286b00e0dSAlex Elder 297386b00e0dSAlex Elder snapid = cpu_to_le64(CEPH_NOSNAP); 297436be9a76SAlex Elder ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, 297586b00e0dSAlex Elder "rbd", "get_parent", 297686b00e0dSAlex Elder (char *) &snapid, sizeof (snapid), 297707b2391fSAlex Elder (char *) reply_buf, size, NULL); 297836be9a76SAlex Elder dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 297986b00e0dSAlex Elder if (ret < 0) 298086b00e0dSAlex Elder goto out_err; 298186b00e0dSAlex Elder 298286b00e0dSAlex Elder ret = -ERANGE; 298386b00e0dSAlex Elder p = reply_buf; 298486b00e0dSAlex Elder end = (char *) reply_buf + size; 298586b00e0dSAlex Elder ceph_decode_64_safe(&p, end, parent_spec->pool_id, out_err); 298686b00e0dSAlex Elder if (parent_spec->pool_id == CEPH_NOPOOL) 298786b00e0dSAlex Elder goto out; /* No parent? No problem. 
*/ 298886b00e0dSAlex Elder 29890903e875SAlex Elder /* The ceph file layout needs to fit pool id in 32 bits */ 29900903e875SAlex Elder 29910903e875SAlex Elder ret = -EIO; 29920903e875SAlex Elder if (WARN_ON(parent_spec->pool_id > (u64) U32_MAX)) 29930903e875SAlex Elder goto out; 29940903e875SAlex Elder 2995979ed480SAlex Elder image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); 299686b00e0dSAlex Elder if (IS_ERR(image_id)) { 299786b00e0dSAlex Elder ret = PTR_ERR(image_id); 299886b00e0dSAlex Elder goto out_err; 299986b00e0dSAlex Elder } 300086b00e0dSAlex Elder parent_spec->image_id = image_id; 300186b00e0dSAlex Elder ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); 300286b00e0dSAlex Elder ceph_decode_64_safe(&p, end, overlap, out_err); 300386b00e0dSAlex Elder 300486b00e0dSAlex Elder rbd_dev->parent_overlap = overlap; 300586b00e0dSAlex Elder rbd_dev->parent_spec = parent_spec; 300686b00e0dSAlex Elder parent_spec = NULL; /* rbd_dev now owns this */ 300786b00e0dSAlex Elder out: 300886b00e0dSAlex Elder ret = 0; 300986b00e0dSAlex Elder out_err: 301086b00e0dSAlex Elder kfree(reply_buf); 301186b00e0dSAlex Elder rbd_spec_put(parent_spec); 301286b00e0dSAlex Elder 301386b00e0dSAlex Elder return ret; 301486b00e0dSAlex Elder } 301586b00e0dSAlex Elder 30169e15b77dSAlex Elder static char *rbd_dev_image_name(struct rbd_device *rbd_dev) 30179e15b77dSAlex Elder { 30189e15b77dSAlex Elder size_t image_id_size; 30199e15b77dSAlex Elder char *image_id; 30209e15b77dSAlex Elder void *p; 30219e15b77dSAlex Elder void *end; 30229e15b77dSAlex Elder size_t size; 30239e15b77dSAlex Elder void *reply_buf = NULL; 30249e15b77dSAlex Elder size_t len = 0; 30259e15b77dSAlex Elder char *image_name = NULL; 30269e15b77dSAlex Elder int ret; 30279e15b77dSAlex Elder 30289e15b77dSAlex Elder rbd_assert(!rbd_dev->spec->image_name); 30299e15b77dSAlex Elder 303069e7a02fSAlex Elder len = strlen(rbd_dev->spec->image_id); 303169e7a02fSAlex Elder image_id_size = sizeof (__le32) + len; 
30329e15b77dSAlex Elder image_id = kmalloc(image_id_size, GFP_KERNEL); 30339e15b77dSAlex Elder if (!image_id) 30349e15b77dSAlex Elder return NULL; 30359e15b77dSAlex Elder 30369e15b77dSAlex Elder p = image_id; 30379e15b77dSAlex Elder end = (char *) image_id + image_id_size; 303869e7a02fSAlex Elder ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32) len); 30399e15b77dSAlex Elder 30409e15b77dSAlex Elder size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX; 30419e15b77dSAlex Elder reply_buf = kmalloc(size, GFP_KERNEL); 30429e15b77dSAlex Elder if (!reply_buf) 30439e15b77dSAlex Elder goto out; 30449e15b77dSAlex Elder 304536be9a76SAlex Elder ret = rbd_obj_method_sync(rbd_dev, RBD_DIRECTORY, 30469e15b77dSAlex Elder "rbd", "dir_get_name", 30479e15b77dSAlex Elder image_id, image_id_size, 304807b2391fSAlex Elder (char *) reply_buf, size, NULL); 30499e15b77dSAlex Elder if (ret < 0) 30509e15b77dSAlex Elder goto out; 30519e15b77dSAlex Elder p = reply_buf; 30529e15b77dSAlex Elder end = (char *) reply_buf + size; 30539e15b77dSAlex Elder image_name = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL); 30549e15b77dSAlex Elder if (IS_ERR(image_name)) 30559e15b77dSAlex Elder image_name = NULL; 30569e15b77dSAlex Elder else 30579e15b77dSAlex Elder dout("%s: name is %s len is %zd\n", __func__, image_name, len); 30589e15b77dSAlex Elder out: 30599e15b77dSAlex Elder kfree(reply_buf); 30609e15b77dSAlex Elder kfree(image_id); 30619e15b77dSAlex Elder 30629e15b77dSAlex Elder return image_name; 30639e15b77dSAlex Elder } 30649e15b77dSAlex Elder 30659e15b77dSAlex Elder /* 30669e15b77dSAlex Elder * When a parent image gets probed, we only have the pool, image, 30679e15b77dSAlex Elder * and snapshot ids but not the names of any of them. This call 30689e15b77dSAlex Elder * is made later to fill in those names. 
It has to be done after 30699e15b77dSAlex Elder * rbd_dev_snaps_update() has completed because some of the 30709e15b77dSAlex Elder * information (in particular, snapshot name) is not available 30719e15b77dSAlex Elder * until then. 30729e15b77dSAlex Elder */ 30739e15b77dSAlex Elder static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) 30749e15b77dSAlex Elder { 30759e15b77dSAlex Elder struct ceph_osd_client *osdc; 30769e15b77dSAlex Elder const char *name; 30779e15b77dSAlex Elder void *reply_buf = NULL; 30789e15b77dSAlex Elder int ret; 30799e15b77dSAlex Elder 30809e15b77dSAlex Elder if (rbd_dev->spec->pool_name) 30819e15b77dSAlex Elder return 0; /* Already have the names */ 30829e15b77dSAlex Elder 30839e15b77dSAlex Elder /* Look up the pool name */ 30849e15b77dSAlex Elder 30859e15b77dSAlex Elder osdc = &rbd_dev->rbd_client->client->osdc; 30869e15b77dSAlex Elder name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id); 3087935dc89fSAlex Elder if (!name) { 3088935dc89fSAlex Elder rbd_warn(rbd_dev, "there is no pool with id %llu", 3089935dc89fSAlex Elder rbd_dev->spec->pool_id); /* Really a BUG() */ 3090935dc89fSAlex Elder return -EIO; 3091935dc89fSAlex Elder } 30929e15b77dSAlex Elder 30939e15b77dSAlex Elder rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL); 30949e15b77dSAlex Elder if (!rbd_dev->spec->pool_name) 30959e15b77dSAlex Elder return -ENOMEM; 30969e15b77dSAlex Elder 30979e15b77dSAlex Elder /* Fetch the image name; tolerate failure here */ 30989e15b77dSAlex Elder 30999e15b77dSAlex Elder name = rbd_dev_image_name(rbd_dev); 310069e7a02fSAlex Elder if (name) 31019e15b77dSAlex Elder rbd_dev->spec->image_name = (char *) name; 310269e7a02fSAlex Elder else 310306ecc6cbSAlex Elder rbd_warn(rbd_dev, "unable to get image name"); 31049e15b77dSAlex Elder 31059e15b77dSAlex Elder /* Look up the snapshot name. 
*/ 31069e15b77dSAlex Elder 31079e15b77dSAlex Elder name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id); 31089e15b77dSAlex Elder if (!name) { 3109935dc89fSAlex Elder rbd_warn(rbd_dev, "no snapshot with id %llu", 3110935dc89fSAlex Elder rbd_dev->spec->snap_id); /* Really a BUG() */ 31119e15b77dSAlex Elder ret = -EIO; 31129e15b77dSAlex Elder goto out_err; 31139e15b77dSAlex Elder } 31149e15b77dSAlex Elder rbd_dev->spec->snap_name = kstrdup(name, GFP_KERNEL); 31159e15b77dSAlex Elder if(!rbd_dev->spec->snap_name) 31169e15b77dSAlex Elder goto out_err; 31179e15b77dSAlex Elder 31189e15b77dSAlex Elder return 0; 31199e15b77dSAlex Elder out_err: 31209e15b77dSAlex Elder kfree(reply_buf); 31219e15b77dSAlex Elder kfree(rbd_dev->spec->pool_name); 31229e15b77dSAlex Elder rbd_dev->spec->pool_name = NULL; 31239e15b77dSAlex Elder 31249e15b77dSAlex Elder return ret; 31259e15b77dSAlex Elder } 31269e15b77dSAlex Elder 31276e14b1a6SAlex Elder static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) 312835d489f9SAlex Elder { 312935d489f9SAlex Elder size_t size; 313035d489f9SAlex Elder int ret; 313135d489f9SAlex Elder void *reply_buf; 313235d489f9SAlex Elder void *p; 313335d489f9SAlex Elder void *end; 313435d489f9SAlex Elder u64 seq; 313535d489f9SAlex Elder u32 snap_count; 313635d489f9SAlex Elder struct ceph_snap_context *snapc; 313735d489f9SAlex Elder u32 i; 313835d489f9SAlex Elder 313935d489f9SAlex Elder /* 314035d489f9SAlex Elder * We'll need room for the seq value (maximum snapshot id), 314135d489f9SAlex Elder * snapshot count, and array of that many snapshot ids. 314235d489f9SAlex Elder * For now we have a fixed upper limit on the number we're 314335d489f9SAlex Elder * prepared to receive. 
314435d489f9SAlex Elder */ 314535d489f9SAlex Elder size = sizeof (__le64) + sizeof (__le32) + 314635d489f9SAlex Elder RBD_MAX_SNAP_COUNT * sizeof (__le64); 314735d489f9SAlex Elder reply_buf = kzalloc(size, GFP_KERNEL); 314835d489f9SAlex Elder if (!reply_buf) 314935d489f9SAlex Elder return -ENOMEM; 315035d489f9SAlex Elder 315136be9a76SAlex Elder ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, 315235d489f9SAlex Elder "rbd", "get_snapcontext", 315335d489f9SAlex Elder NULL, 0, 315407b2391fSAlex Elder reply_buf, size, ver); 315536be9a76SAlex Elder dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 315635d489f9SAlex Elder if (ret < 0) 315735d489f9SAlex Elder goto out; 315835d489f9SAlex Elder 315935d489f9SAlex Elder ret = -ERANGE; 316035d489f9SAlex Elder p = reply_buf; 316135d489f9SAlex Elder end = (char *) reply_buf + size; 316235d489f9SAlex Elder ceph_decode_64_safe(&p, end, seq, out); 316335d489f9SAlex Elder ceph_decode_32_safe(&p, end, snap_count, out); 316435d489f9SAlex Elder 316535d489f9SAlex Elder /* 316635d489f9SAlex Elder * Make sure the reported number of snapshot ids wouldn't go 316735d489f9SAlex Elder * beyond the end of our buffer. But before checking that, 316835d489f9SAlex Elder * make sure the computed size of the snapshot context we 316935d489f9SAlex Elder * allocate is representable in a size_t. 
317035d489f9SAlex Elder */ 317135d489f9SAlex Elder if (snap_count > (SIZE_MAX - sizeof (struct ceph_snap_context)) 317235d489f9SAlex Elder / sizeof (u64)) { 317335d489f9SAlex Elder ret = -EINVAL; 317435d489f9SAlex Elder goto out; 317535d489f9SAlex Elder } 317635d489f9SAlex Elder if (!ceph_has_room(&p, end, snap_count * sizeof (__le64))) 317735d489f9SAlex Elder goto out; 317835d489f9SAlex Elder 317935d489f9SAlex Elder size = sizeof (struct ceph_snap_context) + 318035d489f9SAlex Elder snap_count * sizeof (snapc->snaps[0]); 318135d489f9SAlex Elder snapc = kmalloc(size, GFP_KERNEL); 318235d489f9SAlex Elder if (!snapc) { 318335d489f9SAlex Elder ret = -ENOMEM; 318435d489f9SAlex Elder goto out; 318535d489f9SAlex Elder } 318635d489f9SAlex Elder 318735d489f9SAlex Elder atomic_set(&snapc->nref, 1); 318835d489f9SAlex Elder snapc->seq = seq; 318935d489f9SAlex Elder snapc->num_snaps = snap_count; 319035d489f9SAlex Elder for (i = 0; i < snap_count; i++) 319135d489f9SAlex Elder snapc->snaps[i] = ceph_decode_64(&p); 319235d489f9SAlex Elder 319335d489f9SAlex Elder rbd_dev->header.snapc = snapc; 319435d489f9SAlex Elder 319535d489f9SAlex Elder dout(" snap context seq = %llu, snap_count = %u\n", 319635d489f9SAlex Elder (unsigned long long) seq, (unsigned int) snap_count); 319735d489f9SAlex Elder 319835d489f9SAlex Elder out: 319935d489f9SAlex Elder kfree(reply_buf); 320035d489f9SAlex Elder 320135d489f9SAlex Elder return 0; 320235d489f9SAlex Elder } 320335d489f9SAlex Elder 3204b8b1e2dbSAlex Elder static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) 3205b8b1e2dbSAlex Elder { 3206b8b1e2dbSAlex Elder size_t size; 3207b8b1e2dbSAlex Elder void *reply_buf; 3208b8b1e2dbSAlex Elder __le64 snap_id; 3209b8b1e2dbSAlex Elder int ret; 3210b8b1e2dbSAlex Elder void *p; 3211b8b1e2dbSAlex Elder void *end; 3212b8b1e2dbSAlex Elder char *snap_name; 3213b8b1e2dbSAlex Elder 3214b8b1e2dbSAlex Elder size = sizeof (__le32) + RBD_MAX_SNAP_NAME_LEN; 3215b8b1e2dbSAlex Elder reply_buf = 
kmalloc(size, GFP_KERNEL); 3216b8b1e2dbSAlex Elder if (!reply_buf) 3217b8b1e2dbSAlex Elder return ERR_PTR(-ENOMEM); 3218b8b1e2dbSAlex Elder 3219b8b1e2dbSAlex Elder snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]); 322036be9a76SAlex Elder ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, 3221b8b1e2dbSAlex Elder "rbd", "get_snapshot_name", 3222b8b1e2dbSAlex Elder (char *) &snap_id, sizeof (snap_id), 322307b2391fSAlex Elder reply_buf, size, NULL); 322436be9a76SAlex Elder dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 3225b8b1e2dbSAlex Elder if (ret < 0) 3226b8b1e2dbSAlex Elder goto out; 3227b8b1e2dbSAlex Elder 3228b8b1e2dbSAlex Elder p = reply_buf; 3229b8b1e2dbSAlex Elder end = (char *) reply_buf + size; 3230e5c35534SAlex Elder snap_name = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); 3231b8b1e2dbSAlex Elder if (IS_ERR(snap_name)) { 3232b8b1e2dbSAlex Elder ret = PTR_ERR(snap_name); 3233b8b1e2dbSAlex Elder goto out; 3234b8b1e2dbSAlex Elder } else { 3235b8b1e2dbSAlex Elder dout(" snap_id 0x%016llx snap_name = %s\n", 3236b8b1e2dbSAlex Elder (unsigned long long) le64_to_cpu(snap_id), snap_name); 3237b8b1e2dbSAlex Elder } 3238b8b1e2dbSAlex Elder kfree(reply_buf); 3239b8b1e2dbSAlex Elder 3240b8b1e2dbSAlex Elder return snap_name; 3241b8b1e2dbSAlex Elder out: 3242b8b1e2dbSAlex Elder kfree(reply_buf); 3243b8b1e2dbSAlex Elder 3244b8b1e2dbSAlex Elder return ERR_PTR(ret); 3245b8b1e2dbSAlex Elder } 3246b8b1e2dbSAlex Elder 3247b8b1e2dbSAlex Elder static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which, 3248b8b1e2dbSAlex Elder u64 *snap_size, u64 *snap_features) 3249b8b1e2dbSAlex Elder { 3250e0b49868SAlex Elder u64 snap_id; 3251b8b1e2dbSAlex Elder u8 order; 3252b8b1e2dbSAlex Elder int ret; 3253b8b1e2dbSAlex Elder 3254b8b1e2dbSAlex Elder snap_id = rbd_dev->header.snapc->snaps[which]; 3255b8b1e2dbSAlex Elder ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, &order, snap_size); 3256b8b1e2dbSAlex Elder if (ret) 3257b8b1e2dbSAlex 
Elder return ERR_PTR(ret); 3258b8b1e2dbSAlex Elder ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, snap_features); 3259b8b1e2dbSAlex Elder if (ret) 3260b8b1e2dbSAlex Elder return ERR_PTR(ret); 3261b8b1e2dbSAlex Elder 3262b8b1e2dbSAlex Elder return rbd_dev_v2_snap_name(rbd_dev, which); 3263b8b1e2dbSAlex Elder } 3264b8b1e2dbSAlex Elder 3265b8b1e2dbSAlex Elder static char *rbd_dev_snap_info(struct rbd_device *rbd_dev, u32 which, 3266b8b1e2dbSAlex Elder u64 *snap_size, u64 *snap_features) 3267b8b1e2dbSAlex Elder { 3268b8b1e2dbSAlex Elder if (rbd_dev->image_format == 1) 3269b8b1e2dbSAlex Elder return rbd_dev_v1_snap_info(rbd_dev, which, 3270b8b1e2dbSAlex Elder snap_size, snap_features); 3271b8b1e2dbSAlex Elder if (rbd_dev->image_format == 2) 3272b8b1e2dbSAlex Elder return rbd_dev_v2_snap_info(rbd_dev, which, 3273b8b1e2dbSAlex Elder snap_size, snap_features); 3274b8b1e2dbSAlex Elder return ERR_PTR(-EINVAL); 3275b8b1e2dbSAlex Elder } 3276b8b1e2dbSAlex Elder 3277117973fbSAlex Elder static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver) 3278117973fbSAlex Elder { 3279117973fbSAlex Elder int ret; 3280117973fbSAlex Elder __u8 obj_order; 3281117973fbSAlex Elder 3282117973fbSAlex Elder down_write(&rbd_dev->header_rwsem); 3283117973fbSAlex Elder 3284117973fbSAlex Elder /* Grab old order first, to see if it changes */ 3285117973fbSAlex Elder 3286117973fbSAlex Elder obj_order = rbd_dev->header.obj_order, 3287117973fbSAlex Elder ret = rbd_dev_v2_image_size(rbd_dev); 3288117973fbSAlex Elder if (ret) 3289117973fbSAlex Elder goto out; 3290117973fbSAlex Elder if (rbd_dev->header.obj_order != obj_order) { 3291117973fbSAlex Elder ret = -EIO; 3292117973fbSAlex Elder goto out; 3293117973fbSAlex Elder } 3294117973fbSAlex Elder rbd_update_mapping_size(rbd_dev); 3295117973fbSAlex Elder 3296117973fbSAlex Elder ret = rbd_dev_v2_snap_context(rbd_dev, hver); 3297117973fbSAlex Elder dout("rbd_dev_v2_snap_context returned %d\n", ret); 3298117973fbSAlex Elder if (ret) 
3299117973fbSAlex Elder goto out; 3300117973fbSAlex Elder ret = rbd_dev_snaps_update(rbd_dev); 3301117973fbSAlex Elder dout("rbd_dev_snaps_update returned %d\n", ret); 3302117973fbSAlex Elder if (ret) 3303117973fbSAlex Elder goto out; 3304117973fbSAlex Elder ret = rbd_dev_snaps_register(rbd_dev); 3305117973fbSAlex Elder dout("rbd_dev_snaps_register returned %d\n", ret); 3306117973fbSAlex Elder out: 3307117973fbSAlex Elder up_write(&rbd_dev->header_rwsem); 3308117973fbSAlex Elder 3309117973fbSAlex Elder return ret; 3310117973fbSAlex Elder } 3311117973fbSAlex Elder 33129d475de5SAlex Elder /* 331335938150SAlex Elder * Scan the rbd device's current snapshot list and compare it to the 331435938150SAlex Elder * newly-received snapshot context. Remove any existing snapshots 331535938150SAlex Elder * not present in the new snapshot context. Add a new snapshot for 331635938150SAlex Elder * any snaphots in the snapshot context not in the current list. 331735938150SAlex Elder * And verify there are no changes to snapshots we already know 331835938150SAlex Elder * about. 331935938150SAlex Elder * 332035938150SAlex Elder * Assumes the snapshots in the snapshot context are sorted by 332135938150SAlex Elder * snapshot id, highest id first. (Snapshots in the rbd_dev's list 332235938150SAlex Elder * are also maintained in that order.) 
3323dfc5606dSYehuda Sadeh */ 3324304f6808SAlex Elder static int rbd_dev_snaps_update(struct rbd_device *rbd_dev) 3325dfc5606dSYehuda Sadeh { 332635938150SAlex Elder struct ceph_snap_context *snapc = rbd_dev->header.snapc; 332735938150SAlex Elder const u32 snap_count = snapc->num_snaps; 332835938150SAlex Elder struct list_head *head = &rbd_dev->snaps; 332935938150SAlex Elder struct list_head *links = head->next; 333035938150SAlex Elder u32 index = 0; 3331dfc5606dSYehuda Sadeh 33329fcbb800SAlex Elder dout("%s: snap count is %u\n", __func__, (unsigned int) snap_count); 333335938150SAlex Elder while (index < snap_count || links != head) { 333435938150SAlex Elder u64 snap_id; 333535938150SAlex Elder struct rbd_snap *snap; 3336cd892126SAlex Elder char *snap_name; 3337cd892126SAlex Elder u64 snap_size = 0; 3338cd892126SAlex Elder u64 snap_features = 0; 3339dfc5606dSYehuda Sadeh 334035938150SAlex Elder snap_id = index < snap_count ? snapc->snaps[index] 334135938150SAlex Elder : CEPH_NOSNAP; 334235938150SAlex Elder snap = links != head ? list_entry(links, struct rbd_snap, node) 334335938150SAlex Elder : NULL; 3344aafb230eSAlex Elder rbd_assert(!snap || snap->id != CEPH_NOSNAP); 3345dfc5606dSYehuda Sadeh 334635938150SAlex Elder if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) { 334735938150SAlex Elder struct list_head *next = links->next; 3348dfc5606dSYehuda Sadeh 33496d292906SAlex Elder /* 33506d292906SAlex Elder * A previously-existing snapshot is not in 33516d292906SAlex Elder * the new snap context. 33526d292906SAlex Elder * 33536d292906SAlex Elder * If the now missing snapshot is the one the 33546d292906SAlex Elder * image is mapped to, clear its exists flag 33556d292906SAlex Elder * so we can avoid sending any more requests 33566d292906SAlex Elder * to it. 
33576d292906SAlex Elder */ 33580d7dbfceSAlex Elder if (rbd_dev->spec->snap_id == snap->id) 33596d292906SAlex Elder clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); 336041f38c2bSAlex Elder rbd_remove_snap_dev(snap); 33619fcbb800SAlex Elder dout("%ssnap id %llu has been removed\n", 33620d7dbfceSAlex Elder rbd_dev->spec->snap_id == snap->id ? 33630d7dbfceSAlex Elder "mapped " : "", 33649fcbb800SAlex Elder (unsigned long long) snap->id); 3365dfc5606dSYehuda Sadeh 336635938150SAlex Elder /* Done with this list entry; advance */ 336735938150SAlex Elder 336835938150SAlex Elder links = next; 336935938150SAlex Elder continue; 3370dfc5606dSYehuda Sadeh } 337135938150SAlex Elder 3372b8b1e2dbSAlex Elder snap_name = rbd_dev_snap_info(rbd_dev, index, 3373cd892126SAlex Elder &snap_size, &snap_features); 3374cd892126SAlex Elder if (IS_ERR(snap_name)) 3375cd892126SAlex Elder return PTR_ERR(snap_name); 3376cd892126SAlex Elder 33779fcbb800SAlex Elder dout("entry %u: snap_id = %llu\n", (unsigned int) snap_count, 33789fcbb800SAlex Elder (unsigned long long) snap_id); 337935938150SAlex Elder if (!snap || (snap_id != CEPH_NOSNAP && snap->id < snap_id)) { 338035938150SAlex Elder struct rbd_snap *new_snap; 338135938150SAlex Elder 338235938150SAlex Elder /* We haven't seen this snapshot before */ 338335938150SAlex Elder 3384c8d18425SAlex Elder new_snap = __rbd_add_snap_dev(rbd_dev, snap_name, 3385cd892126SAlex Elder snap_id, snap_size, snap_features); 33869fcbb800SAlex Elder if (IS_ERR(new_snap)) { 33879fcbb800SAlex Elder int err = PTR_ERR(new_snap); 33889fcbb800SAlex Elder 33899fcbb800SAlex Elder dout(" failed to add dev, error %d\n", err); 33909fcbb800SAlex Elder 33919fcbb800SAlex Elder return err; 33929fcbb800SAlex Elder } 339335938150SAlex Elder 339435938150SAlex Elder /* New goes before existing, or at end of list */ 339535938150SAlex Elder 33969fcbb800SAlex Elder dout(" added dev%s\n", snap ? 
"" : " at end\n"); 339735938150SAlex Elder if (snap) 339835938150SAlex Elder list_add_tail(&new_snap->node, &snap->node); 339935938150SAlex Elder else 3400523f3258SAlex Elder list_add_tail(&new_snap->node, head); 340135938150SAlex Elder } else { 340235938150SAlex Elder /* Already have this one */ 340335938150SAlex Elder 34049fcbb800SAlex Elder dout(" already present\n"); 34059fcbb800SAlex Elder 3406cd892126SAlex Elder rbd_assert(snap->size == snap_size); 3407aafb230eSAlex Elder rbd_assert(!strcmp(snap->name, snap_name)); 3408cd892126SAlex Elder rbd_assert(snap->features == snap_features); 340935938150SAlex Elder 341035938150SAlex Elder /* Done with this list entry; advance */ 341135938150SAlex Elder 341235938150SAlex Elder links = links->next; 3413dfc5606dSYehuda Sadeh } 341435938150SAlex Elder 341535938150SAlex Elder /* Advance to the next entry in the snapshot context */ 341635938150SAlex Elder 341735938150SAlex Elder index++; 3418dfc5606dSYehuda Sadeh } 34199fcbb800SAlex Elder dout("%s: done\n", __func__); 3420dfc5606dSYehuda Sadeh 3421dfc5606dSYehuda Sadeh return 0; 3422dfc5606dSYehuda Sadeh } 3423dfc5606dSYehuda Sadeh 3424304f6808SAlex Elder /* 3425304f6808SAlex Elder * Scan the list of snapshots and register the devices for any that 3426304f6808SAlex Elder * have not already been registered. 
3427304f6808SAlex Elder */ 3428304f6808SAlex Elder static int rbd_dev_snaps_register(struct rbd_device *rbd_dev) 3429304f6808SAlex Elder { 3430304f6808SAlex Elder struct rbd_snap *snap; 3431304f6808SAlex Elder int ret = 0; 3432304f6808SAlex Elder 343337206ee5SAlex Elder dout("%s:\n", __func__); 343486ff77bbSAlex Elder if (WARN_ON(!device_is_registered(&rbd_dev->dev))) 343586ff77bbSAlex Elder return -EIO; 3436304f6808SAlex Elder 3437304f6808SAlex Elder list_for_each_entry(snap, &rbd_dev->snaps, node) { 3438304f6808SAlex Elder if (!rbd_snap_registered(snap)) { 3439304f6808SAlex Elder ret = rbd_register_snap_dev(snap, &rbd_dev->dev); 3440304f6808SAlex Elder if (ret < 0) 3441304f6808SAlex Elder break; 3442304f6808SAlex Elder } 3443304f6808SAlex Elder } 3444304f6808SAlex Elder dout("%s: returning %d\n", __func__, ret); 3445304f6808SAlex Elder 3446304f6808SAlex Elder return ret; 3447304f6808SAlex Elder } 3448304f6808SAlex Elder 3449dfc5606dSYehuda Sadeh static int rbd_bus_add_dev(struct rbd_device *rbd_dev) 3450dfc5606dSYehuda Sadeh { 3451dfc5606dSYehuda Sadeh struct device *dev; 3452cd789ab9SAlex Elder int ret; 3453dfc5606dSYehuda Sadeh 3454dfc5606dSYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 3455dfc5606dSYehuda Sadeh 3456cd789ab9SAlex Elder dev = &rbd_dev->dev; 3457dfc5606dSYehuda Sadeh dev->bus = &rbd_bus_type; 3458dfc5606dSYehuda Sadeh dev->type = &rbd_device_type; 3459dfc5606dSYehuda Sadeh dev->parent = &rbd_root_dev; 3460dfc5606dSYehuda Sadeh dev->release = rbd_dev_release; 3461de71a297SAlex Elder dev_set_name(dev, "%d", rbd_dev->dev_id); 3462dfc5606dSYehuda Sadeh ret = device_register(dev); 3463dfc5606dSYehuda Sadeh 3464dfc5606dSYehuda Sadeh mutex_unlock(&ctl_mutex); 3465cd789ab9SAlex Elder 3466dfc5606dSYehuda Sadeh return ret; 3467602adf40SYehuda Sadeh } 3468602adf40SYehuda Sadeh 3469dfc5606dSYehuda Sadeh static void rbd_bus_del_dev(struct rbd_device *rbd_dev) 3470dfc5606dSYehuda Sadeh { 3471dfc5606dSYehuda Sadeh 
device_unregister(&rbd_dev->dev); 3472dfc5606dSYehuda Sadeh } 3473dfc5606dSYehuda Sadeh 3474e2839308SAlex Elder static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0); 34751ddbe94eSAlex Elder 34761ddbe94eSAlex Elder /* 3477499afd5bSAlex Elder * Get a unique rbd identifier for the given new rbd_dev, and add 3478499afd5bSAlex Elder * the rbd_dev to the global list. The minimum rbd id is 1. 34791ddbe94eSAlex Elder */ 3480e2839308SAlex Elder static void rbd_dev_id_get(struct rbd_device *rbd_dev) 3481b7f23c36SAlex Elder { 3482e2839308SAlex Elder rbd_dev->dev_id = atomic64_inc_return(&rbd_dev_id_max); 3483499afd5bSAlex Elder 3484499afd5bSAlex Elder spin_lock(&rbd_dev_list_lock); 3485499afd5bSAlex Elder list_add_tail(&rbd_dev->node, &rbd_dev_list); 3486499afd5bSAlex Elder spin_unlock(&rbd_dev_list_lock); 3487e2839308SAlex Elder dout("rbd_dev %p given dev id %llu\n", rbd_dev, 3488e2839308SAlex Elder (unsigned long long) rbd_dev->dev_id); 3489b7f23c36SAlex Elder } 3490b7f23c36SAlex Elder 34911ddbe94eSAlex Elder /* 3492499afd5bSAlex Elder * Remove an rbd_dev from the global list, and record that its 3493499afd5bSAlex Elder * identifier is no longer in use. 34941ddbe94eSAlex Elder */ 3495e2839308SAlex Elder static void rbd_dev_id_put(struct rbd_device *rbd_dev) 34961ddbe94eSAlex Elder { 3497d184f6bfSAlex Elder struct list_head *tmp; 3498de71a297SAlex Elder int rbd_id = rbd_dev->dev_id; 3499d184f6bfSAlex Elder int max_id; 3500d184f6bfSAlex Elder 3501aafb230eSAlex Elder rbd_assert(rbd_id > 0); 3502499afd5bSAlex Elder 3503e2839308SAlex Elder dout("rbd_dev %p released dev id %llu\n", rbd_dev, 3504e2839308SAlex Elder (unsigned long long) rbd_dev->dev_id); 3505499afd5bSAlex Elder spin_lock(&rbd_dev_list_lock); 3506499afd5bSAlex Elder list_del_init(&rbd_dev->node); 3507d184f6bfSAlex Elder 3508d184f6bfSAlex Elder /* 3509d184f6bfSAlex Elder * If the id being "put" is not the current maximum, there 3510d184f6bfSAlex Elder * is nothing special we need to do. 
3511d184f6bfSAlex Elder */ 3512e2839308SAlex Elder if (rbd_id != atomic64_read(&rbd_dev_id_max)) { 3513d184f6bfSAlex Elder spin_unlock(&rbd_dev_list_lock); 3514d184f6bfSAlex Elder return; 3515d184f6bfSAlex Elder } 3516d184f6bfSAlex Elder 3517d184f6bfSAlex Elder /* 3518d184f6bfSAlex Elder * We need to update the current maximum id. Search the 3519d184f6bfSAlex Elder * list to find out what it is. We're more likely to find 3520d184f6bfSAlex Elder * the maximum at the end, so search the list backward. 3521d184f6bfSAlex Elder */ 3522d184f6bfSAlex Elder max_id = 0; 3523d184f6bfSAlex Elder list_for_each_prev(tmp, &rbd_dev_list) { 3524d184f6bfSAlex Elder struct rbd_device *rbd_dev; 3525d184f6bfSAlex Elder 3526d184f6bfSAlex Elder rbd_dev = list_entry(tmp, struct rbd_device, node); 3527b213e0b1SAlex Elder if (rbd_dev->dev_id > max_id) 3528b213e0b1SAlex Elder max_id = rbd_dev->dev_id; 3529d184f6bfSAlex Elder } 3530499afd5bSAlex Elder spin_unlock(&rbd_dev_list_lock); 35311ddbe94eSAlex Elder 35321ddbe94eSAlex Elder /* 3533e2839308SAlex Elder * The max id could have been updated by rbd_dev_id_get(), in 3534d184f6bfSAlex Elder * which case it now accurately reflects the new maximum. 3535d184f6bfSAlex Elder * Be careful not to overwrite the maximum value in that 3536d184f6bfSAlex Elder * case. 35371ddbe94eSAlex Elder */ 3538e2839308SAlex Elder atomic64_cmpxchg(&rbd_dev_id_max, rbd_id, max_id); 3539e2839308SAlex Elder dout(" max dev id has been reset\n"); 3540b7f23c36SAlex Elder } 3541b7f23c36SAlex Elder 3542a725f65eSAlex Elder /* 3543e28fff26SAlex Elder * Skips over white space at *buf, and updates *buf to point to the 3544e28fff26SAlex Elder * first found non-space character (if any). Returns the length of 3545593a9e7bSAlex Elder * the token (string of non-white space characters) found. Note 3546593a9e7bSAlex Elder * that *buf must be terminated with '\0'. 
3547e28fff26SAlex Elder */ 3548e28fff26SAlex Elder static inline size_t next_token(const char **buf) 3549e28fff26SAlex Elder { 3550e28fff26SAlex Elder /* 3551e28fff26SAlex Elder * These are the characters that produce nonzero for 3552e28fff26SAlex Elder * isspace() in the "C" and "POSIX" locales. 3553e28fff26SAlex Elder */ 3554e28fff26SAlex Elder const char *spaces = " \f\n\r\t\v"; 3555e28fff26SAlex Elder 3556e28fff26SAlex Elder *buf += strspn(*buf, spaces); /* Find start of token */ 3557e28fff26SAlex Elder 3558e28fff26SAlex Elder return strcspn(*buf, spaces); /* Return token length */ 3559e28fff26SAlex Elder } 3560e28fff26SAlex Elder 3561e28fff26SAlex Elder /* 3562e28fff26SAlex Elder * Finds the next token in *buf, and if the provided token buffer is 3563e28fff26SAlex Elder * big enough, copies the found token into it. The result, if 3564593a9e7bSAlex Elder * copied, is guaranteed to be terminated with '\0'. Note that *buf 3565593a9e7bSAlex Elder * must be terminated with '\0' on entry. 3566e28fff26SAlex Elder * 3567e28fff26SAlex Elder * Returns the length of the token found (not including the '\0'). 3568e28fff26SAlex Elder * Return value will be 0 if no token is found, and it will be >= 3569e28fff26SAlex Elder * token_size if the token would not fit. 3570e28fff26SAlex Elder * 3571593a9e7bSAlex Elder * The *buf pointer will be updated to point beyond the end of the 3572e28fff26SAlex Elder * found token. Note that this occurs even if the token buffer is 3573e28fff26SAlex Elder * too small to hold it. 
3574e28fff26SAlex Elder */ 3575e28fff26SAlex Elder static inline size_t copy_token(const char **buf, 3576e28fff26SAlex Elder char *token, 3577e28fff26SAlex Elder size_t token_size) 3578e28fff26SAlex Elder { 3579e28fff26SAlex Elder size_t len; 3580e28fff26SAlex Elder 3581e28fff26SAlex Elder len = next_token(buf); 3582e28fff26SAlex Elder if (len < token_size) { 3583e28fff26SAlex Elder memcpy(token, *buf, len); 3584e28fff26SAlex Elder *(token + len) = '\0'; 3585e28fff26SAlex Elder } 3586e28fff26SAlex Elder *buf += len; 3587e28fff26SAlex Elder 3588e28fff26SAlex Elder return len; 3589e28fff26SAlex Elder } 3590e28fff26SAlex Elder 3591e28fff26SAlex Elder /* 3592ea3352f4SAlex Elder * Finds the next token in *buf, dynamically allocates a buffer big 3593ea3352f4SAlex Elder * enough to hold a copy of it, and copies the token into the new 3594ea3352f4SAlex Elder * buffer. The copy is guaranteed to be terminated with '\0'. Note 3595ea3352f4SAlex Elder * that a duplicate buffer is created even for a zero-length token. 3596ea3352f4SAlex Elder * 3597ea3352f4SAlex Elder * Returns a pointer to the newly-allocated duplicate, or a null 3598ea3352f4SAlex Elder * pointer if memory for the duplicate was not available. If 3599ea3352f4SAlex Elder * the lenp argument is a non-null pointer, the length of the token 3600ea3352f4SAlex Elder * (not including the '\0') is returned in *lenp. 3601ea3352f4SAlex Elder * 3602ea3352f4SAlex Elder * If successful, the *buf pointer will be updated to point beyond 3603ea3352f4SAlex Elder * the end of the found token. 3604ea3352f4SAlex Elder * 3605ea3352f4SAlex Elder * Note: uses GFP_KERNEL for allocation. 
3606ea3352f4SAlex Elder */ 3607ea3352f4SAlex Elder static inline char *dup_token(const char **buf, size_t *lenp) 3608ea3352f4SAlex Elder { 3609ea3352f4SAlex Elder char *dup; 3610ea3352f4SAlex Elder size_t len; 3611ea3352f4SAlex Elder 3612ea3352f4SAlex Elder len = next_token(buf); 36134caf35f9SAlex Elder dup = kmemdup(*buf, len + 1, GFP_KERNEL); 3614ea3352f4SAlex Elder if (!dup) 3615ea3352f4SAlex Elder return NULL; 3616ea3352f4SAlex Elder *(dup + len) = '\0'; 3617ea3352f4SAlex Elder *buf += len; 3618ea3352f4SAlex Elder 3619ea3352f4SAlex Elder if (lenp) 3620ea3352f4SAlex Elder *lenp = len; 3621ea3352f4SAlex Elder 3622ea3352f4SAlex Elder return dup; 3623ea3352f4SAlex Elder } 3624ea3352f4SAlex Elder 3625ea3352f4SAlex Elder /* 3626859c31dfSAlex Elder * Parse the options provided for an "rbd add" (i.e., rbd image 3627859c31dfSAlex Elder * mapping) request. These arrive via a write to /sys/bus/rbd/add, 3628859c31dfSAlex Elder * and the data written is passed here via a NUL-terminated buffer. 3629859c31dfSAlex Elder * Returns 0 if successful or an error code otherwise. 3630d22f76e7SAlex Elder * 3631859c31dfSAlex Elder * The information extracted from these options is recorded in 3632859c31dfSAlex Elder * the other parameters which return dynamically-allocated 3633859c31dfSAlex Elder * structures: 3634859c31dfSAlex Elder * ceph_opts 3635859c31dfSAlex Elder * The address of a pointer that will refer to a ceph options 3636859c31dfSAlex Elder * structure. Caller must release the returned pointer using 3637859c31dfSAlex Elder * ceph_destroy_options() when it is no longer needed. 3638859c31dfSAlex Elder * rbd_opts 3639859c31dfSAlex Elder * Address of an rbd options pointer. Fully initialized by 3640859c31dfSAlex Elder * this function; caller must release with kfree(). 3641859c31dfSAlex Elder * spec 3642859c31dfSAlex Elder * Address of an rbd image specification pointer. Fully 3643859c31dfSAlex Elder * initialized by this function based on parsed options. 
 *      Caller must release with rbd_spec_put().
 *
 * The options passed take this form:
 *  <mon_addrs> <options> <pool_name> <image_name> [<snap_name>]
 * where:
 *  <mon_addrs>
 *      A comma-separated list of one or more monitor addresses.
 *      A monitor address is an ip address, optionally followed
 *      by a port number (separated by a colon).
 *        I.e.:  ip1[:port1][,ip2[:port2]...]
 *  <options>
 *      A comma-separated list of ceph and/or rbd options.
 *  <pool_name>
 *      The name of the rados pool containing the rbd image.
 *  <image_name>
 *      The name of the image in that pool to map.
 *  <snap_name>
 *      An optional snapshot name.  If provided, the mapping will
 *      present data from the image at the time that snapshot was
 *      created.  The image head is used if no snapshot name is
 *      provided.  Snapshot mappings are always read-only.
3665a725f65eSAlex Elder */ 3666859c31dfSAlex Elder static int rbd_add_parse_args(const char *buf, 3667dc79b113SAlex Elder struct ceph_options **ceph_opts, 3668859c31dfSAlex Elder struct rbd_options **opts, 3669859c31dfSAlex Elder struct rbd_spec **rbd_spec) 3670a725f65eSAlex Elder { 3671e28fff26SAlex Elder size_t len; 3672859c31dfSAlex Elder char *options; 36730ddebc0cSAlex Elder const char *mon_addrs; 36740ddebc0cSAlex Elder size_t mon_addrs_size; 3675859c31dfSAlex Elder struct rbd_spec *spec = NULL; 36764e9afebaSAlex Elder struct rbd_options *rbd_opts = NULL; 3677859c31dfSAlex Elder struct ceph_options *copts; 3678dc79b113SAlex Elder int ret; 3679e28fff26SAlex Elder 3680e28fff26SAlex Elder /* The first four tokens are required */ 3681e28fff26SAlex Elder 36827ef3214aSAlex Elder len = next_token(&buf); 36834fb5d671SAlex Elder if (!len) { 36844fb5d671SAlex Elder rbd_warn(NULL, "no monitor address(es) provided"); 36854fb5d671SAlex Elder return -EINVAL; 36864fb5d671SAlex Elder } 36870ddebc0cSAlex Elder mon_addrs = buf; 3688f28e565aSAlex Elder mon_addrs_size = len + 1; 36897ef3214aSAlex Elder buf += len; 3690a725f65eSAlex Elder 3691dc79b113SAlex Elder ret = -EINVAL; 3692f28e565aSAlex Elder options = dup_token(&buf, NULL); 3693f28e565aSAlex Elder if (!options) 3694dc79b113SAlex Elder return -ENOMEM; 36954fb5d671SAlex Elder if (!*options) { 36964fb5d671SAlex Elder rbd_warn(NULL, "no options provided"); 36974fb5d671SAlex Elder goto out_err; 36984fb5d671SAlex Elder } 3699a725f65eSAlex Elder 3700859c31dfSAlex Elder spec = rbd_spec_alloc(); 3701859c31dfSAlex Elder if (!spec) 3702f28e565aSAlex Elder goto out_mem; 3703859c31dfSAlex Elder 3704859c31dfSAlex Elder spec->pool_name = dup_token(&buf, NULL); 3705859c31dfSAlex Elder if (!spec->pool_name) 3706859c31dfSAlex Elder goto out_mem; 37074fb5d671SAlex Elder if (!*spec->pool_name) { 37084fb5d671SAlex Elder rbd_warn(NULL, "no pool name provided"); 37094fb5d671SAlex Elder goto out_err; 37104fb5d671SAlex Elder } 3711e28fff26SAlex 
Elder 371269e7a02fSAlex Elder spec->image_name = dup_token(&buf, NULL); 3713859c31dfSAlex Elder if (!spec->image_name) 3714f28e565aSAlex Elder goto out_mem; 37154fb5d671SAlex Elder if (!*spec->image_name) { 37164fb5d671SAlex Elder rbd_warn(NULL, "no image name provided"); 37174fb5d671SAlex Elder goto out_err; 37184fb5d671SAlex Elder } 3719e28fff26SAlex Elder 3720f28e565aSAlex Elder /* 3721f28e565aSAlex Elder * Snapshot name is optional; default is to use "-" 3722f28e565aSAlex Elder * (indicating the head/no snapshot). 3723f28e565aSAlex Elder */ 37243feeb894SAlex Elder len = next_token(&buf); 3725820a5f3eSAlex Elder if (!len) { 37263feeb894SAlex Elder buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */ 37273feeb894SAlex Elder len = sizeof (RBD_SNAP_HEAD_NAME) - 1; 3728f28e565aSAlex Elder } else if (len > RBD_MAX_SNAP_NAME_LEN) { 3729dc79b113SAlex Elder ret = -ENAMETOOLONG; 3730f28e565aSAlex Elder goto out_err; 3731849b4260SAlex Elder } 37324caf35f9SAlex Elder spec->snap_name = kmemdup(buf, len + 1, GFP_KERNEL); 3733859c31dfSAlex Elder if (!spec->snap_name) 3734f28e565aSAlex Elder goto out_mem; 3735859c31dfSAlex Elder *(spec->snap_name + len) = '\0'; 3736e5c35534SAlex Elder 37370ddebc0cSAlex Elder /* Initialize all rbd options to the defaults */ 3738e28fff26SAlex Elder 37394e9afebaSAlex Elder rbd_opts = kzalloc(sizeof (*rbd_opts), GFP_KERNEL); 37404e9afebaSAlex Elder if (!rbd_opts) 37414e9afebaSAlex Elder goto out_mem; 37424e9afebaSAlex Elder 37434e9afebaSAlex Elder rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; 3744d22f76e7SAlex Elder 3745859c31dfSAlex Elder copts = ceph_parse_options(options, mon_addrs, 37460ddebc0cSAlex Elder mon_addrs + mon_addrs_size - 1, 37474e9afebaSAlex Elder parse_rbd_opts_token, rbd_opts); 3748859c31dfSAlex Elder if (IS_ERR(copts)) { 3749859c31dfSAlex Elder ret = PTR_ERR(copts); 3750dc79b113SAlex Elder goto out_err; 3751dc79b113SAlex Elder } 3752859c31dfSAlex Elder kfree(options); 3753859c31dfSAlex Elder 3754859c31dfSAlex Elder *ceph_opts = 
copts; 37554e9afebaSAlex Elder *opts = rbd_opts; 3756859c31dfSAlex Elder *rbd_spec = spec; 37570ddebc0cSAlex Elder 3758dc79b113SAlex Elder return 0; 3759f28e565aSAlex Elder out_mem: 3760dc79b113SAlex Elder ret = -ENOMEM; 3761d22f76e7SAlex Elder out_err: 3762859c31dfSAlex Elder kfree(rbd_opts); 3763859c31dfSAlex Elder rbd_spec_put(spec); 3764f28e565aSAlex Elder kfree(options); 3765d22f76e7SAlex Elder 3766dc79b113SAlex Elder return ret; 3767a725f65eSAlex Elder } 3768a725f65eSAlex Elder 3769589d30e0SAlex Elder /* 3770589d30e0SAlex Elder * An rbd format 2 image has a unique identifier, distinct from the 3771589d30e0SAlex Elder * name given to it by the user. Internally, that identifier is 3772589d30e0SAlex Elder * what's used to specify the names of objects related to the image. 3773589d30e0SAlex Elder * 3774589d30e0SAlex Elder * A special "rbd id" object is used to map an rbd image name to its 3775589d30e0SAlex Elder * id. If that object doesn't exist, then there is no v2 rbd image 3776589d30e0SAlex Elder * with the supplied name. 3777589d30e0SAlex Elder * 3778589d30e0SAlex Elder * This function will record the given rbd_dev's image_id field if 3779589d30e0SAlex Elder * it can be determined, and in that case will return 0. If any 3780589d30e0SAlex Elder * errors occur a negative errno will be returned and the rbd_dev's 3781589d30e0SAlex Elder * image_id field will be unchanged (and should be NULL). 3782589d30e0SAlex Elder */ 3783589d30e0SAlex Elder static int rbd_dev_image_id(struct rbd_device *rbd_dev) 3784589d30e0SAlex Elder { 3785589d30e0SAlex Elder int ret; 3786589d30e0SAlex Elder size_t size; 3787589d30e0SAlex Elder char *object_name; 3788589d30e0SAlex Elder void *response; 3789589d30e0SAlex Elder void *p; 3790589d30e0SAlex Elder 3791589d30e0SAlex Elder /* 37922c0d0a10SAlex Elder * When probing a parent image, the image id is already 37932c0d0a10SAlex Elder * known (and the image name likely is not). 
There's no 37942c0d0a10SAlex Elder * need to fetch the image id again in this case. 37952c0d0a10SAlex Elder */ 37962c0d0a10SAlex Elder if (rbd_dev->spec->image_id) 37972c0d0a10SAlex Elder return 0; 37982c0d0a10SAlex Elder 37992c0d0a10SAlex Elder /* 3800589d30e0SAlex Elder * First, see if the format 2 image id file exists, and if 3801589d30e0SAlex Elder * so, get the image's persistent id from it. 3802589d30e0SAlex Elder */ 380369e7a02fSAlex Elder size = sizeof (RBD_ID_PREFIX) + strlen(rbd_dev->spec->image_name); 3804589d30e0SAlex Elder object_name = kmalloc(size, GFP_NOIO); 3805589d30e0SAlex Elder if (!object_name) 3806589d30e0SAlex Elder return -ENOMEM; 38070d7dbfceSAlex Elder sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->spec->image_name); 3808589d30e0SAlex Elder dout("rbd id object name is %s\n", object_name); 3809589d30e0SAlex Elder 3810589d30e0SAlex Elder /* Response will be an encoded string, which includes a length */ 3811589d30e0SAlex Elder 3812589d30e0SAlex Elder size = sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX; 3813589d30e0SAlex Elder response = kzalloc(size, GFP_NOIO); 3814589d30e0SAlex Elder if (!response) { 3815589d30e0SAlex Elder ret = -ENOMEM; 3816589d30e0SAlex Elder goto out; 3817589d30e0SAlex Elder } 3818589d30e0SAlex Elder 381936be9a76SAlex Elder ret = rbd_obj_method_sync(rbd_dev, object_name, 3820589d30e0SAlex Elder "rbd", "get_id", 3821589d30e0SAlex Elder NULL, 0, 382207b2391fSAlex Elder response, RBD_IMAGE_ID_LEN_MAX, NULL); 382336be9a76SAlex Elder dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 3824589d30e0SAlex Elder if (ret < 0) 3825589d30e0SAlex Elder goto out; 3826589d30e0SAlex Elder 3827589d30e0SAlex Elder p = response; 38280d7dbfceSAlex Elder rbd_dev->spec->image_id = ceph_extract_encoded_string(&p, 3829589d30e0SAlex Elder p + RBD_IMAGE_ID_LEN_MAX, 3830979ed480SAlex Elder NULL, GFP_NOIO); 38310d7dbfceSAlex Elder if (IS_ERR(rbd_dev->spec->image_id)) { 38320d7dbfceSAlex Elder ret = PTR_ERR(rbd_dev->spec->image_id); 
38330d7dbfceSAlex Elder rbd_dev->spec->image_id = NULL; 3834589d30e0SAlex Elder } else { 38350d7dbfceSAlex Elder dout("image_id is %s\n", rbd_dev->spec->image_id); 3836589d30e0SAlex Elder } 3837589d30e0SAlex Elder out: 3838589d30e0SAlex Elder kfree(response); 3839589d30e0SAlex Elder kfree(object_name); 3840589d30e0SAlex Elder 3841589d30e0SAlex Elder return ret; 3842589d30e0SAlex Elder } 3843589d30e0SAlex Elder 3844a30b71b9SAlex Elder static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) 3845a30b71b9SAlex Elder { 3846a30b71b9SAlex Elder int ret; 3847a30b71b9SAlex Elder size_t size; 3848a30b71b9SAlex Elder 3849a30b71b9SAlex Elder /* Version 1 images have no id; empty string is used */ 3850a30b71b9SAlex Elder 38510d7dbfceSAlex Elder rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL); 38520d7dbfceSAlex Elder if (!rbd_dev->spec->image_id) 3853a30b71b9SAlex Elder return -ENOMEM; 3854a30b71b9SAlex Elder 3855a30b71b9SAlex Elder /* Record the header object name for this rbd image. */ 3856a30b71b9SAlex Elder 385769e7a02fSAlex Elder size = strlen(rbd_dev->spec->image_name) + sizeof (RBD_SUFFIX); 3858a30b71b9SAlex Elder rbd_dev->header_name = kmalloc(size, GFP_KERNEL); 3859a30b71b9SAlex Elder if (!rbd_dev->header_name) { 3860a30b71b9SAlex Elder ret = -ENOMEM; 3861a30b71b9SAlex Elder goto out_err; 3862a30b71b9SAlex Elder } 38630d7dbfceSAlex Elder sprintf(rbd_dev->header_name, "%s%s", 38640d7dbfceSAlex Elder rbd_dev->spec->image_name, RBD_SUFFIX); 3865a30b71b9SAlex Elder 3866a30b71b9SAlex Elder /* Populate rbd image metadata */ 3867a30b71b9SAlex Elder 3868a30b71b9SAlex Elder ret = rbd_read_header(rbd_dev, &rbd_dev->header); 3869a30b71b9SAlex Elder if (ret < 0) 3870a30b71b9SAlex Elder goto out_err; 387186b00e0dSAlex Elder 387286b00e0dSAlex Elder /* Version 1 images have no parent (no layering) */ 387386b00e0dSAlex Elder 387486b00e0dSAlex Elder rbd_dev->parent_spec = NULL; 387586b00e0dSAlex Elder rbd_dev->parent_overlap = 0; 387686b00e0dSAlex Elder 3877a30b71b9SAlex Elder 
rbd_dev->image_format = 1; 3878a30b71b9SAlex Elder 3879a30b71b9SAlex Elder dout("discovered version 1 image, header name is %s\n", 3880a30b71b9SAlex Elder rbd_dev->header_name); 3881a30b71b9SAlex Elder 3882a30b71b9SAlex Elder return 0; 3883a30b71b9SAlex Elder 3884a30b71b9SAlex Elder out_err: 3885a30b71b9SAlex Elder kfree(rbd_dev->header_name); 3886a30b71b9SAlex Elder rbd_dev->header_name = NULL; 38870d7dbfceSAlex Elder kfree(rbd_dev->spec->image_id); 38880d7dbfceSAlex Elder rbd_dev->spec->image_id = NULL; 3889a30b71b9SAlex Elder 3890a30b71b9SAlex Elder return ret; 3891a30b71b9SAlex Elder } 3892a30b71b9SAlex Elder 3893a30b71b9SAlex Elder static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) 3894a30b71b9SAlex Elder { 3895a30b71b9SAlex Elder size_t size; 38969d475de5SAlex Elder int ret; 38976e14b1a6SAlex Elder u64 ver = 0; 3898a30b71b9SAlex Elder 3899a30b71b9SAlex Elder /* 3900a30b71b9SAlex Elder * Image id was filled in by the caller. Record the header 3901a30b71b9SAlex Elder * object name for this rbd image. 3902a30b71b9SAlex Elder */ 3903979ed480SAlex Elder size = sizeof (RBD_HEADER_PREFIX) + strlen(rbd_dev->spec->image_id); 3904a30b71b9SAlex Elder rbd_dev->header_name = kmalloc(size, GFP_KERNEL); 3905a30b71b9SAlex Elder if (!rbd_dev->header_name) 3906a30b71b9SAlex Elder return -ENOMEM; 3907a30b71b9SAlex Elder sprintf(rbd_dev->header_name, "%s%s", 39080d7dbfceSAlex Elder RBD_HEADER_PREFIX, rbd_dev->spec->image_id); 39099d475de5SAlex Elder 39109d475de5SAlex Elder /* Get the size and object order for the image */ 39119d475de5SAlex Elder 39129d475de5SAlex Elder ret = rbd_dev_v2_image_size(rbd_dev); 39139d475de5SAlex Elder if (ret < 0) 39149d475de5SAlex Elder goto out_err; 39151e130199SAlex Elder 39161e130199SAlex Elder /* Get the object prefix (a.k.a. 
block_name) for the image */ 39171e130199SAlex Elder 39181e130199SAlex Elder ret = rbd_dev_v2_object_prefix(rbd_dev); 39191e130199SAlex Elder if (ret < 0) 39201e130199SAlex Elder goto out_err; 3921b1b5402aSAlex Elder 3922d889140cSAlex Elder /* Get the and check features for the image */ 3923b1b5402aSAlex Elder 3924b1b5402aSAlex Elder ret = rbd_dev_v2_features(rbd_dev); 3925b1b5402aSAlex Elder if (ret < 0) 3926b1b5402aSAlex Elder goto out_err; 392735d489f9SAlex Elder 392886b00e0dSAlex Elder /* If the image supports layering, get the parent info */ 392986b00e0dSAlex Elder 393086b00e0dSAlex Elder if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { 393186b00e0dSAlex Elder ret = rbd_dev_v2_parent_info(rbd_dev); 393286b00e0dSAlex Elder if (ret < 0) 393386b00e0dSAlex Elder goto out_err; 393486b00e0dSAlex Elder } 393586b00e0dSAlex Elder 39366e14b1a6SAlex Elder /* crypto and compression type aren't (yet) supported for v2 images */ 393735d489f9SAlex Elder 39386e14b1a6SAlex Elder rbd_dev->header.crypt_type = 0; 39396e14b1a6SAlex Elder rbd_dev->header.comp_type = 0; 39406e14b1a6SAlex Elder 39416e14b1a6SAlex Elder /* Get the snapshot context, plus the header version */ 39426e14b1a6SAlex Elder 39436e14b1a6SAlex Elder ret = rbd_dev_v2_snap_context(rbd_dev, &ver); 394435d489f9SAlex Elder if (ret) 394535d489f9SAlex Elder goto out_err; 39466e14b1a6SAlex Elder rbd_dev->header.obj_version = ver; 39476e14b1a6SAlex Elder 3948a30b71b9SAlex Elder rbd_dev->image_format = 2; 3949a30b71b9SAlex Elder 3950a30b71b9SAlex Elder dout("discovered version 2 image, header name is %s\n", 3951a30b71b9SAlex Elder rbd_dev->header_name); 3952a30b71b9SAlex Elder 395335152979SAlex Elder return 0; 39549d475de5SAlex Elder out_err: 395586b00e0dSAlex Elder rbd_dev->parent_overlap = 0; 395686b00e0dSAlex Elder rbd_spec_put(rbd_dev->parent_spec); 395786b00e0dSAlex Elder rbd_dev->parent_spec = NULL; 39589d475de5SAlex Elder kfree(rbd_dev->header_name); 39599d475de5SAlex Elder rbd_dev->header_name = NULL; 
39601e130199SAlex Elder kfree(rbd_dev->header.object_prefix); 39611e130199SAlex Elder rbd_dev->header.object_prefix = NULL; 39629d475de5SAlex Elder 39639d475de5SAlex Elder return ret; 3964a30b71b9SAlex Elder } 3965a30b71b9SAlex Elder 396683a06263SAlex Elder static int rbd_dev_probe_finish(struct rbd_device *rbd_dev) 396783a06263SAlex Elder { 396883a06263SAlex Elder int ret; 396983a06263SAlex Elder 397083a06263SAlex Elder /* no need to lock here, as rbd_dev is not registered yet */ 397183a06263SAlex Elder ret = rbd_dev_snaps_update(rbd_dev); 397283a06263SAlex Elder if (ret) 397383a06263SAlex Elder return ret; 397483a06263SAlex Elder 39759e15b77dSAlex Elder ret = rbd_dev_probe_update_spec(rbd_dev); 39769e15b77dSAlex Elder if (ret) 39779e15b77dSAlex Elder goto err_out_snaps; 39789e15b77dSAlex Elder 397983a06263SAlex Elder ret = rbd_dev_set_mapping(rbd_dev); 398083a06263SAlex Elder if (ret) 398183a06263SAlex Elder goto err_out_snaps; 398283a06263SAlex Elder 398383a06263SAlex Elder /* generate unique id: find highest unique id, add one */ 398483a06263SAlex Elder rbd_dev_id_get(rbd_dev); 398583a06263SAlex Elder 398683a06263SAlex Elder /* Fill in the device name, now that we have its id. */ 398783a06263SAlex Elder BUILD_BUG_ON(DEV_NAME_LEN 398883a06263SAlex Elder < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH); 398983a06263SAlex Elder sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id); 399083a06263SAlex Elder 399183a06263SAlex Elder /* Get our block major device number. */ 399283a06263SAlex Elder 399383a06263SAlex Elder ret = register_blkdev(0, rbd_dev->name); 399483a06263SAlex Elder if (ret < 0) 399583a06263SAlex Elder goto err_out_id; 399683a06263SAlex Elder rbd_dev->major = ret; 399783a06263SAlex Elder 399883a06263SAlex Elder /* Set up the blkdev mapping. 
*/ 399983a06263SAlex Elder 400083a06263SAlex Elder ret = rbd_init_disk(rbd_dev); 400183a06263SAlex Elder if (ret) 400283a06263SAlex Elder goto err_out_blkdev; 400383a06263SAlex Elder 400483a06263SAlex Elder ret = rbd_bus_add_dev(rbd_dev); 400583a06263SAlex Elder if (ret) 400683a06263SAlex Elder goto err_out_disk; 400783a06263SAlex Elder 400883a06263SAlex Elder /* 400983a06263SAlex Elder * At this point cleanup in the event of an error is the job 401083a06263SAlex Elder * of the sysfs code (initiated by rbd_bus_del_dev()). 401183a06263SAlex Elder */ 401283a06263SAlex Elder down_write(&rbd_dev->header_rwsem); 401383a06263SAlex Elder ret = rbd_dev_snaps_register(rbd_dev); 401483a06263SAlex Elder up_write(&rbd_dev->header_rwsem); 401583a06263SAlex Elder if (ret) 401683a06263SAlex Elder goto err_out_bus; 401783a06263SAlex Elder 40189969ebc5SAlex Elder ret = rbd_dev_header_watch_sync(rbd_dev, 1); 401983a06263SAlex Elder if (ret) 402083a06263SAlex Elder goto err_out_bus; 402183a06263SAlex Elder 402283a06263SAlex Elder /* Everything's ready. Announce the disk to the world. 
*/ 402383a06263SAlex Elder 402483a06263SAlex Elder add_disk(rbd_dev->disk); 402583a06263SAlex Elder 402683a06263SAlex Elder pr_info("%s: added with size 0x%llx\n", rbd_dev->disk->disk_name, 402783a06263SAlex Elder (unsigned long long) rbd_dev->mapping.size); 402883a06263SAlex Elder 402983a06263SAlex Elder return ret; 403083a06263SAlex Elder err_out_bus: 403183a06263SAlex Elder /* this will also clean up rest of rbd_dev stuff */ 403283a06263SAlex Elder 403383a06263SAlex Elder rbd_bus_del_dev(rbd_dev); 403483a06263SAlex Elder 403583a06263SAlex Elder return ret; 403683a06263SAlex Elder err_out_disk: 403783a06263SAlex Elder rbd_free_disk(rbd_dev); 403883a06263SAlex Elder err_out_blkdev: 403983a06263SAlex Elder unregister_blkdev(rbd_dev->major, rbd_dev->name); 404083a06263SAlex Elder err_out_id: 404183a06263SAlex Elder rbd_dev_id_put(rbd_dev); 404283a06263SAlex Elder err_out_snaps: 404383a06263SAlex Elder rbd_remove_all_snaps(rbd_dev); 404483a06263SAlex Elder 404583a06263SAlex Elder return ret; 404683a06263SAlex Elder } 404783a06263SAlex Elder 4048a30b71b9SAlex Elder /* 4049a30b71b9SAlex Elder * Probe for the existence of the header object for the given rbd 4050a30b71b9SAlex Elder * device. For format 2 images this includes determining the image 4051a30b71b9SAlex Elder * id. 4052a30b71b9SAlex Elder */ 4053a30b71b9SAlex Elder static int rbd_dev_probe(struct rbd_device *rbd_dev) 4054a30b71b9SAlex Elder { 4055a30b71b9SAlex Elder int ret; 4056a30b71b9SAlex Elder 4057a30b71b9SAlex Elder /* 4058a30b71b9SAlex Elder * Get the id from the image id object. If it's not a 4059a30b71b9SAlex Elder * format 2 image, we'll get ENOENT back, and we'll assume 4060a30b71b9SAlex Elder * it's a format 1 image. 
4061a30b71b9SAlex Elder */ 4062a30b71b9SAlex Elder ret = rbd_dev_image_id(rbd_dev); 4063a30b71b9SAlex Elder if (ret) 4064a30b71b9SAlex Elder ret = rbd_dev_v1_probe(rbd_dev); 4065a30b71b9SAlex Elder else 4066a30b71b9SAlex Elder ret = rbd_dev_v2_probe(rbd_dev); 406783a06263SAlex Elder if (ret) { 4068a30b71b9SAlex Elder dout("probe failed, returning %d\n", ret); 4069a30b71b9SAlex Elder 4070a30b71b9SAlex Elder return ret; 4071a30b71b9SAlex Elder } 4072a30b71b9SAlex Elder 407383a06263SAlex Elder ret = rbd_dev_probe_finish(rbd_dev); 407483a06263SAlex Elder if (ret) 407583a06263SAlex Elder rbd_header_free(&rbd_dev->header); 407683a06263SAlex Elder 407783a06263SAlex Elder return ret; 407883a06263SAlex Elder } 407983a06263SAlex Elder 408059c2be1eSYehuda Sadeh static ssize_t rbd_add(struct bus_type *bus, 408159c2be1eSYehuda Sadeh const char *buf, 408259c2be1eSYehuda Sadeh size_t count) 4083602adf40SYehuda Sadeh { 4084cb8627c7SAlex Elder struct rbd_device *rbd_dev = NULL; 4085dc79b113SAlex Elder struct ceph_options *ceph_opts = NULL; 40864e9afebaSAlex Elder struct rbd_options *rbd_opts = NULL; 4087859c31dfSAlex Elder struct rbd_spec *spec = NULL; 40889d3997fdSAlex Elder struct rbd_client *rbdc; 408927cc2594SAlex Elder struct ceph_osd_client *osdc; 409027cc2594SAlex Elder int rc = -ENOMEM; 4091602adf40SYehuda Sadeh 4092602adf40SYehuda Sadeh if (!try_module_get(THIS_MODULE)) 4093602adf40SYehuda Sadeh return -ENODEV; 4094602adf40SYehuda Sadeh 4095a725f65eSAlex Elder /* parse add command */ 4096859c31dfSAlex Elder rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec); 4097dc79b113SAlex Elder if (rc < 0) 4098bd4ba655SAlex Elder goto err_out_module; 4099a725f65eSAlex Elder 41009d3997fdSAlex Elder rbdc = rbd_get_client(ceph_opts); 41019d3997fdSAlex Elder if (IS_ERR(rbdc)) { 41029d3997fdSAlex Elder rc = PTR_ERR(rbdc); 41030ddebc0cSAlex Elder goto err_out_args; 41049d3997fdSAlex Elder } 4105c53d5893SAlex Elder ceph_opts = NULL; /* rbd_dev client now owns this */ 
4106602adf40SYehuda Sadeh 4107602adf40SYehuda Sadeh /* pick the pool */ 41089d3997fdSAlex Elder osdc = &rbdc->client->osdc; 4109859c31dfSAlex Elder rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name); 4110602adf40SYehuda Sadeh if (rc < 0) 4111602adf40SYehuda Sadeh goto err_out_client; 4112859c31dfSAlex Elder spec->pool_id = (u64) rc; 4113859c31dfSAlex Elder 41140903e875SAlex Elder /* The ceph file layout needs to fit pool id in 32 bits */ 41150903e875SAlex Elder 41160903e875SAlex Elder if (WARN_ON(spec->pool_id > (u64) U32_MAX)) { 41170903e875SAlex Elder rc = -EIO; 41180903e875SAlex Elder goto err_out_client; 41190903e875SAlex Elder } 41200903e875SAlex Elder 4121c53d5893SAlex Elder rbd_dev = rbd_dev_create(rbdc, spec); 4122bd4ba655SAlex Elder if (!rbd_dev) 4123bd4ba655SAlex Elder goto err_out_client; 4124c53d5893SAlex Elder rbdc = NULL; /* rbd_dev now owns this */ 4125c53d5893SAlex Elder spec = NULL; /* rbd_dev now owns this */ 4126602adf40SYehuda Sadeh 4127bd4ba655SAlex Elder rbd_dev->mapping.read_only = rbd_opts->read_only; 4128c53d5893SAlex Elder kfree(rbd_opts); 4129c53d5893SAlex Elder rbd_opts = NULL; /* done with this */ 4130bd4ba655SAlex Elder 4131a30b71b9SAlex Elder rc = rbd_dev_probe(rbd_dev); 4132a30b71b9SAlex Elder if (rc < 0) 4133c53d5893SAlex Elder goto err_out_rbd_dev; 413405fd6f6fSAlex Elder 4135602adf40SYehuda Sadeh return count; 4136c53d5893SAlex Elder err_out_rbd_dev: 4137c53d5893SAlex Elder rbd_dev_destroy(rbd_dev); 4138bd4ba655SAlex Elder err_out_client: 41399d3997fdSAlex Elder rbd_put_client(rbdc); 41400ddebc0cSAlex Elder err_out_args: 414178cea76eSAlex Elder if (ceph_opts) 414278cea76eSAlex Elder ceph_destroy_options(ceph_opts); 41434e9afebaSAlex Elder kfree(rbd_opts); 4144859c31dfSAlex Elder rbd_spec_put(spec); 4145bd4ba655SAlex Elder err_out_module: 4146bd4ba655SAlex Elder module_put(THIS_MODULE); 414727cc2594SAlex Elder 4148602adf40SYehuda Sadeh dout("Error adding device %s\n", buf); 414927cc2594SAlex Elder 415027cc2594SAlex Elder 
return (ssize_t) rc; 4151602adf40SYehuda Sadeh } 4152602adf40SYehuda Sadeh 4153de71a297SAlex Elder static struct rbd_device *__rbd_get_dev(unsigned long dev_id) 4154602adf40SYehuda Sadeh { 4155602adf40SYehuda Sadeh struct list_head *tmp; 4156602adf40SYehuda Sadeh struct rbd_device *rbd_dev; 4157602adf40SYehuda Sadeh 4158e124a82fSAlex Elder spin_lock(&rbd_dev_list_lock); 4159602adf40SYehuda Sadeh list_for_each(tmp, &rbd_dev_list) { 4160602adf40SYehuda Sadeh rbd_dev = list_entry(tmp, struct rbd_device, node); 4161de71a297SAlex Elder if (rbd_dev->dev_id == dev_id) { 4162e124a82fSAlex Elder spin_unlock(&rbd_dev_list_lock); 4163602adf40SYehuda Sadeh return rbd_dev; 4164602adf40SYehuda Sadeh } 4165e124a82fSAlex Elder } 4166e124a82fSAlex Elder spin_unlock(&rbd_dev_list_lock); 4167602adf40SYehuda Sadeh return NULL; 4168602adf40SYehuda Sadeh } 4169602adf40SYehuda Sadeh 4170dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev) 4171602adf40SYehuda Sadeh { 4172593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 4173602adf40SYehuda Sadeh 417459c2be1eSYehuda Sadeh if (rbd_dev->watch_event) 41759969ebc5SAlex Elder rbd_dev_header_watch_sync(rbd_dev, 0); 4176602adf40SYehuda Sadeh 4177602adf40SYehuda Sadeh /* clean up and free blkdev */ 4178602adf40SYehuda Sadeh rbd_free_disk(rbd_dev); 4179602adf40SYehuda Sadeh unregister_blkdev(rbd_dev->major, rbd_dev->name); 418032eec68dSAlex Elder 41812ac4e75dSAlex Elder /* release allocated disk header fields */ 41822ac4e75dSAlex Elder rbd_header_free(&rbd_dev->header); 41832ac4e75dSAlex Elder 418432eec68dSAlex Elder /* done with the id, and with the rbd_dev */ 4185e2839308SAlex Elder rbd_dev_id_put(rbd_dev); 4186c53d5893SAlex Elder rbd_assert(rbd_dev->rbd_client != NULL); 4187c53d5893SAlex Elder rbd_dev_destroy(rbd_dev); 4188602adf40SYehuda Sadeh 4189602adf40SYehuda Sadeh /* release module ref */ 4190602adf40SYehuda Sadeh module_put(THIS_MODULE); 4191602adf40SYehuda Sadeh } 4192602adf40SYehuda Sadeh 
4193dfc5606dSYehuda Sadeh static ssize_t rbd_remove(struct bus_type *bus, 4194602adf40SYehuda Sadeh const char *buf, 4195602adf40SYehuda Sadeh size_t count) 4196602adf40SYehuda Sadeh { 4197602adf40SYehuda Sadeh struct rbd_device *rbd_dev = NULL; 4198602adf40SYehuda Sadeh int target_id, rc; 4199602adf40SYehuda Sadeh unsigned long ul; 4200602adf40SYehuda Sadeh int ret = count; 4201602adf40SYehuda Sadeh 4202602adf40SYehuda Sadeh rc = strict_strtoul(buf, 10, &ul); 4203602adf40SYehuda Sadeh if (rc) 4204602adf40SYehuda Sadeh return rc; 4205602adf40SYehuda Sadeh 4206602adf40SYehuda Sadeh /* convert to int; abort if we lost anything in the conversion */ 4207602adf40SYehuda Sadeh target_id = (int) ul; 4208602adf40SYehuda Sadeh if (target_id != ul) 4209602adf40SYehuda Sadeh return -EINVAL; 4210602adf40SYehuda Sadeh 4211602adf40SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 4212602adf40SYehuda Sadeh 4213602adf40SYehuda Sadeh rbd_dev = __rbd_get_dev(target_id); 4214602adf40SYehuda Sadeh if (!rbd_dev) { 4215602adf40SYehuda Sadeh ret = -ENOENT; 4216602adf40SYehuda Sadeh goto done; 4217602adf40SYehuda Sadeh } 4218602adf40SYehuda Sadeh 4219a14ea269SAlex Elder spin_lock_irq(&rbd_dev->lock); 4220b82d167bSAlex Elder if (rbd_dev->open_count) 422142382b70SAlex Elder ret = -EBUSY; 4222b82d167bSAlex Elder else 4223b82d167bSAlex Elder set_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags); 4224a14ea269SAlex Elder spin_unlock_irq(&rbd_dev->lock); 4225b82d167bSAlex Elder if (ret < 0) 422642382b70SAlex Elder goto done; 422742382b70SAlex Elder 422841f38c2bSAlex Elder rbd_remove_all_snaps(rbd_dev); 4229dfc5606dSYehuda Sadeh rbd_bus_del_dev(rbd_dev); 4230602adf40SYehuda Sadeh 4231602adf40SYehuda Sadeh done: 4232602adf40SYehuda Sadeh mutex_unlock(&ctl_mutex); 4233aafb230eSAlex Elder 4234602adf40SYehuda Sadeh return ret; 4235602adf40SYehuda Sadeh } 4236602adf40SYehuda Sadeh 4237602adf40SYehuda Sadeh /* 4238602adf40SYehuda Sadeh * create control files in sysfs 4239dfc5606dSYehuda 
Sadeh * /sys/bus/rbd/... 4240602adf40SYehuda Sadeh */ 4241602adf40SYehuda Sadeh static int rbd_sysfs_init(void) 4242602adf40SYehuda Sadeh { 4243dfc5606dSYehuda Sadeh int ret; 4244602adf40SYehuda Sadeh 4245fed4c143SAlex Elder ret = device_register(&rbd_root_dev); 4246dfc5606dSYehuda Sadeh if (ret < 0) 4247dfc5606dSYehuda Sadeh return ret; 4248602adf40SYehuda Sadeh 4249fed4c143SAlex Elder ret = bus_register(&rbd_bus_type); 4250fed4c143SAlex Elder if (ret < 0) 4251fed4c143SAlex Elder device_unregister(&rbd_root_dev); 4252602adf40SYehuda Sadeh 4253602adf40SYehuda Sadeh return ret; 4254602adf40SYehuda Sadeh } 4255602adf40SYehuda Sadeh 4256602adf40SYehuda Sadeh static void rbd_sysfs_cleanup(void) 4257602adf40SYehuda Sadeh { 4258dfc5606dSYehuda Sadeh bus_unregister(&rbd_bus_type); 4259fed4c143SAlex Elder device_unregister(&rbd_root_dev); 4260602adf40SYehuda Sadeh } 4261602adf40SYehuda Sadeh 4262cc344fa1SAlex Elder static int __init rbd_init(void) 4263602adf40SYehuda Sadeh { 4264602adf40SYehuda Sadeh int rc; 4265602adf40SYehuda Sadeh 42661e32d34cSAlex Elder if (!libceph_compatible(NULL)) { 42671e32d34cSAlex Elder rbd_warn(NULL, "libceph incompatibility (quitting)"); 42681e32d34cSAlex Elder 42691e32d34cSAlex Elder return -EINVAL; 42701e32d34cSAlex Elder } 4271602adf40SYehuda Sadeh rc = rbd_sysfs_init(); 4272602adf40SYehuda Sadeh if (rc) 4273602adf40SYehuda Sadeh return rc; 4274f0f8cef5SAlex Elder pr_info("loaded " RBD_DRV_NAME_LONG "\n"); 4275602adf40SYehuda Sadeh return 0; 4276602adf40SYehuda Sadeh } 4277602adf40SYehuda Sadeh 4278cc344fa1SAlex Elder static void __exit rbd_exit(void) 4279602adf40SYehuda Sadeh { 4280602adf40SYehuda Sadeh rbd_sysfs_cleanup(); 4281602adf40SYehuda Sadeh } 4282602adf40SYehuda Sadeh 4283602adf40SYehuda Sadeh module_init(rbd_init); 4284602adf40SYehuda Sadeh module_exit(rbd_exit); 4285602adf40SYehuda Sadeh 4286602adf40SYehuda Sadeh MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 4287602adf40SYehuda Sadeh MODULE_AUTHOR("Yehuda Sadeh 
<yehuda@hq.newdream.net>"); 4288602adf40SYehuda Sadeh MODULE_DESCRIPTION("rados block device"); 4289602adf40SYehuda Sadeh 4290602adf40SYehuda Sadeh /* following authorship retained from original osdblk.c */ 4291602adf40SYehuda Sadeh MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); 4292602adf40SYehuda Sadeh 4293602adf40SYehuda Sadeh MODULE_LICENSE("GPL"); 4294