1602adf40SYehuda Sadeh /* 2602adf40SYehuda Sadeh rbd.c -- Export ceph rados objects as a Linux block device 3602adf40SYehuda Sadeh 4602adf40SYehuda Sadeh 5602adf40SYehuda Sadeh based on drivers/block/osdblk.c: 6602adf40SYehuda Sadeh 7602adf40SYehuda Sadeh Copyright 2009 Red Hat, Inc. 8602adf40SYehuda Sadeh 9602adf40SYehuda Sadeh This program is free software; you can redistribute it and/or modify 10602adf40SYehuda Sadeh it under the terms of the GNU General Public License as published by 11602adf40SYehuda Sadeh the Free Software Foundation. 12602adf40SYehuda Sadeh 13602adf40SYehuda Sadeh This program is distributed in the hope that it will be useful, 14602adf40SYehuda Sadeh but WITHOUT ANY WARRANTY; without even the implied warranty of 15602adf40SYehuda Sadeh MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16602adf40SYehuda Sadeh GNU General Public License for more details. 17602adf40SYehuda Sadeh 18602adf40SYehuda Sadeh You should have received a copy of the GNU General Public License 19602adf40SYehuda Sadeh along with this program; see the file COPYING. If not, write to 20602adf40SYehuda Sadeh the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
21602adf40SYehuda Sadeh 22602adf40SYehuda Sadeh 23602adf40SYehuda Sadeh 24dfc5606dSYehuda Sadeh For usage instructions, please refer to: 25602adf40SYehuda Sadeh 26dfc5606dSYehuda Sadeh Documentation/ABI/testing/sysfs-bus-rbd 27602adf40SYehuda Sadeh 28602adf40SYehuda Sadeh */ 29602adf40SYehuda Sadeh 30602adf40SYehuda Sadeh #include <linux/ceph/libceph.h> 31602adf40SYehuda Sadeh #include <linux/ceph/osd_client.h> 32602adf40SYehuda Sadeh #include <linux/ceph/mon_client.h> 33602adf40SYehuda Sadeh #include <linux/ceph/decode.h> 3459c2be1eSYehuda Sadeh #include <linux/parser.h> 35602adf40SYehuda Sadeh 36602adf40SYehuda Sadeh #include <linux/kernel.h> 37602adf40SYehuda Sadeh #include <linux/device.h> 38602adf40SYehuda Sadeh #include <linux/module.h> 39602adf40SYehuda Sadeh #include <linux/fs.h> 40602adf40SYehuda Sadeh #include <linux/blkdev.h> 41602adf40SYehuda Sadeh 42602adf40SYehuda Sadeh #include "rbd_types.h" 43602adf40SYehuda Sadeh 44593a9e7bSAlex Elder /* 45593a9e7bSAlex Elder * The basic unit of block I/O is a sector. It is interpreted in a 46593a9e7bSAlex Elder * number of contexts in Linux (blk, bio, genhd), but the default is 47593a9e7bSAlex Elder * universally 512 bytes. These symbols are just slightly more 48593a9e7bSAlex Elder * meaningful than the bare numbers they represent. 
49593a9e7bSAlex Elder */ 50593a9e7bSAlex Elder #define SECTOR_SHIFT 9 51593a9e7bSAlex Elder #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) 52593a9e7bSAlex Elder 53f0f8cef5SAlex Elder #define RBD_DRV_NAME "rbd" 54f0f8cef5SAlex Elder #define RBD_DRV_NAME_LONG "rbd (rados block device)" 55602adf40SYehuda Sadeh 56602adf40SYehuda Sadeh #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ 57602adf40SYehuda Sadeh 5821079786SAlex Elder #define RBD_MAX_MD_NAME_LEN (RBD_MAX_OBJ_NAME_LEN + sizeof(RBD_SUFFIX)) 59602adf40SYehuda Sadeh #define RBD_MAX_POOL_NAME_LEN 64 60602adf40SYehuda Sadeh #define RBD_MAX_SNAP_NAME_LEN 32 61602adf40SYehuda Sadeh #define RBD_MAX_OPT_LEN 1024 62602adf40SYehuda Sadeh 63602adf40SYehuda Sadeh #define RBD_SNAP_HEAD_NAME "-" 64602adf40SYehuda Sadeh 6581a89793SAlex Elder /* 6681a89793SAlex Elder * An RBD device name will be "rbd#", where the "rbd" comes from 6781a89793SAlex Elder * RBD_DRV_NAME above, and # is a unique integer identifier. 6881a89793SAlex Elder * MAX_INT_FORMAT_WIDTH is used in ensuring DEV_NAME_LEN is big 6981a89793SAlex Elder * enough to hold all possible device names. 
7081a89793SAlex Elder */ 71602adf40SYehuda Sadeh #define DEV_NAME_LEN 32 7281a89793SAlex Elder #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) 73602adf40SYehuda Sadeh 7459c2be1eSYehuda Sadeh #define RBD_NOTIFY_TIMEOUT_DEFAULT 10 7559c2be1eSYehuda Sadeh 76602adf40SYehuda Sadeh /* 77602adf40SYehuda Sadeh * block device image metadata (in-memory version) 78602adf40SYehuda Sadeh */ 79602adf40SYehuda Sadeh struct rbd_image_header { 80602adf40SYehuda Sadeh u64 image_size; 81602adf40SYehuda Sadeh char block_name[32]; 82602adf40SYehuda Sadeh __u8 obj_order; 83602adf40SYehuda Sadeh __u8 crypt_type; 84602adf40SYehuda Sadeh __u8 comp_type; 85602adf40SYehuda Sadeh struct ceph_snap_context *snapc; 86602adf40SYehuda Sadeh size_t snap_names_len; 87602adf40SYehuda Sadeh u64 snap_seq; 88602adf40SYehuda Sadeh u32 total_snaps; 89602adf40SYehuda Sadeh 90602adf40SYehuda Sadeh char *snap_names; 91602adf40SYehuda Sadeh u64 *snap_sizes; 9259c2be1eSYehuda Sadeh 9359c2be1eSYehuda Sadeh u64 obj_version; 9459c2be1eSYehuda Sadeh }; 9559c2be1eSYehuda Sadeh 9659c2be1eSYehuda Sadeh struct rbd_options { 9759c2be1eSYehuda Sadeh int notify_timeout; 98602adf40SYehuda Sadeh }; 99602adf40SYehuda Sadeh 100602adf40SYehuda Sadeh /* 101f0f8cef5SAlex Elder * an instance of the client. multiple devices may share an rbd client. 
102602adf40SYehuda Sadeh */ 103602adf40SYehuda Sadeh struct rbd_client { 104602adf40SYehuda Sadeh struct ceph_client *client; 10559c2be1eSYehuda Sadeh struct rbd_options *rbd_opts; 106602adf40SYehuda Sadeh struct kref kref; 107602adf40SYehuda Sadeh struct list_head node; 108602adf40SYehuda Sadeh }; 109602adf40SYehuda Sadeh 110602adf40SYehuda Sadeh /* 111f0f8cef5SAlex Elder * a request completion status 112602adf40SYehuda Sadeh */ 1131fec7093SYehuda Sadeh struct rbd_req_status { 1141fec7093SYehuda Sadeh int done; 1151fec7093SYehuda Sadeh int rc; 1161fec7093SYehuda Sadeh u64 bytes; 1171fec7093SYehuda Sadeh }; 1181fec7093SYehuda Sadeh 1191fec7093SYehuda Sadeh /* 1201fec7093SYehuda Sadeh * a collection of requests 1211fec7093SYehuda Sadeh */ 1221fec7093SYehuda Sadeh struct rbd_req_coll { 1231fec7093SYehuda Sadeh int total; 1241fec7093SYehuda Sadeh int num_done; 1251fec7093SYehuda Sadeh struct kref kref; 1261fec7093SYehuda Sadeh struct rbd_req_status status[0]; 127602adf40SYehuda Sadeh }; 128602adf40SYehuda Sadeh 129f0f8cef5SAlex Elder /* 130f0f8cef5SAlex Elder * a single io request 131f0f8cef5SAlex Elder */ 132f0f8cef5SAlex Elder struct rbd_request { 133f0f8cef5SAlex Elder struct request *rq; /* blk layer request */ 134f0f8cef5SAlex Elder struct bio *bio; /* cloned bio */ 135f0f8cef5SAlex Elder struct page **pages; /* list of used pages */ 136f0f8cef5SAlex Elder u64 len; 137f0f8cef5SAlex Elder int coll_index; 138f0f8cef5SAlex Elder struct rbd_req_coll *coll; 139f0f8cef5SAlex Elder }; 140f0f8cef5SAlex Elder 141dfc5606dSYehuda Sadeh struct rbd_snap { 142dfc5606dSYehuda Sadeh struct device dev; 143dfc5606dSYehuda Sadeh const char *name; 144dfc5606dSYehuda Sadeh size_t size; 145dfc5606dSYehuda Sadeh struct list_head node; 146dfc5606dSYehuda Sadeh u64 id; 147dfc5606dSYehuda Sadeh }; 148dfc5606dSYehuda Sadeh 149602adf40SYehuda Sadeh /* 150602adf40SYehuda Sadeh * a single device 151602adf40SYehuda Sadeh */ 152602adf40SYehuda Sadeh struct rbd_device { 153602adf40SYehuda Sadeh 
int id; /* blkdev unique id */ 154602adf40SYehuda Sadeh 155602adf40SYehuda Sadeh int major; /* blkdev assigned major */ 156602adf40SYehuda Sadeh struct gendisk *disk; /* blkdev's gendisk and rq */ 157602adf40SYehuda Sadeh struct request_queue *q; 158602adf40SYehuda Sadeh 159602adf40SYehuda Sadeh struct rbd_client *rbd_client; 160602adf40SYehuda Sadeh 161602adf40SYehuda Sadeh char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ 162602adf40SYehuda Sadeh 163602adf40SYehuda Sadeh spinlock_t lock; /* queue lock */ 164602adf40SYehuda Sadeh 165602adf40SYehuda Sadeh struct rbd_image_header header; 166602adf40SYehuda Sadeh char obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */ 167602adf40SYehuda Sadeh int obj_len; 168602adf40SYehuda Sadeh char obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */ 169602adf40SYehuda Sadeh char pool_name[RBD_MAX_POOL_NAME_LEN]; 170602adf40SYehuda Sadeh int poolid; 171602adf40SYehuda Sadeh 17259c2be1eSYehuda Sadeh struct ceph_osd_event *watch_event; 17359c2be1eSYehuda Sadeh struct ceph_osd_request *watch_request; 17459c2be1eSYehuda Sadeh 175c666601aSJosh Durgin /* protects updating the header */ 176c666601aSJosh Durgin struct rw_semaphore header_rwsem; 177602adf40SYehuda Sadeh char snap_name[RBD_MAX_SNAP_NAME_LEN]; 178602adf40SYehuda Sadeh u32 cur_snap; /* index+1 of current snapshot within snap context 179602adf40SYehuda Sadeh 0 - for the head */ 180602adf40SYehuda Sadeh int read_only; 181602adf40SYehuda Sadeh 182602adf40SYehuda Sadeh struct list_head node; 183dfc5606dSYehuda Sadeh 184dfc5606dSYehuda Sadeh /* list of snapshots */ 185dfc5606dSYehuda Sadeh struct list_head snaps; 186dfc5606dSYehuda Sadeh 187dfc5606dSYehuda Sadeh /* sysfs related */ 188dfc5606dSYehuda Sadeh struct device dev; 189dfc5606dSYehuda Sadeh }; 190dfc5606dSYehuda Sadeh 191602adf40SYehuda Sadeh static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ 192e124a82fSAlex Elder 193602adf40SYehuda Sadeh static LIST_HEAD(rbd_dev_list); /* devices */ 194e124a82fSAlex 
Elder static DEFINE_SPINLOCK(rbd_dev_list_lock); 195e124a82fSAlex Elder 196602adf40SYehuda Sadeh static LIST_HEAD(rbd_client_list); /* clients */ 197432b8587SAlex Elder static DEFINE_SPINLOCK(rbd_client_list_lock); 198602adf40SYehuda Sadeh 199dfc5606dSYehuda Sadeh static int __rbd_init_snaps_header(struct rbd_device *rbd_dev); 200dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev); 201dfc5606dSYehuda Sadeh static ssize_t rbd_snap_add(struct device *dev, 202dfc5606dSYehuda Sadeh struct device_attribute *attr, 203dfc5606dSYehuda Sadeh const char *buf, 204dfc5606dSYehuda Sadeh size_t count); 205dfc5606dSYehuda Sadeh static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev, 20669932487SJustin P. Mattock struct rbd_snap *snap); 207dfc5606dSYehuda Sadeh 208f0f8cef5SAlex Elder static ssize_t rbd_add(struct bus_type *bus, const char *buf, 209f0f8cef5SAlex Elder size_t count); 210f0f8cef5SAlex Elder static ssize_t rbd_remove(struct bus_type *bus, const char *buf, 211f0f8cef5SAlex Elder size_t count); 212f0f8cef5SAlex Elder 213f0f8cef5SAlex Elder static struct bus_attribute rbd_bus_attrs[] = { 214f0f8cef5SAlex Elder __ATTR(add, S_IWUSR, NULL, rbd_add), 215f0f8cef5SAlex Elder __ATTR(remove, S_IWUSR, NULL, rbd_remove), 216f0f8cef5SAlex Elder __ATTR_NULL 217f0f8cef5SAlex Elder }; 218f0f8cef5SAlex Elder 219f0f8cef5SAlex Elder static struct bus_type rbd_bus_type = { 220f0f8cef5SAlex Elder .name = "rbd", 221f0f8cef5SAlex Elder .bus_attrs = rbd_bus_attrs, 222f0f8cef5SAlex Elder }; 223f0f8cef5SAlex Elder 224f0f8cef5SAlex Elder static void rbd_root_dev_release(struct device *dev) 225f0f8cef5SAlex Elder { 226f0f8cef5SAlex Elder } 227f0f8cef5SAlex Elder 228f0f8cef5SAlex Elder static struct device rbd_root_dev = { 229f0f8cef5SAlex Elder .init_name = "rbd", 230f0f8cef5SAlex Elder .release = rbd_root_dev_release, 231f0f8cef5SAlex Elder }; 232f0f8cef5SAlex Elder 233dfc5606dSYehuda Sadeh 234dfc5606dSYehuda Sadeh static struct device *rbd_get_dev(struct rbd_device 
*rbd_dev) 235dfc5606dSYehuda Sadeh { 236dfc5606dSYehuda Sadeh return get_device(&rbd_dev->dev); 237dfc5606dSYehuda Sadeh } 238dfc5606dSYehuda Sadeh 239dfc5606dSYehuda Sadeh static void rbd_put_dev(struct rbd_device *rbd_dev) 240dfc5606dSYehuda Sadeh { 241dfc5606dSYehuda Sadeh put_device(&rbd_dev->dev); 242dfc5606dSYehuda Sadeh } 243602adf40SYehuda Sadeh 24459c2be1eSYehuda Sadeh static int __rbd_update_snaps(struct rbd_device *rbd_dev); 24559c2be1eSYehuda Sadeh 246602adf40SYehuda Sadeh static int rbd_open(struct block_device *bdev, fmode_t mode) 247602adf40SYehuda Sadeh { 248f0f8cef5SAlex Elder struct rbd_device *rbd_dev = bdev->bd_disk->private_data; 249602adf40SYehuda Sadeh 250dfc5606dSYehuda Sadeh rbd_get_dev(rbd_dev); 251dfc5606dSYehuda Sadeh 252602adf40SYehuda Sadeh set_device_ro(bdev, rbd_dev->read_only); 253602adf40SYehuda Sadeh 254602adf40SYehuda Sadeh if ((mode & FMODE_WRITE) && rbd_dev->read_only) 255602adf40SYehuda Sadeh return -EROFS; 256602adf40SYehuda Sadeh 257602adf40SYehuda Sadeh return 0; 258602adf40SYehuda Sadeh } 259602adf40SYehuda Sadeh 260dfc5606dSYehuda Sadeh static int rbd_release(struct gendisk *disk, fmode_t mode) 261dfc5606dSYehuda Sadeh { 262dfc5606dSYehuda Sadeh struct rbd_device *rbd_dev = disk->private_data; 263dfc5606dSYehuda Sadeh 264dfc5606dSYehuda Sadeh rbd_put_dev(rbd_dev); 265dfc5606dSYehuda Sadeh 266dfc5606dSYehuda Sadeh return 0; 267dfc5606dSYehuda Sadeh } 268dfc5606dSYehuda Sadeh 269602adf40SYehuda Sadeh static const struct block_device_operations rbd_bd_ops = { 270602adf40SYehuda Sadeh .owner = THIS_MODULE, 271602adf40SYehuda Sadeh .open = rbd_open, 272dfc5606dSYehuda Sadeh .release = rbd_release, 273602adf40SYehuda Sadeh }; 274602adf40SYehuda Sadeh 275602adf40SYehuda Sadeh /* 276602adf40SYehuda Sadeh * Initialize an rbd client instance. 277602adf40SYehuda Sadeh * We own *opt. 
278602adf40SYehuda Sadeh */ 27959c2be1eSYehuda Sadeh static struct rbd_client *rbd_client_create(struct ceph_options *opt, 28059c2be1eSYehuda Sadeh struct rbd_options *rbd_opts) 281602adf40SYehuda Sadeh { 282602adf40SYehuda Sadeh struct rbd_client *rbdc; 283602adf40SYehuda Sadeh int ret = -ENOMEM; 284602adf40SYehuda Sadeh 285602adf40SYehuda Sadeh dout("rbd_client_create\n"); 286602adf40SYehuda Sadeh rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); 287602adf40SYehuda Sadeh if (!rbdc) 288602adf40SYehuda Sadeh goto out_opt; 289602adf40SYehuda Sadeh 290602adf40SYehuda Sadeh kref_init(&rbdc->kref); 291602adf40SYehuda Sadeh INIT_LIST_HEAD(&rbdc->node); 292602adf40SYehuda Sadeh 293bc534d86SAlex Elder mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 294bc534d86SAlex Elder 2956ab00d46SSage Weil rbdc->client = ceph_create_client(opt, rbdc, 0, 0); 296602adf40SYehuda Sadeh if (IS_ERR(rbdc->client)) 297bc534d86SAlex Elder goto out_mutex; 29828f259b7SVasiliy Kulikov opt = NULL; /* Now rbdc->client is responsible for opt */ 299602adf40SYehuda Sadeh 300602adf40SYehuda Sadeh ret = ceph_open_session(rbdc->client); 301602adf40SYehuda Sadeh if (ret < 0) 302602adf40SYehuda Sadeh goto out_err; 303602adf40SYehuda Sadeh 30459c2be1eSYehuda Sadeh rbdc->rbd_opts = rbd_opts; 30559c2be1eSYehuda Sadeh 306432b8587SAlex Elder spin_lock(&rbd_client_list_lock); 307602adf40SYehuda Sadeh list_add_tail(&rbdc->node, &rbd_client_list); 308432b8587SAlex Elder spin_unlock(&rbd_client_list_lock); 309602adf40SYehuda Sadeh 310bc534d86SAlex Elder mutex_unlock(&ctl_mutex); 311bc534d86SAlex Elder 312602adf40SYehuda Sadeh dout("rbd_client_create created %p\n", rbdc); 313602adf40SYehuda Sadeh return rbdc; 314602adf40SYehuda Sadeh 315602adf40SYehuda Sadeh out_err: 316602adf40SYehuda Sadeh ceph_destroy_client(rbdc->client); 317bc534d86SAlex Elder out_mutex: 318bc534d86SAlex Elder mutex_unlock(&ctl_mutex); 319602adf40SYehuda Sadeh kfree(rbdc); 320602adf40SYehuda Sadeh out_opt: 32128f259b7SVasiliy Kulikov 
if (opt) 322602adf40SYehuda Sadeh ceph_destroy_options(opt); 32328f259b7SVasiliy Kulikov return ERR_PTR(ret); 324602adf40SYehuda Sadeh } 325602adf40SYehuda Sadeh 326602adf40SYehuda Sadeh /* 327602adf40SYehuda Sadeh * Find a ceph client with specific addr and configuration. 328602adf40SYehuda Sadeh */ 329602adf40SYehuda Sadeh static struct rbd_client *__rbd_client_find(struct ceph_options *opt) 330602adf40SYehuda Sadeh { 331602adf40SYehuda Sadeh struct rbd_client *client_node; 332602adf40SYehuda Sadeh 333602adf40SYehuda Sadeh if (opt->flags & CEPH_OPT_NOSHARE) 334602adf40SYehuda Sadeh return NULL; 335602adf40SYehuda Sadeh 336602adf40SYehuda Sadeh list_for_each_entry(client_node, &rbd_client_list, node) 337602adf40SYehuda Sadeh if (ceph_compare_options(opt, client_node->client) == 0) 338602adf40SYehuda Sadeh return client_node; 339602adf40SYehuda Sadeh return NULL; 340602adf40SYehuda Sadeh } 341602adf40SYehuda Sadeh 342602adf40SYehuda Sadeh /* 34359c2be1eSYehuda Sadeh * mount options 34459c2be1eSYehuda Sadeh */ 34559c2be1eSYehuda Sadeh enum { 34659c2be1eSYehuda Sadeh Opt_notify_timeout, 34759c2be1eSYehuda Sadeh Opt_last_int, 34859c2be1eSYehuda Sadeh /* int args above */ 34959c2be1eSYehuda Sadeh Opt_last_string, 35059c2be1eSYehuda Sadeh /* string args above */ 35159c2be1eSYehuda Sadeh }; 35259c2be1eSYehuda Sadeh 35359c2be1eSYehuda Sadeh static match_table_t rbdopt_tokens = { 35459c2be1eSYehuda Sadeh {Opt_notify_timeout, "notify_timeout=%d"}, 35559c2be1eSYehuda Sadeh /* int args above */ 35659c2be1eSYehuda Sadeh /* string args above */ 35759c2be1eSYehuda Sadeh {-1, NULL} 35859c2be1eSYehuda Sadeh }; 35959c2be1eSYehuda Sadeh 36059c2be1eSYehuda Sadeh static int parse_rbd_opts_token(char *c, void *private) 36159c2be1eSYehuda Sadeh { 36259c2be1eSYehuda Sadeh struct rbd_options *rbdopt = private; 36359c2be1eSYehuda Sadeh substring_t argstr[MAX_OPT_ARGS]; 36459c2be1eSYehuda Sadeh int token, intval, ret; 36559c2be1eSYehuda Sadeh 36621079786SAlex Elder token = match_token(c, 
rbdopt_tokens, argstr); 36759c2be1eSYehuda Sadeh if (token < 0) 36859c2be1eSYehuda Sadeh return -EINVAL; 36959c2be1eSYehuda Sadeh 37059c2be1eSYehuda Sadeh if (token < Opt_last_int) { 37159c2be1eSYehuda Sadeh ret = match_int(&argstr[0], &intval); 37259c2be1eSYehuda Sadeh if (ret < 0) { 37359c2be1eSYehuda Sadeh pr_err("bad mount option arg (not int) " 37459c2be1eSYehuda Sadeh "at '%s'\n", c); 37559c2be1eSYehuda Sadeh return ret; 37659c2be1eSYehuda Sadeh } 37759c2be1eSYehuda Sadeh dout("got int token %d val %d\n", token, intval); 37859c2be1eSYehuda Sadeh } else if (token > Opt_last_int && token < Opt_last_string) { 37959c2be1eSYehuda Sadeh dout("got string token %d val %s\n", token, 38059c2be1eSYehuda Sadeh argstr[0].from); 38159c2be1eSYehuda Sadeh } else { 38259c2be1eSYehuda Sadeh dout("got token %d\n", token); 38359c2be1eSYehuda Sadeh } 38459c2be1eSYehuda Sadeh 38559c2be1eSYehuda Sadeh switch (token) { 38659c2be1eSYehuda Sadeh case Opt_notify_timeout: 38759c2be1eSYehuda Sadeh rbdopt->notify_timeout = intval; 38859c2be1eSYehuda Sadeh break; 38959c2be1eSYehuda Sadeh default: 39059c2be1eSYehuda Sadeh BUG_ON(token); 39159c2be1eSYehuda Sadeh } 39259c2be1eSYehuda Sadeh return 0; 39359c2be1eSYehuda Sadeh } 39459c2be1eSYehuda Sadeh 39559c2be1eSYehuda Sadeh /* 396602adf40SYehuda Sadeh * Get a ceph client with specific addr and configuration, if one does 397602adf40SYehuda Sadeh * not exist create it. 
398602adf40SYehuda Sadeh */ 3995214ecc4SAlex Elder static struct rbd_client *rbd_get_client(const char *mon_addr, 4005214ecc4SAlex Elder size_t mon_addr_len, 4015214ecc4SAlex Elder char *options) 402602adf40SYehuda Sadeh { 403602adf40SYehuda Sadeh struct rbd_client *rbdc; 404602adf40SYehuda Sadeh struct ceph_options *opt; 40559c2be1eSYehuda Sadeh struct rbd_options *rbd_opts; 40659c2be1eSYehuda Sadeh 40759c2be1eSYehuda Sadeh rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL); 40859c2be1eSYehuda Sadeh if (!rbd_opts) 409d720bcb0SAlex Elder return ERR_PTR(-ENOMEM); 41059c2be1eSYehuda Sadeh 41159c2be1eSYehuda Sadeh rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT; 412602adf40SYehuda Sadeh 413ee57741cSAlex Elder opt = ceph_parse_options(options, mon_addr, 4145214ecc4SAlex Elder mon_addr + mon_addr_len, 41521079786SAlex Elder parse_rbd_opts_token, rbd_opts); 416ee57741cSAlex Elder if (IS_ERR(opt)) { 417d720bcb0SAlex Elder kfree(rbd_opts); 418d720bcb0SAlex Elder return ERR_CAST(opt); 419ee57741cSAlex Elder } 420602adf40SYehuda Sadeh 421432b8587SAlex Elder spin_lock(&rbd_client_list_lock); 422602adf40SYehuda Sadeh rbdc = __rbd_client_find(opt); 423602adf40SYehuda Sadeh if (rbdc) { 424e6994d3dSAlex Elder /* using an existing client */ 425e6994d3dSAlex Elder kref_get(&rbdc->kref); 426432b8587SAlex Elder spin_unlock(&rbd_client_list_lock); 427e6994d3dSAlex Elder 428602adf40SYehuda Sadeh ceph_destroy_options(opt); 42997bb59a0SAlex Elder kfree(rbd_opts); 430602adf40SYehuda Sadeh 431d720bcb0SAlex Elder return rbdc; 432602adf40SYehuda Sadeh } 433432b8587SAlex Elder spin_unlock(&rbd_client_list_lock); 434602adf40SYehuda Sadeh 43559c2be1eSYehuda Sadeh rbdc = rbd_client_create(opt, rbd_opts); 436d97081b0SAlex Elder 437d720bcb0SAlex Elder if (IS_ERR(rbdc)) 43859c2be1eSYehuda Sadeh kfree(rbd_opts); 439d720bcb0SAlex Elder 440d720bcb0SAlex Elder return rbdc; 441602adf40SYehuda Sadeh } 442602adf40SYehuda Sadeh 443602adf40SYehuda Sadeh /* 444602adf40SYehuda Sadeh * Destroy ceph client 
445d23a4b3fSAlex Elder * 446432b8587SAlex Elder * Caller must hold rbd_client_list_lock. 447602adf40SYehuda Sadeh */ 448602adf40SYehuda Sadeh static void rbd_client_release(struct kref *kref) 449602adf40SYehuda Sadeh { 450602adf40SYehuda Sadeh struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); 451602adf40SYehuda Sadeh 452602adf40SYehuda Sadeh dout("rbd_release_client %p\n", rbdc); 453cd9d9f5dSAlex Elder spin_lock(&rbd_client_list_lock); 454602adf40SYehuda Sadeh list_del(&rbdc->node); 455cd9d9f5dSAlex Elder spin_unlock(&rbd_client_list_lock); 456602adf40SYehuda Sadeh 457602adf40SYehuda Sadeh ceph_destroy_client(rbdc->client); 45859c2be1eSYehuda Sadeh kfree(rbdc->rbd_opts); 459602adf40SYehuda Sadeh kfree(rbdc); 460602adf40SYehuda Sadeh } 461602adf40SYehuda Sadeh 462602adf40SYehuda Sadeh /* 463602adf40SYehuda Sadeh * Drop reference to ceph client node. If it's not referenced anymore, release 464602adf40SYehuda Sadeh * it. 465602adf40SYehuda Sadeh */ 466602adf40SYehuda Sadeh static void rbd_put_client(struct rbd_device *rbd_dev) 467602adf40SYehuda Sadeh { 468602adf40SYehuda Sadeh kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); 469602adf40SYehuda Sadeh rbd_dev->rbd_client = NULL; 470602adf40SYehuda Sadeh } 471602adf40SYehuda Sadeh 4721fec7093SYehuda Sadeh /* 4731fec7093SYehuda Sadeh * Destroy requests collection 4741fec7093SYehuda Sadeh */ 4751fec7093SYehuda Sadeh static void rbd_coll_release(struct kref *kref) 4761fec7093SYehuda Sadeh { 4771fec7093SYehuda Sadeh struct rbd_req_coll *coll = 4781fec7093SYehuda Sadeh container_of(kref, struct rbd_req_coll, kref); 4791fec7093SYehuda Sadeh 4801fec7093SYehuda Sadeh dout("rbd_coll_release %p\n", coll); 4811fec7093SYehuda Sadeh kfree(coll); 4821fec7093SYehuda Sadeh } 483602adf40SYehuda Sadeh 484602adf40SYehuda Sadeh /* 485602adf40SYehuda Sadeh * Create a new header structure, translate header format from the on-disk 486602adf40SYehuda Sadeh * header. 
487602adf40SYehuda Sadeh */ 488602adf40SYehuda Sadeh static int rbd_header_from_disk(struct rbd_image_header *header, 489602adf40SYehuda Sadeh struct rbd_image_header_ondisk *ondisk, 490602adf40SYehuda Sadeh int allocated_snaps, 491602adf40SYehuda Sadeh gfp_t gfp_flags) 492602adf40SYehuda Sadeh { 493602adf40SYehuda Sadeh int i; 49400f1f36fSAlex Elder u32 snap_count; 495602adf40SYehuda Sadeh 49621079786SAlex Elder if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT))) 49781e759fbSJosh Durgin return -ENXIO; 49881e759fbSJosh Durgin 49900f1f36fSAlex Elder snap_count = le32_to_cpu(ondisk->snap_count); 500602adf40SYehuda Sadeh header->snapc = kmalloc(sizeof(struct ceph_snap_context) + 50121079786SAlex Elder snap_count * sizeof (*ondisk), 502602adf40SYehuda Sadeh gfp_flags); 503602adf40SYehuda Sadeh if (!header->snapc) 504602adf40SYehuda Sadeh return -ENOMEM; 50500f1f36fSAlex Elder 50600f1f36fSAlex Elder header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); 507602adf40SYehuda Sadeh if (snap_count) { 508602adf40SYehuda Sadeh header->snap_names = kmalloc(header->snap_names_len, 509f8ad495aSDan Carpenter gfp_flags); 510602adf40SYehuda Sadeh if (!header->snap_names) 511602adf40SYehuda Sadeh goto err_snapc; 512602adf40SYehuda Sadeh header->snap_sizes = kmalloc(snap_count * sizeof(u64), 513f8ad495aSDan Carpenter gfp_flags); 514602adf40SYehuda Sadeh if (!header->snap_sizes) 515602adf40SYehuda Sadeh goto err_names; 516602adf40SYehuda Sadeh } else { 517602adf40SYehuda Sadeh header->snap_names = NULL; 518602adf40SYehuda Sadeh header->snap_sizes = NULL; 519602adf40SYehuda Sadeh } 520602adf40SYehuda Sadeh memcpy(header->block_name, ondisk->block_name, 521602adf40SYehuda Sadeh sizeof(ondisk->block_name)); 522602adf40SYehuda Sadeh 523602adf40SYehuda Sadeh header->image_size = le64_to_cpu(ondisk->image_size); 524602adf40SYehuda Sadeh header->obj_order = ondisk->options.order; 525602adf40SYehuda Sadeh header->crypt_type = ondisk->options.crypt_type; 526602adf40SYehuda 
Sadeh header->comp_type = ondisk->options.comp_type; 527602adf40SYehuda Sadeh 528602adf40SYehuda Sadeh atomic_set(&header->snapc->nref, 1); 529602adf40SYehuda Sadeh header->snap_seq = le64_to_cpu(ondisk->snap_seq); 530602adf40SYehuda Sadeh header->snapc->num_snaps = snap_count; 531602adf40SYehuda Sadeh header->total_snaps = snap_count; 532602adf40SYehuda Sadeh 53321079786SAlex Elder if (snap_count && allocated_snaps == snap_count) { 534602adf40SYehuda Sadeh for (i = 0; i < snap_count; i++) { 535602adf40SYehuda Sadeh header->snapc->snaps[i] = 536602adf40SYehuda Sadeh le64_to_cpu(ondisk->snaps[i].id); 537602adf40SYehuda Sadeh header->snap_sizes[i] = 538602adf40SYehuda Sadeh le64_to_cpu(ondisk->snaps[i].image_size); 539602adf40SYehuda Sadeh } 540602adf40SYehuda Sadeh 541602adf40SYehuda Sadeh /* copy snapshot names */ 542602adf40SYehuda Sadeh memcpy(header->snap_names, &ondisk->snaps[i], 543602adf40SYehuda Sadeh header->snap_names_len); 544602adf40SYehuda Sadeh } 545602adf40SYehuda Sadeh 546602adf40SYehuda Sadeh return 0; 547602adf40SYehuda Sadeh 548602adf40SYehuda Sadeh err_names: 549602adf40SYehuda Sadeh kfree(header->snap_names); 550602adf40SYehuda Sadeh err_snapc: 551602adf40SYehuda Sadeh kfree(header->snapc); 55200f1f36fSAlex Elder return -ENOMEM; 553602adf40SYehuda Sadeh } 554602adf40SYehuda Sadeh 555602adf40SYehuda Sadeh static int snap_index(struct rbd_image_header *header, int snap_num) 556602adf40SYehuda Sadeh { 557602adf40SYehuda Sadeh return header->total_snaps - snap_num; 558602adf40SYehuda Sadeh } 559602adf40SYehuda Sadeh 560602adf40SYehuda Sadeh static u64 cur_snap_id(struct rbd_device *rbd_dev) 561602adf40SYehuda Sadeh { 562602adf40SYehuda Sadeh struct rbd_image_header *header = &rbd_dev->header; 563602adf40SYehuda Sadeh 564602adf40SYehuda Sadeh if (!rbd_dev->cur_snap) 565602adf40SYehuda Sadeh return 0; 566602adf40SYehuda Sadeh 567602adf40SYehuda Sadeh return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)]; 568602adf40SYehuda Sadeh } 
569602adf40SYehuda Sadeh 570602adf40SYehuda Sadeh static int snap_by_name(struct rbd_image_header *header, const char *snap_name, 571602adf40SYehuda Sadeh u64 *seq, u64 *size) 572602adf40SYehuda Sadeh { 573602adf40SYehuda Sadeh int i; 574602adf40SYehuda Sadeh char *p = header->snap_names; 575602adf40SYehuda Sadeh 57600f1f36fSAlex Elder for (i = 0; i < header->total_snaps; i++) { 57700f1f36fSAlex Elder if (!strcmp(snap_name, p)) { 57800f1f36fSAlex Elder 57900f1f36fSAlex Elder /* Found it. Pass back its id and/or size */ 58000f1f36fSAlex Elder 581602adf40SYehuda Sadeh if (seq) 582602adf40SYehuda Sadeh *seq = header->snapc->snaps[i]; 583602adf40SYehuda Sadeh if (size) 584602adf40SYehuda Sadeh *size = header->snap_sizes[i]; 585602adf40SYehuda Sadeh return i; 586602adf40SYehuda Sadeh } 58700f1f36fSAlex Elder p += strlen(p) + 1; /* Skip ahead to the next name */ 58800f1f36fSAlex Elder } 58900f1f36fSAlex Elder return -ENOENT; 59000f1f36fSAlex Elder } 591602adf40SYehuda Sadeh 592cc9d734cSJosh Durgin static int rbd_header_set_snap(struct rbd_device *dev, u64 *size) 593602adf40SYehuda Sadeh { 594602adf40SYehuda Sadeh struct rbd_image_header *header = &dev->header; 595602adf40SYehuda Sadeh struct ceph_snap_context *snapc = header->snapc; 596602adf40SYehuda Sadeh int ret = -ENOENT; 597602adf40SYehuda Sadeh 598cc9d734cSJosh Durgin BUILD_BUG_ON(sizeof (dev->snap_name) < sizeof (RBD_SNAP_HEAD_NAME)); 599cc9d734cSJosh Durgin 600c666601aSJosh Durgin down_write(&dev->header_rwsem); 601602adf40SYehuda Sadeh 602cc9d734cSJosh Durgin if (!memcmp(dev->snap_name, RBD_SNAP_HEAD_NAME, 603cc9d734cSJosh Durgin sizeof (RBD_SNAP_HEAD_NAME))) { 604602adf40SYehuda Sadeh if (header->total_snaps) 605602adf40SYehuda Sadeh snapc->seq = header->snap_seq; 606602adf40SYehuda Sadeh else 607602adf40SYehuda Sadeh snapc->seq = 0; 608602adf40SYehuda Sadeh dev->cur_snap = 0; 609602adf40SYehuda Sadeh dev->read_only = 0; 610602adf40SYehuda Sadeh if (size) 611602adf40SYehuda Sadeh *size = header->image_size; 
612602adf40SYehuda Sadeh } else { 613cc9d734cSJosh Durgin ret = snap_by_name(header, dev->snap_name, &snapc->seq, size); 614602adf40SYehuda Sadeh if (ret < 0) 615602adf40SYehuda Sadeh goto done; 616602adf40SYehuda Sadeh 617602adf40SYehuda Sadeh dev->cur_snap = header->total_snaps - ret; 618602adf40SYehuda Sadeh dev->read_only = 1; 619602adf40SYehuda Sadeh } 620602adf40SYehuda Sadeh 621602adf40SYehuda Sadeh ret = 0; 622602adf40SYehuda Sadeh done: 623c666601aSJosh Durgin up_write(&dev->header_rwsem); 624602adf40SYehuda Sadeh return ret; 625602adf40SYehuda Sadeh } 626602adf40SYehuda Sadeh 627602adf40SYehuda Sadeh static void rbd_header_free(struct rbd_image_header *header) 628602adf40SYehuda Sadeh { 629602adf40SYehuda Sadeh kfree(header->snapc); 630602adf40SYehuda Sadeh kfree(header->snap_names); 631602adf40SYehuda Sadeh kfree(header->snap_sizes); 632602adf40SYehuda Sadeh } 633602adf40SYehuda Sadeh 634602adf40SYehuda Sadeh /* 635602adf40SYehuda Sadeh * get the actual striped segment name, offset and length 636602adf40SYehuda Sadeh */ 637602adf40SYehuda Sadeh static u64 rbd_get_segment(struct rbd_image_header *header, 638602adf40SYehuda Sadeh const char *block_name, 639602adf40SYehuda Sadeh u64 ofs, u64 len, 640602adf40SYehuda Sadeh char *seg_name, u64 *segofs) 641602adf40SYehuda Sadeh { 642602adf40SYehuda Sadeh u64 seg = ofs >> header->obj_order; 643602adf40SYehuda Sadeh 644602adf40SYehuda Sadeh if (seg_name) 645602adf40SYehuda Sadeh snprintf(seg_name, RBD_MAX_SEG_NAME_LEN, 646602adf40SYehuda Sadeh "%s.%012llx", block_name, seg); 647602adf40SYehuda Sadeh 648602adf40SYehuda Sadeh ofs = ofs & ((1 << header->obj_order) - 1); 649602adf40SYehuda Sadeh len = min_t(u64, len, (1 << header->obj_order) - ofs); 650602adf40SYehuda Sadeh 651602adf40SYehuda Sadeh if (segofs) 652602adf40SYehuda Sadeh *segofs = ofs; 653602adf40SYehuda Sadeh 654602adf40SYehuda Sadeh return len; 655602adf40SYehuda Sadeh } 656602adf40SYehuda Sadeh 6571fec7093SYehuda Sadeh static int 
rbd_get_num_segments(struct rbd_image_header *header, 6581fec7093SYehuda Sadeh u64 ofs, u64 len) 6591fec7093SYehuda Sadeh { 6601fec7093SYehuda Sadeh u64 start_seg = ofs >> header->obj_order; 6611fec7093SYehuda Sadeh u64 end_seg = (ofs + len - 1) >> header->obj_order; 6621fec7093SYehuda Sadeh return end_seg - start_seg + 1; 6631fec7093SYehuda Sadeh } 6641fec7093SYehuda Sadeh 665602adf40SYehuda Sadeh /* 666029bcbd8SJosh Durgin * returns the size of an object in the image 667029bcbd8SJosh Durgin */ 668029bcbd8SJosh Durgin static u64 rbd_obj_bytes(struct rbd_image_header *header) 669029bcbd8SJosh Durgin { 670029bcbd8SJosh Durgin return 1 << header->obj_order; 671029bcbd8SJosh Durgin } 672029bcbd8SJosh Durgin 673029bcbd8SJosh Durgin /* 674602adf40SYehuda Sadeh * bio helpers 675602adf40SYehuda Sadeh */ 676602adf40SYehuda Sadeh 677602adf40SYehuda Sadeh static void bio_chain_put(struct bio *chain) 678602adf40SYehuda Sadeh { 679602adf40SYehuda Sadeh struct bio *tmp; 680602adf40SYehuda Sadeh 681602adf40SYehuda Sadeh while (chain) { 682602adf40SYehuda Sadeh tmp = chain; 683602adf40SYehuda Sadeh chain = chain->bi_next; 684602adf40SYehuda Sadeh bio_put(tmp); 685602adf40SYehuda Sadeh } 686602adf40SYehuda Sadeh } 687602adf40SYehuda Sadeh 688602adf40SYehuda Sadeh /* 689602adf40SYehuda Sadeh * zeros a bio chain, starting at specific offset 690602adf40SYehuda Sadeh */ 691602adf40SYehuda Sadeh static void zero_bio_chain(struct bio *chain, int start_ofs) 692602adf40SYehuda Sadeh { 693602adf40SYehuda Sadeh struct bio_vec *bv; 694602adf40SYehuda Sadeh unsigned long flags; 695602adf40SYehuda Sadeh void *buf; 696602adf40SYehuda Sadeh int i; 697602adf40SYehuda Sadeh int pos = 0; 698602adf40SYehuda Sadeh 699602adf40SYehuda Sadeh while (chain) { 700602adf40SYehuda Sadeh bio_for_each_segment(bv, chain, i) { 701602adf40SYehuda Sadeh if (pos + bv->bv_len > start_ofs) { 702602adf40SYehuda Sadeh int remainder = max(start_ofs - pos, 0); 703602adf40SYehuda Sadeh buf = bvec_kmap_irq(bv, &flags); 
704602adf40SYehuda Sadeh memset(buf + remainder, 0, 705602adf40SYehuda Sadeh bv->bv_len - remainder); 70685b5aaa6SDan Carpenter bvec_kunmap_irq(buf, &flags); 707602adf40SYehuda Sadeh } 708602adf40SYehuda Sadeh pos += bv->bv_len; 709602adf40SYehuda Sadeh } 710602adf40SYehuda Sadeh 711602adf40SYehuda Sadeh chain = chain->bi_next; 712602adf40SYehuda Sadeh } 713602adf40SYehuda Sadeh } 714602adf40SYehuda Sadeh 715602adf40SYehuda Sadeh /* 716602adf40SYehuda Sadeh * bio_chain_clone - clone a chain of bios up to a certain length. 717602adf40SYehuda Sadeh * might return a bio_pair that will need to be released. 718602adf40SYehuda Sadeh */ 719602adf40SYehuda Sadeh static struct bio *bio_chain_clone(struct bio **old, struct bio **next, 720602adf40SYehuda Sadeh struct bio_pair **bp, 721602adf40SYehuda Sadeh int len, gfp_t gfpmask) 722602adf40SYehuda Sadeh { 723602adf40SYehuda Sadeh struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL; 724602adf40SYehuda Sadeh int total = 0; 725602adf40SYehuda Sadeh 726602adf40SYehuda Sadeh if (*bp) { 727602adf40SYehuda Sadeh bio_pair_release(*bp); 728602adf40SYehuda Sadeh *bp = NULL; 729602adf40SYehuda Sadeh } 730602adf40SYehuda Sadeh 731602adf40SYehuda Sadeh while (old_chain && (total < len)) { 732602adf40SYehuda Sadeh tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); 733602adf40SYehuda Sadeh if (!tmp) 734602adf40SYehuda Sadeh goto err_out; 735602adf40SYehuda Sadeh 736602adf40SYehuda Sadeh if (total + old_chain->bi_size > len) { 737602adf40SYehuda Sadeh struct bio_pair *bp; 738602adf40SYehuda Sadeh 739602adf40SYehuda Sadeh /* 740602adf40SYehuda Sadeh * this split can only happen with a single paged bio, 741602adf40SYehuda Sadeh * split_bio will BUG_ON if this is not the case 742602adf40SYehuda Sadeh */ 743602adf40SYehuda Sadeh dout("bio_chain_clone split! 
total=%d remaining=%d" 744602adf40SYehuda Sadeh "bi_size=%d\n", 745602adf40SYehuda Sadeh (int)total, (int)len-total, 746602adf40SYehuda Sadeh (int)old_chain->bi_size); 747602adf40SYehuda Sadeh 748602adf40SYehuda Sadeh /* split the bio. We'll release it either in the next 749602adf40SYehuda Sadeh call, or it will have to be released outside */ 750593a9e7bSAlex Elder bp = bio_split(old_chain, (len - total) / SECTOR_SIZE); 751602adf40SYehuda Sadeh if (!bp) 752602adf40SYehuda Sadeh goto err_out; 753602adf40SYehuda Sadeh 754602adf40SYehuda Sadeh __bio_clone(tmp, &bp->bio1); 755602adf40SYehuda Sadeh 756602adf40SYehuda Sadeh *next = &bp->bio2; 757602adf40SYehuda Sadeh } else { 758602adf40SYehuda Sadeh __bio_clone(tmp, old_chain); 759602adf40SYehuda Sadeh *next = old_chain->bi_next; 760602adf40SYehuda Sadeh } 761602adf40SYehuda Sadeh 762602adf40SYehuda Sadeh tmp->bi_bdev = NULL; 763602adf40SYehuda Sadeh gfpmask &= ~__GFP_WAIT; 764602adf40SYehuda Sadeh tmp->bi_next = NULL; 765602adf40SYehuda Sadeh 766602adf40SYehuda Sadeh if (!new_chain) { 767602adf40SYehuda Sadeh new_chain = tail = tmp; 768602adf40SYehuda Sadeh } else { 769602adf40SYehuda Sadeh tail->bi_next = tmp; 770602adf40SYehuda Sadeh tail = tmp; 771602adf40SYehuda Sadeh } 772602adf40SYehuda Sadeh old_chain = old_chain->bi_next; 773602adf40SYehuda Sadeh 774602adf40SYehuda Sadeh total += tmp->bi_size; 775602adf40SYehuda Sadeh } 776602adf40SYehuda Sadeh 777602adf40SYehuda Sadeh BUG_ON(total < len); 778602adf40SYehuda Sadeh 779602adf40SYehuda Sadeh if (tail) 780602adf40SYehuda Sadeh tail->bi_next = NULL; 781602adf40SYehuda Sadeh 782602adf40SYehuda Sadeh *old = old_chain; 783602adf40SYehuda Sadeh 784602adf40SYehuda Sadeh return new_chain; 785602adf40SYehuda Sadeh 786602adf40SYehuda Sadeh err_out: 787602adf40SYehuda Sadeh dout("bio_chain_clone with err\n"); 788602adf40SYehuda Sadeh bio_chain_put(new_chain); 789602adf40SYehuda Sadeh return NULL; 790602adf40SYehuda Sadeh } 791602adf40SYehuda Sadeh 792602adf40SYehuda Sadeh /* 
793602adf40SYehuda Sadeh * helpers for osd request op vectors. 794602adf40SYehuda Sadeh */ 795602adf40SYehuda Sadeh static int rbd_create_rw_ops(struct ceph_osd_req_op **ops, 796602adf40SYehuda Sadeh int num_ops, 797602adf40SYehuda Sadeh int opcode, 798602adf40SYehuda Sadeh u32 payload_len) 799602adf40SYehuda Sadeh { 800602adf40SYehuda Sadeh *ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1), 801602adf40SYehuda Sadeh GFP_NOIO); 802602adf40SYehuda Sadeh if (!*ops) 803602adf40SYehuda Sadeh return -ENOMEM; 804602adf40SYehuda Sadeh (*ops)[0].op = opcode; 805602adf40SYehuda Sadeh /* 806602adf40SYehuda Sadeh * op extent offset and length will be set later on 807602adf40SYehuda Sadeh * in calc_raw_layout() 808602adf40SYehuda Sadeh */ 809602adf40SYehuda Sadeh (*ops)[0].payload_len = payload_len; 810602adf40SYehuda Sadeh return 0; 811602adf40SYehuda Sadeh } 812602adf40SYehuda Sadeh 813602adf40SYehuda Sadeh static void rbd_destroy_ops(struct ceph_osd_req_op *ops) 814602adf40SYehuda Sadeh { 815602adf40SYehuda Sadeh kfree(ops); 816602adf40SYehuda Sadeh } 817602adf40SYehuda Sadeh 8181fec7093SYehuda Sadeh static void rbd_coll_end_req_index(struct request *rq, 8191fec7093SYehuda Sadeh struct rbd_req_coll *coll, 8201fec7093SYehuda Sadeh int index, 8211fec7093SYehuda Sadeh int ret, u64 len) 8221fec7093SYehuda Sadeh { 8231fec7093SYehuda Sadeh struct request_queue *q; 8241fec7093SYehuda Sadeh int min, max, i; 8251fec7093SYehuda Sadeh 8261fec7093SYehuda Sadeh dout("rbd_coll_end_req_index %p index %d ret %d len %lld\n", 8271fec7093SYehuda Sadeh coll, index, ret, len); 8281fec7093SYehuda Sadeh 8291fec7093SYehuda Sadeh if (!rq) 8301fec7093SYehuda Sadeh return; 8311fec7093SYehuda Sadeh 8321fec7093SYehuda Sadeh if (!coll) { 8331fec7093SYehuda Sadeh blk_end_request(rq, ret, len); 8341fec7093SYehuda Sadeh return; 8351fec7093SYehuda Sadeh } 8361fec7093SYehuda Sadeh 8371fec7093SYehuda Sadeh q = rq->q; 8381fec7093SYehuda Sadeh 8391fec7093SYehuda Sadeh spin_lock_irq(q->queue_lock); 
8401fec7093SYehuda Sadeh coll->status[index].done = 1; 8411fec7093SYehuda Sadeh coll->status[index].rc = ret; 8421fec7093SYehuda Sadeh coll->status[index].bytes = len; 8431fec7093SYehuda Sadeh max = min = coll->num_done; 8441fec7093SYehuda Sadeh while (max < coll->total && coll->status[max].done) 8451fec7093SYehuda Sadeh max++; 8461fec7093SYehuda Sadeh 8471fec7093SYehuda Sadeh for (i = min; i<max; i++) { 8481fec7093SYehuda Sadeh __blk_end_request(rq, coll->status[i].rc, 8491fec7093SYehuda Sadeh coll->status[i].bytes); 8501fec7093SYehuda Sadeh coll->num_done++; 8511fec7093SYehuda Sadeh kref_put(&coll->kref, rbd_coll_release); 8521fec7093SYehuda Sadeh } 8531fec7093SYehuda Sadeh spin_unlock_irq(q->queue_lock); 8541fec7093SYehuda Sadeh } 8551fec7093SYehuda Sadeh 8561fec7093SYehuda Sadeh static void rbd_coll_end_req(struct rbd_request *req, 8571fec7093SYehuda Sadeh int ret, u64 len) 8581fec7093SYehuda Sadeh { 8591fec7093SYehuda Sadeh rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len); 8601fec7093SYehuda Sadeh } 8611fec7093SYehuda Sadeh 862602adf40SYehuda Sadeh /* 863602adf40SYehuda Sadeh * Send ceph osd request 864602adf40SYehuda Sadeh */ 865602adf40SYehuda Sadeh static int rbd_do_request(struct request *rq, 866602adf40SYehuda Sadeh struct rbd_device *dev, 867602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 868602adf40SYehuda Sadeh u64 snapid, 869602adf40SYehuda Sadeh const char *obj, u64 ofs, u64 len, 870602adf40SYehuda Sadeh struct bio *bio, 871602adf40SYehuda Sadeh struct page **pages, 872602adf40SYehuda Sadeh int num_pages, 873602adf40SYehuda Sadeh int flags, 874602adf40SYehuda Sadeh struct ceph_osd_req_op *ops, 875602adf40SYehuda Sadeh int num_reply, 8761fec7093SYehuda Sadeh struct rbd_req_coll *coll, 8771fec7093SYehuda Sadeh int coll_index, 878602adf40SYehuda Sadeh void (*rbd_cb)(struct ceph_osd_request *req, 87959c2be1eSYehuda Sadeh struct ceph_msg *msg), 88059c2be1eSYehuda Sadeh struct ceph_osd_request **linger_req, 88159c2be1eSYehuda 
Sadeh u64 *ver) 882602adf40SYehuda Sadeh { 883602adf40SYehuda Sadeh struct ceph_osd_request *req; 884602adf40SYehuda Sadeh struct ceph_file_layout *layout; 885602adf40SYehuda Sadeh int ret; 886602adf40SYehuda Sadeh u64 bno; 887602adf40SYehuda Sadeh struct timespec mtime = CURRENT_TIME; 888602adf40SYehuda Sadeh struct rbd_request *req_data; 889602adf40SYehuda Sadeh struct ceph_osd_request_head *reqhead; 8901dbb4399SAlex Elder struct ceph_osd_client *osdc; 891602adf40SYehuda Sadeh 892602adf40SYehuda Sadeh req_data = kzalloc(sizeof(*req_data), GFP_NOIO); 8931fec7093SYehuda Sadeh if (!req_data) { 8941fec7093SYehuda Sadeh if (coll) 8951fec7093SYehuda Sadeh rbd_coll_end_req_index(rq, coll, coll_index, 8961fec7093SYehuda Sadeh -ENOMEM, len); 8971fec7093SYehuda Sadeh return -ENOMEM; 8981fec7093SYehuda Sadeh } 899602adf40SYehuda Sadeh 9001fec7093SYehuda Sadeh if (coll) { 9011fec7093SYehuda Sadeh req_data->coll = coll; 9021fec7093SYehuda Sadeh req_data->coll_index = coll_index; 9031fec7093SYehuda Sadeh } 9041fec7093SYehuda Sadeh 9051fec7093SYehuda Sadeh dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs); 906602adf40SYehuda Sadeh 907c666601aSJosh Durgin down_read(&dev->header_rwsem); 908602adf40SYehuda Sadeh 9091dbb4399SAlex Elder osdc = &dev->rbd_client->client->osdc; 9101dbb4399SAlex Elder req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, 9111dbb4399SAlex Elder false, GFP_NOIO, pages, bio); 9124ad12621SSage Weil if (!req) { 913c666601aSJosh Durgin up_read(&dev->header_rwsem); 9144ad12621SSage Weil ret = -ENOMEM; 915602adf40SYehuda Sadeh goto done_pages; 916602adf40SYehuda Sadeh } 917602adf40SYehuda Sadeh 918602adf40SYehuda Sadeh req->r_callback = rbd_cb; 919602adf40SYehuda Sadeh 920602adf40SYehuda Sadeh req_data->rq = rq; 921602adf40SYehuda Sadeh req_data->bio = bio; 922602adf40SYehuda Sadeh req_data->pages = pages; 923602adf40SYehuda Sadeh req_data->len = len; 924602adf40SYehuda Sadeh 925602adf40SYehuda Sadeh req->r_priv = req_data; 926602adf40SYehuda 
Sadeh 927602adf40SYehuda Sadeh reqhead = req->r_request->front.iov_base; 928602adf40SYehuda Sadeh reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); 929602adf40SYehuda Sadeh 930602adf40SYehuda Sadeh strncpy(req->r_oid, obj, sizeof(req->r_oid)); 931602adf40SYehuda Sadeh req->r_oid_len = strlen(req->r_oid); 932602adf40SYehuda Sadeh 933602adf40SYehuda Sadeh layout = &req->r_file_layout; 934602adf40SYehuda Sadeh memset(layout, 0, sizeof(*layout)); 935602adf40SYehuda Sadeh layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); 936602adf40SYehuda Sadeh layout->fl_stripe_count = cpu_to_le32(1); 937602adf40SYehuda Sadeh layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); 938602adf40SYehuda Sadeh layout->fl_pg_pool = cpu_to_le32(dev->poolid); 9391dbb4399SAlex Elder ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, 9401dbb4399SAlex Elder req, ops); 941602adf40SYehuda Sadeh 942602adf40SYehuda Sadeh ceph_osdc_build_request(req, ofs, &len, 943602adf40SYehuda Sadeh ops, 944602adf40SYehuda Sadeh snapc, 945602adf40SYehuda Sadeh &mtime, 946602adf40SYehuda Sadeh req->r_oid, req->r_oid_len); 947c666601aSJosh Durgin up_read(&dev->header_rwsem); 948602adf40SYehuda Sadeh 94959c2be1eSYehuda Sadeh if (linger_req) { 9501dbb4399SAlex Elder ceph_osdc_set_request_linger(osdc, req); 95159c2be1eSYehuda Sadeh *linger_req = req; 95259c2be1eSYehuda Sadeh } 95359c2be1eSYehuda Sadeh 9541dbb4399SAlex Elder ret = ceph_osdc_start_request(osdc, req, false); 955602adf40SYehuda Sadeh if (ret < 0) 956602adf40SYehuda Sadeh goto done_err; 957602adf40SYehuda Sadeh 958602adf40SYehuda Sadeh if (!rbd_cb) { 9591dbb4399SAlex Elder ret = ceph_osdc_wait_request(osdc, req); 96059c2be1eSYehuda Sadeh if (ver) 96159c2be1eSYehuda Sadeh *ver = le64_to_cpu(req->r_reassert_version.version); 9621fec7093SYehuda Sadeh dout("reassert_ver=%lld\n", 9631fec7093SYehuda Sadeh le64_to_cpu(req->r_reassert_version.version)); 964602adf40SYehuda Sadeh ceph_osdc_put_request(req); 965602adf40SYehuda Sadeh } 
966602adf40SYehuda Sadeh return ret; 967602adf40SYehuda Sadeh 968602adf40SYehuda Sadeh done_err: 969602adf40SYehuda Sadeh bio_chain_put(req_data->bio); 970602adf40SYehuda Sadeh ceph_osdc_put_request(req); 971602adf40SYehuda Sadeh done_pages: 9721fec7093SYehuda Sadeh rbd_coll_end_req(req_data, ret, len); 973602adf40SYehuda Sadeh kfree(req_data); 974602adf40SYehuda Sadeh return ret; 975602adf40SYehuda Sadeh } 976602adf40SYehuda Sadeh 977602adf40SYehuda Sadeh /* 978602adf40SYehuda Sadeh * Ceph osd op callback 979602adf40SYehuda Sadeh */ 980602adf40SYehuda Sadeh static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) 981602adf40SYehuda Sadeh { 982602adf40SYehuda Sadeh struct rbd_request *req_data = req->r_priv; 983602adf40SYehuda Sadeh struct ceph_osd_reply_head *replyhead; 984602adf40SYehuda Sadeh struct ceph_osd_op *op; 985602adf40SYehuda Sadeh __s32 rc; 986602adf40SYehuda Sadeh u64 bytes; 987602adf40SYehuda Sadeh int read_op; 988602adf40SYehuda Sadeh 989602adf40SYehuda Sadeh /* parse reply */ 990602adf40SYehuda Sadeh replyhead = msg->front.iov_base; 991602adf40SYehuda Sadeh WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); 992602adf40SYehuda Sadeh op = (void *)(replyhead + 1); 993602adf40SYehuda Sadeh rc = le32_to_cpu(replyhead->result); 994602adf40SYehuda Sadeh bytes = le64_to_cpu(op->extent.length); 995602adf40SYehuda Sadeh read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ); 996602adf40SYehuda Sadeh 997602adf40SYehuda Sadeh dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc); 998602adf40SYehuda Sadeh 999602adf40SYehuda Sadeh if (rc == -ENOENT && read_op) { 1000602adf40SYehuda Sadeh zero_bio_chain(req_data->bio, 0); 1001602adf40SYehuda Sadeh rc = 0; 1002602adf40SYehuda Sadeh } else if (rc == 0 && read_op && bytes < req_data->len) { 1003602adf40SYehuda Sadeh zero_bio_chain(req_data->bio, bytes); 1004602adf40SYehuda Sadeh bytes = req_data->len; 1005602adf40SYehuda Sadeh } 1006602adf40SYehuda Sadeh 10071fec7093SYehuda Sadeh 
rbd_coll_end_req(req_data, rc, bytes); 1008602adf40SYehuda Sadeh 1009602adf40SYehuda Sadeh if (req_data->bio) 1010602adf40SYehuda Sadeh bio_chain_put(req_data->bio); 1011602adf40SYehuda Sadeh 1012602adf40SYehuda Sadeh ceph_osdc_put_request(req); 1013602adf40SYehuda Sadeh kfree(req_data); 1014602adf40SYehuda Sadeh } 1015602adf40SYehuda Sadeh 101659c2be1eSYehuda Sadeh static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) 101759c2be1eSYehuda Sadeh { 101859c2be1eSYehuda Sadeh ceph_osdc_put_request(req); 101959c2be1eSYehuda Sadeh } 102059c2be1eSYehuda Sadeh 1021602adf40SYehuda Sadeh /* 1022602adf40SYehuda Sadeh * Do a synchronous ceph osd operation 1023602adf40SYehuda Sadeh */ 1024602adf40SYehuda Sadeh static int rbd_req_sync_op(struct rbd_device *dev, 1025602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1026602adf40SYehuda Sadeh u64 snapid, 1027602adf40SYehuda Sadeh int opcode, 1028602adf40SYehuda Sadeh int flags, 1029602adf40SYehuda Sadeh struct ceph_osd_req_op *orig_ops, 1030602adf40SYehuda Sadeh int num_reply, 1031602adf40SYehuda Sadeh const char *obj, 1032602adf40SYehuda Sadeh u64 ofs, u64 len, 103359c2be1eSYehuda Sadeh char *buf, 103459c2be1eSYehuda Sadeh struct ceph_osd_request **linger_req, 103559c2be1eSYehuda Sadeh u64 *ver) 1036602adf40SYehuda Sadeh { 1037602adf40SYehuda Sadeh int ret; 1038602adf40SYehuda Sadeh struct page **pages; 1039602adf40SYehuda Sadeh int num_pages; 1040602adf40SYehuda Sadeh struct ceph_osd_req_op *ops = orig_ops; 1041602adf40SYehuda Sadeh u32 payload_len; 1042602adf40SYehuda Sadeh 1043602adf40SYehuda Sadeh num_pages = calc_pages_for(ofs , len); 1044602adf40SYehuda Sadeh pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); 1045b8d0638aSDan Carpenter if (IS_ERR(pages)) 1046b8d0638aSDan Carpenter return PTR_ERR(pages); 1047602adf40SYehuda Sadeh 1048602adf40SYehuda Sadeh if (!orig_ops) { 1049602adf40SYehuda Sadeh payload_len = (flags & CEPH_OSD_FLAG_WRITE ? 
len : 0); 1050602adf40SYehuda Sadeh ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); 1051602adf40SYehuda Sadeh if (ret < 0) 1052602adf40SYehuda Sadeh goto done; 1053602adf40SYehuda Sadeh 1054602adf40SYehuda Sadeh if ((flags & CEPH_OSD_FLAG_WRITE) && buf) { 1055602adf40SYehuda Sadeh ret = ceph_copy_to_page_vector(pages, buf, ofs, len); 1056602adf40SYehuda Sadeh if (ret < 0) 1057602adf40SYehuda Sadeh goto done_ops; 1058602adf40SYehuda Sadeh } 1059602adf40SYehuda Sadeh } 1060602adf40SYehuda Sadeh 1061602adf40SYehuda Sadeh ret = rbd_do_request(NULL, dev, snapc, snapid, 1062602adf40SYehuda Sadeh obj, ofs, len, NULL, 1063602adf40SYehuda Sadeh pages, num_pages, 1064602adf40SYehuda Sadeh flags, 1065602adf40SYehuda Sadeh ops, 1066602adf40SYehuda Sadeh 2, 10671fec7093SYehuda Sadeh NULL, 0, 106859c2be1eSYehuda Sadeh NULL, 106959c2be1eSYehuda Sadeh linger_req, ver); 1070602adf40SYehuda Sadeh if (ret < 0) 1071602adf40SYehuda Sadeh goto done_ops; 1072602adf40SYehuda Sadeh 1073602adf40SYehuda Sadeh if ((flags & CEPH_OSD_FLAG_READ) && buf) 1074602adf40SYehuda Sadeh ret = ceph_copy_from_page_vector(pages, buf, ofs, ret); 1075602adf40SYehuda Sadeh 1076602adf40SYehuda Sadeh done_ops: 1077602adf40SYehuda Sadeh if (!orig_ops) 1078602adf40SYehuda Sadeh rbd_destroy_ops(ops); 1079602adf40SYehuda Sadeh done: 1080602adf40SYehuda Sadeh ceph_release_page_vector(pages, num_pages); 1081602adf40SYehuda Sadeh return ret; 1082602adf40SYehuda Sadeh } 1083602adf40SYehuda Sadeh 1084602adf40SYehuda Sadeh /* 1085602adf40SYehuda Sadeh * Do an asynchronous ceph osd operation 1086602adf40SYehuda Sadeh */ 1087602adf40SYehuda Sadeh static int rbd_do_op(struct request *rq, 1088602adf40SYehuda Sadeh struct rbd_device *rbd_dev , 1089602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1090602adf40SYehuda Sadeh u64 snapid, 1091602adf40SYehuda Sadeh int opcode, int flags, int num_reply, 1092602adf40SYehuda Sadeh u64 ofs, u64 len, 10931fec7093SYehuda Sadeh struct bio *bio, 10941fec7093SYehuda Sadeh struct 
rbd_req_coll *coll, 10951fec7093SYehuda Sadeh int coll_index) 1096602adf40SYehuda Sadeh { 1097602adf40SYehuda Sadeh char *seg_name; 1098602adf40SYehuda Sadeh u64 seg_ofs; 1099602adf40SYehuda Sadeh u64 seg_len; 1100602adf40SYehuda Sadeh int ret; 1101602adf40SYehuda Sadeh struct ceph_osd_req_op *ops; 1102602adf40SYehuda Sadeh u32 payload_len; 1103602adf40SYehuda Sadeh 1104602adf40SYehuda Sadeh seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); 1105602adf40SYehuda Sadeh if (!seg_name) 1106602adf40SYehuda Sadeh return -ENOMEM; 1107602adf40SYehuda Sadeh 1108602adf40SYehuda Sadeh seg_len = rbd_get_segment(&rbd_dev->header, 1109602adf40SYehuda Sadeh rbd_dev->header.block_name, 1110602adf40SYehuda Sadeh ofs, len, 1111602adf40SYehuda Sadeh seg_name, &seg_ofs); 1112602adf40SYehuda Sadeh 1113602adf40SYehuda Sadeh payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); 1114602adf40SYehuda Sadeh 1115602adf40SYehuda Sadeh ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); 1116602adf40SYehuda Sadeh if (ret < 0) 1117602adf40SYehuda Sadeh goto done; 1118602adf40SYehuda Sadeh 1119602adf40SYehuda Sadeh /* we've taken care of segment sizes earlier when we 1120602adf40SYehuda Sadeh cloned the bios. 
We should never have a segment 1121602adf40SYehuda Sadeh truncated at this point */ 1122602adf40SYehuda Sadeh BUG_ON(seg_len < len); 1123602adf40SYehuda Sadeh 1124602adf40SYehuda Sadeh ret = rbd_do_request(rq, rbd_dev, snapc, snapid, 1125602adf40SYehuda Sadeh seg_name, seg_ofs, seg_len, 1126602adf40SYehuda Sadeh bio, 1127602adf40SYehuda Sadeh NULL, 0, 1128602adf40SYehuda Sadeh flags, 1129602adf40SYehuda Sadeh ops, 1130602adf40SYehuda Sadeh num_reply, 11311fec7093SYehuda Sadeh coll, coll_index, 113259c2be1eSYehuda Sadeh rbd_req_cb, 0, NULL); 113311f77002SSage Weil 113411f77002SSage Weil rbd_destroy_ops(ops); 1135602adf40SYehuda Sadeh done: 1136602adf40SYehuda Sadeh kfree(seg_name); 1137602adf40SYehuda Sadeh return ret; 1138602adf40SYehuda Sadeh } 1139602adf40SYehuda Sadeh 1140602adf40SYehuda Sadeh /* 1141602adf40SYehuda Sadeh * Request async osd write 1142602adf40SYehuda Sadeh */ 1143602adf40SYehuda Sadeh static int rbd_req_write(struct request *rq, 1144602adf40SYehuda Sadeh struct rbd_device *rbd_dev, 1145602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1146602adf40SYehuda Sadeh u64 ofs, u64 len, 11471fec7093SYehuda Sadeh struct bio *bio, 11481fec7093SYehuda Sadeh struct rbd_req_coll *coll, 11491fec7093SYehuda Sadeh int coll_index) 1150602adf40SYehuda Sadeh { 1151602adf40SYehuda Sadeh return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, 1152602adf40SYehuda Sadeh CEPH_OSD_OP_WRITE, 1153602adf40SYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 1154602adf40SYehuda Sadeh 2, 11551fec7093SYehuda Sadeh ofs, len, bio, coll, coll_index); 1156602adf40SYehuda Sadeh } 1157602adf40SYehuda Sadeh 1158602adf40SYehuda Sadeh /* 1159602adf40SYehuda Sadeh * Request async osd read 1160602adf40SYehuda Sadeh */ 1161602adf40SYehuda Sadeh static int rbd_req_read(struct request *rq, 1162602adf40SYehuda Sadeh struct rbd_device *rbd_dev, 1163602adf40SYehuda Sadeh u64 snapid, 1164602adf40SYehuda Sadeh u64 ofs, u64 len, 11651fec7093SYehuda Sadeh struct bio *bio, 11661fec7093SYehuda 
Sadeh struct rbd_req_coll *coll, 11671fec7093SYehuda Sadeh int coll_index) 1168602adf40SYehuda Sadeh { 1169602adf40SYehuda Sadeh return rbd_do_op(rq, rbd_dev, NULL, 1170602adf40SYehuda Sadeh (snapid ? snapid : CEPH_NOSNAP), 1171602adf40SYehuda Sadeh CEPH_OSD_OP_READ, 1172602adf40SYehuda Sadeh CEPH_OSD_FLAG_READ, 1173602adf40SYehuda Sadeh 2, 11741fec7093SYehuda Sadeh ofs, len, bio, coll, coll_index); 1175602adf40SYehuda Sadeh } 1176602adf40SYehuda Sadeh 1177602adf40SYehuda Sadeh /* 1178602adf40SYehuda Sadeh * Request sync osd read 1179602adf40SYehuda Sadeh */ 1180602adf40SYehuda Sadeh static int rbd_req_sync_read(struct rbd_device *dev, 1181602adf40SYehuda Sadeh struct ceph_snap_context *snapc, 1182602adf40SYehuda Sadeh u64 snapid, 1183602adf40SYehuda Sadeh const char *obj, 1184602adf40SYehuda Sadeh u64 ofs, u64 len, 118559c2be1eSYehuda Sadeh char *buf, 118659c2be1eSYehuda Sadeh u64 *ver) 1187602adf40SYehuda Sadeh { 1188602adf40SYehuda Sadeh return rbd_req_sync_op(dev, NULL, 1189602adf40SYehuda Sadeh (snapid ? 
snapid : CEPH_NOSNAP), 1190602adf40SYehuda Sadeh CEPH_OSD_OP_READ, 1191602adf40SYehuda Sadeh CEPH_OSD_FLAG_READ, 1192602adf40SYehuda Sadeh NULL, 119359c2be1eSYehuda Sadeh 1, obj, ofs, len, buf, NULL, ver); 1194602adf40SYehuda Sadeh } 1195602adf40SYehuda Sadeh 1196602adf40SYehuda Sadeh /* 119759c2be1eSYehuda Sadeh * Request sync osd watch 119859c2be1eSYehuda Sadeh */ 119959c2be1eSYehuda Sadeh static int rbd_req_sync_notify_ack(struct rbd_device *dev, 120059c2be1eSYehuda Sadeh u64 ver, 120159c2be1eSYehuda Sadeh u64 notify_id, 120259c2be1eSYehuda Sadeh const char *obj) 120359c2be1eSYehuda Sadeh { 120459c2be1eSYehuda Sadeh struct ceph_osd_req_op *ops; 120559c2be1eSYehuda Sadeh struct page **pages = NULL; 120611f77002SSage Weil int ret; 120711f77002SSage Weil 120811f77002SSage Weil ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY_ACK, 0); 120959c2be1eSYehuda Sadeh if (ret < 0) 121059c2be1eSYehuda Sadeh return ret; 121159c2be1eSYehuda Sadeh 121259c2be1eSYehuda Sadeh ops[0].watch.ver = cpu_to_le64(dev->header.obj_version); 121359c2be1eSYehuda Sadeh ops[0].watch.cookie = notify_id; 121459c2be1eSYehuda Sadeh ops[0].watch.flag = 0; 121559c2be1eSYehuda Sadeh 121659c2be1eSYehuda Sadeh ret = rbd_do_request(NULL, dev, NULL, CEPH_NOSNAP, 121759c2be1eSYehuda Sadeh obj, 0, 0, NULL, 121859c2be1eSYehuda Sadeh pages, 0, 121959c2be1eSYehuda Sadeh CEPH_OSD_FLAG_READ, 122059c2be1eSYehuda Sadeh ops, 122159c2be1eSYehuda Sadeh 1, 12221fec7093SYehuda Sadeh NULL, 0, 122359c2be1eSYehuda Sadeh rbd_simple_req_cb, 0, NULL); 122459c2be1eSYehuda Sadeh 122559c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 122659c2be1eSYehuda Sadeh return ret; 122759c2be1eSYehuda Sadeh } 122859c2be1eSYehuda Sadeh 122959c2be1eSYehuda Sadeh static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) 123059c2be1eSYehuda Sadeh { 123159c2be1eSYehuda Sadeh struct rbd_device *dev = (struct rbd_device *)data; 123213143d2dSSage Weil int rc; 123313143d2dSSage Weil 123459c2be1eSYehuda Sadeh if (!dev) 
123559c2be1eSYehuda Sadeh return; 123659c2be1eSYehuda Sadeh 123759c2be1eSYehuda Sadeh dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name, 123859c2be1eSYehuda Sadeh notify_id, (int)opcode); 123959c2be1eSYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 124013143d2dSSage Weil rc = __rbd_update_snaps(dev); 124159c2be1eSYehuda Sadeh mutex_unlock(&ctl_mutex); 124213143d2dSSage Weil if (rc) 1243f0f8cef5SAlex Elder pr_warning(RBD_DRV_NAME "%d got notification but failed to " 1244f0f8cef5SAlex Elder " update snaps: %d\n", dev->major, rc); 124559c2be1eSYehuda Sadeh 124659c2be1eSYehuda Sadeh rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name); 124759c2be1eSYehuda Sadeh } 124859c2be1eSYehuda Sadeh 124959c2be1eSYehuda Sadeh /* 125059c2be1eSYehuda Sadeh * Request sync osd watch 125159c2be1eSYehuda Sadeh */ 125259c2be1eSYehuda Sadeh static int rbd_req_sync_watch(struct rbd_device *dev, 125359c2be1eSYehuda Sadeh const char *obj, 125459c2be1eSYehuda Sadeh u64 ver) 125559c2be1eSYehuda Sadeh { 125659c2be1eSYehuda Sadeh struct ceph_osd_req_op *ops; 12571dbb4399SAlex Elder struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc; 125859c2be1eSYehuda Sadeh 125959c2be1eSYehuda Sadeh int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0); 126059c2be1eSYehuda Sadeh if (ret < 0) 126159c2be1eSYehuda Sadeh return ret; 126259c2be1eSYehuda Sadeh 126359c2be1eSYehuda Sadeh ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, 126459c2be1eSYehuda Sadeh (void *)dev, &dev->watch_event); 126559c2be1eSYehuda Sadeh if (ret < 0) 126659c2be1eSYehuda Sadeh goto fail; 126759c2be1eSYehuda Sadeh 126859c2be1eSYehuda Sadeh ops[0].watch.ver = cpu_to_le64(ver); 126959c2be1eSYehuda Sadeh ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie); 127059c2be1eSYehuda Sadeh ops[0].watch.flag = 1; 127159c2be1eSYehuda Sadeh 127259c2be1eSYehuda Sadeh ret = rbd_req_sync_op(dev, NULL, 127359c2be1eSYehuda Sadeh CEPH_NOSNAP, 127459c2be1eSYehuda Sadeh 0, 
127559c2be1eSYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 127659c2be1eSYehuda Sadeh ops, 127759c2be1eSYehuda Sadeh 1, obj, 0, 0, NULL, 127859c2be1eSYehuda Sadeh &dev->watch_request, NULL); 127959c2be1eSYehuda Sadeh 128059c2be1eSYehuda Sadeh if (ret < 0) 128159c2be1eSYehuda Sadeh goto fail_event; 128259c2be1eSYehuda Sadeh 128359c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 128459c2be1eSYehuda Sadeh return 0; 128559c2be1eSYehuda Sadeh 128659c2be1eSYehuda Sadeh fail_event: 128759c2be1eSYehuda Sadeh ceph_osdc_cancel_event(dev->watch_event); 128859c2be1eSYehuda Sadeh dev->watch_event = NULL; 128959c2be1eSYehuda Sadeh fail: 129059c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 129159c2be1eSYehuda Sadeh return ret; 129259c2be1eSYehuda Sadeh } 129359c2be1eSYehuda Sadeh 129479e3057cSYehuda Sadeh /* 129579e3057cSYehuda Sadeh * Request sync osd unwatch 129679e3057cSYehuda Sadeh */ 129779e3057cSYehuda Sadeh static int rbd_req_sync_unwatch(struct rbd_device *dev, 129879e3057cSYehuda Sadeh const char *obj) 129979e3057cSYehuda Sadeh { 130079e3057cSYehuda Sadeh struct ceph_osd_req_op *ops; 130179e3057cSYehuda Sadeh 130279e3057cSYehuda Sadeh int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0); 130379e3057cSYehuda Sadeh if (ret < 0) 130479e3057cSYehuda Sadeh return ret; 130579e3057cSYehuda Sadeh 130679e3057cSYehuda Sadeh ops[0].watch.ver = 0; 130779e3057cSYehuda Sadeh ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie); 130879e3057cSYehuda Sadeh ops[0].watch.flag = 0; 130979e3057cSYehuda Sadeh 131079e3057cSYehuda Sadeh ret = rbd_req_sync_op(dev, NULL, 131179e3057cSYehuda Sadeh CEPH_NOSNAP, 131279e3057cSYehuda Sadeh 0, 131379e3057cSYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 131479e3057cSYehuda Sadeh ops, 131579e3057cSYehuda Sadeh 1, obj, 0, 0, NULL, NULL, NULL); 131679e3057cSYehuda Sadeh 131779e3057cSYehuda Sadeh rbd_destroy_ops(ops); 131879e3057cSYehuda Sadeh ceph_osdc_cancel_event(dev->watch_event); 131979e3057cSYehuda Sadeh dev->watch_event = NULL; 
132079e3057cSYehuda Sadeh return ret; 132179e3057cSYehuda Sadeh } 132279e3057cSYehuda Sadeh 132359c2be1eSYehuda Sadeh struct rbd_notify_info { 132459c2be1eSYehuda Sadeh struct rbd_device *dev; 132559c2be1eSYehuda Sadeh }; 132659c2be1eSYehuda Sadeh 132759c2be1eSYehuda Sadeh static void rbd_notify_cb(u64 ver, u64 notify_id, u8 opcode, void *data) 132859c2be1eSYehuda Sadeh { 132959c2be1eSYehuda Sadeh struct rbd_device *dev = (struct rbd_device *)data; 133059c2be1eSYehuda Sadeh if (!dev) 133159c2be1eSYehuda Sadeh return; 133259c2be1eSYehuda Sadeh 133359c2be1eSYehuda Sadeh dout("rbd_notify_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name, 133459c2be1eSYehuda Sadeh notify_id, (int)opcode); 133559c2be1eSYehuda Sadeh } 133659c2be1eSYehuda Sadeh 133759c2be1eSYehuda Sadeh /* 133859c2be1eSYehuda Sadeh * Request sync osd notify 133959c2be1eSYehuda Sadeh */ 134059c2be1eSYehuda Sadeh static int rbd_req_sync_notify(struct rbd_device *dev, 134159c2be1eSYehuda Sadeh const char *obj) 134259c2be1eSYehuda Sadeh { 134359c2be1eSYehuda Sadeh struct ceph_osd_req_op *ops; 13441dbb4399SAlex Elder struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc; 134559c2be1eSYehuda Sadeh struct ceph_osd_event *event; 134659c2be1eSYehuda Sadeh struct rbd_notify_info info; 134759c2be1eSYehuda Sadeh int payload_len = sizeof(u32) + sizeof(u32); 134859c2be1eSYehuda Sadeh int ret; 134959c2be1eSYehuda Sadeh 135059c2be1eSYehuda Sadeh ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY, payload_len); 135159c2be1eSYehuda Sadeh if (ret < 0) 135259c2be1eSYehuda Sadeh return ret; 135359c2be1eSYehuda Sadeh 135459c2be1eSYehuda Sadeh info.dev = dev; 135559c2be1eSYehuda Sadeh 135659c2be1eSYehuda Sadeh ret = ceph_osdc_create_event(osdc, rbd_notify_cb, 1, 135759c2be1eSYehuda Sadeh (void *)&info, &event); 135859c2be1eSYehuda Sadeh if (ret < 0) 135959c2be1eSYehuda Sadeh goto fail; 136059c2be1eSYehuda Sadeh 136159c2be1eSYehuda Sadeh ops[0].watch.ver = 1; 136259c2be1eSYehuda Sadeh ops[0].watch.flag = 1; 
136359c2be1eSYehuda Sadeh ops[0].watch.cookie = event->cookie; 136459c2be1eSYehuda Sadeh ops[0].watch.prot_ver = RADOS_NOTIFY_VER; 136559c2be1eSYehuda Sadeh ops[0].watch.timeout = 12; 136659c2be1eSYehuda Sadeh 136759c2be1eSYehuda Sadeh ret = rbd_req_sync_op(dev, NULL, 136859c2be1eSYehuda Sadeh CEPH_NOSNAP, 136959c2be1eSYehuda Sadeh 0, 137059c2be1eSYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 137159c2be1eSYehuda Sadeh ops, 137259c2be1eSYehuda Sadeh 1, obj, 0, 0, NULL, NULL, NULL); 137359c2be1eSYehuda Sadeh if (ret < 0) 137459c2be1eSYehuda Sadeh goto fail_event; 137559c2be1eSYehuda Sadeh 137659c2be1eSYehuda Sadeh ret = ceph_osdc_wait_event(event, CEPH_OSD_TIMEOUT_DEFAULT); 137759c2be1eSYehuda Sadeh dout("ceph_osdc_wait_event returned %d\n", ret); 137859c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 137959c2be1eSYehuda Sadeh return 0; 138059c2be1eSYehuda Sadeh 138159c2be1eSYehuda Sadeh fail_event: 138259c2be1eSYehuda Sadeh ceph_osdc_cancel_event(event); 138359c2be1eSYehuda Sadeh fail: 138459c2be1eSYehuda Sadeh rbd_destroy_ops(ops); 138559c2be1eSYehuda Sadeh return ret; 138659c2be1eSYehuda Sadeh } 138759c2be1eSYehuda Sadeh 138859c2be1eSYehuda Sadeh /* 1389602adf40SYehuda Sadeh * Request sync osd read 1390602adf40SYehuda Sadeh */ 1391602adf40SYehuda Sadeh static int rbd_req_sync_exec(struct rbd_device *dev, 1392602adf40SYehuda Sadeh const char *obj, 1393602adf40SYehuda Sadeh const char *cls, 1394602adf40SYehuda Sadeh const char *method, 1395602adf40SYehuda Sadeh const char *data, 139659c2be1eSYehuda Sadeh int len, 139759c2be1eSYehuda Sadeh u64 *ver) 1398602adf40SYehuda Sadeh { 1399602adf40SYehuda Sadeh struct ceph_osd_req_op *ops; 1400602adf40SYehuda Sadeh int cls_len = strlen(cls); 1401602adf40SYehuda Sadeh int method_len = strlen(method); 1402602adf40SYehuda Sadeh int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL, 1403602adf40SYehuda Sadeh cls_len + method_len + len); 1404602adf40SYehuda Sadeh if (ret < 0) 1405602adf40SYehuda Sadeh return ret; 
1406602adf40SYehuda Sadeh 1407602adf40SYehuda Sadeh ops[0].cls.class_name = cls; 1408602adf40SYehuda Sadeh ops[0].cls.class_len = (__u8)cls_len; 1409602adf40SYehuda Sadeh ops[0].cls.method_name = method; 1410602adf40SYehuda Sadeh ops[0].cls.method_len = (__u8)method_len; 1411602adf40SYehuda Sadeh ops[0].cls.argc = 0; 1412602adf40SYehuda Sadeh ops[0].cls.indata = data; 1413602adf40SYehuda Sadeh ops[0].cls.indata_len = len; 1414602adf40SYehuda Sadeh 1415602adf40SYehuda Sadeh ret = rbd_req_sync_op(dev, NULL, 1416602adf40SYehuda Sadeh CEPH_NOSNAP, 1417602adf40SYehuda Sadeh 0, 1418602adf40SYehuda Sadeh CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 1419602adf40SYehuda Sadeh ops, 142059c2be1eSYehuda Sadeh 1, obj, 0, 0, NULL, NULL, ver); 1421602adf40SYehuda Sadeh 1422602adf40SYehuda Sadeh rbd_destroy_ops(ops); 1423602adf40SYehuda Sadeh 1424602adf40SYehuda Sadeh dout("cls_exec returned %d\n", ret); 1425602adf40SYehuda Sadeh return ret; 1426602adf40SYehuda Sadeh } 1427602adf40SYehuda Sadeh 14281fec7093SYehuda Sadeh static struct rbd_req_coll *rbd_alloc_coll(int num_reqs) 14291fec7093SYehuda Sadeh { 14301fec7093SYehuda Sadeh struct rbd_req_coll *coll = 14311fec7093SYehuda Sadeh kzalloc(sizeof(struct rbd_req_coll) + 14321fec7093SYehuda Sadeh sizeof(struct rbd_req_status) * num_reqs, 14331fec7093SYehuda Sadeh GFP_ATOMIC); 14341fec7093SYehuda Sadeh 14351fec7093SYehuda Sadeh if (!coll) 14361fec7093SYehuda Sadeh return NULL; 14371fec7093SYehuda Sadeh coll->total = num_reqs; 14381fec7093SYehuda Sadeh kref_init(&coll->kref); 14391fec7093SYehuda Sadeh return coll; 14401fec7093SYehuda Sadeh } 14411fec7093SYehuda Sadeh 1442602adf40SYehuda Sadeh /* 1443602adf40SYehuda Sadeh * block device queue callback 1444602adf40SYehuda Sadeh */ 1445602adf40SYehuda Sadeh static void rbd_rq_fn(struct request_queue *q) 1446602adf40SYehuda Sadeh { 1447602adf40SYehuda Sadeh struct rbd_device *rbd_dev = q->queuedata; 1448602adf40SYehuda Sadeh struct request *rq; 1449602adf40SYehuda Sadeh struct bio_pair *bp 
= NULL; 1450602adf40SYehuda Sadeh 145100f1f36fSAlex Elder while ((rq = blk_fetch_request(q))) { 1452602adf40SYehuda Sadeh struct bio *bio; 1453602adf40SYehuda Sadeh struct bio *rq_bio, *next_bio = NULL; 1454602adf40SYehuda Sadeh bool do_write; 1455602adf40SYehuda Sadeh int size, op_size = 0; 1456602adf40SYehuda Sadeh u64 ofs; 14571fec7093SYehuda Sadeh int num_segs, cur_seg = 0; 14581fec7093SYehuda Sadeh struct rbd_req_coll *coll; 1459602adf40SYehuda Sadeh 1460602adf40SYehuda Sadeh /* peek at request from block layer */ 1461602adf40SYehuda Sadeh if (!rq) 1462602adf40SYehuda Sadeh break; 1463602adf40SYehuda Sadeh 1464602adf40SYehuda Sadeh dout("fetched request\n"); 1465602adf40SYehuda Sadeh 1466602adf40SYehuda Sadeh /* filter out block requests we don't understand */ 1467602adf40SYehuda Sadeh if ((rq->cmd_type != REQ_TYPE_FS)) { 1468602adf40SYehuda Sadeh __blk_end_request_all(rq, 0); 146900f1f36fSAlex Elder continue; 1470602adf40SYehuda Sadeh } 1471602adf40SYehuda Sadeh 1472602adf40SYehuda Sadeh /* deduce our operation (read, write) */ 1473602adf40SYehuda Sadeh do_write = (rq_data_dir(rq) == WRITE); 1474602adf40SYehuda Sadeh 1475602adf40SYehuda Sadeh size = blk_rq_bytes(rq); 1476593a9e7bSAlex Elder ofs = blk_rq_pos(rq) * SECTOR_SIZE; 1477602adf40SYehuda Sadeh rq_bio = rq->bio; 1478602adf40SYehuda Sadeh if (do_write && rbd_dev->read_only) { 1479602adf40SYehuda Sadeh __blk_end_request_all(rq, -EROFS); 148000f1f36fSAlex Elder continue; 1481602adf40SYehuda Sadeh } 1482602adf40SYehuda Sadeh 1483602adf40SYehuda Sadeh spin_unlock_irq(q->queue_lock); 1484602adf40SYehuda Sadeh 1485602adf40SYehuda Sadeh dout("%s 0x%x bytes at 0x%llx\n", 1486602adf40SYehuda Sadeh do_write ? 
"write" : "read", 1487593a9e7bSAlex Elder size, blk_rq_pos(rq) * SECTOR_SIZE); 1488602adf40SYehuda Sadeh 14891fec7093SYehuda Sadeh num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); 14901fec7093SYehuda Sadeh coll = rbd_alloc_coll(num_segs); 14911fec7093SYehuda Sadeh if (!coll) { 14921fec7093SYehuda Sadeh spin_lock_irq(q->queue_lock); 14931fec7093SYehuda Sadeh __blk_end_request_all(rq, -ENOMEM); 149400f1f36fSAlex Elder continue; 14951fec7093SYehuda Sadeh } 14961fec7093SYehuda Sadeh 1497602adf40SYehuda Sadeh do { 1498602adf40SYehuda Sadeh /* a bio clone to be passed down to OSD req */ 1499602adf40SYehuda Sadeh dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt); 1500602adf40SYehuda Sadeh op_size = rbd_get_segment(&rbd_dev->header, 1501602adf40SYehuda Sadeh rbd_dev->header.block_name, 1502602adf40SYehuda Sadeh ofs, size, 1503602adf40SYehuda Sadeh NULL, NULL); 15041fec7093SYehuda Sadeh kref_get(&coll->kref); 1505602adf40SYehuda Sadeh bio = bio_chain_clone(&rq_bio, &next_bio, &bp, 1506602adf40SYehuda Sadeh op_size, GFP_ATOMIC); 1507602adf40SYehuda Sadeh if (!bio) { 15081fec7093SYehuda Sadeh rbd_coll_end_req_index(rq, coll, cur_seg, 15091fec7093SYehuda Sadeh -ENOMEM, op_size); 15101fec7093SYehuda Sadeh goto next_seg; 1511602adf40SYehuda Sadeh } 1512602adf40SYehuda Sadeh 15131fec7093SYehuda Sadeh 1514602adf40SYehuda Sadeh /* init OSD command: write or read */ 1515602adf40SYehuda Sadeh if (do_write) 1516602adf40SYehuda Sadeh rbd_req_write(rq, rbd_dev, 1517602adf40SYehuda Sadeh rbd_dev->header.snapc, 1518602adf40SYehuda Sadeh ofs, 15191fec7093SYehuda Sadeh op_size, bio, 15201fec7093SYehuda Sadeh coll, cur_seg); 1521602adf40SYehuda Sadeh else 1522602adf40SYehuda Sadeh rbd_req_read(rq, rbd_dev, 1523602adf40SYehuda Sadeh cur_snap_id(rbd_dev), 1524602adf40SYehuda Sadeh ofs, 15251fec7093SYehuda Sadeh op_size, bio, 15261fec7093SYehuda Sadeh coll, cur_seg); 1527602adf40SYehuda Sadeh 15281fec7093SYehuda Sadeh next_seg: 1529602adf40SYehuda Sadeh size -= op_size; 
1530602adf40SYehuda Sadeh ofs += op_size; 1531602adf40SYehuda Sadeh 15321fec7093SYehuda Sadeh cur_seg++; 1533602adf40SYehuda Sadeh rq_bio = next_bio; 1534602adf40SYehuda Sadeh } while (size > 0); 15351fec7093SYehuda Sadeh kref_put(&coll->kref, rbd_coll_release); 1536602adf40SYehuda Sadeh 1537602adf40SYehuda Sadeh if (bp) 1538602adf40SYehuda Sadeh bio_pair_release(bp); 1539602adf40SYehuda Sadeh spin_lock_irq(q->queue_lock); 1540602adf40SYehuda Sadeh } 1541602adf40SYehuda Sadeh } 1542602adf40SYehuda Sadeh 1543602adf40SYehuda Sadeh /* 1544602adf40SYehuda Sadeh * a queue callback. Makes sure that we don't create a bio that spans across 1545602adf40SYehuda Sadeh * multiple osd objects. One exception would be with a single page bios, 1546602adf40SYehuda Sadeh * which we handle later at bio_chain_clone 1547602adf40SYehuda Sadeh */ 1548602adf40SYehuda Sadeh static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, 1549602adf40SYehuda Sadeh struct bio_vec *bvec) 1550602adf40SYehuda Sadeh { 1551602adf40SYehuda Sadeh struct rbd_device *rbd_dev = q->queuedata; 1552593a9e7bSAlex Elder unsigned int chunk_sectors; 1553593a9e7bSAlex Elder sector_t sector; 1554593a9e7bSAlex Elder unsigned int bio_sectors; 1555602adf40SYehuda Sadeh int max; 1556602adf40SYehuda Sadeh 1557593a9e7bSAlex Elder chunk_sectors = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT); 1558593a9e7bSAlex Elder sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); 1559593a9e7bSAlex Elder bio_sectors = bmd->bi_size >> SECTOR_SHIFT; 1560593a9e7bSAlex Elder 1561602adf40SYehuda Sadeh max = (chunk_sectors - ((sector & (chunk_sectors - 1)) 1562593a9e7bSAlex Elder + bio_sectors)) << SECTOR_SHIFT; 1563602adf40SYehuda Sadeh if (max < 0) 1564602adf40SYehuda Sadeh max = 0; /* bio_add cannot handle a negative return */ 1565602adf40SYehuda Sadeh if (max <= bvec->bv_len && bio_sectors == 0) 1566602adf40SYehuda Sadeh return bvec->bv_len; 1567602adf40SYehuda Sadeh return max; 1568602adf40SYehuda Sadeh } 
1569602adf40SYehuda Sadeh 1570602adf40SYehuda Sadeh static void rbd_free_disk(struct rbd_device *rbd_dev) 1571602adf40SYehuda Sadeh { 1572602adf40SYehuda Sadeh struct gendisk *disk = rbd_dev->disk; 1573602adf40SYehuda Sadeh 1574602adf40SYehuda Sadeh if (!disk) 1575602adf40SYehuda Sadeh return; 1576602adf40SYehuda Sadeh 1577602adf40SYehuda Sadeh rbd_header_free(&rbd_dev->header); 1578602adf40SYehuda Sadeh 1579602adf40SYehuda Sadeh if (disk->flags & GENHD_FL_UP) 1580602adf40SYehuda Sadeh del_gendisk(disk); 1581602adf40SYehuda Sadeh if (disk->queue) 1582602adf40SYehuda Sadeh blk_cleanup_queue(disk->queue); 1583602adf40SYehuda Sadeh put_disk(disk); 1584602adf40SYehuda Sadeh } 1585602adf40SYehuda Sadeh 1586602adf40SYehuda Sadeh /* 1587602adf40SYehuda Sadeh * reload the ondisk the header 1588602adf40SYehuda Sadeh */ 1589602adf40SYehuda Sadeh static int rbd_read_header(struct rbd_device *rbd_dev, 1590602adf40SYehuda Sadeh struct rbd_image_header *header) 1591602adf40SYehuda Sadeh { 1592602adf40SYehuda Sadeh ssize_t rc; 1593602adf40SYehuda Sadeh struct rbd_image_header_ondisk *dh; 1594602adf40SYehuda Sadeh int snap_count = 0; 159559c2be1eSYehuda Sadeh u64 ver; 159600f1f36fSAlex Elder size_t len; 1597602adf40SYehuda Sadeh 159800f1f36fSAlex Elder /* 159900f1f36fSAlex Elder * First reads the fixed-size header to determine the number 160000f1f36fSAlex Elder * of snapshots, then re-reads it, along with all snapshot 160100f1f36fSAlex Elder * records as well as their stored names. 
160200f1f36fSAlex Elder */ 160300f1f36fSAlex Elder len = sizeof (*dh); 1604602adf40SYehuda Sadeh while (1) { 1605602adf40SYehuda Sadeh dh = kmalloc(len, GFP_KERNEL); 1606602adf40SYehuda Sadeh if (!dh) 1607602adf40SYehuda Sadeh return -ENOMEM; 1608602adf40SYehuda Sadeh 1609602adf40SYehuda Sadeh rc = rbd_req_sync_read(rbd_dev, 1610602adf40SYehuda Sadeh NULL, CEPH_NOSNAP, 1611602adf40SYehuda Sadeh rbd_dev->obj_md_name, 1612602adf40SYehuda Sadeh 0, len, 161359c2be1eSYehuda Sadeh (char *)dh, &ver); 1614602adf40SYehuda Sadeh if (rc < 0) 1615602adf40SYehuda Sadeh goto out_dh; 1616602adf40SYehuda Sadeh 1617602adf40SYehuda Sadeh rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL); 161881e759fbSJosh Durgin if (rc < 0) { 161900f1f36fSAlex Elder if (rc == -ENXIO) 162081e759fbSJosh Durgin pr_warning("unrecognized header format" 162181e759fbSJosh Durgin " for image %s", rbd_dev->obj); 1622602adf40SYehuda Sadeh goto out_dh; 162381e759fbSJosh Durgin } 1624602adf40SYehuda Sadeh 162500f1f36fSAlex Elder if (snap_count == header->total_snaps) 162600f1f36fSAlex Elder break; 162700f1f36fSAlex Elder 1628602adf40SYehuda Sadeh snap_count = header->total_snaps; 162900f1f36fSAlex Elder len = sizeof (*dh) + 163000f1f36fSAlex Elder snap_count * sizeof(struct rbd_image_snap_ondisk) + 163100f1f36fSAlex Elder header->snap_names_len; 163200f1f36fSAlex Elder 1633602adf40SYehuda Sadeh rbd_header_free(header); 1634602adf40SYehuda Sadeh kfree(dh); 1635602adf40SYehuda Sadeh } 163659c2be1eSYehuda Sadeh header->obj_version = ver; 1637602adf40SYehuda Sadeh 1638602adf40SYehuda Sadeh out_dh: 1639602adf40SYehuda Sadeh kfree(dh); 1640602adf40SYehuda Sadeh return rc; 1641602adf40SYehuda Sadeh } 1642602adf40SYehuda Sadeh 1643602adf40SYehuda Sadeh /* 1644602adf40SYehuda Sadeh * create a snapshot 1645602adf40SYehuda Sadeh */ 1646602adf40SYehuda Sadeh static int rbd_header_add_snap(struct rbd_device *dev, 1647602adf40SYehuda Sadeh const char *snap_name, 1648602adf40SYehuda Sadeh gfp_t gfp_flags) 
1649602adf40SYehuda Sadeh { 1650602adf40SYehuda Sadeh int name_len = strlen(snap_name); 1651602adf40SYehuda Sadeh u64 new_snapid; 1652602adf40SYehuda Sadeh int ret; 1653916d4d67SSage Weil void *data, *p, *e; 165459c2be1eSYehuda Sadeh u64 ver; 16551dbb4399SAlex Elder struct ceph_mon_client *monc; 1656602adf40SYehuda Sadeh 1657602adf40SYehuda Sadeh /* we should create a snapshot only if we're pointing at the head */ 1658602adf40SYehuda Sadeh if (dev->cur_snap) 1659602adf40SYehuda Sadeh return -EINVAL; 1660602adf40SYehuda Sadeh 16611dbb4399SAlex Elder monc = &dev->rbd_client->client->monc; 16621dbb4399SAlex Elder ret = ceph_monc_create_snapid(monc, dev->poolid, &new_snapid); 1663602adf40SYehuda Sadeh dout("created snapid=%lld\n", new_snapid); 1664602adf40SYehuda Sadeh if (ret < 0) 1665602adf40SYehuda Sadeh return ret; 1666602adf40SYehuda Sadeh 1667602adf40SYehuda Sadeh data = kmalloc(name_len + 16, gfp_flags); 1668602adf40SYehuda Sadeh if (!data) 1669602adf40SYehuda Sadeh return -ENOMEM; 1670602adf40SYehuda Sadeh 1671916d4d67SSage Weil p = data; 1672916d4d67SSage Weil e = data + name_len + 16; 1673602adf40SYehuda Sadeh 1674916d4d67SSage Weil ceph_encode_string_safe(&p, e, snap_name, name_len, bad); 1675916d4d67SSage Weil ceph_encode_64_safe(&p, e, new_snapid, bad); 1676602adf40SYehuda Sadeh 1677602adf40SYehuda Sadeh ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add", 1678916d4d67SSage Weil data, p - data, &ver); 1679602adf40SYehuda Sadeh 1680916d4d67SSage Weil kfree(data); 1681602adf40SYehuda Sadeh 1682602adf40SYehuda Sadeh if (ret < 0) 1683602adf40SYehuda Sadeh return ret; 1684602adf40SYehuda Sadeh 1685602adf40SYehuda Sadeh dev->header.snapc->seq = new_snapid; 1686602adf40SYehuda Sadeh 1687602adf40SYehuda Sadeh return 0; 1688602adf40SYehuda Sadeh bad: 1689602adf40SYehuda Sadeh return -ERANGE; 1690602adf40SYehuda Sadeh } 1691602adf40SYehuda Sadeh 1692dfc5606dSYehuda Sadeh static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev) 1693dfc5606dSYehuda 
Sadeh { 1694dfc5606dSYehuda Sadeh struct rbd_snap *snap; 1695dfc5606dSYehuda Sadeh 1696dfc5606dSYehuda Sadeh while (!list_empty(&rbd_dev->snaps)) { 1697dfc5606dSYehuda Sadeh snap = list_first_entry(&rbd_dev->snaps, struct rbd_snap, node); 1698dfc5606dSYehuda Sadeh __rbd_remove_snap_dev(rbd_dev, snap); 1699dfc5606dSYehuda Sadeh } 1700dfc5606dSYehuda Sadeh } 1701dfc5606dSYehuda Sadeh 1702602adf40SYehuda Sadeh /* 1703602adf40SYehuda Sadeh * only read the first part of the ondisk header, without the snaps info 1704602adf40SYehuda Sadeh */ 1705dfc5606dSYehuda Sadeh static int __rbd_update_snaps(struct rbd_device *rbd_dev) 1706602adf40SYehuda Sadeh { 1707602adf40SYehuda Sadeh int ret; 1708602adf40SYehuda Sadeh struct rbd_image_header h; 1709602adf40SYehuda Sadeh u64 snap_seq; 171059c2be1eSYehuda Sadeh int follow_seq = 0; 1711602adf40SYehuda Sadeh 1712602adf40SYehuda Sadeh ret = rbd_read_header(rbd_dev, &h); 1713602adf40SYehuda Sadeh if (ret < 0) 1714602adf40SYehuda Sadeh return ret; 1715602adf40SYehuda Sadeh 17169db4b3e3SSage Weil /* resized? 
*/ 1717593a9e7bSAlex Elder set_capacity(rbd_dev->disk, h.image_size / SECTOR_SIZE); 17189db4b3e3SSage Weil 1719c666601aSJosh Durgin down_write(&rbd_dev->header_rwsem); 1720602adf40SYehuda Sadeh 1721602adf40SYehuda Sadeh snap_seq = rbd_dev->header.snapc->seq; 172259c2be1eSYehuda Sadeh if (rbd_dev->header.total_snaps && 172359c2be1eSYehuda Sadeh rbd_dev->header.snapc->snaps[0] == snap_seq) 172459c2be1eSYehuda Sadeh /* pointing at the head, will need to follow that 172559c2be1eSYehuda Sadeh if head moves */ 172659c2be1eSYehuda Sadeh follow_seq = 1; 1727602adf40SYehuda Sadeh 1728602adf40SYehuda Sadeh kfree(rbd_dev->header.snapc); 1729602adf40SYehuda Sadeh kfree(rbd_dev->header.snap_names); 1730602adf40SYehuda Sadeh kfree(rbd_dev->header.snap_sizes); 1731602adf40SYehuda Sadeh 1732602adf40SYehuda Sadeh rbd_dev->header.total_snaps = h.total_snaps; 1733602adf40SYehuda Sadeh rbd_dev->header.snapc = h.snapc; 1734602adf40SYehuda Sadeh rbd_dev->header.snap_names = h.snap_names; 1735dfc5606dSYehuda Sadeh rbd_dev->header.snap_names_len = h.snap_names_len; 1736602adf40SYehuda Sadeh rbd_dev->header.snap_sizes = h.snap_sizes; 173759c2be1eSYehuda Sadeh if (follow_seq) 173859c2be1eSYehuda Sadeh rbd_dev->header.snapc->seq = rbd_dev->header.snapc->snaps[0]; 173959c2be1eSYehuda Sadeh else 1740602adf40SYehuda Sadeh rbd_dev->header.snapc->seq = snap_seq; 1741602adf40SYehuda Sadeh 1742dfc5606dSYehuda Sadeh ret = __rbd_init_snaps_header(rbd_dev); 1743dfc5606dSYehuda Sadeh 1744c666601aSJosh Durgin up_write(&rbd_dev->header_rwsem); 1745602adf40SYehuda Sadeh 1746dfc5606dSYehuda Sadeh return ret; 1747602adf40SYehuda Sadeh } 1748602adf40SYehuda Sadeh 1749602adf40SYehuda Sadeh static int rbd_init_disk(struct rbd_device *rbd_dev) 1750602adf40SYehuda Sadeh { 1751602adf40SYehuda Sadeh struct gendisk *disk; 1752602adf40SYehuda Sadeh struct request_queue *q; 1753602adf40SYehuda Sadeh int rc; 1754593a9e7bSAlex Elder u64 segment_size; 1755602adf40SYehuda Sadeh u64 total_size = 0; 1756602adf40SYehuda 
Sadeh 1757602adf40SYehuda Sadeh /* contact OSD, request size info about the object being mapped */ 1758602adf40SYehuda Sadeh rc = rbd_read_header(rbd_dev, &rbd_dev->header); 1759602adf40SYehuda Sadeh if (rc) 1760602adf40SYehuda Sadeh return rc; 1761602adf40SYehuda Sadeh 1762dfc5606dSYehuda Sadeh /* no need to lock here, as rbd_dev is not registered yet */ 1763dfc5606dSYehuda Sadeh rc = __rbd_init_snaps_header(rbd_dev); 1764dfc5606dSYehuda Sadeh if (rc) 1765dfc5606dSYehuda Sadeh return rc; 1766dfc5606dSYehuda Sadeh 1767cc9d734cSJosh Durgin rc = rbd_header_set_snap(rbd_dev, &total_size); 1768602adf40SYehuda Sadeh if (rc) 1769602adf40SYehuda Sadeh return rc; 1770602adf40SYehuda Sadeh 1771602adf40SYehuda Sadeh /* create gendisk info */ 1772602adf40SYehuda Sadeh rc = -ENOMEM; 1773602adf40SYehuda Sadeh disk = alloc_disk(RBD_MINORS_PER_MAJOR); 1774602adf40SYehuda Sadeh if (!disk) 1775602adf40SYehuda Sadeh goto out; 1776602adf40SYehuda Sadeh 1777f0f8cef5SAlex Elder snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", 1778aedfec59SSage Weil rbd_dev->id); 1779602adf40SYehuda Sadeh disk->major = rbd_dev->major; 1780602adf40SYehuda Sadeh disk->first_minor = 0; 1781602adf40SYehuda Sadeh disk->fops = &rbd_bd_ops; 1782602adf40SYehuda Sadeh disk->private_data = rbd_dev; 1783602adf40SYehuda Sadeh 1784602adf40SYehuda Sadeh /* init rq */ 1785602adf40SYehuda Sadeh rc = -ENOMEM; 1786602adf40SYehuda Sadeh q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); 1787602adf40SYehuda Sadeh if (!q) 1788602adf40SYehuda Sadeh goto out_disk; 1789029bcbd8SJosh Durgin 1790593a9e7bSAlex Elder /* We use the default size, but let's be explicit about it. 
*/ 1791593a9e7bSAlex Elder blk_queue_physical_block_size(q, SECTOR_SIZE); 1792593a9e7bSAlex Elder 1793029bcbd8SJosh Durgin /* set io sizes to object size */ 1794593a9e7bSAlex Elder segment_size = rbd_obj_bytes(&rbd_dev->header); 1795593a9e7bSAlex Elder blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); 1796593a9e7bSAlex Elder blk_queue_max_segment_size(q, segment_size); 1797593a9e7bSAlex Elder blk_queue_io_min(q, segment_size); 1798593a9e7bSAlex Elder blk_queue_io_opt(q, segment_size); 1799029bcbd8SJosh Durgin 1800602adf40SYehuda Sadeh blk_queue_merge_bvec(q, rbd_merge_bvec); 1801602adf40SYehuda Sadeh disk->queue = q; 1802602adf40SYehuda Sadeh 1803602adf40SYehuda Sadeh q->queuedata = rbd_dev; 1804602adf40SYehuda Sadeh 1805602adf40SYehuda Sadeh rbd_dev->disk = disk; 1806602adf40SYehuda Sadeh rbd_dev->q = q; 1807602adf40SYehuda Sadeh 1808602adf40SYehuda Sadeh /* finally, announce the disk to the world */ 1809593a9e7bSAlex Elder set_capacity(disk, total_size / SECTOR_SIZE); 1810602adf40SYehuda Sadeh add_disk(disk); 1811602adf40SYehuda Sadeh 1812602adf40SYehuda Sadeh pr_info("%s: added with size 0x%llx\n", 1813602adf40SYehuda Sadeh disk->disk_name, (unsigned long long)total_size); 1814602adf40SYehuda Sadeh return 0; 1815602adf40SYehuda Sadeh 1816602adf40SYehuda Sadeh out_disk: 1817602adf40SYehuda Sadeh put_disk(disk); 1818602adf40SYehuda Sadeh out: 1819602adf40SYehuda Sadeh return rc; 1820602adf40SYehuda Sadeh } 1821602adf40SYehuda Sadeh 1822dfc5606dSYehuda Sadeh /* 1823dfc5606dSYehuda Sadeh sysfs 1824dfc5606dSYehuda Sadeh */ 1825602adf40SYehuda Sadeh 1826593a9e7bSAlex Elder static struct rbd_device *dev_to_rbd_dev(struct device *dev) 1827593a9e7bSAlex Elder { 1828593a9e7bSAlex Elder return container_of(dev, struct rbd_device, dev); 1829593a9e7bSAlex Elder } 1830593a9e7bSAlex Elder 1831dfc5606dSYehuda Sadeh static ssize_t rbd_size_show(struct device *dev, 1832dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1833602adf40SYehuda Sadeh { 
1834593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1835dfc5606dSYehuda Sadeh 1836dfc5606dSYehuda Sadeh return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size); 1837602adf40SYehuda Sadeh } 1838602adf40SYehuda Sadeh 1839dfc5606dSYehuda Sadeh static ssize_t rbd_major_show(struct device *dev, 1840dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1841602adf40SYehuda Sadeh { 1842593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1843dfc5606dSYehuda Sadeh 1844dfc5606dSYehuda Sadeh return sprintf(buf, "%d\n", rbd_dev->major); 1845dfc5606dSYehuda Sadeh } 1846dfc5606dSYehuda Sadeh 1847dfc5606dSYehuda Sadeh static ssize_t rbd_client_id_show(struct device *dev, 1848dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1849dfc5606dSYehuda Sadeh { 1850593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1851dfc5606dSYehuda Sadeh 18521dbb4399SAlex Elder return sprintf(buf, "client%lld\n", 18531dbb4399SAlex Elder ceph_client_id(rbd_dev->rbd_client->client)); 1854dfc5606dSYehuda Sadeh } 1855dfc5606dSYehuda Sadeh 1856dfc5606dSYehuda Sadeh static ssize_t rbd_pool_show(struct device *dev, 1857dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1858dfc5606dSYehuda Sadeh { 1859593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1860dfc5606dSYehuda Sadeh 1861dfc5606dSYehuda Sadeh return sprintf(buf, "%s\n", rbd_dev->pool_name); 1862dfc5606dSYehuda Sadeh } 1863dfc5606dSYehuda Sadeh 1864dfc5606dSYehuda Sadeh static ssize_t rbd_name_show(struct device *dev, 1865dfc5606dSYehuda Sadeh struct device_attribute *attr, char *buf) 1866dfc5606dSYehuda Sadeh { 1867593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1868dfc5606dSYehuda Sadeh 1869dfc5606dSYehuda Sadeh return sprintf(buf, "%s\n", rbd_dev->obj); 1870dfc5606dSYehuda Sadeh } 1871dfc5606dSYehuda Sadeh 1872dfc5606dSYehuda Sadeh static ssize_t rbd_snap_show(struct device *dev, 
1873dfc5606dSYehuda Sadeh struct device_attribute *attr, 1874dfc5606dSYehuda Sadeh char *buf) 1875dfc5606dSYehuda Sadeh { 1876593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1877dfc5606dSYehuda Sadeh 1878dfc5606dSYehuda Sadeh return sprintf(buf, "%s\n", rbd_dev->snap_name); 1879dfc5606dSYehuda Sadeh } 1880dfc5606dSYehuda Sadeh 1881dfc5606dSYehuda Sadeh static ssize_t rbd_image_refresh(struct device *dev, 1882dfc5606dSYehuda Sadeh struct device_attribute *attr, 1883dfc5606dSYehuda Sadeh const char *buf, 1884dfc5606dSYehuda Sadeh size_t size) 1885dfc5606dSYehuda Sadeh { 1886593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1887dfc5606dSYehuda Sadeh int rc; 1888dfc5606dSYehuda Sadeh int ret = size; 1889602adf40SYehuda Sadeh 1890602adf40SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 1891602adf40SYehuda Sadeh 1892dfc5606dSYehuda Sadeh rc = __rbd_update_snaps(rbd_dev); 1893dfc5606dSYehuda Sadeh if (rc < 0) 1894dfc5606dSYehuda Sadeh ret = rc; 1895602adf40SYehuda Sadeh 1896dfc5606dSYehuda Sadeh mutex_unlock(&ctl_mutex); 1897dfc5606dSYehuda Sadeh return ret; 1898dfc5606dSYehuda Sadeh } 1899602adf40SYehuda Sadeh 1900dfc5606dSYehuda Sadeh static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL); 1901dfc5606dSYehuda Sadeh static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL); 1902dfc5606dSYehuda Sadeh static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL); 1903dfc5606dSYehuda Sadeh static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL); 1904dfc5606dSYehuda Sadeh static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL); 1905dfc5606dSYehuda Sadeh static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); 1906dfc5606dSYehuda Sadeh static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); 1907dfc5606dSYehuda Sadeh static DEVICE_ATTR(create_snap, S_IWUSR, NULL, rbd_snap_add); 1908dfc5606dSYehuda Sadeh 1909dfc5606dSYehuda Sadeh static struct attribute *rbd_attrs[] = { 1910dfc5606dSYehuda Sadeh 
&dev_attr_size.attr, 1911dfc5606dSYehuda Sadeh &dev_attr_major.attr, 1912dfc5606dSYehuda Sadeh &dev_attr_client_id.attr, 1913dfc5606dSYehuda Sadeh &dev_attr_pool.attr, 1914dfc5606dSYehuda Sadeh &dev_attr_name.attr, 1915dfc5606dSYehuda Sadeh &dev_attr_current_snap.attr, 1916dfc5606dSYehuda Sadeh &dev_attr_refresh.attr, 1917dfc5606dSYehuda Sadeh &dev_attr_create_snap.attr, 1918dfc5606dSYehuda Sadeh NULL 1919dfc5606dSYehuda Sadeh }; 1920dfc5606dSYehuda Sadeh 1921dfc5606dSYehuda Sadeh static struct attribute_group rbd_attr_group = { 1922dfc5606dSYehuda Sadeh .attrs = rbd_attrs, 1923dfc5606dSYehuda Sadeh }; 1924dfc5606dSYehuda Sadeh 1925dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_attr_groups[] = { 1926dfc5606dSYehuda Sadeh &rbd_attr_group, 1927dfc5606dSYehuda Sadeh NULL 1928dfc5606dSYehuda Sadeh }; 1929dfc5606dSYehuda Sadeh 1930dfc5606dSYehuda Sadeh static void rbd_sysfs_dev_release(struct device *dev) 1931dfc5606dSYehuda Sadeh { 1932dfc5606dSYehuda Sadeh } 1933dfc5606dSYehuda Sadeh 1934dfc5606dSYehuda Sadeh static struct device_type rbd_device_type = { 1935dfc5606dSYehuda Sadeh .name = "rbd", 1936dfc5606dSYehuda Sadeh .groups = rbd_attr_groups, 1937dfc5606dSYehuda Sadeh .release = rbd_sysfs_dev_release, 1938dfc5606dSYehuda Sadeh }; 1939dfc5606dSYehuda Sadeh 1940dfc5606dSYehuda Sadeh 1941dfc5606dSYehuda Sadeh /* 1942dfc5606dSYehuda Sadeh sysfs - snapshots 1943dfc5606dSYehuda Sadeh */ 1944dfc5606dSYehuda Sadeh 1945dfc5606dSYehuda Sadeh static ssize_t rbd_snap_size_show(struct device *dev, 1946dfc5606dSYehuda Sadeh struct device_attribute *attr, 1947dfc5606dSYehuda Sadeh char *buf) 1948dfc5606dSYehuda Sadeh { 1949dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 1950dfc5606dSYehuda Sadeh 1951593a9e7bSAlex Elder return sprintf(buf, "%zd\n", snap->size); 1952dfc5606dSYehuda Sadeh } 1953dfc5606dSYehuda Sadeh 1954dfc5606dSYehuda Sadeh static ssize_t rbd_snap_id_show(struct device *dev, 1955dfc5606dSYehuda Sadeh struct 
device_attribute *attr, 1956dfc5606dSYehuda Sadeh char *buf) 1957dfc5606dSYehuda Sadeh { 1958dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 1959dfc5606dSYehuda Sadeh 1960593a9e7bSAlex Elder return sprintf(buf, "%llu\n", (unsigned long long) snap->id); 1961dfc5606dSYehuda Sadeh } 1962dfc5606dSYehuda Sadeh 1963dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); 1964dfc5606dSYehuda Sadeh static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL); 1965dfc5606dSYehuda Sadeh 1966dfc5606dSYehuda Sadeh static struct attribute *rbd_snap_attrs[] = { 1967dfc5606dSYehuda Sadeh &dev_attr_snap_size.attr, 1968dfc5606dSYehuda Sadeh &dev_attr_snap_id.attr, 1969dfc5606dSYehuda Sadeh NULL, 1970dfc5606dSYehuda Sadeh }; 1971dfc5606dSYehuda Sadeh 1972dfc5606dSYehuda Sadeh static struct attribute_group rbd_snap_attr_group = { 1973dfc5606dSYehuda Sadeh .attrs = rbd_snap_attrs, 1974dfc5606dSYehuda Sadeh }; 1975dfc5606dSYehuda Sadeh 1976dfc5606dSYehuda Sadeh static void rbd_snap_dev_release(struct device *dev) 1977dfc5606dSYehuda Sadeh { 1978dfc5606dSYehuda Sadeh struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 1979dfc5606dSYehuda Sadeh kfree(snap->name); 1980dfc5606dSYehuda Sadeh kfree(snap); 1981dfc5606dSYehuda Sadeh } 1982dfc5606dSYehuda Sadeh 1983dfc5606dSYehuda Sadeh static const struct attribute_group *rbd_snap_attr_groups[] = { 1984dfc5606dSYehuda Sadeh &rbd_snap_attr_group, 1985dfc5606dSYehuda Sadeh NULL 1986dfc5606dSYehuda Sadeh }; 1987dfc5606dSYehuda Sadeh 1988dfc5606dSYehuda Sadeh static struct device_type rbd_snap_device_type = { 1989dfc5606dSYehuda Sadeh .groups = rbd_snap_attr_groups, 1990dfc5606dSYehuda Sadeh .release = rbd_snap_dev_release, 1991dfc5606dSYehuda Sadeh }; 1992dfc5606dSYehuda Sadeh 1993dfc5606dSYehuda Sadeh static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev, 1994dfc5606dSYehuda Sadeh struct rbd_snap *snap) 1995dfc5606dSYehuda Sadeh { 1996dfc5606dSYehuda Sadeh 
list_del(&snap->node); 1997dfc5606dSYehuda Sadeh device_unregister(&snap->dev); 1998dfc5606dSYehuda Sadeh } 1999dfc5606dSYehuda Sadeh 2000dfc5606dSYehuda Sadeh static int rbd_register_snap_dev(struct rbd_device *rbd_dev, 2001dfc5606dSYehuda Sadeh struct rbd_snap *snap, 2002dfc5606dSYehuda Sadeh struct device *parent) 2003dfc5606dSYehuda Sadeh { 2004dfc5606dSYehuda Sadeh struct device *dev = &snap->dev; 2005dfc5606dSYehuda Sadeh int ret; 2006dfc5606dSYehuda Sadeh 2007dfc5606dSYehuda Sadeh dev->type = &rbd_snap_device_type; 2008dfc5606dSYehuda Sadeh dev->parent = parent; 2009dfc5606dSYehuda Sadeh dev->release = rbd_snap_dev_release; 2010dfc5606dSYehuda Sadeh dev_set_name(dev, "snap_%s", snap->name); 2011dfc5606dSYehuda Sadeh ret = device_register(dev); 2012dfc5606dSYehuda Sadeh 2013dfc5606dSYehuda Sadeh return ret; 2014dfc5606dSYehuda Sadeh } 2015dfc5606dSYehuda Sadeh 2016dfc5606dSYehuda Sadeh static int __rbd_add_snap_dev(struct rbd_device *rbd_dev, 2017dfc5606dSYehuda Sadeh int i, const char *name, 2018dfc5606dSYehuda Sadeh struct rbd_snap **snapp) 2019dfc5606dSYehuda Sadeh { 2020dfc5606dSYehuda Sadeh int ret; 2021dfc5606dSYehuda Sadeh struct rbd_snap *snap = kzalloc(sizeof(*snap), GFP_KERNEL); 2022dfc5606dSYehuda Sadeh if (!snap) 2023dfc5606dSYehuda Sadeh return -ENOMEM; 2024dfc5606dSYehuda Sadeh snap->name = kstrdup(name, GFP_KERNEL); 2025dfc5606dSYehuda Sadeh snap->size = rbd_dev->header.snap_sizes[i]; 2026dfc5606dSYehuda Sadeh snap->id = rbd_dev->header.snapc->snaps[i]; 2027dfc5606dSYehuda Sadeh if (device_is_registered(&rbd_dev->dev)) { 2028dfc5606dSYehuda Sadeh ret = rbd_register_snap_dev(rbd_dev, snap, 2029dfc5606dSYehuda Sadeh &rbd_dev->dev); 2030dfc5606dSYehuda Sadeh if (ret < 0) 2031dfc5606dSYehuda Sadeh goto err; 2032dfc5606dSYehuda Sadeh } 2033dfc5606dSYehuda Sadeh *snapp = snap; 2034dfc5606dSYehuda Sadeh return 0; 2035dfc5606dSYehuda Sadeh err: 2036dfc5606dSYehuda Sadeh kfree(snap->name); 2037dfc5606dSYehuda Sadeh kfree(snap); 2038dfc5606dSYehuda Sadeh 
return ret; 2039dfc5606dSYehuda Sadeh } 2040dfc5606dSYehuda Sadeh 2041dfc5606dSYehuda Sadeh /* 2042dfc5606dSYehuda Sadeh * search for the previous snap in a null delimited string list 2043dfc5606dSYehuda Sadeh */ 2044dfc5606dSYehuda Sadeh const char *rbd_prev_snap_name(const char *name, const char *start) 2045dfc5606dSYehuda Sadeh { 2046dfc5606dSYehuda Sadeh if (name < start + 2) 2047dfc5606dSYehuda Sadeh return NULL; 2048dfc5606dSYehuda Sadeh 2049dfc5606dSYehuda Sadeh name -= 2; 2050dfc5606dSYehuda Sadeh while (*name) { 2051dfc5606dSYehuda Sadeh if (name == start) 2052dfc5606dSYehuda Sadeh return start; 2053dfc5606dSYehuda Sadeh name--; 2054dfc5606dSYehuda Sadeh } 2055dfc5606dSYehuda Sadeh return name + 1; 2056dfc5606dSYehuda Sadeh } 2057dfc5606dSYehuda Sadeh 2058dfc5606dSYehuda Sadeh /* 2059dfc5606dSYehuda Sadeh * compare the old list of snapshots that we have to what's in the header 2060dfc5606dSYehuda Sadeh * and update it accordingly. Note that the header holds the snapshots 2061dfc5606dSYehuda Sadeh * in a reverse order (from newest to oldest) and we need to go from 2062dfc5606dSYehuda Sadeh * older to new so that we don't get a duplicate snap name when 2063dfc5606dSYehuda Sadeh * doing the process (e.g., removed snapshot and recreated a new 2064dfc5606dSYehuda Sadeh * one with the same name. 
*/
static int __rbd_init_snaps_header(struct rbd_device *rbd_dev)
{
	const char *name, *first_name;
	/* Number of header snapshot ids not yet consumed; walked last-to-first. */
	int i = rbd_dev->header.total_snaps;
	struct rbd_snap *snap, *old_snap = NULL;
	int ret;
	struct list_head *p, *n;

	/*
	 * name starts just past the end of the header's snapshot name
	 * buffer and is stepped back toward first_name with
	 * rbd_prev_snap_name() as header entries are consumed.
	 */
	first_name = rbd_dev->header.snap_names;
	name = first_name + rbd_dev->header.snap_names_len;

	/*
	 * Walk the existing snapshot list from its tail toward its head,
	 * comparing each existing entry with header id snaps[i - 1].
	 */
	list_for_each_prev_safe(p, n, &rbd_dev->snaps) {
		u64 cur_id;

		old_snap = list_entry(p, struct rbd_snap, node);

		/* cur_id is only read below when i is non-zero */
		if (i)
			cur_id = rbd_dev->header.snapc->snaps[i - 1];

		if (!i || old_snap->id < cur_id) {
			/* old_snap->id was skipped, thus was removed */
			__rbd_remove_snap_dev(rbd_dev, old_snap);
			continue;
		}
		if (old_snap->id == cur_id) {
			/* we have this snapshot already */
			i--;
			name = rbd_prev_snap_name(name, first_name);
			continue;
		}
		/*
		 * old_snap->id > cur_id: the header holds ids that are not
		 * in the list yet.  Add them until an id that is not below
		 * old_snap->id is reached.
		 */
		for (; i > 0;
		     i--, name = rbd_prev_snap_name(name, first_name)) {
			if (!name) {
				WARN_ON(1);
				return -EINVAL;
			}
			/*
			 * NOTE(review): this reads snaps[i] while every other
			 * access in this function uses snaps[i - 1], and the
			 * device added below is created for index i - 1.  When
			 * i still equals total_snaps this looks like a read one
			 * element past the ids counted by total_snaps, and name
			 * has not been stepped back yet on the first pass --
			 * confirm against the snap context layout.
			 */
			cur_id = rbd_dev->header.snapc->snaps[i];
			/* snapshot removal? handle it above */
			if (cur_id >= old_snap->id)
				break;
			/* a new snapshot */
			ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap);
			if (ret < 0)
				return ret;

			/* note that we add it backward so using n and not p */
			list_add(&snap->node, n);
			p = &snap->node;
		}
	}
	/* we're done going over the old snap list, just add what's left */
	for (; i > 0; i--) {
		name = rbd_prev_snap_name(name, first_name);
		if (!name) {
			WARN_ON(1);
			return -EINVAL;
		}
		ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap);
		if (ret < 0)
			return ret;
		/* inserted at the head, so lower header indexes end up first */
		list_add(&snap->node, &rbd_dev->snaps);
	}

	return 0;
}

/*
 * Register rbd_dev->dev on the rbd bus as a child of rbd_root_dev, named
 * after the device id, then register every snapshot currently on
 * rbd_dev->snaps beneath it.  ctl_mutex is held for the whole operation.
 *
 * Returns the result of device_register() if that fails, otherwise the
 * result of the last rbd_register_snap_dev() call made (the first failing
 * one, since the loop stops there).  Nothing registered here is undone by
 * this function when a snapshot fails to register.
 */
static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
{
	int ret;
	struct device *dev;
	struct rbd_snap *snap;

	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
	dev = &rbd_dev->dev;

	dev->bus = &rbd_bus_type;
	dev->type = &rbd_device_type;
	dev->parent = &rbd_root_dev;
	dev->release = rbd_dev_release;
	dev_set_name(dev, "%d", rbd_dev->id);
	ret = device_register(dev);
	if (ret < 0)
		goto out;

	list_for_each_entry(snap, &rbd_dev->snaps, node) {
		ret = rbd_register_snap_dev(rbd_dev, snap,
					     &rbd_dev->dev);
		if (ret < 0)
			break;
	}
out:
	mutex_unlock(&ctl_mutex);
	return ret;
}

/*
 * Unregister the device set up by rbd_bus_add_dev().  The release
 * callback installed there (rbd_dev_release) is what tears down the
 * rest of the rbd_dev.
 */
static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
{
	device_unregister(&rbd_dev->dev);
}

/*
 * Set up a watch on the device's header object.  Whenever the watch
 * request fails with -ERANGE the snapshot information is refreshed under
 * ctl_mutex and the request is retried.
 *
 * Returns the final rbd_req_sync_watch() result, or the negative error
 * from __rbd_update_snaps() if a refresh fails.
 */
static int rbd_init_watch_dev(struct rbd_device *rbd_dev)
{
	int ret, rc;

	do {
		ret = rbd_req_sync_watch(rbd_dev, rbd_dev->obj_md_name,
					 rbd_dev->header.obj_version);
		if (ret == -ERANGE) {
			mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
			rc = __rbd_update_snaps(rbd_dev);
			mutex_unlock(&ctl_mutex);
			if (rc < 0)
				return rc;
		}
	} while (ret == -ERANGE);

	return ret;
}

/* Highest rbd id currently handed out; ids are allocated by incrementing it */
static atomic64_t rbd_id_max = ATOMIC64_INIT(0);

/*
 * Get a unique rbd identifier for the given new rbd_dev, and add
 * the rbd_dev to the global list.  The minimum rbd id is 1.
21901ddbe94eSAlex Elder */ 2191499afd5bSAlex Elder static void rbd_id_get(struct rbd_device *rbd_dev) 2192b7f23c36SAlex Elder { 2193499afd5bSAlex Elder rbd_dev->id = atomic64_inc_return(&rbd_id_max); 2194499afd5bSAlex Elder 2195499afd5bSAlex Elder spin_lock(&rbd_dev_list_lock); 2196499afd5bSAlex Elder list_add_tail(&rbd_dev->node, &rbd_dev_list); 2197499afd5bSAlex Elder spin_unlock(&rbd_dev_list_lock); 2198b7f23c36SAlex Elder } 2199b7f23c36SAlex Elder 22001ddbe94eSAlex Elder /* 2201499afd5bSAlex Elder * Remove an rbd_dev from the global list, and record that its 2202499afd5bSAlex Elder * identifier is no longer in use. 22031ddbe94eSAlex Elder */ 2204499afd5bSAlex Elder static void rbd_id_put(struct rbd_device *rbd_dev) 22051ddbe94eSAlex Elder { 2206d184f6bfSAlex Elder struct list_head *tmp; 2207d184f6bfSAlex Elder int rbd_id = rbd_dev->id; 2208d184f6bfSAlex Elder int max_id; 2209d184f6bfSAlex Elder 2210d184f6bfSAlex Elder BUG_ON(rbd_id < 1); 2211499afd5bSAlex Elder 2212499afd5bSAlex Elder spin_lock(&rbd_dev_list_lock); 2213499afd5bSAlex Elder list_del_init(&rbd_dev->node); 2214d184f6bfSAlex Elder 2215d184f6bfSAlex Elder /* 2216d184f6bfSAlex Elder * If the id being "put" is not the current maximum, there 2217d184f6bfSAlex Elder * is nothing special we need to do. 2218d184f6bfSAlex Elder */ 2219d184f6bfSAlex Elder if (rbd_id != atomic64_read(&rbd_id_max)) { 2220d184f6bfSAlex Elder spin_unlock(&rbd_dev_list_lock); 2221d184f6bfSAlex Elder return; 2222d184f6bfSAlex Elder } 2223d184f6bfSAlex Elder 2224d184f6bfSAlex Elder /* 2225d184f6bfSAlex Elder * We need to update the current maximum id. Search the 2226d184f6bfSAlex Elder * list to find out what it is. We're more likely to find 2227d184f6bfSAlex Elder * the maximum at the end, so search the list backward. 
2228d184f6bfSAlex Elder */ 2229d184f6bfSAlex Elder max_id = 0; 2230d184f6bfSAlex Elder list_for_each_prev(tmp, &rbd_dev_list) { 2231d184f6bfSAlex Elder struct rbd_device *rbd_dev; 2232d184f6bfSAlex Elder 2233d184f6bfSAlex Elder rbd_dev = list_entry(tmp, struct rbd_device, node); 2234d184f6bfSAlex Elder if (rbd_id > max_id) 2235d184f6bfSAlex Elder max_id = rbd_id; 2236d184f6bfSAlex Elder } 2237499afd5bSAlex Elder spin_unlock(&rbd_dev_list_lock); 22381ddbe94eSAlex Elder 22391ddbe94eSAlex Elder /* 2240d184f6bfSAlex Elder * The max id could have been updated by rbd_id_get(), in 2241d184f6bfSAlex Elder * which case it now accurately reflects the new maximum. 2242d184f6bfSAlex Elder * Be careful not to overwrite the maximum value in that 2243d184f6bfSAlex Elder * case. 22441ddbe94eSAlex Elder */ 2245d184f6bfSAlex Elder atomic64_cmpxchg(&rbd_id_max, rbd_id, max_id); 2246b7f23c36SAlex Elder } 2247b7f23c36SAlex Elder 2248a725f65eSAlex Elder /* 2249e28fff26SAlex Elder * Skips over white space at *buf, and updates *buf to point to the 2250e28fff26SAlex Elder * first found non-space character (if any). Returns the length of 2251593a9e7bSAlex Elder * the token (string of non-white space characters) found. Note 2252593a9e7bSAlex Elder * that *buf must be terminated with '\0'. 2253e28fff26SAlex Elder */ 2254e28fff26SAlex Elder static inline size_t next_token(const char **buf) 2255e28fff26SAlex Elder { 2256e28fff26SAlex Elder /* 2257e28fff26SAlex Elder * These are the characters that produce nonzero for 2258e28fff26SAlex Elder * isspace() in the "C" and "POSIX" locales. 
2259e28fff26SAlex Elder */ 2260e28fff26SAlex Elder const char *spaces = " \f\n\r\t\v"; 2261e28fff26SAlex Elder 2262e28fff26SAlex Elder *buf += strspn(*buf, spaces); /* Find start of token */ 2263e28fff26SAlex Elder 2264e28fff26SAlex Elder return strcspn(*buf, spaces); /* Return token length */ 2265e28fff26SAlex Elder } 2266e28fff26SAlex Elder 2267e28fff26SAlex Elder /* 2268e28fff26SAlex Elder * Finds the next token in *buf, and if the provided token buffer is 2269e28fff26SAlex Elder * big enough, copies the found token into it. The result, if 2270593a9e7bSAlex Elder * copied, is guaranteed to be terminated with '\0'. Note that *buf 2271593a9e7bSAlex Elder * must be terminated with '\0' on entry. 2272e28fff26SAlex Elder * 2273e28fff26SAlex Elder * Returns the length of the token found (not including the '\0'). 2274e28fff26SAlex Elder * Return value will be 0 if no token is found, and it will be >= 2275e28fff26SAlex Elder * token_size if the token would not fit. 2276e28fff26SAlex Elder * 2277593a9e7bSAlex Elder * The *buf pointer will be updated to point beyond the end of the 2278e28fff26SAlex Elder * found token. Note that this occurs even if the token buffer is 2279e28fff26SAlex Elder * too small to hold it. 
2280e28fff26SAlex Elder */ 2281e28fff26SAlex Elder static inline size_t copy_token(const char **buf, 2282e28fff26SAlex Elder char *token, 2283e28fff26SAlex Elder size_t token_size) 2284e28fff26SAlex Elder { 2285e28fff26SAlex Elder size_t len; 2286e28fff26SAlex Elder 2287e28fff26SAlex Elder len = next_token(buf); 2288e28fff26SAlex Elder if (len < token_size) { 2289e28fff26SAlex Elder memcpy(token, *buf, len); 2290e28fff26SAlex Elder *(token + len) = '\0'; 2291e28fff26SAlex Elder } 2292e28fff26SAlex Elder *buf += len; 2293e28fff26SAlex Elder 2294e28fff26SAlex Elder return len; 2295e28fff26SAlex Elder } 2296e28fff26SAlex Elder 2297e28fff26SAlex Elder /* 2298a725f65eSAlex Elder * This fills in the pool_name, obj, obj_len, snap_name, obj_len, 2299a725f65eSAlex Elder * rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based 2300a725f65eSAlex Elder * on the list of monitor addresses and other options provided via 2301a725f65eSAlex Elder * /sys/bus/rbd/add. 2302a725f65eSAlex Elder */ 2303a725f65eSAlex Elder static int rbd_add_parse_args(struct rbd_device *rbd_dev, 2304a725f65eSAlex Elder const char *buf, 23057ef3214aSAlex Elder const char **mon_addrs, 23065214ecc4SAlex Elder size_t *mon_addrs_size, 2307e28fff26SAlex Elder char *options, 2308e28fff26SAlex Elder size_t options_size) 2309a725f65eSAlex Elder { 2310e28fff26SAlex Elder size_t len; 2311e28fff26SAlex Elder 2312e28fff26SAlex Elder /* The first four tokens are required */ 2313e28fff26SAlex Elder 23147ef3214aSAlex Elder len = next_token(&buf); 23157ef3214aSAlex Elder if (!len) 2316a725f65eSAlex Elder return -EINVAL; 23175214ecc4SAlex Elder *mon_addrs_size = len + 1; 23187ef3214aSAlex Elder *mon_addrs = buf; 23197ef3214aSAlex Elder 23207ef3214aSAlex Elder buf += len; 2321a725f65eSAlex Elder 2322e28fff26SAlex Elder len = copy_token(&buf, options, options_size); 2323e28fff26SAlex Elder if (!len || len >= options_size) 2324e28fff26SAlex Elder return -EINVAL; 2325a725f65eSAlex Elder 2326e28fff26SAlex Elder len = 
copy_token(&buf, rbd_dev->pool_name, sizeof (rbd_dev->pool_name)); 2327e28fff26SAlex Elder if (!len || len >= sizeof (rbd_dev->pool_name)) 2328e28fff26SAlex Elder return -EINVAL; 2329e28fff26SAlex Elder 2330e28fff26SAlex Elder len = copy_token(&buf, rbd_dev->obj, sizeof (rbd_dev->obj)); 2331e28fff26SAlex Elder if (!len || len >= sizeof (rbd_dev->obj)) 2332e28fff26SAlex Elder return -EINVAL; 2333e28fff26SAlex Elder 2334e28fff26SAlex Elder /* We have the object length in hand, save it. */ 2335e28fff26SAlex Elder 2336e28fff26SAlex Elder rbd_dev->obj_len = len; 2337e28fff26SAlex Elder 233881a89793SAlex Elder BUILD_BUG_ON(RBD_MAX_MD_NAME_LEN 233981a89793SAlex Elder < RBD_MAX_OBJ_NAME_LEN + sizeof (RBD_SUFFIX)); 234081a89793SAlex Elder sprintf(rbd_dev->obj_md_name, "%s%s", rbd_dev->obj, RBD_SUFFIX); 2341a725f65eSAlex Elder 2342e28fff26SAlex Elder /* 2343e28fff26SAlex Elder * The snapshot name is optional, but it's an error if it's 2344e28fff26SAlex Elder * too long. If no snapshot is supplied, fill in the default. 
2345e28fff26SAlex Elder */ 2346e28fff26SAlex Elder len = copy_token(&buf, rbd_dev->snap_name, sizeof (rbd_dev->snap_name)); 2347e28fff26SAlex Elder if (!len) 2348e28fff26SAlex Elder memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME, 2349e28fff26SAlex Elder sizeof (RBD_SNAP_HEAD_NAME)); 2350e28fff26SAlex Elder else if (len >= sizeof (rbd_dev->snap_name)) 2351e28fff26SAlex Elder return -EINVAL; 2352e28fff26SAlex Elder 2353a725f65eSAlex Elder return 0; 2354a725f65eSAlex Elder } 2355a725f65eSAlex Elder 235659c2be1eSYehuda Sadeh static ssize_t rbd_add(struct bus_type *bus, 235759c2be1eSYehuda Sadeh const char *buf, 235859c2be1eSYehuda Sadeh size_t count) 2359602adf40SYehuda Sadeh { 2360602adf40SYehuda Sadeh struct rbd_device *rbd_dev; 23617ef3214aSAlex Elder const char *mon_addrs = NULL; 23627ef3214aSAlex Elder size_t mon_addrs_size = 0; 236327cc2594SAlex Elder char *options = NULL; 236427cc2594SAlex Elder struct ceph_osd_client *osdc; 236527cc2594SAlex Elder int rc = -ENOMEM; 2366602adf40SYehuda Sadeh 2367602adf40SYehuda Sadeh if (!try_module_get(THIS_MODULE)) 2368602adf40SYehuda Sadeh return -ENODEV; 2369602adf40SYehuda Sadeh 2370602adf40SYehuda Sadeh rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); 2371602adf40SYehuda Sadeh if (!rbd_dev) 237227cc2594SAlex Elder goto err_nomem; 237327cc2594SAlex Elder options = kmalloc(count, GFP_KERNEL); 237427cc2594SAlex Elder if (!options) 237527cc2594SAlex Elder goto err_nomem; 2376602adf40SYehuda Sadeh 2377602adf40SYehuda Sadeh /* static rbd_device initialization */ 2378602adf40SYehuda Sadeh spin_lock_init(&rbd_dev->lock); 2379602adf40SYehuda Sadeh INIT_LIST_HEAD(&rbd_dev->node); 2380dfc5606dSYehuda Sadeh INIT_LIST_HEAD(&rbd_dev->snaps); 2381c666601aSJosh Durgin init_rwsem(&rbd_dev->header_rwsem); 2382602adf40SYehuda Sadeh 2383c666601aSJosh Durgin init_rwsem(&rbd_dev->header_rwsem); 23840e805a1dSAlex Elder 2385d184f6bfSAlex Elder /* generate unique id: find highest unique id, add one */ 2386499afd5bSAlex Elder rbd_id_get(rbd_dev); 
2387602adf40SYehuda Sadeh 2388a725f65eSAlex Elder /* Fill in the device name, now that we have its id. */ 238981a89793SAlex Elder BUILD_BUG_ON(DEV_NAME_LEN 239081a89793SAlex Elder < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH); 239181a89793SAlex Elder sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->id); 2392e124a82fSAlex Elder 2393a725f65eSAlex Elder /* parse add command */ 23947ef3214aSAlex Elder rc = rbd_add_parse_args(rbd_dev, buf, &mon_addrs, &mon_addrs_size, 2395e28fff26SAlex Elder options, count); 2396a725f65eSAlex Elder if (rc) 2397a725f65eSAlex Elder goto err_put_id; 2398a725f65eSAlex Elder 23995214ecc4SAlex Elder rbd_dev->rbd_client = rbd_get_client(mon_addrs, mon_addrs_size - 1, 24005214ecc4SAlex Elder options); 2401d720bcb0SAlex Elder if (IS_ERR(rbd_dev->rbd_client)) { 2402d720bcb0SAlex Elder rc = PTR_ERR(rbd_dev->rbd_client); 2403f0f8cef5SAlex Elder goto err_put_id; 2404d720bcb0SAlex Elder } 2405602adf40SYehuda Sadeh 2406602adf40SYehuda Sadeh /* pick the pool */ 24071dbb4399SAlex Elder osdc = &rbd_dev->rbd_client->client->osdc; 2408602adf40SYehuda Sadeh rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); 2409602adf40SYehuda Sadeh if (rc < 0) 2410602adf40SYehuda Sadeh goto err_out_client; 2411602adf40SYehuda Sadeh rbd_dev->poolid = rc; 2412602adf40SYehuda Sadeh 2413602adf40SYehuda Sadeh /* register our block device */ 241427cc2594SAlex Elder rc = register_blkdev(0, rbd_dev->name); 241527cc2594SAlex Elder if (rc < 0) 2416602adf40SYehuda Sadeh goto err_out_client; 241727cc2594SAlex Elder rbd_dev->major = rc; 2418602adf40SYehuda Sadeh 2419dfc5606dSYehuda Sadeh rc = rbd_bus_add_dev(rbd_dev); 2420dfc5606dSYehuda Sadeh if (rc) 2421766fc439SYehuda Sadeh goto err_out_blkdev; 2422766fc439SYehuda Sadeh 242332eec68dSAlex Elder /* 242432eec68dSAlex Elder * At this point cleanup in the event of an error is the job 242532eec68dSAlex Elder * of the sysfs code (initiated by rbd_bus_del_dev()). 
242632eec68dSAlex Elder * 242732eec68dSAlex Elder * Set up and announce blkdev mapping. 242832eec68dSAlex Elder */ 2429602adf40SYehuda Sadeh rc = rbd_init_disk(rbd_dev); 2430602adf40SYehuda Sadeh if (rc) 2431766fc439SYehuda Sadeh goto err_out_bus; 2432602adf40SYehuda Sadeh 243359c2be1eSYehuda Sadeh rc = rbd_init_watch_dev(rbd_dev); 243459c2be1eSYehuda Sadeh if (rc) 243559c2be1eSYehuda Sadeh goto err_out_bus; 243659c2be1eSYehuda Sadeh 2437602adf40SYehuda Sadeh return count; 2438602adf40SYehuda Sadeh 2439766fc439SYehuda Sadeh err_out_bus: 2440766fc439SYehuda Sadeh /* this will also clean up rest of rbd_dev stuff */ 2441766fc439SYehuda Sadeh 2442766fc439SYehuda Sadeh rbd_bus_del_dev(rbd_dev); 2443766fc439SYehuda Sadeh kfree(options); 2444766fc439SYehuda Sadeh return rc; 2445766fc439SYehuda Sadeh 2446602adf40SYehuda Sadeh err_out_blkdev: 2447602adf40SYehuda Sadeh unregister_blkdev(rbd_dev->major, rbd_dev->name); 2448602adf40SYehuda Sadeh err_out_client: 2449602adf40SYehuda Sadeh rbd_put_client(rbd_dev); 2450f0f8cef5SAlex Elder err_put_id: 2451499afd5bSAlex Elder rbd_id_put(rbd_dev); 245227cc2594SAlex Elder err_nomem: 2453602adf40SYehuda Sadeh kfree(options); 245427cc2594SAlex Elder kfree(rbd_dev); 245527cc2594SAlex Elder 2456602adf40SYehuda Sadeh dout("Error adding device %s\n", buf); 2457602adf40SYehuda Sadeh module_put(THIS_MODULE); 245827cc2594SAlex Elder 245927cc2594SAlex Elder return (ssize_t) rc; 2460602adf40SYehuda Sadeh } 2461602adf40SYehuda Sadeh 2462602adf40SYehuda Sadeh static struct rbd_device *__rbd_get_dev(unsigned long id) 2463602adf40SYehuda Sadeh { 2464602adf40SYehuda Sadeh struct list_head *tmp; 2465602adf40SYehuda Sadeh struct rbd_device *rbd_dev; 2466602adf40SYehuda Sadeh 2467e124a82fSAlex Elder spin_lock(&rbd_dev_list_lock); 2468602adf40SYehuda Sadeh list_for_each(tmp, &rbd_dev_list) { 2469602adf40SYehuda Sadeh rbd_dev = list_entry(tmp, struct rbd_device, node); 2470e124a82fSAlex Elder if (rbd_dev->id == id) { 2471e124a82fSAlex Elder 
spin_unlock(&rbd_dev_list_lock); 2472602adf40SYehuda Sadeh return rbd_dev; 2473602adf40SYehuda Sadeh } 2474e124a82fSAlex Elder } 2475e124a82fSAlex Elder spin_unlock(&rbd_dev_list_lock); 2476602adf40SYehuda Sadeh return NULL; 2477602adf40SYehuda Sadeh } 2478602adf40SYehuda Sadeh 2479dfc5606dSYehuda Sadeh static void rbd_dev_release(struct device *dev) 2480602adf40SYehuda Sadeh { 2481593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2482602adf40SYehuda Sadeh 24831dbb4399SAlex Elder if (rbd_dev->watch_request) { 24841dbb4399SAlex Elder struct ceph_client *client = rbd_dev->rbd_client->client; 24851dbb4399SAlex Elder 24861dbb4399SAlex Elder ceph_osdc_unregister_linger_request(&client->osdc, 248759c2be1eSYehuda Sadeh rbd_dev->watch_request); 24881dbb4399SAlex Elder } 248959c2be1eSYehuda Sadeh if (rbd_dev->watch_event) 249079e3057cSYehuda Sadeh rbd_req_sync_unwatch(rbd_dev, rbd_dev->obj_md_name); 249159c2be1eSYehuda Sadeh 2492602adf40SYehuda Sadeh rbd_put_client(rbd_dev); 2493602adf40SYehuda Sadeh 2494602adf40SYehuda Sadeh /* clean up and free blkdev */ 2495602adf40SYehuda Sadeh rbd_free_disk(rbd_dev); 2496602adf40SYehuda Sadeh unregister_blkdev(rbd_dev->major, rbd_dev->name); 249732eec68dSAlex Elder 249832eec68dSAlex Elder /* done with the id, and with the rbd_dev */ 249932eec68dSAlex Elder rbd_id_put(rbd_dev); 2500602adf40SYehuda Sadeh kfree(rbd_dev); 2501602adf40SYehuda Sadeh 2502602adf40SYehuda Sadeh /* release module ref */ 2503602adf40SYehuda Sadeh module_put(THIS_MODULE); 2504602adf40SYehuda Sadeh } 2505602adf40SYehuda Sadeh 2506dfc5606dSYehuda Sadeh static ssize_t rbd_remove(struct bus_type *bus, 2507602adf40SYehuda Sadeh const char *buf, 2508602adf40SYehuda Sadeh size_t count) 2509602adf40SYehuda Sadeh { 2510602adf40SYehuda Sadeh struct rbd_device *rbd_dev = NULL; 2511602adf40SYehuda Sadeh int target_id, rc; 2512602adf40SYehuda Sadeh unsigned long ul; 2513602adf40SYehuda Sadeh int ret = count; 2514602adf40SYehuda Sadeh 2515602adf40SYehuda 
Sadeh rc = strict_strtoul(buf, 10, &ul); 2516602adf40SYehuda Sadeh if (rc) 2517602adf40SYehuda Sadeh return rc; 2518602adf40SYehuda Sadeh 2519602adf40SYehuda Sadeh /* convert to int; abort if we lost anything in the conversion */ 2520602adf40SYehuda Sadeh target_id = (int) ul; 2521602adf40SYehuda Sadeh if (target_id != ul) 2522602adf40SYehuda Sadeh return -EINVAL; 2523602adf40SYehuda Sadeh 2524602adf40SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2525602adf40SYehuda Sadeh 2526602adf40SYehuda Sadeh rbd_dev = __rbd_get_dev(target_id); 2527602adf40SYehuda Sadeh if (!rbd_dev) { 2528602adf40SYehuda Sadeh ret = -ENOENT; 2529602adf40SYehuda Sadeh goto done; 2530602adf40SYehuda Sadeh } 2531602adf40SYehuda Sadeh 2532dfc5606dSYehuda Sadeh __rbd_remove_all_snaps(rbd_dev); 2533dfc5606dSYehuda Sadeh rbd_bus_del_dev(rbd_dev); 2534602adf40SYehuda Sadeh 2535602adf40SYehuda Sadeh done: 2536602adf40SYehuda Sadeh mutex_unlock(&ctl_mutex); 2537602adf40SYehuda Sadeh return ret; 2538602adf40SYehuda Sadeh } 2539602adf40SYehuda Sadeh 2540dfc5606dSYehuda Sadeh static ssize_t rbd_snap_add(struct device *dev, 2541dfc5606dSYehuda Sadeh struct device_attribute *attr, 2542602adf40SYehuda Sadeh const char *buf, 2543602adf40SYehuda Sadeh size_t count) 2544602adf40SYehuda Sadeh { 2545593a9e7bSAlex Elder struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 2546dfc5606dSYehuda Sadeh int ret; 2547dfc5606dSYehuda Sadeh char *name = kmalloc(count + 1, GFP_KERNEL); 2548602adf40SYehuda Sadeh if (!name) 2549602adf40SYehuda Sadeh return -ENOMEM; 2550602adf40SYehuda Sadeh 2551dfc5606dSYehuda Sadeh snprintf(name, count, "%s", buf); 2552602adf40SYehuda Sadeh 2553602adf40SYehuda Sadeh mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2554602adf40SYehuda Sadeh 2555602adf40SYehuda Sadeh ret = rbd_header_add_snap(rbd_dev, 2556602adf40SYehuda Sadeh name, GFP_KERNEL); 2557602adf40SYehuda Sadeh if (ret < 0) 255859c2be1eSYehuda Sadeh goto err_unlock; 2559602adf40SYehuda Sadeh 2560dfc5606dSYehuda 
Sadeh ret = __rbd_update_snaps(rbd_dev); 2561602adf40SYehuda Sadeh if (ret < 0) 256259c2be1eSYehuda Sadeh goto err_unlock; 256359c2be1eSYehuda Sadeh 256459c2be1eSYehuda Sadeh /* shouldn't hold ctl_mutex when notifying.. notify might 256559c2be1eSYehuda Sadeh trigger a watch callback that would need to get that mutex */ 256659c2be1eSYehuda Sadeh mutex_unlock(&ctl_mutex); 256759c2be1eSYehuda Sadeh 256859c2be1eSYehuda Sadeh /* make a best effort, don't error if failed */ 256959c2be1eSYehuda Sadeh rbd_req_sync_notify(rbd_dev, rbd_dev->obj_md_name); 2570602adf40SYehuda Sadeh 2571602adf40SYehuda Sadeh ret = count; 257259c2be1eSYehuda Sadeh kfree(name); 257359c2be1eSYehuda Sadeh return ret; 257459c2be1eSYehuda Sadeh 257559c2be1eSYehuda Sadeh err_unlock: 2576602adf40SYehuda Sadeh mutex_unlock(&ctl_mutex); 2577602adf40SYehuda Sadeh kfree(name); 2578602adf40SYehuda Sadeh return ret; 2579602adf40SYehuda Sadeh } 2580602adf40SYehuda Sadeh 2581602adf40SYehuda Sadeh /* 2582602adf40SYehuda Sadeh * create control files in sysfs 2583dfc5606dSYehuda Sadeh * /sys/bus/rbd/... 
2584602adf40SYehuda Sadeh */ 2585602adf40SYehuda Sadeh static int rbd_sysfs_init(void) 2586602adf40SYehuda Sadeh { 2587dfc5606dSYehuda Sadeh int ret; 2588602adf40SYehuda Sadeh 2589fed4c143SAlex Elder ret = device_register(&rbd_root_dev); 2590dfc5606dSYehuda Sadeh if (ret < 0) 2591dfc5606dSYehuda Sadeh return ret; 2592602adf40SYehuda Sadeh 2593fed4c143SAlex Elder ret = bus_register(&rbd_bus_type); 2594fed4c143SAlex Elder if (ret < 0) 2595fed4c143SAlex Elder device_unregister(&rbd_root_dev); 2596602adf40SYehuda Sadeh 2597602adf40SYehuda Sadeh return ret; 2598602adf40SYehuda Sadeh } 2599602adf40SYehuda Sadeh 2600602adf40SYehuda Sadeh static void rbd_sysfs_cleanup(void) 2601602adf40SYehuda Sadeh { 2602dfc5606dSYehuda Sadeh bus_unregister(&rbd_bus_type); 2603fed4c143SAlex Elder device_unregister(&rbd_root_dev); 2604602adf40SYehuda Sadeh } 2605602adf40SYehuda Sadeh 2606602adf40SYehuda Sadeh int __init rbd_init(void) 2607602adf40SYehuda Sadeh { 2608602adf40SYehuda Sadeh int rc; 2609602adf40SYehuda Sadeh 2610602adf40SYehuda Sadeh rc = rbd_sysfs_init(); 2611602adf40SYehuda Sadeh if (rc) 2612602adf40SYehuda Sadeh return rc; 2613f0f8cef5SAlex Elder pr_info("loaded " RBD_DRV_NAME_LONG "\n"); 2614602adf40SYehuda Sadeh return 0; 2615602adf40SYehuda Sadeh } 2616602adf40SYehuda Sadeh 2617602adf40SYehuda Sadeh void __exit rbd_exit(void) 2618602adf40SYehuda Sadeh { 2619602adf40SYehuda Sadeh rbd_sysfs_cleanup(); 2620602adf40SYehuda Sadeh } 2621602adf40SYehuda Sadeh 2622602adf40SYehuda Sadeh module_init(rbd_init); 2623602adf40SYehuda Sadeh module_exit(rbd_exit); 2624602adf40SYehuda Sadeh 2625602adf40SYehuda Sadeh MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 2626602adf40SYehuda Sadeh MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 2627602adf40SYehuda Sadeh MODULE_DESCRIPTION("rados block device"); 2628602adf40SYehuda Sadeh 2629602adf40SYehuda Sadeh /* following authorship retained from original osdblk.c */ 2630602adf40SYehuda Sadeh MODULE_AUTHOR("Jeff Garzik 
<jeff@garzik.org>"); 2631602adf40SYehuda Sadeh 2632602adf40SYehuda Sadeh MODULE_LICENSE("GPL"); 2633