/*
 * Copyright(c) 2017 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/magic.h>
#include <linux/genhd.h>
#include <linux/cdev.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/dax.h>
#include <linux/fs.h>

static dev_t dax_devt;
DEFINE_STATIC_SRCU(dax_srcu);
static struct vfsmount *dax_mnt;
static DEFINE_IDA(dax_minor_ida);
static struct kmem_cache *dax_cache __read_mostly;
static struct super_block *dax_superblock __read_mostly;

#define DAX_HASH_SIZE (PAGE_SIZE / sizeof(struct hlist_head))
static struct hlist_head dax_host_list[DAX_HASH_SIZE];
static DEFINE_SPINLOCK(dax_host_lock);

int dax_read_lock(void)
{
	return srcu_read_lock(&dax_srcu);
}
EXPORT_SYMBOL_GPL(dax_read_lock);

void dax_read_unlock(int id)
{
	srcu_read_unlock(&dax_srcu, id);
}
EXPORT_SYMBOL_GPL(dax_read_unlock);

int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
		pgoff_t *pgoff)
{
	phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512;

	if (pgoff)
		*pgoff = PHYS_PFN(phys_off);
	if (phys_off % PAGE_SIZE || size % PAGE_SIZE)
		return -EINVAL;
	return 0;
}
EXPORT_SYMBOL(bdev_dax_pgoff);
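/*
 * Worked example (illustrative only, assuming a 4K PAGE_SIZE and
 * 512-byte sectors): a partition starting at sector 2048 yields
 * phys_off = 2048 * 512 = 1 MiB for sector 0, which is page aligned,
 * so *pgoff is set to 256 and bdev_dax_pgoff() returns 0.  A partition
 * starting at sector 34 yields phys_off = 17408, which is not a
 * multiple of PAGE_SIZE, so the translation fails with -EINVAL.
 */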
/**
 * __bdev_dax_supported() - Check if the device supports dax for filesystem
 * @sb: The superblock of the device
 * @blocksize: The block size of the device
 *
 * This is a library function for filesystems to check if the block device
 * can be mounted with dax option.
 *
 * Return: negative errno if unsupported, 0 if supported.
 */
int __bdev_dax_supported(struct super_block *sb, int blocksize)
{
	struct block_device *bdev = sb->s_bdev;
	struct dax_device *dax_dev;
	pgoff_t pgoff;
	int err, id;
	void *kaddr;
	pfn_t pfn;
	long len;

	if (blocksize != PAGE_SIZE) {
		pr_err("VFS (%s): error: unsupported blocksize for dax\n",
				sb->s_id);
		return -EINVAL;
	}

	err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
	if (err) {
		pr_err("VFS (%s): error: unaligned partition for dax\n",
				sb->s_id);
		return err;
	}

	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
	if (!dax_dev) {
		pr_err("VFS (%s): error: device does not support dax\n",
				sb->s_id);
		return -EOPNOTSUPP;
	}

	id = dax_read_lock();
	len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
	dax_read_unlock(id);

	put_dax(dax_dev);

	if (len < 1) {
		pr_err("VFS (%s): error: dax access failed (%ld)\n",
				sb->s_id, len);
		return len < 0 ? len : -EIO;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(__bdev_dax_supported);

/**
 * struct dax_device - anchor object for dax services
 * @list: node for dax_host_list lookups by @host
 * @inode: core vfs
 * @cdev: optional character interface for "device dax"
 * @host: optional name for lookups where the device path is not available
 * @private: dax driver private data
 * @alive: !alive + rcu grace period == no new operations / mappings
 * @ops: operations vector supplied by the dax driver
 */
struct dax_device {
	struct hlist_node list;
	struct inode inode;
	struct cdev cdev;
	const char *host;
	void *private;
	bool alive;
	const struct dax_operations *ops;
};

/**
 * dax_direct_access() - translate a device pgoff to an absolute pfn
 * @dax_dev: a dax_device instance representing the logical memory range
 * @pgoff: offset in pages from the start of the device to translate
 * @nr_pages: number of consecutive pages caller can handle relative to @pfn
 * @kaddr: output parameter that returns a virtual address mapping of pfn
 * @pfn: output parameter that returns an absolute pfn translation of @pgoff
 *
 * Return: negative errno if an error occurs, otherwise the number of
 * pages accessible at the device relative @pgoff.
 */
long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
		void **kaddr, pfn_t *pfn)
{
	long avail;

	/*
	 * The device driver is allowed to sleep, in order to make the
	 * memory directly accessible.
	 */
	might_sleep();

	if (!dax_dev)
		return -EOPNOTSUPP;

	if (!dax_alive(dax_dev))
		return -ENXIO;

	if (nr_pages < 0)
		return nr_pages;

	avail = dax_dev->ops->direct_access(dax_dev, pgoff, nr_pages,
			kaddr, pfn);
	if (!avail)
		return -ERANGE;
	return min(avail, nr_pages);
}
EXPORT_SYMBOL_GPL(dax_direct_access);
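/*
 * Illustrative caller pattern (a sketch, not an in-tree consumer; the
 * helper name is hypothetical): dax_direct_access() must be called
 * under dax_read_lock() so that kill_dax() can drain in-flight users.
 *
 *	static long example_dax_read(struct dax_device *dax_dev,
 *			pgoff_t pgoff, void *dst, size_t len)
 *	{
 *		void *kaddr;
 *		pfn_t pfn;
 *		long rc;
 *		int id;
 *
 *		id = dax_read_lock();
 *		rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
 *		if (rc > 0)
 *			memcpy(dst, kaddr, min_t(size_t, len, PAGE_SIZE));
 *		dax_read_unlock(id);
 *		return rc;
 *	}
 */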
bool dax_alive(struct dax_device *dax_dev)
{
	lockdep_assert_held(&dax_srcu);
	return dax_dev->alive;
}
EXPORT_SYMBOL_GPL(dax_alive);

static int dax_host_hash(const char *host)
{
	return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
}

/*
 * Note, rcu is not protecting the liveness of dax_dev, rcu is ensuring
 * that any fault handlers or operations that might have seen
 * dax_alive(), have completed.  Any operations that start after
 * synchronize_srcu() has run will abort upon seeing !dax_alive().
 */
void kill_dax(struct dax_device *dax_dev)
{
	if (!dax_dev)
		return;

	dax_dev->alive = false;

	synchronize_srcu(&dax_srcu);

	spin_lock(&dax_host_lock);
	hlist_del_init(&dax_dev->list);
	spin_unlock(&dax_host_lock);

	dax_dev->private = NULL;
}
EXPORT_SYMBOL_GPL(kill_dax);

static struct inode *dax_alloc_inode(struct super_block *sb)
{
	struct dax_device *dax_dev;

	dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL);
	if (!dax_dev)
		return NULL;

	return &dax_dev->inode;
}

static struct dax_device *to_dax_dev(struct inode *inode)
{
	return container_of(inode, struct dax_device, inode);
}

static void dax_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	struct dax_device *dax_dev = to_dax_dev(inode);

	kfree(dax_dev->host);
	dax_dev->host = NULL;
	ida_simple_remove(&dax_minor_ida, MINOR(inode->i_rdev));
	kmem_cache_free(dax_cache, dax_dev);
}

static void dax_destroy_inode(struct inode *inode)
{
	struct dax_device *dax_dev = to_dax_dev(inode);

	WARN_ONCE(dax_dev->alive,
			"kill_dax() must be called before final iput()\n");
	call_rcu(&inode->i_rcu, dax_i_callback);
}

static const struct super_operations dax_sops = {
	.statfs = simple_statfs,
	.alloc_inode = dax_alloc_inode,
	.destroy_inode = dax_destroy_inode,
	.drop_inode = generic_delete_inode,
};

static struct dentry *dax_mount(struct file_system_type *fs_type,
		int flags, const char *dev_name, void *data)
{
	return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC);
}

static struct file_system_type dax_fs_type = {
	.name = "dax",
	.mount = dax_mount,
	.kill_sb = kill_anon_super,
};

static int dax_test(struct inode *inode, void *data)
{
	dev_t devt = *(dev_t *) data;

	return inode->i_rdev == devt;
}

static int dax_set(struct inode *inode, void *data)
{
	dev_t devt = *(dev_t *) data;

	inode->i_rdev = devt;
	return 0;
}

static struct dax_device *dax_dev_get(dev_t devt)
{
	struct dax_device *dax_dev;
	struct inode *inode;

	inode = iget5_locked(dax_superblock, hash_32(devt + DAXFS_MAGIC, 31),
			dax_test, dax_set, &devt);

	if (!inode)
		return NULL;

	dax_dev = to_dax_dev(inode);
	if (inode->i_state & I_NEW) {
		dax_dev->alive = true;
		inode->i_cdev = &dax_dev->cdev;
		inode->i_mode = S_IFCHR;
		inode->i_flags = S_DAX;
		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
		unlock_new_inode(inode);
	}

	return dax_dev;
}

static void dax_add_host(struct dax_device *dax_dev, const char *host)
{
	int hash;

	/*
	 * Unconditionally init dax_dev since it's coming from a
	 * non-zeroed slab cache
	 */
	INIT_HLIST_NODE(&dax_dev->list);
	dax_dev->host = host;
	if (!host)
		return;

	hash = dax_host_hash(host);
	spin_lock(&dax_host_lock);
	hlist_add_head(&dax_dev->list, &dax_host_list[hash]);
	spin_unlock(&dax_host_lock);
}

struct dax_device *alloc_dax(void *private, const char *__host,
		const struct dax_operations *ops)
{
	struct dax_device *dax_dev;
	const char *host;
	dev_t devt;
	int minor;

	host = kstrdup(__host, GFP_KERNEL);
	if (__host && !host)
		return NULL;

	minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL);
	if (minor < 0)
		goto err_minor;

	devt = MKDEV(MAJOR(dax_devt), minor);
	dax_dev = dax_dev_get(devt);
	if (!dax_dev)
		goto err_dev;

	dax_add_host(dax_dev, host);
	dax_dev->ops = ops;
	dax_dev->private = private;
	return dax_dev;

 err_dev:
	ida_simple_remove(&dax_minor_ida, minor);
 err_minor:
	kfree(host);
	return NULL;
}
EXPORT_SYMBOL_GPL(alloc_dax);
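/*
 * Illustrative provider lifecycle (a sketch; "foo" and its ops are
 * hypothetical, not defined in this file).  A dax driver supplies a
 * dax_operations instance to alloc_dax() at bind time and unwinds with
 * kill_dax() followed by put_dax() at unbind time:
 *
 *	static const struct dax_operations foo_dax_ops = {
 *		.direct_access = foo_dax_direct_access,
 *	};
 *
 *	dax_dev = alloc_dax(foo, foo_disk->disk_name, &foo_dax_ops);
 *	if (!dax_dev)
 *		return -ENOMEM;
 *	...
 *	kill_dax(dax_dev);
 *	put_dax(dax_dev);
 */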
void put_dax(struct dax_device *dax_dev)
{
	if (!dax_dev)
		return;
	iput(&dax_dev->inode);
}
EXPORT_SYMBOL_GPL(put_dax);

/**
 * dax_get_by_host() - temporary lookup mechanism for filesystem-dax
 * @host: alternate name for the device registered by a dax driver
 */
struct dax_device *dax_get_by_host(const char *host)
{
	struct dax_device *dax_dev, *found = NULL;
	int hash, id;

	if (!host)
		return NULL;

	hash = dax_host_hash(host);

	id = dax_read_lock();
	spin_lock(&dax_host_lock);
	hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) {
		if (!dax_alive(dax_dev)
				|| strcmp(host, dax_dev->host) != 0)
			continue;

		if (igrab(&dax_dev->inode))
			found = dax_dev;
		break;
	}
	spin_unlock(&dax_host_lock);
	dax_read_unlock(id);

	return found;
}
EXPORT_SYMBOL_GPL(dax_get_by_host);
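/*
 * Illustrative filesystem-side lookup (a sketch): a filesystem that
 * wants to hold the dax_device for the life of the mount resolves it
 * by disk name and drops the reference at unmount:
 *
 *	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
 *	if (!dax_dev)
 *		return -EOPNOTSUPP;
 *	...
 *	put_dax(dax_dev);
 */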
/**
 * inode_dax() - convert a public inode into its dax_device
 * @inode: An inode with i_cdev pointing to a dax_device
 *
 * Note this is not equivalent to to_dax_dev() which is for private
 * internal use where we know the inode filesystem type == dax_fs_type.
 */
struct dax_device *inode_dax(struct inode *inode)
{
	struct cdev *cdev = inode->i_cdev;

	return container_of(cdev, struct dax_device, cdev);
}
EXPORT_SYMBOL_GPL(inode_dax);

struct inode *dax_inode(struct dax_device *dax_dev)
{
	return &dax_dev->inode;
}
EXPORT_SYMBOL_GPL(dax_inode);

void *dax_get_private(struct dax_device *dax_dev)
{
	return dax_dev->private;
}
EXPORT_SYMBOL_GPL(dax_get_private);

static void init_once(void *_dax_dev)
{
	struct dax_device *dax_dev = _dax_dev;
	struct inode *inode = &dax_dev->inode;

	inode_init_once(inode);
}

static int __dax_fs_init(void)
{
	int rc;

	dax_cache = kmem_cache_create("dax_cache", sizeof(struct dax_device), 0,
			(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
			 SLAB_MEM_SPREAD|SLAB_ACCOUNT),
			init_once);
	if (!dax_cache)
		return -ENOMEM;

	rc = register_filesystem(&dax_fs_type);
	if (rc)
		goto err_register_fs;

	dax_mnt = kern_mount(&dax_fs_type);
	if (IS_ERR(dax_mnt)) {
		rc = PTR_ERR(dax_mnt);
		goto err_mount;
	}
	dax_superblock = dax_mnt->mnt_sb;

	return 0;

 err_mount:
	unregister_filesystem(&dax_fs_type);
 err_register_fs:
	kmem_cache_destroy(dax_cache);

	return rc;
}

static void __dax_fs_exit(void)
{
	kern_unmount(dax_mnt);
	unregister_filesystem(&dax_fs_type);
	kmem_cache_destroy(dax_cache);
}

static int __init dax_fs_init(void)
{
	int rc;

	rc = __dax_fs_init();
	if (rc)
		return rc;

	rc = alloc_chrdev_region(&dax_devt, 0, MINORMASK+1, "dax");
	if (rc)
		__dax_fs_exit();
	return rc;
}

static void __exit dax_fs_exit(void)
{
	unregister_chrdev_region(dax_devt, MINORMASK+1);
	ida_destroy(&dax_minor_ida);
	__dax_fs_exit();
}

MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL v2");
subsys_initcall(dax_fs_init);
module_exit(dax_fs_exit);