1 /* 2 * Copyright(c) 2017 Intel Corporation. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of version 2 of the GNU General Public License as 6 * published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 */ 13 #include <linux/pagemap.h> 14 #include <linux/module.h> 15 #include <linux/mount.h> 16 #include <linux/magic.h> 17 #include <linux/cdev.h> 18 #include <linux/hash.h> 19 #include <linux/slab.h> 20 #include <linux/dax.h> 21 #include <linux/fs.h> 22 23 static int nr_dax = CONFIG_NR_DEV_DAX; 24 module_param(nr_dax, int, S_IRUGO); 25 MODULE_PARM_DESC(nr_dax, "max number of dax device instances"); 26 27 static dev_t dax_devt; 28 DEFINE_STATIC_SRCU(dax_srcu); 29 static struct vfsmount *dax_mnt; 30 static DEFINE_IDA(dax_minor_ida); 31 static struct kmem_cache *dax_cache __read_mostly; 32 static struct super_block *dax_superblock __read_mostly; 33 34 #define DAX_HASH_SIZE (PAGE_SIZE / sizeof(struct hlist_head)) 35 static struct hlist_head dax_host_list[DAX_HASH_SIZE]; 36 static DEFINE_SPINLOCK(dax_host_lock); 37 38 int dax_read_lock(void) 39 { 40 return srcu_read_lock(&dax_srcu); 41 } 42 EXPORT_SYMBOL_GPL(dax_read_lock); 43 44 void dax_read_unlock(int id) 45 { 46 srcu_read_unlock(&dax_srcu, id); 47 } 48 EXPORT_SYMBOL_GPL(dax_read_unlock); 49 50 /** 51 * struct dax_device - anchor object for dax services 52 * @inode: core vfs 53 * @cdev: optional character interface for "device dax" 54 * @host: optional name for lookups where the device path is not available 55 * @private: dax driver private data 56 * @alive: !alive + rcu grace period == no new operations / mappings 57 */ 58 struct dax_device { 59 struct hlist_node list; 60 struct inode inode; 61 struct cdev cdev; 62 const char *host; 63 void *private; 64 bool alive; 65 const struct dax_operations *ops; 66 }; 67 68 /** 69 * dax_direct_access() - translate a device pgoff to an absolute pfn 70 * @dax_dev: a dax_device instance representing the logical memory range 71 * @pgoff: offset in pages from the start of the device to translate 72 * @nr_pages: number of consecutive pages caller can handle relative to @pfn 73 * @kaddr: output parameter that returns a virtual address mapping of pfn 74 * @pfn: output parameter that returns an absolute pfn translation of @pgoff 75 * 76 * Return: negative errno if an error occurs, otherwise the number of 77 * pages accessible at the device relative @pgoff. 78 */ 79 long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, 80 void **kaddr, pfn_t *pfn) 81 { 82 long avail; 83 84 /* 85 * The device driver is allowed to sleep, in order to make the 86 * memory directly accessible. 87 */ 88 might_sleep(); 89 90 if (!dax_dev) 91 return -EOPNOTSUPP; 92 93 if (!dax_alive(dax_dev)) 94 return -ENXIO; 95 96 if (nr_pages < 0) 97 return nr_pages; 98 99 avail = dax_dev->ops->direct_access(dax_dev, pgoff, nr_pages, 100 kaddr, pfn); 101 if (!avail) 102 return -ERANGE; 103 return min(avail, nr_pages); 104 } 105 EXPORT_SYMBOL_GPL(dax_direct_access); 106 107 bool dax_alive(struct dax_device *dax_dev) 108 { 109 lockdep_assert_held(&dax_srcu); 110 return dax_dev->alive; 111 } 112 EXPORT_SYMBOL_GPL(dax_alive); 113 114 static int dax_host_hash(const char *host) 115 { 116 return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE; 117 } 118 119 /* 120 * Note, rcu is not protecting the liveness of dax_dev, rcu is ensuring 121 * that any fault handlers or operations that might have seen 122 * dax_alive(), have completed. Any operations that start after 123 * synchronize_srcu() has run will abort upon seeing !dax_alive(). 124 */ 125 void kill_dax(struct dax_device *dax_dev) 126 { 127 if (!dax_dev) 128 return; 129 130 dax_dev->alive = false; 131 132 synchronize_srcu(&dax_srcu); 133 134 spin_lock(&dax_host_lock); 135 hlist_del_init(&dax_dev->list); 136 spin_unlock(&dax_host_lock); 137 138 dax_dev->private = NULL; 139 } 140 EXPORT_SYMBOL_GPL(kill_dax); 141 142 static struct inode *dax_alloc_inode(struct super_block *sb) 143 { 144 struct dax_device *dax_dev; 145 146 dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL); 147 return &dax_dev->inode; 148 } 149 150 static struct dax_device *to_dax_dev(struct inode *inode) 151 { 152 return container_of(inode, struct dax_device, inode); 153 } 154 155 static void dax_i_callback(struct rcu_head *head) 156 { 157 struct inode *inode = container_of(head, struct inode, i_rcu); 158 struct dax_device *dax_dev = to_dax_dev(inode); 159 160 kfree(dax_dev->host); 161 dax_dev->host = NULL; 162 ida_simple_remove(&dax_minor_ida, MINOR(inode->i_rdev)); 163 kmem_cache_free(dax_cache, dax_dev); 164 } 165 166 static void dax_destroy_inode(struct inode *inode) 167 { 168 struct dax_device *dax_dev = to_dax_dev(inode); 169 170 WARN_ONCE(dax_dev->alive, 171 "kill_dax() must be called before final iput()\n"); 172 call_rcu(&inode->i_rcu, dax_i_callback); 173 } 174 175 static const struct super_operations dax_sops = { 176 .statfs = simple_statfs, 177 .alloc_inode = dax_alloc_inode, 178 .destroy_inode = dax_destroy_inode, 179 .drop_inode = generic_delete_inode, 180 }; 181 182 static struct dentry *dax_mount(struct file_system_type *fs_type, 183 int flags, const char *dev_name, void *data) 184 { 185 return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC); 186 } 187 188 static struct file_system_type dax_fs_type = { 189 .name = "dax", 190 .mount = dax_mount, 191 .kill_sb = kill_anon_super, 192 }; 193 194 static int dax_test(struct inode *inode, void *data) 195 { 196 dev_t devt = *(dev_t *) data; 197 198 return inode->i_rdev == devt; 199 } 200 201 static int dax_set(struct inode *inode, void *data) 202 { 203 dev_t devt = *(dev_t *) data; 204 205 inode->i_rdev = devt; 206 return 0; 207 } 208 209 static struct dax_device *dax_dev_get(dev_t devt) 210 { 211 struct dax_device *dax_dev; 212 struct inode *inode; 213 214 inode = iget5_locked(dax_superblock, hash_32(devt + DAXFS_MAGIC, 31), 215 dax_test, dax_set, &devt); 216 217 if (!inode) 218 return NULL; 219 220 dax_dev = to_dax_dev(inode); 221 if (inode->i_state & I_NEW) { 222 dax_dev->alive = true; 223 inode->i_cdev = &dax_dev->cdev; 224 inode->i_mode = S_IFCHR; 225 inode->i_flags = S_DAX; 226 mapping_set_gfp_mask(&inode->i_data, GFP_USER); 227 unlock_new_inode(inode); 228 } 229 230 return dax_dev; 231 } 232 233 static void dax_add_host(struct dax_device *dax_dev, const char *host) 234 { 235 int hash; 236 237 /* 238 * Unconditionally init dax_dev since it's coming from a 239 * non-zeroed slab cache 240 */ 241 INIT_HLIST_NODE(&dax_dev->list); 242 dax_dev->host = host; 243 if (!host) 244 return; 245 246 hash = dax_host_hash(host); 247 spin_lock(&dax_host_lock); 248 hlist_add_head(&dax_dev->list, &dax_host_list[hash]); 249 spin_unlock(&dax_host_lock); 250 } 251 252 struct dax_device *alloc_dax(void *private, const char *__host, 253 const struct dax_operations *ops) 254 { 255 struct dax_device *dax_dev; 256 const char *host; 257 dev_t devt; 258 int minor; 259 260 host = kstrdup(__host, GFP_KERNEL); 261 if (__host && !host) 262 return NULL; 263 264 minor = ida_simple_get(&dax_minor_ida, 0, nr_dax, GFP_KERNEL); 265 if (minor < 0) 266 goto err_minor; 267 268 devt = MKDEV(MAJOR(dax_devt), minor); 269 dax_dev = dax_dev_get(devt); 270 if (!dax_dev) 271 goto err_dev; 272 273 dax_add_host(dax_dev, host); 274 dax_dev->ops = ops; 275 dax_dev->private = private; 276 return dax_dev; 277 278 err_dev: 279 ida_simple_remove(&dax_minor_ida, minor); 280 err_minor: 281 kfree(host); 282 return NULL; 283 } 284 EXPORT_SYMBOL_GPL(alloc_dax); 285 286 void put_dax(struct dax_device *dax_dev) 287 { 288 if (!dax_dev) 289 return; 290 iput(&dax_dev->inode); 291 } 292 EXPORT_SYMBOL_GPL(put_dax); 293 294 /** 295 * dax_get_by_host() - temporary lookup mechanism for filesystem-dax 296 * @host: alternate name for the device registered by a dax driver 297 */ 298 struct dax_device *dax_get_by_host(const char *host) 299 { 300 struct dax_device *dax_dev, *found = NULL; 301 int hash, id; 302 303 if (!host) 304 return NULL; 305 306 hash = dax_host_hash(host); 307 308 id = dax_read_lock(); 309 spin_lock(&dax_host_lock); 310 hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) { 311 if (!dax_alive(dax_dev) 312 || strcmp(host, dax_dev->host) != 0) 313 continue; 314 315 if (igrab(&dax_dev->inode)) 316 found = dax_dev; 317 break; 318 } 319 spin_unlock(&dax_host_lock); 320 dax_read_unlock(id); 321 322 return found; 323 } 324 EXPORT_SYMBOL_GPL(dax_get_by_host); 325 326 /** 327 * inode_dax: convert a public inode into its dax_dev 328 * @inode: An inode with i_cdev pointing to a dax_dev 329 * 330 * Note this is not equivalent to to_dax_dev() which is for private 331 * internal use where we know the inode filesystem type == dax_fs_type. 332 */ 333 struct dax_device *inode_dax(struct inode *inode) 334 { 335 struct cdev *cdev = inode->i_cdev; 336 337 return container_of(cdev, struct dax_device, cdev); 338 } 339 EXPORT_SYMBOL_GPL(inode_dax); 340 341 struct inode *dax_inode(struct dax_device *dax_dev) 342 { 343 return &dax_dev->inode; 344 } 345 EXPORT_SYMBOL_GPL(dax_inode); 346 347 void *dax_get_private(struct dax_device *dax_dev) 348 { 349 return dax_dev->private; 350 } 351 EXPORT_SYMBOL_GPL(dax_get_private); 352 353 static void init_once(void *_dax_dev) 354 { 355 struct dax_device *dax_dev = _dax_dev; 356 struct inode *inode = &dax_dev->inode; 357 358 inode_init_once(inode); 359 } 360 361 static int __dax_fs_init(void) 362 { 363 int rc; 364 365 dax_cache = kmem_cache_create("dax_cache", sizeof(struct dax_device), 0, 366 (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| 367 SLAB_MEM_SPREAD|SLAB_ACCOUNT), 368 init_once); 369 if (!dax_cache) 370 return -ENOMEM; 371 372 rc = register_filesystem(&dax_fs_type); 373 if (rc) 374 goto err_register_fs; 375 376 dax_mnt = kern_mount(&dax_fs_type); 377 if (IS_ERR(dax_mnt)) { 378 rc = PTR_ERR(dax_mnt); 379 goto err_mount; 380 } 381 dax_superblock = dax_mnt->mnt_sb; 382 383 return 0; 384 385 err_mount: 386 unregister_filesystem(&dax_fs_type); 387 err_register_fs: 388 kmem_cache_destroy(dax_cache); 389 390 return rc; 391 } 392 393 static void __dax_fs_exit(void) 394 { 395 kern_unmount(dax_mnt); 396 unregister_filesystem(&dax_fs_type); 397 kmem_cache_destroy(dax_cache); 398 } 399 400 static int __init dax_fs_init(void) 401 { 402 int rc; 403 404 rc = __dax_fs_init(); 405 if (rc) 406 return rc; 407 408 nr_dax = max(nr_dax, 256); 409 rc = alloc_chrdev_region(&dax_devt, 0, nr_dax, "dax"); 410 if (rc) 411 __dax_fs_exit(); 412 return rc; 413 } 414 415 static void __exit dax_fs_exit(void) 416 { 417 unregister_chrdev_region(dax_devt, nr_dax); 418 ida_destroy(&dax_minor_ida); 419 __dax_fs_exit(); 420 } 421 422 MODULE_AUTHOR("Intel Corporation"); 423 MODULE_LICENSE("GPL v2"); 424 subsys_initcall(dax_fs_init); 425 module_exit(dax_fs_exit); 426