// SPDX-License-Identifier: GPL-2.0
/*
 * dax: direct host memory access
 * Copyright (C) 2020 Red Hat, Inc.
 */

#include "fuse_i.h"

#include <linux/dax.h>
#include <linux/uio.h>
#include <linux/pfn_t.h>
#include <linux/iomap.h>
#include <linux/interval_tree.h>

/*
 * Default memory range size. A power of 2 so it agrees with common FUSE_INIT
 * map_alignment values 4KB and 64KB.
 */
#define FUSE_DAX_SHIFT	21
#define FUSE_DAX_SZ	(1 << FUSE_DAX_SHIFT)
#define FUSE_DAX_PAGES	(FUSE_DAX_SZ / PAGE_SIZE)

/** Translation information for file offsets to DAX window offsets */
struct fuse_dax_mapping {
	/* Will connect in fcd->free_ranges to keep track of free memory */
	struct list_head list;

	/* For interval tree in file/inode */
	struct interval_tree_node itn;

	/** Position in DAX window */
	u64 window_offset;

	/** Length of mapping, in bytes */
	loff_t length;

	/* Is this mapping read-only or read-write */
	bool writable;
};

/* Per-inode dax map */
struct fuse_inode_dax {
	/* Semaphore to protect modifications to the dmap tree */
	struct rw_semaphore sem;

	/* Sorted rb tree of struct fuse_dax_mapping elements */
	struct rb_root_cached tree;
	unsigned long nr;
};

struct fuse_conn_dax {
	/* DAX device */
	struct dax_device *dev;

	/* Lock protecting accesses to members of this structure */
	spinlock_t lock;

	/* DAX Window Free Ranges */
	long nr_free_ranges;
	struct list_head free_ranges;
};

static inline struct fuse_dax_mapping *
node_to_dmap(struct interval_tree_node *node)
{
	if (!node)
		return NULL;

	return container_of(node, struct fuse_dax_mapping, itn);
}

static struct fuse_dax_mapping *alloc_dax_mapping(struct fuse_conn_dax *fcd)
{
	struct fuse_dax_mapping *dmap;

	spin_lock(&fcd->lock);
	dmap = list_first_entry_or_null(&fcd->free_ranges,
					struct fuse_dax_mapping, list);
	if (dmap) {
		list_del_init(&dmap->list);
		WARN_ON(fcd->nr_free_ranges <= 0);
		fcd->nr_free_ranges--;
	}
	spin_unlock(&fcd->lock);
	return dmap;
}

/* This assumes fcd->lock is held */
static void __dmap_add_to_free_pool(struct fuse_conn_dax *fcd,
				    struct fuse_dax_mapping *dmap)
{
	list_add_tail(&dmap->list, &fcd->free_ranges);
	fcd->nr_free_ranges++;
}

static void dmap_add_to_free_pool(struct fuse_conn_dax *fcd,
				  struct fuse_dax_mapping *dmap)
{
	/* Return fuse_dax_mapping to free list */
	spin_lock(&fcd->lock);
	__dmap_add_to_free_pool(fcd, dmap);
	spin_unlock(&fcd->lock);
}

static int fuse_setup_one_mapping(struct inode *inode, unsigned long start_idx,
				  struct fuse_dax_mapping *dmap, bool writable,
				  bool upgrade)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_conn_dax *fcd = fc->dax;
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_setupmapping_in inarg;
	loff_t offset = start_idx << FUSE_DAX_SHIFT;
	FUSE_ARGS(args);
	ssize_t err;

	WARN_ON(fcd->nr_free_ranges < 0);

	/* Ask fuse daemon to setup mapping */
	memset(&inarg, 0, sizeof(inarg));
	inarg.foffset = offset;
	inarg.fh = -1;
	inarg.moffset = dmap->window_offset;
	inarg.len = FUSE_DAX_SZ;
	inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
	if (writable)
		inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
	args.opcode = FUSE_SETUPMAPPING;
	args.nodeid = fi->nodeid;
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	err = fuse_simple_request(fc, &args);
	if (err < 0)
		return err;
	dmap->writable = writable;
	if (!upgrade) {
		dmap->itn.start = dmap->itn.last = start_idx;
		/* Protected by fi->dax->sem */
		interval_tree_insert(&dmap->itn, &fi->dax->tree);
		fi->dax->nr++;
	}
	return 0;
}

static int fuse_send_removemapping(struct inode *inode,
				   struct fuse_removemapping_in *inargp,
				   struct fuse_removemapping_one *remove_one)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);
	FUSE_ARGS(args);

	args.opcode = FUSE_REMOVEMAPPING;
	args.nodeid = fi->nodeid;
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(*inargp);
	args.in_args[0].value = inargp;
	args.in_args[1].size = inargp->count * sizeof(*remove_one);
	args.in_args[1].value = remove_one;
	return fuse_simple_request(fc, &args);
}

static int dmap_removemapping_list(struct inode *inode, unsigned int num,
				   struct list_head *to_remove)
{
	struct fuse_removemapping_one *remove_one, *ptr;
	struct fuse_removemapping_in inarg;
	struct fuse_dax_mapping *dmap;
	int ret, i = 0, nr_alloc;

	nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY);
	remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOFS);
	if (!remove_one)
		return -ENOMEM;

	ptr = remove_one;
	list_for_each_entry(dmap, to_remove, list) {
		ptr->moffset = dmap->window_offset;
		ptr->len = dmap->length;
		ptr++;
		i++;
		num--;
		if (i >= nr_alloc || num == 0) {
			memset(&inarg, 0, sizeof(inarg));
			inarg.count = i;
			ret = fuse_send_removemapping(inode, &inarg,
						      remove_one);
			if (ret)
				goto out;
			ptr = remove_one;
			i = 0;
		}
	}
out:
	kfree(remove_one);
	return ret;
}

/*
 * Cleanup dmap entry and add back to free list. This should be called with
 * fcd->lock held.
 */
static void dmap_reinit_add_to_free_pool(struct fuse_conn_dax *fcd,
					 struct fuse_dax_mapping *dmap)
{
	pr_debug("fuse: freeing memory range start_idx=0x%lx end_idx=0x%lx window_offset=0x%llx length=0x%llx\n",
		 dmap->itn.start, dmap->itn.last, dmap->window_offset,
		 dmap->length);
	dmap->itn.start = dmap->itn.last = 0;
	__dmap_add_to_free_pool(fcd, dmap);
}

/*
 * Free inode dmap entries whose range falls inside [start, end].
 * Does not take any locks. At this point of time it should only be
 * called from evict_inode() path where we know all dmap entries can be
 * reclaimed.
 */
static void inode_reclaim_dmap_range(struct fuse_conn_dax *fcd,
				     struct inode *inode,
				     loff_t start, loff_t end)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_dax_mapping *dmap, *n;
	int err, num = 0;
	LIST_HEAD(to_remove);
	unsigned long start_idx = start >> FUSE_DAX_SHIFT;
	unsigned long end_idx = end >> FUSE_DAX_SHIFT;
	struct interval_tree_node *node;

	while (1) {
		node = interval_tree_iter_first(&fi->dax->tree, start_idx,
						end_idx);
		if (!node)
			break;
		dmap = node_to_dmap(node);
		interval_tree_remove(&dmap->itn, &fi->dax->tree);
		num++;
		list_add(&dmap->list, &to_remove);
	}

	/* Nothing to remove */
	if (list_empty(&to_remove))
		return;

	WARN_ON(fi->dax->nr < num);
	fi->dax->nr -= num;
	err = dmap_removemapping_list(inode, num, &to_remove);
	if (err && err != -ENOTCONN) {
		pr_warn("Failed to removemappings. start=0x%llx end=0x%llx\n",
			start, end);
	}
	spin_lock(&fcd->lock);
	list_for_each_entry_safe(dmap, n, &to_remove, list) {
		list_del_init(&dmap->list);
		dmap_reinit_add_to_free_pool(fcd, dmap);
	}
	spin_unlock(&fcd->lock);
}

/*
 * It is called from evict_inode() and by that time inode is going away. So
 * this function does not take any locks like fi->dax->sem for traversing
 * that fuse inode interval tree. If that lock is taken then lock validator
 * complains of deadlock situation w.r.t fs_reclaim lock.
 */
void fuse_dax_inode_cleanup(struct inode *inode)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	/*
	 * fuse_evict_inode() has already called truncate_inode_pages_final()
	 * before we arrive here. So we should not have to worry about any
	 * pages/exception entries still associated with inode.
	 */
	inode_reclaim_dmap_range(fc->dax, inode, 0, -1);
	WARN_ON(fi->dax->nr);
}

static void fuse_fill_iomap_hole(struct iomap *iomap, loff_t length)
{
	iomap->addr = IOMAP_NULL_ADDR;
	iomap->length = length;
	iomap->type = IOMAP_HOLE;
}

static void fuse_fill_iomap(struct inode *inode, loff_t pos, loff_t length,
			    struct iomap *iomap, struct fuse_dax_mapping *dmap,
			    unsigned int flags)
{
	loff_t offset, len;
	loff_t i_size = i_size_read(inode);

	offset = pos - (dmap->itn.start << FUSE_DAX_SHIFT);
	len = min(length, dmap->length - offset);

	/* If length is beyond end of file, truncate further */
	if (pos + len > i_size)
		len = i_size - pos;

	if (len > 0) {
		iomap->addr = dmap->window_offset + offset;
		iomap->length = len;
		if (flags & IOMAP_FAULT)
			iomap->length = ALIGN(len, PAGE_SIZE);
		iomap->type = IOMAP_MAPPED;
	} else {
		/* Mapping beyond end of file is hole */
		fuse_fill_iomap_hole(iomap, length);
	}
}

static int fuse_setup_new_dax_mapping(struct inode *inode, loff_t pos,
				      loff_t length, unsigned int flags,
				      struct iomap *iomap)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_conn_dax *fcd = fc->dax;
	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
	int ret;
	bool writable = flags & IOMAP_WRITE;
	unsigned long start_idx = pos >> FUSE_DAX_SHIFT;
	struct interval_tree_node *node;

	alloc_dmap = alloc_dax_mapping(fcd);
	if (!alloc_dmap)
		return -EIO;

	/*
	 * Take write lock so that only one caller can try to setup mapping
	 * and others wait.
	 */
	down_write(&fi->dax->sem);
	/*
	 * We dropped lock. Check again if somebody else setup
	 * mapping already.
	 */
	node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
	if (node) {
		dmap = node_to_dmap(node);
		fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
		dmap_add_to_free_pool(fcd, alloc_dmap);
		up_write(&fi->dax->sem);
		return 0;
	}

	/* Setup one mapping */
	ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, alloc_dmap,
				     writable, false);
	if (ret < 0) {
		dmap_add_to_free_pool(fcd, alloc_dmap);
		up_write(&fi->dax->sem);
		return ret;
	}
	fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags);
	up_write(&fi->dax->sem);
	return 0;
}

static int fuse_upgrade_dax_mapping(struct inode *inode, loff_t pos,
				    loff_t length, unsigned int flags,
				    struct iomap *iomap)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_dax_mapping *dmap;
	int ret;
	unsigned long idx = pos >> FUSE_DAX_SHIFT;
	struct interval_tree_node *node;

	/*
	 * Take exclusive lock so that only one caller can try to setup
	 * mapping and others wait.
	 */
	down_write(&fi->dax->sem);
	node = interval_tree_iter_first(&fi->dax->tree, idx, idx);

	/* We are holding either inode lock or i_mmap_sem, and that should
	 * ensure that dmap can't be reclaimed or truncated and it should
	 * still be there in the tree despite the fact we dropped and
	 * re-acquired the lock.
	 */
	ret = -EIO;
	if (WARN_ON(!node))
		goto out_err;

	dmap = node_to_dmap(node);

	/* Maybe another thread already upgraded mapping while we were not
	 * holding lock.
	 */
	if (dmap->writable) {
		ret = 0;
		goto out_fill_iomap;
	}

	ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, dmap, true,
				     true);
	if (ret < 0)
		goto out_err;
out_fill_iomap:
	fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
out_err:
	up_write(&fi->dax->sem);
	return ret;
}

/* This is just for DAX and the mapping is ephemeral, do not use it for other
 * purposes since there is no block device with a permanent mapping.
 */
static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
			    unsigned int flags, struct iomap *iomap,
			    struct iomap *srcmap)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_dax_mapping *dmap;
	bool writable = flags & IOMAP_WRITE;
	unsigned long start_idx = pos >> FUSE_DAX_SHIFT;
	struct interval_tree_node *node;

	/* We don't support FIEMAP */
	if (WARN_ON(flags & IOMAP_REPORT))
		return -EIO;

	iomap->offset = pos;
	iomap->flags = 0;
	iomap->bdev = NULL;
	iomap->dax_dev = fc->dax->dev;

	/*
	 * Both read/write and mmap path can race here. So we need something
	 * to make sure if we are setting up mapping, then other path waits.
	 *
	 * For now, use a semaphore for this. It probably needs to be
	 * optimized later.
	 */
	down_read(&fi->dax->sem);
	node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
	if (node) {
		dmap = node_to_dmap(node);
		if (writable && !dmap->writable) {
			/* Upgrade read-only mapping to read-write. This will
			 * require exclusive fi->dax->sem lock as we don't
			 * want two threads to be trying to do this
			 * simultaneously for the same dmap. So drop shared
			 * lock and acquire exclusive lock.
			 */
			up_read(&fi->dax->sem);
			pr_debug("%s: Upgrading mapping at offset 0x%llx length 0x%llx\n",
				 __func__, pos, length);
			return fuse_upgrade_dax_mapping(inode, pos, length,
							flags, iomap);
		} else {
			fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
			up_read(&fi->dax->sem);
			return 0;
		}
	} else {
		up_read(&fi->dax->sem);
		pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n",
			 __func__, pos, length);
		if (pos >= i_size_read(inode))
			goto iomap_hole;

		return fuse_setup_new_dax_mapping(inode, pos, length, flags,
						  iomap);
	}

	/*
	 * If read beyond end of file happens, fs code seems to return
	 * it as hole
	 */
iomap_hole:
	fuse_fill_iomap_hole(iomap, length);
	pr_debug("%s returning hole mapping. pos=0x%llx length_asked=0x%llx length_returned=0x%llx\n",
		 __func__, pos, length, iomap->length);
	return 0;
}

static int fuse_iomap_end(struct inode *inode, loff_t pos, loff_t length,
			  ssize_t written, unsigned int flags,
			  struct iomap *iomap)
{
	/* DAX writes beyond end-of-file aren't handled using iomap, so the
	 * file size is unchanged and there is nothing to do here.
	 */
	return 0;
}

static const struct iomap_ops fuse_iomap_ops = {
	.iomap_begin = fuse_iomap_begin,
	.iomap_end = fuse_iomap_end,
};

ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!inode_trylock_shared(inode))
			return -EAGAIN;
	} else {
		inode_lock_shared(inode);
	}

	ret = dax_iomap_rw(iocb, to, &fuse_iomap_ops);
	inode_unlock_shared(inode);

	/* TODO file_accessed(iocb->f_filp) */
	return ret;
}

static bool file_extending_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	return (iov_iter_rw(from) == WRITE &&
		((iocb->ki_pos) >= i_size_read(inode) ||
		 (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode))));
}

static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
	ssize_t ret;

	ret = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
	if (ret < 0)
		return ret;

	fuse_invalidate_attr(inode);
	fuse_write_update_size(inode, iocb->ki_pos);
	return ret;
}

ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!inode_trylock(inode))
			return -EAGAIN;
	} else {
		inode_lock(inode);
	}

	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		goto out;

	ret = file_remove_privs(iocb->ki_filp);
	if (ret)
		goto out;
	/* TODO file_update_time() but we don't want metadata I/O */

	/* Do not use dax for file extending writes as write and on
	 * disk i_size increase are not atomic otherwise.
	 */
	if (file_extending_write(iocb, from))
		ret = fuse_dax_direct_write(iocb, from);
	else
		ret = dax_iomap_rw(iocb, from, &fuse_iomap_ops);

out:
	inode_unlock(inode);

	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}

static int fuse_dax_writepages(struct address_space *mapping,
			       struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);

	return dax_writeback_mapping_range(mapping, fc->dax->dev, wbc);
}

static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf,
				   enum page_entry_size pe_size, bool write)
{
	vm_fault_t ret;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct super_block *sb = inode->i_sb;
	pfn_t pfn;

	if (write)
		sb_start_pagefault(sb);

	ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, &fuse_iomap_ops);

	if (ret & VM_FAULT_NEEDDSYNC)
		ret = dax_finish_sync_fault(vmf, pe_size, pfn);

	if (write)
		sb_end_pagefault(sb);

	return ret;
}

static vm_fault_t fuse_dax_fault(struct vm_fault *vmf)
{
	return __fuse_dax_fault(vmf, PE_SIZE_PTE,
				vmf->flags & FAULT_FLAG_WRITE);
}

static vm_fault_t fuse_dax_huge_fault(struct vm_fault *vmf,
				      enum page_entry_size pe_size)
{
	return __fuse_dax_fault(vmf, pe_size, vmf->flags & FAULT_FLAG_WRITE);
}

static vm_fault_t fuse_dax_page_mkwrite(struct vm_fault *vmf)
{
	return __fuse_dax_fault(vmf, PE_SIZE_PTE, true);
}

static vm_fault_t fuse_dax_pfn_mkwrite(struct vm_fault *vmf)
{
	return __fuse_dax_fault(vmf, PE_SIZE_PTE, true);
}

static const struct vm_operations_struct fuse_dax_vm_ops = {
	.fault		= fuse_dax_fault,
	.huge_fault	= fuse_dax_huge_fault,
	.page_mkwrite	= fuse_dax_page_mkwrite,
	.pfn_mkwrite	= fuse_dax_pfn_mkwrite,
};

int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma)
{
	file_accessed(file);
	vma->vm_ops = &fuse_dax_vm_ops;
	vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
	return 0;
}

static void fuse_free_dax_mem_ranges(struct list_head *mem_list)
{
	struct fuse_dax_mapping *range, *temp;

	/* Free all allocated elements */
	list_for_each_entry_safe(range, temp, mem_list, list) {
		list_del(&range->list);
		kfree(range);
	}
}

void fuse_dax_conn_free(struct fuse_conn *fc)
{
	if (fc->dax) {
		fuse_free_dax_mem_ranges(&fc->dax->free_ranges);
		kfree(fc->dax);
	}
}

static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
{
	long nr_pages, nr_ranges;
	void *kaddr;
	pfn_t pfn;
	struct fuse_dax_mapping *range;
	int ret, id;
	size_t dax_size = -1;
	unsigned long i;

	INIT_LIST_HEAD(&fcd->free_ranges);
	id = dax_read_lock();
	nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), &kaddr,
				     &pfn);
	dax_read_unlock(id);
	if (nr_pages < 0) {
		pr_debug("dax_direct_access() returned %ld\n", nr_pages);
		return nr_pages;
	}

	nr_ranges = nr_pages / FUSE_DAX_PAGES;
	pr_debug("%s: dax mapped %ld pages. nr_ranges=%ld\n",
		 __func__, nr_pages, nr_ranges);

	for (i = 0; i < nr_ranges; i++) {
		range = kzalloc(sizeof(struct fuse_dax_mapping), GFP_KERNEL);
		ret = -ENOMEM;
		if (!range)
			goto out_err;

		/* TODO: This offset only works if virtio-fs driver is not
		 * having some memory hidden at the beginning. This needs
		 * better handling.
		 */
		range->window_offset = i * FUSE_DAX_SZ;
		range->length = FUSE_DAX_SZ;
		list_add_tail(&range->list, &fcd->free_ranges);
	}

	fcd->nr_free_ranges = nr_ranges;
	return 0;
out_err:
	/* Free all allocated elements */
	fuse_free_dax_mem_ranges(&fcd->free_ranges);
	return ret;
}

int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev)
{
	struct fuse_conn_dax *fcd;
	int err;

	if (!dax_dev)
		return 0;

	fcd = kzalloc(sizeof(*fcd), GFP_KERNEL);
	if (!fcd)
		return -ENOMEM;

	spin_lock_init(&fcd->lock);
	fcd->dev = dax_dev;
	err = fuse_dax_mem_range_init(fcd);
	if (err) {
		kfree(fcd);
		return err;
	}

	fc->dax = fcd;
	return 0;
}

bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	fi->dax = NULL;
	if (fc->dax) {
		fi->dax = kzalloc(sizeof(*fi->dax), GFP_KERNEL_ACCOUNT);
		if (!fi->dax)
			return false;

		init_rwsem(&fi->dax->sem);
		fi->dax->tree = RB_ROOT_CACHED;
	}

	return true;
}

static const struct address_space_operations fuse_dax_file_aops = {
	.writepages	= fuse_dax_writepages,
	.direct_IO	= noop_direct_IO,
	.set_page_dirty	= noop_set_page_dirty,
	.invalidatepage	= noop_invalidatepage,
};

void fuse_dax_inode_init(struct inode *inode)
{
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (!fc->dax)
		return;

	inode->i_flags |= S_DAX;
	inode->i_data.a_ops = &fuse_dax_file_aops;
}

bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment)
{
	if (fc->dax && (map_alignment > FUSE_DAX_SHIFT)) {
		pr_warn("FUSE: map_alignment %u incompatible with dax mem range size %u\n",
			map_alignment, FUSE_DAX_SZ);
		return false;
	}
	return true;
}