xref: /openbmc/linux/fs/fuse/dax.c (revision c2d0ad00)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * dax: direct host memory access
4  * Copyright (C) 2020 Red Hat, Inc.
5  */
6 
7 #include "fuse_i.h"
8 
9 #include <linux/dax.h>
10 #include <linux/uio.h>
11 #include <linux/pfn_t.h>
12 #include <linux/iomap.h>
13 #include <linux/interval_tree.h>
14 
15 /*
16  * Default memory range size.  A power of 2 so that it agrees with common
17  * FUSE_INIT map_alignment values of 4KB and 64KB.
18  */
19 #define FUSE_DAX_SHIFT	21
20 #define FUSE_DAX_SZ	(1 << FUSE_DAX_SHIFT)
21 #define FUSE_DAX_PAGES	(FUSE_DAX_SZ / PAGE_SIZE)
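/*
 * Editor's sketch (not part of the original file): with FUSE_DAX_SHIFT = 21
 * each range covers 2MiB, so a file position maps to its range index with a
 * right shift and to its byte offset inside the range with a mask. This is
 * equivalent to the arithmetic the rest of this file does inline (there the
 * offset is computed as pos - (range_idx << FUSE_DAX_SHIFT), which is the
 * same thing for these aligned, power-of-two sized ranges). The helper names
 * below are hypothetical and for illustration only.
 */
#if 0	/* illustrative sketch, never compiled */
static inline unsigned long fuse_dax_example_range_idx(loff_t pos)
{
	return pos >> FUSE_DAX_SHIFT;		/* e.g. 5MiB -> index 2 */
}

static inline loff_t fuse_dax_example_range_off(loff_t pos)
{
	return pos & (FUSE_DAX_SZ - 1);		/* e.g. 5MiB -> 1MiB */
}
#endif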
22 
23 /** Translation information for file offsets to DAX window offsets */
24 struct fuse_dax_mapping {
25 	/* Links into fcd->free_ranges to keep track of free memory */
26 	struct list_head list;
27 
28 	/* For interval tree in file/inode */
29 	struct interval_tree_node itn;
30 
31 	/** Position in DAX window */
32 	u64 window_offset;
33 
34 	/** Length of mapping, in bytes */
35 	loff_t length;
36 
37 	/* Is this mapping read-only or read-write */
38 	bool writable;
39 };
40 
41 /* Per-inode dax map */
42 struct fuse_inode_dax {
43 	/* Semaphore to protect modifications to the dmap tree */
44 	struct rw_semaphore sem;
45 
46 	/* Sorted rb tree of struct fuse_dax_mapping elements */
47 	struct rb_root_cached tree;
48 	unsigned long nr;
49 };
50 
51 struct fuse_conn_dax {
52 	/* DAX device */
53 	struct dax_device *dev;
54 
55 	/* Lock protecting accesses to members of this structure */
56 	spinlock_t lock;
57 
58 	/* DAX Window Free Ranges */
59 	long nr_free_ranges;
60 	struct list_head free_ranges;
61 };
62 
63 static inline struct fuse_dax_mapping *
64 node_to_dmap(struct interval_tree_node *node)
65 {
66 	if (!node)
67 		return NULL;
68 
69 	return container_of(node, struct fuse_dax_mapping, itn);
70 }
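/*
 * Editor's sketch (not part of the original file): a typical lookup in the
 * per-inode interval tree. Entries are keyed by range index (file offset
 * shifted right by FUSE_DAX_SHIFT) and are looked up while holding
 * fi->dax->sem, as the iomap paths below do. The helper name is hypothetical.
 */
#if 0	/* illustrative sketch, never compiled */
static struct fuse_dax_mapping *
fuse_dax_example_lookup(struct fuse_inode *fi, loff_t pos)
{
	unsigned long idx = pos >> FUSE_DAX_SHIFT;
	struct interval_tree_node *node;

	/* Caller is assumed to hold fi->dax->sem (shared is sufficient) */
	node = interval_tree_iter_first(&fi->dax->tree, idx, idx);
	return node_to_dmap(node);	/* NULL if no mapping covers pos */
}
#endif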
71 
72 static struct fuse_dax_mapping *alloc_dax_mapping(struct fuse_conn_dax *fcd)
73 {
74 	struct fuse_dax_mapping *dmap;
75 
76 	spin_lock(&fcd->lock);
77 	dmap = list_first_entry_or_null(&fcd->free_ranges,
78 					struct fuse_dax_mapping, list);
79 	if (dmap) {
80 		list_del_init(&dmap->list);
81 		WARN_ON(fcd->nr_free_ranges <= 0);
82 		fcd->nr_free_ranges--;
83 	}
84 	spin_unlock(&fcd->lock);
85 	return dmap;
86 }
87 
88 /* This assumes fcd->lock is held */
89 static void __dmap_add_to_free_pool(struct fuse_conn_dax *fcd,
90 				struct fuse_dax_mapping *dmap)
91 {
92 	list_add_tail(&dmap->list, &fcd->free_ranges);
93 	fcd->nr_free_ranges++;
94 }
95 
96 static void dmap_add_to_free_pool(struct fuse_conn_dax *fcd,
97 				struct fuse_dax_mapping *dmap)
98 {
99 	/* Return fuse_dax_mapping to free list */
100 	spin_lock(&fcd->lock);
101 	__dmap_add_to_free_pool(fcd, dmap);
102 	spin_unlock(&fcd->lock);
103 }
104 
105 static int fuse_setup_one_mapping(struct inode *inode, unsigned long start_idx,
106 				  struct fuse_dax_mapping *dmap, bool writable,
107 				  bool upgrade)
108 {
109 	struct fuse_conn *fc = get_fuse_conn(inode);
110 	struct fuse_conn_dax *fcd = fc->dax;
111 	struct fuse_inode *fi = get_fuse_inode(inode);
112 	struct fuse_setupmapping_in inarg;
113 	loff_t offset = start_idx << FUSE_DAX_SHIFT;
114 	FUSE_ARGS(args);
115 	ssize_t err;
116 
117 	WARN_ON(fcd->nr_free_ranges < 0);
118 
119 	/* Ask fuse daemon to setup mapping */
120 	memset(&inarg, 0, sizeof(inarg));
121 	inarg.foffset = offset;
122 	inarg.fh = -1;
123 	inarg.moffset = dmap->window_offset;
124 	inarg.len = FUSE_DAX_SZ;
125 	inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
126 	if (writable)
127 		inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
128 	args.opcode = FUSE_SETUPMAPPING;
129 	args.nodeid = fi->nodeid;
130 	args.in_numargs = 1;
131 	args.in_args[0].size = sizeof(inarg);
132 	args.in_args[0].value = &inarg;
133 	err = fuse_simple_request(fc, &args);
134 	if (err < 0)
135 		return err;
136 	dmap->writable = writable;
137 	if (!upgrade) {
138 		dmap->itn.start = dmap->itn.last = start_idx;
139 		/* Protected by fi->dax->sem */
140 		interval_tree_insert(&dmap->itn, &fi->dax->tree);
141 		fi->dax->nr++;
142 	}
143 	return 0;
144 }
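/*
 * Editor's note (not part of the original file): callers below use the
 * writable/upgrade flags in two ways. fuse_setup_new_dax_mapping() passes
 * upgrade == false, so the freshly allocated dmap is also inserted into the
 * interval tree, while fuse_upgrade_dax_mapping() passes upgrade == true to
 * re-send FUSE_SETUPMAPPING with FUSE_SETUPMAPPING_FLAG_WRITE for a dmap
 * that is already in the tree.
 */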
145 
146 static int fuse_send_removemapping(struct inode *inode,
147 				   struct fuse_removemapping_in *inargp,
148 				   struct fuse_removemapping_one *remove_one)
149 {
150 	struct fuse_inode *fi = get_fuse_inode(inode);
151 	struct fuse_conn *fc = get_fuse_conn(inode);
152 	FUSE_ARGS(args);
153 
154 	args.opcode = FUSE_REMOVEMAPPING;
155 	args.nodeid = fi->nodeid;
156 	args.in_numargs = 2;
157 	args.in_args[0].size = sizeof(*inargp);
158 	args.in_args[0].value = inargp;
159 	args.in_args[1].size = inargp->count * sizeof(*remove_one);
160 	args.in_args[1].value = remove_one;
161 	return fuse_simple_request(fc, &args);
162 }
163 
164 static int dmap_removemapping_list(struct inode *inode, unsigned int num,
165 				   struct list_head *to_remove)
166 {
167 	struct fuse_removemapping_one *remove_one, *ptr;
168 	struct fuse_removemapping_in inarg;
169 	struct fuse_dax_mapping *dmap;
170 	int ret, i = 0, nr_alloc;
171 
172 	nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY);
173 	remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOFS);
174 	if (!remove_one)
175 		return -ENOMEM;
176 
177 	ptr = remove_one;
178 	list_for_each_entry(dmap, to_remove, list) {
179 		ptr->moffset = dmap->window_offset;
180 		ptr->len = dmap->length;
181 		ptr++;
182 		i++;
183 		num--;
184 		if (i >= nr_alloc || num == 0) {
185 			memset(&inarg, 0, sizeof(inarg));
186 			inarg.count = i;
187 			ret = fuse_send_removemapping(inode, &inarg,
188 						      remove_one);
189 			if (ret)
190 				goto out;
191 			ptr = remove_one;
192 			i = 0;
193 		}
194 	}
195 out:
196 	kfree(remove_one);
197 	return ret;
198 }
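/*
 * Editor's note (not part of the original file): the loop above batches the
 * removals so that no single FUSE_REMOVEMAPPING request carries more than
 * FUSE_REMOVEMAPPING_MAX_ENTRY entries. For example, if num exceeded that
 * limit, the first request would carry nr_alloc entries and the remainder
 * would go out in one or more follow-up requests, reusing the same
 * remove_one buffer each time.
 */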
199 
200 /*
201  * Cleanup dmap entry and add back to free list. This should be called with
202  * fcd->lock held.
203  */
204 static void dmap_reinit_add_to_free_pool(struct fuse_conn_dax *fcd,
205 					    struct fuse_dax_mapping *dmap)
206 {
207 	pr_debug("fuse: freeing memory range start_idx=0x%lx end_idx=0x%lx window_offset=0x%llx length=0x%llx\n",
208 		 dmap->itn.start, dmap->itn.last, dmap->window_offset,
209 		 dmap->length);
210 	dmap->itn.start = dmap->itn.last = 0;
211 	__dmap_add_to_free_pool(fcd, dmap);
212 }
213 
214 /*
215  * Free inode dmap entries whose range falls inside [start, end].
216  * Does not take any locks. At this point in time it should only be
217  * called from the evict_inode() path, where we know all dmap entries
218  * can be reclaimed.
219  */
220 static void inode_reclaim_dmap_range(struct fuse_conn_dax *fcd,
221 				     struct inode *inode,
222 				     loff_t start, loff_t end)
223 {
224 	struct fuse_inode *fi = get_fuse_inode(inode);
225 	struct fuse_dax_mapping *dmap, *n;
226 	int err, num = 0;
227 	LIST_HEAD(to_remove);
228 	unsigned long start_idx = start >> FUSE_DAX_SHIFT;
229 	unsigned long end_idx = end >> FUSE_DAX_SHIFT;
230 	struct interval_tree_node *node;
231 
232 	while (1) {
233 		node = interval_tree_iter_first(&fi->dax->tree, start_idx,
234 						end_idx);
235 		if (!node)
236 			break;
237 		dmap = node_to_dmap(node);
238 		interval_tree_remove(&dmap->itn, &fi->dax->tree);
239 		num++;
240 		list_add(&dmap->list, &to_remove);
241 	}
242 
243 	/* Nothing to remove */
244 	if (list_empty(&to_remove))
245 		return;
246 
247 	WARN_ON(fi->dax->nr < num);
248 	fi->dax->nr -= num;
249 	err = dmap_removemapping_list(inode, num, &to_remove);
250 	if (err && err != -ENOTCONN) {
251 		pr_warn("Failed to removemappings. start=0x%llx end=0x%llx\n",
252 			start, end);
253 	}
254 	spin_lock(&fcd->lock);
255 	list_for_each_entry_safe(dmap, n, &to_remove, list) {
256 		list_del_init(&dmap->list);
257 		dmap_reinit_add_to_free_pool(fcd, dmap);
258 	}
259 	spin_unlock(&fcd->lock);
260 }
261 
262 /*
263  * This is called from evict_inode(), by which time the inode is going away.
264  * So this function does not take any locks, such as fi->dax->sem, while
265  * traversing the fuse inode interval tree. If that lock were taken, the lock
266  * validator would complain about a possible deadlock with the fs_reclaim lock.
267  */
268 void fuse_dax_inode_cleanup(struct inode *inode)
269 {
270 	struct fuse_conn *fc = get_fuse_conn(inode);
271 	struct fuse_inode *fi = get_fuse_inode(inode);
272 
273 	/*
274 	 * fuse_evict_inode() has already called truncate_inode_pages_final()
275 	 * before we arrive here. So we should not have to worry about any
276 	 * pages/exception entries still associated with inode.
277 	 */
278 	inode_reclaim_dmap_range(fc->dax, inode, 0, -1);
279 	WARN_ON(fi->dax->nr);
280 }
281 
282 static void fuse_fill_iomap_hole(struct iomap *iomap, loff_t length)
283 {
284 	iomap->addr = IOMAP_NULL_ADDR;
285 	iomap->length = length;
286 	iomap->type = IOMAP_HOLE;
287 }
288 
289 static void fuse_fill_iomap(struct inode *inode, loff_t pos, loff_t length,
290 			    struct iomap *iomap, struct fuse_dax_mapping *dmap,
291 			    unsigned int flags)
292 {
293 	loff_t offset, len;
294 	loff_t i_size = i_size_read(inode);
295 
296 	offset = pos - (dmap->itn.start << FUSE_DAX_SHIFT);
297 	len = min(length, dmap->length - offset);
298 
299 	/* If length is beyond end of file, truncate further */
300 	if (pos + len > i_size)
301 		len = i_size - pos;
302 
303 	if (len > 0) {
304 		iomap->addr = dmap->window_offset + offset;
305 		iomap->length = len;
306 		if (flags & IOMAP_FAULT)
307 			iomap->length = ALIGN(len, PAGE_SIZE);
308 		iomap->type = IOMAP_MAPPED;
309 	} else {
310 		/* Mapping beyond end of file is a hole */
311 		fuse_fill_iomap_hole(iomap, length);
312 	}
313 }
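/*
 * Editor's worked example (illustrative numbers, not from the original file):
 * with FUSE_DAX_SZ = 2MiB, a dmap at range index 2 covers file bytes
 * [4MiB, 6MiB). For pos = 5MiB, length = 3MiB and i_size = 5.5MiB:
 * offset = 5MiB - 4MiB = 1MiB, len = min(3MiB, 2MiB - 1MiB) = 1MiB, then
 * clamped to i_size - pos = 0.5MiB. The result is iomap->addr =
 * dmap->window_offset + 1MiB and iomap->length = 0.5MiB (rounded up to a
 * PAGE_SIZE multiple in the IOMAP_FAULT case).
 */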
314 
315 static int fuse_setup_new_dax_mapping(struct inode *inode, loff_t pos,
316 				      loff_t length, unsigned int flags,
317 				      struct iomap *iomap)
318 {
319 	struct fuse_inode *fi = get_fuse_inode(inode);
320 	struct fuse_conn *fc = get_fuse_conn(inode);
321 	struct fuse_conn_dax *fcd = fc->dax;
322 	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
323 	int ret;
324 	bool writable = flags & IOMAP_WRITE;
325 	unsigned long start_idx = pos >> FUSE_DAX_SHIFT;
326 	struct interval_tree_node *node;
327 
328 	alloc_dmap = alloc_dax_mapping(fcd);
329 	if (!alloc_dmap)
330 		return -EIO;
331 
332 	/*
333 	 * Take the write lock so that only one caller can try to set up the
334 	 * mapping and the others wait.
335 	 */
336 	down_write(&fi->dax->sem);
337 	/*
338 	 * We dropped the lock. Check again whether somebody else has already
339 	 * set up the mapping.
340 	 */
341 	node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
342 	if (node) {
343 		dmap = node_to_dmap(node);
344 		fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
345 		dmap_add_to_free_pool(fcd, alloc_dmap);
346 		up_write(&fi->dax->sem);
347 		return 0;
348 	}
349 
350 	/* Setup one mapping */
351 	ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, alloc_dmap,
352 				     writable, false);
353 	if (ret < 0) {
354 		dmap_add_to_free_pool(fcd, alloc_dmap);
355 		up_write(&fi->dax->sem);
356 		return ret;
357 	}
358 	fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags);
359 	up_write(&fi->dax->sem);
360 	return 0;
361 }
362 
363 static int fuse_upgrade_dax_mapping(struct inode *inode, loff_t pos,
364 				    loff_t length, unsigned int flags,
365 				    struct iomap *iomap)
366 {
367 	struct fuse_inode *fi = get_fuse_inode(inode);
368 	struct fuse_dax_mapping *dmap;
369 	int ret;
370 	unsigned long idx = pos >> FUSE_DAX_SHIFT;
371 	struct interval_tree_node *node;
372 
373 	/*
374 	 * Take the exclusive lock so that only one caller can try to set up
375 	 * the mapping and the others wait.
376 	 */
377 	down_write(&fi->dax->sem);
378 	node = interval_tree_iter_first(&fi->dax->tree, idx, idx);
379 
380 	/* We are holding either the inode lock or i_mmap_sem, and that should
381 	 * ensure that the dmap can't be reclaimed or truncated and that it is
382 	 * still in the tree despite the fact that we dropped and re-acquired
383 	 * the lock.
384 	 */
385 	ret = -EIO;
386 	if (WARN_ON(!node))
387 		goto out_err;
388 
389 	dmap = node_to_dmap(node);
390 
391 	/* Maybe another thread already upgraded the mapping while we were
392 	 * not holding the lock.
393 	 */
394 	if (dmap->writable) {
395 		ret = 0;
396 		goto out_fill_iomap;
397 	}
398 
399 	ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, dmap, true,
400 				     true);
401 	if (ret < 0)
402 		goto out_err;
403 out_fill_iomap:
404 	fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
405 out_err:
406 	up_write(&fi->dax->sem);
407 	return ret;
408 }
409 
410 /* This is just for DAX and the mapping is ephemeral, do not use it for other
411  * purposes since there is no block device with a permanent mapping.
412  */
413 static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
414 			    unsigned int flags, struct iomap *iomap,
415 			    struct iomap *srcmap)
416 {
417 	struct fuse_inode *fi = get_fuse_inode(inode);
418 	struct fuse_conn *fc = get_fuse_conn(inode);
419 	struct fuse_dax_mapping *dmap;
420 	bool writable = flags & IOMAP_WRITE;
421 	unsigned long start_idx = pos >> FUSE_DAX_SHIFT;
422 	struct interval_tree_node *node;
423 
424 	/* We don't support FIEMAP */
425 	if (WARN_ON(flags & IOMAP_REPORT))
426 		return -EIO;
427 
428 	iomap->offset = pos;
429 	iomap->flags = 0;
430 	iomap->bdev = NULL;
431 	iomap->dax_dev = fc->dax->dev;
432 
433 	/*
434 	 * Both the read/write and mmap paths can race here. So we need something
435 	 * to make sure that if one path is setting up a mapping, the other waits.
436 	 *
437 	 * For now, use a semaphore for this. It probably needs to be
438 	 * optimized later.
439 	 */
440 	down_read(&fi->dax->sem);
441 	node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
442 	if (node) {
443 		dmap = node_to_dmap(node);
444 		if (writable && !dmap->writable) {
445 			/* Upgrade the read-only mapping to read-write. This
446 			 * requires the exclusive fi->dax->sem lock as we don't
447 			 * want two threads trying to do this simultaneously
448 			 * for the same dmap. So drop the shared lock and
449 			 * acquire the exclusive lock.
450 			 */
451 			up_read(&fi->dax->sem);
452 			pr_debug("%s: Upgrading mapping at offset 0x%llx length 0x%llx\n",
453 				 __func__, pos, length);
454 			return fuse_upgrade_dax_mapping(inode, pos, length,
455 							flags, iomap);
456 		} else {
457 			fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
458 			up_read(&fi->dax->sem);
459 			return 0;
460 		}
461 	} else {
462 		up_read(&fi->dax->sem);
463 		pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n",
464 				__func__, pos, length);
465 		if (pos >= i_size_read(inode))
466 			goto iomap_hole;
467 
468 		return fuse_setup_new_dax_mapping(inode, pos, length, flags,
469 						  iomap);
470 	}
471 
472 	/*
473 	 * If a read beyond end of file happens, fs code seems to return
474 	 * it as a hole.
475 	 */
476 iomap_hole:
477 	fuse_fill_iomap_hole(iomap, length);
478 	pr_debug("%s returning hole mapping. pos=0x%llx length_asked=0x%llx length_returned=0x%llx\n",
479 		 __func__, pos, length, iomap->length);
480 	return 0;
481 }
482 
483 static int fuse_iomap_end(struct inode *inode, loff_t pos, loff_t length,
484 			  ssize_t written, unsigned int flags,
485 			  struct iomap *iomap)
486 {
487 	/* DAX writes beyond end-of-file aren't handled using iomap, so the
488 	 * file size is unchanged and there is nothing to do here.
489 	 */
490 	return 0;
491 }
492 
493 static const struct iomap_ops fuse_iomap_ops = {
494 	.iomap_begin = fuse_iomap_begin,
495 	.iomap_end = fuse_iomap_end,
496 };
497 
498 ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
499 {
500 	struct inode *inode = file_inode(iocb->ki_filp);
501 	ssize_t ret;
502 
503 	if (iocb->ki_flags & IOCB_NOWAIT) {
504 		if (!inode_trylock_shared(inode))
505 			return -EAGAIN;
506 	} else {
507 		inode_lock_shared(inode);
508 	}
509 
510 	ret = dax_iomap_rw(iocb, to, &fuse_iomap_ops);
511 	inode_unlock_shared(inode);
512 
513 	/* TODO file_accessed(iocb->ki_filp) */
514 	return ret;
515 }
516 
517 static bool file_extending_write(struct kiocb *iocb, struct iov_iter *from)
518 {
519 	struct inode *inode = file_inode(iocb->ki_filp);
520 
521 	return (iov_iter_rw(from) == WRITE &&
522 		((iocb->ki_pos) >= i_size_read(inode) ||
523 		  (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode))));
524 }
525 
526 static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from)
527 {
528 	struct inode *inode = file_inode(iocb->ki_filp);
529 	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
530 	ssize_t ret;
531 
532 	ret = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
533 	if (ret < 0)
534 		return ret;
535 
536 	fuse_invalidate_attr(inode);
537 	fuse_write_update_size(inode, iocb->ki_pos);
538 	return ret;
539 }
540 
541 ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
542 {
543 	struct inode *inode = file_inode(iocb->ki_filp);
544 	ssize_t ret;
545 
546 	if (iocb->ki_flags & IOCB_NOWAIT) {
547 		if (!inode_trylock(inode))
548 			return -EAGAIN;
549 	} else {
550 		inode_lock(inode);
551 	}
552 
553 	ret = generic_write_checks(iocb, from);
554 	if (ret <= 0)
555 		goto out;
556 
557 	ret = file_remove_privs(iocb->ki_filp);
558 	if (ret)
559 		goto out;
560 	/* TODO file_update_time() but we don't want metadata I/O */
561 
562 	/* Do not use dax for file-extending writes, as the write and the
563 	 * on-disk i_size increase would otherwise not be atomic.
564 	 */
565 	if (file_extending_write(iocb, from))
566 		ret = fuse_dax_direct_write(iocb, from);
567 	else
568 		ret = dax_iomap_rw(iocb, from, &fuse_iomap_ops);
569 
570 out:
571 	inode_unlock(inode);
572 
573 	if (ret > 0)
574 		ret = generic_write_sync(iocb, ret);
575 	return ret;
576 }
577 
578 static void fuse_free_dax_mem_ranges(struct list_head *mem_list)
579 {
580 	struct fuse_dax_mapping *range, *temp;
581 
582 	/* Free all allocated elements */
583 	list_for_each_entry_safe(range, temp, mem_list, list) {
584 		list_del(&range->list);
585 		kfree(range);
586 	}
587 }
588 
589 void fuse_dax_conn_free(struct fuse_conn *fc)
590 {
591 	if (fc->dax) {
592 		fuse_free_dax_mem_ranges(&fc->dax->free_ranges);
593 		kfree(fc->dax);
594 	}
595 }
596 
597 static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
598 {
599 	long nr_pages, nr_ranges;
600 	void *kaddr;
601 	pfn_t pfn;
602 	struct fuse_dax_mapping *range;
603 	int ret, id;
604 	size_t dax_size = -1;
605 	unsigned long i;
606 
607 	INIT_LIST_HEAD(&fcd->free_ranges);
608 	id = dax_read_lock();
609 	nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), &kaddr,
610 				     &pfn);
611 	dax_read_unlock(id);
612 	if (nr_pages < 0) {
613 		pr_debug("dax_direct_access() returned %ld\n", nr_pages);
614 		return nr_pages;
615 	}
616 
617 	nr_ranges = nr_pages/FUSE_DAX_PAGES;
618 	pr_debug("%s: dax mapped %ld pages. nr_ranges=%ld\n",
619 		__func__, nr_pages, nr_ranges);
620 
621 	for (i = 0; i < nr_ranges; i++) {
622 		range = kzalloc(sizeof(struct fuse_dax_mapping), GFP_KERNEL);
623 		ret = -ENOMEM;
624 		if (!range)
625 			goto out_err;
626 
627 		/* TODO: This offset only works if the virtio-fs driver does
628 		 * not have some memory hidden at the beginning. This needs
629 		 * better handling.
630 		 */
631 		range->window_offset = i * FUSE_DAX_SZ;
632 		range->length = FUSE_DAX_SZ;
633 		list_add_tail(&range->list, &fcd->free_ranges);
634 	}
635 
636 	fcd->nr_free_ranges = nr_ranges;
637 	return 0;
638 out_err:
639 	/* Free all allocated elements */
640 	fuse_free_dax_mem_ranges(&fcd->free_ranges);
641 	return ret;
642 }
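/*
 * Editor's worked example (illustrative numbers, not from the original file):
 * if dax_direct_access() reports a 1GiB DAX window and PAGE_SIZE is 4KiB,
 * nr_pages = 262144, nr_ranges = 262144 / FUSE_DAX_PAGES = 512, and the free
 * list is populated with ranges at window_offset 0, 2MiB, 4MiB, ..., 1022MiB,
 * each FUSE_DAX_SZ (2MiB) long.
 */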
643 
644 int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev)
645 {
646 	struct fuse_conn_dax *fcd;
647 	int err;
648 
649 	if (!dax_dev)
650 		return 0;
651 
652 	fcd = kzalloc(sizeof(*fcd), GFP_KERNEL);
653 	if (!fcd)
654 		return -ENOMEM;
655 
656 	spin_lock_init(&fcd->lock);
657 	fcd->dev = dax_dev;
658 	err = fuse_dax_mem_range_init(fcd);
659 	if (err) {
660 		kfree(fcd);
661 		return err;
662 	}
663 
664 	fc->dax = fcd;
665 	return 0;
666 }
667 
668 bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi)
669 {
670 	struct fuse_conn *fc = get_fuse_conn_super(sb);
671 
672 	fi->dax = NULL;
673 	if (fc->dax) {
674 		fi->dax = kzalloc(sizeof(*fi->dax), GFP_KERNEL_ACCOUNT);
675 		if (!fi->dax)
676 			return false;
677 
678 		init_rwsem(&fi->dax->sem);
679 		fi->dax->tree = RB_ROOT_CACHED;
680 	}
681 
682 	return true;
683 }
684 
685 void fuse_dax_inode_init(struct inode *inode)
686 {
687 	struct fuse_conn *fc = get_fuse_conn(inode);
688 
689 	if (!fc->dax)
690 		return;
691 
692 	inode->i_flags |= S_DAX;
693 }
694 
695 bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment)
696 {
697 	if (fc->dax && (map_alignment > FUSE_DAX_SHIFT)) {
698 		pr_warn("FUSE: map_alignment %u incompatible with dax mem range size %u\n",
699 			map_alignment, FUSE_DAX_SZ);
700 		return false;
701 	}
702 	return true;
703 }
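/*
 * Editor's note (illustrative, assuming map_alignment is the log2 of the
 * alignment the daemon requires, as negotiated via FUSE_INIT): values such
 * as 12 (4KB) or 16 (64KB) pass the check above because they do not exceed
 * FUSE_DAX_SHIFT (21), whereas a value of 22 (4MB) would be rejected since
 * FUSE_DAX_SZ-sized mappings could not satisfy it.
 */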
704