/*
 * Resizable virtual memory filesystem for Linux.
 *
 * Copyright (C) 2000 Linus Torvalds.
 *		 2000 Transmeta Corp.
 *		 2000-2001 Christoph Rohland
 *		 2000-2001 SAP AG
 *		 2002 Red Hat Inc.
 * Copyright (C) 2002-2011 Hugh Dickins.
 * Copyright (C) 2011 Google Inc.
 * Copyright (C) 2002-2005 VERITAS Software Corporation.
 * Copyright (C) 2004 Andi Kleen, SuSE Labs
 *
 * Extended attribute support for tmpfs:
 * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
 * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
 *
 * tiny-shmem:
 * Copyright (c) 2004, 2008 Matt Mackall <mpm@selenic.com>
 *
 * This file is released under the GPL.
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/vfs.h>
#include <linux/mount.h>
#include <linux/ramfs.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/fileattr.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/sched/signal.h>
#include <linux/export.h>
#include <linux/shmem_fs.h>
#include <linux/swap.h>
#include <linux/uio.h>
#include <linux/hugetlb.h>
#include <linux/fs_parser.h>
#include <linux/swapfile.h>
#include <linux/iversion.h>
#include "swap.h"

static struct vfsmount *shm_mnt;

#ifdef CONFIG_SHMEM
/*
 * This virtual memory filesystem is heavily based on the ramfs. It
 * extends ramfs by the ability to use swap and honor resource limits
 * which makes it a completely usable filesystem.
 */

#include <linux/xattr.h>
#include <linux/exportfs.h>
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
#include <linux/mman.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/percpu_counter.h>
#include <linux/falloc.h>
#include <linux/splice.h>
#include <linux/security.h>
#include <linux/swapops.h>
#include <linux/mempolicy.h>
#include <linux/namei.h>
#include <linux/ctype.h>
#include <linux/migrate.h>
#include <linux/highmem.h>
#include <linux/seq_file.h>
#include <linux/magic.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
#include <uapi/linux/memfd.h>
#include <linux/rmap.h>
#include <linux/uuid.h>
#include <linux/quotaops.h>

#include <linux/uaccess.h>

#include "internal.h"

#define BLOCKS_PER_PAGE  (PAGE_SIZE/512)
#define VM_ACCT(size)    (PAGE_ALIGN(size) >> PAGE_SHIFT)

/* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20

/* Symlink up to this size is kmalloc'ed instead of using a swappable page */
#define SHORT_SYMLINK_LEN 128

/*
 * shmem_fallocate communicates with shmem_fault or shmem_writepage via
 * inode->i_private (with i_rwsem making sure that it has only one user at
 * a time): we would prefer not to enlarge the shmem inode just for that.
 */
struct shmem_falloc {
	wait_queue_head_t *waitq; /* faults into hole wait for punch to end */
	pgoff_t start;		/* start of range currently being fallocated */
	pgoff_t next;		/* the next page offset to be fallocated */
	pgoff_t nr_falloced;	/* how many new pages have been fallocated */
	pgoff_t nr_unswapped;	/* how often writepage refused to swap out */
};

struct shmem_options {
	unsigned long long blocks;
	unsigned long long inodes;
	struct mempolicy *mpol;
	kuid_t uid;
	kgid_t gid;
	umode_t mode;
	bool full_inums;
	int huge;
	int seen;
	bool noswap;
	unsigned short quota_types;
	struct shmem_quota_limits qlimits;
#define SHMEM_SEEN_BLOCKS 1
#define SHMEM_SEEN_INODES 2
#define SHMEM_SEEN_HUGE 4
#define SHMEM_SEEN_INUMS 8
#define SHMEM_SEEN_NOSWAP 16
#define SHMEM_SEEN_QUOTA 32
};

#ifdef CONFIG_TMPFS
static unsigned long shmem_default_max_blocks(void)
{
	return totalram_pages() / 2;
}

static unsigned long shmem_default_max_inodes(void)
{
	unsigned long nr_pages = totalram_pages();

	return min(nr_pages - totalhigh_pages(), nr_pages / 2);
}
#endif

static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
			      struct folio **foliop, enum sgp_type sgp,
			      gfp_t gfp, struct vm_area_struct *vma,
			      vm_fault_t *fault_type);

static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
{
	return sb->s_fs_info;
}

/*
 * shmem_file_setup pre-accounts the whole fixed size of a VM object,
 * for shared memory and for shared anonymous (/dev/zero) mappings
 * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1),
 * consistent with the pre-accounting of private mappings ...
 */
static inline int shmem_acct_size(unsigned long flags, loff_t size)
{
	return (flags & VM_NORESERVE) ?
		0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
}

static inline void shmem_unacct_size(unsigned long flags, loff_t size)
{
	if (!(flags & VM_NORESERVE))
		vm_unacct_memory(VM_ACCT(size));
}

static inline int shmem_reacct_size(unsigned long flags,
		loff_t oldsize, loff_t newsize)
{
	if (!(flags & VM_NORESERVE)) {
		if (VM_ACCT(newsize) > VM_ACCT(oldsize))
			return security_vm_enough_memory_mm(current->mm,
					VM_ACCT(newsize) - VM_ACCT(oldsize));
		else if (VM_ACCT(newsize) < VM_ACCT(oldsize))
			vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize));
	}
	return 0;
}

/*
 * ... whereas tmpfs objects are accounted incrementally as
 * pages are allocated, in order to allow large sparse files.
 * shmem_get_folio reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
 * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
 */
static inline int shmem_acct_block(unsigned long flags, long pages)
{
	if (!(flags & VM_NORESERVE))
		return 0;

	return security_vm_enough_memory_mm(current->mm,
			pages * VM_ACCT(PAGE_SIZE));
}

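/*
 * Illustrative arithmetic for the accounting macros above (not part of the
 * original source): with 4KiB pages, VM_ACCT(5000) == PAGE_ALIGN(5000) >>
 * PAGE_SHIFT == 2, so a 5000-byte object is charged as two pages against
 * the overcommit limits, and VM_ACCT(PAGE_SIZE) == 1 so shmem_acct_block()
 * charges exactly one unit per page.
 */
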
static inline void shmem_unacct_blocks(unsigned long flags, long pages)
{
	if (flags & VM_NORESERVE)
		vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
}

static inline int shmem_inode_acct_block(struct inode *inode, long pages)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	int err = -ENOSPC;

	if (shmem_acct_block(info->flags, pages))
		return err;

	if (sbinfo->max_blocks) {
		if (percpu_counter_compare(&sbinfo->used_blocks,
					   sbinfo->max_blocks - pages) > 0)
			goto unacct;

		err = dquot_alloc_block_nodirty(inode, pages);
		if (err)
			goto unacct;

		percpu_counter_add(&sbinfo->used_blocks, pages);
	} else {
		err = dquot_alloc_block_nodirty(inode, pages);
		if (err)
			goto unacct;
	}

	return 0;

unacct:
	shmem_unacct_blocks(info->flags, pages);
	return err;
}

static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);

	dquot_free_block_nodirty(inode, pages);

	if (sbinfo->max_blocks)
		percpu_counter_sub(&sbinfo->used_blocks, pages);
	shmem_unacct_blocks(info->flags, pages);
}

static const struct super_operations shmem_ops;
const struct address_space_operations shmem_aops;
static const struct file_operations shmem_file_operations;
static const struct inode_operations shmem_inode_operations;
static const struct inode_operations shmem_dir_inode_operations;
static const struct inode_operations shmem_special_inode_operations;
static const struct vm_operations_struct shmem_vm_ops;
static const struct vm_operations_struct shmem_anon_vm_ops;
static struct file_system_type shmem_fs_type;

bool vma_is_anon_shmem(struct vm_area_struct *vma)
{
	return vma->vm_ops == &shmem_anon_vm_ops;
}

bool vma_is_shmem(struct vm_area_struct *vma)
{
	return vma_is_anon_shmem(vma) || vma->vm_ops == &shmem_vm_ops;
}

static LIST_HEAD(shmem_swaplist);
static DEFINE_MUTEX(shmem_swaplist_mutex);

#ifdef CONFIG_TMPFS_QUOTA

static int shmem_enable_quotas(struct super_block *sb,
			       unsigned short quota_types)
{
	int type, err = 0;

	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
	for (type = 0; type < SHMEM_MAXQUOTAS; type++) {
		if (!(quota_types & (1 << type)))
			continue;
		err = dquot_load_quota_sb(sb, type, QFMT_SHMEM,
					  DQUOT_USAGE_ENABLED |
					  DQUOT_LIMITS_ENABLED);
		if (err)
			goto out_err;
	}
	return 0;

out_err:
	pr_warn("tmpfs: failed to enable quota tracking (type=%d, err=%d)\n",
		type, err);
	for (type--; type >= 0; type--)
		dquot_quota_off(sb, type);
	return err;
}

static void shmem_disable_quotas(struct super_block *sb)
{
	int type;

	for (type = 0; type < SHMEM_MAXQUOTAS; type++)
		dquot_quota_off(sb, type);
}

static struct dquot **shmem_get_dquots(struct inode *inode)
{
	return SHMEM_I(inode)->i_dquot;
}
#endif /* CONFIG_TMPFS_QUOTA */

/*
 * shmem_reserve_inode() performs bookkeeping to reserve a shmem inode, and
 * produces a novel ino for the newly allocated inode.
 *
 * It may also be called when making a hard link to permit the space needed by
 * each dentry. However, in that case, no new inode number is needed since that
 * internally draws from another pool of inode numbers (currently global
 * get_next_ino()). This case is indicated by passing NULL as inop.
 */
#define SHMEM_INO_BATCH 1024
static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	ino_t ino;

	if (!(sb->s_flags & SB_KERNMOUNT)) {
		raw_spin_lock(&sbinfo->stat_lock);
		if (sbinfo->max_inodes) {
			if (!sbinfo->free_inodes) {
				raw_spin_unlock(&sbinfo->stat_lock);
				return -ENOSPC;
			}
			sbinfo->free_inodes--;
		}
		if (inop) {
			ino = sbinfo->next_ino++;
			if (unlikely(is_zero_ino(ino)))
				ino = sbinfo->next_ino++;
			if (unlikely(!sbinfo->full_inums &&
				     ino > UINT_MAX)) {
				/*
				 * Emulate get_next_ino uint wraparound for
				 * compatibility
				 */
				if (IS_ENABLED(CONFIG_64BIT))
					pr_warn("%s: inode number overflow on device %d, consider using inode64 mount option\n",
						__func__, MINOR(sb->s_dev));
				sbinfo->next_ino = 1;
				ino = sbinfo->next_ino++;
			}
			*inop = ino;
		}
		raw_spin_unlock(&sbinfo->stat_lock);
	} else if (inop) {
		/*
		 * __shmem_file_setup, one of our callers, is lock-free: it
		 * doesn't hold stat_lock in shmem_reserve_inode since
		 * max_inodes is always 0, and is called from potentially
		 * unknown contexts. As such, use a per-cpu batched allocator
		 * which doesn't require the per-sb stat_lock unless we are at
		 * the batch boundary.
		 *
		 * We don't need to worry about inode{32,64} since SB_KERNMOUNT
		 * shmem mounts are not exposed to userspace, so we don't need
		 * to worry about things like glibc compatibility.
		 */
		ino_t *next_ino;

		next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu());
		ino = *next_ino;
		if (unlikely(ino % SHMEM_INO_BATCH == 0)) {
			raw_spin_lock(&sbinfo->stat_lock);
			ino = sbinfo->next_ino;
			sbinfo->next_ino += SHMEM_INO_BATCH;
			raw_spin_unlock(&sbinfo->stat_lock);
			if (unlikely(is_zero_ino(ino)))
				ino++;
		}
		*inop = ino;
		*next_ino = ++ino;
		put_cpu();
	}

	return 0;
}

static void shmem_free_inode(struct super_block *sb)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	if (sbinfo->max_inodes) {
		raw_spin_lock(&sbinfo->stat_lock);
		sbinfo->free_inodes++;
		raw_spin_unlock(&sbinfo->stat_lock);
	}
}

/**
 * shmem_recalc_inode - recalculate the block usage of an inode
 * @inode: inode to recalc
 *
 * We have to calculate the free blocks since the mm can drop
 * undirtied hole pages behind our back.
 *
 * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
 * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
 *
 * It has to be called with the spinlock held.
 */
static void shmem_recalc_inode(struct inode *inode)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	long freed;

	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
	if (freed > 0) {
		info->alloced -= freed;
		shmem_inode_unacct_blocks(inode, freed);
	}
}

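/*
 * Worked example of the calculation above (illustrative, not from the
 * original source): with info->alloced == 10, info->swapped == 2 and
 * i_mapping->nrpages == 6, freed == 10 - 2 - 6 == 2, i.e. two pages were
 * dropped behind our back and their blocks are unaccounted again.
 */
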
bool shmem_charge(struct inode *inode, long pages)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	unsigned long flags;

	if (shmem_inode_acct_block(inode, pages))
		return false;

	/* nrpages adjustment first, then shmem_recalc_inode() when balanced */
	inode->i_mapping->nrpages += pages;

	spin_lock_irqsave(&info->lock, flags);
	info->alloced += pages;
	shmem_recalc_inode(inode);
	spin_unlock_irqrestore(&info->lock, flags);

	return true;
}

void shmem_uncharge(struct inode *inode, long pages)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	unsigned long flags;

	/* nrpages adjustment done by __filemap_remove_folio() or caller */

	spin_lock_irqsave(&info->lock, flags);
	info->alloced -= pages;
	shmem_recalc_inode(inode);
	spin_unlock_irqrestore(&info->lock, flags);

	shmem_inode_unacct_blocks(inode, pages);
}

/*
 * Replace item expected in xarray by a new item, while holding xa_lock.
 */
static int shmem_replace_entry(struct address_space *mapping,
			pgoff_t index, void *expected, void *replacement)
{
	XA_STATE(xas, &mapping->i_pages, index);
	void *item;

	VM_BUG_ON(!expected);
	VM_BUG_ON(!replacement);
	item = xas_load(&xas);
	if (item != expected)
		return -ENOENT;
	xas_store(&xas, replacement);
	return 0;
}

/*
 * Sometimes, before we decide whether to proceed or to fail, we must check
 * that an entry was not already brought back from swap by a racing thread.
 *
 * Checking page is not enough: by the time a SwapCache page is locked, it
 * might be reused, and again be SwapCache, using the same swap as before.
 */
static bool shmem_confirm_swap(struct address_space *mapping,
			       pgoff_t index, swp_entry_t swap)
{
	return xa_load(&mapping->i_pages, index) == swp_to_radix_entry(swap);
}

/*
 * Definitions for "huge tmpfs": tmpfs mounted with the huge= option
 *
 * SHMEM_HUGE_NEVER:
 *	disables huge pages for the mount;
 * SHMEM_HUGE_ALWAYS:
 *	enables huge pages for the mount;
 * SHMEM_HUGE_WITHIN_SIZE:
 *	only allocate huge pages if the page will be fully within i_size,
 *	also respect fadvise()/madvise() hints;
 * SHMEM_HUGE_ADVISE:
 *	only allocate huge pages if requested with fadvise()/madvise();
 */

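/*
 * Usage example (illustrative): "mount -t tmpfs -o huge=within_size tmpfs
 * /mnt" selects SHMEM_HUGE_WITHIN_SIZE for that mount.
 */
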
#define SHMEM_HUGE_NEVER	0
#define SHMEM_HUGE_ALWAYS	1
#define SHMEM_HUGE_WITHIN_SIZE	2
#define SHMEM_HUGE_ADVISE	3

/*
 * Special values.
 * Only can be set via /sys/kernel/mm/transparent_hugepage/shmem_enabled:
 *
 * SHMEM_HUGE_DENY:
 *	disables huge on shm_mnt and all mounts, for emergency use;
 * SHMEM_HUGE_FORCE:
 *	enables huge on shm_mnt and all mounts, w/o needing option, for testing;
 *
 */
#define SHMEM_HUGE_DENY		(-1)
#define SHMEM_HUGE_FORCE	(-2)

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* ifdef here to avoid bloating shmem.o when not necessary */

static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;

bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
		   struct mm_struct *mm, unsigned long vm_flags)
{
	loff_t i_size;

	if (!S_ISREG(inode->i_mode))
		return false;
	if (mm && ((vm_flags & VM_NOHUGEPAGE) || test_bit(MMF_DISABLE_THP, &mm->flags)))
		return false;
	if (shmem_huge == SHMEM_HUGE_DENY)
		return false;
	if (shmem_huge_force || shmem_huge == SHMEM_HUGE_FORCE)
		return true;

	switch (SHMEM_SB(inode->i_sb)->huge) {
	case SHMEM_HUGE_ALWAYS:
		return true;
	case SHMEM_HUGE_WITHIN_SIZE:
		index = round_up(index + 1, HPAGE_PMD_NR);
		i_size = round_up(i_size_read(inode), PAGE_SIZE);
		if (i_size >> PAGE_SHIFT >= index)
			return true;
		fallthrough;
	case SHMEM_HUGE_ADVISE:
		if (mm && (vm_flags & VM_HUGEPAGE))
			return true;
		fallthrough;
	default:
		return false;
	}
}

#if defined(CONFIG_SYSFS)
static int shmem_parse_huge(const char *str)
{
	if (!strcmp(str, "never"))
		return SHMEM_HUGE_NEVER;
	if (!strcmp(str, "always"))
		return SHMEM_HUGE_ALWAYS;
	if (!strcmp(str, "within_size"))
		return SHMEM_HUGE_WITHIN_SIZE;
	if (!strcmp(str, "advise"))
		return SHMEM_HUGE_ADVISE;
	if (!strcmp(str, "deny"))
		return SHMEM_HUGE_DENY;
	if (!strcmp(str, "force"))
		return SHMEM_HUGE_FORCE;
	return -EINVAL;
}
#endif

#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS)
static const char *shmem_format_huge(int huge)
{
	switch (huge) {
	case SHMEM_HUGE_NEVER:
		return "never";
	case SHMEM_HUGE_ALWAYS:
		return "always";
	case SHMEM_HUGE_WITHIN_SIZE:
		return "within_size";
	case SHMEM_HUGE_ADVISE:
		return "advise";
	case SHMEM_HUGE_DENY:
		return "deny";
	case SHMEM_HUGE_FORCE:
		return "force";
	default:
		VM_BUG_ON(1);
		return "bad_val";
	}
}
#endif

static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
		struct shrink_control *sc, unsigned long nr_to_split)
{
	LIST_HEAD(list), *pos, *next;
	LIST_HEAD(to_remove);
	struct inode *inode;
	struct shmem_inode_info *info;
	struct folio *folio;
	unsigned long batch = sc ? sc->nr_to_scan : 128;
	int split = 0;

	if (list_empty(&sbinfo->shrinklist))
		return SHRINK_STOP;

	spin_lock(&sbinfo->shrinklist_lock);
	list_for_each_safe(pos, next, &sbinfo->shrinklist) {
		info = list_entry(pos, struct shmem_inode_info, shrinklist);

		/* pin the inode */
		inode = igrab(&info->vfs_inode);

		/* inode is about to be evicted */
		if (!inode) {
			list_del_init(&info->shrinklist);
			goto next;
		}

		/* Check if there's anything to gain */
		if (round_up(inode->i_size, PAGE_SIZE) ==
				round_up(inode->i_size, HPAGE_PMD_SIZE)) {
			list_move(&info->shrinklist, &to_remove);
			goto next;
		}

		list_move(&info->shrinklist, &list);
next:
		sbinfo->shrinklist_len--;
		if (!--batch)
			break;
	}
	spin_unlock(&sbinfo->shrinklist_lock);

	list_for_each_safe(pos, next, &to_remove) {
		info = list_entry(pos, struct shmem_inode_info, shrinklist);
		inode = &info->vfs_inode;
		list_del_init(&info->shrinklist);
		iput(inode);
	}

	list_for_each_safe(pos, next, &list) {
		int ret;
		pgoff_t index;

		info = list_entry(pos, struct shmem_inode_info, shrinklist);
		inode = &info->vfs_inode;

		if (nr_to_split && split >= nr_to_split)
			goto move_back;

		index = (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT;
		folio = filemap_get_folio(inode->i_mapping, index);
		if (IS_ERR(folio))
			goto drop;

		/* No huge page at the end of the file: nothing to split */
		if (!folio_test_large(folio)) {
			folio_put(folio);
			goto drop;
		}

		/*
		 * Move the inode on the list back to shrinklist if we failed
		 * to lock the page at this time.
		 *
		 * Waiting for the lock may lead to deadlock in the
		 * reclaim path.
		 */
		if (!folio_trylock(folio)) {
			folio_put(folio);
			goto move_back;
		}

		ret = split_folio(folio);
		folio_unlock(folio);
		folio_put(folio);

		/* If split failed move the inode on the list back to shrinklist */
		if (ret)
			goto move_back;

		split++;
drop:
		list_del_init(&info->shrinklist);
		goto put;
move_back:
		/*
		 * Make sure the inode is either on the global list or deleted
		 * from any local list before iput() since it could be deleted
		 * in another thread once we put the inode (then the local list
		 * is corrupted).
		 */
		spin_lock(&sbinfo->shrinklist_lock);
		list_move(&info->shrinklist, &sbinfo->shrinklist);
		sbinfo->shrinklist_len++;
		spin_unlock(&sbinfo->shrinklist_lock);
put:
		iput(inode);
	}

	return split;
}

static long shmem_unused_huge_scan(struct super_block *sb,
		struct shrink_control *sc)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);

	if (!READ_ONCE(sbinfo->shrinklist_len))
		return SHRINK_STOP;

	return shmem_unused_huge_shrink(sbinfo, sc, 0);
}

static long shmem_unused_huge_count(struct super_block *sb,
		struct shrink_control *sc)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	return READ_ONCE(sbinfo->shrinklist_len);
}
#else /* !CONFIG_TRANSPARENT_HUGEPAGE */

#define shmem_huge SHMEM_HUGE_DENY

bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
		   struct mm_struct *mm, unsigned long vm_flags)
{
	return false;
}

static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
		struct shrink_control *sc, unsigned long nr_to_split)
{
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * Like filemap_add_folio, but error if expected item has gone.
 */
static int shmem_add_to_page_cache(struct folio *folio,
				   struct address_space *mapping,
				   pgoff_t index, void *expected, gfp_t gfp,
				   struct mm_struct *charge_mm)
{
	XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio));
	long nr = folio_nr_pages(folio);
	int error;

	VM_BUG_ON_FOLIO(index != round_down(index, nr), folio);
	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
	VM_BUG_ON_FOLIO(!folio_test_swapbacked(folio), folio);
	VM_BUG_ON(expected && folio_test_large(folio));

	folio_ref_add(folio, nr);
	folio->mapping = mapping;
	folio->index = index;

	if (!folio_test_swapcache(folio)) {
		error = mem_cgroup_charge(folio, charge_mm, gfp);
		if (error) {
			if (folio_test_pmd_mappable(folio)) {
				count_vm_event(THP_FILE_FALLBACK);
				count_vm_event(THP_FILE_FALLBACK_CHARGE);
			}
			goto error;
		}
	}
	folio_throttle_swaprate(folio, gfp);

	do {
		xas_lock_irq(&xas);
		if (expected != xas_find_conflict(&xas)) {
			xas_set_err(&xas, -EEXIST);
			goto unlock;
		}
		if (expected && xas_find_conflict(&xas)) {
			xas_set_err(&xas, -EEXIST);
			goto unlock;
		}
		xas_store(&xas, folio);
		if (xas_error(&xas))
			goto unlock;
		if (folio_test_pmd_mappable(folio)) {
			count_vm_event(THP_FILE_ALLOC);
			__lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr);
		}
		mapping->nrpages += nr;
		__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
		__lruvec_stat_mod_folio(folio, NR_SHMEM, nr);
unlock:
		xas_unlock_irq(&xas);
	} while (xas_nomem(&xas, gfp));

	if (xas_error(&xas)) {
		error = xas_error(&xas);
		goto error;
	}

	return 0;
error:
	folio->mapping = NULL;
	folio_ref_sub(folio, nr);
	return error;
}

/*
 * Like delete_from_page_cache, but substitutes swap for @folio.
 */
static void shmem_delete_from_page_cache(struct folio *folio, void *radswap)
{
	struct address_space *mapping = folio->mapping;
	long nr = folio_nr_pages(folio);
	int error;

	xa_lock_irq(&mapping->i_pages);
	error = shmem_replace_entry(mapping, folio->index, folio, radswap);
	folio->mapping = NULL;
	mapping->nrpages -= nr;
	__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr);
	__lruvec_stat_mod_folio(folio, NR_SHMEM, -nr);
	xa_unlock_irq(&mapping->i_pages);
	folio_put(folio);
	BUG_ON(error);
}

/*
 * Remove swap entry from page cache, free the swap and its page cache.
 */
static int shmem_free_swap(struct address_space *mapping,
			   pgoff_t index, void *radswap)
{
	void *old;

	old = xa_cmpxchg_irq(&mapping->i_pages, index, radswap, NULL, 0);
	if (old != radswap)
		return -ENOENT;
	free_swap_and_cache(radix_to_swp_entry(radswap));
	return 0;
}

/*
 * Determine (in bytes) how many of the shmem object's pages mapped by the
 * given offsets are swapped out.
 *
 * This is safe to call without i_rwsem or the i_pages lock thanks to RCU,
 * as long as the inode doesn't go away and racy results are not a problem.
 */
unsigned long shmem_partial_swap_usage(struct address_space *mapping,
						pgoff_t start, pgoff_t end)
{
	XA_STATE(xas, &mapping->i_pages, start);
	struct page *page;
	unsigned long swapped = 0;

	rcu_read_lock();
	xas_for_each(&xas, page, end - 1) {
		if (xas_retry(&xas, page))
			continue;
		if (xa_is_value(page))
			swapped++;

		if (need_resched()) {
			xas_pause(&xas);
			cond_resched_rcu();
		}
	}

	rcu_read_unlock();

	return swapped << PAGE_SHIFT;
}

/*
 * Determine (in bytes) how many of the shmem object's pages mapped by the
 * given vma is swapped out.
 *
 * This is safe to call without i_rwsem or the i_pages lock thanks to RCU,
 * as long as the inode doesn't go away and racy results are not a problem.
 */
unsigned long shmem_swap_usage(struct vm_area_struct *vma)
{
	struct inode *inode = file_inode(vma->vm_file);
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct address_space *mapping = inode->i_mapping;
	unsigned long swapped;

	/* Be careful as we don't hold info->lock */
	swapped = READ_ONCE(info->swapped);

	/*
	 * The easier cases are when the shmem object has nothing in swap, or
	 * the vma maps it whole. Then we can simply use the stats that we
	 * already track.
	 */
	if (!swapped)
		return 0;

	if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size)
		return swapped << PAGE_SHIFT;

	/* Here comes the more involved part */
	return shmem_partial_swap_usage(mapping, vma->vm_pgoff,
					vma->vm_pgoff + vma_pages(vma));
}

/*
 * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
 */
void shmem_unlock_mapping(struct address_space *mapping)
{
	struct folio_batch fbatch;
	pgoff_t index = 0;

	folio_batch_init(&fbatch);
	/*
	 * Minor point, but we might as well stop if someone else SHM_LOCKs it.
	 */
	while (!mapping_unevictable(mapping) &&
	       filemap_get_folios(mapping, &index, ~0UL, &fbatch)) {
		check_move_unevictable_folios(&fbatch);
		folio_batch_release(&fbatch);
		cond_resched();
	}
}

static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index)
{
	struct folio *folio;

	/*
	 * At first avoid shmem_get_folio(,,,SGP_READ): that fails
	 * beyond i_size, and reports fallocated folios as holes.
	 */
	folio = filemap_get_entry(inode->i_mapping, index);
	if (!folio)
		return folio;
	if (!xa_is_value(folio)) {
		folio_lock(folio);
		if (folio->mapping == inode->i_mapping)
			return folio;
		/* The folio has been swapped out */
		folio_unlock(folio);
		folio_put(folio);
	}
	/*
	 * But read a folio back from swap if any of it is within i_size
	 * (although in some cases this is just a waste of time).
	 */
	folio = NULL;
	shmem_get_folio(inode, index, &folio, SGP_READ);
	return folio;
}

/*
 * Remove range of pages and swap entries from page cache, and free them.
 * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
 */
static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
								 bool unfalloc)
{
	struct address_space *mapping = inode->i_mapping;
	struct shmem_inode_info *info = SHMEM_I(inode);
	pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
	pgoff_t end = (lend + 1) >> PAGE_SHIFT;
	struct folio_batch fbatch;
	pgoff_t indices[PAGEVEC_SIZE];
	struct folio *folio;
	bool same_folio;
	long nr_swaps_freed = 0;
	pgoff_t index;
	int i;

	if (lend == -1)
		end = -1;	/* unsigned, so actually very big */

	if (info->fallocend > start && info->fallocend <= end && !unfalloc)
		info->fallocend = start;

	folio_batch_init(&fbatch);
	index = start;
	while (index < end && find_lock_entries(mapping, &index, end - 1,
			&fbatch, indices)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			folio = fbatch.folios[i];

			if (xa_is_value(folio)) {
				if (unfalloc)
					continue;
				nr_swaps_freed += !shmem_free_swap(mapping,
							indices[i], folio);
				continue;
			}

			if (!unfalloc || !folio_test_uptodate(folio))
				truncate_inode_folio(mapping, folio);
			folio_unlock(folio);
		}
		folio_batch_remove_exceptionals(&fbatch);
		folio_batch_release(&fbatch);
		cond_resched();
	}

	/*
	 * When undoing a failed fallocate, we want none of the partial folio
	 * zeroing and splitting below, but shall want to truncate the whole
	 * folio when !uptodate indicates that it was added by this fallocate,
	 * even when [lstart, lend] covers only a part of the folio.
	 */
	if (unfalloc)
		goto whole_folios;

	same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
	folio = shmem_get_partial_folio(inode, lstart >> PAGE_SHIFT);
	if (folio) {
		same_folio = lend < folio_pos(folio) + folio_size(folio);
		folio_mark_dirty(folio);
		if (!truncate_inode_partial_folio(folio, lstart, lend)) {
			start = folio->index + folio_nr_pages(folio);
			if (same_folio)
				end = folio->index;
		}
		folio_unlock(folio);
		folio_put(folio);
		folio = NULL;
	}

	if (!same_folio)
		folio = shmem_get_partial_folio(inode, lend >> PAGE_SHIFT);
	if (folio) {
		folio_mark_dirty(folio);
		if (!truncate_inode_partial_folio(folio, lstart, lend))
			end = folio->index;
		folio_unlock(folio);
		folio_put(folio);
	}

whole_folios:

	index = start;
	while (index < end) {
		cond_resched();

		if (!find_get_entries(mapping, &index, end - 1, &fbatch,
				indices)) {
			/* If all gone or hole-punch or unfalloc, we're done */
			if (index == start || end != -1)
				break;
			/* But if truncating, restart to make sure all gone */
			index = start;
			continue;
		}
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			folio = fbatch.folios[i];

			if (xa_is_value(folio)) {
				if (unfalloc)
					continue;
				if (shmem_free_swap(mapping, indices[i], folio)) {
					/* Swap was replaced by page: retry */
					index = indices[i];
					break;
				}
				nr_swaps_freed++;
				continue;
			}

			folio_lock(folio);
Shutemov 10780e499ed3SMatthew Wilcox (Oracle) if (!unfalloc || !folio_test_uptodate(folio)) { 10790e499ed3SMatthew Wilcox (Oracle) if (folio_mapping(folio) != mapping) { 1080b1a36650SHugh Dickins /* Page was replaced by swap: retry */ 10810e499ed3SMatthew Wilcox (Oracle) folio_unlock(folio); 10829fb6beeaSVishal Moola (Oracle) index = indices[i]; 1083b1a36650SHugh Dickins break; 10847a5d0fbbSHugh Dickins } 10850e499ed3SMatthew Wilcox (Oracle) VM_BUG_ON_FOLIO(folio_test_writeback(folio), 10860e499ed3SMatthew Wilcox (Oracle) folio); 10870e499ed3SMatthew Wilcox (Oracle) truncate_inode_folio(mapping, folio); 108871725ed1SHugh Dickins } 10890e499ed3SMatthew Wilcox (Oracle) folio_unlock(folio); 1090bda97eabSHugh Dickins } 10910e499ed3SMatthew Wilcox (Oracle) folio_batch_remove_exceptionals(&fbatch); 10920e499ed3SMatthew Wilcox (Oracle) folio_batch_release(&fbatch); 1093bda97eabSHugh Dickins } 109494c1e62dSHugh Dickins 10954595ef88SKirill A. Shutemov spin_lock_irq(&info->lock); 10967a5d0fbbSHugh Dickins info->swapped -= nr_swaps_freed; 10971da177e4SLinus Torvalds shmem_recalc_inode(inode); 10984595ef88SKirill A. Shutemov spin_unlock_irq(&info->lock); 10991635f6a7SHugh Dickins } 11001da177e4SLinus Torvalds 11011635f6a7SHugh Dickins void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) 11021635f6a7SHugh Dickins { 11031635f6a7SHugh Dickins shmem_undo_range(inode, lstart, lend, false); 1104078cd827SDeepa Dinamani inode->i_ctime = inode->i_mtime = current_time(inode); 110536f05cabSJeff Layton inode_inc_iversion(inode); 11061da177e4SLinus Torvalds } 110794c1e62dSHugh Dickins EXPORT_SYMBOL_GPL(shmem_truncate_range); 11081da177e4SLinus Torvalds 1109b74d24f7SChristian Brauner static int shmem_getattr(struct mnt_idmap *idmap, 1110549c7297SChristian Brauner const struct path *path, struct kstat *stat, 1111a528d35eSDavid Howells u32 request_mask, unsigned int query_flags) 111244a30220SYu Zhao { 1113a528d35eSDavid Howells struct inode *inode = path->dentry->d_inode; 111444a30220SYu Zhao struct shmem_inode_info *info = SHMEM_I(inode); 111544a30220SYu Zhao 1116d0424c42SHugh Dickins if (info->alloced - info->swapped != inode->i_mapping->nrpages) { 11174595ef88SKirill A. Shutemov spin_lock_irq(&info->lock); 111844a30220SYu Zhao shmem_recalc_inode(inode); 11194595ef88SKirill A. 
Shutemov spin_unlock_irq(&info->lock); 1120d0424c42SHugh Dickins } 1121e408e695STheodore Ts'o if (info->fsflags & FS_APPEND_FL) 1122e408e695STheodore Ts'o stat->attributes |= STATX_ATTR_APPEND; 1123e408e695STheodore Ts'o if (info->fsflags & FS_IMMUTABLE_FL) 1124e408e695STheodore Ts'o stat->attributes |= STATX_ATTR_IMMUTABLE; 1125e408e695STheodore Ts'o if (info->fsflags & FS_NODUMP_FL) 1126e408e695STheodore Ts'o stat->attributes |= STATX_ATTR_NODUMP; 1127e408e695STheodore Ts'o stat->attributes_mask |= (STATX_ATTR_APPEND | 1128e408e695STheodore Ts'o STATX_ATTR_IMMUTABLE | 1129e408e695STheodore Ts'o STATX_ATTR_NODUMP); 11307a80e5b8SGiuseppe Scrivano generic_fillattr(idmap, inode, stat); 113189fdcd26SYang Shi 11322cf13384SDavid Stevens if (shmem_is_huge(inode, 0, false, NULL, 0)) 113389fdcd26SYang Shi stat->blksize = HPAGE_PMD_SIZE; 113489fdcd26SYang Shi 1135f7cd16a5SXavier Roche if (request_mask & STATX_BTIME) { 1136f7cd16a5SXavier Roche stat->result_mask |= STATX_BTIME; 1137f7cd16a5SXavier Roche stat->btime.tv_sec = info->i_crtime.tv_sec; 1138f7cd16a5SXavier Roche stat->btime.tv_nsec = info->i_crtime.tv_nsec; 1139f7cd16a5SXavier Roche } 1140f7cd16a5SXavier Roche 114144a30220SYu Zhao return 0; 114244a30220SYu Zhao } 114344a30220SYu Zhao 1144c1632a0fSChristian Brauner static int shmem_setattr(struct mnt_idmap *idmap, 1145549c7297SChristian Brauner struct dentry *dentry, struct iattr *attr) 11461da177e4SLinus Torvalds { 114775c3cfa8SDavid Howells struct inode *inode = d_inode(dentry); 114840e041a2SDavid Herrmann struct shmem_inode_info *info = SHMEM_I(inode); 11491da177e4SLinus Torvalds int error; 115036f05cabSJeff Layton bool update_mtime = false; 115136f05cabSJeff Layton bool update_ctime = true; 11521da177e4SLinus Torvalds 11537a80e5b8SGiuseppe Scrivano error = setattr_prepare(idmap, dentry, attr); 1154db78b877SChristoph Hellwig if (error) 1155db78b877SChristoph Hellwig return error; 1156db78b877SChristoph Hellwig 11576fd73538SDaniel Verkamp if ((info->seals & F_SEAL_EXEC) && (attr->ia_valid & ATTR_MODE)) { 11586fd73538SDaniel Verkamp if ((inode->i_mode ^ attr->ia_mode) & 0111) { 11596fd73538SDaniel Verkamp return -EPERM; 11606fd73538SDaniel Verkamp } 11616fd73538SDaniel Verkamp } 11626fd73538SDaniel Verkamp 116394c1e62dSHugh Dickins if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 116494c1e62dSHugh Dickins loff_t oldsize = inode->i_size; 116594c1e62dSHugh Dickins loff_t newsize = attr->ia_size; 11663889e6e7Snpiggin@suse.de 11679608703eSJan Kara /* protected by i_rwsem */ 116840e041a2SDavid Herrmann if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) || 116940e041a2SDavid Herrmann (newsize > oldsize && (info->seals & F_SEAL_GROW))) 117040e041a2SDavid Herrmann return -EPERM; 117140e041a2SDavid Herrmann 117294c1e62dSHugh Dickins if (newsize != oldsize) { 117377142517SKonstantin Khlebnikov error = shmem_reacct_size(SHMEM_I(inode)->flags, 117477142517SKonstantin Khlebnikov oldsize, newsize); 117577142517SKonstantin Khlebnikov if (error) 117677142517SKonstantin Khlebnikov return error; 117794c1e62dSHugh Dickins i_size_write(inode, newsize); 117836f05cabSJeff Layton update_mtime = true; 117936f05cabSJeff Layton } else { 118036f05cabSJeff Layton update_ctime = false; 118194c1e62dSHugh Dickins } 1182afa2db2fSJosef Bacik if (newsize <= oldsize) { 118394c1e62dSHugh Dickins loff_t holebegin = round_up(newsize, PAGE_SIZE); 1184d0424c42SHugh Dickins if (oldsize > holebegin) 1185d0424c42SHugh Dickins unmap_mapping_range(inode->i_mapping, 1186d0424c42SHugh Dickins holebegin, 0, 1); 
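			/* Now drop the folios (and any swap) beyond the new size */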
1187d0424c42SHugh Dickins if (info->alloced) 1188d0424c42SHugh Dickins shmem_truncate_range(inode, 1189d0424c42SHugh Dickins newsize, (loff_t)-1); 119094c1e62dSHugh Dickins /* unmap again to remove racily COWed private pages */ 1191d0424c42SHugh Dickins if (oldsize > holebegin) 1192d0424c42SHugh Dickins unmap_mapping_range(inode->i_mapping, 1193d0424c42SHugh Dickins holebegin, 0, 1); 119494c1e62dSHugh Dickins } 11951da177e4SLinus Torvalds } 11961da177e4SLinus Torvalds 1197e09764cfSCarlos Maiolino if (is_quota_modification(idmap, inode, attr)) { 1198e09764cfSCarlos Maiolino error = dquot_initialize(inode); 1199e09764cfSCarlos Maiolino if (error) 1200e09764cfSCarlos Maiolino return error; 1201e09764cfSCarlos Maiolino } 1202e09764cfSCarlos Maiolino 1203e09764cfSCarlos Maiolino /* Transfer quota accounting */ 1204e09764cfSCarlos Maiolino if (i_uid_needs_update(idmap, attr, inode) || 1205e09764cfSCarlos Maiolino i_gid_needs_update(idmap, attr, inode)) { 1206e09764cfSCarlos Maiolino error = dquot_transfer(idmap, inode, attr); 1207e09764cfSCarlos Maiolino 1208e09764cfSCarlos Maiolino if (error) 1209e09764cfSCarlos Maiolino return error; 1210e09764cfSCarlos Maiolino } 1211e09764cfSCarlos Maiolino 12127a80e5b8SGiuseppe Scrivano setattr_copy(idmap, inode, attr); 1213db78b877SChristoph Hellwig if (attr->ia_valid & ATTR_MODE) 12147a80e5b8SGiuseppe Scrivano error = posix_acl_chmod(idmap, dentry, inode->i_mode); 121536f05cabSJeff Layton if (!error && update_ctime) { 121636f05cabSJeff Layton inode->i_ctime = current_time(inode); 121736f05cabSJeff Layton if (update_mtime) 121836f05cabSJeff Layton inode->i_mtime = inode->i_ctime; 121936f05cabSJeff Layton inode_inc_iversion(inode); 122036f05cabSJeff Layton } 12211da177e4SLinus Torvalds return error; 12221da177e4SLinus Torvalds } 12231da177e4SLinus Torvalds 12241f895f75SAl Viro static void shmem_evict_inode(struct inode *inode) 12251da177e4SLinus Torvalds { 12261da177e4SLinus Torvalds struct shmem_inode_info *info = SHMEM_I(inode); 1227779750d2SKirill A. Shutemov struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 12281da177e4SLinus Torvalds 122930e6a51dSHui Su if (shmem_mapping(inode->i_mapping)) { 12301da177e4SLinus Torvalds shmem_unacct_size(info->flags, inode->i_size); 12311da177e4SLinus Torvalds inode->i_size = 0; 1232bc786390SHugh Dickins mapping_set_exiting(inode->i_mapping); 12333889e6e7Snpiggin@suse.de shmem_truncate_range(inode, 0, (loff_t)-1); 1234779750d2SKirill A. Shutemov if (!list_empty(&info->shrinklist)) { 1235779750d2SKirill A. Shutemov spin_lock(&sbinfo->shrinklist_lock); 1236779750d2SKirill A. Shutemov if (!list_empty(&info->shrinklist)) { 1237779750d2SKirill A. Shutemov list_del_init(&info->shrinklist); 1238779750d2SKirill A. Shutemov sbinfo->shrinklist_len--; 1239779750d2SKirill A. Shutemov } 1240779750d2SKirill A. Shutemov spin_unlock(&sbinfo->shrinklist_lock); 1241779750d2SKirill A. Shutemov } 1242af53d3e9SHugh Dickins while (!list_empty(&info->swaplist)) { 1243af53d3e9SHugh Dickins /* Wait while shmem_unuse() is scanning this inode... 
*/ 1244af53d3e9SHugh Dickins wait_var_event(&info->stop_eviction, 1245af53d3e9SHugh Dickins !atomic_read(&info->stop_eviction)); 1246cb5f7b9aSHugh Dickins mutex_lock(&shmem_swaplist_mutex); 1247af53d3e9SHugh Dickins /* ...but beware of the race if we peeked too early */ 1248af53d3e9SHugh Dickins if (!atomic_read(&info->stop_eviction)) 12491da177e4SLinus Torvalds list_del_init(&info->swaplist); 1250cb5f7b9aSHugh Dickins mutex_unlock(&shmem_swaplist_mutex); 12511da177e4SLinus Torvalds } 12523ed47db3SAl Viro } 1253b09e0fa4SEric Paris 125438f38657SAristeu Rozanski simple_xattrs_free(&info->xattrs); 12550f3c42f5SHugh Dickins WARN_ON(inode->i_blocks); 12565b04c689SPavel Emelyanov shmem_free_inode(inode->i_sb); 1257dbd5768fSJan Kara clear_inode(inode); 1258e09764cfSCarlos Maiolino #ifdef CONFIG_TMPFS_QUOTA 1259e09764cfSCarlos Maiolino dquot_free_inode(inode); 1260e09764cfSCarlos Maiolino dquot_drop(inode); 1261e09764cfSCarlos Maiolino #endif 12621da177e4SLinus Torvalds } 12631da177e4SLinus Torvalds 1264b56a2d8aSVineeth Remanan Pillai static int shmem_find_swap_entries(struct address_space *mapping, 1265da08e9b7SMatthew Wilcox (Oracle) pgoff_t start, struct folio_batch *fbatch, 1266da08e9b7SMatthew Wilcox (Oracle) pgoff_t *indices, unsigned int type) 1267478922e2SMatthew Wilcox { 1268b56a2d8aSVineeth Remanan Pillai XA_STATE(xas, &mapping->i_pages, start); 1269da08e9b7SMatthew Wilcox (Oracle) struct folio *folio; 127087039546SHugh Dickins swp_entry_t entry; 1271478922e2SMatthew Wilcox 1272478922e2SMatthew Wilcox rcu_read_lock(); 1273da08e9b7SMatthew Wilcox (Oracle) xas_for_each(&xas, folio, ULONG_MAX) { 1274da08e9b7SMatthew Wilcox (Oracle) if (xas_retry(&xas, folio)) 12755b9c98f3SMike Kravetz continue; 1276b56a2d8aSVineeth Remanan Pillai 1277da08e9b7SMatthew Wilcox (Oracle) if (!xa_is_value(folio)) 1278478922e2SMatthew Wilcox continue; 1279b56a2d8aSVineeth Remanan Pillai 1280da08e9b7SMatthew Wilcox (Oracle) entry = radix_to_swp_entry(folio); 12816cec2b95SMiaohe Lin /* 12826cec2b95SMiaohe Lin * swapin error entries can be found in the mapping. But they're 12836cec2b95SMiaohe Lin * deliberately ignored here as we've done everything we can do. 12846cec2b95SMiaohe Lin */ 128587039546SHugh Dickins if (swp_type(entry) != type) 1286b56a2d8aSVineeth Remanan Pillai continue; 1287b56a2d8aSVineeth Remanan Pillai 1288e384200eSHugh Dickins indices[folio_batch_count(fbatch)] = xas.xa_index; 1289da08e9b7SMatthew Wilcox (Oracle) if (!folio_batch_add(fbatch, folio)) 1290da08e9b7SMatthew Wilcox (Oracle) break; 1291b56a2d8aSVineeth Remanan Pillai 1292b56a2d8aSVineeth Remanan Pillai if (need_resched()) { 1293e21a2955SMatthew Wilcox xas_pause(&xas); 1294478922e2SMatthew Wilcox cond_resched_rcu(); 1295478922e2SMatthew Wilcox } 1296b56a2d8aSVineeth Remanan Pillai } 1297478922e2SMatthew Wilcox rcu_read_unlock(); 1298e21a2955SMatthew Wilcox 1299da08e9b7SMatthew Wilcox (Oracle) return xas.xa_index; 1300b56a2d8aSVineeth Remanan Pillai } 1301b56a2d8aSVineeth Remanan Pillai 1302b56a2d8aSVineeth Remanan Pillai /* 1303b56a2d8aSVineeth Remanan Pillai * Move the swapped pages for an inode to page cache. Returns the count 1304b56a2d8aSVineeth Remanan Pillai * of pages swapped in, or the error in case of failure. 
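 * Only -ENOMEM ends the batch early; other per-entry errors are skipped
 * so the remaining entries can still be brought back in.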
1305b56a2d8aSVineeth Remanan Pillai */ 1306da08e9b7SMatthew Wilcox (Oracle) static int shmem_unuse_swap_entries(struct inode *inode, 1307da08e9b7SMatthew Wilcox (Oracle) struct folio_batch *fbatch, pgoff_t *indices) 1308b56a2d8aSVineeth Remanan Pillai { 1309b56a2d8aSVineeth Remanan Pillai int i = 0; 1310b56a2d8aSVineeth Remanan Pillai int ret = 0; 1311b56a2d8aSVineeth Remanan Pillai int error = 0; 1312b56a2d8aSVineeth Remanan Pillai struct address_space *mapping = inode->i_mapping; 1313b56a2d8aSVineeth Remanan Pillai 1314da08e9b7SMatthew Wilcox (Oracle) for (i = 0; i < folio_batch_count(fbatch); i++) { 1315da08e9b7SMatthew Wilcox (Oracle) struct folio *folio = fbatch->folios[i]; 1316b56a2d8aSVineeth Remanan Pillai 1317da08e9b7SMatthew Wilcox (Oracle) if (!xa_is_value(folio)) 1318b56a2d8aSVineeth Remanan Pillai continue; 1319da08e9b7SMatthew Wilcox (Oracle) error = shmem_swapin_folio(inode, indices[i], 1320da08e9b7SMatthew Wilcox (Oracle) &folio, SGP_CACHE, 1321b56a2d8aSVineeth Remanan Pillai mapping_gfp_mask(mapping), 1322b56a2d8aSVineeth Remanan Pillai NULL, NULL); 1323b56a2d8aSVineeth Remanan Pillai if (error == 0) { 1324da08e9b7SMatthew Wilcox (Oracle) folio_unlock(folio); 1325da08e9b7SMatthew Wilcox (Oracle) folio_put(folio); 1326b56a2d8aSVineeth Remanan Pillai ret++; 1327b56a2d8aSVineeth Remanan Pillai } 1328b56a2d8aSVineeth Remanan Pillai if (error == -ENOMEM) 1329b56a2d8aSVineeth Remanan Pillai break; 1330b56a2d8aSVineeth Remanan Pillai error = 0; 1331b56a2d8aSVineeth Remanan Pillai } 1332b56a2d8aSVineeth Remanan Pillai return error ? error : ret; 1333478922e2SMatthew Wilcox } 1334478922e2SMatthew Wilcox 133546f65ec1SHugh Dickins /* 133646f65ec1SHugh Dickins * If swap found in inode, free it and move page from swapcache to filecache. 133746f65ec1SHugh Dickins */ 133810a9c496SChristoph Hellwig static int shmem_unuse_inode(struct inode *inode, unsigned int type) 13391da177e4SLinus Torvalds { 1340b56a2d8aSVineeth Remanan Pillai struct address_space *mapping = inode->i_mapping; 1341b56a2d8aSVineeth Remanan Pillai pgoff_t start = 0; 1342da08e9b7SMatthew Wilcox (Oracle) struct folio_batch fbatch; 1343b56a2d8aSVineeth Remanan Pillai pgoff_t indices[PAGEVEC_SIZE]; 1344b56a2d8aSVineeth Remanan Pillai int ret = 0; 13451da177e4SLinus Torvalds 1346b56a2d8aSVineeth Remanan Pillai do { 1347da08e9b7SMatthew Wilcox (Oracle) folio_batch_init(&fbatch); 1348da08e9b7SMatthew Wilcox (Oracle) shmem_find_swap_entries(mapping, start, &fbatch, indices, type); 1349da08e9b7SMatthew Wilcox (Oracle) if (folio_batch_count(&fbatch) == 0) { 1350b56a2d8aSVineeth Remanan Pillai ret = 0; 1351778dd893SHugh Dickins break; 1352b56a2d8aSVineeth Remanan Pillai } 1353b56a2d8aSVineeth Remanan Pillai 1354da08e9b7SMatthew Wilcox (Oracle) ret = shmem_unuse_swap_entries(inode, &fbatch, indices); 1355b56a2d8aSVineeth Remanan Pillai if (ret < 0) 1356b56a2d8aSVineeth Remanan Pillai break; 1357b56a2d8aSVineeth Remanan Pillai 1358da08e9b7SMatthew Wilcox (Oracle) start = indices[folio_batch_count(&fbatch) - 1]; 1359b56a2d8aSVineeth Remanan Pillai } while (true); 1360b56a2d8aSVineeth Remanan Pillai 1361b56a2d8aSVineeth Remanan Pillai return ret; 1362b56a2d8aSVineeth Remanan Pillai } 1363b56a2d8aSVineeth Remanan Pillai 1364b56a2d8aSVineeth Remanan Pillai /* 1365b56a2d8aSVineeth Remanan Pillai * Read all the shared memory data that resides in the swap 1366b56a2d8aSVineeth Remanan Pillai * device 'type' back into memory, so the swap device can be 1367b56a2d8aSVineeth Remanan Pillai * unused. 
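 * Called from the swapoff path: walks shmem_swaplist under
 * shmem_swaplist_mutex, dropping the mutex around each inode scan.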
1368b56a2d8aSVineeth Remanan Pillai */ 136910a9c496SChristoph Hellwig int shmem_unuse(unsigned int type) 1370b56a2d8aSVineeth Remanan Pillai { 1371b56a2d8aSVineeth Remanan Pillai struct shmem_inode_info *info, *next; 1372b56a2d8aSVineeth Remanan Pillai int error = 0; 1373b56a2d8aSVineeth Remanan Pillai 1374b56a2d8aSVineeth Remanan Pillai if (list_empty(&shmem_swaplist)) 1375b56a2d8aSVineeth Remanan Pillai return 0; 1376b56a2d8aSVineeth Remanan Pillai 1377b56a2d8aSVineeth Remanan Pillai mutex_lock(&shmem_swaplist_mutex); 1378b56a2d8aSVineeth Remanan Pillai list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) { 1379b56a2d8aSVineeth Remanan Pillai if (!info->swapped) { 1380b56a2d8aSVineeth Remanan Pillai list_del_init(&info->swaplist); 1381b56a2d8aSVineeth Remanan Pillai continue; 1382b56a2d8aSVineeth Remanan Pillai } 1383af53d3e9SHugh Dickins /* 1384af53d3e9SHugh Dickins * Drop the swaplist mutex while searching the inode for swap; 1385af53d3e9SHugh Dickins * but before doing so, make sure shmem_evict_inode() will not 1386af53d3e9SHugh Dickins * remove placeholder inode from swaplist, nor let it be freed 1387af53d3e9SHugh Dickins * (igrab() would protect from unlink, but not from unmount). 1388af53d3e9SHugh Dickins */ 1389af53d3e9SHugh Dickins atomic_inc(&info->stop_eviction); 1390b56a2d8aSVineeth Remanan Pillai mutex_unlock(&shmem_swaplist_mutex); 1391b56a2d8aSVineeth Remanan Pillai 139210a9c496SChristoph Hellwig error = shmem_unuse_inode(&info->vfs_inode, type); 1393b56a2d8aSVineeth Remanan Pillai cond_resched(); 1394b56a2d8aSVineeth Remanan Pillai 1395b56a2d8aSVineeth Remanan Pillai mutex_lock(&shmem_swaplist_mutex); 1396b56a2d8aSVineeth Remanan Pillai next = list_next_entry(info, swaplist); 1397b56a2d8aSVineeth Remanan Pillai if (!info->swapped) 1398b56a2d8aSVineeth Remanan Pillai list_del_init(&info->swaplist); 1399af53d3e9SHugh Dickins if (atomic_dec_and_test(&info->stop_eviction)) 1400af53d3e9SHugh Dickins wake_up_var(&info->stop_eviction); 1401b56a2d8aSVineeth Remanan Pillai if (error) 1402b56a2d8aSVineeth Remanan Pillai break; 14031da177e4SLinus Torvalds } 1404cb5f7b9aSHugh Dickins mutex_unlock(&shmem_swaplist_mutex); 1405778dd893SHugh Dickins 1406778dd893SHugh Dickins return error; 14071da177e4SLinus Torvalds } 14081da177e4SLinus Torvalds 14091da177e4SLinus Torvalds /* 14101da177e4SLinus Torvalds * Move the page from the page cache to the swap cache. 14111da177e4SLinus Torvalds */ 14121da177e4SLinus Torvalds static int shmem_writepage(struct page *page, struct writeback_control *wbc) 14131da177e4SLinus Torvalds { 1414e2e3fdc7SMatthew Wilcox (Oracle) struct folio *folio = page_folio(page); 14158ccee8c1SLuis Chamberlain struct address_space *mapping = folio->mapping; 14168ccee8c1SLuis Chamberlain struct inode *inode = mapping->host; 14178ccee8c1SLuis Chamberlain struct shmem_inode_info *info = SHMEM_I(inode); 14182c6efe9cSLuis Chamberlain struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 14196922c0c7SHugh Dickins swp_entry_t swap; 14206922c0c7SHugh Dickins pgoff_t index; 14211da177e4SLinus Torvalds 14221e6decf3SHugh Dickins /* 1423cf7992bfSLuis Chamberlain * Our capabilities prevent regular writeback or sync from ever calling 1424cf7992bfSLuis Chamberlain * shmem_writepage; but a stacking filesystem might use ->writepage of 1425cf7992bfSLuis Chamberlain * its underlying filesystem, in which case tmpfs should write out to 1426cf7992bfSLuis Chamberlain * swap only in response to memory pressure, and not for the writeback 1427cf7992bfSLuis Chamberlain * threads or sync. 
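 * The WARN_ON_ONCE(!wbc->for_reclaim) below catches any such caller and
 * simply redirties the folio instead.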
1428cf7992bfSLuis Chamberlain */ 1429cf7992bfSLuis Chamberlain if (WARN_ON_ONCE(!wbc->for_reclaim)) 1430cf7992bfSLuis Chamberlain goto redirty; 1431cf7992bfSLuis Chamberlain 14322c6efe9cSLuis Chamberlain if (WARN_ON_ONCE((info->flags & VM_LOCKED) || sbinfo->noswap)) 14339a976f0cSLuis Chamberlain goto redirty; 14349a976f0cSLuis Chamberlain 14359a976f0cSLuis Chamberlain if (!total_swap_pages) 14369a976f0cSLuis Chamberlain goto redirty; 14379a976f0cSLuis Chamberlain 1438cf7992bfSLuis Chamberlain /* 14391e6decf3SHugh Dickins * If /sys/kernel/mm/transparent_hugepage/shmem_enabled is "always" or 14401e6decf3SHugh Dickins * "force", drivers/gpu/drm/i915/gem/i915_gem_shmem.c gets huge pages, 14411e6decf3SHugh Dickins * and its shmem_writeback() needs them to be split when swapping. 14421e6decf3SHugh Dickins */ 1443f530ed0eSMatthew Wilcox (Oracle) if (folio_test_large(folio)) { 14441e6decf3SHugh Dickins /* Ensure the subpages are still dirty */ 1445f530ed0eSMatthew Wilcox (Oracle) folio_test_set_dirty(folio); 14461e6decf3SHugh Dickins if (split_huge_page(page) < 0) 14471e6decf3SHugh Dickins goto redirty; 1448f530ed0eSMatthew Wilcox (Oracle) folio = page_folio(page); 1449f530ed0eSMatthew Wilcox (Oracle) folio_clear_dirty(folio); 14501e6decf3SHugh Dickins } 14511e6decf3SHugh Dickins 1452f530ed0eSMatthew Wilcox (Oracle) index = folio->index; 14531635f6a7SHugh Dickins 14541635f6a7SHugh Dickins /* 14551635f6a7SHugh Dickins * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC 14561635f6a7SHugh Dickins * value into swapfile.c, the only way we can correctly account for a 1457f530ed0eSMatthew Wilcox (Oracle) * fallocated folio arriving here is now to initialize it and write it. 14581aac1400SHugh Dickins * 1459f530ed0eSMatthew Wilcox (Oracle) * That's okay for a folio already fallocated earlier, but if we have 14601aac1400SHugh Dickins * not yet completed the fallocation, then (a) we want to keep track 1461f530ed0eSMatthew Wilcox (Oracle) * of this folio in case we have to undo it, and (b) it may not be a 14621aac1400SHugh Dickins * good idea to continue anyway, once we're pushing into swap. So 1463f530ed0eSMatthew Wilcox (Oracle) * reactivate the folio, and let shmem_fallocate() quit when too many. 
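 * (nr_unswapped, bumped below, is what shmem_fallocate() watches to
 * decide when "too many" has been reached.)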
14641635f6a7SHugh Dickins */ 1465f530ed0eSMatthew Wilcox (Oracle) if (!folio_test_uptodate(folio)) { 14661aac1400SHugh Dickins if (inode->i_private) { 14671aac1400SHugh Dickins struct shmem_falloc *shmem_falloc; 14681aac1400SHugh Dickins spin_lock(&inode->i_lock); 14691aac1400SHugh Dickins shmem_falloc = inode->i_private; 14701aac1400SHugh Dickins if (shmem_falloc && 14718e205f77SHugh Dickins !shmem_falloc->waitq && 14721aac1400SHugh Dickins index >= shmem_falloc->start && 14731aac1400SHugh Dickins index < shmem_falloc->next) 14741aac1400SHugh Dickins shmem_falloc->nr_unswapped++; 14751aac1400SHugh Dickins else 14761aac1400SHugh Dickins shmem_falloc = NULL; 14771aac1400SHugh Dickins spin_unlock(&inode->i_lock); 14781aac1400SHugh Dickins if (shmem_falloc) 14791aac1400SHugh Dickins goto redirty; 14801aac1400SHugh Dickins } 1481f530ed0eSMatthew Wilcox (Oracle) folio_zero_range(folio, 0, folio_size(folio)); 1482f530ed0eSMatthew Wilcox (Oracle) flush_dcache_folio(folio); 1483f530ed0eSMatthew Wilcox (Oracle) folio_mark_uptodate(folio); 14841635f6a7SHugh Dickins } 14851635f6a7SHugh Dickins 1486e2e3fdc7SMatthew Wilcox (Oracle) swap = folio_alloc_swap(folio); 148748f170fbSHugh Dickins if (!swap.val) 148848f170fbSHugh Dickins goto redirty; 1489d9fe526aSHugh Dickins 1490b1dea800SHugh Dickins /* 1491b1dea800SHugh Dickins * Add inode to shmem_unuse()'s list of swapped-out inodes, 1492f530ed0eSMatthew Wilcox (Oracle) * if it's not already there. Do it now before the folio is 14936922c0c7SHugh Dickins * moved to swap cache, when its pagelock no longer protects 1494b1dea800SHugh Dickins * the inode from eviction. But don't unlock the mutex until 14956922c0c7SHugh Dickins * we've incremented swapped, because shmem_unuse_inode() will 14966922c0c7SHugh Dickins * prune a !swapped inode from the swaplist under this mutex. 1497b1dea800SHugh Dickins */ 1498b1dea800SHugh Dickins mutex_lock(&shmem_swaplist_mutex); 149905bf86b4SHugh Dickins if (list_empty(&info->swaplist)) 1500b56a2d8aSVineeth Remanan Pillai list_add(&info->swaplist, &shmem_swaplist); 1501b1dea800SHugh Dickins 1502a4c366f0SMatthew Wilcox (Oracle) if (add_to_swap_cache(folio, swap, 15033852f676SJoonsoo Kim __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN, 15043852f676SJoonsoo Kim NULL) == 0) { 15054595ef88SKirill A. Shutemov spin_lock_irq(&info->lock); 1506267a4c76SHugh Dickins shmem_recalc_inode(inode); 1507267a4c76SHugh Dickins info->swapped++; 15084595ef88SKirill A. 
Shutemov spin_unlock_irq(&info->lock); 1509267a4c76SHugh Dickins 1510aaa46865SHugh Dickins swap_shmem_alloc(swap); 15114cd400fdSMatthew Wilcox (Oracle) shmem_delete_from_page_cache(folio, swp_to_radix_entry(swap)); 15126922c0c7SHugh Dickins 15136922c0c7SHugh Dickins mutex_unlock(&shmem_swaplist_mutex); 1514f530ed0eSMatthew Wilcox (Oracle) BUG_ON(folio_mapped(folio)); 1515f530ed0eSMatthew Wilcox (Oracle) swap_writepage(&folio->page, wbc); 15161da177e4SLinus Torvalds return 0; 15171da177e4SLinus Torvalds } 15181da177e4SLinus Torvalds 15196922c0c7SHugh Dickins mutex_unlock(&shmem_swaplist_mutex); 15204081f744SMatthew Wilcox (Oracle) put_swap_folio(folio, swap); 15211da177e4SLinus Torvalds redirty: 1522f530ed0eSMatthew Wilcox (Oracle) folio_mark_dirty(folio); 1523d9fe526aSHugh Dickins if (wbc->for_reclaim) 1524f530ed0eSMatthew Wilcox (Oracle) return AOP_WRITEPAGE_ACTIVATE; /* Return with folio locked */ 1525f530ed0eSMatthew Wilcox (Oracle) folio_unlock(folio); 1526d9fe526aSHugh Dickins return 0; 15271da177e4SLinus Torvalds } 15281da177e4SLinus Torvalds 152975edd345SHugh Dickins #if defined(CONFIG_NUMA) && defined(CONFIG_TMPFS) 153071fe804bSLee Schermerhorn static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol) 1531680d794bSakpm@linux-foundation.org { 1532680d794bSakpm@linux-foundation.org char buffer[64]; 1533680d794bSakpm@linux-foundation.org 153471fe804bSLee Schermerhorn if (!mpol || mpol->mode == MPOL_DEFAULT) 1535095f1fc4SLee Schermerhorn return; /* show nothing */ 1536095f1fc4SLee Schermerhorn 1537a7a88b23SHugh Dickins mpol_to_str(buffer, sizeof(buffer), mpol); 1538095f1fc4SLee Schermerhorn 1539095f1fc4SLee Schermerhorn seq_printf(seq, ",mpol=%s", buffer); 1540680d794bSakpm@linux-foundation.org } 154171fe804bSLee Schermerhorn 154271fe804bSLee Schermerhorn static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) 154371fe804bSLee Schermerhorn { 154471fe804bSLee Schermerhorn struct mempolicy *mpol = NULL; 154571fe804bSLee Schermerhorn if (sbinfo->mpol) { 1546bf11b9a8SSebastian Andrzej Siewior raw_spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */ 154771fe804bSLee Schermerhorn mpol = sbinfo->mpol; 154871fe804bSLee Schermerhorn mpol_get(mpol); 1549bf11b9a8SSebastian Andrzej Siewior raw_spin_unlock(&sbinfo->stat_lock); 155071fe804bSLee Schermerhorn } 155171fe804bSLee Schermerhorn return mpol; 155271fe804bSLee Schermerhorn } 155375edd345SHugh Dickins #else /* !CONFIG_NUMA || !CONFIG_TMPFS */ 155475edd345SHugh Dickins static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol) 155575edd345SHugh Dickins { 155675edd345SHugh Dickins } 155775edd345SHugh Dickins static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) 155875edd345SHugh Dickins { 155975edd345SHugh Dickins return NULL; 156075edd345SHugh Dickins } 156175edd345SHugh Dickins #endif /* CONFIG_NUMA && CONFIG_TMPFS */ 156275edd345SHugh Dickins #ifndef CONFIG_NUMA 156375edd345SHugh Dickins #define vm_policy vm_private_data 156475edd345SHugh Dickins #endif 1565680d794bSakpm@linux-foundation.org 1566800d8c63SKirill A. Shutemov static void shmem_pseudo_vma_init(struct vm_area_struct *vma, 1567800d8c63SKirill A. Shutemov struct shmem_inode_info *info, pgoff_t index) 1568800d8c63SKirill A. Shutemov { 1569800d8c63SKirill A. Shutemov /* Create a pseudo vma that just contains the policy */ 15702c4541e2SKirill A. Shutemov vma_init(vma, NULL); 1571800d8c63SKirill A. 
Shutemov /* Bias interleave by inode number to distribute better across nodes */ 1572800d8c63SKirill A. Shutemov vma->vm_pgoff = index + info->vfs_inode.i_ino; 1573800d8c63SKirill A. Shutemov vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index); 1574800d8c63SKirill A. Shutemov } 1575800d8c63SKirill A. Shutemov 1576800d8c63SKirill A. Shutemov static void shmem_pseudo_vma_destroy(struct vm_area_struct *vma) 1577800d8c63SKirill A. Shutemov { 1578800d8c63SKirill A. Shutemov /* Drop reference taken by mpol_shared_policy_lookup() */ 1579800d8c63SKirill A. Shutemov mpol_cond_put(vma->vm_policy); 1580800d8c63SKirill A. Shutemov } 1581800d8c63SKirill A. Shutemov 15825739a81cSMatthew Wilcox (Oracle) static struct folio *shmem_swapin(swp_entry_t swap, gfp_t gfp, 158341ffe5d5SHugh Dickins struct shmem_inode_info *info, pgoff_t index) 15841da177e4SLinus Torvalds { 15851da177e4SLinus Torvalds struct vm_area_struct pvma; 158618a2f371SMel Gorman struct page *page; 15878c63ca5bSWill Deacon struct vm_fault vmf = { 15888c63ca5bSWill Deacon .vma = &pvma, 15898c63ca5bSWill Deacon }; 15901da177e4SLinus Torvalds 1591800d8c63SKirill A. Shutemov shmem_pseudo_vma_init(&pvma, info, index); 1592e9e9b7ecSMinchan Kim page = swap_cluster_readahead(swap, gfp, &vmf); 1593800d8c63SKirill A. Shutemov shmem_pseudo_vma_destroy(&pvma); 159418a2f371SMel Gorman 15955739a81cSMatthew Wilcox (Oracle) if (!page) 15965739a81cSMatthew Wilcox (Oracle) return NULL; 15975739a81cSMatthew Wilcox (Oracle) return page_folio(page); 1598800d8c63SKirill A. Shutemov } 159918a2f371SMel Gorman 160078cc8cdcSRik van Riel /* 160178cc8cdcSRik van Riel * Make sure huge_gfp is always more limited than limit_gfp. 160278cc8cdcSRik van Riel * Some of the flags set permissions, while others set limitations. 160378cc8cdcSRik van Riel */ 160478cc8cdcSRik van Riel static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp) 160578cc8cdcSRik van Riel { 160678cc8cdcSRik van Riel gfp_t allowflags = __GFP_IO | __GFP_FS | __GFP_RECLAIM; 160778cc8cdcSRik van Riel gfp_t denyflags = __GFP_NOWARN | __GFP_NORETRY; 1608187df5ddSRik van Riel gfp_t zoneflags = limit_gfp & GFP_ZONEMASK; 1609187df5ddSRik van Riel gfp_t result = huge_gfp & ~(allowflags | GFP_ZONEMASK); 1610187df5ddSRik van Riel 1611187df5ddSRik van Riel /* Allow allocations only from the originally specified zones. */ 1612187df5ddSRik van Riel result |= zoneflags; 161378cc8cdcSRik van Riel 161478cc8cdcSRik van Riel /* 161578cc8cdcSRik van Riel * Minimize the result gfp by taking the union with the deny flags, 161678cc8cdcSRik van Riel * and the intersection of the allow flags. 161778cc8cdcSRik van Riel */ 161878cc8cdcSRik van Riel result |= (limit_gfp & denyflags); 161978cc8cdcSRik van Riel result |= (huge_gfp & limit_gfp) & allowflags; 162078cc8cdcSRik van Riel 162178cc8cdcSRik van Riel return result; 162278cc8cdcSRik van Riel } 162378cc8cdcSRik van Riel 162472827e5cSMatthew Wilcox (Oracle) static struct folio *shmem_alloc_hugefolio(gfp_t gfp, 1625800d8c63SKirill A. Shutemov struct shmem_inode_info *info, pgoff_t index) 1626800d8c63SKirill A. Shutemov { 1627800d8c63SKirill A. Shutemov struct vm_area_struct pvma; 16287b8d046fSMatthew Wilcox struct address_space *mapping = info->vfs_inode.i_mapping; 16297b8d046fSMatthew Wilcox pgoff_t hindex; 1630dfe98499SMatthew Wilcox (Oracle) struct folio *folio; 1631800d8c63SKirill A. 
Shutemov 16324620a06eSGeert Uytterhoeven hindex = round_down(index, HPAGE_PMD_NR); 16337b8d046fSMatthew Wilcox if (xa_find(&mapping->i_pages, &hindex, hindex + HPAGE_PMD_NR - 1, 16347b8d046fSMatthew Wilcox XA_PRESENT)) 1635800d8c63SKirill A. Shutemov return NULL; 1636800d8c63SKirill A. Shutemov 1637800d8c63SKirill A. Shutemov shmem_pseudo_vma_init(&pvma, info, hindex); 1638dfe98499SMatthew Wilcox (Oracle) folio = vma_alloc_folio(gfp, HPAGE_PMD_ORDER, &pvma, 0, true); 1639800d8c63SKirill A. Shutemov shmem_pseudo_vma_destroy(&pvma); 1640dfe98499SMatthew Wilcox (Oracle) if (!folio) 1641dcdf11eeSDavid Rientjes count_vm_event(THP_FILE_FALLBACK); 164272827e5cSMatthew Wilcox (Oracle) return folio; 164318a2f371SMel Gorman } 164418a2f371SMel Gorman 16450c023ef5SMatthew Wilcox (Oracle) static struct folio *shmem_alloc_folio(gfp_t gfp, 164618a2f371SMel Gorman struct shmem_inode_info *info, pgoff_t index) 164718a2f371SMel Gorman { 164818a2f371SMel Gorman struct vm_area_struct pvma; 16490c023ef5SMatthew Wilcox (Oracle) struct folio *folio; 165018a2f371SMel Gorman 1651800d8c63SKirill A. Shutemov shmem_pseudo_vma_init(&pvma, info, index); 16520c023ef5SMatthew Wilcox (Oracle) folio = vma_alloc_folio(gfp, 0, &pvma, 0, false); 1653800d8c63SKirill A. Shutemov shmem_pseudo_vma_destroy(&pvma); 165418a2f371SMel Gorman 16550c023ef5SMatthew Wilcox (Oracle) return folio; 165618a2f371SMel Gorman } 165718a2f371SMel Gorman 1658b1d0ec3aSMatthew Wilcox (Oracle) static struct folio *shmem_alloc_and_acct_folio(gfp_t gfp, struct inode *inode, 1659800d8c63SKirill A. Shutemov pgoff_t index, bool huge) 1660800d8c63SKirill A. Shutemov { 16610f079694SMike Rapoport struct shmem_inode_info *info = SHMEM_I(inode); 166272827e5cSMatthew Wilcox (Oracle) struct folio *folio; 1663800d8c63SKirill A. Shutemov int nr; 1664c7e263abSLukas Czerner int err; 1665800d8c63SKirill A. Shutemov 1666396bcc52SMatthew Wilcox (Oracle) if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 1667800d8c63SKirill A. Shutemov huge = false; 1668800d8c63SKirill A. Shutemov nr = huge ? HPAGE_PMD_NR : 1; 1669800d8c63SKirill A. Shutemov 1670c7e263abSLukas Czerner err = shmem_inode_acct_block(inode, nr); 1671c7e263abSLukas Czerner if (err) 1672800d8c63SKirill A. Shutemov goto failed; 1673800d8c63SKirill A. Shutemov 1674800d8c63SKirill A. Shutemov if (huge) 167572827e5cSMatthew Wilcox (Oracle) folio = shmem_alloc_hugefolio(gfp, info, index); 1676800d8c63SKirill A. Shutemov else 167772827e5cSMatthew Wilcox (Oracle) folio = shmem_alloc_folio(gfp, info, index); 167872827e5cSMatthew Wilcox (Oracle) if (folio) { 167972827e5cSMatthew Wilcox (Oracle) __folio_set_locked(folio); 168072827e5cSMatthew Wilcox (Oracle) __folio_set_swapbacked(folio); 1681b1d0ec3aSMatthew Wilcox (Oracle) return folio; 168275edd345SHugh Dickins } 168318a2f371SMel Gorman 1684800d8c63SKirill A. Shutemov err = -ENOMEM; 16850f079694SMike Rapoport shmem_inode_unacct_blocks(inode, nr); 1686800d8c63SKirill A. Shutemov failed: 1687800d8c63SKirill A. Shutemov return ERR_PTR(err); 16881da177e4SLinus Torvalds } 168971fe804bSLee Schermerhorn 16901da177e4SLinus Torvalds /* 1691bde05d1cSHugh Dickins * When a page is moved from swapcache to shmem filecache (either by the 1692fc26babbSMatthew Wilcox (Oracle) * usual swapin of shmem_get_folio_gfp(), or by the less common swapoff of 1693bde05d1cSHugh Dickins * shmem_unuse_inode()), it may have been read in earlier from swap, in 1694bde05d1cSHugh Dickins * ignorance of the mapping it belongs to. 
If that mapping has special 1695bde05d1cSHugh Dickins * constraints (like the gma500 GEM driver, which requires RAM below 4GB), 1696bde05d1cSHugh Dickins * we may need to copy to a suitable page before moving to filecache. 1697bde05d1cSHugh Dickins * 1698bde05d1cSHugh Dickins * In a future release, this may well be extended to respect cpuset and 1699bde05d1cSHugh Dickins * NUMA mempolicy, and applied also to anonymous pages in do_swap_page(); 1700bde05d1cSHugh Dickins * but for now it is a simple matter of zone. 1701bde05d1cSHugh Dickins */ 1702069d849cSMatthew Wilcox (Oracle) static bool shmem_should_replace_folio(struct folio *folio, gfp_t gfp) 1703bde05d1cSHugh Dickins { 1704069d849cSMatthew Wilcox (Oracle) return folio_zonenum(folio) > gfp_zone(gfp); 1705bde05d1cSHugh Dickins } 1706bde05d1cSHugh Dickins 17070d698e25SMatthew Wilcox (Oracle) static int shmem_replace_folio(struct folio **foliop, gfp_t gfp, 1708bde05d1cSHugh Dickins struct shmem_inode_info *info, pgoff_t index) 1709bde05d1cSHugh Dickins { 1710d21bba2bSMatthew Wilcox (Oracle) struct folio *old, *new; 1711bde05d1cSHugh Dickins struct address_space *swap_mapping; 1712c1cb20d4SYu Zhao swp_entry_t entry; 1713bde05d1cSHugh Dickins pgoff_t swap_index; 1714bde05d1cSHugh Dickins int error; 1715bde05d1cSHugh Dickins 17160d698e25SMatthew Wilcox (Oracle) old = *foliop; 1717907ea17eSMatthew Wilcox (Oracle) entry = folio_swap_entry(old); 1718c1cb20d4SYu Zhao swap_index = swp_offset(entry); 1719907ea17eSMatthew Wilcox (Oracle) swap_mapping = swap_address_space(entry); 1720bde05d1cSHugh Dickins 1721bde05d1cSHugh Dickins /* 1722bde05d1cSHugh Dickins * We have arrived here because our zones are constrained, so don't 1723bde05d1cSHugh Dickins * limit chance of success by further cpuset and node constraints. 1724bde05d1cSHugh Dickins */ 1725bde05d1cSHugh Dickins gfp &= ~GFP_CONSTRAINT_MASK; 1726907ea17eSMatthew Wilcox (Oracle) VM_BUG_ON_FOLIO(folio_test_large(old), old); 1727907ea17eSMatthew Wilcox (Oracle) new = shmem_alloc_folio(gfp, info, index); 1728907ea17eSMatthew Wilcox (Oracle) if (!new) 1729bde05d1cSHugh Dickins return -ENOMEM; 1730bde05d1cSHugh Dickins 1731907ea17eSMatthew Wilcox (Oracle) folio_get(new); 1732907ea17eSMatthew Wilcox (Oracle) folio_copy(new, old); 1733907ea17eSMatthew Wilcox (Oracle) flush_dcache_folio(new); 1734bde05d1cSHugh Dickins 1735907ea17eSMatthew Wilcox (Oracle) __folio_set_locked(new); 1736907ea17eSMatthew Wilcox (Oracle) __folio_set_swapbacked(new); 1737907ea17eSMatthew Wilcox (Oracle) folio_mark_uptodate(new); 1738907ea17eSMatthew Wilcox (Oracle) folio_set_swap_entry(new, entry); 1739907ea17eSMatthew Wilcox (Oracle) folio_set_swapcache(new); 1740bde05d1cSHugh Dickins 1741bde05d1cSHugh Dickins /* 1742bde05d1cSHugh Dickins * Our caller will very soon move newpage out of swapcache, but it's 1743bde05d1cSHugh Dickins * a nice clean interface for us to replace oldpage by newpage there. 
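 * On success, the memcg charge and the NR_FILE_PAGES/NR_SHMEM counters
 * are transferred from the old folio to the new one below.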
1744bde05d1cSHugh Dickins */ 1745b93b0163SMatthew Wilcox xa_lock_irq(&swap_mapping->i_pages); 1746907ea17eSMatthew Wilcox (Oracle) error = shmem_replace_entry(swap_mapping, swap_index, old, new); 17470142ef6cSHugh Dickins if (!error) { 1748d21bba2bSMatthew Wilcox (Oracle) mem_cgroup_migrate(old, new); 1749907ea17eSMatthew Wilcox (Oracle) __lruvec_stat_mod_folio(new, NR_FILE_PAGES, 1); 1750907ea17eSMatthew Wilcox (Oracle) __lruvec_stat_mod_folio(new, NR_SHMEM, 1); 1751907ea17eSMatthew Wilcox (Oracle) __lruvec_stat_mod_folio(old, NR_FILE_PAGES, -1); 1752907ea17eSMatthew Wilcox (Oracle) __lruvec_stat_mod_folio(old, NR_SHMEM, -1); 17530142ef6cSHugh Dickins } 1754b93b0163SMatthew Wilcox xa_unlock_irq(&swap_mapping->i_pages); 1755bde05d1cSHugh Dickins 17560142ef6cSHugh Dickins if (unlikely(error)) { 17570142ef6cSHugh Dickins /* 17580142ef6cSHugh Dickins * Is this possible? I think not, now that our callers check 17590142ef6cSHugh Dickins * both PageSwapCache and page_private after getting page lock; 17600142ef6cSHugh Dickins * but be defensive. Reverse old to newpage for clear and free. 17610142ef6cSHugh Dickins */ 1762907ea17eSMatthew Wilcox (Oracle) old = new; 17630142ef6cSHugh Dickins } else { 1764907ea17eSMatthew Wilcox (Oracle) folio_add_lru(new); 17650d698e25SMatthew Wilcox (Oracle) *foliop = new; 17660142ef6cSHugh Dickins } 1767bde05d1cSHugh Dickins 1768907ea17eSMatthew Wilcox (Oracle) folio_clear_swapcache(old); 1769907ea17eSMatthew Wilcox (Oracle) old->private = NULL; 1770bde05d1cSHugh Dickins 1771907ea17eSMatthew Wilcox (Oracle) folio_unlock(old); 1772907ea17eSMatthew Wilcox (Oracle) folio_put_refs(old, 2); 17730142ef6cSHugh Dickins return error; 1774bde05d1cSHugh Dickins } 1775bde05d1cSHugh Dickins 17766cec2b95SMiaohe Lin static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index, 17776cec2b95SMiaohe Lin struct folio *folio, swp_entry_t swap) 17786cec2b95SMiaohe Lin { 17796cec2b95SMiaohe Lin struct address_space *mapping = inode->i_mapping; 17806cec2b95SMiaohe Lin struct shmem_inode_info *info = SHMEM_I(inode); 17816cec2b95SMiaohe Lin swp_entry_t swapin_error; 17826cec2b95SMiaohe Lin void *old; 17836cec2b95SMiaohe Lin 178415520a3fSPeter Xu swapin_error = make_swapin_error_entry(); 17856cec2b95SMiaohe Lin old = xa_cmpxchg_irq(&mapping->i_pages, index, 17866cec2b95SMiaohe Lin swp_to_radix_entry(swap), 17876cec2b95SMiaohe Lin swp_to_radix_entry(swapin_error), 0); 17886cec2b95SMiaohe Lin if (old != swp_to_radix_entry(swap)) 17896cec2b95SMiaohe Lin return; 17906cec2b95SMiaohe Lin 17916cec2b95SMiaohe Lin folio_wait_writeback(folio); 179275fa68a5SMatthew Wilcox (Oracle) delete_from_swap_cache(folio); 17936cec2b95SMiaohe Lin spin_lock_irq(&info->lock); 17946cec2b95SMiaohe Lin /* 17956cec2b95SMiaohe Lin * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks won't 17966cec2b95SMiaohe Lin * be 0 when inode is released and thus trigger WARN_ON(inode->i_blocks) in 17976cec2b95SMiaohe Lin * shmem_evict_inode. 17986cec2b95SMiaohe Lin */ 17996cec2b95SMiaohe Lin info->alloced--; 18006cec2b95SMiaohe Lin info->swapped--; 18016cec2b95SMiaohe Lin shmem_recalc_inode(inode); 18026cec2b95SMiaohe Lin spin_unlock_irq(&info->lock); 18036cec2b95SMiaohe Lin swap_free(swap); 18046cec2b95SMiaohe Lin } 18056cec2b95SMiaohe Lin 1806bde05d1cSHugh Dickins /* 1807833de10fSMiaohe Lin * Swap in the folio pointed to by *foliop. 1808833de10fSMiaohe Lin * Caller has to make sure that *foliop contains a valid swapped folio. 1809833de10fSMiaohe Lin * Returns 0 and the folio in foliop if success. 
On failure, returns the 1810833de10fSMiaohe Lin * error code and NULL in *foliop. 18111da177e4SLinus Torvalds */ 1812da08e9b7SMatthew Wilcox (Oracle) static int shmem_swapin_folio(struct inode *inode, pgoff_t index, 1813da08e9b7SMatthew Wilcox (Oracle) struct folio **foliop, enum sgp_type sgp, 1814c5bf121eSVineeth Remanan Pillai gfp_t gfp, struct vm_area_struct *vma, 18152b740303SSouptick Joarder vm_fault_t *fault_type) 18161da177e4SLinus Torvalds { 18171da177e4SLinus Torvalds struct address_space *mapping = inode->i_mapping; 181823f919d4SArnd Bergmann struct shmem_inode_info *info = SHMEM_I(inode); 181904f94e3fSDan Schatzberg struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL; 1820cbc2bd98SKairui Song struct swap_info_struct *si; 1821da08e9b7SMatthew Wilcox (Oracle) struct folio *folio = NULL; 18221da177e4SLinus Torvalds swp_entry_t swap; 18231da177e4SLinus Torvalds int error; 18241da177e4SLinus Torvalds 1825da08e9b7SMatthew Wilcox (Oracle) VM_BUG_ON(!*foliop || !xa_is_value(*foliop)); 1826da08e9b7SMatthew Wilcox (Oracle) swap = radix_to_swp_entry(*foliop); 1827da08e9b7SMatthew Wilcox (Oracle) *foliop = NULL; 182854af6042SHugh Dickins 18296cec2b95SMiaohe Lin if (is_swapin_error_entry(swap)) 18306cec2b95SMiaohe Lin return -EIO; 18316cec2b95SMiaohe Lin 1832cbc2bd98SKairui Song si = get_swap_device(swap); 1833cbc2bd98SKairui Song if (!si) { 1834cbc2bd98SKairui Song if (!shmem_confirm_swap(mapping, index, swap)) 1835cbc2bd98SKairui Song return -EEXIST; 1836cbc2bd98SKairui Song else 1837cbc2bd98SKairui Song return -EINVAL; 1838cbc2bd98SKairui Song } 1839cbc2bd98SKairui Song 18401da177e4SLinus Torvalds /* Look it up and read it in.. */ 18415739a81cSMatthew Wilcox (Oracle) folio = swap_cache_get_folio(swap, NULL, 0); 18425739a81cSMatthew Wilcox (Oracle) if (!folio) { 18439e18eb29SAndres Lagar-Cavilla /* Or update major stats only when swapin succeeds?? */ 18449e18eb29SAndres Lagar-Cavilla if (fault_type) { 184568da9f05SHugh Dickins *fault_type |= VM_FAULT_MAJOR; 18469e18eb29SAndres Lagar-Cavilla count_vm_event(PGMAJFAULT); 18472262185cSRoman Gushchin count_memcg_event_mm(charge_mm, PGMAJFAULT); 18489e18eb29SAndres Lagar-Cavilla } 18499e18eb29SAndres Lagar-Cavilla /* Here we actually start the io */ 18505739a81cSMatthew Wilcox (Oracle) folio = shmem_swapin(swap, gfp, info, index); 18515739a81cSMatthew Wilcox (Oracle) if (!folio) { 18521da177e4SLinus Torvalds error = -ENOMEM; 185354af6042SHugh Dickins goto failed; 1854285b2c4fSHugh Dickins } 18551da177e4SLinus Torvalds } 18561da177e4SLinus Torvalds 1857833de10fSMiaohe Lin /* We have to do this with folio locked to prevent races */ 1858da08e9b7SMatthew Wilcox (Oracle) folio_lock(folio); 1859da08e9b7SMatthew Wilcox (Oracle) if (!folio_test_swapcache(folio) || 1860da08e9b7SMatthew Wilcox (Oracle) folio_swap_entry(folio).val != swap.val || 1861d1899228SHugh Dickins !shmem_confirm_swap(mapping, index, swap)) { 1862c5bf121eSVineeth Remanan Pillai error = -EEXIST; 1863d1899228SHugh Dickins goto unlock; 1864bde05d1cSHugh Dickins } 1865da08e9b7SMatthew Wilcox (Oracle) if (!folio_test_uptodate(folio)) { 18661da177e4SLinus Torvalds error = -EIO; 186754af6042SHugh Dickins goto failed; 186854af6042SHugh Dickins } 1869da08e9b7SMatthew Wilcox (Oracle) folio_wait_writeback(folio); 187054af6042SHugh Dickins 18718a84802eSSteven Price /* 18728a84802eSSteven Price * Some architectures may have to restore extra metadata to the 1873da08e9b7SMatthew Wilcox (Oracle) * folio after reading from swap. 
18748a84802eSSteven Price */ 1875da08e9b7SMatthew Wilcox (Oracle) arch_swap_restore(swap, folio); 18768a84802eSSteven Price 1877069d849cSMatthew Wilcox (Oracle) if (shmem_should_replace_folio(folio, gfp)) { 18780d698e25SMatthew Wilcox (Oracle) error = shmem_replace_folio(&folio, gfp, info, index); 1879bde05d1cSHugh Dickins if (error) 188054af6042SHugh Dickins goto failed; 18811da177e4SLinus Torvalds } 18821da177e4SLinus Torvalds 1883b7dd44a1SMatthew Wilcox (Oracle) error = shmem_add_to_page_cache(folio, mapping, index, 18843fea5a49SJohannes Weiner swp_to_radix_entry(swap), gfp, 18853fea5a49SJohannes Weiner charge_mm); 188654af6042SHugh Dickins if (error) 188754af6042SHugh Dickins goto failed; 188854af6042SHugh Dickins 18894595ef88SKirill A. Shutemov spin_lock_irq(&info->lock); 189054af6042SHugh Dickins info->swapped--; 189154af6042SHugh Dickins shmem_recalc_inode(inode); 18924595ef88SKirill A. Shutemov spin_unlock_irq(&info->lock); 189327ab7006SHugh Dickins 189466d2f4d2SHugh Dickins if (sgp == SGP_WRITE) 1895da08e9b7SMatthew Wilcox (Oracle) folio_mark_accessed(folio); 189666d2f4d2SHugh Dickins 189775fa68a5SMatthew Wilcox (Oracle) delete_from_swap_cache(folio); 1898da08e9b7SMatthew Wilcox (Oracle) folio_mark_dirty(folio); 189927ab7006SHugh Dickins swap_free(swap); 1900cbc2bd98SKairui Song put_swap_device(si); 190127ab7006SHugh Dickins 1902da08e9b7SMatthew Wilcox (Oracle) *foliop = folio; 1903c5bf121eSVineeth Remanan Pillai return 0; 1904c5bf121eSVineeth Remanan Pillai failed: 1905c5bf121eSVineeth Remanan Pillai if (!shmem_confirm_swap(mapping, index, swap)) 1906c5bf121eSVineeth Remanan Pillai error = -EEXIST; 19076cec2b95SMiaohe Lin if (error == -EIO) 19086cec2b95SMiaohe Lin shmem_set_folio_swapin_error(inode, index, folio, swap); 1909c5bf121eSVineeth Remanan Pillai unlock: 1910da08e9b7SMatthew Wilcox (Oracle) if (folio) { 1911da08e9b7SMatthew Wilcox (Oracle) folio_unlock(folio); 1912da08e9b7SMatthew Wilcox (Oracle) folio_put(folio); 1913c5bf121eSVineeth Remanan Pillai } 1914cbc2bd98SKairui Song put_swap_device(si); 1915c5bf121eSVineeth Remanan Pillai 1916c5bf121eSVineeth Remanan Pillai return error; 1917c5bf121eSVineeth Remanan Pillai } 1918c5bf121eSVineeth Remanan Pillai 1919c5bf121eSVineeth Remanan Pillai /* 1920fc26babbSMatthew Wilcox (Oracle) * shmem_get_folio_gfp - find page in cache, or get from swap, or allocate 1921c5bf121eSVineeth Remanan Pillai * 1922c5bf121eSVineeth Remanan Pillai * If we allocate a new one we do not mark it dirty. That's up to the 1923c5bf121eSVineeth Remanan Pillai * vm. If we swap it in we mark it dirty since we also free the swap 1924c5bf121eSVineeth Remanan Pillai * entry since a page cannot live in both the swap and page cache. 1925c5bf121eSVineeth Remanan Pillai * 1926c949b097SAxel Rasmussen * vma, vmf, and fault_type are only supplied by shmem_fault: 1927c5bf121eSVineeth Remanan Pillai * otherwise they are NULL. 
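 * On success, returns 0 with *foliop set; for a hole with SGP_READ the
 * folio pointer is left NULL so the caller can treat it as zeroes.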
1928c5bf121eSVineeth Remanan Pillai */ 1929fc26babbSMatthew Wilcox (Oracle) static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index, 1930fc26babbSMatthew Wilcox (Oracle) struct folio **foliop, enum sgp_type sgp, gfp_t gfp, 1931c5bf121eSVineeth Remanan Pillai struct vm_area_struct *vma, struct vm_fault *vmf, 1932c5bf121eSVineeth Remanan Pillai vm_fault_t *fault_type) 1933c5bf121eSVineeth Remanan Pillai { 1934c5bf121eSVineeth Remanan Pillai struct address_space *mapping = inode->i_mapping; 1935c5bf121eSVineeth Remanan Pillai struct shmem_inode_info *info = SHMEM_I(inode); 1936c5bf121eSVineeth Remanan Pillai struct shmem_sb_info *sbinfo; 1937c5bf121eSVineeth Remanan Pillai struct mm_struct *charge_mm; 1938b7dd44a1SMatthew Wilcox (Oracle) struct folio *folio; 19396fe7d712SLukas Bulwahn pgoff_t hindex; 1940164cc4feSRik van Riel gfp_t huge_gfp; 1941c5bf121eSVineeth Remanan Pillai int error; 1942c5bf121eSVineeth Remanan Pillai int once = 0; 1943c5bf121eSVineeth Remanan Pillai int alloced = 0; 1944c5bf121eSVineeth Remanan Pillai 1945c5bf121eSVineeth Remanan Pillai if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT)) 1946c5bf121eSVineeth Remanan Pillai return -EFBIG; 1947c5bf121eSVineeth Remanan Pillai repeat: 1948c5bf121eSVineeth Remanan Pillai if (sgp <= SGP_CACHE && 1949c5bf121eSVineeth Remanan Pillai ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) { 1950c5bf121eSVineeth Remanan Pillai return -EINVAL; 1951c5bf121eSVineeth Remanan Pillai } 1952c5bf121eSVineeth Remanan Pillai 1953c5bf121eSVineeth Remanan Pillai sbinfo = SHMEM_SB(inode->i_sb); 195404f94e3fSDan Schatzberg charge_mm = vma ? vma->vm_mm : NULL; 1955c5bf121eSVineeth Remanan Pillai 1956aaeb94ebSChristoph Hellwig folio = filemap_get_entry(mapping, index); 1957b1d0ec3aSMatthew Wilcox (Oracle) if (folio && vma && userfaultfd_minor(vma)) { 1958aaeb94ebSChristoph Hellwig if (!xa_is_value(folio)) 1959b1d0ec3aSMatthew Wilcox (Oracle) folio_put(folio); 1960c949b097SAxel Rasmussen *fault_type = handle_userfault(vmf, VM_UFFD_MINOR); 1961c949b097SAxel Rasmussen return 0; 1962c949b097SAxel Rasmussen } 1963c949b097SAxel Rasmussen 1964b1d0ec3aSMatthew Wilcox (Oracle) if (xa_is_value(folio)) { 1965da08e9b7SMatthew Wilcox (Oracle) error = shmem_swapin_folio(inode, index, &folio, 1966c5bf121eSVineeth Remanan Pillai sgp, gfp, vma, fault_type); 1967c5bf121eSVineeth Remanan Pillai if (error == -EEXIST) 1968c5bf121eSVineeth Remanan Pillai goto repeat; 1969c5bf121eSVineeth Remanan Pillai 1970fc26babbSMatthew Wilcox (Oracle) *foliop = folio; 1971c5bf121eSVineeth Remanan Pillai return error; 1972c5bf121eSVineeth Remanan Pillai } 1973c5bf121eSVineeth Remanan Pillai 1974b1d0ec3aSMatthew Wilcox (Oracle) if (folio) { 1975aaeb94ebSChristoph Hellwig folio_lock(folio); 1976aaeb94ebSChristoph Hellwig 1977aaeb94ebSChristoph Hellwig /* Has the folio been truncated or swapped out? 
*/ 1978aaeb94ebSChristoph Hellwig if (unlikely(folio->mapping != mapping)) { 1979aaeb94ebSChristoph Hellwig folio_unlock(folio); 1980aaeb94ebSChristoph Hellwig folio_put(folio); 1981aaeb94ebSChristoph Hellwig goto repeat; 1982aaeb94ebSChristoph Hellwig } 1983acdd9f8eSHugh Dickins if (sgp == SGP_WRITE) 1984b1d0ec3aSMatthew Wilcox (Oracle) folio_mark_accessed(folio); 1985b1d0ec3aSMatthew Wilcox (Oracle) if (folio_test_uptodate(folio)) 1986acdd9f8eSHugh Dickins goto out; 1987fc26babbSMatthew Wilcox (Oracle) /* fallocated folio */ 1988c5bf121eSVineeth Remanan Pillai if (sgp != SGP_READ) 1989c5bf121eSVineeth Remanan Pillai goto clear; 1990b1d0ec3aSMatthew Wilcox (Oracle) folio_unlock(folio); 1991b1d0ec3aSMatthew Wilcox (Oracle) folio_put(folio); 1992c5bf121eSVineeth Remanan Pillai } 1993c5bf121eSVineeth Remanan Pillai 1994c5bf121eSVineeth Remanan Pillai /* 1995fc26babbSMatthew Wilcox (Oracle) * SGP_READ: succeed on hole, with NULL folio, letting caller zero. 1996fc26babbSMatthew Wilcox (Oracle) * SGP_NOALLOC: fail on hole, with NULL folio, letting caller fail. 1997acdd9f8eSHugh Dickins */ 1998fc26babbSMatthew Wilcox (Oracle) *foliop = NULL; 1999acdd9f8eSHugh Dickins if (sgp == SGP_READ) 2000acdd9f8eSHugh Dickins return 0; 2001acdd9f8eSHugh Dickins if (sgp == SGP_NOALLOC) 2002acdd9f8eSHugh Dickins return -ENOENT; 2003acdd9f8eSHugh Dickins 2004acdd9f8eSHugh Dickins /* 2005acdd9f8eSHugh Dickins * Fast cache lookup and swap lookup did not find it: allocate. 2006c5bf121eSVineeth Remanan Pillai */ 2007c5bf121eSVineeth Remanan Pillai 2008cfda0526SMike Rapoport if (vma && userfaultfd_missing(vma)) { 2009cfda0526SMike Rapoport *fault_type = handle_userfault(vmf, VM_UFFD_MISSING); 2010cfda0526SMike Rapoport return 0; 2011cfda0526SMike Rapoport } 2012cfda0526SMike Rapoport 20132cf13384SDavid Stevens if (!shmem_is_huge(inode, index, false, 20142cf13384SDavid Stevens vma ? vma->vm_mm : NULL, vma ? vma->vm_flags : 0)) 2015800d8c63SKirill A. Shutemov goto alloc_nohuge; 201627d80fa2SKees Cook 2017164cc4feSRik van Riel huge_gfp = vma_thp_gfp_mask(vma); 201878cc8cdcSRik van Riel huge_gfp = limit_gfp_mask(huge_gfp, gfp); 2019b1d0ec3aSMatthew Wilcox (Oracle) folio = shmem_alloc_and_acct_folio(huge_gfp, inode, index, true); 2020b1d0ec3aSMatthew Wilcox (Oracle) if (IS_ERR(folio)) { 2021c5bf121eSVineeth Remanan Pillai alloc_nohuge: 2022b1d0ec3aSMatthew Wilcox (Oracle) folio = shmem_alloc_and_acct_folio(gfp, inode, index, false); 202354af6042SHugh Dickins } 2024b1d0ec3aSMatthew Wilcox (Oracle) if (IS_ERR(folio)) { 2025779750d2SKirill A. Shutemov int retry = 5; 2026c5bf121eSVineeth Remanan Pillai 2027b1d0ec3aSMatthew Wilcox (Oracle) error = PTR_ERR(folio); 2028b1d0ec3aSMatthew Wilcox (Oracle) folio = NULL; 2029779750d2SKirill A. Shutemov if (error != -ENOSPC) 2030c5bf121eSVineeth Remanan Pillai goto unlock; 2031779750d2SKirill A. Shutemov /* 2032fc26babbSMatthew Wilcox (Oracle) * Try to reclaim some space by splitting a large folio 2033779750d2SKirill A. Shutemov * beyond i_size on the filesystem. 2034779750d2SKirill A. Shutemov */ 2035779750d2SKirill A. Shutemov while (retry--) { 2036779750d2SKirill A. Shutemov int ret; 2037c5bf121eSVineeth Remanan Pillai 2038779750d2SKirill A. Shutemov ret = shmem_unused_huge_shrink(sbinfo, NULL, 1); 2039779750d2SKirill A. Shutemov if (ret == SHRINK_STOP) 2040779750d2SKirill A. Shutemov break; 2041779750d2SKirill A. Shutemov if (ret) 2042779750d2SKirill A. Shutemov goto alloc_nohuge; 2043779750d2SKirill A. 
Shutemov } 2044c5bf121eSVineeth Remanan Pillai goto unlock; 2045800d8c63SKirill A. Shutemov } 2046800d8c63SKirill A. Shutemov 2047b1d0ec3aSMatthew Wilcox (Oracle) hindex = round_down(index, folio_nr_pages(folio)); 2048800d8c63SKirill A. Shutemov 204966d2f4d2SHugh Dickins if (sgp == SGP_WRITE) 2050b1d0ec3aSMatthew Wilcox (Oracle) __folio_set_referenced(folio); 205166d2f4d2SHugh Dickins 2052b7dd44a1SMatthew Wilcox (Oracle) error = shmem_add_to_page_cache(folio, mapping, hindex, 20533fea5a49SJohannes Weiner NULL, gfp & GFP_RECLAIM_MASK, 20543fea5a49SJohannes Weiner charge_mm); 20553fea5a49SJohannes Weiner if (error) 2056800d8c63SKirill A. Shutemov goto unacct; 2057b1d0ec3aSMatthew Wilcox (Oracle) folio_add_lru(folio); 205854af6042SHugh Dickins 20594595ef88SKirill A. Shutemov spin_lock_irq(&info->lock); 2060b1d0ec3aSMatthew Wilcox (Oracle) info->alloced += folio_nr_pages(folio); 206154af6042SHugh Dickins shmem_recalc_inode(inode); 20624595ef88SKirill A. Shutemov spin_unlock_irq(&info->lock); 20631635f6a7SHugh Dickins alloced = true; 206454af6042SHugh Dickins 2065b1d0ec3aSMatthew Wilcox (Oracle) if (folio_test_pmd_mappable(folio) && 2066779750d2SKirill A. Shutemov DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) < 2067fc26babbSMatthew Wilcox (Oracle) folio_next_index(folio) - 1) { 2068779750d2SKirill A. Shutemov /* 2069fc26babbSMatthew Wilcox (Oracle) * Part of the large folio is beyond i_size: subject 2070779750d2SKirill A. Shutemov * to shrink under memory pressure. 2071779750d2SKirill A. Shutemov */ 2072779750d2SKirill A. Shutemov spin_lock(&sbinfo->shrinklist_lock); 2073d041353dSCong Wang /* 2074d041353dSCong Wang * _careful to defend against unlocked access to 2075d041353dSCong Wang * ->shrink_list in shmem_unused_huge_shrink() 2076d041353dSCong Wang */ 2077d041353dSCong Wang if (list_empty_careful(&info->shrinklist)) { 2078779750d2SKirill A. Shutemov list_add_tail(&info->shrinklist, 2079779750d2SKirill A. Shutemov &sbinfo->shrinklist); 2080779750d2SKirill A. Shutemov sbinfo->shrinklist_len++; 2081779750d2SKirill A. Shutemov } 2082779750d2SKirill A. Shutemov spin_unlock(&sbinfo->shrinklist_lock); 2083779750d2SKirill A. Shutemov } 2084779750d2SKirill A. Shutemov 2085ec9516fbSHugh Dickins /* 2086fc26babbSMatthew Wilcox (Oracle) * Let SGP_FALLOC use the SGP_WRITE optimization on a new folio. 20871635f6a7SHugh Dickins */ 20881635f6a7SHugh Dickins if (sgp == SGP_FALLOC) 20891635f6a7SHugh Dickins sgp = SGP_WRITE; 20901635f6a7SHugh Dickins clear: 20911635f6a7SHugh Dickins /* 2092fc26babbSMatthew Wilcox (Oracle) * Let SGP_WRITE caller clear ends if write does not fill folio; 2093fc26babbSMatthew Wilcox (Oracle) * but SGP_FALLOC on a folio fallocated earlier must initialize 20941635f6a7SHugh Dickins * it now, lest undo on failure cancel our earlier guarantee. 2095ec9516fbSHugh Dickins */ 2096b1d0ec3aSMatthew Wilcox (Oracle) if (sgp != SGP_WRITE && !folio_test_uptodate(folio)) { 2097b1d0ec3aSMatthew Wilcox (Oracle) long i, n = folio_nr_pages(folio); 2098800d8c63SKirill A. Shutemov 2099b1d0ec3aSMatthew Wilcox (Oracle) for (i = 0; i < n; i++) 2100b1d0ec3aSMatthew Wilcox (Oracle) clear_highpage(folio_page(folio, i)); 2101b1d0ec3aSMatthew Wilcox (Oracle) flush_dcache_folio(folio); 2102b1d0ec3aSMatthew Wilcox (Oracle) folio_mark_uptodate(folio); 2103ec9516fbSHugh Dickins } 2104bde05d1cSHugh Dickins 210554af6042SHugh Dickins /* Perhaps the file has been truncated since we checked */ 210675edd345SHugh Dickins if (sgp <= SGP_CACHE && 210709cbfeafSKirill A. 
Shutemov ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) { 2108267a4c76SHugh Dickins if (alloced) { 2109b1d0ec3aSMatthew Wilcox (Oracle) folio_clear_dirty(folio); 2110b1d0ec3aSMatthew Wilcox (Oracle) filemap_remove_folio(folio); 21114595ef88SKirill A. Shutemov spin_lock_irq(&info->lock); 2112267a4c76SHugh Dickins shmem_recalc_inode(inode); 21134595ef88SKirill A. Shutemov spin_unlock_irq(&info->lock); 2114267a4c76SHugh Dickins } 211554af6042SHugh Dickins error = -EINVAL; 2116267a4c76SHugh Dickins goto unlock; 2117ff36b801SShaohua Li } 211863ec1973SMatthew Wilcox (Oracle) out: 2119fc26babbSMatthew Wilcox (Oracle) *foliop = folio; 212054af6042SHugh Dickins return 0; 2121d00806b1SNick Piggin 2122d0217ac0SNick Piggin /* 212354af6042SHugh Dickins * Error recovery. 21241da177e4SLinus Torvalds */ 212554af6042SHugh Dickins unacct: 2126b1d0ec3aSMatthew Wilcox (Oracle) shmem_inode_unacct_blocks(inode, folio_nr_pages(folio)); 2127800d8c63SKirill A. Shutemov 2128b1d0ec3aSMatthew Wilcox (Oracle) if (folio_test_large(folio)) { 2129b1d0ec3aSMatthew Wilcox (Oracle) folio_unlock(folio); 2130b1d0ec3aSMatthew Wilcox (Oracle) folio_put(folio); 2131800d8c63SKirill A. Shutemov goto alloc_nohuge; 2132800d8c63SKirill A. Shutemov } 2133d1899228SHugh Dickins unlock: 2134b1d0ec3aSMatthew Wilcox (Oracle) if (folio) { 2135b1d0ec3aSMatthew Wilcox (Oracle) folio_unlock(folio); 2136b1d0ec3aSMatthew Wilcox (Oracle) folio_put(folio); 213754af6042SHugh Dickins } 213854af6042SHugh Dickins if (error == -ENOSPC && !once++) { 21394595ef88SKirill A. Shutemov spin_lock_irq(&info->lock); 214054af6042SHugh Dickins shmem_recalc_inode(inode); 21414595ef88SKirill A. Shutemov spin_unlock_irq(&info->lock); 21421da177e4SLinus Torvalds goto repeat; 2143d8dc74f2SAdrian Bunk } 21447f4446eeSMatthew Wilcox if (error == -EEXIST) 214554af6042SHugh Dickins goto repeat; 214654af6042SHugh Dickins return error; 21471da177e4SLinus Torvalds } 21481da177e4SLinus Torvalds 21494e1fc793SMatthew Wilcox (Oracle) int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop, 21504e1fc793SMatthew Wilcox (Oracle) enum sgp_type sgp) 21514e1fc793SMatthew Wilcox (Oracle) { 21524e1fc793SMatthew Wilcox (Oracle) return shmem_get_folio_gfp(inode, index, foliop, sgp, 21534e1fc793SMatthew Wilcox (Oracle) mapping_gfp_mask(inode->i_mapping), NULL, NULL, NULL); 21544e1fc793SMatthew Wilcox (Oracle) } 21554e1fc793SMatthew Wilcox (Oracle) 215610d20bd2SLinus Torvalds /* 215710d20bd2SLinus Torvalds * This is like autoremove_wake_function, but it removes the wait queue 215810d20bd2SLinus Torvalds * entry unconditionally - even if something else had already woken the 215910d20bd2SLinus Torvalds * target. 
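 * Used by shmem_fault() while waiting for a hole-punch to finish: the wait
 * queue head lives on the hole-punching task's stack, so the entry must be
 * off the list by the time wake_up_all() drops the queue lock, and
 * finish_wait() must not need to dereference a head that may be gone.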
216010d20bd2SLinus Torvalds */ 2161ac6424b9SIngo Molnar static int synchronous_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) 216210d20bd2SLinus Torvalds { 216310d20bd2SLinus Torvalds int ret = default_wake_function(wait, mode, sync, key); 21642055da97SIngo Molnar list_del_init(&wait->entry); 216510d20bd2SLinus Torvalds return ret; 216610d20bd2SLinus Torvalds } 216710d20bd2SLinus Torvalds 216820acce67SSouptick Joarder static vm_fault_t shmem_fault(struct vm_fault *vmf) 21691da177e4SLinus Torvalds { 217011bac800SDave Jiang struct vm_area_struct *vma = vmf->vma; 2171496ad9aaSAl Viro struct inode *inode = file_inode(vma->vm_file); 21729e18eb29SAndres Lagar-Cavilla gfp_t gfp = mapping_gfp_mask(inode->i_mapping); 217368a54100SMatthew Wilcox (Oracle) struct folio *folio = NULL; 217420acce67SSouptick Joarder int err; 217520acce67SSouptick Joarder vm_fault_t ret = VM_FAULT_LOCKED; 21761da177e4SLinus Torvalds 2177f00cdc6dSHugh Dickins /* 2178f00cdc6dSHugh Dickins * Trinity finds that probing a hole which tmpfs is punching can 2179f00cdc6dSHugh Dickins * prevent the hole-punch from ever completing: which in turn 21809608703eSJan Kara * locks writers out with its hold on i_rwsem. So refrain from 21818e205f77SHugh Dickins * faulting pages into the hole while it's being punched. Although 21828e205f77SHugh Dickins * shmem_undo_range() does remove the additions, it may be unable to 21838e205f77SHugh Dickins * keep up, as each new page needs its own unmap_mapping_range() call, 21848e205f77SHugh Dickins * and the i_mmap tree grows ever slower to scan if new vmas are added. 21858e205f77SHugh Dickins * 21868e205f77SHugh Dickins * It does not matter if we sometimes reach this check just before the 21878e205f77SHugh Dickins * hole-punch begins, so that one fault then races with the punch: 21888e205f77SHugh Dickins * we just need to make racing faults a rare case. 21898e205f77SHugh Dickins * 21908e205f77SHugh Dickins * The implementation below would be much simpler if we just used a 21919608703eSJan Kara * standard mutex or completion: but we cannot take i_rwsem in fault, 21928e205f77SHugh Dickins * and bloating every shmem inode for this unlikely case would be sad. 2193f00cdc6dSHugh Dickins */ 2194f00cdc6dSHugh Dickins if (unlikely(inode->i_private)) { 2195f00cdc6dSHugh Dickins struct shmem_falloc *shmem_falloc; 2196f00cdc6dSHugh Dickins 2197f00cdc6dSHugh Dickins spin_lock(&inode->i_lock); 2198f00cdc6dSHugh Dickins shmem_falloc = inode->i_private; 21998e205f77SHugh Dickins if (shmem_falloc && 22008e205f77SHugh Dickins shmem_falloc->waitq && 22018e205f77SHugh Dickins vmf->pgoff >= shmem_falloc->start && 22028e205f77SHugh Dickins vmf->pgoff < shmem_falloc->next) { 22038897c1b1SKirill A. Shutemov struct file *fpin; 22048e205f77SHugh Dickins wait_queue_head_t *shmem_falloc_waitq; 220510d20bd2SLinus Torvalds DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function); 22068e205f77SHugh Dickins 22078e205f77SHugh Dickins ret = VM_FAULT_NOPAGE; 22088897c1b1SKirill A. Shutemov fpin = maybe_unlock_mmap_for_io(vmf, NULL); 22098897c1b1SKirill A. 
Shutemov if (fpin) 22108e205f77SHugh Dickins ret = VM_FAULT_RETRY; 22118e205f77SHugh Dickins 22128e205f77SHugh Dickins shmem_falloc_waitq = shmem_falloc->waitq; 22138e205f77SHugh Dickins prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait, 22148e205f77SHugh Dickins TASK_UNINTERRUPTIBLE); 22158e205f77SHugh Dickins spin_unlock(&inode->i_lock); 22168e205f77SHugh Dickins schedule(); 22178e205f77SHugh Dickins 22188e205f77SHugh Dickins /* 22198e205f77SHugh Dickins * shmem_falloc_waitq points into the shmem_fallocate() 22208e205f77SHugh Dickins * stack of the hole-punching task: shmem_falloc_waitq 22218e205f77SHugh Dickins * is usually invalid by the time we reach here, but 22228e205f77SHugh Dickins * finish_wait() does not dereference it in that case; 22238e205f77SHugh Dickins * though i_lock needed lest racing with wake_up_all(). 22248e205f77SHugh Dickins */ 22258e205f77SHugh Dickins spin_lock(&inode->i_lock); 22268e205f77SHugh Dickins finish_wait(shmem_falloc_waitq, &shmem_fault_wait); 22278e205f77SHugh Dickins spin_unlock(&inode->i_lock); 22288897c1b1SKirill A. Shutemov 22298897c1b1SKirill A. Shutemov if (fpin) 22308897c1b1SKirill A. Shutemov fput(fpin); 22318e205f77SHugh Dickins return ret; 2232f00cdc6dSHugh Dickins } 22338e205f77SHugh Dickins spin_unlock(&inode->i_lock); 2234f00cdc6dSHugh Dickins } 2235f00cdc6dSHugh Dickins 223668a54100SMatthew Wilcox (Oracle) err = shmem_get_folio_gfp(inode, vmf->pgoff, &folio, SGP_CACHE, 2237cfda0526SMike Rapoport gfp, vma, vmf, &ret); 223820acce67SSouptick Joarder if (err) 223920acce67SSouptick Joarder return vmf_error(err); 224068a54100SMatthew Wilcox (Oracle) if (folio) 224168a54100SMatthew Wilcox (Oracle) vmf->page = folio_file_page(folio, vmf->pgoff); 224268da9f05SHugh Dickins return ret; 22431da177e4SLinus Torvalds } 22441da177e4SLinus Torvalds 2245c01d5b30SHugh Dickins unsigned long shmem_get_unmapped_area(struct file *file, 2246c01d5b30SHugh Dickins unsigned long uaddr, unsigned long len, 2247c01d5b30SHugh Dickins unsigned long pgoff, unsigned long flags) 2248c01d5b30SHugh Dickins { 2249c01d5b30SHugh Dickins unsigned long (*get_area)(struct file *, 2250c01d5b30SHugh Dickins unsigned long, unsigned long, unsigned long, unsigned long); 2251c01d5b30SHugh Dickins unsigned long addr; 2252c01d5b30SHugh Dickins unsigned long offset; 2253c01d5b30SHugh Dickins unsigned long inflated_len; 2254c01d5b30SHugh Dickins unsigned long inflated_addr; 2255c01d5b30SHugh Dickins unsigned long inflated_offset; 2256c01d5b30SHugh Dickins 2257c01d5b30SHugh Dickins if (len > TASK_SIZE) 2258c01d5b30SHugh Dickins return -ENOMEM; 2259c01d5b30SHugh Dickins 2260c01d5b30SHugh Dickins get_area = current->mm->get_unmapped_area; 2261c01d5b30SHugh Dickins addr = get_area(file, uaddr, len, pgoff, flags); 2262c01d5b30SHugh Dickins 2263396bcc52SMatthew Wilcox (Oracle) if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 2264c01d5b30SHugh Dickins return addr; 2265c01d5b30SHugh Dickins if (IS_ERR_VALUE(addr)) 2266c01d5b30SHugh Dickins return addr; 2267c01d5b30SHugh Dickins if (addr & ~PAGE_MASK) 2268c01d5b30SHugh Dickins return addr; 2269c01d5b30SHugh Dickins if (addr > TASK_SIZE - len) 2270c01d5b30SHugh Dickins return addr; 2271c01d5b30SHugh Dickins 2272c01d5b30SHugh Dickins if (shmem_huge == SHMEM_HUGE_DENY) 2273c01d5b30SHugh Dickins return addr; 2274c01d5b30SHugh Dickins if (len < HPAGE_PMD_SIZE) 2275c01d5b30SHugh Dickins return addr; 2276c01d5b30SHugh Dickins if (flags & MAP_FIXED) 2277c01d5b30SHugh Dickins return addr; 2278c01d5b30SHugh Dickins /* 2279c01d5b30SHugh Dickins * Our priority 
is to support MAP_SHARED mapped hugely; 2280c01d5b30SHugh Dickins * and support MAP_PRIVATE mapped hugely too, until it is COWed. 228199158997SKirill A. Shutemov * But if caller specified an address hint and we allocated area there 228299158997SKirill A. Shutemov * successfully, respect that as before. 2283c01d5b30SHugh Dickins */ 228499158997SKirill A. Shutemov if (uaddr == addr) 2285c01d5b30SHugh Dickins return addr; 2286c01d5b30SHugh Dickins 2287c01d5b30SHugh Dickins if (shmem_huge != SHMEM_HUGE_FORCE) { 2288c01d5b30SHugh Dickins struct super_block *sb; 2289c01d5b30SHugh Dickins 2290c01d5b30SHugh Dickins if (file) { 2291c01d5b30SHugh Dickins VM_BUG_ON(file->f_op != &shmem_file_operations); 2292c01d5b30SHugh Dickins sb = file_inode(file)->i_sb; 2293c01d5b30SHugh Dickins } else { 2294c01d5b30SHugh Dickins /* 2295c01d5b30SHugh Dickins * Called directly from mm/mmap.c, or drivers/char/mem.c 2296c01d5b30SHugh Dickins * for "/dev/zero", to create a shared anonymous object. 2297c01d5b30SHugh Dickins */ 2298c01d5b30SHugh Dickins if (IS_ERR(shm_mnt)) 2299c01d5b30SHugh Dickins return addr; 2300c01d5b30SHugh Dickins sb = shm_mnt->mnt_sb; 2301c01d5b30SHugh Dickins } 23023089bf61SToshi Kani if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER) 2303c01d5b30SHugh Dickins return addr; 2304c01d5b30SHugh Dickins } 2305c01d5b30SHugh Dickins 2306c01d5b30SHugh Dickins offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE-1); 2307c01d5b30SHugh Dickins if (offset && offset + len < 2 * HPAGE_PMD_SIZE) 2308c01d5b30SHugh Dickins return addr; 2309c01d5b30SHugh Dickins if ((addr & (HPAGE_PMD_SIZE-1)) == offset) 2310c01d5b30SHugh Dickins return addr; 2311c01d5b30SHugh Dickins 2312c01d5b30SHugh Dickins inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE; 2313c01d5b30SHugh Dickins if (inflated_len > TASK_SIZE) 2314c01d5b30SHugh Dickins return addr; 2315c01d5b30SHugh Dickins if (inflated_len < len) 2316c01d5b30SHugh Dickins return addr; 2317c01d5b30SHugh Dickins 231899158997SKirill A. 
Shutemov inflated_addr = get_area(NULL, uaddr, inflated_len, 0, flags); 2319c01d5b30SHugh Dickins if (IS_ERR_VALUE(inflated_addr)) 2320c01d5b30SHugh Dickins return addr; 2321c01d5b30SHugh Dickins if (inflated_addr & ~PAGE_MASK) 2322c01d5b30SHugh Dickins return addr; 2323c01d5b30SHugh Dickins 2324c01d5b30SHugh Dickins inflated_offset = inflated_addr & (HPAGE_PMD_SIZE-1); 2325c01d5b30SHugh Dickins inflated_addr += offset - inflated_offset; 2326c01d5b30SHugh Dickins if (inflated_offset > offset) 2327c01d5b30SHugh Dickins inflated_addr += HPAGE_PMD_SIZE; 2328c01d5b30SHugh Dickins 2329c01d5b30SHugh Dickins if (inflated_addr > TASK_SIZE - len) 2330c01d5b30SHugh Dickins return addr; 2331c01d5b30SHugh Dickins return inflated_addr; 2332c01d5b30SHugh Dickins } 2333c01d5b30SHugh Dickins 23341da177e4SLinus Torvalds #ifdef CONFIG_NUMA 233541ffe5d5SHugh Dickins static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) 23361da177e4SLinus Torvalds { 2337496ad9aaSAl Viro struct inode *inode = file_inode(vma->vm_file); 233841ffe5d5SHugh Dickins return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol); 23391da177e4SLinus Torvalds } 23401da177e4SLinus Torvalds 2341d8dc74f2SAdrian Bunk static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, 2342d8dc74f2SAdrian Bunk unsigned long addr) 23431da177e4SLinus Torvalds { 2344496ad9aaSAl Viro struct inode *inode = file_inode(vma->vm_file); 234541ffe5d5SHugh Dickins pgoff_t index; 23461da177e4SLinus Torvalds 234741ffe5d5SHugh Dickins index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 234841ffe5d5SHugh Dickins return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index); 23491da177e4SLinus Torvalds } 23501da177e4SLinus Torvalds #endif 23511da177e4SLinus Torvalds 2352d7c9e99aSAlexey Gladkov int shmem_lock(struct file *file, int lock, struct ucounts *ucounts) 23531da177e4SLinus Torvalds { 2354496ad9aaSAl Viro struct inode *inode = file_inode(file); 23551da177e4SLinus Torvalds struct shmem_inode_info *info = SHMEM_I(inode); 23561da177e4SLinus Torvalds int retval = -ENOMEM; 23571da177e4SLinus Torvalds 2358ea0dfeb4SHugh Dickins /* 2359ea0dfeb4SHugh Dickins * What serializes the accesses to info->flags? 2360ea0dfeb4SHugh Dickins * ipc_lock_object() when called from shmctl_do_lock(), 2361ea0dfeb4SHugh Dickins * no serialization needed when called from shm_destroy(). 
2362ea0dfeb4SHugh Dickins */ 23631da177e4SLinus Torvalds if (lock && !(info->flags & VM_LOCKED)) { 2364d7c9e99aSAlexey Gladkov if (!user_shm_lock(inode->i_size, ucounts)) 23651da177e4SLinus Torvalds goto out_nomem; 23661da177e4SLinus Torvalds info->flags |= VM_LOCKED; 236789e004eaSLee Schermerhorn mapping_set_unevictable(file->f_mapping); 23681da177e4SLinus Torvalds } 2369d7c9e99aSAlexey Gladkov if (!lock && (info->flags & VM_LOCKED) && ucounts) { 2370d7c9e99aSAlexey Gladkov user_shm_unlock(inode->i_size, ucounts); 23711da177e4SLinus Torvalds info->flags &= ~VM_LOCKED; 237289e004eaSLee Schermerhorn mapping_clear_unevictable(file->f_mapping); 23731da177e4SLinus Torvalds } 23741da177e4SLinus Torvalds retval = 0; 237589e004eaSLee Schermerhorn 23761da177e4SLinus Torvalds out_nomem: 23771da177e4SLinus Torvalds return retval; 23781da177e4SLinus Torvalds } 23791da177e4SLinus Torvalds 23809b83a6a8SAdrian Bunk static int shmem_mmap(struct file *file, struct vm_area_struct *vma) 23811da177e4SLinus Torvalds { 2382d09e8ca6SPasha Tatashin struct inode *inode = file_inode(file); 2383d09e8ca6SPasha Tatashin struct shmem_inode_info *info = SHMEM_I(inode); 238422247efdSPeter Xu int ret; 2385ab3948f5SJoel Fernandes (Google) 238622247efdSPeter Xu ret = seal_check_future_write(info->seals, vma); 238722247efdSPeter Xu if (ret) 238822247efdSPeter Xu return ret; 2389ab3948f5SJoel Fernandes (Google) 239051b0bff2SCatalin Marinas /* arm64 - allow memory tagging on RAM-based files */ 23911c71222eSSuren Baghdasaryan vm_flags_set(vma, VM_MTE_ALLOWED); 239251b0bff2SCatalin Marinas 23931da177e4SLinus Torvalds file_accessed(file); 2394d09e8ca6SPasha Tatashin /* This is anonymous shared memory if it is unlinked at the time of mmap */ 2395d09e8ca6SPasha Tatashin if (inode->i_nlink) 23961da177e4SLinus Torvalds vma->vm_ops = &shmem_vm_ops; 2397d09e8ca6SPasha Tatashin else 2398d09e8ca6SPasha Tatashin vma->vm_ops = &shmem_anon_vm_ops; 23991da177e4SLinus Torvalds return 0; 24001da177e4SLinus Torvalds } 24011da177e4SLinus Torvalds 2402cb241339SHugh Dickins #ifdef CONFIG_TMPFS_XATTR 2403cb241339SHugh Dickins static int shmem_initxattrs(struct inode *, const struct xattr *, void *); 2404cb241339SHugh Dickins 2405cb241339SHugh Dickins /* 2406cb241339SHugh Dickins * chattr's fsflags are unrelated to extended attributes, 2407cb241339SHugh Dickins * but tmpfs has chosen to enable them under the same config option. 2408cb241339SHugh Dickins */ 2409cb241339SHugh Dickins static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags) 2410e408e695STheodore Ts'o { 2411cb241339SHugh Dickins unsigned int i_flags = 0; 2412cb241339SHugh Dickins 2413cb241339SHugh Dickins if (fsflags & FS_NOATIME_FL) 2414cb241339SHugh Dickins i_flags |= S_NOATIME; 2415cb241339SHugh Dickins if (fsflags & FS_APPEND_FL) 2416cb241339SHugh Dickins i_flags |= S_APPEND; 2417cb241339SHugh Dickins if (fsflags & FS_IMMUTABLE_FL) 2418cb241339SHugh Dickins i_flags |= S_IMMUTABLE; 2419cb241339SHugh Dickins /* 2420cb241339SHugh Dickins * But FS_NODUMP_FL does not require any action in i_flags. 
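 * The flag is still remembered in info->fsflags, so shmem_fileattr_get()
 * continues to report it.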
2421cb241339SHugh Dickins */ 2422cb241339SHugh Dickins inode_set_flags(inode, i_flags, S_NOATIME | S_APPEND | S_IMMUTABLE); 2423e408e695STheodore Ts'o } 2424cb241339SHugh Dickins #else 2425cb241339SHugh Dickins static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags) 2426cb241339SHugh Dickins { 2427cb241339SHugh Dickins } 2428cb241339SHugh Dickins #define shmem_initxattrs NULL 2429cb241339SHugh Dickins #endif 2430e408e695STheodore Ts'o 2431e09764cfSCarlos Maiolino static struct inode *__shmem_get_inode(struct mnt_idmap *idmap, 2432e09764cfSCarlos Maiolino struct super_block *sb, 2433e09764cfSCarlos Maiolino struct inode *dir, umode_t mode, 2434e09764cfSCarlos Maiolino dev_t dev, unsigned long flags) 24351da177e4SLinus Torvalds { 24361da177e4SLinus Torvalds struct inode *inode; 24371da177e4SLinus Torvalds struct shmem_inode_info *info; 24381da177e4SLinus Torvalds struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 2439e809d5f0SChris Down ino_t ino; 244071480663SCarlos Maiolino int err; 24411da177e4SLinus Torvalds 244271480663SCarlos Maiolino err = shmem_reserve_inode(sb, &ino); 244371480663SCarlos Maiolino if (err) 244471480663SCarlos Maiolino return ERR_PTR(err); 244571480663SCarlos Maiolino 24461da177e4SLinus Torvalds 24471da177e4SLinus Torvalds inode = new_inode(sb); 244871480663SCarlos Maiolino 244971480663SCarlos Maiolino if (!inode) { 245071480663SCarlos Maiolino shmem_free_inode(sb); 245171480663SCarlos Maiolino return ERR_PTR(-ENOSPC); 245271480663SCarlos Maiolino } 245371480663SCarlos Maiolino 2454e809d5f0SChris Down inode->i_ino = ino; 24557a80e5b8SGiuseppe Scrivano inode_init_owner(idmap, inode, dir, mode); 24561da177e4SLinus Torvalds inode->i_blocks = 0; 2457078cd827SDeepa Dinamani inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); 2458a251c17aSJason A. Donenfeld inode->i_generation = get_random_u32(); 24591da177e4SLinus Torvalds info = SHMEM_I(inode); 24601da177e4SLinus Torvalds memset(info, 0, (char *)inode - (char *)info); 24611da177e4SLinus Torvalds spin_lock_init(&info->lock); 2462af53d3e9SHugh Dickins atomic_set(&info->stop_eviction, 0); 246340e041a2SDavid Herrmann info->seals = F_SEAL_SEAL; 24640b0a0806SHugh Dickins info->flags = flags & VM_NORESERVE; 2465f7cd16a5SXavier Roche info->i_crtime = inode->i_mtime; 2466e408e695STheodore Ts'o info->fsflags = (dir == NULL) ? 0 : 2467e408e695STheodore Ts'o SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED; 2468cb241339SHugh Dickins if (info->fsflags) 2469cb241339SHugh Dickins shmem_set_inode_flags(inode, info->fsflags); 2470779750d2SKirill A. 
Shutemov INIT_LIST_HEAD(&info->shrinklist); 24711da177e4SLinus Torvalds INIT_LIST_HEAD(&info->swaplist); 247271480663SCarlos Maiolino INIT_LIST_HEAD(&info->swaplist); 24732c6efe9cSLuis Chamberlain if (sbinfo->noswap) 24742c6efe9cSLuis Chamberlain mapping_set_unevictable(inode->i_mapping); 247538f38657SAristeu Rozanski simple_xattrs_init(&info->xattrs); 247672c04902SAl Viro cache_no_acl(inode); 2477ff36da69SMatthew Wilcox (Oracle) mapping_set_large_folios(inode->i_mapping); 24781da177e4SLinus Torvalds 24791da177e4SLinus Torvalds switch (mode & S_IFMT) { 24801da177e4SLinus Torvalds default: 248139f0247dSAndreas Gruenbacher inode->i_op = &shmem_special_inode_operations; 24821da177e4SLinus Torvalds init_special_inode(inode, mode, dev); 24831da177e4SLinus Torvalds break; 24841da177e4SLinus Torvalds case S_IFREG: 248514fcc23fSHugh Dickins inode->i_mapping->a_ops = &shmem_aops; 24861da177e4SLinus Torvalds inode->i_op = &shmem_inode_operations; 24871da177e4SLinus Torvalds inode->i_fop = &shmem_file_operations; 248871fe804bSLee Schermerhorn mpol_shared_policy_init(&info->policy, 248971fe804bSLee Schermerhorn shmem_get_sbmpol(sbinfo)); 24901da177e4SLinus Torvalds break; 24911da177e4SLinus Torvalds case S_IFDIR: 2492d8c76e6fSDave Hansen inc_nlink(inode); 24931da177e4SLinus Torvalds /* Some things misbehave if size == 0 on a directory */ 24941da177e4SLinus Torvalds inode->i_size = 2 * BOGO_DIRENT_SIZE; 24951da177e4SLinus Torvalds inode->i_op = &shmem_dir_inode_operations; 24961da177e4SLinus Torvalds inode->i_fop = &simple_dir_operations; 24971da177e4SLinus Torvalds break; 24981da177e4SLinus Torvalds case S_IFLNK: 24991da177e4SLinus Torvalds /* 25001da177e4SLinus Torvalds * Must not load anything in the rbtree, 25011da177e4SLinus Torvalds * mpol_free_shared_policy will not be called. 
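 * (shmem_destroy_inode() frees the shared policy only for S_ISREG inodes.)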
25021da177e4SLinus Torvalds */ 250371fe804bSLee Schermerhorn mpol_shared_policy_init(&info->policy, NULL); 25041da177e4SLinus Torvalds break; 25051da177e4SLinus Torvalds } 2506b45d71fbSJoel Fernandes (Google) 2507b45d71fbSJoel Fernandes (Google) lockdep_annotate_inode_mutex_key(inode); 25081da177e4SLinus Torvalds return inode; 25091da177e4SLinus Torvalds } 25101da177e4SLinus Torvalds 2511e09764cfSCarlos Maiolino #ifdef CONFIG_TMPFS_QUOTA 2512e09764cfSCarlos Maiolino static struct inode *shmem_get_inode(struct mnt_idmap *idmap, 2513e09764cfSCarlos Maiolino struct super_block *sb, struct inode *dir, 2514e09764cfSCarlos Maiolino umode_t mode, dev_t dev, unsigned long flags) 2515e09764cfSCarlos Maiolino { 2516e09764cfSCarlos Maiolino int err; 2517e09764cfSCarlos Maiolino struct inode *inode; 2518e09764cfSCarlos Maiolino 2519e09764cfSCarlos Maiolino inode = __shmem_get_inode(idmap, sb, dir, mode, dev, flags); 2520e09764cfSCarlos Maiolino if (IS_ERR(inode)) 2521e09764cfSCarlos Maiolino return inode; 2522e09764cfSCarlos Maiolino 2523e09764cfSCarlos Maiolino err = dquot_initialize(inode); 2524e09764cfSCarlos Maiolino if (err) 2525e09764cfSCarlos Maiolino goto errout; 2526e09764cfSCarlos Maiolino 2527e09764cfSCarlos Maiolino err = dquot_alloc_inode(inode); 2528e09764cfSCarlos Maiolino if (err) { 2529e09764cfSCarlos Maiolino dquot_drop(inode); 2530e09764cfSCarlos Maiolino goto errout; 2531e09764cfSCarlos Maiolino } 2532e09764cfSCarlos Maiolino return inode; 2533e09764cfSCarlos Maiolino 2534e09764cfSCarlos Maiolino errout: 2535e09764cfSCarlos Maiolino inode->i_flags |= S_NOQUOTA; 2536e09764cfSCarlos Maiolino iput(inode); 2537e09764cfSCarlos Maiolino return ERR_PTR(err); 2538e09764cfSCarlos Maiolino } 2539e09764cfSCarlos Maiolino #else 2540e09764cfSCarlos Maiolino static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap, 2541e09764cfSCarlos Maiolino struct super_block *sb, struct inode *dir, 2542e09764cfSCarlos Maiolino umode_t mode, dev_t dev, unsigned long flags) 2543e09764cfSCarlos Maiolino { 2544e09764cfSCarlos Maiolino return __shmem_get_inode(idmap, sb, dir, mode, dev, flags); 2545e09764cfSCarlos Maiolino } 2546e09764cfSCarlos Maiolino #endif /* CONFIG_TMPFS_QUOTA */ 2547e09764cfSCarlos Maiolino 25483460f6e5SAxel Rasmussen #ifdef CONFIG_USERFAULTFD 254961c50040SAxel Rasmussen int shmem_mfill_atomic_pte(pmd_t *dst_pmd, 25504c27fe4cSMike Rapoport struct vm_area_struct *dst_vma, 25514c27fe4cSMike Rapoport unsigned long dst_addr, 25524c27fe4cSMike Rapoport unsigned long src_addr, 2553d9712937SAxel Rasmussen uffd_flags_t flags, 2554d7be6d7eSZhangPeng struct folio **foliop) 25554c27fe4cSMike Rapoport { 25564c27fe4cSMike Rapoport struct inode *inode = file_inode(dst_vma->vm_file); 25574c27fe4cSMike Rapoport struct shmem_inode_info *info = SHMEM_I(inode); 25584c27fe4cSMike Rapoport struct address_space *mapping = inode->i_mapping; 25594c27fe4cSMike Rapoport gfp_t gfp = mapping_gfp_mask(mapping); 25604c27fe4cSMike Rapoport pgoff_t pgoff = linear_page_index(dst_vma, dst_addr); 25614c27fe4cSMike Rapoport void *page_kaddr; 2562b7dd44a1SMatthew Wilcox (Oracle) struct folio *folio; 25634c27fe4cSMike Rapoport int ret; 25643460f6e5SAxel Rasmussen pgoff_t max_off; 25654c27fe4cSMike Rapoport 2566c7e263abSLukas Czerner if (shmem_inode_acct_block(inode, 1)) { 25677ed9d238SAxel Rasmussen /* 25687ed9d238SAxel Rasmussen * We may have got a page, returned -ENOENT triggering a retry, 25697ed9d238SAxel Rasmussen * and now we find ourselves with -ENOMEM. 
Release the page, to 25707ed9d238SAxel Rasmussen * avoid a BUG_ON in our caller. 25717ed9d238SAxel Rasmussen */ 2572d7be6d7eSZhangPeng if (unlikely(*foliop)) { 2573d7be6d7eSZhangPeng folio_put(*foliop); 2574d7be6d7eSZhangPeng *foliop = NULL; 25757ed9d238SAxel Rasmussen } 25767d64ae3aSAxel Rasmussen return -ENOMEM; 25777ed9d238SAxel Rasmussen } 25784c27fe4cSMike Rapoport 2579d7be6d7eSZhangPeng if (!*foliop) { 25807d64ae3aSAxel Rasmussen ret = -ENOMEM; 25817a7256d5SMatthew Wilcox (Oracle) folio = shmem_alloc_folio(gfp, info, pgoff); 25827a7256d5SMatthew Wilcox (Oracle) if (!folio) 25830f079694SMike Rapoport goto out_unacct_blocks; 25844c27fe4cSMike Rapoport 2585d9712937SAxel Rasmussen if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY)) { 25867a7256d5SMatthew Wilcox (Oracle) page_kaddr = kmap_local_folio(folio, 0); 25875dc21f0cSIra Weiny /* 25885dc21f0cSIra Weiny * The read mmap_lock is held here. Despite the 25895dc21f0cSIra Weiny * mmap_lock being read recursive a deadlock is still 25905dc21f0cSIra Weiny * possible if a writer has taken a lock. For example: 25915dc21f0cSIra Weiny * 25925dc21f0cSIra Weiny * process A thread 1 takes read lock on own mmap_lock 25935dc21f0cSIra Weiny * process A thread 2 calls mmap, blocks taking write lock 25945dc21f0cSIra Weiny * process B thread 1 takes page fault, read lock on own mmap lock 25955dc21f0cSIra Weiny * process B thread 2 calls mmap, blocks taking write lock 25965dc21f0cSIra Weiny * process A thread 1 blocks taking read lock on process B 25975dc21f0cSIra Weiny * process B thread 1 blocks taking read lock on process A 25985dc21f0cSIra Weiny * 25995dc21f0cSIra Weiny * Disable page faults to prevent potential deadlock 26005dc21f0cSIra Weiny * and retry the copy outside the mmap_lock. 26015dc21f0cSIra Weiny */ 26025dc21f0cSIra Weiny pagefault_disable(); 26038d103963SMike Rapoport ret = copy_from_user(page_kaddr, 26048d103963SMike Rapoport (const void __user *)src_addr, 26054c27fe4cSMike Rapoport PAGE_SIZE); 26065dc21f0cSIra Weiny pagefault_enable(); 26077a7256d5SMatthew Wilcox (Oracle) kunmap_local(page_kaddr); 26084c27fe4cSMike Rapoport 2609c1e8d7c6SMichel Lespinasse /* fallback to copy_from_user outside mmap_lock */ 26104c27fe4cSMike Rapoport if (unlikely(ret)) { 2611d7be6d7eSZhangPeng *foliop = folio; 26127d64ae3aSAxel Rasmussen ret = -ENOENT; 26134c27fe4cSMike Rapoport /* don't free the page */ 26147d64ae3aSAxel Rasmussen goto out_unacct_blocks; 26154c27fe4cSMike Rapoport } 261619b482c2SMuchun Song 26177a7256d5SMatthew Wilcox (Oracle) flush_dcache_folio(folio); 26183460f6e5SAxel Rasmussen } else { /* ZEROPAGE */ 26197a7256d5SMatthew Wilcox (Oracle) clear_user_highpage(&folio->page, dst_addr); 26208d103963SMike Rapoport } 26214c27fe4cSMike Rapoport } else { 2622d7be6d7eSZhangPeng folio = *foliop; 26237a7256d5SMatthew Wilcox (Oracle) VM_BUG_ON_FOLIO(folio_test_large(folio), folio); 2624d7be6d7eSZhangPeng *foliop = NULL; 26254c27fe4cSMike Rapoport } 26264c27fe4cSMike Rapoport 26277a7256d5SMatthew Wilcox (Oracle) VM_BUG_ON(folio_test_locked(folio)); 26287a7256d5SMatthew Wilcox (Oracle) VM_BUG_ON(folio_test_swapbacked(folio)); 26297a7256d5SMatthew Wilcox (Oracle) __folio_set_locked(folio); 26307a7256d5SMatthew Wilcox (Oracle) __folio_set_swapbacked(folio); 26317a7256d5SMatthew Wilcox (Oracle) __folio_mark_uptodate(folio); 26329cc90c66SAndrea Arcangeli 2633e2a50c1fSAndrea Arcangeli ret = -EFAULT; 2634e2a50c1fSAndrea Arcangeli max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 26353460f6e5SAxel Rasmussen if (unlikely(pgoff >= max_off)) 
2636e2a50c1fSAndrea Arcangeli goto out_release; 2637e2a50c1fSAndrea Arcangeli 2638b7dd44a1SMatthew Wilcox (Oracle) ret = shmem_add_to_page_cache(folio, mapping, pgoff, NULL, 263961c50040SAxel Rasmussen gfp & GFP_RECLAIM_MASK, dst_vma->vm_mm); 26404c27fe4cSMike Rapoport if (ret) 26414c27fe4cSMike Rapoport goto out_release; 26424c27fe4cSMike Rapoport 264361c50040SAxel Rasmussen ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr, 2644d9712937SAxel Rasmussen &folio->page, true, flags); 26457d64ae3aSAxel Rasmussen if (ret) 26467d64ae3aSAxel Rasmussen goto out_delete_from_cache; 26474c27fe4cSMike Rapoport 264894b7cc01SYang Shi spin_lock_irq(&info->lock); 26494c27fe4cSMike Rapoport info->alloced++; 26504c27fe4cSMike Rapoport shmem_recalc_inode(inode); 265194b7cc01SYang Shi spin_unlock_irq(&info->lock); 26524c27fe4cSMike Rapoport 26537a7256d5SMatthew Wilcox (Oracle) folio_unlock(folio); 26547d64ae3aSAxel Rasmussen return 0; 26557d64ae3aSAxel Rasmussen out_delete_from_cache: 26567a7256d5SMatthew Wilcox (Oracle) filemap_remove_folio(folio); 26574c27fe4cSMike Rapoport out_release: 26587a7256d5SMatthew Wilcox (Oracle) folio_unlock(folio); 26597a7256d5SMatthew Wilcox (Oracle) folio_put(folio); 26604c27fe4cSMike Rapoport out_unacct_blocks: 26610f079694SMike Rapoport shmem_inode_unacct_blocks(inode, 1); 26627d64ae3aSAxel Rasmussen return ret; 26634c27fe4cSMike Rapoport } 26643460f6e5SAxel Rasmussen #endif /* CONFIG_USERFAULTFD */ 26658d103963SMike Rapoport 26661da177e4SLinus Torvalds #ifdef CONFIG_TMPFS 266792e1d5beSArjan van de Ven static const struct inode_operations shmem_symlink_inode_operations; 266869f07ec9SHugh Dickins static const struct inode_operations shmem_short_symlink_operations; 26691da177e4SLinus Torvalds 26701da177e4SLinus Torvalds static int 2671800d15a5SNick Piggin shmem_write_begin(struct file *file, struct address_space *mapping, 26729d6b0cd7SMatthew Wilcox (Oracle) loff_t pos, unsigned len, 2673800d15a5SNick Piggin struct page **pagep, void **fsdata) 26741da177e4SLinus Torvalds { 2675800d15a5SNick Piggin struct inode *inode = mapping->host; 267640e041a2SDavid Herrmann struct shmem_inode_info *info = SHMEM_I(inode); 267709cbfeafSKirill A. 
Shutemov pgoff_t index = pos >> PAGE_SHIFT; 2678eff1f906SMatthew Wilcox (Oracle) struct folio *folio; 2679a7605426SYang Shi int ret = 0; 268040e041a2SDavid Herrmann 26819608703eSJan Kara /* i_rwsem is held by caller */ 2682ab3948f5SJoel Fernandes (Google) if (unlikely(info->seals & (F_SEAL_GROW | 2683ab3948f5SJoel Fernandes (Google) F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))) { 2684ab3948f5SJoel Fernandes (Google) if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) 268540e041a2SDavid Herrmann return -EPERM; 268640e041a2SDavid Herrmann if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size) 268740e041a2SDavid Herrmann return -EPERM; 268840e041a2SDavid Herrmann } 268940e041a2SDavid Herrmann 2690eff1f906SMatthew Wilcox (Oracle) ret = shmem_get_folio(inode, index, &folio, SGP_WRITE); 2691a7605426SYang Shi 2692a7605426SYang Shi if (ret) 2693a7605426SYang Shi return ret; 2694a7605426SYang Shi 2695eff1f906SMatthew Wilcox (Oracle) *pagep = folio_file_page(folio, index); 2696a7605426SYang Shi if (PageHWPoison(*pagep)) { 2697eff1f906SMatthew Wilcox (Oracle) folio_unlock(folio); 2698eff1f906SMatthew Wilcox (Oracle) folio_put(folio); 2699a7605426SYang Shi *pagep = NULL; 2700a7605426SYang Shi return -EIO; 2701a7605426SYang Shi } 2702a7605426SYang Shi 2703a7605426SYang Shi return 0; 2704800d15a5SNick Piggin } 2705800d15a5SNick Piggin 2706800d15a5SNick Piggin static int 2707800d15a5SNick Piggin shmem_write_end(struct file *file, struct address_space *mapping, 2708800d15a5SNick Piggin loff_t pos, unsigned len, unsigned copied, 2709800d15a5SNick Piggin struct page *page, void *fsdata) 2710800d15a5SNick Piggin { 271169bbb87bSMatthew Wilcox (Oracle) struct folio *folio = page_folio(page); 2712800d15a5SNick Piggin struct inode *inode = mapping->host; 2713800d15a5SNick Piggin 2714800d15a5SNick Piggin if (pos + copied > inode->i_size) 2715800d15a5SNick Piggin i_size_write(inode, pos + copied); 2716800d15a5SNick Piggin 271769bbb87bSMatthew Wilcox (Oracle) if (!folio_test_uptodate(folio)) { 271869bbb87bSMatthew Wilcox (Oracle) if (copied < folio_size(folio)) { 271969bbb87bSMatthew Wilcox (Oracle) size_t from = offset_in_folio(folio, pos); 272069bbb87bSMatthew Wilcox (Oracle) folio_zero_segments(folio, 0, from, 272169bbb87bSMatthew Wilcox (Oracle) from + copied, folio_size(folio)); 2722800d8c63SKirill A. Shutemov } 272369bbb87bSMatthew Wilcox (Oracle) folio_mark_uptodate(folio); 2724800d8c63SKirill A. Shutemov } 272569bbb87bSMatthew Wilcox (Oracle) folio_mark_dirty(folio); 272669bbb87bSMatthew Wilcox (Oracle) folio_unlock(folio); 272769bbb87bSMatthew Wilcox (Oracle) folio_put(folio); 2728d3602444SHugh Dickins 2729800d15a5SNick Piggin return copied; 27301da177e4SLinus Torvalds } 27311da177e4SLinus Torvalds 27322ba5bbedSAl Viro static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) 27331da177e4SLinus Torvalds { 27346e58e79dSAl Viro struct file *file = iocb->ki_filp; 27356e58e79dSAl Viro struct inode *inode = file_inode(file); 27361da177e4SLinus Torvalds struct address_space *mapping = inode->i_mapping; 273741ffe5d5SHugh Dickins pgoff_t index; 273841ffe5d5SHugh Dickins unsigned long offset; 2739f7c1d074SGeert Uytterhoeven int error = 0; 2740cb66a7a1SAl Viro ssize_t retval = 0; 27416e58e79dSAl Viro loff_t *ppos = &iocb->ki_pos; 2742a0ee5ec5SHugh Dickins 274309cbfeafSKirill A. Shutemov index = *ppos >> PAGE_SHIFT; 274409cbfeafSKirill A. 
Shutemov offset = *ppos & ~PAGE_MASK; 27451da177e4SLinus Torvalds 27461da177e4SLinus Torvalds for (;;) { 27474601e2fcSMatthew Wilcox (Oracle) struct folio *folio = NULL; 27481da177e4SLinus Torvalds struct page *page = NULL; 274941ffe5d5SHugh Dickins pgoff_t end_index; 275041ffe5d5SHugh Dickins unsigned long nr, ret; 27511da177e4SLinus Torvalds loff_t i_size = i_size_read(inode); 27521da177e4SLinus Torvalds 275309cbfeafSKirill A. Shutemov end_index = i_size >> PAGE_SHIFT; 27541da177e4SLinus Torvalds if (index > end_index) 27551da177e4SLinus Torvalds break; 27561da177e4SLinus Torvalds if (index == end_index) { 275709cbfeafSKirill A. Shutemov nr = i_size & ~PAGE_MASK; 27581da177e4SLinus Torvalds if (nr <= offset) 27591da177e4SLinus Torvalds break; 27601da177e4SLinus Torvalds } 27611da177e4SLinus Torvalds 27624601e2fcSMatthew Wilcox (Oracle) error = shmem_get_folio(inode, index, &folio, SGP_READ); 27636e58e79dSAl Viro if (error) { 27646e58e79dSAl Viro if (error == -EINVAL) 27656e58e79dSAl Viro error = 0; 27661da177e4SLinus Torvalds break; 27671da177e4SLinus Torvalds } 27684601e2fcSMatthew Wilcox (Oracle) if (folio) { 27694601e2fcSMatthew Wilcox (Oracle) folio_unlock(folio); 2770a7605426SYang Shi 27714601e2fcSMatthew Wilcox (Oracle) page = folio_file_page(folio, index); 2772a7605426SYang Shi if (PageHWPoison(page)) { 27734601e2fcSMatthew Wilcox (Oracle) folio_put(folio); 2774a7605426SYang Shi error = -EIO; 2775a7605426SYang Shi break; 2776a7605426SYang Shi } 277775edd345SHugh Dickins } 27781da177e4SLinus Torvalds 27791da177e4SLinus Torvalds /* 27801da177e4SLinus Torvalds * We must evaluate after, since reads (unlike writes) 27819608703eSJan Kara * are called without i_rwsem protection against truncate 27821da177e4SLinus Torvalds */ 278309cbfeafSKirill A. Shutemov nr = PAGE_SIZE; 27841da177e4SLinus Torvalds i_size = i_size_read(inode); 278509cbfeafSKirill A. Shutemov end_index = i_size >> PAGE_SHIFT; 27861da177e4SLinus Torvalds if (index == end_index) { 278709cbfeafSKirill A. Shutemov nr = i_size & ~PAGE_MASK; 27881da177e4SLinus Torvalds if (nr <= offset) { 27894601e2fcSMatthew Wilcox (Oracle) if (folio) 27904601e2fcSMatthew Wilcox (Oracle) folio_put(folio); 27911da177e4SLinus Torvalds break; 27921da177e4SLinus Torvalds } 27931da177e4SLinus Torvalds } 27941da177e4SLinus Torvalds nr -= offset; 27951da177e4SLinus Torvalds 27964601e2fcSMatthew Wilcox (Oracle) if (folio) { 27971da177e4SLinus Torvalds /* 27981da177e4SLinus Torvalds * If users can be writing to this page using arbitrary 27991da177e4SLinus Torvalds * virtual addresses, take care about potential aliasing 28001da177e4SLinus Torvalds * before reading the page on the kernel side. 28011da177e4SLinus Torvalds */ 28021da177e4SLinus Torvalds if (mapping_writably_mapped(mapping)) 28031da177e4SLinus Torvalds flush_dcache_page(page); 28041da177e4SLinus Torvalds /* 28051da177e4SLinus Torvalds * Mark the page accessed if we read the beginning. 28061da177e4SLinus Torvalds */ 28071da177e4SLinus Torvalds if (!offset) 28084601e2fcSMatthew Wilcox (Oracle) folio_mark_accessed(folio); 28091da177e4SLinus Torvalds /* 28101da177e4SLinus Torvalds * Ok, we have the page, and it's up-to-date, so 28111da177e4SLinus Torvalds * now we can copy it to user space... 
28121da177e4SLinus Torvalds */ 28132ba5bbedSAl Viro ret = copy_page_to_iter(page, offset, nr, to); 28144601e2fcSMatthew Wilcox (Oracle) folio_put(folio); 28151bdec44bSHugh Dickins 2816fcb14cb1SAl Viro } else if (user_backed_iter(to)) { 28171bdec44bSHugh Dickins /* 28181bdec44bSHugh Dickins * Copy to user tends to be so well optimized, but 28191bdec44bSHugh Dickins * clear_user() not so much, that it is noticeably 28201bdec44bSHugh Dickins * faster to copy the zero page instead of clearing. 28211bdec44bSHugh Dickins */ 28221bdec44bSHugh Dickins ret = copy_page_to_iter(ZERO_PAGE(0), offset, nr, to); 28231bdec44bSHugh Dickins } else { 28241bdec44bSHugh Dickins /* 28251bdec44bSHugh Dickins * But submitting the same page twice in a row to 28261bdec44bSHugh Dickins * splice() - or others? - can result in confusion: 28271bdec44bSHugh Dickins * so don't attempt that optimization on pipes etc. 28281bdec44bSHugh Dickins */ 28291bdec44bSHugh Dickins ret = iov_iter_zero(nr, to); 28301bdec44bSHugh Dickins } 28311bdec44bSHugh Dickins 28326e58e79dSAl Viro retval += ret; 28331da177e4SLinus Torvalds offset += ret; 283409cbfeafSKirill A. Shutemov index += offset >> PAGE_SHIFT; 283509cbfeafSKirill A. Shutemov offset &= ~PAGE_MASK; 28361da177e4SLinus Torvalds 28372ba5bbedSAl Viro if (!iov_iter_count(to)) 28381da177e4SLinus Torvalds break; 28396e58e79dSAl Viro if (ret < nr) { 28406e58e79dSAl Viro error = -EFAULT; 28416e58e79dSAl Viro break; 28426e58e79dSAl Viro } 28431da177e4SLinus Torvalds cond_resched(); 28441da177e4SLinus Torvalds } 28451da177e4SLinus Torvalds 284609cbfeafSKirill A. Shutemov *ppos = ((loff_t) index << PAGE_SHIFT) + offset; 28476e58e79dSAl Viro file_accessed(file); 28486e58e79dSAl Viro return retval ? retval : error; 28491da177e4SLinus Torvalds } 28501da177e4SLinus Torvalds 2851bd194b18SDavid Howells static bool zero_pipe_buf_get(struct pipe_inode_info *pipe, 2852bd194b18SDavid Howells struct pipe_buffer *buf) 2853bd194b18SDavid Howells { 2854bd194b18SDavid Howells return true; 2855bd194b18SDavid Howells } 2856bd194b18SDavid Howells 2857bd194b18SDavid Howells static void zero_pipe_buf_release(struct pipe_inode_info *pipe, 2858bd194b18SDavid Howells struct pipe_buffer *buf) 2859bd194b18SDavid Howells { 2860bd194b18SDavid Howells } 2861bd194b18SDavid Howells 2862bd194b18SDavid Howells static bool zero_pipe_buf_try_steal(struct pipe_inode_info *pipe, 2863bd194b18SDavid Howells struct pipe_buffer *buf) 2864bd194b18SDavid Howells { 2865bd194b18SDavid Howells return false; 2866bd194b18SDavid Howells } 2867bd194b18SDavid Howells 2868bd194b18SDavid Howells static const struct pipe_buf_operations zero_pipe_buf_ops = { 2869bd194b18SDavid Howells .release = zero_pipe_buf_release, 2870bd194b18SDavid Howells .try_steal = zero_pipe_buf_try_steal, 2871bd194b18SDavid Howells .get = zero_pipe_buf_get, 2872bd194b18SDavid Howells }; 2873bd194b18SDavid Howells 2874bd194b18SDavid Howells static size_t splice_zeropage_into_pipe(struct pipe_inode_info *pipe, 2875bd194b18SDavid Howells loff_t fpos, size_t size) 2876bd194b18SDavid Howells { 2877bd194b18SDavid Howells size_t offset = fpos & ~PAGE_MASK; 2878bd194b18SDavid Howells 2879bd194b18SDavid Howells size = min_t(size_t, size, PAGE_SIZE - offset); 2880bd194b18SDavid Howells 2881bd194b18SDavid Howells if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { 2882bd194b18SDavid Howells struct pipe_buffer *buf = pipe_head_buf(pipe); 2883bd194b18SDavid Howells 2884bd194b18SDavid Howells *buf = (struct pipe_buffer) { 2885bd194b18SDavid Howells .ops = 
&zero_pipe_buf_ops, 2886bd194b18SDavid Howells .page = ZERO_PAGE(0), 2887bd194b18SDavid Howells .offset = offset, 2888bd194b18SDavid Howells .len = size, 2889bd194b18SDavid Howells }; 2890bd194b18SDavid Howells pipe->head++; 2891bd194b18SDavid Howells } 2892bd194b18SDavid Howells 2893bd194b18SDavid Howells return size; 2894bd194b18SDavid Howells } 2895bd194b18SDavid Howells 2896bd194b18SDavid Howells static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, 2897bd194b18SDavid Howells struct pipe_inode_info *pipe, 2898bd194b18SDavid Howells size_t len, unsigned int flags) 2899bd194b18SDavid Howells { 2900bd194b18SDavid Howells struct inode *inode = file_inode(in); 2901bd194b18SDavid Howells struct address_space *mapping = inode->i_mapping; 2902bd194b18SDavid Howells struct folio *folio = NULL; 2903bd194b18SDavid Howells size_t total_spliced = 0, used, npages, n, part; 2904bd194b18SDavid Howells loff_t isize; 2905bd194b18SDavid Howells int error = 0; 2906bd194b18SDavid Howells 2907bd194b18SDavid Howells /* Work out how much data we can actually add into the pipe */ 2908bd194b18SDavid Howells used = pipe_occupancy(pipe->head, pipe->tail); 2909bd194b18SDavid Howells npages = max_t(ssize_t, pipe->max_usage - used, 0); 2910bd194b18SDavid Howells len = min_t(size_t, len, npages * PAGE_SIZE); 2911bd194b18SDavid Howells 2912bd194b18SDavid Howells do { 2913bd194b18SDavid Howells if (*ppos >= i_size_read(inode)) 2914bd194b18SDavid Howells break; 2915bd194b18SDavid Howells 2916fa598952SHugh Dickins error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio, 2917fa598952SHugh Dickins SGP_READ); 2918bd194b18SDavid Howells if (error) { 2919bd194b18SDavid Howells if (error == -EINVAL) 2920bd194b18SDavid Howells error = 0; 2921bd194b18SDavid Howells break; 2922bd194b18SDavid Howells } 2923bd194b18SDavid Howells if (folio) { 2924bd194b18SDavid Howells folio_unlock(folio); 2925bd194b18SDavid Howells 2926fa598952SHugh Dickins if (folio_test_hwpoison(folio) || 2927fa598952SHugh Dickins (folio_test_large(folio) && 2928fa598952SHugh Dickins folio_test_has_hwpoisoned(folio))) { 2929bd194b18SDavid Howells error = -EIO; 2930bd194b18SDavid Howells break; 2931bd194b18SDavid Howells } 2932bd194b18SDavid Howells } 2933bd194b18SDavid Howells 2934bd194b18SDavid Howells /* 2935bd194b18SDavid Howells * i_size must be checked after we know the pages are Uptodate. 2936bd194b18SDavid Howells * 2937bd194b18SDavid Howells * Checking i_size after the check allows us to calculate 2938bd194b18SDavid Howells * the correct value for "nr", which means the zero-filled 2939bd194b18SDavid Howells * part of the page is not copied back to userspace (unless 2940bd194b18SDavid Howells * another truncate extends the file - this is desired though). 2941bd194b18SDavid Howells */ 2942bd194b18SDavid Howells isize = i_size_read(inode); 2943bd194b18SDavid Howells if (unlikely(*ppos >= isize)) 2944bd194b18SDavid Howells break; 2945bd194b18SDavid Howells part = min_t(loff_t, isize - *ppos, len); 2946bd194b18SDavid Howells 2947bd194b18SDavid Howells if (folio) { 2948bd194b18SDavid Howells /* 2949bd194b18SDavid Howells * If users can be writing to this page using arbitrary 2950bd194b18SDavid Howells * virtual addresses, take care about potential aliasing 2951bd194b18SDavid Howells * before reading the page on the kernel side. 
2952bd194b18SDavid Howells */ 2953bd194b18SDavid Howells if (mapping_writably_mapped(mapping)) 2954bd194b18SDavid Howells flush_dcache_folio(folio); 2955bd194b18SDavid Howells folio_mark_accessed(folio); 2956bd194b18SDavid Howells /* 2957bd194b18SDavid Howells * Ok, we have the page, and it's up-to-date, so we can 2958bd194b18SDavid Howells * now splice it into the pipe. 2959bd194b18SDavid Howells */ 2960bd194b18SDavid Howells n = splice_folio_into_pipe(pipe, folio, *ppos, part); 2961bd194b18SDavid Howells folio_put(folio); 2962bd194b18SDavid Howells folio = NULL; 2963bd194b18SDavid Howells } else { 2964fa598952SHugh Dickins n = splice_zeropage_into_pipe(pipe, *ppos, part); 2965bd194b18SDavid Howells } 2966bd194b18SDavid Howells 2967bd194b18SDavid Howells if (!n) 2968bd194b18SDavid Howells break; 2969bd194b18SDavid Howells len -= n; 2970bd194b18SDavid Howells total_spliced += n; 2971bd194b18SDavid Howells *ppos += n; 2972bd194b18SDavid Howells in->f_ra.prev_pos = *ppos; 2973bd194b18SDavid Howells if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) 2974bd194b18SDavid Howells break; 2975bd194b18SDavid Howells 2976bd194b18SDavid Howells cond_resched(); 2977bd194b18SDavid Howells } while (len); 2978bd194b18SDavid Howells 2979bd194b18SDavid Howells if (folio) 2980bd194b18SDavid Howells folio_put(folio); 2981bd194b18SDavid Howells 2982bd194b18SDavid Howells file_accessed(in); 2983bd194b18SDavid Howells return total_spliced ? total_spliced : error; 2984bd194b18SDavid Howells } 2985bd194b18SDavid Howells 2986965c8e59SAndrew Morton static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence) 2987220f2ac9SHugh Dickins { 2988220f2ac9SHugh Dickins struct address_space *mapping = file->f_mapping; 2989220f2ac9SHugh Dickins struct inode *inode = mapping->host; 2990220f2ac9SHugh Dickins 2991965c8e59SAndrew Morton if (whence != SEEK_DATA && whence != SEEK_HOLE) 2992965c8e59SAndrew Morton return generic_file_llseek_size(file, offset, whence, 2993220f2ac9SHugh Dickins MAX_LFS_FILESIZE, i_size_read(inode)); 299441139aa4SMatthew Wilcox (Oracle) if (offset < 0) 299541139aa4SMatthew Wilcox (Oracle) return -ENXIO; 299641139aa4SMatthew Wilcox (Oracle) 29975955102cSAl Viro inode_lock(inode); 29989608703eSJan Kara /* We're holding i_rwsem so we can access i_size directly */ 299941139aa4SMatthew Wilcox (Oracle) offset = mapping_seek_hole_data(mapping, offset, inode->i_size, whence); 3000387aae6fSHugh Dickins if (offset >= 0) 300146a1c2c7SJie Liu offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE); 30025955102cSAl Viro inode_unlock(inode); 3003220f2ac9SHugh Dickins return offset; 3004220f2ac9SHugh Dickins } 3005220f2ac9SHugh Dickins 300683e4fa9cSHugh Dickins static long shmem_fallocate(struct file *file, int mode, loff_t offset, 300783e4fa9cSHugh Dickins loff_t len) 300883e4fa9cSHugh Dickins { 3009496ad9aaSAl Viro struct inode *inode = file_inode(file); 3010e2d12e22SHugh Dickins struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 301140e041a2SDavid Herrmann struct shmem_inode_info *info = SHMEM_I(inode); 30121aac1400SHugh Dickins struct shmem_falloc shmem_falloc; 3013d144bf62SHugh Dickins pgoff_t start, index, end, undo_fallocend; 3014e2d12e22SHugh Dickins int error; 301583e4fa9cSHugh Dickins 301613ace4d0SHugh Dickins if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 301713ace4d0SHugh Dickins return -EOPNOTSUPP; 301813ace4d0SHugh Dickins 30195955102cSAl Viro inode_lock(inode); 302083e4fa9cSHugh Dickins 302183e4fa9cSHugh Dickins if (mode & FALLOC_FL_PUNCH_HOLE) { 302283e4fa9cSHugh Dickins 
struct address_space *mapping = file->f_mapping; 302383e4fa9cSHugh Dickins loff_t unmap_start = round_up(offset, PAGE_SIZE); 302483e4fa9cSHugh Dickins loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; 30258e205f77SHugh Dickins DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq); 302683e4fa9cSHugh Dickins 30279608703eSJan Kara /* protected by i_rwsem */ 3028ab3948f5SJoel Fernandes (Google) if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) { 302940e041a2SDavid Herrmann error = -EPERM; 303040e041a2SDavid Herrmann goto out; 303140e041a2SDavid Herrmann } 303240e041a2SDavid Herrmann 30338e205f77SHugh Dickins shmem_falloc.waitq = &shmem_falloc_waitq; 3034aa71ecd8SChen Jun shmem_falloc.start = (u64)unmap_start >> PAGE_SHIFT; 3035f00cdc6dSHugh Dickins shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; 3036f00cdc6dSHugh Dickins spin_lock(&inode->i_lock); 3037f00cdc6dSHugh Dickins inode->i_private = &shmem_falloc; 3038f00cdc6dSHugh Dickins spin_unlock(&inode->i_lock); 3039f00cdc6dSHugh Dickins 304083e4fa9cSHugh Dickins if ((u64)unmap_end > (u64)unmap_start) 304183e4fa9cSHugh Dickins unmap_mapping_range(mapping, unmap_start, 304283e4fa9cSHugh Dickins 1 + unmap_end - unmap_start, 0); 304383e4fa9cSHugh Dickins shmem_truncate_range(inode, offset, offset + len - 1); 304483e4fa9cSHugh Dickins /* No need to unmap again: hole-punching leaves COWed pages */ 30458e205f77SHugh Dickins 30468e205f77SHugh Dickins spin_lock(&inode->i_lock); 30478e205f77SHugh Dickins inode->i_private = NULL; 30488e205f77SHugh Dickins wake_up_all(&shmem_falloc_waitq); 30492055da97SIngo Molnar WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.head)); 30508e205f77SHugh Dickins spin_unlock(&inode->i_lock); 305183e4fa9cSHugh Dickins error = 0; 30528e205f77SHugh Dickins goto out; 305383e4fa9cSHugh Dickins } 305483e4fa9cSHugh Dickins 3055e2d12e22SHugh Dickins /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */ 3056e2d12e22SHugh Dickins error = inode_newsize_ok(inode, offset + len); 3057e2d12e22SHugh Dickins if (error) 3058e2d12e22SHugh Dickins goto out; 3059e2d12e22SHugh Dickins 306040e041a2SDavid Herrmann if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) { 306140e041a2SDavid Herrmann error = -EPERM; 306240e041a2SDavid Herrmann goto out; 306340e041a2SDavid Herrmann } 306440e041a2SDavid Herrmann 306509cbfeafSKirill A. Shutemov start = offset >> PAGE_SHIFT; 306609cbfeafSKirill A. Shutemov end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 3067e2d12e22SHugh Dickins /* Try to avoid a swapstorm if len is impossible to satisfy */ 3068e2d12e22SHugh Dickins if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) { 3069e2d12e22SHugh Dickins error = -ENOSPC; 3070e2d12e22SHugh Dickins goto out; 3071e2d12e22SHugh Dickins } 3072e2d12e22SHugh Dickins 30738e205f77SHugh Dickins shmem_falloc.waitq = NULL; 30741aac1400SHugh Dickins shmem_falloc.start = start; 30751aac1400SHugh Dickins shmem_falloc.next = start; 30761aac1400SHugh Dickins shmem_falloc.nr_falloced = 0; 30771aac1400SHugh Dickins shmem_falloc.nr_unswapped = 0; 30781aac1400SHugh Dickins spin_lock(&inode->i_lock); 30791aac1400SHugh Dickins inode->i_private = &shmem_falloc; 30801aac1400SHugh Dickins spin_unlock(&inode->i_lock); 30811aac1400SHugh Dickins 3082d144bf62SHugh Dickins /* 3083d144bf62SHugh Dickins * info->fallocend is only relevant when huge pages might be 3084d144bf62SHugh Dickins * involved: to prevent split_huge_page() freeing fallocated 3085d144bf62SHugh Dickins * pages when FALLOC_FL_KEEP_SIZE committed beyond i_size. 
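 * Remember the old value so a failing fallocate can restore it below.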
3086d144bf62SHugh Dickins */ 3087d144bf62SHugh Dickins undo_fallocend = info->fallocend; 3088d144bf62SHugh Dickins if (info->fallocend < end) 3089d144bf62SHugh Dickins info->fallocend = end; 3090d144bf62SHugh Dickins 3091050dcb5cSHugh Dickins for (index = start; index < end; ) { 3092b0802b22SMatthew Wilcox (Oracle) struct folio *folio; 3093e2d12e22SHugh Dickins 3094e2d12e22SHugh Dickins /* 3095e2d12e22SHugh Dickins * Good, the fallocate(2) manpage permits EINTR: we may have 3096e2d12e22SHugh Dickins * been interrupted because we are using up too much memory. 3097e2d12e22SHugh Dickins */ 3098e2d12e22SHugh Dickins if (signal_pending(current)) 3099e2d12e22SHugh Dickins error = -EINTR; 31001aac1400SHugh Dickins else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced) 31011aac1400SHugh Dickins error = -ENOMEM; 3102e2d12e22SHugh Dickins else 3103b0802b22SMatthew Wilcox (Oracle) error = shmem_get_folio(inode, index, &folio, 3104b0802b22SMatthew Wilcox (Oracle) SGP_FALLOC); 3105e2d12e22SHugh Dickins if (error) { 3106d144bf62SHugh Dickins info->fallocend = undo_fallocend; 3107b0802b22SMatthew Wilcox (Oracle) /* Remove the !uptodate folios we added */ 31087f556567SHugh Dickins if (index > start) { 31091635f6a7SHugh Dickins shmem_undo_range(inode, 311009cbfeafSKirill A. Shutemov (loff_t)start << PAGE_SHIFT, 3111b9b4bb26SAnthony Romano ((loff_t)index << PAGE_SHIFT) - 1, true); 31127f556567SHugh Dickins } 31131aac1400SHugh Dickins goto undone; 3114e2d12e22SHugh Dickins } 3115e2d12e22SHugh Dickins 3116050dcb5cSHugh Dickins /* 3117050dcb5cSHugh Dickins * Here is a more important optimization than it appears: 3118b0802b22SMatthew Wilcox (Oracle) * a second SGP_FALLOC on the same large folio will clear it, 3119b0802b22SMatthew Wilcox (Oracle) * making it uptodate and un-undoable if we fail later. 3120050dcb5cSHugh Dickins */ 3121b0802b22SMatthew Wilcox (Oracle) index = folio_next_index(folio); 3122050dcb5cSHugh Dickins /* Beware 32-bit wraparound */ 3123050dcb5cSHugh Dickins if (!index) 3124050dcb5cSHugh Dickins index--; 3125050dcb5cSHugh Dickins 3126e2d12e22SHugh Dickins /* 31271aac1400SHugh Dickins * Inform shmem_writepage() how far we have reached. 31281aac1400SHugh Dickins * No need for lock or barrier: we have the page lock. 31291aac1400SHugh Dickins */ 3130b0802b22SMatthew Wilcox (Oracle) if (!folio_test_uptodate(folio)) 3131050dcb5cSHugh Dickins shmem_falloc.nr_falloced += index - shmem_falloc.next; 3132050dcb5cSHugh Dickins shmem_falloc.next = index; 31331aac1400SHugh Dickins 31341aac1400SHugh Dickins /* 3135b0802b22SMatthew Wilcox (Oracle) * If !uptodate, leave it that way so that freeable folios 31361635f6a7SHugh Dickins * can be recognized if we need to rollback on error later. 3137b0802b22SMatthew Wilcox (Oracle) * But mark it dirty so that memory pressure will swap rather 3138b0802b22SMatthew Wilcox (Oracle) * than free the folios we are allocating (and SGP_CACHE folios 3139e2d12e22SHugh Dickins * might still be clean: we now need to mark those dirty too). 
3140e2d12e22SHugh Dickins */ 3141b0802b22SMatthew Wilcox (Oracle) folio_mark_dirty(folio); 3142b0802b22SMatthew Wilcox (Oracle) folio_unlock(folio); 3143b0802b22SMatthew Wilcox (Oracle) folio_put(folio); 3144e2d12e22SHugh Dickins cond_resched(); 3145e2d12e22SHugh Dickins } 3146e2d12e22SHugh Dickins 3147e2d12e22SHugh Dickins if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) 3148e2d12e22SHugh Dickins i_size_write(inode, offset + len); 31491aac1400SHugh Dickins undone: 31501aac1400SHugh Dickins spin_lock(&inode->i_lock); 31511aac1400SHugh Dickins inode->i_private = NULL; 31521aac1400SHugh Dickins spin_unlock(&inode->i_lock); 3153e2d12e22SHugh Dickins out: 315415f242bbSHugh Dickins if (!error) 315515f242bbSHugh Dickins file_modified(file); 31565955102cSAl Viro inode_unlock(inode); 315783e4fa9cSHugh Dickins return error; 315883e4fa9cSHugh Dickins } 315983e4fa9cSHugh Dickins 3160726c3342SDavid Howells static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) 31611da177e4SLinus Torvalds { 3162726c3342SDavid Howells struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); 31631da177e4SLinus Torvalds 31641da177e4SLinus Torvalds buf->f_type = TMPFS_MAGIC; 316509cbfeafSKirill A. Shutemov buf->f_bsize = PAGE_SIZE; 31661da177e4SLinus Torvalds buf->f_namelen = NAME_MAX; 31670edd73b3SHugh Dickins if (sbinfo->max_blocks) { 31681da177e4SLinus Torvalds buf->f_blocks = sbinfo->max_blocks; 316941ffe5d5SHugh Dickins buf->f_bavail = 317041ffe5d5SHugh Dickins buf->f_bfree = sbinfo->max_blocks - 317141ffe5d5SHugh Dickins percpu_counter_sum(&sbinfo->used_blocks); 31720edd73b3SHugh Dickins } 31730edd73b3SHugh Dickins if (sbinfo->max_inodes) { 31741da177e4SLinus Torvalds buf->f_files = sbinfo->max_inodes; 31751da177e4SLinus Torvalds buf->f_ffree = sbinfo->free_inodes; 31761da177e4SLinus Torvalds } 31771da177e4SLinus Torvalds /* else leave those fields 0 like simple_statfs */ 317859cda49eSAmir Goldstein 317959cda49eSAmir Goldstein buf->f_fsid = uuid_to_fsid(dentry->d_sb->s_uuid.b); 318059cda49eSAmir Goldstein 31811da177e4SLinus Torvalds return 0; 31821da177e4SLinus Torvalds } 31831da177e4SLinus Torvalds 31841da177e4SLinus Torvalds /* 31851da177e4SLinus Torvalds * File creation. Allocate an inode, and we're done.. 
31861da177e4SLinus Torvalds */ 31871da177e4SLinus Torvalds static int 31885ebb29beSChristian Brauner shmem_mknod(struct mnt_idmap *idmap, struct inode *dir, 3189549c7297SChristian Brauner struct dentry *dentry, umode_t mode, dev_t dev) 31901da177e4SLinus Torvalds { 31910b0a0806SHugh Dickins struct inode *inode; 319271480663SCarlos Maiolino int error; 31931da177e4SLinus Torvalds 31947a80e5b8SGiuseppe Scrivano inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, dev, VM_NORESERVE); 319571480663SCarlos Maiolino 319671480663SCarlos Maiolino if (IS_ERR(inode)) 319771480663SCarlos Maiolino return PTR_ERR(inode); 319871480663SCarlos Maiolino 3199feda821eSChristoph Hellwig error = simple_acl_create(dir, inode); 3200feda821eSChristoph Hellwig if (error) 3201feda821eSChristoph Hellwig goto out_iput; 32022a7dba39SEric Paris error = security_inode_init_security(inode, dir, 32039d8f13baSMimi Zohar &dentry->d_name, 32046d9d88d0SJarkko Sakkinen shmem_initxattrs, NULL); 3205feda821eSChristoph Hellwig if (error && error != -EOPNOTSUPP) 3206feda821eSChristoph Hellwig goto out_iput; 320737ec43cdSMimi Zohar 3208718deb6bSAl Viro error = 0; 32091da177e4SLinus Torvalds dir->i_size += BOGO_DIRENT_SIZE; 3210078cd827SDeepa Dinamani dir->i_ctime = dir->i_mtime = current_time(dir); 321136f05cabSJeff Layton inode_inc_iversion(dir); 32121da177e4SLinus Torvalds d_instantiate(dentry, inode); 32131da177e4SLinus Torvalds dget(dentry); /* Extra count - pin the dentry in core */ 32141da177e4SLinus Torvalds return error; 321571480663SCarlos Maiolino 3216feda821eSChristoph Hellwig out_iput: 3217feda821eSChristoph Hellwig iput(inode); 3218feda821eSChristoph Hellwig return error; 32191da177e4SLinus Torvalds } 32201da177e4SLinus Torvalds 322160545d0dSAl Viro static int 3222011e2b71SChristian Brauner shmem_tmpfile(struct mnt_idmap *idmap, struct inode *dir, 3223863f144fSMiklos Szeredi struct file *file, umode_t mode) 322460545d0dSAl Viro { 322560545d0dSAl Viro struct inode *inode; 322671480663SCarlos Maiolino int error; 322760545d0dSAl Viro 32287a80e5b8SGiuseppe Scrivano inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, 0, VM_NORESERVE); 322971480663SCarlos Maiolino 323071480663SCarlos Maiolino if (IS_ERR(inode)) { 323171480663SCarlos Maiolino error = PTR_ERR(inode); 323271480663SCarlos Maiolino goto err_out; 323371480663SCarlos Maiolino } 323471480663SCarlos Maiolino 323560545d0dSAl Viro error = security_inode_init_security(inode, dir, 323660545d0dSAl Viro NULL, 323760545d0dSAl Viro shmem_initxattrs, NULL); 3238feda821eSChristoph Hellwig if (error && error != -EOPNOTSUPP) 3239feda821eSChristoph Hellwig goto out_iput; 3240feda821eSChristoph Hellwig error = simple_acl_create(dir, inode); 3241feda821eSChristoph Hellwig if (error) 3242feda821eSChristoph Hellwig goto out_iput; 3243863f144fSMiklos Szeredi d_tmpfile(file, inode); 324471480663SCarlos Maiolino 324571480663SCarlos Maiolino err_out: 3246863f144fSMiklos Szeredi return finish_open_simple(file, error); 3247feda821eSChristoph Hellwig out_iput: 3248feda821eSChristoph Hellwig iput(inode); 3249feda821eSChristoph Hellwig return error; 325060545d0dSAl Viro } 325160545d0dSAl Viro 3252c54bd91eSChristian Brauner static int shmem_mkdir(struct mnt_idmap *idmap, struct inode *dir, 3253549c7297SChristian Brauner struct dentry *dentry, umode_t mode) 32541da177e4SLinus Torvalds { 32551da177e4SLinus Torvalds int error; 32561da177e4SLinus Torvalds 32577a80e5b8SGiuseppe Scrivano error = shmem_mknod(idmap, dir, dentry, mode | S_IFDIR, 0); 32587a80e5b8SGiuseppe Scrivano if (error) 
32591da177e4SLinus Torvalds return error; 3260d8c76e6fSDave Hansen inc_nlink(dir); 32611da177e4SLinus Torvalds return 0; 32621da177e4SLinus Torvalds } 32631da177e4SLinus Torvalds 32646c960e68SChristian Brauner static int shmem_create(struct mnt_idmap *idmap, struct inode *dir, 3265549c7297SChristian Brauner struct dentry *dentry, umode_t mode, bool excl) 32661da177e4SLinus Torvalds { 32677a80e5b8SGiuseppe Scrivano return shmem_mknod(idmap, dir, dentry, mode | S_IFREG, 0); 32681da177e4SLinus Torvalds } 32691da177e4SLinus Torvalds 32701da177e4SLinus Torvalds /* 32711da177e4SLinus Torvalds * Link a file.. 32721da177e4SLinus Torvalds */ 32731da177e4SLinus Torvalds static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) 32741da177e4SLinus Torvalds { 327575c3cfa8SDavid Howells struct inode *inode = d_inode(old_dentry); 327629b00e60SDarrick J. Wong int ret = 0; 32771da177e4SLinus Torvalds 32781da177e4SLinus Torvalds /* 32791da177e4SLinus Torvalds * No ordinary (disk based) filesystem counts links as inodes; 32801da177e4SLinus Torvalds * but each new link needs a new dentry, pinning lowmem, and 32811da177e4SLinus Torvalds * tmpfs dentries cannot be pruned until they are unlinked. 32821062af92SDarrick J. Wong * But if an O_TMPFILE file is linked into the tmpfs, the 32831062af92SDarrick J. Wong * first link must skip that, to get the accounting right. 32841da177e4SLinus Torvalds */ 32851062af92SDarrick J. Wong if (inode->i_nlink) { 3286e809d5f0SChris Down ret = shmem_reserve_inode(inode->i_sb, NULL); 32875b04c689SPavel Emelyanov if (ret) 32885b04c689SPavel Emelyanov goto out; 32891062af92SDarrick J. Wong } 32901da177e4SLinus Torvalds 32911da177e4SLinus Torvalds dir->i_size += BOGO_DIRENT_SIZE; 3292078cd827SDeepa Dinamani inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); 329336f05cabSJeff Layton inode_inc_iversion(dir); 3294d8c76e6fSDave Hansen inc_nlink(inode); 32957de9c6eeSAl Viro ihold(inode); /* New dentry reference */ 32961da177e4SLinus Torvalds dget(dentry); /* Extra pinning count for the created dentry */ 32971da177e4SLinus Torvalds d_instantiate(dentry, inode); 32985b04c689SPavel Emelyanov out: 32995b04c689SPavel Emelyanov return ret; 33001da177e4SLinus Torvalds } 33011da177e4SLinus Torvalds 33021da177e4SLinus Torvalds static int shmem_unlink(struct inode *dir, struct dentry *dentry) 33031da177e4SLinus Torvalds { 330475c3cfa8SDavid Howells struct inode *inode = d_inode(dentry); 33051da177e4SLinus Torvalds 33065b04c689SPavel Emelyanov if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) 33075b04c689SPavel Emelyanov shmem_free_inode(inode->i_sb); 33081da177e4SLinus Torvalds 33091da177e4SLinus Torvalds dir->i_size -= BOGO_DIRENT_SIZE; 3310078cd827SDeepa Dinamani inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); 331136f05cabSJeff Layton inode_inc_iversion(dir); 33129a53c3a7SDave Hansen drop_nlink(inode); 33131da177e4SLinus Torvalds dput(dentry); /* Undo the count from "create" - this does all the work */ 33141da177e4SLinus Torvalds return 0; 33151da177e4SLinus Torvalds } 33161da177e4SLinus Torvalds 33171da177e4SLinus Torvalds static int shmem_rmdir(struct inode *dir, struct dentry *dentry) 33181da177e4SLinus Torvalds { 33191da177e4SLinus Torvalds if (!simple_empty(dentry)) 33201da177e4SLinus Torvalds return -ENOTEMPTY; 33211da177e4SLinus Torvalds 332275c3cfa8SDavid Howells drop_nlink(d_inode(dentry)); 33239a53c3a7SDave Hansen drop_nlink(dir); 33241da177e4SLinus Torvalds return shmem_unlink(dir, dentry); 33251da177e4SLinus 
Torvalds } 33261da177e4SLinus Torvalds 3327e18275aeSChristian Brauner static int shmem_whiteout(struct mnt_idmap *idmap, 3328549c7297SChristian Brauner struct inode *old_dir, struct dentry *old_dentry) 332946fdb794SMiklos Szeredi { 333046fdb794SMiklos Szeredi struct dentry *whiteout; 333146fdb794SMiklos Szeredi int error; 333246fdb794SMiklos Szeredi 333346fdb794SMiklos Szeredi whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name); 333446fdb794SMiklos Szeredi if (!whiteout) 333546fdb794SMiklos Szeredi return -ENOMEM; 333646fdb794SMiklos Szeredi 33377a80e5b8SGiuseppe Scrivano error = shmem_mknod(idmap, old_dir, whiteout, 333846fdb794SMiklos Szeredi S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); 333946fdb794SMiklos Szeredi dput(whiteout); 334046fdb794SMiklos Szeredi if (error) 334146fdb794SMiklos Szeredi return error; 334246fdb794SMiklos Szeredi 334346fdb794SMiklos Szeredi /* 334446fdb794SMiklos Szeredi * Cheat and hash the whiteout while the old dentry is still in 334546fdb794SMiklos Szeredi * place, instead of playing games with FS_RENAME_DOES_D_MOVE. 334646fdb794SMiklos Szeredi * 334746fdb794SMiklos Szeredi * d_lookup() will consistently find one of them at this point, 334846fdb794SMiklos Szeredi * not sure which one, but that isn't even important. 334946fdb794SMiklos Szeredi */ 335046fdb794SMiklos Szeredi d_rehash(whiteout); 335146fdb794SMiklos Szeredi return 0; 335246fdb794SMiklos Szeredi } 335346fdb794SMiklos Szeredi 33541da177e4SLinus Torvalds /* 33551da177e4SLinus Torvalds * The VFS layer already does all the dentry stuff for rename, 33561da177e4SLinus Torvalds * we just have to decrement the usage count for the target if 33571da177e4SLinus Torvalds * it exists so that the VFS layer correctly free's it when it 33581da177e4SLinus Torvalds * gets overwritten. 
33591da177e4SLinus Torvalds */ 3360e18275aeSChristian Brauner static int shmem_rename2(struct mnt_idmap *idmap, 3361549c7297SChristian Brauner struct inode *old_dir, struct dentry *old_dentry, 3362549c7297SChristian Brauner struct inode *new_dir, struct dentry *new_dentry, 3363549c7297SChristian Brauner unsigned int flags) 33641da177e4SLinus Torvalds { 336575c3cfa8SDavid Howells struct inode *inode = d_inode(old_dentry); 33661da177e4SLinus Torvalds int they_are_dirs = S_ISDIR(inode->i_mode); 33671da177e4SLinus Torvalds 336846fdb794SMiklos Szeredi if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) 33693b69ff51SMiklos Szeredi return -EINVAL; 33703b69ff51SMiklos Szeredi 337137456771SMiklos Szeredi if (flags & RENAME_EXCHANGE) 33726429e463SLorenz Bauer return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); 337337456771SMiklos Szeredi 33741da177e4SLinus Torvalds if (!simple_empty(new_dentry)) 33751da177e4SLinus Torvalds return -ENOTEMPTY; 33761da177e4SLinus Torvalds 337746fdb794SMiklos Szeredi if (flags & RENAME_WHITEOUT) { 337846fdb794SMiklos Szeredi int error; 337946fdb794SMiklos Szeredi 33807a80e5b8SGiuseppe Scrivano error = shmem_whiteout(idmap, old_dir, old_dentry); 338146fdb794SMiklos Szeredi if (error) 338246fdb794SMiklos Szeredi return error; 338346fdb794SMiklos Szeredi } 338446fdb794SMiklos Szeredi 338575c3cfa8SDavid Howells if (d_really_is_positive(new_dentry)) { 33861da177e4SLinus Torvalds (void) shmem_unlink(new_dir, new_dentry); 3387b928095bSMiklos Szeredi if (they_are_dirs) { 338875c3cfa8SDavid Howells drop_nlink(d_inode(new_dentry)); 33899a53c3a7SDave Hansen drop_nlink(old_dir); 3390b928095bSMiklos Szeredi } 33911da177e4SLinus Torvalds } else if (they_are_dirs) { 33929a53c3a7SDave Hansen drop_nlink(old_dir); 3393d8c76e6fSDave Hansen inc_nlink(new_dir); 33941da177e4SLinus Torvalds } 33951da177e4SLinus Torvalds 33961da177e4SLinus Torvalds old_dir->i_size -= BOGO_DIRENT_SIZE; 33971da177e4SLinus Torvalds new_dir->i_size += BOGO_DIRENT_SIZE; 33981da177e4SLinus Torvalds old_dir->i_ctime = old_dir->i_mtime = 33991da177e4SLinus Torvalds new_dir->i_ctime = new_dir->i_mtime = 3400078cd827SDeepa Dinamani inode->i_ctime = current_time(old_dir); 340136f05cabSJeff Layton inode_inc_iversion(old_dir); 340236f05cabSJeff Layton inode_inc_iversion(new_dir); 34031da177e4SLinus Torvalds return 0; 34041da177e4SLinus Torvalds } 34051da177e4SLinus Torvalds 34067a77db95SChristian Brauner static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir, 3407549c7297SChristian Brauner struct dentry *dentry, const char *symname) 34081da177e4SLinus Torvalds { 34091da177e4SLinus Torvalds int error; 34101da177e4SLinus Torvalds int len; 34111da177e4SLinus Torvalds struct inode *inode; 34127ad0414bSMatthew Wilcox (Oracle) struct folio *folio; 34131da177e4SLinus Torvalds 34141da177e4SLinus Torvalds len = strlen(symname) + 1; 341509cbfeafSKirill A. 
Shutemov if (len > PAGE_SIZE) 34161da177e4SLinus Torvalds return -ENAMETOOLONG; 34171da177e4SLinus Torvalds 34187a80e5b8SGiuseppe Scrivano inode = shmem_get_inode(idmap, dir->i_sb, dir, S_IFLNK | 0777, 0, 34190825a6f9SJoe Perches VM_NORESERVE); 342071480663SCarlos Maiolino 342171480663SCarlos Maiolino if (IS_ERR(inode)) 342271480663SCarlos Maiolino return PTR_ERR(inode); 34231da177e4SLinus Torvalds 34249d8f13baSMimi Zohar error = security_inode_init_security(inode, dir, &dentry->d_name, 34256d9d88d0SJarkko Sakkinen shmem_initxattrs, NULL); 3426343c3d7fSMateusz Nosek if (error && error != -EOPNOTSUPP) { 3427570bc1c2SStephen Smalley iput(inode); 3428570bc1c2SStephen Smalley return error; 3429570bc1c2SStephen Smalley } 3430570bc1c2SStephen Smalley 34311da177e4SLinus Torvalds inode->i_size = len-1; 343269f07ec9SHugh Dickins if (len <= SHORT_SYMLINK_LEN) { 34333ed47db3SAl Viro inode->i_link = kmemdup(symname, len, GFP_KERNEL); 34343ed47db3SAl Viro if (!inode->i_link) { 343569f07ec9SHugh Dickins iput(inode); 343669f07ec9SHugh Dickins return -ENOMEM; 343769f07ec9SHugh Dickins } 343869f07ec9SHugh Dickins inode->i_op = &shmem_short_symlink_operations; 34391da177e4SLinus Torvalds } else { 3440e8ecde25SAl Viro inode_nohighmem(inode); 34417ad0414bSMatthew Wilcox (Oracle) error = shmem_get_folio(inode, 0, &folio, SGP_WRITE); 34421da177e4SLinus Torvalds if (error) { 34431da177e4SLinus Torvalds iput(inode); 34441da177e4SLinus Torvalds return error; 34451da177e4SLinus Torvalds } 344614fcc23fSHugh Dickins inode->i_mapping->a_ops = &shmem_aops; 34471da177e4SLinus Torvalds inode->i_op = &shmem_symlink_inode_operations; 34487ad0414bSMatthew Wilcox (Oracle) memcpy(folio_address(folio), symname, len); 34497ad0414bSMatthew Wilcox (Oracle) folio_mark_uptodate(folio); 34507ad0414bSMatthew Wilcox (Oracle) folio_mark_dirty(folio); 34517ad0414bSMatthew Wilcox (Oracle) folio_unlock(folio); 34527ad0414bSMatthew Wilcox (Oracle) folio_put(folio); 34531da177e4SLinus Torvalds } 34541da177e4SLinus Torvalds dir->i_size += BOGO_DIRENT_SIZE; 3455078cd827SDeepa Dinamani dir->i_ctime = dir->i_mtime = current_time(dir); 345636f05cabSJeff Layton inode_inc_iversion(dir); 34571da177e4SLinus Torvalds d_instantiate(dentry, inode); 34581da177e4SLinus Torvalds dget(dentry); 34591da177e4SLinus Torvalds return 0; 34601da177e4SLinus Torvalds } 34611da177e4SLinus Torvalds 3462fceef393SAl Viro static void shmem_put_link(void *arg) 3463fceef393SAl Viro { 3464e4b57722SMatthew Wilcox (Oracle) folio_mark_accessed(arg); 3465e4b57722SMatthew Wilcox (Oracle) folio_put(arg); 3466fceef393SAl Viro } 3467fceef393SAl Viro 34686b255391SAl Viro static const char *shmem_get_link(struct dentry *dentry, 3469fceef393SAl Viro struct inode *inode, 3470fceef393SAl Viro struct delayed_call *done) 34711da177e4SLinus Torvalds { 3472e4b57722SMatthew Wilcox (Oracle) struct folio *folio = NULL; 34736b255391SAl Viro int error; 3474e4b57722SMatthew Wilcox (Oracle) 34756a6c9904SAl Viro if (!dentry) { 3476e4b57722SMatthew Wilcox (Oracle) folio = filemap_get_folio(inode->i_mapping, 0); 347766dabbb6SChristoph Hellwig if (IS_ERR(folio)) 34786b255391SAl Viro return ERR_PTR(-ECHILD); 34797459c149SMatthew Wilcox (Oracle) if (PageHWPoison(folio_page(folio, 0)) || 3480e4b57722SMatthew Wilcox (Oracle) !folio_test_uptodate(folio)) { 3481e4b57722SMatthew Wilcox (Oracle) folio_put(folio); 34826a6c9904SAl Viro return ERR_PTR(-ECHILD); 34836a6c9904SAl Viro } 34846a6c9904SAl Viro } else { 3485e4b57722SMatthew Wilcox (Oracle) error = shmem_get_folio(inode, 0, &folio, SGP_READ); 
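	/*
	 * Added note (not in the original source): a non-NULL dentry means
	 * shmem_get_link() was not called in RCU walk, so it is safe for
	 * shmem_get_folio() to sleep here; the dentry-less path above must
	 * not sleep, which is why it bails out with -ECHILD instead.
	 */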
3486680baacbSAl Viro if (error) 3487680baacbSAl Viro return ERR_PTR(error); 3488e4b57722SMatthew Wilcox (Oracle) if (!folio) 3489a7605426SYang Shi return ERR_PTR(-ECHILD); 34907459c149SMatthew Wilcox (Oracle) if (PageHWPoison(folio_page(folio, 0))) { 3491e4b57722SMatthew Wilcox (Oracle) folio_unlock(folio); 3492e4b57722SMatthew Wilcox (Oracle) folio_put(folio); 3493a7605426SYang Shi return ERR_PTR(-ECHILD); 3494a7605426SYang Shi } 3495e4b57722SMatthew Wilcox (Oracle) folio_unlock(folio); 34961da177e4SLinus Torvalds } 3497e4b57722SMatthew Wilcox (Oracle) set_delayed_call(done, shmem_put_link, folio); 3498e4b57722SMatthew Wilcox (Oracle) return folio_address(folio); 34991da177e4SLinus Torvalds } 35001da177e4SLinus Torvalds 3501b09e0fa4SEric Paris #ifdef CONFIG_TMPFS_XATTR 3502e408e695STheodore Ts'o 3503e408e695STheodore Ts'o static int shmem_fileattr_get(struct dentry *dentry, struct fileattr *fa) 3504e408e695STheodore Ts'o { 3505e408e695STheodore Ts'o struct shmem_inode_info *info = SHMEM_I(d_inode(dentry)); 3506e408e695STheodore Ts'o 3507e408e695STheodore Ts'o fileattr_fill_flags(fa, info->fsflags & SHMEM_FL_USER_VISIBLE); 3508e408e695STheodore Ts'o 3509e408e695STheodore Ts'o return 0; 3510e408e695STheodore Ts'o } 3511e408e695STheodore Ts'o 35128782a9aeSChristian Brauner static int shmem_fileattr_set(struct mnt_idmap *idmap, 3513e408e695STheodore Ts'o struct dentry *dentry, struct fileattr *fa) 3514e408e695STheodore Ts'o { 3515e408e695STheodore Ts'o struct inode *inode = d_inode(dentry); 3516e408e695STheodore Ts'o struct shmem_inode_info *info = SHMEM_I(inode); 3517e408e695STheodore Ts'o 3518e408e695STheodore Ts'o if (fileattr_has_fsx(fa)) 3519e408e695STheodore Ts'o return -EOPNOTSUPP; 3520cb241339SHugh Dickins if (fa->flags & ~SHMEM_FL_USER_MODIFIABLE) 3521cb241339SHugh Dickins return -EOPNOTSUPP; 3522e408e695STheodore Ts'o 3523e408e695STheodore Ts'o info->fsflags = (info->fsflags & ~SHMEM_FL_USER_MODIFIABLE) | 3524e408e695STheodore Ts'o (fa->flags & SHMEM_FL_USER_MODIFIABLE); 3525e408e695STheodore Ts'o 3526cb241339SHugh Dickins shmem_set_inode_flags(inode, info->fsflags); 3527e408e695STheodore Ts'o inode->i_ctime = current_time(inode); 352836f05cabSJeff Layton inode_inc_iversion(inode); 3529e408e695STheodore Ts'o return 0; 3530e408e695STheodore Ts'o } 3531e408e695STheodore Ts'o 3532b09e0fa4SEric Paris /* 3533b09e0fa4SEric Paris * Superblocks without xattr inode operations may get some security.* xattr 3534b09e0fa4SEric Paris * support from the LSM "for free". As soon as we have any other xattrs 3535b09e0fa4SEric Paris * like ACLs, we also need to implement the security.* handlers at 3536b09e0fa4SEric Paris * filesystem level, though. 3537b09e0fa4SEric Paris */ 3538b09e0fa4SEric Paris 35396d9d88d0SJarkko Sakkinen /* 35406d9d88d0SJarkko Sakkinen * Callback for security_inode_init_security() for acquiring xattrs. 
35416d9d88d0SJarkko Sakkinen */ 35426d9d88d0SJarkko Sakkinen static int shmem_initxattrs(struct inode *inode, 35436d9d88d0SJarkko Sakkinen const struct xattr *xattr_array, 35446d9d88d0SJarkko Sakkinen void *fs_info) 35456d9d88d0SJarkko Sakkinen { 35466d9d88d0SJarkko Sakkinen struct shmem_inode_info *info = SHMEM_I(inode); 35476d9d88d0SJarkko Sakkinen const struct xattr *xattr; 354838f38657SAristeu Rozanski struct simple_xattr *new_xattr; 35496d9d88d0SJarkko Sakkinen size_t len; 35506d9d88d0SJarkko Sakkinen 35516d9d88d0SJarkko Sakkinen for (xattr = xattr_array; xattr->name != NULL; xattr++) { 355238f38657SAristeu Rozanski new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len); 35536d9d88d0SJarkko Sakkinen if (!new_xattr) 35546d9d88d0SJarkko Sakkinen return -ENOMEM; 35556d9d88d0SJarkko Sakkinen 35566d9d88d0SJarkko Sakkinen len = strlen(xattr->name) + 1; 35576d9d88d0SJarkko Sakkinen new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len, 35586d9d88d0SJarkko Sakkinen GFP_KERNEL); 35596d9d88d0SJarkko Sakkinen if (!new_xattr->name) { 35603bef735aSChengguang Xu kvfree(new_xattr); 35616d9d88d0SJarkko Sakkinen return -ENOMEM; 35626d9d88d0SJarkko Sakkinen } 35636d9d88d0SJarkko Sakkinen 35646d9d88d0SJarkko Sakkinen memcpy(new_xattr->name, XATTR_SECURITY_PREFIX, 35656d9d88d0SJarkko Sakkinen XATTR_SECURITY_PREFIX_LEN); 35666d9d88d0SJarkko Sakkinen memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN, 35676d9d88d0SJarkko Sakkinen xattr->name, len); 35686d9d88d0SJarkko Sakkinen 35693b4c7bc0SChristian Brauner simple_xattr_add(&info->xattrs, new_xattr); 35706d9d88d0SJarkko Sakkinen } 35716d9d88d0SJarkko Sakkinen 35726d9d88d0SJarkko Sakkinen return 0; 35736d9d88d0SJarkko Sakkinen } 35746d9d88d0SJarkko Sakkinen 3575aa7c5241SAndreas Gruenbacher static int shmem_xattr_handler_get(const struct xattr_handler *handler, 3576b296821aSAl Viro struct dentry *unused, struct inode *inode, 3577b296821aSAl Viro const char *name, void *buffer, size_t size) 3578aa7c5241SAndreas Gruenbacher { 3579b296821aSAl Viro struct shmem_inode_info *info = SHMEM_I(inode); 3580aa7c5241SAndreas Gruenbacher 3581aa7c5241SAndreas Gruenbacher name = xattr_full_name(handler, name); 3582aa7c5241SAndreas Gruenbacher return simple_xattr_get(&info->xattrs, name, buffer, size); 3583aa7c5241SAndreas Gruenbacher } 3584aa7c5241SAndreas Gruenbacher 3585aa7c5241SAndreas Gruenbacher static int shmem_xattr_handler_set(const struct xattr_handler *handler, 358639f60c1cSChristian Brauner struct mnt_idmap *idmap, 358759301226SAl Viro struct dentry *unused, struct inode *inode, 358859301226SAl Viro const char *name, const void *value, 358959301226SAl Viro size_t size, int flags) 3590aa7c5241SAndreas Gruenbacher { 359159301226SAl Viro struct shmem_inode_info *info = SHMEM_I(inode); 359236f05cabSJeff Layton int err; 3593aa7c5241SAndreas Gruenbacher 3594aa7c5241SAndreas Gruenbacher name = xattr_full_name(handler, name); 359536f05cabSJeff Layton err = simple_xattr_set(&info->xattrs, name, value, size, flags, NULL); 359636f05cabSJeff Layton if (!err) { 359736f05cabSJeff Layton inode->i_ctime = current_time(inode); 359836f05cabSJeff Layton inode_inc_iversion(inode); 359936f05cabSJeff Layton } 360036f05cabSJeff Layton return err; 3601aa7c5241SAndreas Gruenbacher } 3602aa7c5241SAndreas Gruenbacher 3603aa7c5241SAndreas Gruenbacher static const struct xattr_handler shmem_security_xattr_handler = { 3604aa7c5241SAndreas Gruenbacher .prefix = XATTR_SECURITY_PREFIX, 3605aa7c5241SAndreas Gruenbacher .get = shmem_xattr_handler_get, 3606aa7c5241SAndreas Gruenbacher 
.set = shmem_xattr_handler_set, 3607aa7c5241SAndreas Gruenbacher }; 3608aa7c5241SAndreas Gruenbacher 3609aa7c5241SAndreas Gruenbacher static const struct xattr_handler shmem_trusted_xattr_handler = { 3610aa7c5241SAndreas Gruenbacher .prefix = XATTR_TRUSTED_PREFIX, 3611aa7c5241SAndreas Gruenbacher .get = shmem_xattr_handler_get, 3612aa7c5241SAndreas Gruenbacher .set = shmem_xattr_handler_set, 3613aa7c5241SAndreas Gruenbacher }; 3614aa7c5241SAndreas Gruenbacher 3615b09e0fa4SEric Paris static const struct xattr_handler *shmem_xattr_handlers[] = { 3616aa7c5241SAndreas Gruenbacher &shmem_security_xattr_handler, 3617aa7c5241SAndreas Gruenbacher &shmem_trusted_xattr_handler, 3618b09e0fa4SEric Paris NULL 3619b09e0fa4SEric Paris }; 3620b09e0fa4SEric Paris 3621b09e0fa4SEric Paris static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) 3622b09e0fa4SEric Paris { 362375c3cfa8SDavid Howells struct shmem_inode_info *info = SHMEM_I(d_inode(dentry)); 3624786534b9SAndreas Gruenbacher return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size); 3625b09e0fa4SEric Paris } 3626b09e0fa4SEric Paris #endif /* CONFIG_TMPFS_XATTR */ 3627b09e0fa4SEric Paris 362869f07ec9SHugh Dickins static const struct inode_operations shmem_short_symlink_operations = { 3629f7cd16a5SXavier Roche .getattr = shmem_getattr, 3630e09764cfSCarlos Maiolino .setattr = shmem_setattr, 36316b255391SAl Viro .get_link = simple_get_link, 3632b09e0fa4SEric Paris #ifdef CONFIG_TMPFS_XATTR 3633b09e0fa4SEric Paris .listxattr = shmem_listxattr, 3634b09e0fa4SEric Paris #endif 36351da177e4SLinus Torvalds }; 36361da177e4SLinus Torvalds 363792e1d5beSArjan van de Ven static const struct inode_operations shmem_symlink_inode_operations = { 3638f7cd16a5SXavier Roche .getattr = shmem_getattr, 3639e09764cfSCarlos Maiolino .setattr = shmem_setattr, 36406b255391SAl Viro .get_link = shmem_get_link, 3641b09e0fa4SEric Paris #ifdef CONFIG_TMPFS_XATTR 3642b09e0fa4SEric Paris .listxattr = shmem_listxattr, 364339f0247dSAndreas Gruenbacher #endif 3644b09e0fa4SEric Paris }; 364539f0247dSAndreas Gruenbacher 364691828a40SDavid M. Grimes static struct dentry *shmem_get_parent(struct dentry *child) 364791828a40SDavid M. Grimes { 364891828a40SDavid M. Grimes return ERR_PTR(-ESTALE); 364991828a40SDavid M. Grimes } 365091828a40SDavid M. Grimes 365191828a40SDavid M. Grimes static int shmem_match(struct inode *ino, void *vfh) 365291828a40SDavid M. Grimes { 365391828a40SDavid M. Grimes __u32 *fh = vfh; 365491828a40SDavid M. Grimes __u64 inum = fh[2]; 365591828a40SDavid M. Grimes inum = (inum << 32) | fh[1]; 365691828a40SDavid M. Grimes return ino->i_ino == inum && fh[0] == ino->i_generation; 365791828a40SDavid M. Grimes } 365891828a40SDavid M. Grimes 365912ba780dSAmir Goldstein /* Find any alias of inode, but prefer a hashed alias */ 366012ba780dSAmir Goldstein static struct dentry *shmem_find_alias(struct inode *inode) 366112ba780dSAmir Goldstein { 366212ba780dSAmir Goldstein struct dentry *alias = d_find_alias(inode); 366312ba780dSAmir Goldstein 366412ba780dSAmir Goldstein return alias ?: d_find_any_alias(inode); 366512ba780dSAmir Goldstein } 366612ba780dSAmir Goldstein 366712ba780dSAmir Goldstein 3668480b116cSChristoph Hellwig static struct dentry *shmem_fh_to_dentry(struct super_block *sb, 3669480b116cSChristoph Hellwig struct fid *fid, int fh_len, int fh_type) 367091828a40SDavid M. Grimes { 367191828a40SDavid M. 
Grimes struct inode *inode; 3672480b116cSChristoph Hellwig struct dentry *dentry = NULL; 367335c2a7f4SHugh Dickins u64 inum; 367491828a40SDavid M. Grimes 3675480b116cSChristoph Hellwig if (fh_len < 3) 3676480b116cSChristoph Hellwig return NULL; 3677480b116cSChristoph Hellwig 367835c2a7f4SHugh Dickins inum = fid->raw[2]; 367935c2a7f4SHugh Dickins inum = (inum << 32) | fid->raw[1]; 368035c2a7f4SHugh Dickins 3681480b116cSChristoph Hellwig inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]), 3682480b116cSChristoph Hellwig shmem_match, fid->raw); 368391828a40SDavid M. Grimes if (inode) { 368412ba780dSAmir Goldstein dentry = shmem_find_alias(inode); 368591828a40SDavid M. Grimes iput(inode); 368691828a40SDavid M. Grimes } 368791828a40SDavid M. Grimes 3688480b116cSChristoph Hellwig return dentry; 368991828a40SDavid M. Grimes } 369091828a40SDavid M. Grimes 3691b0b0382bSAl Viro static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len, 3692b0b0382bSAl Viro struct inode *parent) 369391828a40SDavid M. Grimes { 36945fe0c237SAneesh Kumar K.V if (*len < 3) { 36955fe0c237SAneesh Kumar K.V *len = 3; 369694e07a75SNamjae Jeon return FILEID_INVALID; 36975fe0c237SAneesh Kumar K.V } 369891828a40SDavid M. Grimes 36991d3382cbSAl Viro if (inode_unhashed(inode)) { 370091828a40SDavid M. Grimes /* Unfortunately insert_inode_hash is not idempotent, 370191828a40SDavid M. Grimes * so as we hash inodes here rather than at creation 370291828a40SDavid M. Grimes * time, we need a lock to ensure we only try 370391828a40SDavid M. Grimes * to do it once 370491828a40SDavid M. Grimes */ 370591828a40SDavid M. Grimes static DEFINE_SPINLOCK(lock); 370691828a40SDavid M. Grimes spin_lock(&lock); 37071d3382cbSAl Viro if (inode_unhashed(inode)) 370891828a40SDavid M. Grimes __insert_inode_hash(inode, 370991828a40SDavid M. Grimes inode->i_ino + inode->i_generation); 371091828a40SDavid M. Grimes spin_unlock(&lock); 371191828a40SDavid M. Grimes } 371291828a40SDavid M. Grimes 371391828a40SDavid M. Grimes fh[0] = inode->i_generation; 371491828a40SDavid M. Grimes fh[1] = inode->i_ino; 371591828a40SDavid M. Grimes fh[2] = ((__u64)inode->i_ino) >> 32; 371691828a40SDavid M. Grimes 371791828a40SDavid M. Grimes *len = 3; 371891828a40SDavid M. Grimes return 1; 371991828a40SDavid M. Grimes } 372091828a40SDavid M. Grimes 372139655164SChristoph Hellwig static const struct export_operations shmem_export_ops = { 372291828a40SDavid M. Grimes .get_parent = shmem_get_parent, 372391828a40SDavid M. Grimes .encode_fh = shmem_encode_fh, 3724480b116cSChristoph Hellwig .fh_to_dentry = shmem_fh_to_dentry, 372591828a40SDavid M. Grimes }; 372691828a40SDavid M. 
Grimes 3727626c3920SAl Viro enum shmem_param { 3728626c3920SAl Viro Opt_gid, 3729626c3920SAl Viro Opt_huge, 3730626c3920SAl Viro Opt_mode, 3731626c3920SAl Viro Opt_mpol, 3732626c3920SAl Viro Opt_nr_blocks, 3733626c3920SAl Viro Opt_nr_inodes, 3734626c3920SAl Viro Opt_size, 3735626c3920SAl Viro Opt_uid, 3736ea3271f7SChris Down Opt_inode32, 3737ea3271f7SChris Down Opt_inode64, 37382c6efe9cSLuis Chamberlain Opt_noswap, 3739e09764cfSCarlos Maiolino Opt_quota, 3740e09764cfSCarlos Maiolino Opt_usrquota, 3741e09764cfSCarlos Maiolino Opt_grpquota, 3742*de4c0e7cSLukas Czerner Opt_usrquota_block_hardlimit, 3743*de4c0e7cSLukas Czerner Opt_usrquota_inode_hardlimit, 3744*de4c0e7cSLukas Czerner Opt_grpquota_block_hardlimit, 3745*de4c0e7cSLukas Czerner Opt_grpquota_inode_hardlimit, 3746626c3920SAl Viro }; 37471da177e4SLinus Torvalds 37485eede625SAl Viro static const struct constant_table shmem_param_enums_huge[] = { 37492710c957SAl Viro {"never", SHMEM_HUGE_NEVER }, 37502710c957SAl Viro {"always", SHMEM_HUGE_ALWAYS }, 37512710c957SAl Viro {"within_size", SHMEM_HUGE_WITHIN_SIZE }, 37522710c957SAl Viro {"advise", SHMEM_HUGE_ADVISE }, 37532710c957SAl Viro {} 37542710c957SAl Viro }; 37552710c957SAl Viro 3756d7167b14SAl Viro const struct fs_parameter_spec shmem_fs_parameters[] = { 3757626c3920SAl Viro fsparam_u32 ("gid", Opt_gid), 37582710c957SAl Viro fsparam_enum ("huge", Opt_huge, shmem_param_enums_huge), 3759626c3920SAl Viro fsparam_u32oct("mode", Opt_mode), 3760626c3920SAl Viro fsparam_string("mpol", Opt_mpol), 3761626c3920SAl Viro fsparam_string("nr_blocks", Opt_nr_blocks), 3762626c3920SAl Viro fsparam_string("nr_inodes", Opt_nr_inodes), 3763626c3920SAl Viro fsparam_string("size", Opt_size), 3764626c3920SAl Viro fsparam_u32 ("uid", Opt_uid), 3765ea3271f7SChris Down fsparam_flag ("inode32", Opt_inode32), 3766ea3271f7SChris Down fsparam_flag ("inode64", Opt_inode64), 37672c6efe9cSLuis Chamberlain fsparam_flag ("noswap", Opt_noswap), 3768e09764cfSCarlos Maiolino #ifdef CONFIG_TMPFS_QUOTA 3769e09764cfSCarlos Maiolino fsparam_flag ("quota", Opt_quota), 3770e09764cfSCarlos Maiolino fsparam_flag ("usrquota", Opt_usrquota), 3771e09764cfSCarlos Maiolino fsparam_flag ("grpquota", Opt_grpquota), 3772*de4c0e7cSLukas Czerner fsparam_string("usrquota_block_hardlimit", Opt_usrquota_block_hardlimit), 3773*de4c0e7cSLukas Czerner fsparam_string("usrquota_inode_hardlimit", Opt_usrquota_inode_hardlimit), 3774*de4c0e7cSLukas Czerner fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit), 3775*de4c0e7cSLukas Czerner fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit), 3776e09764cfSCarlos Maiolino #endif 3777626c3920SAl Viro {} 3778626c3920SAl Viro }; 3779626c3920SAl Viro 3780f3235626SDavid Howells static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param) 3781626c3920SAl Viro { 3782f3235626SDavid Howells struct shmem_options *ctx = fc->fs_private; 3783626c3920SAl Viro struct fs_parse_result result; 3784e04dc423SAl Viro unsigned long long size; 3785626c3920SAl Viro char *rest; 3786626c3920SAl Viro int opt; 3787626c3920SAl Viro 3788d7167b14SAl Viro opt = fs_parse(fc, shmem_fs_parameters, param, &result); 3789f3235626SDavid Howells if (opt < 0) 3790626c3920SAl Viro return opt; 3791626c3920SAl Viro 3792626c3920SAl Viro switch (opt) { 3793626c3920SAl Viro case Opt_size: 3794626c3920SAl Viro size = memparse(param->string, &rest); 3795e04dc423SAl Viro if (*rest == '%') { 3796e04dc423SAl Viro size <<= PAGE_SHIFT; 3797e04dc423SAl Viro size *= totalram_pages(); 3798e04dc423SAl 
Viro do_div(size, 100); 3799e04dc423SAl Viro rest++; 3800e04dc423SAl Viro } 3801e04dc423SAl Viro if (*rest) 3802626c3920SAl Viro goto bad_value; 3803e04dc423SAl Viro ctx->blocks = DIV_ROUND_UP(size, PAGE_SIZE); 3804e04dc423SAl Viro ctx->seen |= SHMEM_SEEN_BLOCKS; 3805626c3920SAl Viro break; 3806626c3920SAl Viro case Opt_nr_blocks: 3807626c3920SAl Viro ctx->blocks = memparse(param->string, &rest); 38080c98c8e1SZhaoLong Wang if (*rest || ctx->blocks > S64_MAX) 3809626c3920SAl Viro goto bad_value; 3810e04dc423SAl Viro ctx->seen |= SHMEM_SEEN_BLOCKS; 3811626c3920SAl Viro break; 3812626c3920SAl Viro case Opt_nr_inodes: 3813626c3920SAl Viro ctx->inodes = memparse(param->string, &rest); 3814e04dc423SAl Viro if (*rest) 3815626c3920SAl Viro goto bad_value; 3816e04dc423SAl Viro ctx->seen |= SHMEM_SEEN_INODES; 3817626c3920SAl Viro break; 3818626c3920SAl Viro case Opt_mode: 3819626c3920SAl Viro ctx->mode = result.uint_32 & 07777; 3820626c3920SAl Viro break; 3821626c3920SAl Viro case Opt_uid: 3822626c3920SAl Viro ctx->uid = make_kuid(current_user_ns(), result.uint_32); 3823e04dc423SAl Viro if (!uid_valid(ctx->uid)) 3824626c3920SAl Viro goto bad_value; 3825626c3920SAl Viro break; 3826626c3920SAl Viro case Opt_gid: 3827626c3920SAl Viro ctx->gid = make_kgid(current_user_ns(), result.uint_32); 3828e04dc423SAl Viro if (!gid_valid(ctx->gid)) 3829626c3920SAl Viro goto bad_value; 3830626c3920SAl Viro break; 3831626c3920SAl Viro case Opt_huge: 3832626c3920SAl Viro ctx->huge = result.uint_32; 3833626c3920SAl Viro if (ctx->huge != SHMEM_HUGE_NEVER && 3834396bcc52SMatthew Wilcox (Oracle) !(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && 3835626c3920SAl Viro has_transparent_hugepage())) 3836626c3920SAl Viro goto unsupported_parameter; 3837e04dc423SAl Viro ctx->seen |= SHMEM_SEEN_HUGE; 3838626c3920SAl Viro break; 3839626c3920SAl Viro case Opt_mpol: 3840626c3920SAl Viro if (IS_ENABLED(CONFIG_NUMA)) { 3841e04dc423SAl Viro mpol_put(ctx->mpol); 3842e04dc423SAl Viro ctx->mpol = NULL; 3843626c3920SAl Viro if (mpol_parse_str(param->string, &ctx->mpol)) 3844626c3920SAl Viro goto bad_value; 3845626c3920SAl Viro break; 3846626c3920SAl Viro } 3847626c3920SAl Viro goto unsupported_parameter; 3848ea3271f7SChris Down case Opt_inode32: 3849ea3271f7SChris Down ctx->full_inums = false; 3850ea3271f7SChris Down ctx->seen |= SHMEM_SEEN_INUMS; 3851ea3271f7SChris Down break; 3852ea3271f7SChris Down case Opt_inode64: 3853ea3271f7SChris Down if (sizeof(ino_t) < 8) { 3854ea3271f7SChris Down return invalfc(fc, 3855ea3271f7SChris Down "Cannot use inode64 with <64bit inums in kernel\n"); 3856ea3271f7SChris Down } 3857ea3271f7SChris Down ctx->full_inums = true; 3858ea3271f7SChris Down ctx->seen |= SHMEM_SEEN_INUMS; 3859ea3271f7SChris Down break; 38602c6efe9cSLuis Chamberlain case Opt_noswap: 386101106e14SChristian Brauner if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN)) { 386201106e14SChristian Brauner return invalfc(fc, 386301106e14SChristian Brauner "Turning off swap in unprivileged tmpfs mounts unsupported"); 386401106e14SChristian Brauner } 38652c6efe9cSLuis Chamberlain ctx->noswap = true; 38662c6efe9cSLuis Chamberlain ctx->seen |= SHMEM_SEEN_NOSWAP; 38672c6efe9cSLuis Chamberlain break; 3868e09764cfSCarlos Maiolino case Opt_quota: 3869e09764cfSCarlos Maiolino if (fc->user_ns != &init_user_ns) 3870e09764cfSCarlos Maiolino return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported"); 3871e09764cfSCarlos Maiolino ctx->seen |= SHMEM_SEEN_QUOTA; 3872e09764cfSCarlos Maiolino ctx->quota_types |= (QTYPE_MASK_USR | 
QTYPE_MASK_GRP); 3873e09764cfSCarlos Maiolino break; 3874e09764cfSCarlos Maiolino case Opt_usrquota: 3875e09764cfSCarlos Maiolino if (fc->user_ns != &init_user_ns) 3876e09764cfSCarlos Maiolino return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported"); 3877e09764cfSCarlos Maiolino ctx->seen |= SHMEM_SEEN_QUOTA; 3878e09764cfSCarlos Maiolino ctx->quota_types |= QTYPE_MASK_USR; 3879e09764cfSCarlos Maiolino break; 3880e09764cfSCarlos Maiolino case Opt_grpquota: 3881e09764cfSCarlos Maiolino if (fc->user_ns != &init_user_ns) 3882e09764cfSCarlos Maiolino return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported"); 3883e09764cfSCarlos Maiolino ctx->seen |= SHMEM_SEEN_QUOTA; 3884e09764cfSCarlos Maiolino ctx->quota_types |= QTYPE_MASK_GRP; 3885e09764cfSCarlos Maiolino break; 3886*de4c0e7cSLukas Czerner case Opt_usrquota_block_hardlimit: 3887*de4c0e7cSLukas Czerner size = memparse(param->string, &rest); 3888*de4c0e7cSLukas Czerner if (*rest || !size) 3889*de4c0e7cSLukas Czerner goto bad_value; 3890*de4c0e7cSLukas Czerner if (size > SHMEM_QUOTA_MAX_SPC_LIMIT) 3891*de4c0e7cSLukas Czerner return invalfc(fc, 3892*de4c0e7cSLukas Czerner "User quota block hardlimit too large."); 3893*de4c0e7cSLukas Czerner ctx->qlimits.usrquota_bhardlimit = size; 3894*de4c0e7cSLukas Czerner break; 3895*de4c0e7cSLukas Czerner case Opt_grpquota_block_hardlimit: 3896*de4c0e7cSLukas Czerner size = memparse(param->string, &rest); 3897*de4c0e7cSLukas Czerner if (*rest || !size) 3898*de4c0e7cSLukas Czerner goto bad_value; 3899*de4c0e7cSLukas Czerner if (size > SHMEM_QUOTA_MAX_SPC_LIMIT) 3900*de4c0e7cSLukas Czerner return invalfc(fc, 3901*de4c0e7cSLukas Czerner "Group quota block hardlimit too large."); 3902*de4c0e7cSLukas Czerner ctx->qlimits.grpquota_bhardlimit = size; 3903*de4c0e7cSLukas Czerner break; 3904*de4c0e7cSLukas Czerner case Opt_usrquota_inode_hardlimit: 3905*de4c0e7cSLukas Czerner size = memparse(param->string, &rest); 3906*de4c0e7cSLukas Czerner if (*rest || !size) 3907*de4c0e7cSLukas Czerner goto bad_value; 3908*de4c0e7cSLukas Czerner if (size > SHMEM_QUOTA_MAX_INO_LIMIT) 3909*de4c0e7cSLukas Czerner return invalfc(fc, 3910*de4c0e7cSLukas Czerner "User quota inode hardlimit too large."); 3911*de4c0e7cSLukas Czerner ctx->qlimits.usrquota_ihardlimit = size; 3912*de4c0e7cSLukas Czerner break; 3913*de4c0e7cSLukas Czerner case Opt_grpquota_inode_hardlimit: 3914*de4c0e7cSLukas Czerner size = memparse(param->string, &rest); 3915*de4c0e7cSLukas Czerner if (*rest || !size) 3916*de4c0e7cSLukas Czerner goto bad_value; 3917*de4c0e7cSLukas Czerner if (size > SHMEM_QUOTA_MAX_INO_LIMIT) 3918*de4c0e7cSLukas Czerner return invalfc(fc, 3919*de4c0e7cSLukas Czerner "Group quota inode hardlimit too large."); 3920*de4c0e7cSLukas Czerner ctx->qlimits.grpquota_ihardlimit = size; 3921*de4c0e7cSLukas Czerner break; 3922e04dc423SAl Viro } 3923e04dc423SAl Viro return 0; 3924e04dc423SAl Viro 3925626c3920SAl Viro unsupported_parameter: 3926f35aa2bcSAl Viro return invalfc(fc, "Unsupported parameter '%s'", param->key); 3927626c3920SAl Viro bad_value: 3928f35aa2bcSAl Viro return invalfc(fc, "Bad value for '%s'", param->key); 3929e04dc423SAl Viro } 3930e04dc423SAl Viro 3931f3235626SDavid Howells static int shmem_parse_options(struct fs_context *fc, void *data) 3932e04dc423SAl Viro { 3933f3235626SDavid Howells char *options = data; 3934f3235626SDavid Howells 393533f37c64SAl Viro if (options) { 393633f37c64SAl Viro int err = security_sb_eat_lsm_opts(options, &fc->security); 393733f37c64SAl Viro if (err) 393833f37c64SAl 
Viro return err; 393933f37c64SAl Viro } 394033f37c64SAl Viro 3941b00dc3adSHugh Dickins while (options != NULL) { 3942626c3920SAl Viro char *this_char = options; 3943b00dc3adSHugh Dickins for (;;) { 3944b00dc3adSHugh Dickins /* 3945b00dc3adSHugh Dickins * NUL-terminate this option: unfortunately, 3946b00dc3adSHugh Dickins * mount options form a comma-separated list, 3947b00dc3adSHugh Dickins * but mpol's nodelist may also contain commas. 3948b00dc3adSHugh Dickins */ 3949b00dc3adSHugh Dickins options = strchr(options, ','); 3950b00dc3adSHugh Dickins if (options == NULL) 3951b00dc3adSHugh Dickins break; 3952b00dc3adSHugh Dickins options++; 3953b00dc3adSHugh Dickins if (!isdigit(*options)) { 3954b00dc3adSHugh Dickins options[-1] = '\0'; 3955b00dc3adSHugh Dickins break; 3956b00dc3adSHugh Dickins } 3957b00dc3adSHugh Dickins } 3958626c3920SAl Viro if (*this_char) { 3959626c3920SAl Viro char *value = strchr(this_char, '='); 3960f3235626SDavid Howells size_t len = 0; 3961626c3920SAl Viro int err; 3962626c3920SAl Viro 3963626c3920SAl Viro if (value) { 3964626c3920SAl Viro *value++ = '\0'; 3965f3235626SDavid Howells len = strlen(value); 39661da177e4SLinus Torvalds } 3967f3235626SDavid Howells err = vfs_parse_fs_string(fc, this_char, value, len); 3968f3235626SDavid Howells if (err < 0) 3969f3235626SDavid Howells return err; 39701da177e4SLinus Torvalds } 3971626c3920SAl Viro } 39721da177e4SLinus Torvalds return 0; 39731da177e4SLinus Torvalds } 39741da177e4SLinus Torvalds 3975f3235626SDavid Howells /* 3976f3235626SDavid Howells * Reconfigure a shmem filesystem. 3977f3235626SDavid Howells * 3978f3235626SDavid Howells * Note that we disallow change from limited->unlimited blocks/inodes while any 3979f3235626SDavid Howells * are in use; but we must separately disallow unlimited->limited, because in 3980f3235626SDavid Howells * that case we have no record of how much is already in use. 
3981f3235626SDavid Howells */ 3982f3235626SDavid Howells static int shmem_reconfigure(struct fs_context *fc) 39831da177e4SLinus Torvalds { 3984f3235626SDavid Howells struct shmem_options *ctx = fc->fs_private; 3985f3235626SDavid Howells struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb); 39860edd73b3SHugh Dickins unsigned long inodes; 3987bf11b9a8SSebastian Andrzej Siewior struct mempolicy *mpol = NULL; 3988f3235626SDavid Howells const char *err; 39890edd73b3SHugh Dickins 3990bf11b9a8SSebastian Andrzej Siewior raw_spin_lock(&sbinfo->stat_lock); 39910edd73b3SHugh Dickins inodes = sbinfo->max_inodes - sbinfo->free_inodes; 39920c98c8e1SZhaoLong Wang 3993f3235626SDavid Howells if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) { 3994f3235626SDavid Howells if (!sbinfo->max_blocks) { 3995f3235626SDavid Howells err = "Cannot retroactively limit size"; 39960edd73b3SHugh Dickins goto out; 39970b5071ddSAl Viro } 3998f3235626SDavid Howells if (percpu_counter_compare(&sbinfo->used_blocks, 3999f3235626SDavid Howells ctx->blocks) > 0) { 4000f3235626SDavid Howells err = "Too small a size for current use"; 40010b5071ddSAl Viro goto out; 4002f3235626SDavid Howells } 4003f3235626SDavid Howells } 4004f3235626SDavid Howells if ((ctx->seen & SHMEM_SEEN_INODES) && ctx->inodes) { 4005f3235626SDavid Howells if (!sbinfo->max_inodes) { 4006f3235626SDavid Howells err = "Cannot retroactively limit inodes"; 40070b5071ddSAl Viro goto out; 40080b5071ddSAl Viro } 4009f3235626SDavid Howells if (ctx->inodes < inodes) { 4010f3235626SDavid Howells err = "Too few inodes for current use"; 4011f3235626SDavid Howells goto out; 4012f3235626SDavid Howells } 4013f3235626SDavid Howells } 40140edd73b3SHugh Dickins 4015ea3271f7SChris Down if ((ctx->seen & SHMEM_SEEN_INUMS) && !ctx->full_inums && 4016ea3271f7SChris Down sbinfo->next_ino > UINT_MAX) { 4017ea3271f7SChris Down err = "Current inum too high to switch to 32-bit inums"; 4018ea3271f7SChris Down goto out; 4019ea3271f7SChris Down } 40202c6efe9cSLuis Chamberlain if ((ctx->seen & SHMEM_SEEN_NOSWAP) && ctx->noswap && !sbinfo->noswap) { 40212c6efe9cSLuis Chamberlain err = "Cannot disable swap on remount"; 40222c6efe9cSLuis Chamberlain goto out; 40232c6efe9cSLuis Chamberlain } 40242c6efe9cSLuis Chamberlain if (!(ctx->seen & SHMEM_SEEN_NOSWAP) && !ctx->noswap && sbinfo->noswap) { 40252c6efe9cSLuis Chamberlain err = "Cannot enable swap on remount if it was disabled on first mount"; 40262c6efe9cSLuis Chamberlain goto out; 40272c6efe9cSLuis Chamberlain } 4028ea3271f7SChris Down 4029e09764cfSCarlos Maiolino if (ctx->seen & SHMEM_SEEN_QUOTA && 4030e09764cfSCarlos Maiolino !sb_any_quota_loaded(fc->root->d_sb)) { 4031e09764cfSCarlos Maiolino err = "Cannot enable quota on remount"; 4032e09764cfSCarlos Maiolino goto out; 4033e09764cfSCarlos Maiolino } 4034e09764cfSCarlos Maiolino 4035*de4c0e7cSLukas Czerner #ifdef CONFIG_TMPFS_QUOTA 4036*de4c0e7cSLukas Czerner #define CHANGED_LIMIT(name) \ 4037*de4c0e7cSLukas Czerner (ctx->qlimits.name## hardlimit && \ 4038*de4c0e7cSLukas Czerner (ctx->qlimits.name## hardlimit != sbinfo->qlimits.name## hardlimit)) 4039*de4c0e7cSLukas Czerner 4040*de4c0e7cSLukas Czerner if (CHANGED_LIMIT(usrquota_b) || CHANGED_LIMIT(usrquota_i) || 4041*de4c0e7cSLukas Czerner CHANGED_LIMIT(grpquota_b) || CHANGED_LIMIT(grpquota_i)) { 4042*de4c0e7cSLukas Czerner err = "Cannot change global quota limit on remount"; 4043*de4c0e7cSLukas Czerner goto out; 4044*de4c0e7cSLukas Czerner } 4045*de4c0e7cSLukas Czerner #endif /* CONFIG_TMPFS_QUOTA */ 4046*de4c0e7cSLukas Czerner 
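	/*
	 * Added note (not in the original source): every check above has
	 * passed, so from here shmem_reconfigure() commits the new settings
	 * while still holding stat_lock; a rejected remount jumps to "out"
	 * first and leaves all of the old limits untouched.
	 */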
4047f3235626SDavid Howells if (ctx->seen & SHMEM_SEEN_HUGE) 4048f3235626SDavid Howells sbinfo->huge = ctx->huge; 4049ea3271f7SChris Down if (ctx->seen & SHMEM_SEEN_INUMS) 4050ea3271f7SChris Down sbinfo->full_inums = ctx->full_inums; 4051f3235626SDavid Howells if (ctx->seen & SHMEM_SEEN_BLOCKS) 4052f3235626SDavid Howells sbinfo->max_blocks = ctx->blocks; 4053f3235626SDavid Howells if (ctx->seen & SHMEM_SEEN_INODES) { 4054f3235626SDavid Howells sbinfo->max_inodes = ctx->inodes; 4055f3235626SDavid Howells sbinfo->free_inodes = ctx->inodes - inodes; 40560b5071ddSAl Viro } 405771fe804bSLee Schermerhorn 40585f00110fSGreg Thelen /* 40595f00110fSGreg Thelen * Preserve previous mempolicy unless mpol remount option was specified. 40605f00110fSGreg Thelen */ 4061f3235626SDavid Howells if (ctx->mpol) { 4062bf11b9a8SSebastian Andrzej Siewior mpol = sbinfo->mpol; 4063f3235626SDavid Howells sbinfo->mpol = ctx->mpol; /* transfers initial ref */ 4064f3235626SDavid Howells ctx->mpol = NULL; 40655f00110fSGreg Thelen } 40662c6efe9cSLuis Chamberlain 40672c6efe9cSLuis Chamberlain if (ctx->noswap) 40682c6efe9cSLuis Chamberlain sbinfo->noswap = true; 40692c6efe9cSLuis Chamberlain 4070bf11b9a8SSebastian Andrzej Siewior raw_spin_unlock(&sbinfo->stat_lock); 4071bf11b9a8SSebastian Andrzej Siewior mpol_put(mpol); 4072f3235626SDavid Howells return 0; 40730edd73b3SHugh Dickins out: 4074bf11b9a8SSebastian Andrzej Siewior raw_spin_unlock(&sbinfo->stat_lock); 4075f35aa2bcSAl Viro return invalfc(fc, "%s", err); 40761da177e4SLinus Torvalds } 4077680d794bSakpm@linux-foundation.org 407834c80b1dSAl Viro static int shmem_show_options(struct seq_file *seq, struct dentry *root) 4079680d794bSakpm@linux-foundation.org { 408034c80b1dSAl Viro struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb); 4081283ebdeeSTu Jinjiang struct mempolicy *mpol; 4082680d794bSakpm@linux-foundation.org 4083680d794bSakpm@linux-foundation.org if (sbinfo->max_blocks != shmem_default_max_blocks()) 4084680d794bSakpm@linux-foundation.org seq_printf(seq, ",size=%luk", 408509cbfeafSKirill A. Shutemov sbinfo->max_blocks << (PAGE_SHIFT - 10)); 4086680d794bSakpm@linux-foundation.org if (sbinfo->max_inodes != shmem_default_max_inodes()) 4087680d794bSakpm@linux-foundation.org seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes); 40880825a6f9SJoe Perches if (sbinfo->mode != (0777 | S_ISVTX)) 408909208d15SAl Viro seq_printf(seq, ",mode=%03ho", sbinfo->mode); 40908751e039SEric W. Biederman if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID)) 40918751e039SEric W. Biederman seq_printf(seq, ",uid=%u", 40928751e039SEric W. Biederman from_kuid_munged(&init_user_ns, sbinfo->uid)); 40938751e039SEric W. Biederman if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID)) 40948751e039SEric W. Biederman seq_printf(seq, ",gid=%u", 40958751e039SEric W. Biederman from_kgid_munged(&init_user_ns, sbinfo->gid)); 4096ea3271f7SChris Down 4097ea3271f7SChris Down /* 4098ea3271f7SChris Down * Showing inode{64,32} might be useful even if it's the system default, 4099ea3271f7SChris Down * since then people don't have to resort to checking both here and 4100ea3271f7SChris Down * /proc/config.gz to confirm 64-bit inums were successfully applied 4101ea3271f7SChris Down * (which may not even exist if IKCONFIG_PROC isn't enabled). 4102ea3271f7SChris Down * 4103ea3271f7SChris Down * We hide it when inode64 isn't the default and we are using 32-bit 4104ea3271f7SChris Down * inodes, since that probably just means the feature isn't even under 4105ea3271f7SChris Down * consideration. 
4106ea3271f7SChris Down * 4107ea3271f7SChris Down * As such: 4108ea3271f7SChris Down * 4109ea3271f7SChris Down * +-----------------+-----------------+ 4110ea3271f7SChris Down * | TMPFS_INODE64=y | TMPFS_INODE64=n | 4111ea3271f7SChris Down * +------------------+-----------------+-----------------+ 4112ea3271f7SChris Down * | full_inums=true | show | show | 4113ea3271f7SChris Down * | full_inums=false | show | hide | 4114ea3271f7SChris Down * +------------------+-----------------+-----------------+ 4115ea3271f7SChris Down * 4116ea3271f7SChris Down */ 4117ea3271f7SChris Down if (IS_ENABLED(CONFIG_TMPFS_INODE64) || sbinfo->full_inums) 4118ea3271f7SChris Down seq_printf(seq, ",inode%d", (sbinfo->full_inums ? 64 : 32)); 4119396bcc52SMatthew Wilcox (Oracle) #ifdef CONFIG_TRANSPARENT_HUGEPAGE 41205a6e75f8SKirill A. Shutemov /* Rightly or wrongly, show huge mount option unmasked by shmem_huge */ 41215a6e75f8SKirill A. Shutemov if (sbinfo->huge) 41225a6e75f8SKirill A. Shutemov seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge)); 41235a6e75f8SKirill A. Shutemov #endif 4124283ebdeeSTu Jinjiang mpol = shmem_get_sbmpol(sbinfo); 4125283ebdeeSTu Jinjiang shmem_show_mpol(seq, mpol); 4126283ebdeeSTu Jinjiang mpol_put(mpol); 41272c6efe9cSLuis Chamberlain if (sbinfo->noswap) 41282c6efe9cSLuis Chamberlain seq_printf(seq, ",noswap"); 4129680d794bSakpm@linux-foundation.org return 0; 4130680d794bSakpm@linux-foundation.org } 41319183df25SDavid Herrmann 4132680d794bSakpm@linux-foundation.org #endif /* CONFIG_TMPFS */ 41331da177e4SLinus Torvalds 41341da177e4SLinus Torvalds static void shmem_put_super(struct super_block *sb) 41351da177e4SLinus Torvalds { 4136602586a8SHugh Dickins struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 4137602586a8SHugh Dickins 4138e09764cfSCarlos Maiolino #ifdef CONFIG_TMPFS_QUOTA 4139e09764cfSCarlos Maiolino shmem_disable_quotas(sb); 4140e09764cfSCarlos Maiolino #endif 4141e809d5f0SChris Down free_percpu(sbinfo->ino_batch); 4142602586a8SHugh Dickins percpu_counter_destroy(&sbinfo->used_blocks); 414349cd0a5cSGreg Thelen mpol_put(sbinfo->mpol); 4144602586a8SHugh Dickins kfree(sbinfo); 41451da177e4SLinus Torvalds sb->s_fs_info = NULL; 41461da177e4SLinus Torvalds } 41471da177e4SLinus Torvalds 4148f3235626SDavid Howells static int shmem_fill_super(struct super_block *sb, struct fs_context *fc) 41491da177e4SLinus Torvalds { 4150f3235626SDavid Howells struct shmem_options *ctx = fc->fs_private; 41511da177e4SLinus Torvalds struct inode *inode; 41520edd73b3SHugh Dickins struct shmem_sb_info *sbinfo; 415371480663SCarlos Maiolino int error = -ENOMEM; 4154680d794bSakpm@linux-foundation.org 4155680d794bSakpm@linux-foundation.org /* Round up to L1_CACHE_BYTES to resist false sharing */ 4156425fbf04SPekka Enberg sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info), 4157680d794bSakpm@linux-foundation.org L1_CACHE_BYTES), GFP_KERNEL); 4158680d794bSakpm@linux-foundation.org if (!sbinfo) 415971480663SCarlos Maiolino return error; 4160680d794bSakpm@linux-foundation.org 4161680d794bSakpm@linux-foundation.org sb->s_fs_info = sbinfo; 41621da177e4SLinus Torvalds 41630edd73b3SHugh Dickins #ifdef CONFIG_TMPFS 41641da177e4SLinus Torvalds /* 41651da177e4SLinus Torvalds * Per default we only allow half of the physical ram per 41661da177e4SLinus Torvalds * tmpfs instance, limiting inodes to one per page of lowmem; 41671da177e4SLinus Torvalds * but the internal instance is left unlimited. 
41681da177e4SLinus Torvalds */ 41691751e8a6SLinus Torvalds if (!(sb->s_flags & SB_KERNMOUNT)) { 4170f3235626SDavid Howells if (!(ctx->seen & SHMEM_SEEN_BLOCKS)) 4171f3235626SDavid Howells ctx->blocks = shmem_default_max_blocks(); 4172f3235626SDavid Howells if (!(ctx->seen & SHMEM_SEEN_INODES)) 4173f3235626SDavid Howells ctx->inodes = shmem_default_max_inodes(); 4174ea3271f7SChris Down if (!(ctx->seen & SHMEM_SEEN_INUMS)) 4175ea3271f7SChris Down ctx->full_inums = IS_ENABLED(CONFIG_TMPFS_INODE64); 41762c6efe9cSLuis Chamberlain sbinfo->noswap = ctx->noswap; 4177ca4e0519SAl Viro } else { 41781751e8a6SLinus Torvalds sb->s_flags |= SB_NOUSER; 41791da177e4SLinus Torvalds } 418091828a40SDavid M. Grimes sb->s_export_op = &shmem_export_ops; 418136f05cabSJeff Layton sb->s_flags |= SB_NOSEC | SB_I_VERSION; 41820edd73b3SHugh Dickins #else 41831751e8a6SLinus Torvalds sb->s_flags |= SB_NOUSER; 41840edd73b3SHugh Dickins #endif 4185f3235626SDavid Howells sbinfo->max_blocks = ctx->blocks; 4186f3235626SDavid Howells sbinfo->free_inodes = sbinfo->max_inodes = ctx->inodes; 4187e809d5f0SChris Down if (sb->s_flags & SB_KERNMOUNT) { 4188e809d5f0SChris Down sbinfo->ino_batch = alloc_percpu(ino_t); 4189e809d5f0SChris Down if (!sbinfo->ino_batch) 4190e809d5f0SChris Down goto failed; 4191e809d5f0SChris Down } 4192f3235626SDavid Howells sbinfo->uid = ctx->uid; 4193f3235626SDavid Howells sbinfo->gid = ctx->gid; 4194ea3271f7SChris Down sbinfo->full_inums = ctx->full_inums; 4195f3235626SDavid Howells sbinfo->mode = ctx->mode; 4196f3235626SDavid Howells sbinfo->huge = ctx->huge; 4197f3235626SDavid Howells sbinfo->mpol = ctx->mpol; 4198f3235626SDavid Howells ctx->mpol = NULL; 41991da177e4SLinus Torvalds 4200bf11b9a8SSebastian Andrzej Siewior raw_spin_lock_init(&sbinfo->stat_lock); 4201908c7f19STejun Heo if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL)) 4202602586a8SHugh Dickins goto failed; 4203779750d2SKirill A. Shutemov spin_lock_init(&sbinfo->shrinklist_lock); 4204779750d2SKirill A. Shutemov INIT_LIST_HEAD(&sbinfo->shrinklist); 42051da177e4SLinus Torvalds 4206285b2c4fSHugh Dickins sb->s_maxbytes = MAX_LFS_FILESIZE; 420709cbfeafSKirill A. Shutemov sb->s_blocksize = PAGE_SIZE; 420809cbfeafSKirill A. Shutemov sb->s_blocksize_bits = PAGE_SHIFT; 42091da177e4SLinus Torvalds sb->s_magic = TMPFS_MAGIC; 42101da177e4SLinus Torvalds sb->s_op = &shmem_ops; 4211cfd95a9cSRobin H. 
Johnson sb->s_time_gran = 1; 4212b09e0fa4SEric Paris #ifdef CONFIG_TMPFS_XATTR 421339f0247dSAndreas Gruenbacher sb->s_xattr = shmem_xattr_handlers; 4214b09e0fa4SEric Paris #endif 4215b09e0fa4SEric Paris #ifdef CONFIG_TMPFS_POSIX_ACL 42161751e8a6SLinus Torvalds sb->s_flags |= SB_POSIXACL; 421739f0247dSAndreas Gruenbacher #endif 42182b4db796SAmir Goldstein uuid_gen(&sb->s_uuid); 42190edd73b3SHugh Dickins 4220e09764cfSCarlos Maiolino #ifdef CONFIG_TMPFS_QUOTA 4221e09764cfSCarlos Maiolino if (ctx->seen & SHMEM_SEEN_QUOTA) { 4222e09764cfSCarlos Maiolino sb->dq_op = &shmem_quota_operations; 4223e09764cfSCarlos Maiolino sb->s_qcop = &dquot_quotactl_sysfile_ops; 4224e09764cfSCarlos Maiolino sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; 4225e09764cfSCarlos Maiolino 4226*de4c0e7cSLukas Czerner /* Copy the default limits from ctx into sbinfo */ 4227*de4c0e7cSLukas Czerner memcpy(&sbinfo->qlimits, &ctx->qlimits, 4228*de4c0e7cSLukas Czerner sizeof(struct shmem_quota_limits)); 4229*de4c0e7cSLukas Czerner 4230e09764cfSCarlos Maiolino if (shmem_enable_quotas(sb, ctx->quota_types)) 4231e09764cfSCarlos Maiolino goto failed; 4232e09764cfSCarlos Maiolino } 4233e09764cfSCarlos Maiolino #endif /* CONFIG_TMPFS_QUOTA */ 4234e09764cfSCarlos Maiolino 42357a80e5b8SGiuseppe Scrivano inode = shmem_get_inode(&nop_mnt_idmap, sb, NULL, S_IFDIR | sbinfo->mode, 0, 42367a80e5b8SGiuseppe Scrivano VM_NORESERVE); 423771480663SCarlos Maiolino if (IS_ERR(inode)) { 423871480663SCarlos Maiolino error = PTR_ERR(inode); 42391da177e4SLinus Torvalds goto failed; 424071480663SCarlos Maiolino } 4241680d794bSakpm@linux-foundation.org inode->i_uid = sbinfo->uid; 4242680d794bSakpm@linux-foundation.org inode->i_gid = sbinfo->gid; 4243318ceed0SAl Viro sb->s_root = d_make_root(inode); 4244318ceed0SAl Viro if (!sb->s_root) 424548fde701SAl Viro goto failed; 42461da177e4SLinus Torvalds return 0; 42471da177e4SLinus Torvalds 42481da177e4SLinus Torvalds failed: 42491da177e4SLinus Torvalds shmem_put_super(sb); 425071480663SCarlos Maiolino return error; 42511da177e4SLinus Torvalds } 42521da177e4SLinus Torvalds 4253f3235626SDavid Howells static int shmem_get_tree(struct fs_context *fc) 4254f3235626SDavid Howells { 4255f3235626SDavid Howells return get_tree_nodev(fc, shmem_fill_super); 4256f3235626SDavid Howells } 4257f3235626SDavid Howells 4258f3235626SDavid Howells static void shmem_free_fc(struct fs_context *fc) 4259f3235626SDavid Howells { 4260f3235626SDavid Howells struct shmem_options *ctx = fc->fs_private; 4261f3235626SDavid Howells 4262f3235626SDavid Howells if (ctx) { 4263f3235626SDavid Howells mpol_put(ctx->mpol); 4264f3235626SDavid Howells kfree(ctx); 4265f3235626SDavid Howells } 4266f3235626SDavid Howells } 4267f3235626SDavid Howells 4268f3235626SDavid Howells static const struct fs_context_operations shmem_fs_context_ops = { 4269f3235626SDavid Howells .free = shmem_free_fc, 4270f3235626SDavid Howells .get_tree = shmem_get_tree, 4271f3235626SDavid Howells #ifdef CONFIG_TMPFS 4272f3235626SDavid Howells .parse_monolithic = shmem_parse_options, 4273f3235626SDavid Howells .parse_param = shmem_parse_one, 4274f3235626SDavid Howells .reconfigure = shmem_reconfigure, 4275f3235626SDavid Howells #endif 4276f3235626SDavid Howells }; 4277f3235626SDavid Howells 4278fcc234f8SPekka Enberg static struct kmem_cache *shmem_inode_cachep; 42791da177e4SLinus Torvalds 42801da177e4SLinus Torvalds static struct inode *shmem_alloc_inode(struct super_block *sb) 42811da177e4SLinus Torvalds { 428241ffe5d5SHugh Dickins struct shmem_inode_info *info; 
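	/*
	 * Added note (not in the original source): the VFS inode is embedded
	 * in shmem_inode_info (as vfs_inode), so the whole container is
	 * allocated from the private shmem_inode_cachep slab below.
	 */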
4283fd60b288SMuchun Song info = alloc_inode_sb(sb, shmem_inode_cachep, GFP_KERNEL); 428441ffe5d5SHugh Dickins if (!info) 42851da177e4SLinus Torvalds return NULL; 428641ffe5d5SHugh Dickins return &info->vfs_inode; 42871da177e4SLinus Torvalds } 42881da177e4SLinus Torvalds 428974b1da56SAl Viro static void shmem_free_in_core_inode(struct inode *inode) 4290fa0d7e3dSNick Piggin { 429184e710daSAl Viro if (S_ISLNK(inode->i_mode)) 42923ed47db3SAl Viro kfree(inode->i_link); 4293fa0d7e3dSNick Piggin kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); 4294fa0d7e3dSNick Piggin } 4295fa0d7e3dSNick Piggin 42961da177e4SLinus Torvalds static void shmem_destroy_inode(struct inode *inode) 42971da177e4SLinus Torvalds { 429809208d15SAl Viro if (S_ISREG(inode->i_mode)) 42991da177e4SLinus Torvalds mpol_free_shared_policy(&SHMEM_I(inode)->policy); 43001da177e4SLinus Torvalds } 43011da177e4SLinus Torvalds 430241ffe5d5SHugh Dickins static void shmem_init_inode(void *foo) 43031da177e4SLinus Torvalds { 430441ffe5d5SHugh Dickins struct shmem_inode_info *info = foo; 430541ffe5d5SHugh Dickins inode_init_once(&info->vfs_inode); 43061da177e4SLinus Torvalds } 43071da177e4SLinus Torvalds 43089a8ec03eSweiping zhang static void shmem_init_inodecache(void) 43091da177e4SLinus Torvalds { 43101da177e4SLinus Torvalds shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", 43111da177e4SLinus Torvalds sizeof(struct shmem_inode_info), 43125d097056SVladimir Davydov 0, SLAB_PANIC|SLAB_ACCOUNT, shmem_init_inode); 43131da177e4SLinus Torvalds } 43141da177e4SLinus Torvalds 431541ffe5d5SHugh Dickins static void shmem_destroy_inodecache(void) 43161da177e4SLinus Torvalds { 43171a1d92c1SAlexey Dobriyan kmem_cache_destroy(shmem_inode_cachep); 43181da177e4SLinus Torvalds } 43191da177e4SLinus Torvalds 4320a7605426SYang Shi /* Keep the page in page cache instead of truncating it */ 4321a7605426SYang Shi static int shmem_error_remove_page(struct address_space *mapping, 4322a7605426SYang Shi struct page *page) 4323a7605426SYang Shi { 4324a7605426SYang Shi return 0; 4325a7605426SYang Shi } 4326a7605426SYang Shi 432730e6a51dSHui Su const struct address_space_operations shmem_aops = { 43281da177e4SLinus Torvalds .writepage = shmem_writepage, 432946de8b97SMatthew Wilcox (Oracle) .dirty_folio = noop_dirty_folio, 43301da177e4SLinus Torvalds #ifdef CONFIG_TMPFS 4331800d15a5SNick Piggin .write_begin = shmem_write_begin, 4332800d15a5SNick Piggin .write_end = shmem_write_end, 43331da177e4SLinus Torvalds #endif 43341c93923cSAndrew Morton #ifdef CONFIG_MIGRATION 433554184650SMatthew Wilcox (Oracle) .migrate_folio = migrate_folio, 43361c93923cSAndrew Morton #endif 4337a7605426SYang Shi .error_remove_page = shmem_error_remove_page, 43381da177e4SLinus Torvalds }; 433930e6a51dSHui Su EXPORT_SYMBOL(shmem_aops); 43401da177e4SLinus Torvalds 434115ad7cdcSHelge Deller static const struct file_operations shmem_file_operations = { 43421da177e4SLinus Torvalds .mmap = shmem_mmap, 4343a5454f95SThomas Weißschuh .open = generic_file_open, 4344c01d5b30SHugh Dickins .get_unmapped_area = shmem_get_unmapped_area, 43451da177e4SLinus Torvalds #ifdef CONFIG_TMPFS 4346220f2ac9SHugh Dickins .llseek = shmem_file_llseek, 43472ba5bbedSAl Viro .read_iter = shmem_file_read_iter, 43488174202bSAl Viro .write_iter = generic_file_write_iter, 43491b061d92SChristoph Hellwig .fsync = noop_fsync, 4350bd194b18SDavid Howells .splice_read = shmem_file_splice_read, 4351f6cb85d0SAl Viro .splice_write = iter_file_splice_write, 435283e4fa9cSHugh Dickins .fallocate = shmem_fallocate, 
43531da177e4SLinus Torvalds #endif 43541da177e4SLinus Torvalds }; 43551da177e4SLinus Torvalds 435692e1d5beSArjan van de Ven static const struct inode_operations shmem_inode_operations = { 435744a30220SYu Zhao .getattr = shmem_getattr, 435894c1e62dSHugh Dickins .setattr = shmem_setattr, 4359b09e0fa4SEric Paris #ifdef CONFIG_TMPFS_XATTR 4360b09e0fa4SEric Paris .listxattr = shmem_listxattr, 4361feda821eSChristoph Hellwig .set_acl = simple_set_acl, 4362e408e695STheodore Ts'o .fileattr_get = shmem_fileattr_get, 4363e408e695STheodore Ts'o .fileattr_set = shmem_fileattr_set, 4364b09e0fa4SEric Paris #endif 43651da177e4SLinus Torvalds }; 43661da177e4SLinus Torvalds 436792e1d5beSArjan van de Ven static const struct inode_operations shmem_dir_inode_operations = { 43681da177e4SLinus Torvalds #ifdef CONFIG_TMPFS 4369f7cd16a5SXavier Roche .getattr = shmem_getattr, 43701da177e4SLinus Torvalds .create = shmem_create, 43711da177e4SLinus Torvalds .lookup = simple_lookup, 43721da177e4SLinus Torvalds .link = shmem_link, 43731da177e4SLinus Torvalds .unlink = shmem_unlink, 43741da177e4SLinus Torvalds .symlink = shmem_symlink, 43751da177e4SLinus Torvalds .mkdir = shmem_mkdir, 43761da177e4SLinus Torvalds .rmdir = shmem_rmdir, 43771da177e4SLinus Torvalds .mknod = shmem_mknod, 43782773bf00SMiklos Szeredi .rename = shmem_rename2, 437960545d0dSAl Viro .tmpfile = shmem_tmpfile, 43801da177e4SLinus Torvalds #endif 4381b09e0fa4SEric Paris #ifdef CONFIG_TMPFS_XATTR 4382b09e0fa4SEric Paris .listxattr = shmem_listxattr, 4383e408e695STheodore Ts'o .fileattr_get = shmem_fileattr_get, 4384e408e695STheodore Ts'o .fileattr_set = shmem_fileattr_set, 4385b09e0fa4SEric Paris #endif 438639f0247dSAndreas Gruenbacher #ifdef CONFIG_TMPFS_POSIX_ACL 438794c1e62dSHugh Dickins .setattr = shmem_setattr, 4388feda821eSChristoph Hellwig .set_acl = simple_set_acl, 438939f0247dSAndreas Gruenbacher #endif 439039f0247dSAndreas Gruenbacher }; 439139f0247dSAndreas Gruenbacher 439292e1d5beSArjan van de Ven static const struct inode_operations shmem_special_inode_operations = { 4393f7cd16a5SXavier Roche .getattr = shmem_getattr, 4394b09e0fa4SEric Paris #ifdef CONFIG_TMPFS_XATTR 4395b09e0fa4SEric Paris .listxattr = shmem_listxattr, 4396b09e0fa4SEric Paris #endif 439739f0247dSAndreas Gruenbacher #ifdef CONFIG_TMPFS_POSIX_ACL 439894c1e62dSHugh Dickins .setattr = shmem_setattr, 4399feda821eSChristoph Hellwig .set_acl = simple_set_acl, 440039f0247dSAndreas Gruenbacher #endif 44011da177e4SLinus Torvalds }; 44021da177e4SLinus Torvalds 4403759b9775SHugh Dickins static const struct super_operations shmem_ops = { 44041da177e4SLinus Torvalds .alloc_inode = shmem_alloc_inode, 440574b1da56SAl Viro .free_inode = shmem_free_in_core_inode, 44061da177e4SLinus Torvalds .destroy_inode = shmem_destroy_inode, 44071da177e4SLinus Torvalds #ifdef CONFIG_TMPFS 44081da177e4SLinus Torvalds .statfs = shmem_statfs, 4409680d794bSakpm@linux-foundation.org .show_options = shmem_show_options, 44101da177e4SLinus Torvalds #endif 4411e09764cfSCarlos Maiolino #ifdef CONFIG_TMPFS_QUOTA 4412e09764cfSCarlos Maiolino .get_dquots = shmem_get_dquots, 4413e09764cfSCarlos Maiolino #endif 44141f895f75SAl Viro .evict_inode = shmem_evict_inode, 44151da177e4SLinus Torvalds .drop_inode = generic_delete_inode, 44161da177e4SLinus Torvalds .put_super = shmem_put_super, 4417396bcc52SMatthew Wilcox (Oracle) #ifdef CONFIG_TRANSPARENT_HUGEPAGE 4418779750d2SKirill A. Shutemov .nr_cached_objects = shmem_unused_huge_count, 4419779750d2SKirill A. 
Shutemov .free_cached_objects = shmem_unused_huge_scan, 4420779750d2SKirill A. Shutemov #endif 44211da177e4SLinus Torvalds }; 44221da177e4SLinus Torvalds 4423f0f37e2fSAlexey Dobriyan static const struct vm_operations_struct shmem_vm_ops = { 442454cb8821SNick Piggin .fault = shmem_fault, 4425d7c17551SNing Qu .map_pages = filemap_map_pages, 44261da177e4SLinus Torvalds #ifdef CONFIG_NUMA 44271da177e4SLinus Torvalds .set_policy = shmem_set_policy, 44281da177e4SLinus Torvalds .get_policy = shmem_get_policy, 44291da177e4SLinus Torvalds #endif 44301da177e4SLinus Torvalds }; 44311da177e4SLinus Torvalds 4432d09e8ca6SPasha Tatashin static const struct vm_operations_struct shmem_anon_vm_ops = { 4433d09e8ca6SPasha Tatashin .fault = shmem_fault, 4434d09e8ca6SPasha Tatashin .map_pages = filemap_map_pages, 4435d09e8ca6SPasha Tatashin #ifdef CONFIG_NUMA 4436d09e8ca6SPasha Tatashin .set_policy = shmem_set_policy, 4437d09e8ca6SPasha Tatashin .get_policy = shmem_get_policy, 4438d09e8ca6SPasha Tatashin #endif 4439d09e8ca6SPasha Tatashin }; 4440d09e8ca6SPasha Tatashin 4441f3235626SDavid Howells int shmem_init_fs_context(struct fs_context *fc) 44421da177e4SLinus Torvalds { 4443f3235626SDavid Howells struct shmem_options *ctx; 4444f3235626SDavid Howells 4445f3235626SDavid Howells ctx = kzalloc(sizeof(struct shmem_options), GFP_KERNEL); 4446f3235626SDavid Howells if (!ctx) 4447f3235626SDavid Howells return -ENOMEM; 4448f3235626SDavid Howells 4449f3235626SDavid Howells ctx->mode = 0777 | S_ISVTX; 4450f3235626SDavid Howells ctx->uid = current_fsuid(); 4451f3235626SDavid Howells ctx->gid = current_fsgid(); 4452f3235626SDavid Howells 4453f3235626SDavid Howells fc->fs_private = ctx; 4454f3235626SDavid Howells fc->ops = &shmem_fs_context_ops; 4455f3235626SDavid Howells return 0; 44561da177e4SLinus Torvalds } 44571da177e4SLinus Torvalds 445841ffe5d5SHugh Dickins static struct file_system_type shmem_fs_type = { 44591da177e4SLinus Torvalds .owner = THIS_MODULE, 44601da177e4SLinus Torvalds .name = "tmpfs", 4461f3235626SDavid Howells .init_fs_context = shmem_init_fs_context, 4462f3235626SDavid Howells #ifdef CONFIG_TMPFS 4463d7167b14SAl Viro .parameters = shmem_fs_parameters, 4464f3235626SDavid Howells #endif 44651da177e4SLinus Torvalds .kill_sb = kill_litter_super, 44667a80e5b8SGiuseppe Scrivano #ifdef CONFIG_SHMEM 44677a80e5b8SGiuseppe Scrivano .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP, 44687a80e5b8SGiuseppe Scrivano #else 4469ff36da69SMatthew Wilcox (Oracle) .fs_flags = FS_USERNS_MOUNT, 44707a80e5b8SGiuseppe Scrivano #endif 44711da177e4SLinus Torvalds }; 44721da177e4SLinus Torvalds 44739096bbe9SMiaohe Lin void __init shmem_init(void) 44741da177e4SLinus Torvalds { 44751da177e4SLinus Torvalds int error; 44761da177e4SLinus Torvalds 44779a8ec03eSweiping zhang shmem_init_inodecache(); 44781da177e4SLinus Torvalds 4479e09764cfSCarlos Maiolino #ifdef CONFIG_TMPFS_QUOTA 4480e09764cfSCarlos Maiolino error = register_quota_format(&shmem_quota_format); 4481e09764cfSCarlos Maiolino if (error < 0) { 4482e09764cfSCarlos Maiolino pr_err("Could not register quota format\n"); 4483e09764cfSCarlos Maiolino goto out3; 4484e09764cfSCarlos Maiolino } 4485e09764cfSCarlos Maiolino #endif 4486e09764cfSCarlos Maiolino 448741ffe5d5SHugh Dickins error = register_filesystem(&shmem_fs_type); 44881da177e4SLinus Torvalds if (error) { 44891170532bSJoe Perches pr_err("Could not register tmpfs\n"); 44901da177e4SLinus Torvalds goto out2; 44911da177e4SLinus Torvalds } 449295dc112aSGreg Kroah-Hartman 4493ca4e0519SAl Viro shm_mnt = 
kern_mount(&shmem_fs_type); 44941da177e4SLinus Torvalds if (IS_ERR(shm_mnt)) { 44951da177e4SLinus Torvalds error = PTR_ERR(shm_mnt); 44961170532bSJoe Perches pr_err("Could not kern_mount tmpfs\n"); 44971da177e4SLinus Torvalds goto out1; 44981da177e4SLinus Torvalds } 44995a6e75f8SKirill A. Shutemov 4500396bcc52SMatthew Wilcox (Oracle) #ifdef CONFIG_TRANSPARENT_HUGEPAGE 4501435c0b87SKirill A. Shutemov if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY) 45025a6e75f8SKirill A. Shutemov SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge; 45035a6e75f8SKirill A. Shutemov else 45045e6e5a12SHugh Dickins shmem_huge = SHMEM_HUGE_NEVER; /* just in case it was patched */ 45055a6e75f8SKirill A. Shutemov #endif 45069096bbe9SMiaohe Lin return; 45071da177e4SLinus Torvalds 45081da177e4SLinus Torvalds out1: 450941ffe5d5SHugh Dickins unregister_filesystem(&shmem_fs_type); 45101da177e4SLinus Torvalds out2: 4511e09764cfSCarlos Maiolino #ifdef CONFIG_TMPFS_QUOTA 4512e09764cfSCarlos Maiolino unregister_quota_format(&shmem_quota_format); 4513e09764cfSCarlos Maiolino out3: 4514e09764cfSCarlos Maiolino #endif 451541ffe5d5SHugh Dickins shmem_destroy_inodecache(); 45161da177e4SLinus Torvalds shm_mnt = ERR_PTR(error); 45171da177e4SLinus Torvalds } 4518853ac43aSMatt Mackall 4519396bcc52SMatthew Wilcox (Oracle) #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS) 45205a6e75f8SKirill A. Shutemov static ssize_t shmem_enabled_show(struct kobject *kobj, 45215a6e75f8SKirill A. Shutemov struct kobj_attribute *attr, char *buf) 45225a6e75f8SKirill A. Shutemov { 452326083eb6SColin Ian King static const int values[] = { 45245a6e75f8SKirill A. Shutemov SHMEM_HUGE_ALWAYS, 45255a6e75f8SKirill A. Shutemov SHMEM_HUGE_WITHIN_SIZE, 45265a6e75f8SKirill A. Shutemov SHMEM_HUGE_ADVISE, 45275a6e75f8SKirill A. Shutemov SHMEM_HUGE_NEVER, 45285a6e75f8SKirill A. Shutemov SHMEM_HUGE_DENY, 45295a6e75f8SKirill A. Shutemov SHMEM_HUGE_FORCE, 45305a6e75f8SKirill A. Shutemov }; 453179d4d38aSJoe Perches int len = 0; 453279d4d38aSJoe Perches int i; 45335a6e75f8SKirill A. Shutemov 453479d4d38aSJoe Perches for (i = 0; i < ARRAY_SIZE(values); i++) { 453579d4d38aSJoe Perches len += sysfs_emit_at(buf, len, 453679d4d38aSJoe Perches shmem_huge == values[i] ? "%s[%s]" : "%s%s", 453779d4d38aSJoe Perches i ? " " : "", 45385a6e75f8SKirill A. Shutemov shmem_format_huge(values[i])); 45395a6e75f8SKirill A. Shutemov } 454079d4d38aSJoe Perches 454179d4d38aSJoe Perches len += sysfs_emit_at(buf, len, "\n"); 454279d4d38aSJoe Perches 454379d4d38aSJoe Perches return len; 45445a6e75f8SKirill A. Shutemov } 45455a6e75f8SKirill A. Shutemov 45465a6e75f8SKirill A. Shutemov static ssize_t shmem_enabled_store(struct kobject *kobj, 45475a6e75f8SKirill A. Shutemov struct kobj_attribute *attr, const char *buf, size_t count) 45485a6e75f8SKirill A. Shutemov { 45495a6e75f8SKirill A. Shutemov char tmp[16]; 45505a6e75f8SKirill A. Shutemov int huge; 45515a6e75f8SKirill A. Shutemov 45525a6e75f8SKirill A. Shutemov if (count + 1 > sizeof(tmp)) 45535a6e75f8SKirill A. Shutemov return -EINVAL; 45545a6e75f8SKirill A. Shutemov memcpy(tmp, buf, count); 45555a6e75f8SKirill A. Shutemov tmp[count] = '\0'; 45565a6e75f8SKirill A. Shutemov if (count && tmp[count - 1] == '\n') 45575a6e75f8SKirill A. Shutemov tmp[count - 1] = '\0'; 45585a6e75f8SKirill A. Shutemov 45595a6e75f8SKirill A. Shutemov huge = shmem_parse_huge(tmp); 45605a6e75f8SKirill A. Shutemov if (huge == -EINVAL) 45615a6e75f8SKirill A. Shutemov return -EINVAL; 45625a6e75f8SKirill A. 
Shutemov if (!has_transparent_hugepage() &&
45635a6e75f8SKirill A. Shutemov huge != SHMEM_HUGE_NEVER && huge != SHMEM_HUGE_DENY)
45645a6e75f8SKirill A. Shutemov return -EINVAL;
45655a6e75f8SKirill A. Shutemov
45665a6e75f8SKirill A. Shutemov shmem_huge = huge;
4567435c0b87SKirill A. Shutemov if (shmem_huge > SHMEM_HUGE_DENY)
45685a6e75f8SKirill A. Shutemov SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
45695a6e75f8SKirill A. Shutemov return count;
45705a6e75f8SKirill A. Shutemov }
45715a6e75f8SKirill A. Shutemov
45724bfa8adaSMiaohe Lin struct kobj_attribute shmem_enabled_attr = __ATTR_RW(shmem_enabled);
4573396bcc52SMatthew Wilcox (Oracle) #endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */
4574f3f0e1d2SKirill A. Shutemov
4575853ac43aSMatt Mackall #else /* !CONFIG_SHMEM */
4576853ac43aSMatt Mackall
4577853ac43aSMatt Mackall /*
4578853ac43aSMatt Mackall * tiny-shmem: simple shmemfs and tmpfs using ramfs code
4579853ac43aSMatt Mackall *
4580853ac43aSMatt Mackall * This is intended for small systems where the benefits of the full
4581853ac43aSMatt Mackall * shmem code (swap-backed and resource-limited) are outweighed by
4582853ac43aSMatt Mackall * their complexity. On systems without swap this code should be
4583853ac43aSMatt Mackall * effectively equivalent, but much lighter weight.
4584853ac43aSMatt Mackall */
4585853ac43aSMatt Mackall
458641ffe5d5SHugh Dickins static struct file_system_type shmem_fs_type = {
4587853ac43aSMatt Mackall .name = "tmpfs",
4588f3235626SDavid Howells .init_fs_context = ramfs_init_fs_context,
4589d7167b14SAl Viro .parameters = ramfs_fs_parameters,
459036ce9d76SRoberto Sassu .kill_sb = ramfs_kill_sb,
45912b8576cbSEric W. Biederman .fs_flags = FS_USERNS_MOUNT,
4592853ac43aSMatt Mackall };
4593853ac43aSMatt Mackall
45949096bbe9SMiaohe Lin void __init shmem_init(void)
4595853ac43aSMatt Mackall {
459641ffe5d5SHugh Dickins BUG_ON(register_filesystem(&shmem_fs_type) != 0);
4597853ac43aSMatt Mackall
459841ffe5d5SHugh Dickins shm_mnt = kern_mount(&shmem_fs_type);
4599853ac43aSMatt Mackall BUG_ON(IS_ERR(shm_mnt));
4600853ac43aSMatt Mackall }
4601853ac43aSMatt Mackall
460210a9c496SChristoph Hellwig int shmem_unuse(unsigned int type)
4603853ac43aSMatt Mackall {
4604853ac43aSMatt Mackall return 0;
4605853ac43aSMatt Mackall }
4606853ac43aSMatt Mackall
4607d7c9e99aSAlexey Gladkov int shmem_lock(struct file *file, int lock, struct ucounts *ucounts)
46083f96b79aSHugh Dickins {
46093f96b79aSHugh Dickins return 0;
46103f96b79aSHugh Dickins }
46113f96b79aSHugh Dickins
461224513264SHugh Dickins void shmem_unlock_mapping(struct address_space *mapping)
461324513264SHugh Dickins {
461424513264SHugh Dickins }
461524513264SHugh Dickins
4616c01d5b30SHugh Dickins #ifdef CONFIG_MMU
4617c01d5b30SHugh Dickins unsigned long shmem_get_unmapped_area(struct file *file,
4618c01d5b30SHugh Dickins unsigned long addr, unsigned long len,
4619c01d5b30SHugh Dickins unsigned long pgoff, unsigned long flags)
4620c01d5b30SHugh Dickins {
4621c01d5b30SHugh Dickins return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
4622c01d5b30SHugh Dickins }
4623c01d5b30SHugh Dickins #endif
4624c01d5b30SHugh Dickins
462541ffe5d5SHugh Dickins void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
462694c1e62dSHugh Dickins {
462741ffe5d5SHugh Dickins truncate_inode_pages_range(inode->i_mapping, lstart, lend);
462894c1e62dSHugh Dickins }
462994c1e62dSHugh Dickins EXPORT_SYMBOL_GPL(shmem_truncate_range);
463094c1e62dSHugh Dickins
4631853ac43aSMatt Mackall #define shmem_vm_ops
generic_file_vm_ops 4632d09e8ca6SPasha Tatashin #define shmem_anon_vm_ops generic_file_vm_ops 46330b0a0806SHugh Dickins #define shmem_file_operations ramfs_file_operations 46340b0a0806SHugh Dickins #define shmem_acct_size(flags, size) 0 46350b0a0806SHugh Dickins #define shmem_unacct_size(flags, size) do {} while (0) 4636853ac43aSMatt Mackall 463771480663SCarlos Maiolino static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb, struct inode *dir, 463871480663SCarlos Maiolino umode_t mode, dev_t dev, unsigned long flags) 463971480663SCarlos Maiolino { 464071480663SCarlos Maiolino struct inode *inode = ramfs_get_inode(sb, dir, mode, dev); 464171480663SCarlos Maiolino return inode ? inode : ERR_PTR(-ENOSPC); 464271480663SCarlos Maiolino } 464371480663SCarlos Maiolino 4644853ac43aSMatt Mackall #endif /* CONFIG_SHMEM */ 4645853ac43aSMatt Mackall 4646853ac43aSMatt Mackall /* common code */ 46471da177e4SLinus Torvalds 4648703321b6SMatthew Auld static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, loff_t size, 4649c7277090SEric Paris unsigned long flags, unsigned int i_flags) 46501da177e4SLinus Torvalds { 46511da177e4SLinus Torvalds struct inode *inode; 465293dec2daSAl Viro struct file *res; 46531da177e4SLinus Torvalds 4654703321b6SMatthew Auld if (IS_ERR(mnt)) 4655703321b6SMatthew Auld return ERR_CAST(mnt); 46561da177e4SLinus Torvalds 4657285b2c4fSHugh Dickins if (size < 0 || size > MAX_LFS_FILESIZE) 46581da177e4SLinus Torvalds return ERR_PTR(-EINVAL); 46591da177e4SLinus Torvalds 46601da177e4SLinus Torvalds if (shmem_acct_size(flags, size)) 46611da177e4SLinus Torvalds return ERR_PTR(-ENOMEM); 46621da177e4SLinus Torvalds 46637a80e5b8SGiuseppe Scrivano if (is_idmapped_mnt(mnt)) 46647a80e5b8SGiuseppe Scrivano return ERR_PTR(-EINVAL); 46657a80e5b8SGiuseppe Scrivano 46667a80e5b8SGiuseppe Scrivano inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL, 46677a80e5b8SGiuseppe Scrivano S_IFREG | S_IRWXUGO, 0, flags); 466871480663SCarlos Maiolino 466971480663SCarlos Maiolino if (IS_ERR(inode)) { 4670dac2d1f6SAl Viro shmem_unacct_size(flags, size); 467171480663SCarlos Maiolino return ERR_CAST(inode); 4672dac2d1f6SAl Viro } 4673c7277090SEric Paris inode->i_flags |= i_flags; 46741da177e4SLinus Torvalds inode->i_size = size; 46756d6b77f1SMiklos Szeredi clear_nlink(inode); /* It is unlinked */ 467626567cdbSAl Viro res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size)); 467793dec2daSAl Viro if (!IS_ERR(res)) 467893dec2daSAl Viro res = alloc_file_pseudo(inode, mnt, name, O_RDWR, 46794b42af81SAl Viro &shmem_file_operations); 46806b4d0b27SAl Viro if (IS_ERR(res)) 468193dec2daSAl Viro iput(inode); 46826b4d0b27SAl Viro return res; 46831da177e4SLinus Torvalds } 4684c7277090SEric Paris 4685c7277090SEric Paris /** 4686c7277090SEric Paris * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be 4687c7277090SEric Paris * kernel internal. There will be NO LSM permission checks against the 4688c7277090SEric Paris * underlying inode. So users of this interface must do LSM checks at a 4689e1832f29SStephen Smalley * higher layer. The users are the big_key and shm implementations. LSM 4690e1832f29SStephen Smalley * checks are provided at the key or shm level rather than the inode. 
4691c7277090SEric Paris * @name: name for dentry (to be seen in /proc/<pid>/maps)
4692c7277090SEric Paris * @size: size to be set for the file
4693c7277090SEric Paris * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
4694c7277090SEric Paris */
4695c7277090SEric Paris struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
4696c7277090SEric Paris {
4697703321b6SMatthew Auld return __shmem_file_setup(shm_mnt, name, size, flags, S_PRIVATE);
4698c7277090SEric Paris }
4699c7277090SEric Paris
4700c7277090SEric Paris /**
4701c7277090SEric Paris * shmem_file_setup - get an unlinked file living in tmpfs
4702c7277090SEric Paris * @name: name for dentry (to be seen in /proc/<pid>/maps)
4703c7277090SEric Paris * @size: size to be set for the file
4704c7277090SEric Paris * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
4705c7277090SEric Paris */
4706c7277090SEric Paris struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
4707c7277090SEric Paris {
4708703321b6SMatthew Auld return __shmem_file_setup(shm_mnt, name, size, flags, 0);
4709c7277090SEric Paris }
4710395e0ddcSKeith Packard EXPORT_SYMBOL_GPL(shmem_file_setup);
47111da177e4SLinus Torvalds
471246711810SRandy Dunlap /**
4713703321b6SMatthew Auld * shmem_file_setup_with_mnt - get an unlinked file living in tmpfs
4714703321b6SMatthew Auld * @mnt: the tmpfs mount where the file will be created
4715703321b6SMatthew Auld * @name: name for dentry (to be seen in /proc/<pid>/maps)
4716703321b6SMatthew Auld * @size: size to be set for the file
4717703321b6SMatthew Auld * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
4718703321b6SMatthew Auld */
4719703321b6SMatthew Auld struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name,
4720703321b6SMatthew Auld loff_t size, unsigned long flags)
4721703321b6SMatthew Auld {
4722703321b6SMatthew Auld return __shmem_file_setup(mnt, name, size, flags, 0);
4723703321b6SMatthew Auld }
4724703321b6SMatthew Auld EXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt);
4725703321b6SMatthew Auld
4726703321b6SMatthew Auld /**
47271da177e4SLinus Torvalds * shmem_zero_setup - setup a shared anonymous mapping
472845e55300SPeter Collingbourne * @vma: the vma to be mmapped, as prepared by do_mmap
47291da177e4SLinus Torvalds */
47301da177e4SLinus Torvalds int shmem_zero_setup(struct vm_area_struct *vma)
47311da177e4SLinus Torvalds {
47321da177e4SLinus Torvalds struct file *file;
47331da177e4SLinus Torvalds loff_t size = vma->vm_end - vma->vm_start;
47341da177e4SLinus Torvalds
473566fc1303SHugh Dickins /*
4736c1e8d7c6SMichel Lespinasse * Cloning a new file under mmap_lock leads to a lock ordering conflict
473766fc1303SHugh Dickins * between XFS directory reading and selinux: since this file is only
473866fc1303SHugh Dickins * accessible to the user through its mapping, use S_PRIVATE flag to
473966fc1303SHugh Dickins * bypass file security, in the same way as shmem_kernel_file_setup().
474066fc1303SHugh Dickins */
4741703321b6SMatthew Auld file = shmem_kernel_file_setup("dev/zero", size, vma->vm_flags);
47421da177e4SLinus Torvalds if (IS_ERR(file))
47431da177e4SLinus Torvalds return PTR_ERR(file);
47441da177e4SLinus Torvalds
47451da177e4SLinus Torvalds if (vma->vm_file)
47461da177e4SLinus Torvalds fput(vma->vm_file);
47471da177e4SLinus Torvalds vma->vm_file = file;
4748d09e8ca6SPasha Tatashin vma->vm_ops = &shmem_anon_vm_ops;
4749f3f0e1d2SKirill A.
Shutemov 47501da177e4SLinus Torvalds return 0; 47511da177e4SLinus Torvalds } 4752d9d90e5eSHugh Dickins 4753d9d90e5eSHugh Dickins /** 4754f01b2b3eSMatthew Wilcox (Oracle) * shmem_read_folio_gfp - read into page cache, using specified page allocation flags. 4755f01b2b3eSMatthew Wilcox (Oracle) * @mapping: the folio's address_space 4756f01b2b3eSMatthew Wilcox (Oracle) * @index: the folio index 4757d9d90e5eSHugh Dickins * @gfp: the page allocator flags to use if allocating 4758d9d90e5eSHugh Dickins * 4759d9d90e5eSHugh Dickins * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)", 4760d9d90e5eSHugh Dickins * with any new page allocations done using the specified allocation flags. 47617e0a1265SMatthew Wilcox (Oracle) * But read_cache_page_gfp() uses the ->read_folio() method: which does not 4762d9d90e5eSHugh Dickins * suit tmpfs, since it may have pages in swapcache, and needs to find those 4763d9d90e5eSHugh Dickins * for itself; although drivers/gpu/drm i915 and ttm rely upon this support. 4764d9d90e5eSHugh Dickins * 476568da9f05SHugh Dickins * i915_gem_object_get_pages_gtt() mixes __GFP_NORETRY | __GFP_NOWARN in 476668da9f05SHugh Dickins * with the mapping_gfp_mask(), to avoid OOMing the machine unnecessarily. 4767d9d90e5eSHugh Dickins */ 4768f01b2b3eSMatthew Wilcox (Oracle) struct folio *shmem_read_folio_gfp(struct address_space *mapping, 4769d9d90e5eSHugh Dickins pgoff_t index, gfp_t gfp) 4770d9d90e5eSHugh Dickins { 477168da9f05SHugh Dickins #ifdef CONFIG_SHMEM 477268da9f05SHugh Dickins struct inode *inode = mapping->host; 4773a3a9c397SMatthew Wilcox (Oracle) struct folio *folio; 477468da9f05SHugh Dickins int error; 477568da9f05SHugh Dickins 477630e6a51dSHui Su BUG_ON(!shmem_mapping(mapping)); 4777a3a9c397SMatthew Wilcox (Oracle) error = shmem_get_folio_gfp(inode, index, &folio, SGP_CACHE, 4778cfda0526SMike Rapoport gfp, NULL, NULL, NULL); 477968da9f05SHugh Dickins if (error) 4780a7605426SYang Shi return ERR_PTR(error); 4781a7605426SYang Shi 4782a3a9c397SMatthew Wilcox (Oracle) folio_unlock(folio); 4783f01b2b3eSMatthew Wilcox (Oracle) return folio; 4784f01b2b3eSMatthew Wilcox (Oracle) #else 4785f01b2b3eSMatthew Wilcox (Oracle) /* 4786f01b2b3eSMatthew Wilcox (Oracle) * The tiny !SHMEM case uses ramfs without swap 4787f01b2b3eSMatthew Wilcox (Oracle) */ 4788f01b2b3eSMatthew Wilcox (Oracle) return mapping_read_folio_gfp(mapping, index, gfp); 4789f01b2b3eSMatthew Wilcox (Oracle) #endif 4790f01b2b3eSMatthew Wilcox (Oracle) } 4791f01b2b3eSMatthew Wilcox (Oracle) EXPORT_SYMBOL_GPL(shmem_read_folio_gfp); 4792f01b2b3eSMatthew Wilcox (Oracle) 4793f01b2b3eSMatthew Wilcox (Oracle) struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, 4794f01b2b3eSMatthew Wilcox (Oracle) pgoff_t index, gfp_t gfp) 4795f01b2b3eSMatthew Wilcox (Oracle) { 4796f01b2b3eSMatthew Wilcox (Oracle) struct folio *folio = shmem_read_folio_gfp(mapping, index, gfp); 4797f01b2b3eSMatthew Wilcox (Oracle) struct page *page; 4798f01b2b3eSMatthew Wilcox (Oracle) 4799f01b2b3eSMatthew Wilcox (Oracle) if (IS_ERR(folio)) 4800f01b2b3eSMatthew Wilcox (Oracle) return &folio->page; 4801f01b2b3eSMatthew Wilcox (Oracle) 4802a3a9c397SMatthew Wilcox (Oracle) page = folio_file_page(folio, index); 4803a7605426SYang Shi if (PageHWPoison(page)) { 4804a3a9c397SMatthew Wilcox (Oracle) folio_put(folio); 4805a7605426SYang Shi return ERR_PTR(-EIO); 4806a7605426SYang Shi } 4807a7605426SYang Shi 480868da9f05SHugh Dickins return page; 4809d9d90e5eSHugh Dickins } 4810d9d90e5eSHugh Dickins 
EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp); 4811
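
/*
 * Hedged usage sketch, not part of the original shmem.c: roughly how an
 * in-kernel user might combine shmem_file_setup() and
 * shmem_read_mapping_page_gfp() above.  The function name and the
 * "example-buffer" dentry name are made up for illustration; real users such
 * as drm/i915, ttm and the SysV shm code have their own wrappers and more
 * careful error handling.
 */
static int __maybe_unused shmem_example_fill_first_page(struct file **filep)
{
	struct file *file;
	struct page *page;
	void *kaddr;

	/* Unlinked tmpfs file; VM_NORESERVE skips up-front swap accounting. */
	file = shmem_file_setup("example-buffer", 1024 * 1024, VM_NORESERVE);
	if (IS_ERR(file))
		return PTR_ERR(file);

	/*
	 * Find or allocate page 0 of the file.  Mixing __GFP_NORETRY and
	 * __GFP_NOWARN into the mapping's gfp mask keeps a failed allocation
	 * from invoking the OOM killer, as noted for i915 above.
	 */
	page = shmem_read_mapping_page_gfp(file->f_mapping, 0,
			mapping_gfp_mask(file->f_mapping) |
			__GFP_NORETRY | __GFP_NOWARN);
	if (IS_ERR(page)) {
		fput(file);
		return PTR_ERR(page);
	}

	kaddr = kmap_local_page(page);
	memset(kaddr, 0, PAGE_SIZE);
	kunmap_local(kaddr);
	set_page_dirty(page);	/* we wrote to it through the kernel mapping */
	put_page(page);

	*filep = file;	/* caller keeps the file; the final fput() frees it all */
	return 0;
}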