/*
 * Resizable virtual memory filesystem for Linux.
 *
 * Copyright (C) 2000 Linus Torvalds.
 *		 2000 Transmeta Corp.
 *		 2000-2001 Christoph Rohland
 *		 2000-2001 SAP AG
 *		 2002 Red Hat Inc.
 * Copyright (C) 2002-2011 Hugh Dickins.
 * Copyright (C) 2011 Google Inc.
 * Copyright (C) 2002-2005 VERITAS Software Corporation.
 * Copyright (C) 2004 Andi Kleen, SuSE Labs
 *
 * Extended attribute support for tmpfs:
 * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
 * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
 *
 * tiny-shmem:
 * Copyright (c) 2004, 2008 Matt Mackall <mpm@selenic.com>
 *
 * This file is released under the GPL.
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/vfs.h>
#include <linux/mount.h>
#include <linux/ramfs.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/uio.h>

static struct vfsmount *shm_mnt;

#ifdef CONFIG_SHMEM
/*
 * This virtual memory filesystem is heavily based on the ramfs. It
 * extends ramfs by the ability to use swap and honor resource limits
 * which makes it a completely usable filesystem.
 */

#include <linux/xattr.h>
#include <linux/exportfs.h>
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
#include <linux/mman.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/shmem_fs.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/percpu_counter.h>
#include <linux/falloc.h>
#include <linux/splice.h>
#include <linux/security.h>
#include <linux/swapops.h>
#include <linux/mempolicy.h>
#include <linux/namei.h>
#include <linux/ctype.h>
#include <linux/migrate.h>
#include <linux/highmem.h>
#include <linux/seq_file.h>
#include <linux/magic.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
#include <uapi/linux/memfd.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>

#include "internal.h"

#define BLOCKS_PER_PAGE  (PAGE_SIZE/512)
#define VM_ACCT(size)    (PAGE_ALIGN(size) >> PAGE_SHIFT)

/* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20

/* Symlink up to this size is kmalloc'ed instead of using a swappable page */
#define SHORT_SYMLINK_LEN 128

/*
 * shmem_fallocate communicates with shmem_fault or shmem_writepage via
 * inode->i_private (with i_mutex making sure that it has only one user at
 * a time): we would prefer not to enlarge the shmem inode just for that.
 */
struct shmem_falloc {
	wait_queue_head_t *waitq; /* faults into hole wait for punch to end */
	pgoff_t start;		/* start of range currently being fallocated */
	pgoff_t next;		/* the next page offset to be fallocated */
	pgoff_t nr_falloced;	/* how many new pages have been fallocated */
	pgoff_t nr_unswapped;	/* how often writepage refused to swap out */
};

/* Flag allocation requirements to shmem_getpage */
enum sgp_type {
	SGP_READ,	/* don't exceed i_size, don't allocate page */
	SGP_CACHE,	/* don't exceed i_size, may allocate page */
	SGP_WRITE,	/* may exceed i_size, may allocate !Uptodate page */
	SGP_FALLOC,	/* like SGP_WRITE, but make existing page Uptodate */
};

#ifdef CONFIG_TMPFS
static unsigned long shmem_default_max_blocks(void)
{
	return totalram_pages / 2;
}

static unsigned long shmem_default_max_inodes(void)
{
	return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
}
#endif

static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
static int shmem_replace_page(struct page **pagep, gfp_t gfp,
				struct shmem_inode_info *info, pgoff_t index);
static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
		struct page **pagep, enum sgp_type sgp,
		gfp_t gfp, struct mm_struct *fault_mm, int *fault_type);

static inline int shmem_getpage(struct inode *inode, pgoff_t index,
		struct page **pagep, enum sgp_type sgp)
{
	return shmem_getpage_gfp(inode, index, pagep, sgp,
		mapping_gfp_mask(inode->i_mapping), NULL, NULL);
}

static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
{
	return sb->s_fs_info;
}

/*
 * shmem_file_setup pre-accounts the whole fixed size of a VM object,
 * for shared memory and for shared anonymous (/dev/zero) mappings
 * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1),
 * consistent with the pre-accounting of private mappings ...
 */
static inline int shmem_acct_size(unsigned long flags, loff_t size)
{
	return (flags & VM_NORESERVE) ?
		0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
}

static inline void shmem_unacct_size(unsigned long flags, loff_t size)
{
	if (!(flags & VM_NORESERVE))
		vm_unacct_memory(VM_ACCT(size));
}

static inline int shmem_reacct_size(unsigned long flags,
		loff_t oldsize, loff_t newsize)
{
	if (!(flags & VM_NORESERVE)) {
		if (VM_ACCT(newsize) > VM_ACCT(oldsize))
			return security_vm_enough_memory_mm(current->mm,
					VM_ACCT(newsize) - VM_ACCT(oldsize));
		else if (VM_ACCT(newsize) < VM_ACCT(oldsize))
			vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize));
	}
	return 0;
}

/*
 * ... whereas tmpfs objects are accounted incrementally as
 * pages are allocated, in order to allow large sparse files.
 * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
 * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
 */
static inline int shmem_acct_block(unsigned long flags)
{
	return (flags & VM_NORESERVE) ?
		security_vm_enough_memory_mm(current->mm, VM_ACCT(PAGE_SIZE)) : 0;
}

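/*
 * Illustrative sketch (userspace, not part of the kernel build): shows the
 * page-rounding arithmetic behind VM_ACCT() and shmem_reacct_size().  The
 * EX_* names and the byte counts are made up for the example, which assumes
 * 4KiB pages.
 */
#if 0	/* example only */
#include <stdio.h>

#define EX_PAGE_SIZE	4096UL
/* local restatement of VM_ACCT(): round a byte count up to whole pages */
#define EX_VM_ACCT(size)	(((size) + EX_PAGE_SIZE - 1) / EX_PAGE_SIZE)

int main(void)
{
	unsigned long oldsize = 5000;	/* bytes, made-up value */
	unsigned long newsize = 10000;	/* bytes, made-up value */

	/* 5000 bytes -> 2 pages, 10000 bytes -> 3 pages */
	printf("old %lu pages, new %lu pages\n",
	       EX_VM_ACCT(oldsize), EX_VM_ACCT(newsize));

	/* shmem_reacct_size() would charge only the difference: 1 page here */
	printf("delta %lu pages\n", EX_VM_ACCT(newsize) - EX_VM_ACCT(oldsize));
	return 0;
}
#endif
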
static inline void shmem_unacct_blocks(unsigned long flags, long pages)
{
	if (flags & VM_NORESERVE)
		vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
}

static const struct super_operations shmem_ops;
static const struct address_space_operations shmem_aops;
static const struct file_operations shmem_file_operations;
static const struct inode_operations shmem_inode_operations;
static const struct inode_operations shmem_dir_inode_operations;
static const struct inode_operations shmem_special_inode_operations;
static const struct vm_operations_struct shmem_vm_ops;

static LIST_HEAD(shmem_swaplist);
static DEFINE_MUTEX(shmem_swaplist_mutex);

static int shmem_reserve_inode(struct super_block *sb)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	if (sbinfo->max_inodes) {
		spin_lock(&sbinfo->stat_lock);
		if (!sbinfo->free_inodes) {
			spin_unlock(&sbinfo->stat_lock);
			return -ENOSPC;
		}
		sbinfo->free_inodes--;
		spin_unlock(&sbinfo->stat_lock);
	}
	return 0;
}

static void shmem_free_inode(struct super_block *sb)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	if (sbinfo->max_inodes) {
		spin_lock(&sbinfo->stat_lock);
		sbinfo->free_inodes++;
		spin_unlock(&sbinfo->stat_lock);
	}
}

/**
 * shmem_recalc_inode - recalculate the block usage of an inode
 * @inode: inode to recalc
 *
 * We have to calculate the free blocks since the mm can drop
 * undirtied hole pages behind our back.
 *
 * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
 * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
 *
 * It has to be called with the spinlock held.
 */
static void shmem_recalc_inode(struct inode *inode)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	long freed;

	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
	if (freed > 0) {
		struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
		if (sbinfo->max_blocks)
			percpu_counter_add(&sbinfo->used_blocks, -freed);
		info->alloced -= freed;
		inode->i_blocks -= freed * BLOCKS_PER_PAGE;
		shmem_unacct_blocks(info->flags, freed);
	}
}

/*
 * Replace item expected in radix tree by a new item, while holding tree lock.
 */
static int shmem_radix_tree_replace(struct address_space *mapping,
			pgoff_t index, void *expected, void *replacement)
{
	void **pslot;
	void *item;

	VM_BUG_ON(!expected);
	VM_BUG_ON(!replacement);
	pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
	if (!pslot)
		return -ENOENT;
	item = radix_tree_deref_slot_protected(pslot, &mapping->tree_lock);
	if (item != expected)
		return -ENOENT;
	radix_tree_replace_slot(pslot, replacement);
	return 0;
}

/*
 * Sometimes, before we decide whether to proceed or to fail, we must check
 * that an entry was not already brought back from swap by a racing thread.
 *
 * Checking page is not enough: by the time a SwapCache page is locked, it
 * might be reused, and again be SwapCache, using the same swap as before.
 */
static bool shmem_confirm_swap(struct address_space *mapping,
			       pgoff_t index, swp_entry_t swap)
{
	void *item;

	rcu_read_lock();
	item = radix_tree_lookup(&mapping->page_tree, index);
	rcu_read_unlock();
	return item == swp_to_radix_entry(swap);
}

/*
 * Definitions for "huge tmpfs": tmpfs mounted with the huge= option
 *
 * SHMEM_HUGE_NEVER:
 *	disables huge pages for the mount;
 * SHMEM_HUGE_ALWAYS:
 *	enables huge pages for the mount;
 * SHMEM_HUGE_WITHIN_SIZE:
 *	only allocate huge pages if the page will be fully within i_size,
 *	also respect fadvise()/madvise() hints;
 * SHMEM_HUGE_ADVISE:
 *	only allocate huge pages if requested with fadvise()/madvise();
 */

#define SHMEM_HUGE_NEVER	0
#define SHMEM_HUGE_ALWAYS	1
#define SHMEM_HUGE_WITHIN_SIZE	2
#define SHMEM_HUGE_ADVISE	3

/*
 * Special values.
 * Only can be set via /sys/kernel/mm/transparent_hugepage/shmem_enabled:
 *
 * SHMEM_HUGE_DENY:
 *	disables huge on shm_mnt and all mounts, for emergency use;
 * SHMEM_HUGE_FORCE:
 *	enables huge on shm_mnt and all mounts, w/o needing option, for testing;
 *
 */
#define SHMEM_HUGE_DENY		(-1)
#define SHMEM_HUGE_FORCE	(-2)

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* ifdef here to avoid bloating shmem.o when not necessary */

int shmem_huge __read_mostly;

static int shmem_parse_huge(const char *str)
{
	if (!strcmp(str, "never"))
		return SHMEM_HUGE_NEVER;
	if (!strcmp(str, "always"))
		return SHMEM_HUGE_ALWAYS;
	if (!strcmp(str, "within_size"))
		return SHMEM_HUGE_WITHIN_SIZE;
	if (!strcmp(str, "advise"))
		return SHMEM_HUGE_ADVISE;
	if (!strcmp(str, "deny"))
		return SHMEM_HUGE_DENY;
	if (!strcmp(str, "force"))
		return SHMEM_HUGE_FORCE;
	return -EINVAL;
}

static const char *shmem_format_huge(int huge)
{
	switch (huge) {
	case SHMEM_HUGE_NEVER:
		return "never";
	case SHMEM_HUGE_ALWAYS:
		return "always";
	case SHMEM_HUGE_WITHIN_SIZE:
		return "within_size";
	case SHMEM_HUGE_ADVISE:
		return "advise";
	case SHMEM_HUGE_DENY:
		return "deny";
	case SHMEM_HUGE_FORCE:
		return "force";
	default:
		VM_BUG_ON(1);
		return "bad_val";
	}
}
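
/*
 * Illustrative sketch (userspace, not part of the kernel build): one way to
 * exercise the two knobs named above, the per-mount huge= option and the
 * global shmem_enabled override.  Requires CONFIG_TRANSPARENT_HUGEPAGE and
 * root; the mount point path is made up for the example.
 */
#if 0	/* example only */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	FILE *f;

	/* Mount a tmpfs instance that may use huge pages within i_size. */
	if (mount("tmpfs", "/mnt/hugetmp", "tmpfs", 0,
		  "size=1G,huge=within_size") != 0)
		perror("mount");

	/* Emergency override for all mounts, as described above. */
	f = fopen("/sys/kernel/mm/transparent_hugepage/shmem_enabled", "w");
	if (f) {
		fputs("deny", f);
		fclose(f);
	}
	return 0;
}
#endif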

#else /* !CONFIG_TRANSPARENT_HUGEPAGE */

#define shmem_huge SHMEM_HUGE_DENY

#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * Like add_to_page_cache_locked, but error if expected item has gone.
 */
static int shmem_add_to_page_cache(struct page *page,
				   struct address_space *mapping,
				   pgoff_t index, void *expected)
{
	int error;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);

	get_page(page);
	page->mapping = mapping;
	page->index = index;

	spin_lock_irq(&mapping->tree_lock);
	if (!expected)
		error = radix_tree_insert(&mapping->page_tree, index, page);
	else
		error = shmem_radix_tree_replace(mapping, index, expected,
								page);
	if (!error) {
		mapping->nrpages++;
		__inc_zone_page_state(page, NR_FILE_PAGES);
		__inc_zone_page_state(page, NR_SHMEM);
		spin_unlock_irq(&mapping->tree_lock);
	} else {
		page->mapping = NULL;
		spin_unlock_irq(&mapping->tree_lock);
		put_page(page);
	}
	return error;
}

/*
 * Like delete_from_page_cache, but substitutes swap for page.
 */
static void shmem_delete_from_page_cache(struct page *page, void *radswap)
{
	struct address_space *mapping = page->mapping;
	int error;

	spin_lock_irq(&mapping->tree_lock);
	error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
	page->mapping = NULL;
	mapping->nrpages--;
	__dec_zone_page_state(page, NR_FILE_PAGES);
	__dec_zone_page_state(page, NR_SHMEM);
	spin_unlock_irq(&mapping->tree_lock);
	put_page(page);
	BUG_ON(error);
}

/*
 * Remove swap entry from radix tree, free the swap and its page cache.
 */
static int shmem_free_swap(struct address_space *mapping,
			   pgoff_t index, void *radswap)
{
	void *old;

	spin_lock_irq(&mapping->tree_lock);
	old = radix_tree_delete_item(&mapping->page_tree, index, radswap);
	spin_unlock_irq(&mapping->tree_lock);
	if (old != radswap)
		return -ENOENT;
	free_swap_and_cache(radix_to_swp_entry(radswap));
	return 0;
}

/*
 * Determine (in bytes) how many of the shmem object's pages mapped by the
 * given offsets are swapped out.
 *
 * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
 * as long as the inode doesn't go away and racy results are not a problem.
 */
unsigned long shmem_partial_swap_usage(struct address_space *mapping,
						pgoff_t start, pgoff_t end)
{
	struct radix_tree_iter iter;
	void **slot;
	struct page *page;
	unsigned long swapped = 0;

	rcu_read_lock();

	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
		if (iter.index >= end)
			break;

		page = radix_tree_deref_slot(slot);

		if (radix_tree_deref_retry(page)) {
			slot = radix_tree_iter_retry(&iter);
			continue;
		}

		if (radix_tree_exceptional_entry(page))
			swapped++;

		if (need_resched()) {
			cond_resched_rcu();
			slot = radix_tree_iter_next(&iter);
		}
	}

	rcu_read_unlock();

	return swapped << PAGE_SHIFT;
}

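/*
 * Illustrative sketch (userspace, not part of the kernel build): the swap
 * usage computed by the helpers in this section is what is reported per
 * mapping, e.g. as the "Swap:" figure in /proc/<pid>/smaps.  The file path
 * and size below are made up; the mapping only shows nonzero swap once the
 * system has actually pushed some of it out under memory pressure.
 */
#if 0	/* example only */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	/* A path on a tmpfs mount, made up for the example. */
	int fd = open("/dev/shm/example", O_RDWR | O_CREAT, 0600);
	char *p;

	if (fd < 0 || ftruncate(fd, 1 << 20) != 0)
		return 1;
	p = mmap(NULL, 1 << 20, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	p[0] = 1;	/* instantiate at least one page */

	printf("mapping at %p; inspect /proc/%d/smaps\n", p, getpid());
	return 0;
}
#endif
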
/*
 * Determine (in bytes) how many of the shmem object's pages mapped by the
 * given vma is swapped out.
 *
 * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
 * as long as the inode doesn't go away and racy results are not a problem.
 */
unsigned long shmem_swap_usage(struct vm_area_struct *vma)
{
	struct inode *inode = file_inode(vma->vm_file);
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct address_space *mapping = inode->i_mapping;
	unsigned long swapped;

	/* Be careful as we don't hold info->lock */
	swapped = READ_ONCE(info->swapped);

	/*
	 * The easier cases are when the shmem object has nothing in swap, or
	 * the vma maps it whole. Then we can simply use the stats that we
	 * already track.
	 */
	if (!swapped)
		return 0;

	if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size)
		return swapped << PAGE_SHIFT;

	/* Here comes the more involved part */
	return shmem_partial_swap_usage(mapping,
			linear_page_index(vma, vma->vm_start),
			linear_page_index(vma, vma->vm_end));
}

/*
 * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
 */
void shmem_unlock_mapping(struct address_space *mapping)
{
	struct pagevec pvec;
	pgoff_t indices[PAGEVEC_SIZE];
	pgoff_t index = 0;

	pagevec_init(&pvec, 0);
	/*
	 * Minor point, but we might as well stop if someone else SHM_LOCKs it.
	 */
	while (!mapping_unevictable(mapping)) {
		/*
		 * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
		 * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
		 */
		pvec.nr = find_get_entries(mapping, index,
					   PAGEVEC_SIZE, pvec.pages, indices);
		if (!pvec.nr)
			break;
		index = indices[pvec.nr - 1] + 1;
		pagevec_remove_exceptionals(&pvec);
		check_move_unevictable_pages(pvec.pages, pvec.nr);
		pagevec_release(&pvec);
		cond_resched();
	}
}

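/*
 * Illustrative sketch (userspace, not part of the kernel build): the SysV
 * path that reaches shmem_unlock_mapping() above.  Segment key and size are
 * made up for the example; SHM_LOCK may require CAP_IPC_LOCK or a suitable
 * RLIMIT_MEMLOCK.
 */
#if 0	/* example only */
#include <stdio.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
	int id = shmget(IPC_PRIVATE, 1 << 20, IPC_CREAT | 0600);

	if (id < 0)
		return 1;
	/*
	 * SHM_LOCK keeps the segment's pages off the evictable LRU lists;
	 * SHM_UNLOCK is what ends up calling shmem_unlock_mapping() above
	 * to move them back.
	 */
	if (shmctl(id, SHM_LOCK, NULL) != 0)
		perror("SHM_LOCK");
	if (shmctl(id, SHM_UNLOCK, NULL) != 0)
		perror("SHM_UNLOCK");
	shmctl(id, IPC_RMID, NULL);
	return 0;
}
#endif
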
/*
 * Remove range of pages and swap entries from radix tree, and free them.
 * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
 */
static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
								 bool unfalloc)
{
	struct address_space *mapping = inode->i_mapping;
	struct shmem_inode_info *info = SHMEM_I(inode);
	pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
	pgoff_t end = (lend + 1) >> PAGE_SHIFT;
	unsigned int partial_start = lstart & (PAGE_SIZE - 1);
	unsigned int partial_end = (lend + 1) & (PAGE_SIZE - 1);
	struct pagevec pvec;
	pgoff_t indices[PAGEVEC_SIZE];
	long nr_swaps_freed = 0;
	pgoff_t index;
	int i;

	if (lend == -1)
		end = -1;	/* unsigned, so actually very big */

	pagevec_init(&pvec, 0);
	index = start;
	while (index < end) {
		pvec.nr = find_get_entries(mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE),
			pvec.pages, indices);
		if (!pvec.nr)
			break;
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			index = indices[i];
			if (index >= end)
				break;

			if (radix_tree_exceptional_entry(page)) {
				if (unfalloc)
					continue;
				nr_swaps_freed += !shmem_free_swap(mapping,
								index, page);
				continue;
			}

			if (!trylock_page(page))
				continue;
			if (!unfalloc || !PageUptodate(page)) {
				if (page->mapping == mapping) {
					VM_BUG_ON_PAGE(PageWriteback(page), page);
					truncate_inode_page(mapping, page);
				}
			}
			unlock_page(page);
		}
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		cond_resched();
		index++;
	}

	if (partial_start) {
		struct page *page = NULL;
		shmem_getpage(inode, start - 1, &page, SGP_READ);
		if (page) {
			unsigned int top = PAGE_SIZE;
			if (start > end) {
				top = partial_end;
				partial_end = 0;
			}
			zero_user_segment(page, partial_start, top);
			set_page_dirty(page);
			unlock_page(page);
			put_page(page);
		}
	}
	if (partial_end) {
		struct page *page = NULL;
		shmem_getpage(inode, end, &page, SGP_READ);
		if (page) {
			zero_user_segment(page, 0, partial_end);
			set_page_dirty(page);
			unlock_page(page);
			put_page(page);
		}
	}
	if (start >= end)
		return;

	index = start;
	while (index < end) {
		cond_resched();

		pvec.nr = find_get_entries(mapping, index,
				min(end - index, (pgoff_t)PAGEVEC_SIZE),
				pvec.pages, indices);
		if (!pvec.nr) {
			/* If all gone or hole-punch or unfalloc, we're done */
			if (index == start || end != -1)
				break;
			/* But if truncating, restart to make sure all gone */
			index = start;
			continue;
		}
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			index = indices[i];
			if (index >= end)
				break;

			if (radix_tree_exceptional_entry(page)) {
				if (unfalloc)
					continue;
				if (shmem_free_swap(mapping, index, page)) {
					/* Swap was replaced by page: retry */
					index--;
					break;
				}
				nr_swaps_freed++;
				continue;
			}

			lock_page(page);
			if (!unfalloc || !PageUptodate(page)) {
				if (page->mapping == mapping) {
					VM_BUG_ON_PAGE(PageWriteback(page), page);
					truncate_inode_page(mapping, page);
				} else {
					/* Page was replaced by swap: retry */
					unlock_page(page);
					index--;
					break;
				}
			}
			unlock_page(page);
		}
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		index++;
	}

	spin_lock(&info->lock);
	info->swapped -= nr_swaps_freed;
	shmem_recalc_inode(inode);
	spin_unlock(&info->lock);
}

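/*
 * Illustrative sketch (userspace, not part of the kernel build): punching a
 * hole in a tmpfs file is one caller that ends up in shmem_undo_range(), via
 * shmem_fallocate()/shmem_truncate_range().  The path, offsets and length
 * below are made up for the example.
 */
#if 0	/* example only */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <unistd.h>

int main(void)
{
	/* A path on a tmpfs mount, made up for the example. */
	int fd = open("/dev/shm/holes", O_RDWR | O_CREAT, 0600);

	if (fd < 0 || ftruncate(fd, 1 << 20) != 0)
		return 1;
	/* Drop the backing pages/swap for 128KiB starting at 64KiB. */
	return fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			 64 << 10, 128 << 10);
}
#endif
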
void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
{
	shmem_undo_range(inode, lstart, lend, false);
	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
}
EXPORT_SYMBOL_GPL(shmem_truncate_range);

static int shmem_getattr(struct vfsmount *mnt, struct dentry *dentry,
			 struct kstat *stat)
{
	struct inode *inode = dentry->d_inode;
	struct shmem_inode_info *info = SHMEM_I(inode);

	if (info->alloced - info->swapped != inode->i_mapping->nrpages) {
		spin_lock(&info->lock);
		shmem_recalc_inode(inode);
		spin_unlock(&info->lock);
	}
	generic_fillattr(inode, stat);
	return 0;
}

static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = d_inode(dentry);
	struct shmem_inode_info *info = SHMEM_I(inode);
	int error;

	error = inode_change_ok(inode, attr);
	if (error)
		return error;

	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
		loff_t oldsize = inode->i_size;
		loff_t newsize = attr->ia_size;

		/* protected by i_mutex */
		if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
		    (newsize > oldsize && (info->seals & F_SEAL_GROW)))
			return -EPERM;

		if (newsize != oldsize) {
			error = shmem_reacct_size(SHMEM_I(inode)->flags,
					oldsize, newsize);
			if (error)
				return error;
			i_size_write(inode, newsize);
			inode->i_ctime = inode->i_mtime = CURRENT_TIME;
		}
		if (newsize <= oldsize) {
			loff_t holebegin = round_up(newsize, PAGE_SIZE);
			if (oldsize > holebegin)
				unmap_mapping_range(inode->i_mapping,
							holebegin, 0, 1);
			if (info->alloced)
				shmem_truncate_range(inode,
							newsize, (loff_t)-1);
			/* unmap again to remove racily COWed private pages */
			if (oldsize > holebegin)
				unmap_mapping_range(inode->i_mapping,
							holebegin, 0, 1);
		}
	}

	setattr_copy(inode, attr);
	if (attr->ia_valid & ATTR_MODE)
		error = posix_acl_chmod(inode, inode->i_mode);
	return error;
}

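/*
 * Illustrative sketch (userspace, not part of the kernel build): the
 * F_SEAL_SHRINK/F_SEAL_GROW checks in shmem_setattr() above are what make a
 * resize of a sealed memfd fail with EPERM.  Assumes a libc that exposes
 * memfd_create() and the MFD_*/F_SEAL_* constants; the name and sizes are
 * made up for the example.
 */
#if 0	/* example only */
#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = memfd_create("sealed", MFD_ALLOW_SEALING);

	if (fd < 0 || ftruncate(fd, 4096) != 0)
		return 1;
	if (fcntl(fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK) != 0)
		return 1;
	/* Any further size change is refused: expect EPERM here. */
	return ftruncate(fd, 8192) == 0;
}
#endif
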
static void shmem_evict_inode(struct inode *inode)
{
	struct shmem_inode_info *info = SHMEM_I(inode);

	if (inode->i_mapping->a_ops == &shmem_aops) {
		shmem_unacct_size(info->flags, inode->i_size);
		inode->i_size = 0;
		shmem_truncate_range(inode, 0, (loff_t)-1);
		if (!list_empty(&info->swaplist)) {
			mutex_lock(&shmem_swaplist_mutex);
			list_del_init(&info->swaplist);
			mutex_unlock(&shmem_swaplist_mutex);
		}
	}

	simple_xattrs_free(&info->xattrs);
	WARN_ON(inode->i_blocks);
	shmem_free_inode(inode->i_sb);
	clear_inode(inode);
}

/*
 * If swap found in inode, free it and move page from swapcache to filecache.
 */
static int shmem_unuse_inode(struct shmem_inode_info *info,
			     swp_entry_t swap, struct page **pagep)
{
	struct address_space *mapping = info->vfs_inode.i_mapping;
	void *radswap;
	pgoff_t index;
	gfp_t gfp;
	int error = 0;

	radswap = swp_to_radix_entry(swap);
	index = radix_tree_locate_item(&mapping->page_tree, radswap);
	if (index == -1)
		return -EAGAIN;	/* tell shmem_unuse we found nothing */

	/*
	 * Move _head_ to start search for next from here.
	 * But be careful: shmem_evict_inode checks list_empty without taking
	 * mutex, and there's an instant in list_move_tail when info->swaplist
	 * would appear empty, if it were the only one on shmem_swaplist.
	 */
	if (shmem_swaplist.next != &info->swaplist)
		list_move_tail(&shmem_swaplist, &info->swaplist);

	gfp = mapping_gfp_mask(mapping);
	if (shmem_should_replace_page(*pagep, gfp)) {
		mutex_unlock(&shmem_swaplist_mutex);
		error = shmem_replace_page(pagep, gfp, info, index);
		mutex_lock(&shmem_swaplist_mutex);
		/*
		 * We needed to drop mutex to make that restrictive page
		 * allocation, but the inode might have been freed while we
		 * dropped it: although a racing shmem_evict_inode() cannot
		 * complete without emptying the radix_tree, our page lock
		 * on this swapcache page is not enough to prevent that -
		 * free_swap_and_cache() of our swap entry will only
		 * trylock_page(), removing swap from radix_tree whatever.
		 *
		 * We must not proceed to shmem_add_to_page_cache() if the
		 * inode has been freed, but of course we cannot rely on
		 * inode or mapping or info to check that. However, we can
		 * safely check if our swap entry is still in use (and here
		 * it can't have got reused for another page): if it's still
		 * in use, then the inode cannot have been freed yet, and we
		 * can safely proceed (if it's no longer in use, that tells
		 * nothing about the inode, but we don't need to unuse swap).
		 */
		if (!page_swapcount(*pagep))
			error = -ENOENT;
	}

	/*
	 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
	 * but also to hold up shmem_evict_inode(): so inode cannot be freed
	 * beneath us (pagelock doesn't help until the page is in pagecache).
	 */
	if (!error)
		error = shmem_add_to_page_cache(*pagep, mapping, index,
						radswap);
	if (error != -ENOMEM) {
		/*
		 * Truncation and eviction use free_swap_and_cache(), which
		 * only does trylock page: if we raced, best clean up here.
		 */
		delete_from_swap_cache(*pagep);
		set_page_dirty(*pagep);
		if (!error) {
			spin_lock(&info->lock);
			info->swapped--;
			spin_unlock(&info->lock);
			swap_free(swap);
		}
	}
	return error;
}

/*
 * Search through swapped inodes to find and replace swap by page.
 */
int shmem_unuse(swp_entry_t swap, struct page *page)
{
	struct list_head *this, *next;
	struct shmem_inode_info *info;
	struct mem_cgroup *memcg;
	int error = 0;

	/*
	 * There's a faint possibility that swap page was replaced before
	 * caller locked it: caller will come back later with the right page.
	 */
	if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
		goto out;

	/*
	 * Charge page using GFP_KERNEL while we can wait, before taking
	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
	 * Charged back to the user (not to caller) when swap account is used.
	 */
	error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg,
			false);
	if (error)
		goto out;
	/* No radix_tree_preload: swap entry keeps a place for page in tree */
	error = -EAGAIN;

	mutex_lock(&shmem_swaplist_mutex);
	list_for_each_safe(this, next, &shmem_swaplist) {
		info = list_entry(this, struct shmem_inode_info, swaplist);
		if (info->swapped)
			error = shmem_unuse_inode(info, swap, &page);
		else
			list_del_init(&info->swaplist);
		cond_resched();
		if (error != -EAGAIN)
			break;
		/* found nothing in this: move on to search the next */
	}
	mutex_unlock(&shmem_swaplist_mutex);

	if (error) {
		if (error != -ENOMEM)
			error = 0;
		mem_cgroup_cancel_charge(page, memcg, false);
	} else
		mem_cgroup_commit_charge(page, memcg, true, false);
out:
	unlock_page(page);
	put_page(page);
	return error;
}

/*
 * Move the page from the page cache to the swap cache.
 */
static int shmem_writepage(struct page *page, struct writeback_control *wbc)
{
	struct shmem_inode_info *info;
	struct address_space *mapping;
	struct inode *inode;
	swp_entry_t swap;
	pgoff_t index;

	BUG_ON(!PageLocked(page));
	mapping = page->mapping;
	index = page->index;
	inode = mapping->host;
	info = SHMEM_I(inode);
	if (info->flags & VM_LOCKED)
		goto redirty;
	if (!total_swap_pages)
		goto redirty;

	/*
	 * Our capabilities prevent regular writeback or sync from ever calling
	 * shmem_writepage; but a stacking filesystem might use ->writepage of
	 * its underlying filesystem, in which case tmpfs should write out to
	 * swap only in response to memory pressure, and not for the writeback
	 * threads or sync.
	 */
	if (!wbc->for_reclaim) {
		WARN_ON_ONCE(1);	/* Still happens? Tell us about it! */
		goto redirty;
	}

	/*
	 * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
	 * value into swapfile.c, the only way we can correctly account for a
	 * fallocated page arriving here is now to initialize it and write it.
	 *
	 * That's okay for a page already fallocated earlier, but if we have
	 * not yet completed the fallocation, then (a) we want to keep track
	 * of this page in case we have to undo it, and (b) it may not be a
	 * good idea to continue anyway, once we're pushing into swap.  So
	 * reactivate the page, and let shmem_fallocate() quit when too many.
	 */
	if (!PageUptodate(page)) {
		if (inode->i_private) {
			struct shmem_falloc *shmem_falloc;
			spin_lock(&inode->i_lock);
			shmem_falloc = inode->i_private;
			if (shmem_falloc &&
			    !shmem_falloc->waitq &&
			    index >= shmem_falloc->start &&
			    index < shmem_falloc->next)
				shmem_falloc->nr_unswapped++;
			else
				shmem_falloc = NULL;
			spin_unlock(&inode->i_lock);
			if (shmem_falloc)
				goto redirty;
		}
		clear_highpage(page);
		flush_dcache_page(page);
		SetPageUptodate(page);
	}

	swap = get_swap_page();
	if (!swap.val)
		goto redirty;

	if (mem_cgroup_try_charge_swap(page, swap))
		goto free_swap;

	/*
	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
	 * if it's not already there.  Do it now before the page is
	 * moved to swap cache, when its pagelock no longer protects
	 * the inode from eviction.  But don't unlock the mutex until
	 * we've incremented swapped, because shmem_unuse_inode() will
	 * prune a !swapped inode from the swaplist under this mutex.
	 */
	mutex_lock(&shmem_swaplist_mutex);
	if (list_empty(&info->swaplist))
		list_add_tail(&info->swaplist, &shmem_swaplist);

	if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
		spin_lock(&info->lock);
		shmem_recalc_inode(inode);
		info->swapped++;
		spin_unlock(&info->lock);

		swap_shmem_alloc(swap);
		shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));

		mutex_unlock(&shmem_swaplist_mutex);
		BUG_ON(page_mapped(page));
		swap_writepage(page, wbc);
		return 0;
	}

	mutex_unlock(&shmem_swaplist_mutex);
free_swap:
	swapcache_free(swap);
redirty:
	set_page_dirty(page);
	if (wbc->for_reclaim)
		return AOP_WRITEPAGE_ACTIVATE;	/* Return with page locked */
	unlock_page(page);
	return 0;
}

#if defined(CONFIG_NUMA) && defined(CONFIG_TMPFS)
static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
{
	char buffer[64];

	if (!mpol || mpol->mode == MPOL_DEFAULT)
		return;		/* show nothing */

	mpol_to_str(buffer, sizeof(buffer), mpol);

	seq_printf(seq, ",mpol=%s", buffer);
}

static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
{
	struct mempolicy *mpol = NULL;
	if (sbinfo->mpol) {
		spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
		mpol = sbinfo->mpol;
		mpol_get(mpol);
		spin_unlock(&sbinfo->stat_lock);
	}
	return mpol;
}
#else /* !CONFIG_NUMA || !CONFIG_TMPFS */
static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
{
}
static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
{
	return NULL;
}
#endif /* CONFIG_NUMA && CONFIG_TMPFS */
#ifndef CONFIG_NUMA
#define vm_policy vm_private_data
#endif

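/*
 * Illustrative sketch (userspace, not part of the kernel build): the mount
 * option that shmem_get_sbmpol()/shmem_show_mpol() above manage and report
 * back (as ",mpol=...") is set at mount time.  Requires CONFIG_NUMA and
 * root; the mount point, size and node list are made up for the example.
 */
#if 0	/* example only */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* Interleave allocations across NUMA nodes 0 and 1. */
	if (mount("tmpfs", "/mnt/numa-tmp", "tmpfs", 0,
		  "size=512M,mpol=interleave:0-1") != 0)
		perror("mount");
	return 0;
}
#endif
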
static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
			struct shmem_inode_info *info, pgoff_t index)
{
	struct vm_area_struct pvma;
	struct page *page;

	/* Create a pseudo vma that just contains the policy */
	pvma.vm_start = 0;
	/* Bias interleave by inode number to distribute better across nodes */
	pvma.vm_pgoff = index + info->vfs_inode.i_ino;
	pvma.vm_ops = NULL;
	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);

	page = swapin_readahead(swap, gfp, &pvma, 0);

	/* Drop reference taken by mpol_shared_policy_lookup() */
	mpol_cond_put(pvma.vm_policy);

	return page;
}

static struct page *shmem_alloc_page(gfp_t gfp,
			struct shmem_inode_info *info, pgoff_t index)
{
	struct vm_area_struct pvma;
	struct page *page;

	/* Create a pseudo vma that just contains the policy */
	pvma.vm_start = 0;
	/* Bias interleave by inode number to distribute better across nodes */
	pvma.vm_pgoff = index + info->vfs_inode.i_ino;
	pvma.vm_ops = NULL;
	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);

	page = alloc_pages_vma(gfp, 0, &pvma, 0, numa_node_id(), false);
	if (page) {
		__SetPageLocked(page);
		__SetPageSwapBacked(page);
	}

	/* Drop reference taken by mpol_shared_policy_lookup() */
	mpol_cond_put(pvma.vm_policy);

	return page;
}

/*
 * When a page is moved from swapcache to shmem filecache (either by the
 * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of
 * shmem_unuse_inode()), it may have been read in earlier from swap, in
 * ignorance of the mapping it belongs to.  If that mapping has special
 * constraints (like the gma500 GEM driver, which requires RAM below 4GB),
 * we may need to copy to a suitable page before moving to filecache.
 *
 * In a future release, this may well be extended to respect cpuset and
 * NUMA mempolicy, and applied also to anonymous pages in do_swap_page();
 * but for now it is a simple matter of zone.
1125bde05d1cSHugh Dickins */ 1126bde05d1cSHugh Dickins static bool shmem_should_replace_page(struct page *page, gfp_t gfp) 1127bde05d1cSHugh Dickins { 1128bde05d1cSHugh Dickins return page_zonenum(page) > gfp_zone(gfp); 1129bde05d1cSHugh Dickins } 1130bde05d1cSHugh Dickins 1131bde05d1cSHugh Dickins static int shmem_replace_page(struct page **pagep, gfp_t gfp, 1132bde05d1cSHugh Dickins struct shmem_inode_info *info, pgoff_t index) 1133bde05d1cSHugh Dickins { 1134bde05d1cSHugh Dickins struct page *oldpage, *newpage; 1135bde05d1cSHugh Dickins struct address_space *swap_mapping; 1136bde05d1cSHugh Dickins pgoff_t swap_index; 1137bde05d1cSHugh Dickins int error; 1138bde05d1cSHugh Dickins 1139bde05d1cSHugh Dickins oldpage = *pagep; 1140bde05d1cSHugh Dickins swap_index = page_private(oldpage); 1141bde05d1cSHugh Dickins swap_mapping = page_mapping(oldpage); 1142bde05d1cSHugh Dickins 1143bde05d1cSHugh Dickins /* 1144bde05d1cSHugh Dickins * We have arrived here because our zones are constrained, so don't 1145bde05d1cSHugh Dickins * limit chance of success by further cpuset and node constraints. 1146bde05d1cSHugh Dickins */ 1147bde05d1cSHugh Dickins gfp &= ~GFP_CONSTRAINT_MASK; 1148bde05d1cSHugh Dickins newpage = shmem_alloc_page(gfp, info, index); 1149bde05d1cSHugh Dickins if (!newpage) 1150bde05d1cSHugh Dickins return -ENOMEM; 1151bde05d1cSHugh Dickins 115209cbfeafSKirill A. Shutemov get_page(newpage); 1153bde05d1cSHugh Dickins copy_highpage(newpage, oldpage); 11540142ef6cSHugh Dickins flush_dcache_page(newpage); 1155bde05d1cSHugh Dickins 1156bde05d1cSHugh Dickins SetPageUptodate(newpage); 1157bde05d1cSHugh Dickins set_page_private(newpage, swap_index); 1158bde05d1cSHugh Dickins SetPageSwapCache(newpage); 1159bde05d1cSHugh Dickins 1160bde05d1cSHugh Dickins /* 1161bde05d1cSHugh Dickins * Our caller will very soon move newpage out of swapcache, but it's 1162bde05d1cSHugh Dickins * a nice clean interface for us to replace oldpage by newpage there. 1163bde05d1cSHugh Dickins */ 1164bde05d1cSHugh Dickins spin_lock_irq(&swap_mapping->tree_lock); 1165bde05d1cSHugh Dickins error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage, 1166bde05d1cSHugh Dickins newpage); 11670142ef6cSHugh Dickins if (!error) { 1168bde05d1cSHugh Dickins __inc_zone_page_state(newpage, NR_FILE_PAGES); 1169bde05d1cSHugh Dickins __dec_zone_page_state(oldpage, NR_FILE_PAGES); 11700142ef6cSHugh Dickins } 1171bde05d1cSHugh Dickins spin_unlock_irq(&swap_mapping->tree_lock); 1172bde05d1cSHugh Dickins 11730142ef6cSHugh Dickins if (unlikely(error)) { 11740142ef6cSHugh Dickins /* 11750142ef6cSHugh Dickins * Is this possible? I think not, now that our callers check 11760142ef6cSHugh Dickins * both PageSwapCache and page_private after getting page lock; 11770142ef6cSHugh Dickins * but be defensive. Reverse old to newpage for clear and free. 11780142ef6cSHugh Dickins */ 11790142ef6cSHugh Dickins oldpage = newpage; 11800142ef6cSHugh Dickins } else { 11816a93ca8fSJohannes Weiner mem_cgroup_migrate(oldpage, newpage); 1182bde05d1cSHugh Dickins lru_cache_add_anon(newpage); 11830142ef6cSHugh Dickins *pagep = newpage; 11840142ef6cSHugh Dickins } 1185bde05d1cSHugh Dickins 1186bde05d1cSHugh Dickins ClearPageSwapCache(oldpage); 1187bde05d1cSHugh Dickins set_page_private(oldpage, 0); 1188bde05d1cSHugh Dickins 1189bde05d1cSHugh Dickins unlock_page(oldpage); 119009cbfeafSKirill A. Shutemov put_page(oldpage); 119109cbfeafSKirill A. 
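/*
 * Editor's note (added for clarity, not in the original file): oldpage is
 * put twice deliberately.  On success it arrived holding two references,
 * the caller's and the swap cache's, and both roles have just been taken
 * over by newpage (via *pagep and the replaced radix_tree slot), so both
 * references are dropped here.  On the unlikely error path the pointers
 * were swapped above, so the same two puts free the unused new page.
 */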
Shutemov put_page(oldpage); 11920142ef6cSHugh Dickins return error; 1193bde05d1cSHugh Dickins } 1194bde05d1cSHugh Dickins 1195bde05d1cSHugh Dickins /* 119668da9f05SHugh Dickins * shmem_getpage_gfp - find page in cache, or get from swap, or allocate 11971da177e4SLinus Torvalds * 11981da177e4SLinus Torvalds * If we allocate a new one we do not mark it dirty. That's up to the 11991da177e4SLinus Torvalds * vm. If we swap it in we mark it dirty since we also free the swap 12009e18eb29SAndres Lagar-Cavilla * entry since a page cannot live in both the swap and page cache. 12019e18eb29SAndres Lagar-Cavilla * 12029e18eb29SAndres Lagar-Cavilla * fault_mm and fault_type are only supplied by shmem_fault: 12039e18eb29SAndres Lagar-Cavilla * otherwise they are NULL. 12041da177e4SLinus Torvalds */ 120541ffe5d5SHugh Dickins static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, 12069e18eb29SAndres Lagar-Cavilla struct page **pagep, enum sgp_type sgp, gfp_t gfp, 12079e18eb29SAndres Lagar-Cavilla struct mm_struct *fault_mm, int *fault_type) 12081da177e4SLinus Torvalds { 12091da177e4SLinus Torvalds struct address_space *mapping = inode->i_mapping; 121054af6042SHugh Dickins struct shmem_inode_info *info; 12111da177e4SLinus Torvalds struct shmem_sb_info *sbinfo; 12129e18eb29SAndres Lagar-Cavilla struct mm_struct *charge_mm; 121300501b53SJohannes Weiner struct mem_cgroup *memcg; 121427ab7006SHugh Dickins struct page *page; 12151da177e4SLinus Torvalds swp_entry_t swap; 12161da177e4SLinus Torvalds int error; 121754af6042SHugh Dickins int once = 0; 12181635f6a7SHugh Dickins int alloced = 0; 12191da177e4SLinus Torvalds 122009cbfeafSKirill A. Shutemov if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT)) 12211da177e4SLinus Torvalds return -EFBIG; 12221da177e4SLinus Torvalds repeat: 122354af6042SHugh Dickins swap.val = 0; 12240cd6144aSJohannes Weiner page = find_lock_entry(mapping, index); 122554af6042SHugh Dickins if (radix_tree_exceptional_entry(page)) { 122654af6042SHugh Dickins swap = radix_to_swp_entry(page); 122754af6042SHugh Dickins page = NULL; 122854af6042SHugh Dickins } 122954af6042SHugh Dickins 123075edd345SHugh Dickins if (sgp <= SGP_CACHE && 123109cbfeafSKirill A. Shutemov ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) { 123254af6042SHugh Dickins error = -EINVAL; 1233267a4c76SHugh Dickins goto unlock; 123454af6042SHugh Dickins } 123554af6042SHugh Dickins 123666d2f4d2SHugh Dickins if (page && sgp == SGP_WRITE) 123766d2f4d2SHugh Dickins mark_page_accessed(page); 123866d2f4d2SHugh Dickins 12391635f6a7SHugh Dickins /* fallocated page? */ 12401635f6a7SHugh Dickins if (page && !PageUptodate(page)) { 12411635f6a7SHugh Dickins if (sgp != SGP_READ) 12421635f6a7SHugh Dickins goto clear; 12431635f6a7SHugh Dickins unlock_page(page); 124409cbfeafSKirill A. Shutemov put_page(page); 12451635f6a7SHugh Dickins page = NULL; 12461635f6a7SHugh Dickins } 124754af6042SHugh Dickins if (page || (sgp == SGP_READ && !swap.val)) { 124854af6042SHugh Dickins *pagep = page; 124954af6042SHugh Dickins return 0; 125027ab7006SHugh Dickins } 125127ab7006SHugh Dickins 1252b409f9fcSHugh Dickins /* 125354af6042SHugh Dickins * Fast cache lookup did not find it: 125454af6042SHugh Dickins * bring it back from swap or allocate. 1255b409f9fcSHugh Dickins */ 125654af6042SHugh Dickins info = SHMEM_I(inode); 125754af6042SHugh Dickins sbinfo = SHMEM_SB(inode->i_sb); 12589e18eb29SAndres Lagar-Cavilla charge_mm = fault_mm ? 
: current->mm; 125927ab7006SHugh Dickins 12601da177e4SLinus Torvalds if (swap.val) { 12611da177e4SLinus Torvalds /* Look it up and read it in.. */ 126227ab7006SHugh Dickins page = lookup_swap_cache(swap); 126327ab7006SHugh Dickins if (!page) { 12649e18eb29SAndres Lagar-Cavilla /* Or update major stats only when swapin succeeds?? */ 12659e18eb29SAndres Lagar-Cavilla if (fault_type) { 126668da9f05SHugh Dickins *fault_type |= VM_FAULT_MAJOR; 12679e18eb29SAndres Lagar-Cavilla count_vm_event(PGMAJFAULT); 12689e18eb29SAndres Lagar-Cavilla mem_cgroup_count_vm_event(fault_mm, PGMAJFAULT); 12699e18eb29SAndres Lagar-Cavilla } 12709e18eb29SAndres Lagar-Cavilla /* Here we actually start the io */ 127141ffe5d5SHugh Dickins page = shmem_swapin(swap, gfp, info, index); 127227ab7006SHugh Dickins if (!page) { 12731da177e4SLinus Torvalds error = -ENOMEM; 127454af6042SHugh Dickins goto failed; 1275285b2c4fSHugh Dickins } 12761da177e4SLinus Torvalds } 12771da177e4SLinus Torvalds 12781da177e4SLinus Torvalds /* We have to do this with page locked to prevent races */ 127954af6042SHugh Dickins lock_page(page); 12800142ef6cSHugh Dickins if (!PageSwapCache(page) || page_private(page) != swap.val || 1281d1899228SHugh Dickins !shmem_confirm_swap(mapping, index, swap)) { 1282bde05d1cSHugh Dickins error = -EEXIST; /* try again */ 1283d1899228SHugh Dickins goto unlock; 1284bde05d1cSHugh Dickins } 128527ab7006SHugh Dickins if (!PageUptodate(page)) { 12861da177e4SLinus Torvalds error = -EIO; 128754af6042SHugh Dickins goto failed; 128854af6042SHugh Dickins } 128954af6042SHugh Dickins wait_on_page_writeback(page); 129054af6042SHugh Dickins 1291bde05d1cSHugh Dickins if (shmem_should_replace_page(page, gfp)) { 1292bde05d1cSHugh Dickins error = shmem_replace_page(&page, gfp, info, index); 1293bde05d1cSHugh Dickins if (error) 129454af6042SHugh Dickins goto failed; 12951da177e4SLinus Torvalds } 12961da177e4SLinus Torvalds 12979e18eb29SAndres Lagar-Cavilla error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg, 1298f627c2f5SKirill A. Shutemov false); 1299d1899228SHugh Dickins if (!error) { 130054af6042SHugh Dickins error = shmem_add_to_page_cache(page, mapping, index, 1301fed400a1SWang Sheng-Hui swp_to_radix_entry(swap)); 1302215c02bcSHugh Dickins /* 1303215c02bcSHugh Dickins * We already confirmed swap under page lock, and make 1304215c02bcSHugh Dickins * no memory allocation here, so usually no possibility 1305215c02bcSHugh Dickins * of error; but free_swap_and_cache() only trylocks a 1306215c02bcSHugh Dickins * page, so it is just possible that the entry has been 1307215c02bcSHugh Dickins * truncated or holepunched since swap was confirmed. 1308215c02bcSHugh Dickins * shmem_undo_range() will have done some of the 1309215c02bcSHugh Dickins * unaccounting, now delete_from_swap_cache() will do 131093aa7d95SVladimir Davydov * the rest. 1311215c02bcSHugh Dickins * Reset swap.val? No, leave it so "failed" goes back to 1312215c02bcSHugh Dickins * "repeat": reading a hole and writing should succeed. 1313215c02bcSHugh Dickins */ 131400501b53SJohannes Weiner if (error) { 1315f627c2f5SKirill A. Shutemov mem_cgroup_cancel_charge(page, memcg, false); 1316215c02bcSHugh Dickins delete_from_swap_cache(page); 1317d1899228SHugh Dickins } 131800501b53SJohannes Weiner } 131954af6042SHugh Dickins if (error) 132054af6042SHugh Dickins goto failed; 132154af6042SHugh Dickins 1322f627c2f5SKirill A. 
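/*
 * Editor's note (illustrative, assumes this kernel's memcg API): the call
 * just below is
 *
 *	mem_cgroup_commit_charge(page, memcg, lrucare, compound)
 *
 * with lrucare passed as true on this swapin path because a page found in
 * the swap cache may already be on an LRU list; the freshly allocated
 * page later in this function is committed with lrucare == false.
 */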
Shutemov mem_cgroup_commit_charge(page, memcg, true, false); 132300501b53SJohannes Weiner 132454af6042SHugh Dickins spin_lock(&info->lock); 132554af6042SHugh Dickins info->swapped--; 132654af6042SHugh Dickins shmem_recalc_inode(inode); 13271da177e4SLinus Torvalds spin_unlock(&info->lock); 132827ab7006SHugh Dickins 132966d2f4d2SHugh Dickins if (sgp == SGP_WRITE) 133066d2f4d2SHugh Dickins mark_page_accessed(page); 133166d2f4d2SHugh Dickins 133227ab7006SHugh Dickins delete_from_swap_cache(page); 133327ab7006SHugh Dickins set_page_dirty(page); 133427ab7006SHugh Dickins swap_free(swap); 133527ab7006SHugh Dickins 133654af6042SHugh Dickins } else { 133754af6042SHugh Dickins if (shmem_acct_block(info->flags)) { 133854af6042SHugh Dickins error = -ENOSPC; 133954af6042SHugh Dickins goto failed; 13401da177e4SLinus Torvalds } 13410edd73b3SHugh Dickins if (sbinfo->max_blocks) { 1342fc5da22aSHugh Dickins if (percpu_counter_compare(&sbinfo->used_blocks, 134354af6042SHugh Dickins sbinfo->max_blocks) >= 0) { 134454af6042SHugh Dickins error = -ENOSPC; 134554af6042SHugh Dickins goto unacct; 134654af6042SHugh Dickins } 13477e496299STim Chen percpu_counter_inc(&sbinfo->used_blocks); 134859a16eadSHugh Dickins } 13491da177e4SLinus Torvalds 135054af6042SHugh Dickins page = shmem_alloc_page(gfp, info, index); 135154af6042SHugh Dickins if (!page) { 135254af6042SHugh Dickins error = -ENOMEM; 135354af6042SHugh Dickins goto decused; 135454af6042SHugh Dickins } 135566d2f4d2SHugh Dickins if (sgp == SGP_WRITE) 1356eb39d618SHugh Dickins __SetPageReferenced(page); 135766d2f4d2SHugh Dickins 13589e18eb29SAndres Lagar-Cavilla error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg, 1359f627c2f5SKirill A. Shutemov false); 136054af6042SHugh Dickins if (error) 136154af6042SHugh Dickins goto decused; 13625e4c0d97SJan Kara error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK); 1363b065b432SHugh Dickins if (!error) { 1364b065b432SHugh Dickins error = shmem_add_to_page_cache(page, mapping, index, 1365fed400a1SWang Sheng-Hui NULL); 1366b065b432SHugh Dickins radix_tree_preload_end(); 1367b065b432SHugh Dickins } 1368b065b432SHugh Dickins if (error) { 1369f627c2f5SKirill A. Shutemov mem_cgroup_cancel_charge(page, memcg, false); 1370b065b432SHugh Dickins goto decused; 1371b065b432SHugh Dickins } 1372f627c2f5SKirill A. Shutemov mem_cgroup_commit_charge(page, memcg, false, false); 137354af6042SHugh Dickins lru_cache_add_anon(page); 137454af6042SHugh Dickins 137554af6042SHugh Dickins spin_lock(&info->lock); 13761da177e4SLinus Torvalds info->alloced++; 137754af6042SHugh Dickins inode->i_blocks += BLOCKS_PER_PAGE; 137854af6042SHugh Dickins shmem_recalc_inode(inode); 137959a16eadSHugh Dickins spin_unlock(&info->lock); 13801635f6a7SHugh Dickins alloced = true; 138154af6042SHugh Dickins 1382ec9516fbSHugh Dickins /* 13831635f6a7SHugh Dickins * Let SGP_FALLOC use the SGP_WRITE optimization on a new page. 13841635f6a7SHugh Dickins */ 13851635f6a7SHugh Dickins if (sgp == SGP_FALLOC) 13861635f6a7SHugh Dickins sgp = SGP_WRITE; 13871635f6a7SHugh Dickins clear: 13881635f6a7SHugh Dickins /* 13891635f6a7SHugh Dickins * Let SGP_WRITE caller clear ends if write does not fill page; 13901635f6a7SHugh Dickins * but SGP_FALLOC on a page fallocated earlier must initialize 13911635f6a7SHugh Dickins * it now, lest undo on failure cancel our earlier guarantee. 
1392ec9516fbSHugh Dickins */ 1393ec9516fbSHugh Dickins if (sgp != SGP_WRITE) { 139427ab7006SHugh Dickins clear_highpage(page); 139527ab7006SHugh Dickins flush_dcache_page(page); 139627ab7006SHugh Dickins SetPageUptodate(page); 1397ec9516fbSHugh Dickins } 13981da177e4SLinus Torvalds } 1399bde05d1cSHugh Dickins 140054af6042SHugh Dickins /* Perhaps the file has been truncated since we checked */ 140175edd345SHugh Dickins if (sgp <= SGP_CACHE && 140209cbfeafSKirill A. Shutemov ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) { 1403267a4c76SHugh Dickins if (alloced) { 1404267a4c76SHugh Dickins ClearPageDirty(page); 1405267a4c76SHugh Dickins delete_from_page_cache(page); 1406267a4c76SHugh Dickins spin_lock(&info->lock); 1407267a4c76SHugh Dickins shmem_recalc_inode(inode); 1408267a4c76SHugh Dickins spin_unlock(&info->lock); 1409267a4c76SHugh Dickins } 141054af6042SHugh Dickins error = -EINVAL; 1411267a4c76SHugh Dickins goto unlock; 1412ff36b801SShaohua Li } 141354af6042SHugh Dickins *pagep = page; 141454af6042SHugh Dickins return 0; 1415d00806b1SNick Piggin 1416d0217ac0SNick Piggin /* 141754af6042SHugh Dickins * Error recovery. 14181da177e4SLinus Torvalds */ 141954af6042SHugh Dickins decused: 142054af6042SHugh Dickins if (sbinfo->max_blocks) 142154af6042SHugh Dickins percpu_counter_add(&sbinfo->used_blocks, -1); 142254af6042SHugh Dickins unacct: 142354af6042SHugh Dickins shmem_unacct_blocks(info->flags, 1); 142454af6042SHugh Dickins failed: 1425267a4c76SHugh Dickins if (swap.val && !shmem_confirm_swap(mapping, index, swap)) 142654af6042SHugh Dickins error = -EEXIST; 1427d1899228SHugh Dickins unlock: 142827ab7006SHugh Dickins if (page) { 142954af6042SHugh Dickins unlock_page(page); 143009cbfeafSKirill A. Shutemov put_page(page); 143154af6042SHugh Dickins } 143254af6042SHugh Dickins if (error == -ENOSPC && !once++) { 143354af6042SHugh Dickins info = SHMEM_I(inode); 143454af6042SHugh Dickins spin_lock(&info->lock); 143554af6042SHugh Dickins shmem_recalc_inode(inode); 143654af6042SHugh Dickins spin_unlock(&info->lock); 14371da177e4SLinus Torvalds goto repeat; 1438d8dc74f2SAdrian Bunk } 1439d1899228SHugh Dickins if (error == -EEXIST) /* from above or from radix_tree_insert */ 144054af6042SHugh Dickins goto repeat; 144154af6042SHugh Dickins return error; 14421da177e4SLinus Torvalds } 14431da177e4SLinus Torvalds 14441da177e4SLinus Torvalds static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 14451da177e4SLinus Torvalds { 1446496ad9aaSAl Viro struct inode *inode = file_inode(vma->vm_file); 14479e18eb29SAndres Lagar-Cavilla gfp_t gfp = mapping_gfp_mask(inode->i_mapping); 14481da177e4SLinus Torvalds int error; 144968da9f05SHugh Dickins int ret = VM_FAULT_LOCKED; 14501da177e4SLinus Torvalds 1451f00cdc6dSHugh Dickins /* 1452f00cdc6dSHugh Dickins * Trinity finds that probing a hole which tmpfs is punching can 1453f00cdc6dSHugh Dickins * prevent the hole-punch from ever completing: which in turn 1454f00cdc6dSHugh Dickins * locks writers out with its hold on i_mutex. So refrain from 14558e205f77SHugh Dickins * faulting pages into the hole while it's being punched. Although 14568e205f77SHugh Dickins * shmem_undo_range() does remove the additions, it may be unable to 14578e205f77SHugh Dickins * keep up, as each new page needs its own unmap_mapping_range() call, 14588e205f77SHugh Dickins * and the i_mmap tree grows ever slower to scan if new vmas are added. 
14598e205f77SHugh Dickins * 14608e205f77SHugh Dickins * It does not matter if we sometimes reach this check just before the 14618e205f77SHugh Dickins * hole-punch begins, so that one fault then races with the punch: 14628e205f77SHugh Dickins * we just need to make racing faults a rare case. 14638e205f77SHugh Dickins * 14648e205f77SHugh Dickins * The implementation below would be much simpler if we just used a 14658e205f77SHugh Dickins * standard mutex or completion: but we cannot take i_mutex in fault, 14668e205f77SHugh Dickins * and bloating every shmem inode for this unlikely case would be sad. 1467f00cdc6dSHugh Dickins */ 1468f00cdc6dSHugh Dickins if (unlikely(inode->i_private)) { 1469f00cdc6dSHugh Dickins struct shmem_falloc *shmem_falloc; 1470f00cdc6dSHugh Dickins 1471f00cdc6dSHugh Dickins spin_lock(&inode->i_lock); 1472f00cdc6dSHugh Dickins shmem_falloc = inode->i_private; 14738e205f77SHugh Dickins if (shmem_falloc && 14748e205f77SHugh Dickins shmem_falloc->waitq && 14758e205f77SHugh Dickins vmf->pgoff >= shmem_falloc->start && 14768e205f77SHugh Dickins vmf->pgoff < shmem_falloc->next) { 14778e205f77SHugh Dickins wait_queue_head_t *shmem_falloc_waitq; 14788e205f77SHugh Dickins DEFINE_WAIT(shmem_fault_wait); 14798e205f77SHugh Dickins 14808e205f77SHugh Dickins ret = VM_FAULT_NOPAGE; 1481f00cdc6dSHugh Dickins if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) && 1482f00cdc6dSHugh Dickins !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { 14838e205f77SHugh Dickins /* It's polite to up mmap_sem if we can */ 1484f00cdc6dSHugh Dickins up_read(&vma->vm_mm->mmap_sem); 14858e205f77SHugh Dickins ret = VM_FAULT_RETRY; 1486f00cdc6dSHugh Dickins } 14878e205f77SHugh Dickins 14888e205f77SHugh Dickins shmem_falloc_waitq = shmem_falloc->waitq; 14898e205f77SHugh Dickins prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait, 14908e205f77SHugh Dickins TASK_UNINTERRUPTIBLE); 14918e205f77SHugh Dickins spin_unlock(&inode->i_lock); 14928e205f77SHugh Dickins schedule(); 14938e205f77SHugh Dickins 14948e205f77SHugh Dickins /* 14958e205f77SHugh Dickins * shmem_falloc_waitq points into the shmem_fallocate() 14968e205f77SHugh Dickins * stack of the hole-punching task: shmem_falloc_waitq 14978e205f77SHugh Dickins * is usually invalid by the time we reach here, but 14988e205f77SHugh Dickins * finish_wait() does not dereference it in that case; 14998e205f77SHugh Dickins * though i_lock needed lest racing with wake_up_all(). 15008e205f77SHugh Dickins */ 15018e205f77SHugh Dickins spin_lock(&inode->i_lock); 15028e205f77SHugh Dickins finish_wait(shmem_falloc_waitq, &shmem_fault_wait); 15038e205f77SHugh Dickins spin_unlock(&inode->i_lock); 15048e205f77SHugh Dickins return ret; 1505f00cdc6dSHugh Dickins } 15068e205f77SHugh Dickins spin_unlock(&inode->i_lock); 1507f00cdc6dSHugh Dickins } 1508f00cdc6dSHugh Dickins 15099e18eb29SAndres Lagar-Cavilla error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, SGP_CACHE, 15109e18eb29SAndres Lagar-Cavilla gfp, vma->vm_mm, &ret); 15111da177e4SLinus Torvalds if (error) 15121da177e4SLinus Torvalds return ((error == -ENOMEM) ? 
VM_FAULT_OOM : VM_FAULT_SIGBUS); 151368da9f05SHugh Dickins return ret; 15141da177e4SLinus Torvalds } 15151da177e4SLinus Torvalds 15161da177e4SLinus Torvalds #ifdef CONFIG_NUMA 151741ffe5d5SHugh Dickins static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) 15181da177e4SLinus Torvalds { 1519496ad9aaSAl Viro struct inode *inode = file_inode(vma->vm_file); 152041ffe5d5SHugh Dickins return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol); 15211da177e4SLinus Torvalds } 15221da177e4SLinus Torvalds 1523d8dc74f2SAdrian Bunk static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, 1524d8dc74f2SAdrian Bunk unsigned long addr) 15251da177e4SLinus Torvalds { 1526496ad9aaSAl Viro struct inode *inode = file_inode(vma->vm_file); 152741ffe5d5SHugh Dickins pgoff_t index; 15281da177e4SLinus Torvalds 152941ffe5d5SHugh Dickins index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 153041ffe5d5SHugh Dickins return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index); 15311da177e4SLinus Torvalds } 15321da177e4SLinus Torvalds #endif 15331da177e4SLinus Torvalds 15341da177e4SLinus Torvalds int shmem_lock(struct file *file, int lock, struct user_struct *user) 15351da177e4SLinus Torvalds { 1536496ad9aaSAl Viro struct inode *inode = file_inode(file); 15371da177e4SLinus Torvalds struct shmem_inode_info *info = SHMEM_I(inode); 15381da177e4SLinus Torvalds int retval = -ENOMEM; 15391da177e4SLinus Torvalds 15401da177e4SLinus Torvalds spin_lock(&info->lock); 15411da177e4SLinus Torvalds if (lock && !(info->flags & VM_LOCKED)) { 15421da177e4SLinus Torvalds if (!user_shm_lock(inode->i_size, user)) 15431da177e4SLinus Torvalds goto out_nomem; 15441da177e4SLinus Torvalds info->flags |= VM_LOCKED; 154589e004eaSLee Schermerhorn mapping_set_unevictable(file->f_mapping); 15461da177e4SLinus Torvalds } 15471da177e4SLinus Torvalds if (!lock && (info->flags & VM_LOCKED) && user) { 15481da177e4SLinus Torvalds user_shm_unlock(inode->i_size, user); 15491da177e4SLinus Torvalds info->flags &= ~VM_LOCKED; 155089e004eaSLee Schermerhorn mapping_clear_unevictable(file->f_mapping); 15511da177e4SLinus Torvalds } 15521da177e4SLinus Torvalds retval = 0; 155389e004eaSLee Schermerhorn 15541da177e4SLinus Torvalds out_nomem: 15551da177e4SLinus Torvalds spin_unlock(&info->lock); 15561da177e4SLinus Torvalds return retval; 15571da177e4SLinus Torvalds } 15581da177e4SLinus Torvalds 15599b83a6a8SAdrian Bunk static int shmem_mmap(struct file *file, struct vm_area_struct *vma) 15601da177e4SLinus Torvalds { 15611da177e4SLinus Torvalds file_accessed(file); 15621da177e4SLinus Torvalds vma->vm_ops = &shmem_vm_ops; 15631da177e4SLinus Torvalds return 0; 15641da177e4SLinus Torvalds } 15651da177e4SLinus Torvalds 1566454abafeSDmitry Monakhov static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir, 156709208d15SAl Viro umode_t mode, dev_t dev, unsigned long flags) 15681da177e4SLinus Torvalds { 15691da177e4SLinus Torvalds struct inode *inode; 15701da177e4SLinus Torvalds struct shmem_inode_info *info; 15711da177e4SLinus Torvalds struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 15721da177e4SLinus Torvalds 15735b04c689SPavel Emelyanov if (shmem_reserve_inode(sb)) 15741da177e4SLinus Torvalds return NULL; 15751da177e4SLinus Torvalds 15761da177e4SLinus Torvalds inode = new_inode(sb); 15771da177e4SLinus Torvalds if (inode) { 157885fe4025SChristoph Hellwig inode->i_ino = get_next_ino(); 1579454abafeSDmitry Monakhov inode_init_owner(inode, dir, mode); 15801da177e4SLinus Torvalds inode->i_blocks = 
0; 15811da177e4SLinus Torvalds inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 158291828a40SDavid M. Grimes inode->i_generation = get_seconds(); 15831da177e4SLinus Torvalds info = SHMEM_I(inode); 15841da177e4SLinus Torvalds memset(info, 0, (char *)inode - (char *)info); 15851da177e4SLinus Torvalds spin_lock_init(&info->lock); 158640e041a2SDavid Herrmann info->seals = F_SEAL_SEAL; 15870b0a0806SHugh Dickins info->flags = flags & VM_NORESERVE; 15881da177e4SLinus Torvalds INIT_LIST_HEAD(&info->swaplist); 158938f38657SAristeu Rozanski simple_xattrs_init(&info->xattrs); 159072c04902SAl Viro cache_no_acl(inode); 15911da177e4SLinus Torvalds 15921da177e4SLinus Torvalds switch (mode & S_IFMT) { 15931da177e4SLinus Torvalds default: 159439f0247dSAndreas Gruenbacher inode->i_op = &shmem_special_inode_operations; 15951da177e4SLinus Torvalds init_special_inode(inode, mode, dev); 15961da177e4SLinus Torvalds break; 15971da177e4SLinus Torvalds case S_IFREG: 159814fcc23fSHugh Dickins inode->i_mapping->a_ops = &shmem_aops; 15991da177e4SLinus Torvalds inode->i_op = &shmem_inode_operations; 16001da177e4SLinus Torvalds inode->i_fop = &shmem_file_operations; 160171fe804bSLee Schermerhorn mpol_shared_policy_init(&info->policy, 160271fe804bSLee Schermerhorn shmem_get_sbmpol(sbinfo)); 16031da177e4SLinus Torvalds break; 16041da177e4SLinus Torvalds case S_IFDIR: 1605d8c76e6fSDave Hansen inc_nlink(inode); 16061da177e4SLinus Torvalds /* Some things misbehave if size == 0 on a directory */ 16071da177e4SLinus Torvalds inode->i_size = 2 * BOGO_DIRENT_SIZE; 16081da177e4SLinus Torvalds inode->i_op = &shmem_dir_inode_operations; 16091da177e4SLinus Torvalds inode->i_fop = &simple_dir_operations; 16101da177e4SLinus Torvalds break; 16111da177e4SLinus Torvalds case S_IFLNK: 16121da177e4SLinus Torvalds /* 16131da177e4SLinus Torvalds * Must not load anything in the rbtree, 16141da177e4SLinus Torvalds * mpol_free_shared_policy will not be called. 
16151da177e4SLinus Torvalds */ 161671fe804bSLee Schermerhorn mpol_shared_policy_init(&info->policy, NULL); 16171da177e4SLinus Torvalds break; 16181da177e4SLinus Torvalds } 16195b04c689SPavel Emelyanov } else 16205b04c689SPavel Emelyanov shmem_free_inode(sb); 16211da177e4SLinus Torvalds return inode; 16221da177e4SLinus Torvalds } 16231da177e4SLinus Torvalds 16240cd6144aSJohannes Weiner bool shmem_mapping(struct address_space *mapping) 16250cd6144aSJohannes Weiner { 1626f0774d88SSasha Levin if (!mapping->host) 1627f0774d88SSasha Levin return false; 1628f0774d88SSasha Levin 162997b713baSChristoph Hellwig return mapping->host->i_sb->s_op == &shmem_ops; 16300cd6144aSJohannes Weiner } 16310cd6144aSJohannes Weiner 16321da177e4SLinus Torvalds #ifdef CONFIG_TMPFS 163392e1d5beSArjan van de Ven static const struct inode_operations shmem_symlink_inode_operations; 163469f07ec9SHugh Dickins static const struct inode_operations shmem_short_symlink_operations; 16351da177e4SLinus Torvalds 16366d9d88d0SJarkko Sakkinen #ifdef CONFIG_TMPFS_XATTR 16376d9d88d0SJarkko Sakkinen static int shmem_initxattrs(struct inode *, const struct xattr *, void *); 16386d9d88d0SJarkko Sakkinen #else 16396d9d88d0SJarkko Sakkinen #define shmem_initxattrs NULL 16406d9d88d0SJarkko Sakkinen #endif 16416d9d88d0SJarkko Sakkinen 16421da177e4SLinus Torvalds static int 1643800d15a5SNick Piggin shmem_write_begin(struct file *file, struct address_space *mapping, 1644800d15a5SNick Piggin loff_t pos, unsigned len, unsigned flags, 1645800d15a5SNick Piggin struct page **pagep, void **fsdata) 16461da177e4SLinus Torvalds { 1647800d15a5SNick Piggin struct inode *inode = mapping->host; 164840e041a2SDavid Herrmann struct shmem_inode_info *info = SHMEM_I(inode); 164909cbfeafSKirill A. Shutemov pgoff_t index = pos >> PAGE_SHIFT; 165040e041a2SDavid Herrmann 165140e041a2SDavid Herrmann /* i_mutex is held by caller */ 165240e041a2SDavid Herrmann if (unlikely(info->seals)) { 165340e041a2SDavid Herrmann if (info->seals & F_SEAL_WRITE) 165440e041a2SDavid Herrmann return -EPERM; 165540e041a2SDavid Herrmann if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size) 165640e041a2SDavid Herrmann return -EPERM; 165740e041a2SDavid Herrmann } 165840e041a2SDavid Herrmann 16599e18eb29SAndres Lagar-Cavilla return shmem_getpage(inode, index, pagep, SGP_WRITE); 1660800d15a5SNick Piggin } 1661800d15a5SNick Piggin 1662800d15a5SNick Piggin static int 1663800d15a5SNick Piggin shmem_write_end(struct file *file, struct address_space *mapping, 1664800d15a5SNick Piggin loff_t pos, unsigned len, unsigned copied, 1665800d15a5SNick Piggin struct page *page, void *fsdata) 1666800d15a5SNick Piggin { 1667800d15a5SNick Piggin struct inode *inode = mapping->host; 1668800d15a5SNick Piggin 1669800d15a5SNick Piggin if (pos + copied > inode->i_size) 1670800d15a5SNick Piggin i_size_write(inode, pos + copied); 1671800d15a5SNick Piggin 1672ec9516fbSHugh Dickins if (!PageUptodate(page)) { 167309cbfeafSKirill A. Shutemov if (copied < PAGE_SIZE) { 167409cbfeafSKirill A. Shutemov unsigned from = pos & (PAGE_SIZE - 1); 1675ec9516fbSHugh Dickins zero_user_segments(page, 0, from, 167609cbfeafSKirill A. Shutemov from + copied, PAGE_SIZE); 1677ec9516fbSHugh Dickins } 1678ec9516fbSHugh Dickins SetPageUptodate(page); 1679ec9516fbSHugh Dickins } 1680d3602444SHugh Dickins set_page_dirty(page); 16816746aff7SWu Fengguang unlock_page(page); 168209cbfeafSKirill A. 
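/*
 * Editor's note (added for clarity, not in the original file): the
 * zero_user_segments() call above covers a short copy into a page that
 * was never marked Uptodate: everything outside [from, from + copied) is
 * zeroed before SetPageUptodate(), so stale page contents are not
 * exposed to a subsequent read.
 */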
Shutemov put_page(page); 1683d3602444SHugh Dickins 1684800d15a5SNick Piggin return copied; 16851da177e4SLinus Torvalds } 16861da177e4SLinus Torvalds 16872ba5bbedSAl Viro static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) 16881da177e4SLinus Torvalds { 16896e58e79dSAl Viro struct file *file = iocb->ki_filp; 16906e58e79dSAl Viro struct inode *inode = file_inode(file); 16911da177e4SLinus Torvalds struct address_space *mapping = inode->i_mapping; 169241ffe5d5SHugh Dickins pgoff_t index; 169341ffe5d5SHugh Dickins unsigned long offset; 1694a0ee5ec5SHugh Dickins enum sgp_type sgp = SGP_READ; 1695f7c1d074SGeert Uytterhoeven int error = 0; 1696cb66a7a1SAl Viro ssize_t retval = 0; 16976e58e79dSAl Viro loff_t *ppos = &iocb->ki_pos; 1698a0ee5ec5SHugh Dickins 1699a0ee5ec5SHugh Dickins /* 1700a0ee5ec5SHugh Dickins * Might this read be for a stacking filesystem? Then when reading 1701a0ee5ec5SHugh Dickins * holes of a sparse file, we actually need to allocate those pages, 1702a0ee5ec5SHugh Dickins * and even mark them dirty, so it cannot exceed the max_blocks limit. 1703a0ee5ec5SHugh Dickins */ 1704777eda2cSAl Viro if (!iter_is_iovec(to)) 170575edd345SHugh Dickins sgp = SGP_CACHE; 17061da177e4SLinus Torvalds 170709cbfeafSKirill A. Shutemov index = *ppos >> PAGE_SHIFT; 170809cbfeafSKirill A. Shutemov offset = *ppos & ~PAGE_MASK; 17091da177e4SLinus Torvalds 17101da177e4SLinus Torvalds for (;;) { 17111da177e4SLinus Torvalds struct page *page = NULL; 171241ffe5d5SHugh Dickins pgoff_t end_index; 171341ffe5d5SHugh Dickins unsigned long nr, ret; 17141da177e4SLinus Torvalds loff_t i_size = i_size_read(inode); 17151da177e4SLinus Torvalds 171609cbfeafSKirill A. Shutemov end_index = i_size >> PAGE_SHIFT; 17171da177e4SLinus Torvalds if (index > end_index) 17181da177e4SLinus Torvalds break; 17191da177e4SLinus Torvalds if (index == end_index) { 172009cbfeafSKirill A. Shutemov nr = i_size & ~PAGE_MASK; 17211da177e4SLinus Torvalds if (nr <= offset) 17221da177e4SLinus Torvalds break; 17231da177e4SLinus Torvalds } 17241da177e4SLinus Torvalds 17259e18eb29SAndres Lagar-Cavilla error = shmem_getpage(inode, index, &page, sgp); 17266e58e79dSAl Viro if (error) { 17276e58e79dSAl Viro if (error == -EINVAL) 17286e58e79dSAl Viro error = 0; 17291da177e4SLinus Torvalds break; 17301da177e4SLinus Torvalds } 173175edd345SHugh Dickins if (page) { 173275edd345SHugh Dickins if (sgp == SGP_CACHE) 173375edd345SHugh Dickins set_page_dirty(page); 1734d3602444SHugh Dickins unlock_page(page); 173575edd345SHugh Dickins } 17361da177e4SLinus Torvalds 17371da177e4SLinus Torvalds /* 17381da177e4SLinus Torvalds * We must evaluate after, since reads (unlike writes) 17391b1dcc1bSJes Sorensen * are called without i_mutex protection against truncate 17401da177e4SLinus Torvalds */ 174109cbfeafSKirill A. Shutemov nr = PAGE_SIZE; 17421da177e4SLinus Torvalds i_size = i_size_read(inode); 174309cbfeafSKirill A. Shutemov end_index = i_size >> PAGE_SHIFT; 17441da177e4SLinus Torvalds if (index == end_index) { 174509cbfeafSKirill A. Shutemov nr = i_size & ~PAGE_MASK; 17461da177e4SLinus Torvalds if (nr <= offset) { 17471da177e4SLinus Torvalds if (page) 174809cbfeafSKirill A. 
Shutemov put_page(page); 17491da177e4SLinus Torvalds break; 17501da177e4SLinus Torvalds } 17511da177e4SLinus Torvalds } 17521da177e4SLinus Torvalds nr -= offset; 17531da177e4SLinus Torvalds 17541da177e4SLinus Torvalds if (page) { 17551da177e4SLinus Torvalds /* 17561da177e4SLinus Torvalds * If users can be writing to this page using arbitrary 17571da177e4SLinus Torvalds * virtual addresses, take care about potential aliasing 17581da177e4SLinus Torvalds * before reading the page on the kernel side. 17591da177e4SLinus Torvalds */ 17601da177e4SLinus Torvalds if (mapping_writably_mapped(mapping)) 17611da177e4SLinus Torvalds flush_dcache_page(page); 17621da177e4SLinus Torvalds /* 17631da177e4SLinus Torvalds * Mark the page accessed if we read the beginning. 17641da177e4SLinus Torvalds */ 17651da177e4SLinus Torvalds if (!offset) 17661da177e4SLinus Torvalds mark_page_accessed(page); 1767b5810039SNick Piggin } else { 17681da177e4SLinus Torvalds page = ZERO_PAGE(0); 176909cbfeafSKirill A. Shutemov get_page(page); 1770b5810039SNick Piggin } 17711da177e4SLinus Torvalds 17721da177e4SLinus Torvalds /* 17731da177e4SLinus Torvalds * Ok, we have the page, and it's up-to-date, so 17741da177e4SLinus Torvalds * now we can copy it to user space... 17751da177e4SLinus Torvalds */ 17762ba5bbedSAl Viro ret = copy_page_to_iter(page, offset, nr, to); 17776e58e79dSAl Viro retval += ret; 17781da177e4SLinus Torvalds offset += ret; 177909cbfeafSKirill A. Shutemov index += offset >> PAGE_SHIFT; 178009cbfeafSKirill A. Shutemov offset &= ~PAGE_MASK; 17811da177e4SLinus Torvalds 178209cbfeafSKirill A. Shutemov put_page(page); 17832ba5bbedSAl Viro if (!iov_iter_count(to)) 17841da177e4SLinus Torvalds break; 17856e58e79dSAl Viro if (ret < nr) { 17866e58e79dSAl Viro error = -EFAULT; 17876e58e79dSAl Viro break; 17886e58e79dSAl Viro } 17891da177e4SLinus Torvalds cond_resched(); 17901da177e4SLinus Torvalds } 17911da177e4SLinus Torvalds 179209cbfeafSKirill A. Shutemov *ppos = ((loff_t) index << PAGE_SHIFT) + offset; 17936e58e79dSAl Viro file_accessed(file); 17946e58e79dSAl Viro return retval ? 
retval : error; 17951da177e4SLinus Torvalds } 17961da177e4SLinus Torvalds 1797708e3508SHugh Dickins static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, 1798708e3508SHugh Dickins struct pipe_inode_info *pipe, size_t len, 1799708e3508SHugh Dickins unsigned int flags) 1800708e3508SHugh Dickins { 1801708e3508SHugh Dickins struct address_space *mapping = in->f_mapping; 180271f0e07aSHugh Dickins struct inode *inode = mapping->host; 1803708e3508SHugh Dickins unsigned int loff, nr_pages, req_pages; 1804708e3508SHugh Dickins struct page *pages[PIPE_DEF_BUFFERS]; 1805708e3508SHugh Dickins struct partial_page partial[PIPE_DEF_BUFFERS]; 1806708e3508SHugh Dickins struct page *page; 1807708e3508SHugh Dickins pgoff_t index, end_index; 1808708e3508SHugh Dickins loff_t isize, left; 1809708e3508SHugh Dickins int error, page_nr; 1810708e3508SHugh Dickins struct splice_pipe_desc spd = { 1811708e3508SHugh Dickins .pages = pages, 1812708e3508SHugh Dickins .partial = partial, 1813047fe360SEric Dumazet .nr_pages_max = PIPE_DEF_BUFFERS, 1814708e3508SHugh Dickins .flags = flags, 1815708e3508SHugh Dickins .ops = &page_cache_pipe_buf_ops, 1816708e3508SHugh Dickins .spd_release = spd_release_page, 1817708e3508SHugh Dickins }; 1818708e3508SHugh Dickins 181971f0e07aSHugh Dickins isize = i_size_read(inode); 1820708e3508SHugh Dickins if (unlikely(*ppos >= isize)) 1821708e3508SHugh Dickins return 0; 1822708e3508SHugh Dickins 1823708e3508SHugh Dickins left = isize - *ppos; 1824708e3508SHugh Dickins if (unlikely(left < len)) 1825708e3508SHugh Dickins len = left; 1826708e3508SHugh Dickins 1827708e3508SHugh Dickins if (splice_grow_spd(pipe, &spd)) 1828708e3508SHugh Dickins return -ENOMEM; 1829708e3508SHugh Dickins 183009cbfeafSKirill A. Shutemov index = *ppos >> PAGE_SHIFT; 183109cbfeafSKirill A. Shutemov loff = *ppos & ~PAGE_MASK; 183209cbfeafSKirill A. Shutemov req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT; 1833a786c06dSAl Viro nr_pages = min(req_pages, spd.nr_pages_max); 1834708e3508SHugh Dickins 1835708e3508SHugh Dickins spd.nr_pages = find_get_pages_contig(mapping, index, 1836708e3508SHugh Dickins nr_pages, spd.pages); 1837708e3508SHugh Dickins index += spd.nr_pages; 1838708e3508SHugh Dickins error = 0; 183971f0e07aSHugh Dickins 1840708e3508SHugh Dickins while (spd.nr_pages < nr_pages) { 18419e18eb29SAndres Lagar-Cavilla error = shmem_getpage(inode, index, &page, SGP_CACHE); 184271f0e07aSHugh Dickins if (error) 1843708e3508SHugh Dickins break; 1844708e3508SHugh Dickins unlock_page(page); 1845708e3508SHugh Dickins spd.pages[spd.nr_pages++] = page; 1846708e3508SHugh Dickins index++; 1847708e3508SHugh Dickins } 1848708e3508SHugh Dickins 184909cbfeafSKirill A. Shutemov index = *ppos >> PAGE_SHIFT; 1850708e3508SHugh Dickins nr_pages = spd.nr_pages; 1851708e3508SHugh Dickins spd.nr_pages = 0; 185271f0e07aSHugh Dickins 1853708e3508SHugh Dickins for (page_nr = 0; page_nr < nr_pages; page_nr++) { 1854708e3508SHugh Dickins unsigned int this_len; 1855708e3508SHugh Dickins 1856708e3508SHugh Dickins if (!len) 1857708e3508SHugh Dickins break; 1858708e3508SHugh Dickins 185909cbfeafSKirill A. 
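/*
 * Editor's note (added for clarity, not in the original file): each pass
 * of this loop prepares at most one page for the pipe.  this_len starts
 * as min(len, PAGE_SIZE - loff), and i_size is re-read further down so a
 * concurrently shrinking file trims or ends the splice instead of
 * exposing data past the new end of file.
 */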
Shutemov this_len = min_t(unsigned long, len, PAGE_SIZE - loff); 1860708e3508SHugh Dickins page = spd.pages[page_nr]; 1861708e3508SHugh Dickins 186271f0e07aSHugh Dickins if (!PageUptodate(page) || page->mapping != mapping) { 18639e18eb29SAndres Lagar-Cavilla error = shmem_getpage(inode, index, &page, SGP_CACHE); 186471f0e07aSHugh Dickins if (error) 1865708e3508SHugh Dickins break; 186671f0e07aSHugh Dickins unlock_page(page); 186709cbfeafSKirill A. Shutemov put_page(spd.pages[page_nr]); 1868708e3508SHugh Dickins spd.pages[page_nr] = page; 1869708e3508SHugh Dickins } 1870708e3508SHugh Dickins 187171f0e07aSHugh Dickins isize = i_size_read(inode); 187209cbfeafSKirill A. Shutemov end_index = (isize - 1) >> PAGE_SHIFT; 1873708e3508SHugh Dickins if (unlikely(!isize || index > end_index)) 1874708e3508SHugh Dickins break; 1875708e3508SHugh Dickins 1876708e3508SHugh Dickins if (end_index == index) { 1877708e3508SHugh Dickins unsigned int plen; 1878708e3508SHugh Dickins 187909cbfeafSKirill A. Shutemov plen = ((isize - 1) & ~PAGE_MASK) + 1; 1880708e3508SHugh Dickins if (plen <= loff) 1881708e3508SHugh Dickins break; 1882708e3508SHugh Dickins 1883708e3508SHugh Dickins this_len = min(this_len, plen - loff); 1884708e3508SHugh Dickins len = this_len; 1885708e3508SHugh Dickins } 1886708e3508SHugh Dickins 1887708e3508SHugh Dickins spd.partial[page_nr].offset = loff; 1888708e3508SHugh Dickins spd.partial[page_nr].len = this_len; 1889708e3508SHugh Dickins len -= this_len; 1890708e3508SHugh Dickins loff = 0; 1891708e3508SHugh Dickins spd.nr_pages++; 1892708e3508SHugh Dickins index++; 1893708e3508SHugh Dickins } 1894708e3508SHugh Dickins 1895708e3508SHugh Dickins while (page_nr < nr_pages) 189609cbfeafSKirill A. Shutemov put_page(spd.pages[page_nr++]); 1897708e3508SHugh Dickins 1898708e3508SHugh Dickins if (spd.nr_pages) 1899708e3508SHugh Dickins error = splice_to_pipe(pipe, &spd); 1900708e3508SHugh Dickins 1901047fe360SEric Dumazet splice_shrink_spd(&spd); 1902708e3508SHugh Dickins 1903708e3508SHugh Dickins if (error > 0) { 1904708e3508SHugh Dickins *ppos += error; 1905708e3508SHugh Dickins file_accessed(in); 1906708e3508SHugh Dickins } 1907708e3508SHugh Dickins return error; 1908708e3508SHugh Dickins } 1909708e3508SHugh Dickins 1910220f2ac9SHugh Dickins /* 1911220f2ac9SHugh Dickins * llseek SEEK_DATA or SEEK_HOLE through the radix_tree. 
1912220f2ac9SHugh Dickins */ 1913220f2ac9SHugh Dickins static pgoff_t shmem_seek_hole_data(struct address_space *mapping, 1914965c8e59SAndrew Morton pgoff_t index, pgoff_t end, int whence) 1915220f2ac9SHugh Dickins { 1916220f2ac9SHugh Dickins struct page *page; 1917220f2ac9SHugh Dickins struct pagevec pvec; 1918220f2ac9SHugh Dickins pgoff_t indices[PAGEVEC_SIZE]; 1919220f2ac9SHugh Dickins bool done = false; 1920220f2ac9SHugh Dickins int i; 1921220f2ac9SHugh Dickins 1922220f2ac9SHugh Dickins pagevec_init(&pvec, 0); 1923220f2ac9SHugh Dickins pvec.nr = 1; /* start small: we may be there already */ 1924220f2ac9SHugh Dickins while (!done) { 19250cd6144aSJohannes Weiner pvec.nr = find_get_entries(mapping, index, 1926220f2ac9SHugh Dickins pvec.nr, pvec.pages, indices); 1927220f2ac9SHugh Dickins if (!pvec.nr) { 1928965c8e59SAndrew Morton if (whence == SEEK_DATA) 1929220f2ac9SHugh Dickins index = end; 1930220f2ac9SHugh Dickins break; 1931220f2ac9SHugh Dickins } 1932220f2ac9SHugh Dickins for (i = 0; i < pvec.nr; i++, index++) { 1933220f2ac9SHugh Dickins if (index < indices[i]) { 1934965c8e59SAndrew Morton if (whence == SEEK_HOLE) { 1935220f2ac9SHugh Dickins done = true; 1936220f2ac9SHugh Dickins break; 1937220f2ac9SHugh Dickins } 1938220f2ac9SHugh Dickins index = indices[i]; 1939220f2ac9SHugh Dickins } 1940220f2ac9SHugh Dickins page = pvec.pages[i]; 1941220f2ac9SHugh Dickins if (page && !radix_tree_exceptional_entry(page)) { 1942220f2ac9SHugh Dickins if (!PageUptodate(page)) 1943220f2ac9SHugh Dickins page = NULL; 1944220f2ac9SHugh Dickins } 1945220f2ac9SHugh Dickins if (index >= end || 1946965c8e59SAndrew Morton (page && whence == SEEK_DATA) || 1947965c8e59SAndrew Morton (!page && whence == SEEK_HOLE)) { 1948220f2ac9SHugh Dickins done = true; 1949220f2ac9SHugh Dickins break; 1950220f2ac9SHugh Dickins } 1951220f2ac9SHugh Dickins } 19520cd6144aSJohannes Weiner pagevec_remove_exceptionals(&pvec); 1953220f2ac9SHugh Dickins pagevec_release(&pvec); 1954220f2ac9SHugh Dickins pvec.nr = PAGEVEC_SIZE; 1955220f2ac9SHugh Dickins cond_resched(); 1956220f2ac9SHugh Dickins } 1957220f2ac9SHugh Dickins return index; 1958220f2ac9SHugh Dickins } 1959220f2ac9SHugh Dickins 1960965c8e59SAndrew Morton static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence) 1961220f2ac9SHugh Dickins { 1962220f2ac9SHugh Dickins struct address_space *mapping = file->f_mapping; 1963220f2ac9SHugh Dickins struct inode *inode = mapping->host; 1964220f2ac9SHugh Dickins pgoff_t start, end; 1965220f2ac9SHugh Dickins loff_t new_offset; 1966220f2ac9SHugh Dickins 1967965c8e59SAndrew Morton if (whence != SEEK_DATA && whence != SEEK_HOLE) 1968965c8e59SAndrew Morton return generic_file_llseek_size(file, offset, whence, 1969220f2ac9SHugh Dickins MAX_LFS_FILESIZE, i_size_read(inode)); 19705955102cSAl Viro inode_lock(inode); 1971220f2ac9SHugh Dickins /* We're holding i_mutex so we can access i_size directly */ 1972220f2ac9SHugh Dickins 1973220f2ac9SHugh Dickins if (offset < 0) 1974220f2ac9SHugh Dickins offset = -EINVAL; 1975220f2ac9SHugh Dickins else if (offset >= inode->i_size) 1976220f2ac9SHugh Dickins offset = -ENXIO; 1977220f2ac9SHugh Dickins else { 197809cbfeafSKirill A. Shutemov start = offset >> PAGE_SHIFT; 197909cbfeafSKirill A. Shutemov end = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; 1980965c8e59SAndrew Morton new_offset = shmem_seek_hole_data(mapping, start, end, whence); 198109cbfeafSKirill A. 
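/*
 * Editor's note (illustrative sketch, not part of the original source):
 * this is the path behind userspace calls such as
 *
 *	off_t data = lseek(fd, 0, SEEK_DATA);
 *	off_t hole = lseek(fd, data, SEEK_HOLE);
 *
 * shmem_seek_hole_data() returns a page index, hence the shift back to a
 * byte offset on the next line.
 */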
Shutemov new_offset <<= PAGE_SHIFT; 1982220f2ac9SHugh Dickins if (new_offset > offset) { 1983220f2ac9SHugh Dickins if (new_offset < inode->i_size) 1984220f2ac9SHugh Dickins offset = new_offset; 1985965c8e59SAndrew Morton else if (whence == SEEK_DATA) 1986220f2ac9SHugh Dickins offset = -ENXIO; 1987220f2ac9SHugh Dickins else 1988220f2ac9SHugh Dickins offset = inode->i_size; 1989220f2ac9SHugh Dickins } 1990220f2ac9SHugh Dickins } 1991220f2ac9SHugh Dickins 1992387aae6fSHugh Dickins if (offset >= 0) 199346a1c2c7SJie Liu offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE); 19945955102cSAl Viro inode_unlock(inode); 1995220f2ac9SHugh Dickins return offset; 1996220f2ac9SHugh Dickins } 1997220f2ac9SHugh Dickins 199805f65b5cSDavid Herrmann /* 199905f65b5cSDavid Herrmann * We need a tag: a new tag would expand every radix_tree_node by 8 bytes, 200005f65b5cSDavid Herrmann * so reuse a tag which we firmly believe is never set or cleared on shmem. 200105f65b5cSDavid Herrmann */ 200205f65b5cSDavid Herrmann #define SHMEM_TAG_PINNED PAGECACHE_TAG_TOWRITE 200305f65b5cSDavid Herrmann #define LAST_SCAN 4 /* about 150ms max */ 200405f65b5cSDavid Herrmann 200505f65b5cSDavid Herrmann static void shmem_tag_pins(struct address_space *mapping) 200605f65b5cSDavid Herrmann { 200705f65b5cSDavid Herrmann struct radix_tree_iter iter; 200805f65b5cSDavid Herrmann void **slot; 200905f65b5cSDavid Herrmann pgoff_t start; 201005f65b5cSDavid Herrmann struct page *page; 201105f65b5cSDavid Herrmann 201205f65b5cSDavid Herrmann lru_add_drain(); 201305f65b5cSDavid Herrmann start = 0; 201405f65b5cSDavid Herrmann rcu_read_lock(); 201505f65b5cSDavid Herrmann 201605f65b5cSDavid Herrmann radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { 201705f65b5cSDavid Herrmann page = radix_tree_deref_slot(slot); 201805f65b5cSDavid Herrmann if (!page || radix_tree_exception(page)) { 20192cf938aaSMatthew Wilcox if (radix_tree_deref_retry(page)) { 20202cf938aaSMatthew Wilcox slot = radix_tree_iter_retry(&iter); 20212cf938aaSMatthew Wilcox continue; 20222cf938aaSMatthew Wilcox } 202305f65b5cSDavid Herrmann } else if (page_count(page) - page_mapcount(page) > 1) { 202405f65b5cSDavid Herrmann spin_lock_irq(&mapping->tree_lock); 202505f65b5cSDavid Herrmann radix_tree_tag_set(&mapping->page_tree, iter.index, 202605f65b5cSDavid Herrmann SHMEM_TAG_PINNED); 202705f65b5cSDavid Herrmann spin_unlock_irq(&mapping->tree_lock); 202805f65b5cSDavid Herrmann } 202905f65b5cSDavid Herrmann 203005f65b5cSDavid Herrmann if (need_resched()) { 203105f65b5cSDavid Herrmann cond_resched_rcu(); 20327165092fSMatthew Wilcox slot = radix_tree_iter_next(&iter); 203305f65b5cSDavid Herrmann } 203405f65b5cSDavid Herrmann } 203505f65b5cSDavid Herrmann rcu_read_unlock(); 203605f65b5cSDavid Herrmann } 203705f65b5cSDavid Herrmann 203805f65b5cSDavid Herrmann /* 203905f65b5cSDavid Herrmann * Setting SEAL_WRITE requires us to verify there's no pending writer. However, 204005f65b5cSDavid Herrmann * via get_user_pages(), drivers might have some pending I/O without any active 204105f65b5cSDavid Herrmann * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all pages 204205f65b5cSDavid Herrmann * and see whether it has an elevated ref-count. If so, we tag them and wait for 204305f65b5cSDavid Herrmann * them to be dropped. 204405f65b5cSDavid Herrmann * The caller must guarantee that no new user will acquire writable references 204505f65b5cSDavid Herrmann * to those pages to avoid races. 
204605f65b5cSDavid Herrmann */ 204740e041a2SDavid Herrmann static int shmem_wait_for_pins(struct address_space *mapping) 204840e041a2SDavid Herrmann { 204905f65b5cSDavid Herrmann struct radix_tree_iter iter; 205005f65b5cSDavid Herrmann void **slot; 205105f65b5cSDavid Herrmann pgoff_t start; 205205f65b5cSDavid Herrmann struct page *page; 205305f65b5cSDavid Herrmann int error, scan; 205405f65b5cSDavid Herrmann 205505f65b5cSDavid Herrmann shmem_tag_pins(mapping); 205605f65b5cSDavid Herrmann 205705f65b5cSDavid Herrmann error = 0; 205805f65b5cSDavid Herrmann for (scan = 0; scan <= LAST_SCAN; scan++) { 205905f65b5cSDavid Herrmann if (!radix_tree_tagged(&mapping->page_tree, SHMEM_TAG_PINNED)) 206005f65b5cSDavid Herrmann break; 206105f65b5cSDavid Herrmann 206205f65b5cSDavid Herrmann if (!scan) 206305f65b5cSDavid Herrmann lru_add_drain_all(); 206405f65b5cSDavid Herrmann else if (schedule_timeout_killable((HZ << scan) / 200)) 206505f65b5cSDavid Herrmann scan = LAST_SCAN; 206605f65b5cSDavid Herrmann 206705f65b5cSDavid Herrmann start = 0; 206805f65b5cSDavid Herrmann rcu_read_lock(); 206905f65b5cSDavid Herrmann radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 207005f65b5cSDavid Herrmann start, SHMEM_TAG_PINNED) { 207105f65b5cSDavid Herrmann 207205f65b5cSDavid Herrmann page = radix_tree_deref_slot(slot); 207305f65b5cSDavid Herrmann if (radix_tree_exception(page)) { 20742cf938aaSMatthew Wilcox if (radix_tree_deref_retry(page)) { 20752cf938aaSMatthew Wilcox slot = radix_tree_iter_retry(&iter); 20762cf938aaSMatthew Wilcox continue; 20772cf938aaSMatthew Wilcox } 207805f65b5cSDavid Herrmann 207905f65b5cSDavid Herrmann page = NULL; 208005f65b5cSDavid Herrmann } 208105f65b5cSDavid Herrmann 208205f65b5cSDavid Herrmann if (page && 208305f65b5cSDavid Herrmann page_count(page) - page_mapcount(page) != 1) { 208405f65b5cSDavid Herrmann if (scan < LAST_SCAN) 208505f65b5cSDavid Herrmann goto continue_resched; 208605f65b5cSDavid Herrmann 208705f65b5cSDavid Herrmann /* 208805f65b5cSDavid Herrmann * On the last scan, we clean up all those tags 208905f65b5cSDavid Herrmann * we inserted; but make a note that we still 209005f65b5cSDavid Herrmann * found pages pinned. 
209105f65b5cSDavid Herrmann */ 209205f65b5cSDavid Herrmann error = -EBUSY; 209305f65b5cSDavid Herrmann } 209405f65b5cSDavid Herrmann 209505f65b5cSDavid Herrmann spin_lock_irq(&mapping->tree_lock); 209605f65b5cSDavid Herrmann radix_tree_tag_clear(&mapping->page_tree, 209705f65b5cSDavid Herrmann iter.index, SHMEM_TAG_PINNED); 209805f65b5cSDavid Herrmann spin_unlock_irq(&mapping->tree_lock); 209905f65b5cSDavid Herrmann continue_resched: 210005f65b5cSDavid Herrmann if (need_resched()) { 210105f65b5cSDavid Herrmann cond_resched_rcu(); 21027165092fSMatthew Wilcox slot = radix_tree_iter_next(&iter); 210305f65b5cSDavid Herrmann } 210405f65b5cSDavid Herrmann } 210505f65b5cSDavid Herrmann rcu_read_unlock(); 210605f65b5cSDavid Herrmann } 210705f65b5cSDavid Herrmann 210805f65b5cSDavid Herrmann return error; 210940e041a2SDavid Herrmann } 211040e041a2SDavid Herrmann 211140e041a2SDavid Herrmann #define F_ALL_SEALS (F_SEAL_SEAL | \ 211240e041a2SDavid Herrmann F_SEAL_SHRINK | \ 211340e041a2SDavid Herrmann F_SEAL_GROW | \ 211440e041a2SDavid Herrmann F_SEAL_WRITE) 211540e041a2SDavid Herrmann 211640e041a2SDavid Herrmann int shmem_add_seals(struct file *file, unsigned int seals) 211740e041a2SDavid Herrmann { 211840e041a2SDavid Herrmann struct inode *inode = file_inode(file); 211940e041a2SDavid Herrmann struct shmem_inode_info *info = SHMEM_I(inode); 212040e041a2SDavid Herrmann int error; 212140e041a2SDavid Herrmann 212240e041a2SDavid Herrmann /* 212340e041a2SDavid Herrmann * SEALING 212440e041a2SDavid Herrmann * Sealing allows multiple parties to share a shmem-file but restrict 212540e041a2SDavid Herrmann * access to a specific subset of file operations. Seals can only be 212640e041a2SDavid Herrmann * added, but never removed. This way, mutually untrusted parties can 212740e041a2SDavid Herrmann * share common memory regions with a well-defined policy. A malicious 212840e041a2SDavid Herrmann * peer can thus never perform unwanted operations on a shared object. 212940e041a2SDavid Herrmann * 213040e041a2SDavid Herrmann * Seals are only supported on special shmem-files and always affect 213140e041a2SDavid Herrmann * the whole underlying inode. Once a seal is set, it may prevent some 213240e041a2SDavid Herrmann * kinds of access to the file. Currently, the following seals are 213340e041a2SDavid Herrmann * defined: 213440e041a2SDavid Herrmann * SEAL_SEAL: Prevent further seals from being set on this file 213540e041a2SDavid Herrmann * SEAL_SHRINK: Prevent the file from shrinking 213640e041a2SDavid Herrmann * SEAL_GROW: Prevent the file from growing 213740e041a2SDavid Herrmann * SEAL_WRITE: Prevent write access to the file 213840e041a2SDavid Herrmann * 213940e041a2SDavid Herrmann * As we don't require any trust relationship between two parties, we 214040e041a2SDavid Herrmann * must prevent seals from being removed. Therefore, sealing a file 214140e041a2SDavid Herrmann * only adds a given set of seals to the file, it never touches 214240e041a2SDavid Herrmann * existing seals. Furthermore, the "setting seals"-operation can be 214340e041a2SDavid Herrmann * sealed itself, which basically prevents any further seal from being 214440e041a2SDavid Herrmann * added. 214540e041a2SDavid Herrmann * 214640e041a2SDavid Herrmann * Semantics of sealing are only defined on volatile files. Only 214740e041a2SDavid Herrmann * anonymous shmem files support sealing. More importantly, seals are 214840e041a2SDavid Herrmann * never written to disk. Therefore, there's no plan to support it on 214940e041a2SDavid Herrmann * other file types. 
215040e041a2SDavid Herrmann */ 215140e041a2SDavid Herrmann 215240e041a2SDavid Herrmann if (file->f_op != &shmem_file_operations) 215340e041a2SDavid Herrmann return -EINVAL; 215440e041a2SDavid Herrmann if (!(file->f_mode & FMODE_WRITE)) 215540e041a2SDavid Herrmann return -EPERM; 215640e041a2SDavid Herrmann if (seals & ~(unsigned int)F_ALL_SEALS) 215740e041a2SDavid Herrmann return -EINVAL; 215840e041a2SDavid Herrmann 21595955102cSAl Viro inode_lock(inode); 216040e041a2SDavid Herrmann 216140e041a2SDavid Herrmann if (info->seals & F_SEAL_SEAL) { 216240e041a2SDavid Herrmann error = -EPERM; 216340e041a2SDavid Herrmann goto unlock; 216440e041a2SDavid Herrmann } 216540e041a2SDavid Herrmann 216640e041a2SDavid Herrmann if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) { 216740e041a2SDavid Herrmann error = mapping_deny_writable(file->f_mapping); 216840e041a2SDavid Herrmann if (error) 216940e041a2SDavid Herrmann goto unlock; 217040e041a2SDavid Herrmann 217140e041a2SDavid Herrmann error = shmem_wait_for_pins(file->f_mapping); 217240e041a2SDavid Herrmann if (error) { 217340e041a2SDavid Herrmann mapping_allow_writable(file->f_mapping); 217440e041a2SDavid Herrmann goto unlock; 217540e041a2SDavid Herrmann } 217640e041a2SDavid Herrmann } 217740e041a2SDavid Herrmann 217840e041a2SDavid Herrmann info->seals |= seals; 217940e041a2SDavid Herrmann error = 0; 218040e041a2SDavid Herrmann 218140e041a2SDavid Herrmann unlock: 21825955102cSAl Viro inode_unlock(inode); 218340e041a2SDavid Herrmann return error; 218440e041a2SDavid Herrmann } 218540e041a2SDavid Herrmann EXPORT_SYMBOL_GPL(shmem_add_seals); 218640e041a2SDavid Herrmann 218740e041a2SDavid Herrmann int shmem_get_seals(struct file *file) 218840e041a2SDavid Herrmann { 218940e041a2SDavid Herrmann if (file->f_op != &shmem_file_operations) 219040e041a2SDavid Herrmann return -EINVAL; 219140e041a2SDavid Herrmann 219240e041a2SDavid Herrmann return SHMEM_I(file_inode(file))->seals; 219340e041a2SDavid Herrmann } 219440e041a2SDavid Herrmann EXPORT_SYMBOL_GPL(shmem_get_seals); 219540e041a2SDavid Herrmann 219640e041a2SDavid Herrmann long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg) 219740e041a2SDavid Herrmann { 219840e041a2SDavid Herrmann long error; 219940e041a2SDavid Herrmann 220040e041a2SDavid Herrmann switch (cmd) { 220140e041a2SDavid Herrmann case F_ADD_SEALS: 220240e041a2SDavid Herrmann /* disallow upper 32bit */ 220340e041a2SDavid Herrmann if (arg > UINT_MAX) 220440e041a2SDavid Herrmann return -EINVAL; 220540e041a2SDavid Herrmann 220640e041a2SDavid Herrmann error = shmem_add_seals(file, arg); 220740e041a2SDavid Herrmann break; 220840e041a2SDavid Herrmann case F_GET_SEALS: 220940e041a2SDavid Herrmann error = shmem_get_seals(file); 221040e041a2SDavid Herrmann break; 221140e041a2SDavid Herrmann default: 221240e041a2SDavid Herrmann error = -EINVAL; 221340e041a2SDavid Herrmann break; 221440e041a2SDavid Herrmann } 221540e041a2SDavid Herrmann 221640e041a2SDavid Herrmann return error; 221740e041a2SDavid Herrmann } 221840e041a2SDavid Herrmann 221983e4fa9cSHugh Dickins static long shmem_fallocate(struct file *file, int mode, loff_t offset, 222083e4fa9cSHugh Dickins loff_t len) 222183e4fa9cSHugh Dickins { 2222496ad9aaSAl Viro struct inode *inode = file_inode(file); 2223e2d12e22SHugh Dickins struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 222440e041a2SDavid Herrmann struct shmem_inode_info *info = SHMEM_I(inode); 22251aac1400SHugh Dickins struct shmem_falloc shmem_falloc; 2226e2d12e22SHugh Dickins pgoff_t start, index, end; 
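/*
 * Editor's note (illustrative sketch, not part of the original source):
 * the two cases handled by this function correspond to userspace calls
 * roughly like
 *
 *	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, len);
 *	fallocate(fd, 0, off, len);
 *
 * vfs_fallocate() has already rejected PUNCH_HOLE without KEEP_SIZE, so
 * the mode check below only filters out otherwise unsupported flags.
 */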
2227e2d12e22SHugh Dickins int error; 222883e4fa9cSHugh Dickins 222913ace4d0SHugh Dickins if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 223013ace4d0SHugh Dickins return -EOPNOTSUPP; 223113ace4d0SHugh Dickins 22325955102cSAl Viro inode_lock(inode); 223383e4fa9cSHugh Dickins 223483e4fa9cSHugh Dickins if (mode & FALLOC_FL_PUNCH_HOLE) { 223583e4fa9cSHugh Dickins struct address_space *mapping = file->f_mapping; 223683e4fa9cSHugh Dickins loff_t unmap_start = round_up(offset, PAGE_SIZE); 223783e4fa9cSHugh Dickins loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; 22388e205f77SHugh Dickins DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq); 223983e4fa9cSHugh Dickins 224040e041a2SDavid Herrmann /* protected by i_mutex */ 224140e041a2SDavid Herrmann if (info->seals & F_SEAL_WRITE) { 224240e041a2SDavid Herrmann error = -EPERM; 224340e041a2SDavid Herrmann goto out; 224440e041a2SDavid Herrmann } 224540e041a2SDavid Herrmann 22468e205f77SHugh Dickins shmem_falloc.waitq = &shmem_falloc_waitq; 2247f00cdc6dSHugh Dickins shmem_falloc.start = unmap_start >> PAGE_SHIFT; 2248f00cdc6dSHugh Dickins shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; 2249f00cdc6dSHugh Dickins spin_lock(&inode->i_lock); 2250f00cdc6dSHugh Dickins inode->i_private = &shmem_falloc; 2251f00cdc6dSHugh Dickins spin_unlock(&inode->i_lock); 2252f00cdc6dSHugh Dickins 225383e4fa9cSHugh Dickins if ((u64)unmap_end > (u64)unmap_start) 225483e4fa9cSHugh Dickins unmap_mapping_range(mapping, unmap_start, 225583e4fa9cSHugh Dickins 1 + unmap_end - unmap_start, 0); 225683e4fa9cSHugh Dickins shmem_truncate_range(inode, offset, offset + len - 1); 225783e4fa9cSHugh Dickins /* No need to unmap again: hole-punching leaves COWed pages */ 22588e205f77SHugh Dickins 22598e205f77SHugh Dickins spin_lock(&inode->i_lock); 22608e205f77SHugh Dickins inode->i_private = NULL; 22618e205f77SHugh Dickins wake_up_all(&shmem_falloc_waitq); 22628e205f77SHugh Dickins spin_unlock(&inode->i_lock); 226383e4fa9cSHugh Dickins error = 0; 22648e205f77SHugh Dickins goto out; 226583e4fa9cSHugh Dickins } 226683e4fa9cSHugh Dickins 2267e2d12e22SHugh Dickins /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */ 2268e2d12e22SHugh Dickins error = inode_newsize_ok(inode, offset + len); 2269e2d12e22SHugh Dickins if (error) 2270e2d12e22SHugh Dickins goto out; 2271e2d12e22SHugh Dickins 227240e041a2SDavid Herrmann if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) { 227340e041a2SDavid Herrmann error = -EPERM; 227440e041a2SDavid Herrmann goto out; 227540e041a2SDavid Herrmann } 227640e041a2SDavid Herrmann 227709cbfeafSKirill A. Shutemov start = offset >> PAGE_SHIFT; 227809cbfeafSKirill A. 
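/*
 * Editor's note (added for clarity, not in the original file): start
 * rounds the byte offset down to a page index while end, computed on the
 * next line, rounds up, so a range that only partially covers its first
 * or last page still has those pages allocated in full.
 */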
Shutemov end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 2279e2d12e22SHugh Dickins /* Try to avoid a swapstorm if len is impossible to satisfy */ 2280e2d12e22SHugh Dickins if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) { 2281e2d12e22SHugh Dickins error = -ENOSPC; 2282e2d12e22SHugh Dickins goto out; 2283e2d12e22SHugh Dickins } 2284e2d12e22SHugh Dickins 22858e205f77SHugh Dickins shmem_falloc.waitq = NULL; 22861aac1400SHugh Dickins shmem_falloc.start = start; 22871aac1400SHugh Dickins shmem_falloc.next = start; 22881aac1400SHugh Dickins shmem_falloc.nr_falloced = 0; 22891aac1400SHugh Dickins shmem_falloc.nr_unswapped = 0; 22901aac1400SHugh Dickins spin_lock(&inode->i_lock); 22911aac1400SHugh Dickins inode->i_private = &shmem_falloc; 22921aac1400SHugh Dickins spin_unlock(&inode->i_lock); 22931aac1400SHugh Dickins 2294e2d12e22SHugh Dickins for (index = start; index < end; index++) { 2295e2d12e22SHugh Dickins struct page *page; 2296e2d12e22SHugh Dickins 2297e2d12e22SHugh Dickins /* 2298e2d12e22SHugh Dickins * Good, the fallocate(2) manpage permits EINTR: we may have 2299e2d12e22SHugh Dickins * been interrupted because we are using up too much memory. 2300e2d12e22SHugh Dickins */ 2301e2d12e22SHugh Dickins if (signal_pending(current)) 2302e2d12e22SHugh Dickins error = -EINTR; 23031aac1400SHugh Dickins else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced) 23041aac1400SHugh Dickins error = -ENOMEM; 2305e2d12e22SHugh Dickins else 23069e18eb29SAndres Lagar-Cavilla error = shmem_getpage(inode, index, &page, SGP_FALLOC); 2307e2d12e22SHugh Dickins if (error) { 23081635f6a7SHugh Dickins /* Remove the !PageUptodate pages we added */ 23097f556567SHugh Dickins if (index > start) { 23101635f6a7SHugh Dickins shmem_undo_range(inode, 231109cbfeafSKirill A. Shutemov (loff_t)start << PAGE_SHIFT, 2312b9b4bb26SAnthony Romano ((loff_t)index << PAGE_SHIFT) - 1, true); 23137f556567SHugh Dickins } 23141aac1400SHugh Dickins goto undone; 2315e2d12e22SHugh Dickins } 2316e2d12e22SHugh Dickins 2317e2d12e22SHugh Dickins /* 23181aac1400SHugh Dickins * Inform shmem_writepage() how far we have reached. 23191aac1400SHugh Dickins * No need for lock or barrier: we have the page lock. 23201aac1400SHugh Dickins */ 23211aac1400SHugh Dickins shmem_falloc.next++; 23221aac1400SHugh Dickins if (!PageUptodate(page)) 23231aac1400SHugh Dickins shmem_falloc.nr_falloced++; 23241aac1400SHugh Dickins 23251aac1400SHugh Dickins /* 23261635f6a7SHugh Dickins * If !PageUptodate, leave it that way so that freeable pages 23271635f6a7SHugh Dickins * can be recognized if we need to rollback on error later. 23281635f6a7SHugh Dickins * But set_page_dirty so that memory pressure will swap rather 2329e2d12e22SHugh Dickins * than free the pages we are allocating (and SGP_CACHE pages 2330e2d12e22SHugh Dickins * might still be clean: we now need to mark those dirty too). 2331e2d12e22SHugh Dickins */ 2332e2d12e22SHugh Dickins set_page_dirty(page); 2333e2d12e22SHugh Dickins unlock_page(page); 233409cbfeafSKirill A. 
Shutemov put_page(page); 2335e2d12e22SHugh Dickins cond_resched(); 2336e2d12e22SHugh Dickins } 2337e2d12e22SHugh Dickins 2338e2d12e22SHugh Dickins if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) 2339e2d12e22SHugh Dickins i_size_write(inode, offset + len); 2340e2d12e22SHugh Dickins inode->i_ctime = CURRENT_TIME; 23411aac1400SHugh Dickins undone: 23421aac1400SHugh Dickins spin_lock(&inode->i_lock); 23431aac1400SHugh Dickins inode->i_private = NULL; 23441aac1400SHugh Dickins spin_unlock(&inode->i_lock); 2345e2d12e22SHugh Dickins out: 23465955102cSAl Viro inode_unlock(inode); 234783e4fa9cSHugh Dickins return error; 234883e4fa9cSHugh Dickins } 234983e4fa9cSHugh Dickins 2350726c3342SDavid Howells static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) 23511da177e4SLinus Torvalds { 2352726c3342SDavid Howells struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); 23531da177e4SLinus Torvalds 23541da177e4SLinus Torvalds buf->f_type = TMPFS_MAGIC; 235509cbfeafSKirill A. Shutemov buf->f_bsize = PAGE_SIZE; 23561da177e4SLinus Torvalds buf->f_namelen = NAME_MAX; 23570edd73b3SHugh Dickins if (sbinfo->max_blocks) { 23581da177e4SLinus Torvalds buf->f_blocks = sbinfo->max_blocks; 235941ffe5d5SHugh Dickins buf->f_bavail = 236041ffe5d5SHugh Dickins buf->f_bfree = sbinfo->max_blocks - 236141ffe5d5SHugh Dickins percpu_counter_sum(&sbinfo->used_blocks); 23620edd73b3SHugh Dickins } 23630edd73b3SHugh Dickins if (sbinfo->max_inodes) { 23641da177e4SLinus Torvalds buf->f_files = sbinfo->max_inodes; 23651da177e4SLinus Torvalds buf->f_ffree = sbinfo->free_inodes; 23661da177e4SLinus Torvalds } 23671da177e4SLinus Torvalds /* else leave those fields 0 like simple_statfs */ 23681da177e4SLinus Torvalds return 0; 23691da177e4SLinus Torvalds } 23701da177e4SLinus Torvalds 23711da177e4SLinus Torvalds /* 23721da177e4SLinus Torvalds * File creation. Allocate an inode, and we're done.. 
23731da177e4SLinus Torvalds */ 23741da177e4SLinus Torvalds static int 23751a67aafbSAl Viro shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) 23761da177e4SLinus Torvalds { 23770b0a0806SHugh Dickins struct inode *inode; 23781da177e4SLinus Torvalds int error = -ENOSPC; 23791da177e4SLinus Torvalds 2380454abafeSDmitry Monakhov inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE); 23811da177e4SLinus Torvalds if (inode) { 2382feda821eSChristoph Hellwig error = simple_acl_create(dir, inode); 2383feda821eSChristoph Hellwig if (error) 2384feda821eSChristoph Hellwig goto out_iput; 23852a7dba39SEric Paris error = security_inode_init_security(inode, dir, 23869d8f13baSMimi Zohar &dentry->d_name, 23876d9d88d0SJarkko Sakkinen shmem_initxattrs, NULL); 2388feda821eSChristoph Hellwig if (error && error != -EOPNOTSUPP) 2389feda821eSChristoph Hellwig goto out_iput; 239037ec43cdSMimi Zohar 2391718deb6bSAl Viro error = 0; 23921da177e4SLinus Torvalds dir->i_size += BOGO_DIRENT_SIZE; 23931da177e4SLinus Torvalds dir->i_ctime = dir->i_mtime = CURRENT_TIME; 23941da177e4SLinus Torvalds d_instantiate(dentry, inode); 23951da177e4SLinus Torvalds dget(dentry); /* Extra count - pin the dentry in core */ 23961da177e4SLinus Torvalds } 23971da177e4SLinus Torvalds return error; 2398feda821eSChristoph Hellwig out_iput: 2399feda821eSChristoph Hellwig iput(inode); 2400feda821eSChristoph Hellwig return error; 24011da177e4SLinus Torvalds } 24021da177e4SLinus Torvalds 240360545d0dSAl Viro static int 240460545d0dSAl Viro shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) 240560545d0dSAl Viro { 240660545d0dSAl Viro struct inode *inode; 240760545d0dSAl Viro int error = -ENOSPC; 240860545d0dSAl Viro 240960545d0dSAl Viro inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE); 241060545d0dSAl Viro if (inode) { 241160545d0dSAl Viro error = security_inode_init_security(inode, dir, 241260545d0dSAl Viro NULL, 241360545d0dSAl Viro shmem_initxattrs, NULL); 2414feda821eSChristoph Hellwig if (error && error != -EOPNOTSUPP) 2415feda821eSChristoph Hellwig goto out_iput; 2416feda821eSChristoph Hellwig error = simple_acl_create(dir, inode); 2417feda821eSChristoph Hellwig if (error) 2418feda821eSChristoph Hellwig goto out_iput; 241960545d0dSAl Viro d_tmpfile(dentry, inode); 242060545d0dSAl Viro } 242160545d0dSAl Viro return error; 2422feda821eSChristoph Hellwig out_iput: 2423feda821eSChristoph Hellwig iput(inode); 2424feda821eSChristoph Hellwig return error; 242560545d0dSAl Viro } 242660545d0dSAl Viro 242718bb1db3SAl Viro static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 24281da177e4SLinus Torvalds { 24291da177e4SLinus Torvalds int error; 24301da177e4SLinus Torvalds 24311da177e4SLinus Torvalds if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0))) 24321da177e4SLinus Torvalds return error; 2433d8c76e6fSDave Hansen inc_nlink(dir); 24341da177e4SLinus Torvalds return 0; 24351da177e4SLinus Torvalds } 24361da177e4SLinus Torvalds 24374acdaf27SAl Viro static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode, 2438ebfc3b49SAl Viro bool excl) 24391da177e4SLinus Torvalds { 24401da177e4SLinus Torvalds return shmem_mknod(dir, dentry, mode | S_IFREG, 0); 24411da177e4SLinus Torvalds } 24421da177e4SLinus Torvalds 24431da177e4SLinus Torvalds /* 24441da177e4SLinus Torvalds * Link a file.. 
24451da177e4SLinus Torvalds */ 24461da177e4SLinus Torvalds static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) 24471da177e4SLinus Torvalds { 244875c3cfa8SDavid Howells struct inode *inode = d_inode(old_dentry); 24495b04c689SPavel Emelyanov int ret; 24501da177e4SLinus Torvalds 24511da177e4SLinus Torvalds /* 24521da177e4SLinus Torvalds * No ordinary (disk based) filesystem counts links as inodes; 24531da177e4SLinus Torvalds * but each new link needs a new dentry, pinning lowmem, and 24541da177e4SLinus Torvalds * tmpfs dentries cannot be pruned until they are unlinked. 24551da177e4SLinus Torvalds */ 24565b04c689SPavel Emelyanov ret = shmem_reserve_inode(inode->i_sb); 24575b04c689SPavel Emelyanov if (ret) 24585b04c689SPavel Emelyanov goto out; 24591da177e4SLinus Torvalds 24601da177e4SLinus Torvalds dir->i_size += BOGO_DIRENT_SIZE; 24611da177e4SLinus Torvalds inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 2462d8c76e6fSDave Hansen inc_nlink(inode); 24637de9c6eeSAl Viro ihold(inode); /* New dentry reference */ 24641da177e4SLinus Torvalds dget(dentry); /* Extra pinning count for the created dentry */ 24651da177e4SLinus Torvalds d_instantiate(dentry, inode); 24665b04c689SPavel Emelyanov out: 24675b04c689SPavel Emelyanov return ret; 24681da177e4SLinus Torvalds } 24691da177e4SLinus Torvalds 24701da177e4SLinus Torvalds static int shmem_unlink(struct inode *dir, struct dentry *dentry) 24711da177e4SLinus Torvalds { 247275c3cfa8SDavid Howells struct inode *inode = d_inode(dentry); 24731da177e4SLinus Torvalds 24745b04c689SPavel Emelyanov if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) 24755b04c689SPavel Emelyanov shmem_free_inode(inode->i_sb); 24761da177e4SLinus Torvalds 24771da177e4SLinus Torvalds dir->i_size -= BOGO_DIRENT_SIZE; 24781da177e4SLinus Torvalds inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 24799a53c3a7SDave Hansen drop_nlink(inode); 24801da177e4SLinus Torvalds dput(dentry); /* Undo the count from "create" - this does all the work */ 24811da177e4SLinus Torvalds return 0; 24821da177e4SLinus Torvalds } 24831da177e4SLinus Torvalds 24841da177e4SLinus Torvalds static int shmem_rmdir(struct inode *dir, struct dentry *dentry) 24851da177e4SLinus Torvalds { 24861da177e4SLinus Torvalds if (!simple_empty(dentry)) 24871da177e4SLinus Torvalds return -ENOTEMPTY; 24881da177e4SLinus Torvalds 248975c3cfa8SDavid Howells drop_nlink(d_inode(dentry)); 24909a53c3a7SDave Hansen drop_nlink(dir); 24911da177e4SLinus Torvalds return shmem_unlink(dir, dentry); 24921da177e4SLinus Torvalds } 24931da177e4SLinus Torvalds 249437456771SMiklos Szeredi static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) 249537456771SMiklos Szeredi { 2496e36cb0b8SDavid Howells bool old_is_dir = d_is_dir(old_dentry); 2497e36cb0b8SDavid Howells bool new_is_dir = d_is_dir(new_dentry); 249837456771SMiklos Szeredi 249937456771SMiklos Szeredi if (old_dir != new_dir && old_is_dir != new_is_dir) { 250037456771SMiklos Szeredi if (old_is_dir) { 250137456771SMiklos Szeredi drop_nlink(old_dir); 250237456771SMiklos Szeredi inc_nlink(new_dir); 250337456771SMiklos Szeredi } else { 250437456771SMiklos Szeredi drop_nlink(new_dir); 250537456771SMiklos Szeredi inc_nlink(old_dir); 250637456771SMiklos Szeredi } 250737456771SMiklos Szeredi } 250837456771SMiklos Szeredi old_dir->i_ctime = old_dir->i_mtime = 250937456771SMiklos Szeredi new_dir->i_ctime = new_dir->i_mtime = 251075c3cfa8SDavid Howells 
d_inode(old_dentry)->i_ctime = 251175c3cfa8SDavid Howells d_inode(new_dentry)->i_ctime = CURRENT_TIME; 251237456771SMiklos Szeredi 251337456771SMiklos Szeredi return 0; 251437456771SMiklos Szeredi } 251537456771SMiklos Szeredi 251646fdb794SMiklos Szeredi static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry) 251746fdb794SMiklos Szeredi { 251846fdb794SMiklos Szeredi struct dentry *whiteout; 251946fdb794SMiklos Szeredi int error; 252046fdb794SMiklos Szeredi 252146fdb794SMiklos Szeredi whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name); 252246fdb794SMiklos Szeredi if (!whiteout) 252346fdb794SMiklos Szeredi return -ENOMEM; 252446fdb794SMiklos Szeredi 252546fdb794SMiklos Szeredi error = shmem_mknod(old_dir, whiteout, 252646fdb794SMiklos Szeredi S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); 252746fdb794SMiklos Szeredi dput(whiteout); 252846fdb794SMiklos Szeredi if (error) 252946fdb794SMiklos Szeredi return error; 253046fdb794SMiklos Szeredi 253146fdb794SMiklos Szeredi /* 253246fdb794SMiklos Szeredi * Cheat and hash the whiteout while the old dentry is still in 253346fdb794SMiklos Szeredi * place, instead of playing games with FS_RENAME_DOES_D_MOVE. 253446fdb794SMiklos Szeredi * 253546fdb794SMiklos Szeredi * d_lookup() will consistently find one of them at this point, 253646fdb794SMiklos Szeredi * not sure which one, but that isn't even important. 253746fdb794SMiklos Szeredi */ 253846fdb794SMiklos Szeredi d_rehash(whiteout); 253946fdb794SMiklos Szeredi return 0; 254046fdb794SMiklos Szeredi } 254146fdb794SMiklos Szeredi 25421da177e4SLinus Torvalds /* 25431da177e4SLinus Torvalds * The VFS layer already does all the dentry stuff for rename, 25441da177e4SLinus Torvalds * we just have to decrement the usage count for the target if 25451da177e4SLinus Torvalds * it exists so that the VFS layer correctly free's it when it 25461da177e4SLinus Torvalds * gets overwritten. 
25471da177e4SLinus Torvalds */ 25483b69ff51SMiklos Szeredi static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) 25491da177e4SLinus Torvalds { 255075c3cfa8SDavid Howells struct inode *inode = d_inode(old_dentry); 25511da177e4SLinus Torvalds int they_are_dirs = S_ISDIR(inode->i_mode); 25521da177e4SLinus Torvalds 255346fdb794SMiklos Szeredi if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) 25543b69ff51SMiklos Szeredi return -EINVAL; 25553b69ff51SMiklos Szeredi 255637456771SMiklos Szeredi if (flags & RENAME_EXCHANGE) 255737456771SMiklos Szeredi return shmem_exchange(old_dir, old_dentry, new_dir, new_dentry); 255837456771SMiklos Szeredi 25591da177e4SLinus Torvalds if (!simple_empty(new_dentry)) 25601da177e4SLinus Torvalds return -ENOTEMPTY; 25611da177e4SLinus Torvalds 256246fdb794SMiklos Szeredi if (flags & RENAME_WHITEOUT) { 256346fdb794SMiklos Szeredi int error; 256446fdb794SMiklos Szeredi 256546fdb794SMiklos Szeredi error = shmem_whiteout(old_dir, old_dentry); 256646fdb794SMiklos Szeredi if (error) 256746fdb794SMiklos Szeredi return error; 256846fdb794SMiklos Szeredi } 256946fdb794SMiklos Szeredi 257075c3cfa8SDavid Howells if (d_really_is_positive(new_dentry)) { 25711da177e4SLinus Torvalds (void) shmem_unlink(new_dir, new_dentry); 2572b928095bSMiklos Szeredi if (they_are_dirs) { 257375c3cfa8SDavid Howells drop_nlink(d_inode(new_dentry)); 25749a53c3a7SDave Hansen drop_nlink(old_dir); 2575b928095bSMiklos Szeredi } 25761da177e4SLinus Torvalds } else if (they_are_dirs) { 25779a53c3a7SDave Hansen drop_nlink(old_dir); 2578d8c76e6fSDave Hansen inc_nlink(new_dir); 25791da177e4SLinus Torvalds } 25801da177e4SLinus Torvalds 25811da177e4SLinus Torvalds old_dir->i_size -= BOGO_DIRENT_SIZE; 25821da177e4SLinus Torvalds new_dir->i_size += BOGO_DIRENT_SIZE; 25831da177e4SLinus Torvalds old_dir->i_ctime = old_dir->i_mtime = 25841da177e4SLinus Torvalds new_dir->i_ctime = new_dir->i_mtime = 25851da177e4SLinus Torvalds inode->i_ctime = CURRENT_TIME; 25861da177e4SLinus Torvalds return 0; 25871da177e4SLinus Torvalds } 25881da177e4SLinus Torvalds 25891da177e4SLinus Torvalds static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname) 25901da177e4SLinus Torvalds { 25911da177e4SLinus Torvalds int error; 25921da177e4SLinus Torvalds int len; 25931da177e4SLinus Torvalds struct inode *inode; 25949276aad6SHugh Dickins struct page *page; 25951da177e4SLinus Torvalds struct shmem_inode_info *info; 25961da177e4SLinus Torvalds 25971da177e4SLinus Torvalds len = strlen(symname) + 1; 259809cbfeafSKirill A. 
Shutemov if (len > PAGE_SIZE) 25991da177e4SLinus Torvalds return -ENAMETOOLONG; 26001da177e4SLinus Torvalds 2601454abafeSDmitry Monakhov inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE); 26021da177e4SLinus Torvalds if (!inode) 26031da177e4SLinus Torvalds return -ENOSPC; 26041da177e4SLinus Torvalds 26059d8f13baSMimi Zohar error = security_inode_init_security(inode, dir, &dentry->d_name, 26066d9d88d0SJarkko Sakkinen shmem_initxattrs, NULL); 2607570bc1c2SStephen Smalley if (error) { 2608570bc1c2SStephen Smalley if (error != -EOPNOTSUPP) { 2609570bc1c2SStephen Smalley iput(inode); 2610570bc1c2SStephen Smalley return error; 2611570bc1c2SStephen Smalley } 2612570bc1c2SStephen Smalley error = 0; 2613570bc1c2SStephen Smalley } 2614570bc1c2SStephen Smalley 26151da177e4SLinus Torvalds info = SHMEM_I(inode); 26161da177e4SLinus Torvalds inode->i_size = len-1; 261769f07ec9SHugh Dickins if (len <= SHORT_SYMLINK_LEN) { 26183ed47db3SAl Viro inode->i_link = kmemdup(symname, len, GFP_KERNEL); 26193ed47db3SAl Viro if (!inode->i_link) { 262069f07ec9SHugh Dickins iput(inode); 262169f07ec9SHugh Dickins return -ENOMEM; 262269f07ec9SHugh Dickins } 262369f07ec9SHugh Dickins inode->i_op = &shmem_short_symlink_operations; 26241da177e4SLinus Torvalds } else { 2625e8ecde25SAl Viro inode_nohighmem(inode); 26269e18eb29SAndres Lagar-Cavilla error = shmem_getpage(inode, 0, &page, SGP_WRITE); 26271da177e4SLinus Torvalds if (error) { 26281da177e4SLinus Torvalds iput(inode); 26291da177e4SLinus Torvalds return error; 26301da177e4SLinus Torvalds } 263114fcc23fSHugh Dickins inode->i_mapping->a_ops = &shmem_aops; 26321da177e4SLinus Torvalds inode->i_op = &shmem_symlink_inode_operations; 263321fc61c7SAl Viro memcpy(page_address(page), symname, len); 2634ec9516fbSHugh Dickins SetPageUptodate(page); 26351da177e4SLinus Torvalds set_page_dirty(page); 26366746aff7SWu Fengguang unlock_page(page); 263709cbfeafSKirill A. 
Shutemov put_page(page); 26381da177e4SLinus Torvalds } 26391da177e4SLinus Torvalds dir->i_size += BOGO_DIRENT_SIZE; 26401da177e4SLinus Torvalds dir->i_ctime = dir->i_mtime = CURRENT_TIME; 26411da177e4SLinus Torvalds d_instantiate(dentry, inode); 26421da177e4SLinus Torvalds dget(dentry); 26431da177e4SLinus Torvalds return 0; 26441da177e4SLinus Torvalds } 26451da177e4SLinus Torvalds 2646fceef393SAl Viro static void shmem_put_link(void *arg) 2647fceef393SAl Viro { 2648fceef393SAl Viro mark_page_accessed(arg); 2649fceef393SAl Viro put_page(arg); 2650fceef393SAl Viro } 2651fceef393SAl Viro 26526b255391SAl Viro static const char *shmem_get_link(struct dentry *dentry, 2653fceef393SAl Viro struct inode *inode, 2654fceef393SAl Viro struct delayed_call *done) 26551da177e4SLinus Torvalds { 26561da177e4SLinus Torvalds struct page *page = NULL; 26576b255391SAl Viro int error; 26586a6c9904SAl Viro if (!dentry) { 26596a6c9904SAl Viro page = find_get_page(inode->i_mapping, 0); 26606a6c9904SAl Viro if (!page) 26616b255391SAl Viro return ERR_PTR(-ECHILD); 26626a6c9904SAl Viro if (!PageUptodate(page)) { 26636a6c9904SAl Viro put_page(page); 26646a6c9904SAl Viro return ERR_PTR(-ECHILD); 26656a6c9904SAl Viro } 26666a6c9904SAl Viro } else { 26679e18eb29SAndres Lagar-Cavilla error = shmem_getpage(inode, 0, &page, SGP_READ); 2668680baacbSAl Viro if (error) 2669680baacbSAl Viro return ERR_PTR(error); 2670d3602444SHugh Dickins unlock_page(page); 26711da177e4SLinus Torvalds } 2672fceef393SAl Viro set_delayed_call(done, shmem_put_link, page); 267321fc61c7SAl Viro return page_address(page); 26741da177e4SLinus Torvalds } 26751da177e4SLinus Torvalds 2676b09e0fa4SEric Paris #ifdef CONFIG_TMPFS_XATTR 2677b09e0fa4SEric Paris /* 2678b09e0fa4SEric Paris * Superblocks without xattr inode operations may get some security.* xattr 2679b09e0fa4SEric Paris * support from the LSM "for free". As soon as we have any other xattrs 2680b09e0fa4SEric Paris * like ACLs, we also need to implement the security.* handlers at 2681b09e0fa4SEric Paris * filesystem level, though. 2682b09e0fa4SEric Paris */ 2683b09e0fa4SEric Paris 26846d9d88d0SJarkko Sakkinen /* 26856d9d88d0SJarkko Sakkinen * Callback for security_inode_init_security() for acquiring xattrs. 
26866d9d88d0SJarkko Sakkinen */ 26876d9d88d0SJarkko Sakkinen static int shmem_initxattrs(struct inode *inode, 26886d9d88d0SJarkko Sakkinen const struct xattr *xattr_array, 26896d9d88d0SJarkko Sakkinen void *fs_info) 26906d9d88d0SJarkko Sakkinen { 26916d9d88d0SJarkko Sakkinen struct shmem_inode_info *info = SHMEM_I(inode); 26926d9d88d0SJarkko Sakkinen const struct xattr *xattr; 269338f38657SAristeu Rozanski struct simple_xattr *new_xattr; 26946d9d88d0SJarkko Sakkinen size_t len; 26956d9d88d0SJarkko Sakkinen 26966d9d88d0SJarkko Sakkinen for (xattr = xattr_array; xattr->name != NULL; xattr++) { 269738f38657SAristeu Rozanski new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len); 26986d9d88d0SJarkko Sakkinen if (!new_xattr) 26996d9d88d0SJarkko Sakkinen return -ENOMEM; 27006d9d88d0SJarkko Sakkinen 27016d9d88d0SJarkko Sakkinen len = strlen(xattr->name) + 1; 27026d9d88d0SJarkko Sakkinen new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len, 27036d9d88d0SJarkko Sakkinen GFP_KERNEL); 27046d9d88d0SJarkko Sakkinen if (!new_xattr->name) { 27056d9d88d0SJarkko Sakkinen kfree(new_xattr); 27066d9d88d0SJarkko Sakkinen return -ENOMEM; 27076d9d88d0SJarkko Sakkinen } 27086d9d88d0SJarkko Sakkinen 27096d9d88d0SJarkko Sakkinen memcpy(new_xattr->name, XATTR_SECURITY_PREFIX, 27106d9d88d0SJarkko Sakkinen XATTR_SECURITY_PREFIX_LEN); 27116d9d88d0SJarkko Sakkinen memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN, 27126d9d88d0SJarkko Sakkinen xattr->name, len); 27136d9d88d0SJarkko Sakkinen 271438f38657SAristeu Rozanski simple_xattr_list_add(&info->xattrs, new_xattr); 27156d9d88d0SJarkko Sakkinen } 27166d9d88d0SJarkko Sakkinen 27176d9d88d0SJarkko Sakkinen return 0; 27186d9d88d0SJarkko Sakkinen } 27196d9d88d0SJarkko Sakkinen 2720aa7c5241SAndreas Gruenbacher static int shmem_xattr_handler_get(const struct xattr_handler *handler, 2721b296821aSAl Viro struct dentry *unused, struct inode *inode, 2722b296821aSAl Viro const char *name, void *buffer, size_t size) 2723aa7c5241SAndreas Gruenbacher { 2724b296821aSAl Viro struct shmem_inode_info *info = SHMEM_I(inode); 2725aa7c5241SAndreas Gruenbacher 2726aa7c5241SAndreas Gruenbacher name = xattr_full_name(handler, name); 2727aa7c5241SAndreas Gruenbacher return simple_xattr_get(&info->xattrs, name, buffer, size); 2728aa7c5241SAndreas Gruenbacher } 2729aa7c5241SAndreas Gruenbacher 2730aa7c5241SAndreas Gruenbacher static int shmem_xattr_handler_set(const struct xattr_handler *handler, 273159301226SAl Viro struct dentry *unused, struct inode *inode, 273259301226SAl Viro const char *name, const void *value, 273359301226SAl Viro size_t size, int flags) 2734aa7c5241SAndreas Gruenbacher { 273559301226SAl Viro struct shmem_inode_info *info = SHMEM_I(inode); 2736aa7c5241SAndreas Gruenbacher 2737aa7c5241SAndreas Gruenbacher name = xattr_full_name(handler, name); 2738aa7c5241SAndreas Gruenbacher return simple_xattr_set(&info->xattrs, name, value, size, flags); 2739aa7c5241SAndreas Gruenbacher } 2740aa7c5241SAndreas Gruenbacher 2741aa7c5241SAndreas Gruenbacher static const struct xattr_handler shmem_security_xattr_handler = { 2742aa7c5241SAndreas Gruenbacher .prefix = XATTR_SECURITY_PREFIX, 2743aa7c5241SAndreas Gruenbacher .get = shmem_xattr_handler_get, 2744aa7c5241SAndreas Gruenbacher .set = shmem_xattr_handler_set, 2745aa7c5241SAndreas Gruenbacher }; 2746aa7c5241SAndreas Gruenbacher 2747aa7c5241SAndreas Gruenbacher static const struct xattr_handler shmem_trusted_xattr_handler = { 2748aa7c5241SAndreas Gruenbacher .prefix = XATTR_TRUSTED_PREFIX, 2749aa7c5241SAndreas Gruenbacher 
.get = shmem_xattr_handler_get, 2750aa7c5241SAndreas Gruenbacher .set = shmem_xattr_handler_set, 2751aa7c5241SAndreas Gruenbacher }; 2752aa7c5241SAndreas Gruenbacher 2753b09e0fa4SEric Paris static const struct xattr_handler *shmem_xattr_handlers[] = { 2754b09e0fa4SEric Paris #ifdef CONFIG_TMPFS_POSIX_ACL 2755feda821eSChristoph Hellwig &posix_acl_access_xattr_handler, 2756feda821eSChristoph Hellwig &posix_acl_default_xattr_handler, 2757b09e0fa4SEric Paris #endif 2758aa7c5241SAndreas Gruenbacher &shmem_security_xattr_handler, 2759aa7c5241SAndreas Gruenbacher &shmem_trusted_xattr_handler, 2760b09e0fa4SEric Paris NULL 2761b09e0fa4SEric Paris }; 2762b09e0fa4SEric Paris 2763b09e0fa4SEric Paris static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) 2764b09e0fa4SEric Paris { 276575c3cfa8SDavid Howells struct shmem_inode_info *info = SHMEM_I(d_inode(dentry)); 2766786534b9SAndreas Gruenbacher return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size); 2767b09e0fa4SEric Paris } 2768b09e0fa4SEric Paris #endif /* CONFIG_TMPFS_XATTR */ 2769b09e0fa4SEric Paris 277069f07ec9SHugh Dickins static const struct inode_operations shmem_short_symlink_operations = { 27711da177e4SLinus Torvalds .readlink = generic_readlink, 27726b255391SAl Viro .get_link = simple_get_link, 2773b09e0fa4SEric Paris #ifdef CONFIG_TMPFS_XATTR 2774aa7c5241SAndreas Gruenbacher .setxattr = generic_setxattr, 2775aa7c5241SAndreas Gruenbacher .getxattr = generic_getxattr, 2776b09e0fa4SEric Paris .listxattr = shmem_listxattr, 2777aa7c5241SAndreas Gruenbacher .removexattr = generic_removexattr, 2778b09e0fa4SEric Paris #endif 27791da177e4SLinus Torvalds }; 27801da177e4SLinus Torvalds 278192e1d5beSArjan van de Ven static const struct inode_operations shmem_symlink_inode_operations = { 27821da177e4SLinus Torvalds .readlink = generic_readlink, 27836b255391SAl Viro .get_link = shmem_get_link, 2784b09e0fa4SEric Paris #ifdef CONFIG_TMPFS_XATTR 2785aa7c5241SAndreas Gruenbacher .setxattr = generic_setxattr, 2786aa7c5241SAndreas Gruenbacher .getxattr = generic_getxattr, 2787b09e0fa4SEric Paris .listxattr = shmem_listxattr, 2788aa7c5241SAndreas Gruenbacher .removexattr = generic_removexattr, 278939f0247dSAndreas Gruenbacher #endif 2790b09e0fa4SEric Paris }; 279139f0247dSAndreas Gruenbacher 279291828a40SDavid M. Grimes static struct dentry *shmem_get_parent(struct dentry *child) 279391828a40SDavid M. Grimes { 279491828a40SDavid M. Grimes return ERR_PTR(-ESTALE); 279591828a40SDavid M. Grimes } 279691828a40SDavid M. Grimes 279791828a40SDavid M. Grimes static int shmem_match(struct inode *ino, void *vfh) 279891828a40SDavid M. Grimes { 279991828a40SDavid M. Grimes __u32 *fh = vfh; 280091828a40SDavid M. Grimes __u64 inum = fh[2]; 280191828a40SDavid M. Grimes inum = (inum << 32) | fh[1]; 280291828a40SDavid M. Grimes return ino->i_ino == inum && fh[0] == ino->i_generation; 280391828a40SDavid M. Grimes } 280491828a40SDavid M. Grimes 2805480b116cSChristoph Hellwig static struct dentry *shmem_fh_to_dentry(struct super_block *sb, 2806480b116cSChristoph Hellwig struct fid *fid, int fh_len, int fh_type) 280791828a40SDavid M. Grimes { 280891828a40SDavid M. Grimes struct inode *inode; 2809480b116cSChristoph Hellwig struct dentry *dentry = NULL; 281035c2a7f4SHugh Dickins u64 inum; 281191828a40SDavid M. 
Grimes 2812480b116cSChristoph Hellwig if (fh_len < 3) 2813480b116cSChristoph Hellwig return NULL; 2814480b116cSChristoph Hellwig 281535c2a7f4SHugh Dickins inum = fid->raw[2]; 281635c2a7f4SHugh Dickins inum = (inum << 32) | fid->raw[1]; 281735c2a7f4SHugh Dickins 2818480b116cSChristoph Hellwig inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]), 2819480b116cSChristoph Hellwig shmem_match, fid->raw); 282091828a40SDavid M. Grimes if (inode) { 2821480b116cSChristoph Hellwig dentry = d_find_alias(inode); 282291828a40SDavid M. Grimes iput(inode); 282391828a40SDavid M. Grimes } 282491828a40SDavid M. Grimes 2825480b116cSChristoph Hellwig return dentry; 282691828a40SDavid M. Grimes } 282791828a40SDavid M. Grimes 2828b0b0382bSAl Viro static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len, 2829b0b0382bSAl Viro struct inode *parent) 283091828a40SDavid M. Grimes { 28315fe0c237SAneesh Kumar K.V if (*len < 3) { 28325fe0c237SAneesh Kumar K.V *len = 3; 283394e07a75SNamjae Jeon return FILEID_INVALID; 28345fe0c237SAneesh Kumar K.V } 283591828a40SDavid M. Grimes 28361d3382cbSAl Viro if (inode_unhashed(inode)) { 283791828a40SDavid M. Grimes /* Unfortunately insert_inode_hash is not idempotent, 283891828a40SDavid M. Grimes * so as we hash inodes here rather than at creation 283991828a40SDavid M. Grimes * time, we need a lock to ensure we only try 284091828a40SDavid M. Grimes * to do it once 284191828a40SDavid M. Grimes */ 284291828a40SDavid M. Grimes static DEFINE_SPINLOCK(lock); 284391828a40SDavid M. Grimes spin_lock(&lock); 28441d3382cbSAl Viro if (inode_unhashed(inode)) 284591828a40SDavid M. Grimes __insert_inode_hash(inode, 284691828a40SDavid M. Grimes inode->i_ino + inode->i_generation); 284791828a40SDavid M. Grimes spin_unlock(&lock); 284891828a40SDavid M. Grimes } 284991828a40SDavid M. Grimes 285091828a40SDavid M. Grimes fh[0] = inode->i_generation; 285191828a40SDavid M. Grimes fh[1] = inode->i_ino; 285291828a40SDavid M. Grimes fh[2] = ((__u64)inode->i_ino) >> 32; 285391828a40SDavid M. Grimes 285491828a40SDavid M. Grimes *len = 3; 285591828a40SDavid M. Grimes return 1; 285691828a40SDavid M. Grimes } 285791828a40SDavid M. Grimes 285839655164SChristoph Hellwig static const struct export_operations shmem_export_ops = { 285991828a40SDavid M. Grimes .get_parent = shmem_get_parent, 286091828a40SDavid M. Grimes .encode_fh = shmem_encode_fh, 2861480b116cSChristoph Hellwig .fh_to_dentry = shmem_fh_to_dentry, 286291828a40SDavid M. Grimes }; 286391828a40SDavid M. Grimes 2864680d794bSakpm@linux-foundation.org static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, 2865680d794bSakpm@linux-foundation.org bool remount) 28661da177e4SLinus Torvalds { 28671da177e4SLinus Torvalds char *this_char, *value, *rest; 286849cd0a5cSGreg Thelen struct mempolicy *mpol = NULL; 28698751e039SEric W. Biederman uid_t uid; 28708751e039SEric W. Biederman gid_t gid; 28711da177e4SLinus Torvalds 2872b00dc3adSHugh Dickins while (options != NULL) { 2873b00dc3adSHugh Dickins this_char = options; 2874b00dc3adSHugh Dickins for (;;) { 2875b00dc3adSHugh Dickins /* 2876b00dc3adSHugh Dickins * NUL-terminate this option: unfortunately, 2877b00dc3adSHugh Dickins * mount options form a comma-separated list, 2878b00dc3adSHugh Dickins * but mpol's nodelist may also contain commas. 
2879b00dc3adSHugh Dickins */ 2880b00dc3adSHugh Dickins options = strchr(options, ','); 2881b00dc3adSHugh Dickins if (options == NULL) 2882b00dc3adSHugh Dickins break; 2883b00dc3adSHugh Dickins options++; 2884b00dc3adSHugh Dickins if (!isdigit(*options)) { 2885b00dc3adSHugh Dickins options[-1] = '\0'; 2886b00dc3adSHugh Dickins break; 2887b00dc3adSHugh Dickins } 2888b00dc3adSHugh Dickins } 28891da177e4SLinus Torvalds if (!*this_char) 28901da177e4SLinus Torvalds continue; 28911da177e4SLinus Torvalds if ((value = strchr(this_char,'=')) != NULL) { 28921da177e4SLinus Torvalds *value++ = 0; 28931da177e4SLinus Torvalds } else { 28941170532bSJoe Perches pr_err("tmpfs: No value for mount option '%s'\n", 28951da177e4SLinus Torvalds this_char); 289649cd0a5cSGreg Thelen goto error; 28971da177e4SLinus Torvalds } 28981da177e4SLinus Torvalds 28991da177e4SLinus Torvalds if (!strcmp(this_char,"size")) { 29001da177e4SLinus Torvalds unsigned long long size; 29011da177e4SLinus Torvalds size = memparse(value,&rest); 29021da177e4SLinus Torvalds if (*rest == '%') { 29031da177e4SLinus Torvalds size <<= PAGE_SHIFT; 29041da177e4SLinus Torvalds size *= totalram_pages; 29051da177e4SLinus Torvalds do_div(size, 100); 29061da177e4SLinus Torvalds rest++; 29071da177e4SLinus Torvalds } 29081da177e4SLinus Torvalds if (*rest) 29091da177e4SLinus Torvalds goto bad_val; 2910680d794bSakpm@linux-foundation.org sbinfo->max_blocks = 291109cbfeafSKirill A. Shutemov DIV_ROUND_UP(size, PAGE_SIZE); 29121da177e4SLinus Torvalds } else if (!strcmp(this_char,"nr_blocks")) { 2913680d794bSakpm@linux-foundation.org sbinfo->max_blocks = memparse(value, &rest); 29141da177e4SLinus Torvalds if (*rest) 29151da177e4SLinus Torvalds goto bad_val; 29161da177e4SLinus Torvalds } else if (!strcmp(this_char,"nr_inodes")) { 2917680d794bSakpm@linux-foundation.org sbinfo->max_inodes = memparse(value, &rest); 29181da177e4SLinus Torvalds if (*rest) 29191da177e4SLinus Torvalds goto bad_val; 29201da177e4SLinus Torvalds } else if (!strcmp(this_char,"mode")) { 2921680d794bSakpm@linux-foundation.org if (remount) 29221da177e4SLinus Torvalds continue; 2923680d794bSakpm@linux-foundation.org sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777; 29241da177e4SLinus Torvalds if (*rest) 29251da177e4SLinus Torvalds goto bad_val; 29261da177e4SLinus Torvalds } else if (!strcmp(this_char,"uid")) { 2927680d794bSakpm@linux-foundation.org if (remount) 29281da177e4SLinus Torvalds continue; 29298751e039SEric W. Biederman uid = simple_strtoul(value, &rest, 0); 29301da177e4SLinus Torvalds if (*rest) 29311da177e4SLinus Torvalds goto bad_val; 29328751e039SEric W. Biederman sbinfo->uid = make_kuid(current_user_ns(), uid); 29338751e039SEric W. Biederman if (!uid_valid(sbinfo->uid)) 29348751e039SEric W. Biederman goto bad_val; 29351da177e4SLinus Torvalds } else if (!strcmp(this_char,"gid")) { 2936680d794bSakpm@linux-foundation.org if (remount) 29371da177e4SLinus Torvalds continue; 29388751e039SEric W. Biederman gid = simple_strtoul(value, &rest, 0); 29391da177e4SLinus Torvalds if (*rest) 29401da177e4SLinus Torvalds goto bad_val; 29418751e039SEric W. Biederman sbinfo->gid = make_kgid(current_user_ns(), gid); 29428751e039SEric W. Biederman if (!gid_valid(sbinfo->gid)) 29438751e039SEric W. Biederman goto bad_val; 2944*5a6e75f8SKirill A. Shutemov #ifdef CONFIG_TRANSPARENT_HUGEPAGE 2945*5a6e75f8SKirill A. Shutemov } else if (!strcmp(this_char, "huge")) { 2946*5a6e75f8SKirill A. Shutemov int huge; 2947*5a6e75f8SKirill A. Shutemov huge = shmem_parse_huge(value); 2948*5a6e75f8SKirill A. 
Shutemov if (huge < 0) 2949*5a6e75f8SKirill A. Shutemov goto bad_val; 2950*5a6e75f8SKirill A. Shutemov if (!has_transparent_hugepage() && 2951*5a6e75f8SKirill A. Shutemov huge != SHMEM_HUGE_NEVER) 2952*5a6e75f8SKirill A. Shutemov goto bad_val; 2953*5a6e75f8SKirill A. Shutemov sbinfo->huge = huge; 2954*5a6e75f8SKirill A. Shutemov #endif 2955*5a6e75f8SKirill A. Shutemov #ifdef CONFIG_NUMA 29567339ff83SRobin Holt } else if (!strcmp(this_char,"mpol")) { 295749cd0a5cSGreg Thelen mpol_put(mpol); 295849cd0a5cSGreg Thelen mpol = NULL; 295949cd0a5cSGreg Thelen if (mpol_parse_str(value, &mpol)) 29607339ff83SRobin Holt goto bad_val; 2961*5a6e75f8SKirill A. Shutemov #endif 29621da177e4SLinus Torvalds } else { 29631170532bSJoe Perches pr_err("tmpfs: Bad mount option %s\n", this_char); 296449cd0a5cSGreg Thelen goto error; 29651da177e4SLinus Torvalds } 29661da177e4SLinus Torvalds } 296749cd0a5cSGreg Thelen sbinfo->mpol = mpol; 29681da177e4SLinus Torvalds return 0; 29691da177e4SLinus Torvalds 29701da177e4SLinus Torvalds bad_val: 29711170532bSJoe Perches pr_err("tmpfs: Bad value '%s' for mount option '%s'\n", 29721da177e4SLinus Torvalds value, this_char); 297349cd0a5cSGreg Thelen error: 297449cd0a5cSGreg Thelen mpol_put(mpol); 29751da177e4SLinus Torvalds return 1; 29761da177e4SLinus Torvalds 29771da177e4SLinus Torvalds } 29781da177e4SLinus Torvalds 29791da177e4SLinus Torvalds static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) 29801da177e4SLinus Torvalds { 29811da177e4SLinus Torvalds struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 2982680d794bSakpm@linux-foundation.org struct shmem_sb_info config = *sbinfo; 29830edd73b3SHugh Dickins unsigned long inodes; 29840edd73b3SHugh Dickins int error = -EINVAL; 29851da177e4SLinus Torvalds 29865f00110fSGreg Thelen config.mpol = NULL; 2987680d794bSakpm@linux-foundation.org if (shmem_parse_options(data, &config, true)) 29880edd73b3SHugh Dickins return error; 29890edd73b3SHugh Dickins 29900edd73b3SHugh Dickins spin_lock(&sbinfo->stat_lock); 29910edd73b3SHugh Dickins inodes = sbinfo->max_inodes - sbinfo->free_inodes; 29927e496299STim Chen if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0) 29930edd73b3SHugh Dickins goto out; 2994680d794bSakpm@linux-foundation.org if (config.max_inodes < inodes) 29950edd73b3SHugh Dickins goto out; 29960edd73b3SHugh Dickins /* 299754af6042SHugh Dickins * Those tests disallow limited->unlimited while any are in use; 29980edd73b3SHugh Dickins * but we must separately disallow unlimited->limited, because 29990edd73b3SHugh Dickins * in that case we have no record of how much is already in use. 30000edd73b3SHugh Dickins */ 3001680d794bSakpm@linux-foundation.org if (config.max_blocks && !sbinfo->max_blocks) 30020edd73b3SHugh Dickins goto out; 3003680d794bSakpm@linux-foundation.org if (config.max_inodes && !sbinfo->max_inodes) 30040edd73b3SHugh Dickins goto out; 30050edd73b3SHugh Dickins 30060edd73b3SHugh Dickins error = 0; 3007*5a6e75f8SKirill A. Shutemov sbinfo->huge = config.huge; 3008680d794bSakpm@linux-foundation.org sbinfo->max_blocks = config.max_blocks; 3009680d794bSakpm@linux-foundation.org sbinfo->max_inodes = config.max_inodes; 3010680d794bSakpm@linux-foundation.org sbinfo->free_inodes = config.max_inodes - inodes; 301171fe804bSLee Schermerhorn 30125f00110fSGreg Thelen /* 30135f00110fSGreg Thelen * Preserve previous mempolicy unless mpol remount option was specified. 
30145f00110fSGreg Thelen */ 30155f00110fSGreg Thelen if (config.mpol) { 301671fe804bSLee Schermerhorn mpol_put(sbinfo->mpol); 301771fe804bSLee Schermerhorn sbinfo->mpol = config.mpol; /* transfers initial ref */ 30185f00110fSGreg Thelen } 30190edd73b3SHugh Dickins out: 30200edd73b3SHugh Dickins spin_unlock(&sbinfo->stat_lock); 30210edd73b3SHugh Dickins return error; 30221da177e4SLinus Torvalds } 3023680d794bSakpm@linux-foundation.org 302434c80b1dSAl Viro static int shmem_show_options(struct seq_file *seq, struct dentry *root) 3025680d794bSakpm@linux-foundation.org { 302634c80b1dSAl Viro struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb); 3027680d794bSakpm@linux-foundation.org 3028680d794bSakpm@linux-foundation.org if (sbinfo->max_blocks != shmem_default_max_blocks()) 3029680d794bSakpm@linux-foundation.org seq_printf(seq, ",size=%luk", 303009cbfeafSKirill A. Shutemov sbinfo->max_blocks << (PAGE_SHIFT - 10)); 3031680d794bSakpm@linux-foundation.org if (sbinfo->max_inodes != shmem_default_max_inodes()) 3032680d794bSakpm@linux-foundation.org seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes); 3033680d794bSakpm@linux-foundation.org if (sbinfo->mode != (S_IRWXUGO | S_ISVTX)) 303409208d15SAl Viro seq_printf(seq, ",mode=%03ho", sbinfo->mode); 30358751e039SEric W. Biederman if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID)) 30368751e039SEric W. Biederman seq_printf(seq, ",uid=%u", 30378751e039SEric W. Biederman from_kuid_munged(&init_user_ns, sbinfo->uid)); 30388751e039SEric W. Biederman if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID)) 30398751e039SEric W. Biederman seq_printf(seq, ",gid=%u", 30408751e039SEric W. Biederman from_kgid_munged(&init_user_ns, sbinfo->gid)); 3041*5a6e75f8SKirill A. Shutemov #ifdef CONFIG_TRANSPARENT_HUGEPAGE 3042*5a6e75f8SKirill A. Shutemov /* Rightly or wrongly, show huge mount option unmasked by shmem_huge */ 3043*5a6e75f8SKirill A. Shutemov if (sbinfo->huge) 3044*5a6e75f8SKirill A. Shutemov seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge)); 3045*5a6e75f8SKirill A. 
Shutemov #endif 304671fe804bSLee Schermerhorn shmem_show_mpol(seq, sbinfo->mpol); 3047680d794bSakpm@linux-foundation.org return 0; 3048680d794bSakpm@linux-foundation.org } 30499183df25SDavid Herrmann 30509183df25SDavid Herrmann #define MFD_NAME_PREFIX "memfd:" 30519183df25SDavid Herrmann #define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1) 30529183df25SDavid Herrmann #define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN) 30539183df25SDavid Herrmann 30549183df25SDavid Herrmann #define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING) 30559183df25SDavid Herrmann 30569183df25SDavid Herrmann SYSCALL_DEFINE2(memfd_create, 30579183df25SDavid Herrmann const char __user *, uname, 30589183df25SDavid Herrmann unsigned int, flags) 30599183df25SDavid Herrmann { 30609183df25SDavid Herrmann struct shmem_inode_info *info; 30619183df25SDavid Herrmann struct file *file; 30629183df25SDavid Herrmann int fd, error; 30639183df25SDavid Herrmann char *name; 30649183df25SDavid Herrmann long len; 30659183df25SDavid Herrmann 30669183df25SDavid Herrmann if (flags & ~(unsigned int)MFD_ALL_FLAGS) 30679183df25SDavid Herrmann return -EINVAL; 30689183df25SDavid Herrmann 30699183df25SDavid Herrmann /* length includes terminating zero */ 30709183df25SDavid Herrmann len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1); 30719183df25SDavid Herrmann if (len <= 0) 30729183df25SDavid Herrmann return -EFAULT; 30739183df25SDavid Herrmann if (len > MFD_NAME_MAX_LEN + 1) 30749183df25SDavid Herrmann return -EINVAL; 30759183df25SDavid Herrmann 30769183df25SDavid Herrmann name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY); 30779183df25SDavid Herrmann if (!name) 30789183df25SDavid Herrmann return -ENOMEM; 30799183df25SDavid Herrmann 30809183df25SDavid Herrmann strcpy(name, MFD_NAME_PREFIX); 30819183df25SDavid Herrmann if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) { 30829183df25SDavid Herrmann error = -EFAULT; 30839183df25SDavid Herrmann goto err_name; 30849183df25SDavid Herrmann } 30859183df25SDavid Herrmann 30869183df25SDavid Herrmann /* terminating-zero may have changed after strnlen_user() returned */ 30879183df25SDavid Herrmann if (name[len + MFD_NAME_PREFIX_LEN - 1]) { 30889183df25SDavid Herrmann error = -EFAULT; 30899183df25SDavid Herrmann goto err_name; 30909183df25SDavid Herrmann } 30919183df25SDavid Herrmann 30929183df25SDavid Herrmann fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? 
O_CLOEXEC : 0); 30939183df25SDavid Herrmann if (fd < 0) { 30949183df25SDavid Herrmann error = fd; 30959183df25SDavid Herrmann goto err_name; 30969183df25SDavid Herrmann } 30979183df25SDavid Herrmann 30989183df25SDavid Herrmann file = shmem_file_setup(name, 0, VM_NORESERVE); 30999183df25SDavid Herrmann if (IS_ERR(file)) { 31009183df25SDavid Herrmann error = PTR_ERR(file); 31019183df25SDavid Herrmann goto err_fd; 31029183df25SDavid Herrmann } 31039183df25SDavid Herrmann info = SHMEM_I(file_inode(file)); 31049183df25SDavid Herrmann file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; 31059183df25SDavid Herrmann file->f_flags |= O_RDWR | O_LARGEFILE; 31069183df25SDavid Herrmann if (flags & MFD_ALLOW_SEALING) 31079183df25SDavid Herrmann info->seals &= ~F_SEAL_SEAL; 31089183df25SDavid Herrmann 31099183df25SDavid Herrmann fd_install(fd, file); 31109183df25SDavid Herrmann kfree(name); 31119183df25SDavid Herrmann return fd; 31129183df25SDavid Herrmann 31139183df25SDavid Herrmann err_fd: 31149183df25SDavid Herrmann put_unused_fd(fd); 31159183df25SDavid Herrmann err_name: 31169183df25SDavid Herrmann kfree(name); 31179183df25SDavid Herrmann return error; 31189183df25SDavid Herrmann } 31199183df25SDavid Herrmann 3120680d794bSakpm@linux-foundation.org #endif /* CONFIG_TMPFS */ 31211da177e4SLinus Torvalds 31221da177e4SLinus Torvalds static void shmem_put_super(struct super_block *sb) 31231da177e4SLinus Torvalds { 3124602586a8SHugh Dickins struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 3125602586a8SHugh Dickins 3126602586a8SHugh Dickins percpu_counter_destroy(&sbinfo->used_blocks); 312749cd0a5cSGreg Thelen mpol_put(sbinfo->mpol); 3128602586a8SHugh Dickins kfree(sbinfo); 31291da177e4SLinus Torvalds sb->s_fs_info = NULL; 31301da177e4SLinus Torvalds } 31311da177e4SLinus Torvalds 31322b2af54aSKay Sievers int shmem_fill_super(struct super_block *sb, void *data, int silent) 31331da177e4SLinus Torvalds { 31341da177e4SLinus Torvalds struct inode *inode; 31350edd73b3SHugh Dickins struct shmem_sb_info *sbinfo; 3136680d794bSakpm@linux-foundation.org int err = -ENOMEM; 3137680d794bSakpm@linux-foundation.org 3138680d794bSakpm@linux-foundation.org /* Round up to L1_CACHE_BYTES to resist false sharing */ 3139425fbf04SPekka Enberg sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info), 3140680d794bSakpm@linux-foundation.org L1_CACHE_BYTES), GFP_KERNEL); 3141680d794bSakpm@linux-foundation.org if (!sbinfo) 3142680d794bSakpm@linux-foundation.org return -ENOMEM; 3143680d794bSakpm@linux-foundation.org 3144680d794bSakpm@linux-foundation.org sbinfo->mode = S_IRWXUGO | S_ISVTX; 314576aac0e9SDavid Howells sbinfo->uid = current_fsuid(); 314676aac0e9SDavid Howells sbinfo->gid = current_fsgid(); 3147680d794bSakpm@linux-foundation.org sb->s_fs_info = sbinfo; 31481da177e4SLinus Torvalds 31490edd73b3SHugh Dickins #ifdef CONFIG_TMPFS 31501da177e4SLinus Torvalds /* 31511da177e4SLinus Torvalds * Per default we only allow half of the physical ram per 31521da177e4SLinus Torvalds * tmpfs instance, limiting inodes to one per page of lowmem; 31531da177e4SLinus Torvalds * but the internal instance is left unlimited. 
31541da177e4SLinus Torvalds */ 3155ca4e0519SAl Viro if (!(sb->s_flags & MS_KERNMOUNT)) { 3156680d794bSakpm@linux-foundation.org sbinfo->max_blocks = shmem_default_max_blocks(); 3157680d794bSakpm@linux-foundation.org sbinfo->max_inodes = shmem_default_max_inodes(); 3158680d794bSakpm@linux-foundation.org if (shmem_parse_options(data, sbinfo, false)) { 3159680d794bSakpm@linux-foundation.org err = -EINVAL; 3160680d794bSakpm@linux-foundation.org goto failed; 3161680d794bSakpm@linux-foundation.org } 3162ca4e0519SAl Viro } else { 3163ca4e0519SAl Viro sb->s_flags |= MS_NOUSER; 31641da177e4SLinus Torvalds } 316591828a40SDavid M. Grimes sb->s_export_op = &shmem_export_ops; 31662f6e38f3SHugh Dickins sb->s_flags |= MS_NOSEC; 31670edd73b3SHugh Dickins #else 31680edd73b3SHugh Dickins sb->s_flags |= MS_NOUSER; 31690edd73b3SHugh Dickins #endif 31701da177e4SLinus Torvalds 31711da177e4SLinus Torvalds spin_lock_init(&sbinfo->stat_lock); 3172908c7f19STejun Heo if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL)) 3173602586a8SHugh Dickins goto failed; 3174680d794bSakpm@linux-foundation.org sbinfo->free_inodes = sbinfo->max_inodes; 31751da177e4SLinus Torvalds 3176285b2c4fSHugh Dickins sb->s_maxbytes = MAX_LFS_FILESIZE; 317709cbfeafSKirill A. Shutemov sb->s_blocksize = PAGE_SIZE; 317809cbfeafSKirill A. Shutemov sb->s_blocksize_bits = PAGE_SHIFT; 31791da177e4SLinus Torvalds sb->s_magic = TMPFS_MAGIC; 31801da177e4SLinus Torvalds sb->s_op = &shmem_ops; 3181cfd95a9cSRobin H. Johnson sb->s_time_gran = 1; 3182b09e0fa4SEric Paris #ifdef CONFIG_TMPFS_XATTR 318339f0247dSAndreas Gruenbacher sb->s_xattr = shmem_xattr_handlers; 3184b09e0fa4SEric Paris #endif 3185b09e0fa4SEric Paris #ifdef CONFIG_TMPFS_POSIX_ACL 318639f0247dSAndreas Gruenbacher sb->s_flags |= MS_POSIXACL; 318739f0247dSAndreas Gruenbacher #endif 31880edd73b3SHugh Dickins 3189454abafeSDmitry Monakhov inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE); 31901da177e4SLinus Torvalds if (!inode) 31911da177e4SLinus Torvalds goto failed; 3192680d794bSakpm@linux-foundation.org inode->i_uid = sbinfo->uid; 3193680d794bSakpm@linux-foundation.org inode->i_gid = sbinfo->gid; 3194318ceed0SAl Viro sb->s_root = d_make_root(inode); 3195318ceed0SAl Viro if (!sb->s_root) 319648fde701SAl Viro goto failed; 31971da177e4SLinus Torvalds return 0; 31981da177e4SLinus Torvalds 31991da177e4SLinus Torvalds failed: 32001da177e4SLinus Torvalds shmem_put_super(sb); 32011da177e4SLinus Torvalds return err; 32021da177e4SLinus Torvalds } 32031da177e4SLinus Torvalds 3204fcc234f8SPekka Enberg static struct kmem_cache *shmem_inode_cachep; 32051da177e4SLinus Torvalds 32061da177e4SLinus Torvalds static struct inode *shmem_alloc_inode(struct super_block *sb) 32071da177e4SLinus Torvalds { 320841ffe5d5SHugh Dickins struct shmem_inode_info *info; 320941ffe5d5SHugh Dickins info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); 321041ffe5d5SHugh Dickins if (!info) 32111da177e4SLinus Torvalds return NULL; 321241ffe5d5SHugh Dickins return &info->vfs_inode; 32131da177e4SLinus Torvalds } 32141da177e4SLinus Torvalds 321541ffe5d5SHugh Dickins static void shmem_destroy_callback(struct rcu_head *head) 3216fa0d7e3dSNick Piggin { 3217fa0d7e3dSNick Piggin struct inode *inode = container_of(head, struct inode, i_rcu); 321884e710daSAl Viro if (S_ISLNK(inode->i_mode)) 32193ed47db3SAl Viro kfree(inode->i_link); 3220fa0d7e3dSNick Piggin kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); 3221fa0d7e3dSNick Piggin } 3222fa0d7e3dSNick Piggin 32231da177e4SLinus Torvalds static void 
shmem_destroy_inode(struct inode *inode) 32241da177e4SLinus Torvalds { 322509208d15SAl Viro if (S_ISREG(inode->i_mode)) 32261da177e4SLinus Torvalds mpol_free_shared_policy(&SHMEM_I(inode)->policy); 322741ffe5d5SHugh Dickins call_rcu(&inode->i_rcu, shmem_destroy_callback); 32281da177e4SLinus Torvalds } 32291da177e4SLinus Torvalds 323041ffe5d5SHugh Dickins static void shmem_init_inode(void *foo) 32311da177e4SLinus Torvalds { 323241ffe5d5SHugh Dickins struct shmem_inode_info *info = foo; 323341ffe5d5SHugh Dickins inode_init_once(&info->vfs_inode); 32341da177e4SLinus Torvalds } 32351da177e4SLinus Torvalds 323641ffe5d5SHugh Dickins static int shmem_init_inodecache(void) 32371da177e4SLinus Torvalds { 32381da177e4SLinus Torvalds shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", 32391da177e4SLinus Torvalds sizeof(struct shmem_inode_info), 32405d097056SVladimir Davydov 0, SLAB_PANIC|SLAB_ACCOUNT, shmem_init_inode); 32411da177e4SLinus Torvalds return 0; 32421da177e4SLinus Torvalds } 32431da177e4SLinus Torvalds 324441ffe5d5SHugh Dickins static void shmem_destroy_inodecache(void) 32451da177e4SLinus Torvalds { 32461a1d92c1SAlexey Dobriyan kmem_cache_destroy(shmem_inode_cachep); 32471da177e4SLinus Torvalds } 32481da177e4SLinus Torvalds 3249f5e54d6eSChristoph Hellwig static const struct address_space_operations shmem_aops = { 32501da177e4SLinus Torvalds .writepage = shmem_writepage, 325176719325SKen Chen .set_page_dirty = __set_page_dirty_no_writeback, 32521da177e4SLinus Torvalds #ifdef CONFIG_TMPFS 3253800d15a5SNick Piggin .write_begin = shmem_write_begin, 3254800d15a5SNick Piggin .write_end = shmem_write_end, 32551da177e4SLinus Torvalds #endif 32561c93923cSAndrew Morton #ifdef CONFIG_MIGRATION 3257304dbdb7SLee Schermerhorn .migratepage = migrate_page, 32581c93923cSAndrew Morton #endif 3259aa261f54SAndi Kleen .error_remove_page = generic_error_remove_page, 32601da177e4SLinus Torvalds }; 32611da177e4SLinus Torvalds 326215ad7cdcSHelge Deller static const struct file_operations shmem_file_operations = { 32631da177e4SLinus Torvalds .mmap = shmem_mmap, 32641da177e4SLinus Torvalds #ifdef CONFIG_TMPFS 3265220f2ac9SHugh Dickins .llseek = shmem_file_llseek, 32662ba5bbedSAl Viro .read_iter = shmem_file_read_iter, 32678174202bSAl Viro .write_iter = generic_file_write_iter, 32681b061d92SChristoph Hellwig .fsync = noop_fsync, 3269708e3508SHugh Dickins .splice_read = shmem_file_splice_read, 3270f6cb85d0SAl Viro .splice_write = iter_file_splice_write, 327183e4fa9cSHugh Dickins .fallocate = shmem_fallocate, 32721da177e4SLinus Torvalds #endif 32731da177e4SLinus Torvalds }; 32741da177e4SLinus Torvalds 327592e1d5beSArjan van de Ven static const struct inode_operations shmem_inode_operations = { 327644a30220SYu Zhao .getattr = shmem_getattr, 327794c1e62dSHugh Dickins .setattr = shmem_setattr, 3278b09e0fa4SEric Paris #ifdef CONFIG_TMPFS_XATTR 3279aa7c5241SAndreas Gruenbacher .setxattr = generic_setxattr, 3280aa7c5241SAndreas Gruenbacher .getxattr = generic_getxattr, 3281b09e0fa4SEric Paris .listxattr = shmem_listxattr, 3282aa7c5241SAndreas Gruenbacher .removexattr = generic_removexattr, 3283feda821eSChristoph Hellwig .set_acl = simple_set_acl, 3284b09e0fa4SEric Paris #endif 32851da177e4SLinus Torvalds }; 32861da177e4SLinus Torvalds 328792e1d5beSArjan van de Ven static const struct inode_operations shmem_dir_inode_operations = { 32881da177e4SLinus Torvalds #ifdef CONFIG_TMPFS 32891da177e4SLinus Torvalds .create = shmem_create, 32901da177e4SLinus Torvalds .lookup = simple_lookup, 32911da177e4SLinus Torvalds .link = 
shmem_link, 32921da177e4SLinus Torvalds .unlink = shmem_unlink, 32931da177e4SLinus Torvalds .symlink = shmem_symlink, 32941da177e4SLinus Torvalds .mkdir = shmem_mkdir, 32951da177e4SLinus Torvalds .rmdir = shmem_rmdir, 32961da177e4SLinus Torvalds .mknod = shmem_mknod, 32973b69ff51SMiklos Szeredi .rename2 = shmem_rename2, 329860545d0dSAl Viro .tmpfile = shmem_tmpfile, 32991da177e4SLinus Torvalds #endif 3300b09e0fa4SEric Paris #ifdef CONFIG_TMPFS_XATTR 3301aa7c5241SAndreas Gruenbacher .setxattr = generic_setxattr, 3302aa7c5241SAndreas Gruenbacher .getxattr = generic_getxattr, 3303b09e0fa4SEric Paris .listxattr = shmem_listxattr, 3304aa7c5241SAndreas Gruenbacher .removexattr = generic_removexattr, 3305b09e0fa4SEric Paris #endif 330639f0247dSAndreas Gruenbacher #ifdef CONFIG_TMPFS_POSIX_ACL 330794c1e62dSHugh Dickins .setattr = shmem_setattr, 3308feda821eSChristoph Hellwig .set_acl = simple_set_acl, 330939f0247dSAndreas Gruenbacher #endif 331039f0247dSAndreas Gruenbacher }; 331139f0247dSAndreas Gruenbacher 331292e1d5beSArjan van de Ven static const struct inode_operations shmem_special_inode_operations = { 3313b09e0fa4SEric Paris #ifdef CONFIG_TMPFS_XATTR 3314aa7c5241SAndreas Gruenbacher .setxattr = generic_setxattr, 3315aa7c5241SAndreas Gruenbacher .getxattr = generic_getxattr, 3316b09e0fa4SEric Paris .listxattr = shmem_listxattr, 3317aa7c5241SAndreas Gruenbacher .removexattr = generic_removexattr, 3318b09e0fa4SEric Paris #endif 331939f0247dSAndreas Gruenbacher #ifdef CONFIG_TMPFS_POSIX_ACL 332094c1e62dSHugh Dickins .setattr = shmem_setattr, 3321feda821eSChristoph Hellwig .set_acl = simple_set_acl, 332239f0247dSAndreas Gruenbacher #endif 33231da177e4SLinus Torvalds }; 33241da177e4SLinus Torvalds 3325759b9775SHugh Dickins static const struct super_operations shmem_ops = { 33261da177e4SLinus Torvalds .alloc_inode = shmem_alloc_inode, 33271da177e4SLinus Torvalds .destroy_inode = shmem_destroy_inode, 33281da177e4SLinus Torvalds #ifdef CONFIG_TMPFS 33291da177e4SLinus Torvalds .statfs = shmem_statfs, 33301da177e4SLinus Torvalds .remount_fs = shmem_remount_fs, 3331680d794bSakpm@linux-foundation.org .show_options = shmem_show_options, 33321da177e4SLinus Torvalds #endif 33331f895f75SAl Viro .evict_inode = shmem_evict_inode, 33341da177e4SLinus Torvalds .drop_inode = generic_delete_inode, 33351da177e4SLinus Torvalds .put_super = shmem_put_super, 33361da177e4SLinus Torvalds }; 33371da177e4SLinus Torvalds 3338f0f37e2fSAlexey Dobriyan static const struct vm_operations_struct shmem_vm_ops = { 333954cb8821SNick Piggin .fault = shmem_fault, 3340d7c17551SNing Qu .map_pages = filemap_map_pages, 33411da177e4SLinus Torvalds #ifdef CONFIG_NUMA 33421da177e4SLinus Torvalds .set_policy = shmem_set_policy, 33431da177e4SLinus Torvalds .get_policy = shmem_get_policy, 33441da177e4SLinus Torvalds #endif 33451da177e4SLinus Torvalds }; 33461da177e4SLinus Torvalds 33473c26ff6eSAl Viro static struct dentry *shmem_mount(struct file_system_type *fs_type, 33483c26ff6eSAl Viro int flags, const char *dev_name, void *data) 33491da177e4SLinus Torvalds { 33503c26ff6eSAl Viro return mount_nodev(fs_type, flags, data, shmem_fill_super); 33511da177e4SLinus Torvalds } 33521da177e4SLinus Torvalds 335341ffe5d5SHugh Dickins static struct file_system_type shmem_fs_type = { 33541da177e4SLinus Torvalds .owner = THIS_MODULE, 33551da177e4SLinus Torvalds .name = "tmpfs", 33563c26ff6eSAl Viro .mount = shmem_mount, 33571da177e4SLinus Torvalds .kill_sb = kill_litter_super, 33582b8576cbSEric W. 
Biederman .fs_flags = FS_USERNS_MOUNT, 33591da177e4SLinus Torvalds }; 33601da177e4SLinus Torvalds 336141ffe5d5SHugh Dickins int __init shmem_init(void) 33621da177e4SLinus Torvalds { 33631da177e4SLinus Torvalds int error; 33641da177e4SLinus Torvalds 336516203a7aSRob Landley /* If rootfs called this, don't re-init */ 336616203a7aSRob Landley if (shmem_inode_cachep) 336716203a7aSRob Landley return 0; 336816203a7aSRob Landley 336941ffe5d5SHugh Dickins error = shmem_init_inodecache(); 33701da177e4SLinus Torvalds if (error) 33711da177e4SLinus Torvalds goto out3; 33721da177e4SLinus Torvalds 337341ffe5d5SHugh Dickins error = register_filesystem(&shmem_fs_type); 33741da177e4SLinus Torvalds if (error) { 33751170532bSJoe Perches pr_err("Could not register tmpfs\n"); 33761da177e4SLinus Torvalds goto out2; 33771da177e4SLinus Torvalds } 337895dc112aSGreg Kroah-Hartman 3379ca4e0519SAl Viro shm_mnt = kern_mount(&shmem_fs_type); 33801da177e4SLinus Torvalds if (IS_ERR(shm_mnt)) { 33811da177e4SLinus Torvalds error = PTR_ERR(shm_mnt); 33821170532bSJoe Perches pr_err("Could not kern_mount tmpfs\n"); 33831da177e4SLinus Torvalds goto out1; 33841da177e4SLinus Torvalds } 3385*5a6e75f8SKirill A. Shutemov 3386*5a6e75f8SKirill A. Shutemov #ifdef CONFIG_TRANSPARENT_HUGEPAGE 3387*5a6e75f8SKirill A. Shutemov if (has_transparent_hugepage() && shmem_huge < SHMEM_HUGE_DENY) 3388*5a6e75f8SKirill A. Shutemov SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge; 3389*5a6e75f8SKirill A. Shutemov else 3390*5a6e75f8SKirill A. Shutemov shmem_huge = 0; /* just in case it was patched */ 3391*5a6e75f8SKirill A. Shutemov #endif 33921da177e4SLinus Torvalds return 0; 33931da177e4SLinus Torvalds 33941da177e4SLinus Torvalds out1: 339541ffe5d5SHugh Dickins unregister_filesystem(&shmem_fs_type); 33961da177e4SLinus Torvalds out2: 339741ffe5d5SHugh Dickins shmem_destroy_inodecache(); 33981da177e4SLinus Torvalds out3: 33991da177e4SLinus Torvalds shm_mnt = ERR_PTR(error); 34001da177e4SLinus Torvalds return error; 34011da177e4SLinus Torvalds } 3402853ac43aSMatt Mackall 3403*5a6e75f8SKirill A. Shutemov #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS) 3404*5a6e75f8SKirill A. Shutemov static ssize_t shmem_enabled_show(struct kobject *kobj, 3405*5a6e75f8SKirill A. Shutemov struct kobj_attribute *attr, char *buf) 3406*5a6e75f8SKirill A. Shutemov { 3407*5a6e75f8SKirill A. Shutemov int values[] = { 3408*5a6e75f8SKirill A. Shutemov SHMEM_HUGE_ALWAYS, 3409*5a6e75f8SKirill A. Shutemov SHMEM_HUGE_WITHIN_SIZE, 3410*5a6e75f8SKirill A. Shutemov SHMEM_HUGE_ADVISE, 3411*5a6e75f8SKirill A. Shutemov SHMEM_HUGE_NEVER, 3412*5a6e75f8SKirill A. Shutemov SHMEM_HUGE_DENY, 3413*5a6e75f8SKirill A. Shutemov SHMEM_HUGE_FORCE, 3414*5a6e75f8SKirill A. Shutemov }; 3415*5a6e75f8SKirill A. Shutemov int i, count; 3416*5a6e75f8SKirill A. Shutemov 3417*5a6e75f8SKirill A. Shutemov for (i = 0, count = 0; i < ARRAY_SIZE(values); i++) { 3418*5a6e75f8SKirill A. Shutemov const char *fmt = shmem_huge == values[i] ? "[%s] " : "%s "; 3419*5a6e75f8SKirill A. Shutemov 3420*5a6e75f8SKirill A. Shutemov count += sprintf(buf + count, fmt, 3421*5a6e75f8SKirill A. Shutemov shmem_format_huge(values[i])); 3422*5a6e75f8SKirill A. Shutemov } 3423*5a6e75f8SKirill A. Shutemov buf[count - 1] = '\n'; 3424*5a6e75f8SKirill A. Shutemov return count; 3425*5a6e75f8SKirill A. Shutemov } 3426*5a6e75f8SKirill A. Shutemov 3427*5a6e75f8SKirill A. Shutemov static ssize_t shmem_enabled_store(struct kobject *kobj, 3428*5a6e75f8SKirill A. 
static ssize_t shmem_enabled_store(struct kobject *kobj,
		struct kobj_attribute *attr, const char *buf, size_t count)
{
	char tmp[16];
	int huge;

	if (count + 1 > sizeof(tmp))
		return -EINVAL;
	memcpy(tmp, buf, count);
	tmp[count] = '\0';
	if (count && tmp[count - 1] == '\n')
		tmp[count - 1] = '\0';

	huge = shmem_parse_huge(tmp);
	if (huge == -EINVAL)
		return -EINVAL;
	if (!has_transparent_hugepage() &&
			huge != SHMEM_HUGE_NEVER && huge != SHMEM_HUGE_DENY)
		return -EINVAL;

	shmem_huge = huge;
	if (shmem_huge < SHMEM_HUGE_DENY)
		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
	return count;
}

struct kobj_attribute shmem_enabled_attr =
	__ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */

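/*
 * Editorial sketch, not part of the original source: shmem_enabled_attr is the
 * sysfs knob selecting the huge-page policy for tmpfs.  shmem_enabled_show()
 * prints each policy in the order listed above and brackets the current one,
 * presumably as "always within_size advise [never] deny force".  Assuming the
 * attribute is exposed (by code outside this file) as
 * /sys/kernel/mm/transparent_hugepage/shmem_enabled, a userspace program could
 * select a policy roughly as follows; shmem_enabled_store() strips one
 * trailing newline itself, so echo-style writes work as well:
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int set_shmem_huge(const char *policy)
 *	{
 *		int fd = open("/sys/kernel/mm/transparent_hugepage/shmem_enabled",
 *			      O_WRONLY);
 *		ssize_t n;
 *
 *		if (fd < 0)
 *			return -1;
 *		n = write(fd, policy, strlen(policy));
 *		close(fd);
 *		return n < 0 ? -1 : 0;
 *	}
 */
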
#else /* !CONFIG_SHMEM */

/*
 * tiny-shmem: simple shmemfs and tmpfs using ramfs code
 *
 * This is intended for small systems where the benefits of the full
 * shmem code (swap-backed and resource-limited) are outweighed by
 * their complexity. On systems without swap this code should be
 * effectively equivalent, but much lighter weight.
 */

static struct file_system_type shmem_fs_type = {
	.name		= "tmpfs",
	.mount		= ramfs_mount,
	.kill_sb	= kill_litter_super,
	.fs_flags	= FS_USERNS_MOUNT,
};

int __init shmem_init(void)
{
	BUG_ON(register_filesystem(&shmem_fs_type) != 0);

	shm_mnt = kern_mount(&shmem_fs_type);
	BUG_ON(IS_ERR(shm_mnt));

	return 0;
}

int shmem_unuse(swp_entry_t swap, struct page *page)
{
	return 0;
}

int shmem_lock(struct file *file, int lock, struct user_struct *user)
{
	return 0;
}

void shmem_unlock_mapping(struct address_space *mapping)
{
}

void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
{
	truncate_inode_pages_range(inode->i_mapping, lstart, lend);
}
EXPORT_SYMBOL_GPL(shmem_truncate_range);

#define shmem_vm_ops				generic_file_vm_ops
#define shmem_file_operations			ramfs_file_operations
#define shmem_get_inode(sb, dir, mode, dev, flags)	ramfs_get_inode(sb, dir, mode, dev)
#define shmem_acct_size(flags, size)		0
#define shmem_unacct_size(flags, size)		do {} while (0)

#endif /* CONFIG_SHMEM */

/* common code */

static struct dentry_operations anon_ops = {
	.d_dname = simple_dname
};

static struct file *__shmem_file_setup(const char *name, loff_t size,
				       unsigned long flags, unsigned int i_flags)
{
	struct file *res;
	struct inode *inode;
	struct path path;
	struct super_block *sb;
	struct qstr this;

	if (IS_ERR(shm_mnt))
		return ERR_CAST(shm_mnt);

	if (size < 0 || size > MAX_LFS_FILESIZE)
		return ERR_PTR(-EINVAL);

	if (shmem_acct_size(flags, size))
		return ERR_PTR(-ENOMEM);

	res = ERR_PTR(-ENOMEM);
	this.name = name;
	this.len = strlen(name);
	this.hash = 0; /* will go */
	sb = shm_mnt->mnt_sb;
	path.mnt = mntget(shm_mnt);
	path.dentry = d_alloc_pseudo(sb, &this);
	if (!path.dentry)
		goto put_memory;
	d_set_d_op(path.dentry, &anon_ops);

	res = ERR_PTR(-ENOSPC);
	inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
	if (!inode)
		goto put_memory;

	inode->i_flags |= i_flags;
	d_instantiate(path.dentry, inode);
	inode->i_size = size;
	clear_nlink(inode);	/* It is unlinked */
	res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
	if (IS_ERR(res))
		goto put_path;

	res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
			&shmem_file_operations);
	if (IS_ERR(res))
		goto put_path;

	return res;

put_memory:
	shmem_unacct_size(flags, size);
put_path:
	path_put(&path);
	return res;
}

/**
 * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be
 *	kernel internal.  There will be NO LSM permission checks against the
 *	underlying inode.  So users of this interface must do LSM checks at a
 *	higher layer.  The users are the big_key and shm implementations.  LSM
 *	checks are provided at the key or shm level rather than the inode.
 * @name: name for dentry (to be seen in /proc/<pid>/maps)
 * @size: size to be set for the file
 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
 */
struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
{
	return __shmem_file_setup(name, size, flags, S_PRIVATE);
}

/**
 * shmem_file_setup - get an unlinked file living in tmpfs
 * @name: name for dentry (to be seen in /proc/<pid>/maps)
 * @size: size to be set for the file
 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
 */
struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
{
	return __shmem_file_setup(name, size, flags, 0);
}
EXPORT_SYMBOL_GPL(shmem_file_setup);

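/*
 * Editorial sketch, not part of the original source: a typical in-kernel use
 * of shmem_file_setup() (GEM-style graphics drivers, for instance) is to back
 * an object with an unlinked tmpfs file, reach its pages through
 * filp->f_mapping, and drop the file with fput() when the object dies.  The
 * object layout below is illustrative only:
 *
 *	struct file *filp = shmem_file_setup("my-object", obj_size, VM_NORESERVE);
 *
 *	if (IS_ERR(filp))
 *		return PTR_ERR(filp);
 *	obj->filp = filp;
 *	...
 *	fput(obj->filp);
 */
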
/**
 * shmem_zero_setup - setup a shared anonymous mapping
 * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
 */
int shmem_zero_setup(struct vm_area_struct *vma)
{
	struct file *file;
	loff_t size = vma->vm_end - vma->vm_start;

	/*
	 * Cloning a new file under mmap_sem leads to a lock ordering conflict
	 * between XFS directory reading and selinux: since this file is only
	 * accessible to the user through its mapping, use S_PRIVATE flag to
	 * bypass file security, in the same way as shmem_kernel_file_setup().
	 */
	file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE);
	if (IS_ERR(file))
		return PTR_ERR(file);

	if (vma->vm_file)
		fput(vma->vm_file);
	vma->vm_file = file;
	vma->vm_ops = &shmem_vm_ops;
	return 0;
}

/**
 * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags.
 * @mapping:	the page's address_space
 * @index:	the page index
 * @gfp:	the page allocator flags to use if allocating
 *
 * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)",
 * with any new page allocations done using the specified allocation flags.
 * But read_cache_page_gfp() uses the ->readpage() method: which does not
 * suit tmpfs, since it may have pages in swapcache, and needs to find those
 * for itself; although drivers/gpu/drm i915 and ttm rely upon this support.
 *
 * i915_gem_object_get_pages_gtt() mixes __GFP_NORETRY | __GFP_NOWARN in
 * with the mapping_gfp_mask(), to avoid OOMing the machine unnecessarily.
 */
struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
					 pgoff_t index, gfp_t gfp)
{
#ifdef CONFIG_SHMEM
	struct inode *inode = mapping->host;
	struct page *page;
	int error;

	BUG_ON(mapping->a_ops != &shmem_aops);
	error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
				  gfp, NULL, NULL);
	if (error)
		page = ERR_PTR(error);
	else
		unlock_page(page);
	return page;
#else
	/*
	 * The tiny !SHMEM case uses ramfs without swap
	 */
	return read_cache_page_gfp(mapping, index, gfp);
#endif
}
EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);

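/*
 * Editorial sketch, not part of the original source: following the i915 hint
 * in the comment above, a driver that backs a buffer with a tmpfs file (for
 * example one obtained from shmem_file_setup()) might pull individual pages
 * in like this, dropping the reference with put_page() when done; the filp
 * and index variables are illustrative only:
 *
 *	struct address_space *mapping = file_inode(filp)->i_mapping;
 *	gfp_t gfp = mapping_gfp_mask(mapping) | __GFP_NORETRY | __GFP_NOWARN;
 *	struct page *page;
 *
 *	page = shmem_read_mapping_page_gfp(mapping, index, gfp);
 *	if (IS_ERR(page))
 *		return PTR_ERR(page);
 *	...
 *	put_page(page);
 */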