1*1da177e4SLinus Torvalds /* 2*1da177e4SLinus Torvalds * Resizable virtual memory filesystem for Linux. 3*1da177e4SLinus Torvalds * 4*1da177e4SLinus Torvalds * Copyright (C) 2000 Linus Torvalds. 5*1da177e4SLinus Torvalds * 2000 Transmeta Corp. 6*1da177e4SLinus Torvalds * 2000-2001 Christoph Rohland 7*1da177e4SLinus Torvalds * 2000-2001 SAP AG 8*1da177e4SLinus Torvalds * 2002 Red Hat Inc. 9*1da177e4SLinus Torvalds * Copyright (C) 2002-2004 Hugh Dickins. 10*1da177e4SLinus Torvalds * Copyright (C) 2002-2004 VERITAS Software Corporation. 11*1da177e4SLinus Torvalds * Copyright (C) 2004 Andi Kleen, SuSE Labs 12*1da177e4SLinus Torvalds * 13*1da177e4SLinus Torvalds * Extended attribute support for tmpfs: 14*1da177e4SLinus Torvalds * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net> 15*1da177e4SLinus Torvalds * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> 16*1da177e4SLinus Torvalds * 17*1da177e4SLinus Torvalds * This file is released under the GPL. 18*1da177e4SLinus Torvalds */ 19*1da177e4SLinus Torvalds 20*1da177e4SLinus Torvalds /* 21*1da177e4SLinus Torvalds * This virtual memory filesystem is heavily based on the ramfs. It 22*1da177e4SLinus Torvalds * extends ramfs by the ability to use swap and honor resource limits 23*1da177e4SLinus Torvalds * which makes it a completely usable filesystem. 
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/file.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/shmem_fs.h>
#include <linux/mount.h>
#include <linux/writeback.h>
#include <linux/vfs.h>
#include <linux/blkdev.h>
#include <linux/security.h>
#include <linux/swapops.h>
#include <linux/mempolicy.h>
#include <linux/namei.h>
#include <linux/xattr.h>
#include <asm/uaccess.h>
#include <asm/div64.h>
#include <asm/pgtable.h>

/* This magic number is used in glibc for posix shared memory */
#define TMPFS_MAGIC	0x01021994

/* Swap entries per index page, and per doubly-indirect page-of-pages */
#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
/* i_blocks is counted in 512-byte units */
#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)

/*
 * Largest page index reachable: direct entries, plus half the indirect
 * page used for double indirection, half for triple (see shmem_swp_entry).
 */
#define SHMEM_MAX_INDEX  (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
#define SHMEM_MAX_BYTES  ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)

/* Round a byte size up to whole pages, for vm accounting */
#define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)

/* info->flags needs VM_flags to handle pagein/truncate races efficiently */
#define SHMEM_PAGEIN	 VM_READ
#define SHMEM_TRUNCATE	 VM_WRITE

/* Definition to limit shmem_truncate's steps between cond_rescheds */
#define LATENCY_LIMIT	 64

/* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20

/* Keep swapped page count in private field of indirect struct page */
#define nr_swapped		private

/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
enum sgp_type {
	SGP_QUICK,	/* don't try more than file page cache lookup */
	SGP_READ,	/* don't exceed i_size, don't allocate page */
	SGP_CACHE,	/* don't exceed i_size, may allocate page */
	SGP_WRITE,	/* may exceed i_size, may allocate page */
};

static int shmem_getpage(struct inode *inode, unsigned long idx,
			 struct page **pagep, enum sgp_type sgp, int *type);

/* Allocate one (zero-order or higher) page to hold swap-index entries */
static inline struct page *shmem_dir_alloc(unsigned int gfp_mask)
{
	/*
	 * The above definition of ENTRIES_PER_PAGE, and the use of
	 * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
	 * might be reconsidered if it ever diverges from PAGE_SIZE.
	 */
	return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT);
}

static inline void shmem_dir_free(struct page *page)
{
	__free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT);
}

/* Directory pages are mapped with KM_USER0; swap-entry pages with KM_USER1 */
static struct page **shmem_dir_map(struct page *page)
{
	return (struct page **)kmap_atomic(page, KM_USER0);
}

static inline void shmem_dir_unmap(struct page **dir)
{
	kunmap_atomic(dir, KM_USER0);
}

static swp_entry_t *shmem_swp_map(struct page *page)
{
	return (swp_entry_t *)kmap_atomic(page, KM_USER1);
}

static inline void shmem_swp_balance_unmap(void)
{
	/*
	 * When passing a pointer to an i_direct entry, to code which
	 * also handles indirect entries and so will shmem_swp_unmap,
	 * we must arrange for the preempt count to remain in balance.
	 * What kmap_atomic of a lowmem page does depends on config
	 * and architecture, so pretend to kmap_atomic some lowmem page.
	 */
	(void) kmap_atomic(ZERO_PAGE(0), KM_USER1);
}

static inline void shmem_swp_unmap(swp_entry_t *entry)
{
	kunmap_atomic(entry, KM_USER1);
}

/* Per-superblock info; NULL s_fs_info means no limits (internal mount) */
static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
{
	return sb->s_fs_info;
}

/*
 * shmem_file_setup pre-accounts the whole fixed size of a VM object,
 * for shared memory and for shared anonymous (/dev/zero) mappings
 * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1),
 * consistent with the pre-accounting of private mappings ...
 */
static inline int shmem_acct_size(unsigned long flags, loff_t size)
{
	return (flags & VM_ACCOUNT)?
		security_vm_enough_memory(VM_ACCT(size)): 0;
}

static inline void shmem_unacct_size(unsigned long flags, loff_t size)
{
	if (flags & VM_ACCOUNT)
		vm_unacct_memory(VM_ACCT(size));
}

/*
 * ... whereas tmpfs objects are accounted incrementally as
 * pages are allocated, in order to allow huge sparse files.
 * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
 * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
 */
static inline int shmem_acct_block(unsigned long flags)
{
	return (flags & VM_ACCOUNT)?
		0: security_vm_enough_memory(VM_ACCT(PAGE_CACHE_SIZE));
}

static inline void shmem_unacct_blocks(unsigned long flags, long pages)
{
	if (!(flags & VM_ACCOUNT))
		vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
}

/* Operations tables defined later in this file */
static struct super_operations shmem_ops;
static struct address_space_operations shmem_aops;
static struct file_operations shmem_file_operations;
static struct inode_operations shmem_inode_operations;
static struct inode_operations shmem_dir_inode_operations;
static struct inode_operations shmem_special_inode_operations;
static struct vm_operations_struct shmem_vm_ops;

static struct backing_dev_info shmem_backing_dev_info = {
	.ra_pages	= 0,	/* No readahead */
	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
	.unplug_io_fn	= default_unplug_io_fn,
};

/* List of inodes with swapped-out pages, for shmem_unuse; guarded by lock */
static LIST_HEAD(shmem_swaplist);
static DEFINE_SPINLOCK(shmem_swaplist_lock);

/* Return @pages blocks to the superblock's free count, adjusting i_blocks */
static void shmem_free_blocks(struct inode *inode, long pages)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	if (sbinfo) {
		spin_lock(&sbinfo->stat_lock);
		sbinfo->free_blocks += pages;
		inode->i_blocks -= pages*BLOCKS_PER_PAGE;
		spin_unlock(&sbinfo->stat_lock);
	}
}

/*
 * shmem_recalc_inode - recalculate the size of an inode
 *
 * @inode: inode to recalc
 *
 * We have to calculate the free blocks since the mm can drop
 * undirtied hole pages behind our back.
 *
 * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
 * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
 *
 * It has to be called with the spinlock held.
 */
static void shmem_recalc_inode(struct inode *inode)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	long freed;

	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
	if (freed > 0) {
		info->alloced -= freed;
		shmem_unacct_blocks(info->flags, freed);
		shmem_free_blocks(inode, freed);
	}
}

/*
 * shmem_swp_entry - find the swap vector position in the info structure
 *
 * @info:  info structure for the inode
 * @index: index of the page to find
 * @page:  optional page to add to the structure. Has to be preset to
 *         all zeros
 *
 * If there is no space allocated yet it will return NULL when
 * page is NULL, else it will use the page for the needed block,
 * setting it to NULL on return to indicate that it has been used.
 *
 * The swap vector is organized the following way:
 *
 * There are SHMEM_NR_DIRECT entries directly stored in the
 * shmem_inode_info structure. So small files do not need an addional
 * allocation.
 *
 * For pages with index > SHMEM_NR_DIRECT there is the pointer
 * i_indirect which points to a page which holds in the first half
 * doubly indirect blocks, in the second half triple indirect blocks:
 *
 * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
 * following layout (for SHMEM_NR_DIRECT == 16):
 *
 * i_indirect -> dir --> 16-19
 * 	      |	     +-> 20-23
 * 	      |
 * 	      +-->dir2 --> 24-27
 * 	      |	       +-> 28-31
 * 	      |	       +-> 32-35
 * 	      |	       +-> 36-39
 * 	      |
 * 	      +-->dir3 --> 40-43
 * 	       	       +-> 44-47
 * 	       	       +-> 48-51
 * 	       	       +-> 52-55
 *
 * NOTE: a non-NULL return is a kmap_atomic'ed pointer (KM_USER1 for
 * indirect entries, balanced by shmem_swp_balance_unmap for direct
 * entries); callers must shmem_swp_unmap it when done.
 */
static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page)
{
	unsigned long offset;
	struct page **dir;
	struct page *subdir;

	if (index < SHMEM_NR_DIRECT) {
		shmem_swp_balance_unmap();
		return info->i_direct+index;
	}
	if (!info->i_indirect) {
		if (page) {
			info->i_indirect = *page;
			*page = NULL;
		}
		return NULL;			/* need another page */
	}

	index -= SHMEM_NR_DIRECT;
	offset = index % ENTRIES_PER_PAGE;
	index /= ENTRIES_PER_PAGE;
	dir = shmem_dir_map(info->i_indirect);

	if (index >= ENTRIES_PER_PAGE/2) {
		/* second half of i_indirect: triple-indirect blocks */
		index -= ENTRIES_PER_PAGE/2;
		dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
		index %= ENTRIES_PER_PAGE;
		subdir = *dir;
		if (!subdir) {
			if (page) {
				*dir = *page;
				*page = NULL;
			}
			shmem_dir_unmap(dir);
			return NULL;		/* need another page */
		}
		shmem_dir_unmap(dir);
		dir = shmem_dir_map(subdir);
	}

	dir += index;
	subdir = *dir;
	if (!subdir) {
		if (!page || !(subdir = *page)) {
			shmem_dir_unmap(dir);
			return NULL;		/* need a page */
		}
		*dir = subdir;
		*page = NULL;
	}
	shmem_dir_unmap(dir);
	return shmem_swp_map(subdir) + offset;
}

/*
 * Store @value in *@entry, keeping info->swapped and the owning
 * indirect page's nr_swapped count in step (entry must be mapped).
 */
static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value)
{
	long incdec = value? 1: -1;

	entry->val = value;
	info->swapped += incdec;
	if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT)
		kmap_atomic_to_page(entry)->nr_swapped += incdec;
}

/*
 * shmem_swp_alloc - get the position of the swap entry for the page.
 *                   If it does not exist allocate the entry.
 *
 * @info:	info structure for the inode
 * @index:	index of the page to find
 * @sgp:	check and recheck i_size? skip allocation?
 *
 * Called with info->lock held; the lock is dropped and retaken around
 * the index-page allocation, so i_size and next_index are rechecked after.
 */
static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
{
	struct inode *inode = &info->vfs_inode;
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	struct page *page = NULL;
	swp_entry_t *entry;

	if (sgp != SGP_WRITE &&
	    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
		return ERR_PTR(-EINVAL);

	while (!(entry = shmem_swp_entry(info, index, &page))) {
		if (sgp == SGP_READ)
			return shmem_swp_map(ZERO_PAGE(0));
		/*
		 * Test free_blocks against 1 not 0, since we have 1 data
		 * page (and perhaps indirect index pages) yet to allocate:
		 * a waste to allocate index if we cannot allocate data.
		 */
		if (sbinfo) {
			spin_lock(&sbinfo->stat_lock);
			if (sbinfo->free_blocks <= 1) {
				spin_unlock(&sbinfo->stat_lock);
				return ERR_PTR(-ENOSPC);
			}
			sbinfo->free_blocks--;
			inode->i_blocks += BLOCKS_PER_PAGE;
			spin_unlock(&sbinfo->stat_lock);
		}

		/* drop info->lock: shmem_dir_alloc may sleep */
		spin_unlock(&info->lock);
		page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO);
		if (page) {
			page->nr_swapped = 0;
		}
		spin_lock(&info->lock);

		if (!page) {
			shmem_free_blocks(inode, 1);
			return ERR_PTR(-ENOMEM);
		}
		if (sgp != SGP_WRITE &&
		    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
			entry = ERR_PTR(-EINVAL);
			break;
		}
		if (info->next_index <= index)
			info->next_index = index + 1;
	}
	if (page) {
		/* another task gave its page, or truncated the file */
		shmem_free_blocks(inode, 1);
		shmem_dir_free(page);
	}
	if (info->next_index <= index && !IS_ERR(entry))
		info->next_index = index + 1;
	return entry;
}

/*
 * shmem_free_swp - free some swap entries in a directory
 *
 * @dir:   pointer to the directory
 * @edir:  pointer after last entry of the directory
 *
 * Returns the number of entries freed.
 */
static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
{
	swp_entry_t *ptr;
	int freed = 0;

	for (ptr = dir; ptr < edir; ptr++) {
		if (ptr->val) {
			free_swap_and_cache(*ptr);
			*ptr = (swp_entry_t){0};
			freed++;
		}
	}
	return freed;
}

/*
 * Map @subdir and free its swap entries in [offset, limit), in
 * LATENCY_LIMIT-sized chunks so we can cond_resched between them;
 * on resched, *@dir is unmapped and set NULL for the caller to remap.
 */
static int shmem_map_and_free_swp(struct page *subdir,
		int offset, int limit, struct page ***dir)
{
	swp_entry_t *ptr;
	int freed = 0;

	ptr = shmem_swp_map(subdir);
	for (; offset < limit; offset += LATENCY_LIMIT) {
		int size = limit - offset;
		if (size > LATENCY_LIMIT)
			size = LATENCY_LIMIT;
		freed += shmem_free_swp(ptr+offset, ptr+offset+size);
		if (need_resched()) {
			shmem_swp_unmap(ptr);
			if (*dir) {
				shmem_dir_unmap(*dir);
				*dir = NULL;
			}
			cond_resched();
			ptr = shmem_swp_map(subdir);
		}
	}
	shmem_swp_unmap(ptr);
	return freed;
}

/*
 * Free a NULL-terminated chain of index pages linked through page->lru.next,
 * rescheduling every LATENCY_LIMIT pages.
 */
static void shmem_free_pages(struct list_head *next)
{
	struct page *page;
	int freed = 0;

	do {
		page = container_of(next, struct page, lru);
		next = next->next;
		shmem_dir_free(page);
		freed++;
		if (freed >= LATENCY_LIMIT) {
			cond_resched();
			freed = 0;
		}
	} while (next);
}

/*
 * Truncate the inode down to i_size: free swap entries and index pages
 * beyond the new end, walking the direct entries, then the doubly and
 * triply indirect trees hung off i_indirect.
 */
static void shmem_truncate(struct inode *inode)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	unsigned long idx;
	unsigned long size;
	unsigned long limit;
	unsigned long stage;
	unsigned long diroff;
	struct page **dir;
	struct page *topdir;
	struct page *middir;
	struct page *subdir;
	swp_entry_t *ptr;
	LIST_HEAD(pages_to_free);
	long nr_pages_to_free = 0;
	long nr_swaps_freed = 0;
	int offset;
	int freed;

	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
	idx = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	if (idx >= info->next_index)
		return;

	spin_lock(&info->lock);
	info->flags |= SHMEM_TRUNCATE;
	limit = info->next_index;
	info->next_index = idx;
	topdir = info->i_indirect;
	if (topdir && idx <= SHMEM_NR_DIRECT) {
		/* whole indirect tree goes: detach it for freeing below */
		info->i_indirect = NULL;
		nr_pages_to_free++;
		list_add(&topdir->lru, &pages_to_free);
	}
	spin_unlock(&info->lock);

	if (info->swapped && idx < SHMEM_NR_DIRECT) {
		ptr = info->i_direct;
		size = limit;
		if (size > SHMEM_NR_DIRECT)
			size = SHMEM_NR_DIRECT;
		nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size);
	}
	if (!topdir)
		goto done2;

	BUG_ON(limit <= SHMEM_NR_DIRECT);
	limit -= SHMEM_NR_DIRECT;
	idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
	offset = idx % ENTRIES_PER_PAGE;
	idx -= offset;

	dir = shmem_dir_map(topdir);
	stage = ENTRIES_PER_PAGEPAGE/2;
	if (idx < ENTRIES_PER_PAGEPAGE/2) {
		/* start within the doubly-indirect half of topdir */
		middir = topdir;
		diroff = idx/ENTRIES_PER_PAGE;
	} else {
		/* start within the triply-indirect half */
		dir += ENTRIES_PER_PAGE/2;
		dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE;
		while (stage <= idx)
			stage += ENTRIES_PER_PAGEPAGE;
		middir = *dir;
		if (*dir) {
			diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
				ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
			if (!diroff && !offset) {
				*dir = NULL;
				nr_pages_to_free++;
				list_add(&middir->lru, &pages_to_free);
			}
			shmem_dir_unmap(dir);
			dir = shmem_dir_map(middir);
		} else {
			diroff = 0;
			offset = 0;
			idx = stage;
		}
	}

	for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) {
		if (unlikely(idx == stage)) {
			/* crossed into the next triple-indirect subtree */
			shmem_dir_unmap(dir);
			dir = shmem_dir_map(topdir) +
			    ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
			while (!*dir) {
				dir++;
				idx += ENTRIES_PER_PAGEPAGE;
				if (idx >= limit)
					goto done1;
			}
			stage = idx + ENTRIES_PER_PAGEPAGE;
			middir = *dir;
			*dir = NULL;
			nr_pages_to_free++;
			list_add(&middir->lru, &pages_to_free);
			shmem_dir_unmap(dir);
			cond_resched();
			dir = shmem_dir_map(middir);
			diroff = 0;
		}
		subdir = dir[diroff];
		if (subdir && subdir->nr_swapped) {
			size = limit - idx;
			if (size > ENTRIES_PER_PAGE)
				size = ENTRIES_PER_PAGE;
			freed = shmem_map_and_free_swp(subdir,
					offset, size, &dir);
			if (!dir)
				dir = shmem_dir_map(middir);
			nr_swaps_freed += freed;
			if (offset)
				spin_lock(&info->lock);
			subdir->nr_swapped -= freed;
			if (offset)
				spin_unlock(&info->lock);
			BUG_ON(subdir->nr_swapped > offset);
		}
		if (offset)
			offset = 0;
		else if (subdir) {
			/* whole subdir page emptied: queue it for freeing */
			dir[diroff] = NULL;
			nr_pages_to_free++;
			list_add(&subdir->lru, &pages_to_free);
		}
	}
done1:
	shmem_dir_unmap(dir);
done2:
	if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
		/*
		 * Call truncate_inode_pages again: racing shmem_unuse_inode
		 * may have swizzled a page in from swap since vmtruncate or
		 * generic_delete_inode did it, before we lowered next_index.
		 * Also, though shmem_getpage checks i_size before adding to
		 * cache, no recheck after: so fix the narrow window there too.
		 */
		truncate_inode_pages(inode->i_mapping, inode->i_size);
	}

	spin_lock(&info->lock);
	info->flags &= ~SHMEM_TRUNCATE;
	info->swapped -= nr_swaps_freed;
	if (nr_pages_to_free)
		shmem_free_blocks(inode, nr_pages_to_free);
	shmem_recalc_inode(inode);
	spin_unlock(&info->lock);

	/*
	 * Empty swap vector directory pages to be freed?
	 */
	if (!list_empty(&pages_to_free)) {
		pages_to_free.prev->next = NULL;
		shmem_free_pages(pages_to_free.next);
	}
}

static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	struct page *page = NULL;
	int error;

	if (attr->ia_valid & ATTR_SIZE) {
		if (attr->ia_size < inode->i_size) {
			/*
			 * If truncating down to a partial page, then
			 * if that page is already allocated, hold it
			 * in memory until the truncation is over, so
			 * truncate_partial_page cannot miss it were
			 * it assigned to swap.
			 */
			if (attr->ia_size & (PAGE_CACHE_SIZE-1)) {
				(void) shmem_getpage(inode,
					attr->ia_size>>PAGE_CACHE_SHIFT,
						&page, SGP_READ, NULL);
			}
			/*
			 * Reset SHMEM_PAGEIN flag so that shmem_truncate can
			 * detect if any pages might have been added to cache
			 * after truncate_inode_pages.  But we needn't bother
			 * if it's being fully truncated to zero-length: the
			 * nrpages check is efficient enough in that case.
647*1da177e4SLinus Torvalds */ 648*1da177e4SLinus Torvalds if (attr->ia_size) { 649*1da177e4SLinus Torvalds struct shmem_inode_info *info = SHMEM_I(inode); 650*1da177e4SLinus Torvalds spin_lock(&info->lock); 651*1da177e4SLinus Torvalds info->flags &= ~SHMEM_PAGEIN; 652*1da177e4SLinus Torvalds spin_unlock(&info->lock); 653*1da177e4SLinus Torvalds } 654*1da177e4SLinus Torvalds } 655*1da177e4SLinus Torvalds } 656*1da177e4SLinus Torvalds 657*1da177e4SLinus Torvalds error = inode_change_ok(inode, attr); 658*1da177e4SLinus Torvalds if (!error) 659*1da177e4SLinus Torvalds error = inode_setattr(inode, attr); 660*1da177e4SLinus Torvalds if (page) 661*1da177e4SLinus Torvalds page_cache_release(page); 662*1da177e4SLinus Torvalds return error; 663*1da177e4SLinus Torvalds } 664*1da177e4SLinus Torvalds 665*1da177e4SLinus Torvalds static void shmem_delete_inode(struct inode *inode) 666*1da177e4SLinus Torvalds { 667*1da177e4SLinus Torvalds struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 668*1da177e4SLinus Torvalds struct shmem_inode_info *info = SHMEM_I(inode); 669*1da177e4SLinus Torvalds 670*1da177e4SLinus Torvalds if (inode->i_op->truncate == shmem_truncate) { 671*1da177e4SLinus Torvalds shmem_unacct_size(info->flags, inode->i_size); 672*1da177e4SLinus Torvalds inode->i_size = 0; 673*1da177e4SLinus Torvalds shmem_truncate(inode); 674*1da177e4SLinus Torvalds if (!list_empty(&info->swaplist)) { 675*1da177e4SLinus Torvalds spin_lock(&shmem_swaplist_lock); 676*1da177e4SLinus Torvalds list_del_init(&info->swaplist); 677*1da177e4SLinus Torvalds spin_unlock(&shmem_swaplist_lock); 678*1da177e4SLinus Torvalds } 679*1da177e4SLinus Torvalds } 680*1da177e4SLinus Torvalds if (sbinfo) { 681*1da177e4SLinus Torvalds BUG_ON(inode->i_blocks); 682*1da177e4SLinus Torvalds spin_lock(&sbinfo->stat_lock); 683*1da177e4SLinus Torvalds sbinfo->free_inodes++; 684*1da177e4SLinus Torvalds spin_unlock(&sbinfo->stat_lock); 685*1da177e4SLinus Torvalds } 686*1da177e4SLinus Torvalds clear_inode(inode); 
}

/*
 * shmem_find_swp - linear scan of a swap vector for a given swap entry.
 * Returns the index of the matching slot within [dir, edir), or -1.
 */
static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
{
	swp_entry_t *ptr;

	for (ptr = dir; ptr < edir; ptr++) {
		if (ptr->val == entry.val)
			return ptr - dir;
	}
	return -1;
}

/*
 * shmem_unuse_inode - search one inode's swap vector for @entry.
 *
 * Scans the direct slots, then the two-level indirect structure, under
 * info->lock.  If found, moves @page from the swap cache into this
 * inode's page cache, clears the slot, and drops one swap reference.
 * Returns 1 if the entry belonged to this inode, 0 otherwise.
 */
static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
{
	struct inode *inode;
	unsigned long idx;
	unsigned long size;
	unsigned long limit;
	unsigned long stage;
	struct page **dir;
	struct page *subdir;
	swp_entry_t *ptr;
	int offset;

	idx = 0;
	ptr = info->i_direct;
	spin_lock(&info->lock);
	limit = info->next_index;
	/* First check the direct slots. */
	size = limit;
	if (size > SHMEM_NR_DIRECT)
		size = SHMEM_NR_DIRECT;
	offset = shmem_find_swp(entry, ptr, ptr+size);
	if (offset >= 0) {
		shmem_swp_balance_unmap();
		goto found;
	}
	if (!info->i_indirect)
		goto lost2;

	/* Walk the indirect tree, one swap-vector page at a time. */
	dir = shmem_dir_map(info->i_indirect);
	stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2;

	for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
		if (unlikely(idx == stage)) {
			/* Crossed into doubly-indirect territory: descend. */
			shmem_dir_unmap(dir-1);
			dir = shmem_dir_map(info->i_indirect) +
			    ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
			while (!*dir) {
				dir++;
				idx += ENTRIES_PER_PAGEPAGE;
				if (idx >= limit)
					goto lost1;
			}
			stage = idx + ENTRIES_PER_PAGEPAGE;
			subdir = *dir;
			shmem_dir_unmap(dir);
			dir = shmem_dir_map(subdir);
		}
		subdir = *dir;
		/* Only scan pages known to hold swapped entries. */
		if (subdir && subdir->nr_swapped) {
			ptr = shmem_swp_map(subdir);
			size = limit - idx;
			if (size > ENTRIES_PER_PAGE)
				size = ENTRIES_PER_PAGE;
			offset = shmem_find_swp(entry, ptr, ptr+size);
			if (offset >= 0) {
				shmem_dir_unmap(dir);
				goto found;
			}
			shmem_swp_unmap(ptr);
		}
	}
lost1:
	shmem_dir_unmap(dir-1);
lost2:
	spin_unlock(&info->lock);
	return 0;
found:
	/* ptr is still mapped here; ptr + offset is the matching slot. */
	idx += offset;
	inode = &info->vfs_inode;
	if (move_from_swap_cache(page, idx, inode->i_mapping) == 0) {
		info->flags |= SHMEM_PAGEIN;
		shmem_swp_set(info, ptr + offset, 0);
	}
	shmem_swp_unmap(ptr);
	spin_unlock(&info->lock);
	/*
	 * Decrement swap count even when the entry is left behind:
	 * try_to_unuse will skip over mms, then reincrement count.
	 */
	swap_free(entry);
	return 1;
}

/*
 * shmem_unuse() search for an eventually swapped out shmem page.
 */
int shmem_unuse(swp_entry_t entry, struct page *page)
{
	struct list_head *p, *next;
	struct shmem_inode_info *info;
	int found = 0;

	spin_lock(&shmem_swaplist_lock);
	list_for_each_safe(p, next, &shmem_swaplist) {
		info = list_entry(p, struct shmem_inode_info, swaplist);
		/* Prune inodes with nothing left on swap as we go. */
		if (!info->swapped)
			list_del_init(&info->swaplist);
		else if (shmem_unuse_inode(info, entry, page)) {
			/* move head to start search for next from here */
			list_move_tail(&shmem_swaplist, &info->swaplist);
			found = 1;
			break;
		}
	}
	spin_unlock(&shmem_swaplist_lock);
	return found;
}
/*
 * Move the page from the page cache to the swap cache.
 *
 * Called with @page locked and unmapped.  On success the page's swap
 * slot is recorded in the inode's swap vector, the inode is put on the
 * global swaplist, and 0 is returned with the page unlocked.  On any
 * failure the page is redirtied and WRITEPAGE_ACTIVATE is returned,
 * leaving the page locked for the caller.
 */
static int shmem_writepage(struct page *page, struct writeback_control *wbc)
{
	struct shmem_inode_info *info;
	swp_entry_t *entry, swap;
	struct address_space *mapping;
	unsigned long index;
	struct inode *inode;

	BUG_ON(!PageLocked(page));
	BUG_ON(page_mapped(page));

	mapping = page->mapping;
	index = page->index;
	inode = mapping->host;
	info = SHMEM_I(inode);
	/* mlocked segments must not go to swap */
	if (info->flags & VM_LOCKED)
		goto redirty;
	swap = get_swap_page();
	if (!swap.val)
		goto redirty;

	spin_lock(&info->lock);
	shmem_recalc_inode(inode);
	if (index >= info->next_index) {
		/* only a racing truncation can have shrunk next_index */
		BUG_ON(!(info->flags & SHMEM_TRUNCATE));
		goto unlock;
	}
	entry = shmem_swp_entry(info, index, NULL);
	BUG_ON(!entry);
	BUG_ON(entry->val);

	if (move_to_swap_cache(page, swap) == 0) {
		shmem_swp_set(info, entry, swap.val);
		shmem_swp_unmap(entry);
		spin_unlock(&info->lock);
		if (list_empty(&info->swaplist)) {
			spin_lock(&shmem_swaplist_lock);
			/* move instead of add in case we're racing */
			list_move_tail(&info->swaplist, &shmem_swaplist);
			spin_unlock(&shmem_swaplist_lock);
		}
		unlock_page(page);
		return 0;
	}

	shmem_swp_unmap(entry);
unlock:
	spin_unlock(&info->lock);
	/* give back the unused swap slot */
	swap_free(swap);
redirty:
	set_page_dirty(page);
	return WRITEPAGE_ACTIVATE;	/* Return with the page locked */
}

#ifdef CONFIG_NUMA
/*
 * shmem_swapin_async - start async swap-in of one entry, honouring the
 * file's shared NUMA policy for index @idx via a throwaway pseudo-vma.
 * Returns the (possibly not yet uptodate) swap-cache page, or NULL.
 */
static struct page *shmem_swapin_async(struct shared_policy *p,
				       swp_entry_t entry, unsigned long idx)
{
	struct page *page;
	struct vm_area_struct pvma;

	/* Create a pseudo vma that just contains the policy */
	memset(&pvma, 0, sizeof(struct vm_area_struct));
	pvma.vm_end = PAGE_SIZE;
	pvma.vm_pgoff = idx;
	pvma.vm_policy = mpol_shared_policy_lookup(p, idx);
	page = read_swap_cache_async(entry, &pvma, 0);
	mpol_free(pvma.vm_policy);
	return page;
}

/*
 * shmem_swapin - NUMA-aware swap-in with readahead of neighbouring
 * swap slots, then a synchronous-ish read of @entry itself.
 */
struct page *shmem_swapin(struct shmem_inode_info *info, swp_entry_t entry,
			  unsigned long idx)
{
884*1da177e4SLinus Torvalds struct shared_policy *p = &info->policy; 885*1da177e4SLinus Torvalds int i, num; 886*1da177e4SLinus Torvalds struct page *page; 887*1da177e4SLinus Torvalds unsigned long offset; 888*1da177e4SLinus Torvalds 889*1da177e4SLinus Torvalds num = valid_swaphandles(entry, &offset); 890*1da177e4SLinus Torvalds for (i = 0; i < num; offset++, i++) { 891*1da177e4SLinus Torvalds page = shmem_swapin_async(p, 892*1da177e4SLinus Torvalds swp_entry(swp_type(entry), offset), idx); 893*1da177e4SLinus Torvalds if (!page) 894*1da177e4SLinus Torvalds break; 895*1da177e4SLinus Torvalds page_cache_release(page); 896*1da177e4SLinus Torvalds } 897*1da177e4SLinus Torvalds lru_add_drain(); /* Push any new pages onto the LRU now */ 898*1da177e4SLinus Torvalds return shmem_swapin_async(p, entry, idx); 899*1da177e4SLinus Torvalds } 900*1da177e4SLinus Torvalds 901*1da177e4SLinus Torvalds static struct page * 902*1da177e4SLinus Torvalds shmem_alloc_page(unsigned long gfp, struct shmem_inode_info *info, 903*1da177e4SLinus Torvalds unsigned long idx) 904*1da177e4SLinus Torvalds { 905*1da177e4SLinus Torvalds struct vm_area_struct pvma; 906*1da177e4SLinus Torvalds struct page *page; 907*1da177e4SLinus Torvalds 908*1da177e4SLinus Torvalds memset(&pvma, 0, sizeof(struct vm_area_struct)); 909*1da177e4SLinus Torvalds pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); 910*1da177e4SLinus Torvalds pvma.vm_pgoff = idx; 911*1da177e4SLinus Torvalds pvma.vm_end = PAGE_SIZE; 912*1da177e4SLinus Torvalds page = alloc_page_vma(gfp | __GFP_ZERO, &pvma, 0); 913*1da177e4SLinus Torvalds mpol_free(pvma.vm_policy); 914*1da177e4SLinus Torvalds return page; 915*1da177e4SLinus Torvalds } 916*1da177e4SLinus Torvalds #else 917*1da177e4SLinus Torvalds static inline struct page * 918*1da177e4SLinus Torvalds shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx) 919*1da177e4SLinus Torvalds { 920*1da177e4SLinus Torvalds swapin_readahead(entry, 0, NULL); 
	return read_swap_cache_async(entry, NULL, 0);
}

/* Non-NUMA: a zeroed page from the normal allocator. */
static inline struct page *
shmem_alloc_page(unsigned int __nocast gfp,struct shmem_inode_info *info,
		 unsigned long idx)
{
	return alloc_page(gfp | __GFP_ZERO);
}
#endif

/*
 * shmem_getpage - either get the page from swap or allocate a new one
 *
 * If we allocate a new one we do not mark it dirty. That's up to the
 * vm. If we swap it in we mark it dirty since we also free the swap
 * entry since a page cannot live in both the swap and page cache
 *
 * Returns 0 with *pagep pointing at a locked... NOTE(review): the page
 * is returned unlocked unless the caller passed it in locked (see the
 * *pagep != filepage check at done:) — confirm against callers.
 * On failure returns -EFBIG/-EIO/-ENOMEM/-ENOSPC or the shmem_swp_alloc
 * error.
 */
static int shmem_getpage(struct inode *inode, unsigned long idx,
			struct page **pagep, enum sgp_type sgp, int *type)
{
	struct address_space *mapping = inode->i_mapping;
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct shmem_sb_info *sbinfo;
	struct page *filepage = *pagep;
	struct page *swappage;
	swp_entry_t *entry;
	swp_entry_t swap;
	int error;

	if (idx >= SHMEM_MAX_INDEX)
		return -EFBIG;
	/*
	 * Normally, filepage is NULL on entry, and either found
	 * uptodate immediately, or allocated and zeroed, or read
	 * in under swappage, which is then assigned to filepage.
	 * But shmem_prepare_write passes in a locked filepage,
	 * which may be found not uptodate by other callers too,
	 * and may need to be copied from the swappage read in.
	 */
repeat:
	if (!filepage)
		filepage = find_lock_page(mapping, idx);
	if (filepage && PageUptodate(filepage))
		goto done;
	error = 0;
	/* SGP_QUICK callers won't wait for I/O or allocation. */
	if (sgp == SGP_QUICK)
		goto failed;

	spin_lock(&info->lock);
	shmem_recalc_inode(inode);
	entry = shmem_swp_alloc(info, idx, sgp);
	if (IS_ERR(entry)) {
		spin_unlock(&info->lock);
		error = PTR_ERR(entry);
		goto failed;
	}
	swap = *entry;

	if (swap.val) {
		/* Look it up and read it in.. */
		swappage = lookup_swap_cache(swap);
		if (!swappage) {
			shmem_swp_unmap(entry);
			spin_unlock(&info->lock);
			/* here we actually do the io */
			if (type && *type == VM_FAULT_MINOR) {
				inc_page_state(pgmajfault);
				*type = VM_FAULT_MAJOR;
			}
			swappage = shmem_swapin(info, swap, idx);
			if (!swappage) {
				/*
				 * Swap-in failed: recheck the slot under the
				 * lock; only report -ENOMEM if the same swap
				 * entry is still there (i.e. nobody else
				 * resolved it meanwhile).
				 */
				spin_lock(&info->lock);
				entry = shmem_swp_alloc(info, idx, sgp);
				if (IS_ERR(entry))
					error = PTR_ERR(entry);
				else {
					if (entry->val == swap.val)
						error = -ENOMEM;
					shmem_swp_unmap(entry);
				}
				spin_unlock(&info->lock);
				if (error)
					goto failed;
				goto repeat;
			}
			wait_on_page_locked(swappage);
			page_cache_release(swappage);
			goto repeat;
		}

		/* We have to do this with page locked to prevent races */
		if (TestSetPageLocked(swappage)) {
			shmem_swp_unmap(entry);
			spin_unlock(&info->lock);
			wait_on_page_locked(swappage);
			page_cache_release(swappage);
			goto repeat;
		}
		if (PageWriteback(swappage)) {
			shmem_swp_unmap(entry);
			spin_unlock(&info->lock);
			wait_on_page_writeback(swappage);
			unlock_page(swappage);
			page_cache_release(swappage);
			goto repeat;
		}
		if (!PageUptodate(swappage)) {
			/* read from swap failed */
			shmem_swp_unmap(entry);
			spin_unlock(&info->lock);
			unlock_page(swappage);
			page_cache_release(swappage);
			error = -EIO;
			goto failed;
		}

		if (filepage) {
			/*
			 * Caller supplied a page: copy the swapped-in data
			 * into it and drop the swap page and swap slot.
			 */
			shmem_swp_set(info, entry, 0);
			shmem_swp_unmap(entry);
			delete_from_swap_cache(swappage);
			spin_unlock(&info->lock);
			copy_highpage(filepage, swappage);
			unlock_page(swappage);
			page_cache_release(swappage);
			flush_dcache_page(filepage);
			SetPageUptodate(filepage);
			set_page_dirty(filepage);
			swap_free(swap);
		} else if (!(error = move_from_swap_cache(
				swappage, idx, mapping))) {
			/* Adopt the swap page directly as the file page. */
			info->flags |= SHMEM_PAGEIN;
			shmem_swp_set(info, entry, 0);
			shmem_swp_unmap(entry);
			spin_unlock(&info->lock);
			filepage = swappage;
			swap_free(swap);
		} else {
			shmem_swp_unmap(entry);
			spin_unlock(&info->lock);
			unlock_page(swappage);
			page_cache_release(swappage);
			if (error == -ENOMEM) {
				/* let kswapd refresh zone for GFP_ATOMICs */
				blk_congestion_wait(WRITE, HZ/50);
			}
			goto repeat;
		}
	} else if (sgp == SGP_READ && !filepage) {
		/* Reads of holes need not allocate; reuse any cached page. */
		shmem_swp_unmap(entry);
		filepage = find_get_page(mapping, idx);
		if (filepage &&
		    (!PageUptodate(filepage) || TestSetPageLocked(filepage))) {
			spin_unlock(&info->lock);
			wait_on_page_locked(filepage);
			page_cache_release(filepage);
			filepage = NULL;
			goto repeat;
		}
		spin_unlock(&info->lock);
	} else {
		/* Allocate a fresh zeroed page, charging block accounting. */
		shmem_swp_unmap(entry);
		sbinfo = SHMEM_SB(inode->i_sb);
		if (sbinfo) {
			spin_lock(&sbinfo->stat_lock);
			if (sbinfo->free_blocks == 0 ||
			    shmem_acct_block(info->flags)) {
				spin_unlock(&sbinfo->stat_lock);
				spin_unlock(&info->lock);
				error = -ENOSPC;
				goto failed;
			}
			sbinfo->free_blocks--;
			inode->i_blocks += BLOCKS_PER_PAGE;
			spin_unlock(&sbinfo->stat_lock);
		} else if (shmem_acct_block(info->flags)) {
			spin_unlock(&info->lock);
			error = -ENOSPC;
			goto failed;
		}

		if (!filepage) {
			spin_unlock(&info->lock);
			filepage = shmem_alloc_page(mapping_gfp_mask(mapping),
						    info,
						    idx);
			if (!filepage) {
				shmem_unacct_blocks(info->flags, 1);
				shmem_free_blocks(inode, 1);
				error = -ENOMEM;
				goto failed;
			}

			/*
			 * Recheck the slot: a racing swap-out may have
			 * assigned it while we allocated unlocked.
			 */
			spin_lock(&info->lock);
			entry = shmem_swp_alloc(info, idx, sgp);
			if (IS_ERR(entry))
				error = PTR_ERR(entry);
			else {
				swap = *entry;
				shmem_swp_unmap(entry);
			}
			if (error || swap.val || 0 != add_to_page_cache_lru(
					filepage, mapping, idx, GFP_ATOMIC)) {
				/* lost the race: undo accounting and retry */
				spin_unlock(&info->lock);
				page_cache_release(filepage);
				shmem_unacct_blocks(info->flags, 1);
				shmem_free_blocks(inode, 1);
				filepage = NULL;
				if (error)
					goto failed;
				goto repeat;
			}
			info->flags |= SHMEM_PAGEIN;
		}

		info->alloced++;
	spin_unlock(&info->lock);
		flush_dcache_page(filepage);
		SetPageUptodate(filepage);
	}
done:
	/* Only unlock if we locked it ourselves (caller-supplied stays locked). */
	if (*pagep != filepage) {
		unlock_page(filepage);
		*pagep = filepage;
	}
	return 0;

failed:
	if (*pagep != filepage) {
		unlock_page(filepage);
		page_cache_release(filepage);
	}
	return error;
}

/*
 * shmem_nopage - fault handler: translate the faulting address to a
 * file index and fetch the page via shmem_getpage.  Returns the page,
 * or NOPAGE_SIGBUS beyond EOF / on error, or NOPAGE_OOM on -ENOMEM.
 */
struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int *type)
{
	struct inode *inode = vma->vm_file->f_dentry->d_inode;
	struct page *page = NULL;
	unsigned long idx;
	int error;

	idx = (address - vma->vm_start) >> PAGE_SHIFT;
	idx += vma->vm_pgoff;
	idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
	if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode))
		return NOPAGE_SIGBUS;

	error = shmem_getpage(inode, idx, &page, SGP_CACHE, type);
	if (error)
		return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;

	mark_page_accessed(page);
	return page;
}

/*
 * shmem_populate - prefault a range of the mapping, installing ptes
 * page by page.  In nonblock mode uses SGP_QUICK (no I/O) and installs
 * file ptes for pages not yet present.  Returns 0 or a negative errno.
 */
static int shmem_populate(struct vm_area_struct *vma,
	unsigned long addr, unsigned long len,
	pgprot_t prot, unsigned long pgoff, int nonblock)
{
	struct inode *inode = vma->vm_file->f_dentry->d_inode;
	struct mm_struct *mm = vma->vm_mm;
	enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE;
	unsigned long size;

	/* refuse to populate beyond end of file */
	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size)
		return -EINVAL;

	while ((long) len > 0) {
		struct page *page = NULL;
		int err;
		/*
		 * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE
		 */
		err = shmem_getpage(inode, pgoff, &page, sgp, NULL);
		if (err)
			return err;
		if (page) {
			mark_page_accessed(page);
			err = install_page(mm, vma, addr, page, prot);
			if (err) {
				page_cache_release(page);
				return err;
			}
		} else if (nonblock) {
			/* page not resident: leave a file pte to fault later */
			err = install_file_pte(mm, vma, addr, pgoff, prot);
			if (err)
				return err;
		}

		len -= PAGE_SIZE;
		addr += PAGE_SIZE;
		pgoff++;
	}
	return 0;
}

#ifdef CONFIG_NUMA
/* Set the shared NUMA policy for the vma's range of this file. */
int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
{
	struct inode *i = vma->vm_file->f_dentry->d_inode;
	return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new);
}

/* Look up the shared NUMA policy covering @addr within the vma. */
struct mempolicy *
shmem_get_policy(struct vm_area_struct *vma, unsigned long addr)
{
	struct inode *i = vma->vm_file->f_dentry->d_inode;
	unsigned long idx;

	idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
	return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx);
}
#endif

/*
 * shmem_lock - mlock/munlock a shmem segment (SysV SHM_LOCK/SHM_UNLOCK).
 *
 * Charges/uncharges the locked size against @user's RLIMIT_MEMLOCK via
 * user_shm_lock/user_shm_unlock and tracks the state in info->flags.
 * Returns 0 on success, -ENOMEM if the lock charge is refused.
 */
int shmem_lock(struct file *file, int lock, struct user_struct *user)
{
	struct inode *inode = file->f_dentry->d_inode;
	struct shmem_inode_info *info = SHMEM_I(inode);
	int retval = -ENOMEM;

	spin_lock(&info->lock);
	if (lock && !(info->flags & VM_LOCKED)) {
		if (!user_shm_lock(inode->i_size, user))
			goto out_nomem;
1246*1da177e4SLinus Torvalds info->flags |= VM_LOCKED; 1247*1da177e4SLinus Torvalds } 1248*1da177e4SLinus Torvalds if (!lock && (info->flags & VM_LOCKED) && user) { 1249*1da177e4SLinus Torvalds user_shm_unlock(inode->i_size, user); 1250*1da177e4SLinus Torvalds info->flags &= ~VM_LOCKED; 1251*1da177e4SLinus Torvalds } 1252*1da177e4SLinus Torvalds retval = 0; 1253*1da177e4SLinus Torvalds out_nomem: 1254*1da177e4SLinus Torvalds spin_unlock(&info->lock); 1255*1da177e4SLinus Torvalds return retval; 1256*1da177e4SLinus Torvalds } 1257*1da177e4SLinus Torvalds 1258*1da177e4SLinus Torvalds static int shmem_mmap(struct file *file, struct vm_area_struct *vma) 1259*1da177e4SLinus Torvalds { 1260*1da177e4SLinus Torvalds file_accessed(file); 1261*1da177e4SLinus Torvalds vma->vm_ops = &shmem_vm_ops; 1262*1da177e4SLinus Torvalds return 0; 1263*1da177e4SLinus Torvalds } 1264*1da177e4SLinus Torvalds 1265*1da177e4SLinus Torvalds static struct inode * 1266*1da177e4SLinus Torvalds shmem_get_inode(struct super_block *sb, int mode, dev_t dev) 1267*1da177e4SLinus Torvalds { 1268*1da177e4SLinus Torvalds struct inode *inode; 1269*1da177e4SLinus Torvalds struct shmem_inode_info *info; 1270*1da177e4SLinus Torvalds struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 1271*1da177e4SLinus Torvalds 1272*1da177e4SLinus Torvalds if (sbinfo) { 1273*1da177e4SLinus Torvalds spin_lock(&sbinfo->stat_lock); 1274*1da177e4SLinus Torvalds if (!sbinfo->free_inodes) { 1275*1da177e4SLinus Torvalds spin_unlock(&sbinfo->stat_lock); 1276*1da177e4SLinus Torvalds return NULL; 1277*1da177e4SLinus Torvalds } 1278*1da177e4SLinus Torvalds sbinfo->free_inodes--; 1279*1da177e4SLinus Torvalds spin_unlock(&sbinfo->stat_lock); 1280*1da177e4SLinus Torvalds } 1281*1da177e4SLinus Torvalds 1282*1da177e4SLinus Torvalds inode = new_inode(sb); 1283*1da177e4SLinus Torvalds if (inode) { 1284*1da177e4SLinus Torvalds inode->i_mode = mode; 1285*1da177e4SLinus Torvalds inode->i_uid = current->fsuid; 1286*1da177e4SLinus Torvalds inode->i_gid = 
current->fsgid; 1287*1da177e4SLinus Torvalds inode->i_blksize = PAGE_CACHE_SIZE; 1288*1da177e4SLinus Torvalds inode->i_blocks = 0; 1289*1da177e4SLinus Torvalds inode->i_mapping->a_ops = &shmem_aops; 1290*1da177e4SLinus Torvalds inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; 1291*1da177e4SLinus Torvalds inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 1292*1da177e4SLinus Torvalds info = SHMEM_I(inode); 1293*1da177e4SLinus Torvalds memset(info, 0, (char *)inode - (char *)info); 1294*1da177e4SLinus Torvalds spin_lock_init(&info->lock); 1295*1da177e4SLinus Torvalds INIT_LIST_HEAD(&info->swaplist); 1296*1da177e4SLinus Torvalds 1297*1da177e4SLinus Torvalds switch (mode & S_IFMT) { 1298*1da177e4SLinus Torvalds default: 1299*1da177e4SLinus Torvalds inode->i_op = &shmem_special_inode_operations; 1300*1da177e4SLinus Torvalds init_special_inode(inode, mode, dev); 1301*1da177e4SLinus Torvalds break; 1302*1da177e4SLinus Torvalds case S_IFREG: 1303*1da177e4SLinus Torvalds inode->i_op = &shmem_inode_operations; 1304*1da177e4SLinus Torvalds inode->i_fop = &shmem_file_operations; 1305*1da177e4SLinus Torvalds mpol_shared_policy_init(&info->policy); 1306*1da177e4SLinus Torvalds break; 1307*1da177e4SLinus Torvalds case S_IFDIR: 1308*1da177e4SLinus Torvalds inode->i_nlink++; 1309*1da177e4SLinus Torvalds /* Some things misbehave if size == 0 on a directory */ 1310*1da177e4SLinus Torvalds inode->i_size = 2 * BOGO_DIRENT_SIZE; 1311*1da177e4SLinus Torvalds inode->i_op = &shmem_dir_inode_operations; 1312*1da177e4SLinus Torvalds inode->i_fop = &simple_dir_operations; 1313*1da177e4SLinus Torvalds break; 1314*1da177e4SLinus Torvalds case S_IFLNK: 1315*1da177e4SLinus Torvalds /* 1316*1da177e4SLinus Torvalds * Must not load anything in the rbtree, 1317*1da177e4SLinus Torvalds * mpol_free_shared_policy will not be called. 
1318*1da177e4SLinus Torvalds */ 1319*1da177e4SLinus Torvalds mpol_shared_policy_init(&info->policy); 1320*1da177e4SLinus Torvalds break; 1321*1da177e4SLinus Torvalds } 1322*1da177e4SLinus Torvalds } else if (sbinfo) { 1323*1da177e4SLinus Torvalds spin_lock(&sbinfo->stat_lock); 1324*1da177e4SLinus Torvalds sbinfo->free_inodes++; 1325*1da177e4SLinus Torvalds spin_unlock(&sbinfo->stat_lock); 1326*1da177e4SLinus Torvalds } 1327*1da177e4SLinus Torvalds return inode; 1328*1da177e4SLinus Torvalds } 1329*1da177e4SLinus Torvalds 1330*1da177e4SLinus Torvalds #ifdef CONFIG_TMPFS 1331*1da177e4SLinus Torvalds 1332*1da177e4SLinus Torvalds static int shmem_set_size(struct shmem_sb_info *sbinfo, 1333*1da177e4SLinus Torvalds unsigned long max_blocks, unsigned long max_inodes) 1334*1da177e4SLinus Torvalds { 1335*1da177e4SLinus Torvalds int error; 1336*1da177e4SLinus Torvalds unsigned long blocks, inodes; 1337*1da177e4SLinus Torvalds 1338*1da177e4SLinus Torvalds spin_lock(&sbinfo->stat_lock); 1339*1da177e4SLinus Torvalds blocks = sbinfo->max_blocks - sbinfo->free_blocks; 1340*1da177e4SLinus Torvalds inodes = sbinfo->max_inodes - sbinfo->free_inodes; 1341*1da177e4SLinus Torvalds error = -EINVAL; 1342*1da177e4SLinus Torvalds if (max_blocks < blocks) 1343*1da177e4SLinus Torvalds goto out; 1344*1da177e4SLinus Torvalds if (max_inodes < inodes) 1345*1da177e4SLinus Torvalds goto out; 1346*1da177e4SLinus Torvalds error = 0; 1347*1da177e4SLinus Torvalds sbinfo->max_blocks = max_blocks; 1348*1da177e4SLinus Torvalds sbinfo->free_blocks = max_blocks - blocks; 1349*1da177e4SLinus Torvalds sbinfo->max_inodes = max_inodes; 1350*1da177e4SLinus Torvalds sbinfo->free_inodes = max_inodes - inodes; 1351*1da177e4SLinus Torvalds out: 1352*1da177e4SLinus Torvalds spin_unlock(&sbinfo->stat_lock); 1353*1da177e4SLinus Torvalds return error; 1354*1da177e4SLinus Torvalds } 1355*1da177e4SLinus Torvalds 1356*1da177e4SLinus Torvalds static struct inode_operations shmem_symlink_inode_operations; 1357*1da177e4SLinus 
Torvalds static struct inode_operations shmem_symlink_inline_operations; 1358*1da177e4SLinus Torvalds 1359*1da177e4SLinus Torvalds /* 1360*1da177e4SLinus Torvalds * Normally tmpfs makes no use of shmem_prepare_write, but it 1361*1da177e4SLinus Torvalds * lets a tmpfs file be used read-write below the loop driver. 1362*1da177e4SLinus Torvalds */ 1363*1da177e4SLinus Torvalds static int 1364*1da177e4SLinus Torvalds shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) 1365*1da177e4SLinus Torvalds { 1366*1da177e4SLinus Torvalds struct inode *inode = page->mapping->host; 1367*1da177e4SLinus Torvalds return shmem_getpage(inode, page->index, &page, SGP_WRITE, NULL); 1368*1da177e4SLinus Torvalds } 1369*1da177e4SLinus Torvalds 1370*1da177e4SLinus Torvalds static ssize_t 1371*1da177e4SLinus Torvalds shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) 1372*1da177e4SLinus Torvalds { 1373*1da177e4SLinus Torvalds struct inode *inode = file->f_dentry->d_inode; 1374*1da177e4SLinus Torvalds loff_t pos; 1375*1da177e4SLinus Torvalds unsigned long written; 1376*1da177e4SLinus Torvalds ssize_t err; 1377*1da177e4SLinus Torvalds 1378*1da177e4SLinus Torvalds if ((ssize_t) count < 0) 1379*1da177e4SLinus Torvalds return -EINVAL; 1380*1da177e4SLinus Torvalds 1381*1da177e4SLinus Torvalds if (!access_ok(VERIFY_READ, buf, count)) 1382*1da177e4SLinus Torvalds return -EFAULT; 1383*1da177e4SLinus Torvalds 1384*1da177e4SLinus Torvalds down(&inode->i_sem); 1385*1da177e4SLinus Torvalds 1386*1da177e4SLinus Torvalds pos = *ppos; 1387*1da177e4SLinus Torvalds written = 0; 1388*1da177e4SLinus Torvalds 1389*1da177e4SLinus Torvalds err = generic_write_checks(file, &pos, &count, 0); 1390*1da177e4SLinus Torvalds if (err || !count) 1391*1da177e4SLinus Torvalds goto out; 1392*1da177e4SLinus Torvalds 1393*1da177e4SLinus Torvalds err = remove_suid(file->f_dentry); 1394*1da177e4SLinus Torvalds if (err) 1395*1da177e4SLinus Torvalds goto out; 
1396*1da177e4SLinus Torvalds 1397*1da177e4SLinus Torvalds inode->i_ctime = inode->i_mtime = CURRENT_TIME; 1398*1da177e4SLinus Torvalds 1399*1da177e4SLinus Torvalds do { 1400*1da177e4SLinus Torvalds struct page *page = NULL; 1401*1da177e4SLinus Torvalds unsigned long bytes, index, offset; 1402*1da177e4SLinus Torvalds char *kaddr; 1403*1da177e4SLinus Torvalds int left; 1404*1da177e4SLinus Torvalds 1405*1da177e4SLinus Torvalds offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ 1406*1da177e4SLinus Torvalds index = pos >> PAGE_CACHE_SHIFT; 1407*1da177e4SLinus Torvalds bytes = PAGE_CACHE_SIZE - offset; 1408*1da177e4SLinus Torvalds if (bytes > count) 1409*1da177e4SLinus Torvalds bytes = count; 1410*1da177e4SLinus Torvalds 1411*1da177e4SLinus Torvalds /* 1412*1da177e4SLinus Torvalds * We don't hold page lock across copy from user - 1413*1da177e4SLinus Torvalds * what would it guard against? - so no deadlock here. 1414*1da177e4SLinus Torvalds * But it still may be a good idea to prefault below. 
1415*1da177e4SLinus Torvalds */ 1416*1da177e4SLinus Torvalds 1417*1da177e4SLinus Torvalds err = shmem_getpage(inode, index, &page, SGP_WRITE, NULL); 1418*1da177e4SLinus Torvalds if (err) 1419*1da177e4SLinus Torvalds break; 1420*1da177e4SLinus Torvalds 1421*1da177e4SLinus Torvalds left = bytes; 1422*1da177e4SLinus Torvalds if (PageHighMem(page)) { 1423*1da177e4SLinus Torvalds volatile unsigned char dummy; 1424*1da177e4SLinus Torvalds __get_user(dummy, buf); 1425*1da177e4SLinus Torvalds __get_user(dummy, buf + bytes - 1); 1426*1da177e4SLinus Torvalds 1427*1da177e4SLinus Torvalds kaddr = kmap_atomic(page, KM_USER0); 1428*1da177e4SLinus Torvalds left = __copy_from_user_inatomic(kaddr + offset, 1429*1da177e4SLinus Torvalds buf, bytes); 1430*1da177e4SLinus Torvalds kunmap_atomic(kaddr, KM_USER0); 1431*1da177e4SLinus Torvalds } 1432*1da177e4SLinus Torvalds if (left) { 1433*1da177e4SLinus Torvalds kaddr = kmap(page); 1434*1da177e4SLinus Torvalds left = __copy_from_user(kaddr + offset, buf, bytes); 1435*1da177e4SLinus Torvalds kunmap(page); 1436*1da177e4SLinus Torvalds } 1437*1da177e4SLinus Torvalds 1438*1da177e4SLinus Torvalds written += bytes; 1439*1da177e4SLinus Torvalds count -= bytes; 1440*1da177e4SLinus Torvalds pos += bytes; 1441*1da177e4SLinus Torvalds buf += bytes; 1442*1da177e4SLinus Torvalds if (pos > inode->i_size) 1443*1da177e4SLinus Torvalds i_size_write(inode, pos); 1444*1da177e4SLinus Torvalds 1445*1da177e4SLinus Torvalds flush_dcache_page(page); 1446*1da177e4SLinus Torvalds set_page_dirty(page); 1447*1da177e4SLinus Torvalds mark_page_accessed(page); 1448*1da177e4SLinus Torvalds page_cache_release(page); 1449*1da177e4SLinus Torvalds 1450*1da177e4SLinus Torvalds if (left) { 1451*1da177e4SLinus Torvalds pos -= left; 1452*1da177e4SLinus Torvalds written -= left; 1453*1da177e4SLinus Torvalds err = -EFAULT; 1454*1da177e4SLinus Torvalds break; 1455*1da177e4SLinus Torvalds } 1456*1da177e4SLinus Torvalds 1457*1da177e4SLinus Torvalds /* 1458*1da177e4SLinus Torvalds * 
Our dirty pages are not counted in nr_dirty, 1459*1da177e4SLinus Torvalds * and we do not attempt to balance dirty pages. 1460*1da177e4SLinus Torvalds */ 1461*1da177e4SLinus Torvalds 1462*1da177e4SLinus Torvalds cond_resched(); 1463*1da177e4SLinus Torvalds } while (count); 1464*1da177e4SLinus Torvalds 1465*1da177e4SLinus Torvalds *ppos = pos; 1466*1da177e4SLinus Torvalds if (written) 1467*1da177e4SLinus Torvalds err = written; 1468*1da177e4SLinus Torvalds out: 1469*1da177e4SLinus Torvalds up(&inode->i_sem); 1470*1da177e4SLinus Torvalds return err; 1471*1da177e4SLinus Torvalds } 1472*1da177e4SLinus Torvalds 1473*1da177e4SLinus Torvalds static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor) 1474*1da177e4SLinus Torvalds { 1475*1da177e4SLinus Torvalds struct inode *inode = filp->f_dentry->d_inode; 1476*1da177e4SLinus Torvalds struct address_space *mapping = inode->i_mapping; 1477*1da177e4SLinus Torvalds unsigned long index, offset; 1478*1da177e4SLinus Torvalds 1479*1da177e4SLinus Torvalds index = *ppos >> PAGE_CACHE_SHIFT; 1480*1da177e4SLinus Torvalds offset = *ppos & ~PAGE_CACHE_MASK; 1481*1da177e4SLinus Torvalds 1482*1da177e4SLinus Torvalds for (;;) { 1483*1da177e4SLinus Torvalds struct page *page = NULL; 1484*1da177e4SLinus Torvalds unsigned long end_index, nr, ret; 1485*1da177e4SLinus Torvalds loff_t i_size = i_size_read(inode); 1486*1da177e4SLinus Torvalds 1487*1da177e4SLinus Torvalds end_index = i_size >> PAGE_CACHE_SHIFT; 1488*1da177e4SLinus Torvalds if (index > end_index) 1489*1da177e4SLinus Torvalds break; 1490*1da177e4SLinus Torvalds if (index == end_index) { 1491*1da177e4SLinus Torvalds nr = i_size & ~PAGE_CACHE_MASK; 1492*1da177e4SLinus Torvalds if (nr <= offset) 1493*1da177e4SLinus Torvalds break; 1494*1da177e4SLinus Torvalds } 1495*1da177e4SLinus Torvalds 1496*1da177e4SLinus Torvalds desc->error = shmem_getpage(inode, index, &page, SGP_READ, NULL); 1497*1da177e4SLinus Torvalds if (desc->error) { 
1498*1da177e4SLinus Torvalds if (desc->error == -EINVAL) 1499*1da177e4SLinus Torvalds desc->error = 0; 1500*1da177e4SLinus Torvalds break; 1501*1da177e4SLinus Torvalds } 1502*1da177e4SLinus Torvalds 1503*1da177e4SLinus Torvalds /* 1504*1da177e4SLinus Torvalds * We must evaluate after, since reads (unlike writes) 1505*1da177e4SLinus Torvalds * are called without i_sem protection against truncate 1506*1da177e4SLinus Torvalds */ 1507*1da177e4SLinus Torvalds nr = PAGE_CACHE_SIZE; 1508*1da177e4SLinus Torvalds i_size = i_size_read(inode); 1509*1da177e4SLinus Torvalds end_index = i_size >> PAGE_CACHE_SHIFT; 1510*1da177e4SLinus Torvalds if (index == end_index) { 1511*1da177e4SLinus Torvalds nr = i_size & ~PAGE_CACHE_MASK; 1512*1da177e4SLinus Torvalds if (nr <= offset) { 1513*1da177e4SLinus Torvalds if (page) 1514*1da177e4SLinus Torvalds page_cache_release(page); 1515*1da177e4SLinus Torvalds break; 1516*1da177e4SLinus Torvalds } 1517*1da177e4SLinus Torvalds } 1518*1da177e4SLinus Torvalds nr -= offset; 1519*1da177e4SLinus Torvalds 1520*1da177e4SLinus Torvalds if (page) { 1521*1da177e4SLinus Torvalds /* 1522*1da177e4SLinus Torvalds * If users can be writing to this page using arbitrary 1523*1da177e4SLinus Torvalds * virtual addresses, take care about potential aliasing 1524*1da177e4SLinus Torvalds * before reading the page on the kernel side. 1525*1da177e4SLinus Torvalds */ 1526*1da177e4SLinus Torvalds if (mapping_writably_mapped(mapping)) 1527*1da177e4SLinus Torvalds flush_dcache_page(page); 1528*1da177e4SLinus Torvalds /* 1529*1da177e4SLinus Torvalds * Mark the page accessed if we read the beginning. 
1530*1da177e4SLinus Torvalds */ 1531*1da177e4SLinus Torvalds if (!offset) 1532*1da177e4SLinus Torvalds mark_page_accessed(page); 1533*1da177e4SLinus Torvalds } else 1534*1da177e4SLinus Torvalds page = ZERO_PAGE(0); 1535*1da177e4SLinus Torvalds 1536*1da177e4SLinus Torvalds /* 1537*1da177e4SLinus Torvalds * Ok, we have the page, and it's up-to-date, so 1538*1da177e4SLinus Torvalds * now we can copy it to user space... 1539*1da177e4SLinus Torvalds * 1540*1da177e4SLinus Torvalds * The actor routine returns how many bytes were actually used.. 1541*1da177e4SLinus Torvalds * NOTE! This may not be the same as how much of a user buffer 1542*1da177e4SLinus Torvalds * we filled up (we may be padding etc), so we can only update 1543*1da177e4SLinus Torvalds * "pos" here (the actor routine has to update the user buffer 1544*1da177e4SLinus Torvalds * pointers and the remaining count). 1545*1da177e4SLinus Torvalds */ 1546*1da177e4SLinus Torvalds ret = actor(desc, page, offset, nr); 1547*1da177e4SLinus Torvalds offset += ret; 1548*1da177e4SLinus Torvalds index += offset >> PAGE_CACHE_SHIFT; 1549*1da177e4SLinus Torvalds offset &= ~PAGE_CACHE_MASK; 1550*1da177e4SLinus Torvalds 1551*1da177e4SLinus Torvalds page_cache_release(page); 1552*1da177e4SLinus Torvalds if (ret != nr || !desc->count) 1553*1da177e4SLinus Torvalds break; 1554*1da177e4SLinus Torvalds 1555*1da177e4SLinus Torvalds cond_resched(); 1556*1da177e4SLinus Torvalds } 1557*1da177e4SLinus Torvalds 1558*1da177e4SLinus Torvalds *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; 1559*1da177e4SLinus Torvalds file_accessed(filp); 1560*1da177e4SLinus Torvalds } 1561*1da177e4SLinus Torvalds 1562*1da177e4SLinus Torvalds static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) 1563*1da177e4SLinus Torvalds { 1564*1da177e4SLinus Torvalds read_descriptor_t desc; 1565*1da177e4SLinus Torvalds 1566*1da177e4SLinus Torvalds if ((ssize_t) count < 0) 1567*1da177e4SLinus Torvalds return -EINVAL; 
1568*1da177e4SLinus Torvalds if (!access_ok(VERIFY_WRITE, buf, count)) 1569*1da177e4SLinus Torvalds return -EFAULT; 1570*1da177e4SLinus Torvalds if (!count) 1571*1da177e4SLinus Torvalds return 0; 1572*1da177e4SLinus Torvalds 1573*1da177e4SLinus Torvalds desc.written = 0; 1574*1da177e4SLinus Torvalds desc.count = count; 1575*1da177e4SLinus Torvalds desc.arg.buf = buf; 1576*1da177e4SLinus Torvalds desc.error = 0; 1577*1da177e4SLinus Torvalds 1578*1da177e4SLinus Torvalds do_shmem_file_read(filp, ppos, &desc, file_read_actor); 1579*1da177e4SLinus Torvalds if (desc.written) 1580*1da177e4SLinus Torvalds return desc.written; 1581*1da177e4SLinus Torvalds return desc.error; 1582*1da177e4SLinus Torvalds } 1583*1da177e4SLinus Torvalds 1584*1da177e4SLinus Torvalds static ssize_t shmem_file_sendfile(struct file *in_file, loff_t *ppos, 1585*1da177e4SLinus Torvalds size_t count, read_actor_t actor, void *target) 1586*1da177e4SLinus Torvalds { 1587*1da177e4SLinus Torvalds read_descriptor_t desc; 1588*1da177e4SLinus Torvalds 1589*1da177e4SLinus Torvalds if (!count) 1590*1da177e4SLinus Torvalds return 0; 1591*1da177e4SLinus Torvalds 1592*1da177e4SLinus Torvalds desc.written = 0; 1593*1da177e4SLinus Torvalds desc.count = count; 1594*1da177e4SLinus Torvalds desc.arg.data = target; 1595*1da177e4SLinus Torvalds desc.error = 0; 1596*1da177e4SLinus Torvalds 1597*1da177e4SLinus Torvalds do_shmem_file_read(in_file, ppos, &desc, actor); 1598*1da177e4SLinus Torvalds if (desc.written) 1599*1da177e4SLinus Torvalds return desc.written; 1600*1da177e4SLinus Torvalds return desc.error; 1601*1da177e4SLinus Torvalds } 1602*1da177e4SLinus Torvalds 1603*1da177e4SLinus Torvalds static int shmem_statfs(struct super_block *sb, struct kstatfs *buf) 1604*1da177e4SLinus Torvalds { 1605*1da177e4SLinus Torvalds struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 1606*1da177e4SLinus Torvalds 1607*1da177e4SLinus Torvalds buf->f_type = TMPFS_MAGIC; 1608*1da177e4SLinus Torvalds buf->f_bsize = PAGE_CACHE_SIZE; 
1609*1da177e4SLinus Torvalds buf->f_namelen = NAME_MAX; 1610*1da177e4SLinus Torvalds if (sbinfo) { 1611*1da177e4SLinus Torvalds spin_lock(&sbinfo->stat_lock); 1612*1da177e4SLinus Torvalds buf->f_blocks = sbinfo->max_blocks; 1613*1da177e4SLinus Torvalds buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; 1614*1da177e4SLinus Torvalds buf->f_files = sbinfo->max_inodes; 1615*1da177e4SLinus Torvalds buf->f_ffree = sbinfo->free_inodes; 1616*1da177e4SLinus Torvalds spin_unlock(&sbinfo->stat_lock); 1617*1da177e4SLinus Torvalds } 1618*1da177e4SLinus Torvalds /* else leave those fields 0 like simple_statfs */ 1619*1da177e4SLinus Torvalds return 0; 1620*1da177e4SLinus Torvalds } 1621*1da177e4SLinus Torvalds 1622*1da177e4SLinus Torvalds /* 1623*1da177e4SLinus Torvalds * File creation. Allocate an inode, and we're done.. 1624*1da177e4SLinus Torvalds */ 1625*1da177e4SLinus Torvalds static int 1626*1da177e4SLinus Torvalds shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 1627*1da177e4SLinus Torvalds { 1628*1da177e4SLinus Torvalds struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev); 1629*1da177e4SLinus Torvalds int error = -ENOSPC; 1630*1da177e4SLinus Torvalds 1631*1da177e4SLinus Torvalds if (inode) { 1632*1da177e4SLinus Torvalds if (dir->i_mode & S_ISGID) { 1633*1da177e4SLinus Torvalds inode->i_gid = dir->i_gid; 1634*1da177e4SLinus Torvalds if (S_ISDIR(mode)) 1635*1da177e4SLinus Torvalds inode->i_mode |= S_ISGID; 1636*1da177e4SLinus Torvalds } 1637*1da177e4SLinus Torvalds dir->i_size += BOGO_DIRENT_SIZE; 1638*1da177e4SLinus Torvalds dir->i_ctime = dir->i_mtime = CURRENT_TIME; 1639*1da177e4SLinus Torvalds d_instantiate(dentry, inode); 1640*1da177e4SLinus Torvalds dget(dentry); /* Extra count - pin the dentry in core */ 1641*1da177e4SLinus Torvalds error = 0; 1642*1da177e4SLinus Torvalds } 1643*1da177e4SLinus Torvalds return error; 1644*1da177e4SLinus Torvalds } 1645*1da177e4SLinus Torvalds 1646*1da177e4SLinus Torvalds static int shmem_mkdir(struct 
inode *dir, struct dentry *dentry, int mode) 1647*1da177e4SLinus Torvalds { 1648*1da177e4SLinus Torvalds int error; 1649*1da177e4SLinus Torvalds 1650*1da177e4SLinus Torvalds if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0))) 1651*1da177e4SLinus Torvalds return error; 1652*1da177e4SLinus Torvalds dir->i_nlink++; 1653*1da177e4SLinus Torvalds return 0; 1654*1da177e4SLinus Torvalds } 1655*1da177e4SLinus Torvalds 1656*1da177e4SLinus Torvalds static int shmem_create(struct inode *dir, struct dentry *dentry, int mode, 1657*1da177e4SLinus Torvalds struct nameidata *nd) 1658*1da177e4SLinus Torvalds { 1659*1da177e4SLinus Torvalds return shmem_mknod(dir, dentry, mode | S_IFREG, 0); 1660*1da177e4SLinus Torvalds } 1661*1da177e4SLinus Torvalds 1662*1da177e4SLinus Torvalds /* 1663*1da177e4SLinus Torvalds * Link a file.. 1664*1da177e4SLinus Torvalds */ 1665*1da177e4SLinus Torvalds static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) 1666*1da177e4SLinus Torvalds { 1667*1da177e4SLinus Torvalds struct inode *inode = old_dentry->d_inode; 1668*1da177e4SLinus Torvalds struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 1669*1da177e4SLinus Torvalds 1670*1da177e4SLinus Torvalds /* 1671*1da177e4SLinus Torvalds * No ordinary (disk based) filesystem counts links as inodes; 1672*1da177e4SLinus Torvalds * but each new link needs a new dentry, pinning lowmem, and 1673*1da177e4SLinus Torvalds * tmpfs dentries cannot be pruned until they are unlinked. 
1674*1da177e4SLinus Torvalds */ 1675*1da177e4SLinus Torvalds if (sbinfo) { 1676*1da177e4SLinus Torvalds spin_lock(&sbinfo->stat_lock); 1677*1da177e4SLinus Torvalds if (!sbinfo->free_inodes) { 1678*1da177e4SLinus Torvalds spin_unlock(&sbinfo->stat_lock); 1679*1da177e4SLinus Torvalds return -ENOSPC; 1680*1da177e4SLinus Torvalds } 1681*1da177e4SLinus Torvalds sbinfo->free_inodes--; 1682*1da177e4SLinus Torvalds spin_unlock(&sbinfo->stat_lock); 1683*1da177e4SLinus Torvalds } 1684*1da177e4SLinus Torvalds 1685*1da177e4SLinus Torvalds dir->i_size += BOGO_DIRENT_SIZE; 1686*1da177e4SLinus Torvalds inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 1687*1da177e4SLinus Torvalds inode->i_nlink++; 1688*1da177e4SLinus Torvalds atomic_inc(&inode->i_count); /* New dentry reference */ 1689*1da177e4SLinus Torvalds dget(dentry); /* Extra pinning count for the created dentry */ 1690*1da177e4SLinus Torvalds d_instantiate(dentry, inode); 1691*1da177e4SLinus Torvalds return 0; 1692*1da177e4SLinus Torvalds } 1693*1da177e4SLinus Torvalds 1694*1da177e4SLinus Torvalds static int shmem_unlink(struct inode *dir, struct dentry *dentry) 1695*1da177e4SLinus Torvalds { 1696*1da177e4SLinus Torvalds struct inode *inode = dentry->d_inode; 1697*1da177e4SLinus Torvalds 1698*1da177e4SLinus Torvalds if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) { 1699*1da177e4SLinus Torvalds struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 1700*1da177e4SLinus Torvalds if (sbinfo) { 1701*1da177e4SLinus Torvalds spin_lock(&sbinfo->stat_lock); 1702*1da177e4SLinus Torvalds sbinfo->free_inodes++; 1703*1da177e4SLinus Torvalds spin_unlock(&sbinfo->stat_lock); 1704*1da177e4SLinus Torvalds } 1705*1da177e4SLinus Torvalds } 1706*1da177e4SLinus Torvalds 1707*1da177e4SLinus Torvalds dir->i_size -= BOGO_DIRENT_SIZE; 1708*1da177e4SLinus Torvalds inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 1709*1da177e4SLinus Torvalds inode->i_nlink--; 1710*1da177e4SLinus Torvalds dput(dentry); /* Undo the count from 
"create" - this does all the work */ 1711*1da177e4SLinus Torvalds return 0; 1712*1da177e4SLinus Torvalds } 1713*1da177e4SLinus Torvalds 1714*1da177e4SLinus Torvalds static int shmem_rmdir(struct inode *dir, struct dentry *dentry) 1715*1da177e4SLinus Torvalds { 1716*1da177e4SLinus Torvalds if (!simple_empty(dentry)) 1717*1da177e4SLinus Torvalds return -ENOTEMPTY; 1718*1da177e4SLinus Torvalds 1719*1da177e4SLinus Torvalds dir->i_nlink--; 1720*1da177e4SLinus Torvalds return shmem_unlink(dir, dentry); 1721*1da177e4SLinus Torvalds } 1722*1da177e4SLinus Torvalds 1723*1da177e4SLinus Torvalds /* 1724*1da177e4SLinus Torvalds * The VFS layer already does all the dentry stuff for rename, 1725*1da177e4SLinus Torvalds * we just have to decrement the usage count for the target if 1726*1da177e4SLinus Torvalds * it exists so that the VFS layer correctly free's it when it 1727*1da177e4SLinus Torvalds * gets overwritten. 1728*1da177e4SLinus Torvalds */ 1729*1da177e4SLinus Torvalds static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) 1730*1da177e4SLinus Torvalds { 1731*1da177e4SLinus Torvalds struct inode *inode = old_dentry->d_inode; 1732*1da177e4SLinus Torvalds int they_are_dirs = S_ISDIR(inode->i_mode); 1733*1da177e4SLinus Torvalds 1734*1da177e4SLinus Torvalds if (!simple_empty(new_dentry)) 1735*1da177e4SLinus Torvalds return -ENOTEMPTY; 1736*1da177e4SLinus Torvalds 1737*1da177e4SLinus Torvalds if (new_dentry->d_inode) { 1738*1da177e4SLinus Torvalds (void) shmem_unlink(new_dir, new_dentry); 1739*1da177e4SLinus Torvalds if (they_are_dirs) 1740*1da177e4SLinus Torvalds old_dir->i_nlink--; 1741*1da177e4SLinus Torvalds } else if (they_are_dirs) { 1742*1da177e4SLinus Torvalds old_dir->i_nlink--; 1743*1da177e4SLinus Torvalds new_dir->i_nlink++; 1744*1da177e4SLinus Torvalds } 1745*1da177e4SLinus Torvalds 1746*1da177e4SLinus Torvalds old_dir->i_size -= BOGO_DIRENT_SIZE; 1747*1da177e4SLinus Torvalds new_dir->i_size += 
BOGO_DIRENT_SIZE; 1748*1da177e4SLinus Torvalds old_dir->i_ctime = old_dir->i_mtime = 1749*1da177e4SLinus Torvalds new_dir->i_ctime = new_dir->i_mtime = 1750*1da177e4SLinus Torvalds inode->i_ctime = CURRENT_TIME; 1751*1da177e4SLinus Torvalds return 0; 1752*1da177e4SLinus Torvalds } 1753*1da177e4SLinus Torvalds 1754*1da177e4SLinus Torvalds static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname) 1755*1da177e4SLinus Torvalds { 1756*1da177e4SLinus Torvalds int error; 1757*1da177e4SLinus Torvalds int len; 1758*1da177e4SLinus Torvalds struct inode *inode; 1759*1da177e4SLinus Torvalds struct page *page = NULL; 1760*1da177e4SLinus Torvalds char *kaddr; 1761*1da177e4SLinus Torvalds struct shmem_inode_info *info; 1762*1da177e4SLinus Torvalds 1763*1da177e4SLinus Torvalds len = strlen(symname) + 1; 1764*1da177e4SLinus Torvalds if (len > PAGE_CACHE_SIZE) 1765*1da177e4SLinus Torvalds return -ENAMETOOLONG; 1766*1da177e4SLinus Torvalds 1767*1da177e4SLinus Torvalds inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); 1768*1da177e4SLinus Torvalds if (!inode) 1769*1da177e4SLinus Torvalds return -ENOSPC; 1770*1da177e4SLinus Torvalds 1771*1da177e4SLinus Torvalds info = SHMEM_I(inode); 1772*1da177e4SLinus Torvalds inode->i_size = len-1; 1773*1da177e4SLinus Torvalds if (len <= (char *)inode - (char *)info) { 1774*1da177e4SLinus Torvalds /* do it inline */ 1775*1da177e4SLinus Torvalds memcpy(info, symname, len); 1776*1da177e4SLinus Torvalds inode->i_op = &shmem_symlink_inline_operations; 1777*1da177e4SLinus Torvalds } else { 1778*1da177e4SLinus Torvalds error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); 1779*1da177e4SLinus Torvalds if (error) { 1780*1da177e4SLinus Torvalds iput(inode); 1781*1da177e4SLinus Torvalds return error; 1782*1da177e4SLinus Torvalds } 1783*1da177e4SLinus Torvalds inode->i_op = &shmem_symlink_inode_operations; 1784*1da177e4SLinus Torvalds kaddr = kmap_atomic(page, KM_USER0); 1785*1da177e4SLinus Torvalds memcpy(kaddr, 
symname, len); 1786*1da177e4SLinus Torvalds kunmap_atomic(kaddr, KM_USER0); 1787*1da177e4SLinus Torvalds set_page_dirty(page); 1788*1da177e4SLinus Torvalds page_cache_release(page); 1789*1da177e4SLinus Torvalds } 1790*1da177e4SLinus Torvalds if (dir->i_mode & S_ISGID) 1791*1da177e4SLinus Torvalds inode->i_gid = dir->i_gid; 1792*1da177e4SLinus Torvalds dir->i_size += BOGO_DIRENT_SIZE; 1793*1da177e4SLinus Torvalds dir->i_ctime = dir->i_mtime = CURRENT_TIME; 1794*1da177e4SLinus Torvalds d_instantiate(dentry, inode); 1795*1da177e4SLinus Torvalds dget(dentry); 1796*1da177e4SLinus Torvalds return 0; 1797*1da177e4SLinus Torvalds } 1798*1da177e4SLinus Torvalds 1799*1da177e4SLinus Torvalds static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) 1800*1da177e4SLinus Torvalds { 1801*1da177e4SLinus Torvalds nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode)); 1802*1da177e4SLinus Torvalds return 0; 1803*1da177e4SLinus Torvalds } 1804*1da177e4SLinus Torvalds 1805*1da177e4SLinus Torvalds static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd) 1806*1da177e4SLinus Torvalds { 1807*1da177e4SLinus Torvalds struct page *page = NULL; 1808*1da177e4SLinus Torvalds int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); 1809*1da177e4SLinus Torvalds nd_set_link(nd, res ? 
	/*
	 * (tail of shmem_follow_link, whose start lies above this chunk:
	 * publishes either an ERR_PTR or the kmap'ed symlink page via
	 * nd_set_link; the matching kunmap/release happens in
	 * shmem_put_link below)
	 */
		ERR_PTR(res) : kmap(page));
	return 0;
}

/*
 * Undo shmem_follow_link for a long (page-backed) symlink.
 *
 * If follow_link succeeded (nd holds a real pointer, not an ERR_PTR),
 * the symlink page is still mapped and referenced.  Look it up again,
 * kunmap it, and drop the page twice: once for the find_get_page()
 * reference taken here, once for the reference left outstanding by
 * shmem_follow_link.  The page must exist -- BUG() otherwise.
 */
static void shmem_put_link(struct dentry *dentry, struct nameidata *nd)
{
	if (!IS_ERR(nd_get_link(nd))) {
		struct page *page;

		page = find_get_page(dentry->d_inode->i_mapping, 0);
		if (!page)
			BUG();
		kunmap(page);
		mark_page_accessed(page);
		/* one release for find_get_page() above ... */
		page_cache_release(page);
		/* ... and one for the reference held since follow_link */
		page_cache_release(page);
	}
}

/* Short symlinks stored inline in the inode: no put_link needed. */
static struct inode_operations shmem_symlink_inline_operations = {
	.readlink	= generic_readlink,
	.follow_link	= shmem_follow_link_inline,
#ifdef CONFIG_TMPFS_XATTR
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= generic_listxattr,
	.removexattr	= generic_removexattr,
#endif
};

/* Long symlinks stored in a pagecache page: needs truncate and put_link. */
static struct inode_operations shmem_symlink_inode_operations = {
	.truncate	= shmem_truncate,
	.readlink	= generic_readlink,
	.follow_link	= shmem_follow_link,
	.put_link	= shmem_put_link,
#ifdef CONFIG_TMPFS_XATTR
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= generic_listxattr,
	.removexattr	= generic_removexattr,
#endif
};

/*
 * Parse tmpfs mount options ("size", "nr_blocks", "nr_inodes", "mode",
 * "uid", "gid").  Output pointers may be NULL to ignore that option
 * (remount passes NULL for mode/uid/gid).  "size" accepts a trailing
 * '%' meaning a percentage of total RAM.
 *
 * Returns 0 on success, 1 on any parse error (message already printed).
 */
static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes)
{
	char *this_char, *value, *rest;

	while ((this_char = strsep(&options, ",")) != NULL) {
		if (!*this_char)
			continue;
		if ((value = strchr(this_char,'=')) != NULL) {
			*value++ = 0;	/* split "opt=value" in place */
		} else {
			printk(KERN_ERR
			    "tmpfs: No value for mount option '%s'\n",
			    this_char);
			return 1;
		}

		if (!strcmp(this_char,"size")) {
			unsigned long long size;
			size = memparse(value,&rest);
			if (*rest == '%') {
				/*
				 * Percentage of RAM: memparse returned the
				 * percentage, so scale totalram_pages (in
				 * bytes) by it and divide by 100.
				 */
				size <<= PAGE_SHIFT;
				size *= totalram_pages;
				do_div(size, 100);
				rest++;
			}
			if (*rest)
				goto bad_val;
			*blocks = size >> PAGE_CACHE_SHIFT;
		} else if (!strcmp(this_char,"nr_blocks")) {
			*blocks = memparse(value,&rest);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"nr_inodes")) {
			*inodes = memparse(value,&rest);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"mode")) {
			if (!mode)
				continue;	/* ignored on remount */
			*mode = simple_strtoul(value,&rest,8);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"uid")) {
			if (!uid)
				continue;	/* ignored on remount */
			*uid = simple_strtoul(value,&rest,0);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"gid")) {
			if (!gid)
				continue;	/* ignored on remount */
			*gid = simple_strtoul(value,&rest,0);
			if (*rest)
				goto bad_val;
		} else {
			printk(KERN_ERR "tmpfs: Bad mount option %s\n",
			       this_char);
			return 1;
		}
	}
	return 0;

bad_val:
	printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
	       value, this_char);
	return 1;

}

/*
 * Remount: only the block/inode limits may change.  A limited mount has
 * sbinfo allocated; an unlimited (internal) one does not, and switching
 * between the two is refused outright.
 */
static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	unsigned long max_blocks = 0;
	unsigned long max_inodes = 0;

	if (sbinfo) {
		max_blocks = sbinfo->max_blocks;
		max_inodes = sbinfo->max_inodes;
	}
	if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, &max_inodes))
		return -EINVAL;
	/* Keep it simple: disallow limited <-> unlimited remount */
	if ((max_blocks || max_inodes) == !sbinfo)
		return -EINVAL;
	/* But allow the pointless unlimited -> unlimited remount */
	if (!sbinfo)
		return 0;
	return shmem_set_size(sbinfo, max_blocks, max_inodes);
}
#endif /* CONFIG_TMPFS */

/* Free the (possibly NULL) shmem_sb_info on unmount. */
static void shmem_put_super(struct super_block *sb)
{
	kfree(sb->s_fs_info);
	sb->s_fs_info = NULL;
}

#ifdef CONFIG_TMPFS_XATTR
static struct xattr_handler *shmem_xattr_handlers[];	/* defined below */
#else
#define shmem_xattr_handlers NULL
#endif

/*
 * Fill a new tmpfs superblock.
 *
 * User mounts default to half of RAM for blocks and one inode per page
 * of lowmem; the kernel-internal mount (MS_NOUSER) stays unlimited and
 * allocates no sbinfo at all.  Returns 0 or a negative errno.
 */
static int shmem_fill_super(struct super_block *sb,
			    void *data, int silent)
{
	struct inode *inode;
	struct dentry *root;
	int mode   = S_IRWXUGO | S_ISVTX;
	uid_t uid = current->fsuid;
	gid_t gid = current->fsgid;
	int err = -ENOMEM;

#ifdef CONFIG_TMPFS
	unsigned long blocks = 0;
	unsigned long inodes = 0;

	/*
	 * Per default we only allow half of the physical ram per
	 * tmpfs instance, limiting inodes to one per page of lowmem;
	 * but the internal instance is left unlimited.
	 */
	if (!(sb->s_flags & MS_NOUSER)) {
		blocks = totalram_pages / 2;
		inodes = totalram_pages - totalhigh_pages;
		if (inodes > blocks)
			inodes = blocks;

		if (shmem_parse_options(data, &mode,
					&uid, &gid, &blocks, &inodes))
			return -EINVAL;
	}

	if (blocks || inodes) {
		/* limited mount: track usage in a shmem_sb_info */
		struct shmem_sb_info *sbinfo;
		sbinfo = kmalloc(sizeof(struct shmem_sb_info), GFP_KERNEL);
		if (!sbinfo)
			return -ENOMEM;
		sb->s_fs_info = sbinfo;
		spin_lock_init(&sbinfo->stat_lock);
		sbinfo->max_blocks = blocks;
		sbinfo->free_blocks = blocks;
		sbinfo->max_inodes = inodes;
		sbinfo->free_inodes = inodes;
	}
	sb->s_xattr = shmem_xattr_handlers;
#else
	sb->s_flags |= MS_NOUSER;
#endif

	sb->s_maxbytes = SHMEM_MAX_BYTES;
	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = TMPFS_MAGIC;
	sb->s_op = &shmem_ops;
	inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
	if (!inode)
		goto failed;
	inode->i_uid = uid;
	inode->i_gid = gid;
	root = d_alloc_root(inode);
	if (!root)
		goto failed_iput;
	sb->s_root = root;
	return 0;

failed_iput:
	iput(inode);
failed:
	shmem_put_super(sb);	/* frees sbinfo if it was allocated */
	return err;
}

static kmem_cache_t *shmem_inode_cachep;

/* Allocate a shmem inode from the slab cache; NULL on failure. */
static struct inode *shmem_alloc_inode(struct super_block *sb)
{
	struct shmem_inode_info *p;
	p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, SLAB_KERNEL);
	if (!p)
		return NULL;
	return &p->vfs_inode;
}

/*
 * Release a shmem inode.  The shared NUMA policy is only initialized
 * for regular files, so only free it for those.
 */
static void shmem_destroy_inode(struct inode *inode)
{
	if ((inode->i_mode & S_IFMT) == S_IFREG) {
		/* only struct inode is valid if it's an inline symlink */
		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
	}
	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
}

/*
 * Slab constructor: initialize the embedded VFS inode exactly once per
 * object (only when the allocator signals a real construction, not a
 * debug verify pass).
 */
static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
{
	struct shmem_inode_info *p = (struct shmem_inode_info *) foo;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR) {
		inode_init_once(&p->vfs_inode);
	}
}

/* Create the shmem inode slab cache.  Returns 0 or -ENOMEM. */
static int init_inodecache(void)
{
	shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
				sizeof(struct shmem_inode_info),
				0, 0, init_once, NULL);
	if (shmem_inode_cachep == NULL)
		return -ENOMEM;
	return 0;
}

/* Destroy the inode cache, warning if objects were still in use. */
static void destroy_inodecache(void)
{
	if (kmem_cache_destroy(shmem_inode_cachep))
		printk(KERN_INFO "shmem_inode_cache: not all structures were freed\n");
}

static struct address_space_operations shmem_aops = {
	.writepage	= shmem_writepage,
	.set_page_dirty	= __set_page_dirty_nobuffers,
#ifdef CONFIG_TMPFS
	.prepare_write	= shmem_prepare_write,
	.commit_write	= simple_commit_write,
#endif
};

static struct file_operations shmem_file_operations = {
	.mmap		= shmem_mmap,
#ifdef CONFIG_TMPFS
	.llseek		= generic_file_llseek,
	.read		= shmem_file_read,
	.write		= shmem_file_write,
	.fsync		= simple_sync_file,
	.sendfile	= shmem_file_sendfile,
#endif
};

static struct inode_operations shmem_inode_operations = {
	.truncate	= shmem_truncate,
	.setattr	= shmem_notify_change,
#ifdef CONFIG_TMPFS_XATTR
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= generic_listxattr,
	.removexattr	= generic_removexattr,
#endif
};

static struct inode_operations shmem_dir_inode_operations = {
#ifdef CONFIG_TMPFS
	.create		= shmem_create,
	.lookup		= simple_lookup,
	.link		= shmem_link,
	.unlink		= shmem_unlink,
	.symlink	= shmem_symlink,
	.mkdir		= shmem_mkdir,
	.rmdir		= shmem_rmdir,
	.mknod		= shmem_mknod,
	.rename		= shmem_rename,
#ifdef CONFIG_TMPFS_XATTR
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= generic_listxattr,
	.removexattr	= generic_removexattr,
#endif
#endif
};

/* Device/fifo/socket inodes: xattrs only, no data operations. */
static struct inode_operations shmem_special_inode_operations = {
#ifdef CONFIG_TMPFS_XATTR
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= generic_listxattr,
	.removexattr	= generic_removexattr,
#endif
};

static struct super_operations shmem_ops = {
	.alloc_inode	= shmem_alloc_inode,
	.destroy_inode	= shmem_destroy_inode,
#ifdef CONFIG_TMPFS
	.statfs		= shmem_statfs,
	.remount_fs	= shmem_remount_fs,
#endif
	.delete_inode	= shmem_delete_inode,
	.drop_inode	= generic_delete_inode,
	.put_super	= shmem_put_super,
};

static struct vm_operations_struct shmem_vm_ops = {
	.nopage		= shmem_nopage,
	.populate	= shmem_populate,
#ifdef CONFIG_NUMA
	.set_policy     = shmem_set_policy,
	.get_policy     = shmem_get_policy,
#endif
};


#ifdef CONFIG_TMPFS_SECURITY

/* xattr handler glue: delegate "security.*" attributes to the LSM. */
static size_t shmem_xattr_security_list(struct inode *inode, char *list, size_t list_len,
					const char *name, size_t name_len)
{
	return security_inode_listsecurity(inode, list, list_len);
}

static int shmem_xattr_security_get(struct inode *inode, const char *name, void *buffer, size_t size)
{
	/* reject the bare "security." prefix with no suffix */
	if (strcmp(name, "") == 0)
		return -EINVAL;
	return security_inode_getsecurity(inode, name, buffer, size);
}

static int shmem_xattr_security_set(struct inode *inode, const char *name, const void *value, size_t size, int flags)
{
	/* reject the bare "security." prefix with no suffix */
	if (strcmp(name, "") == 0)
		return -EINVAL;
	return security_inode_setsecurity(inode, name, value, size, flags);
}

static struct xattr_handler shmem_xattr_security_handler = {
	.prefix = XATTR_SECURITY_PREFIX,
	.list   = shmem_xattr_security_list,
	.get    = shmem_xattr_security_get,
	.set    = shmem_xattr_security_set,
};

#endif	/* CONFIG_TMPFS_SECURITY */

#ifdef CONFIG_TMPFS_XATTR

/* NULL-terminated table wired into sb->s_xattr in shmem_fill_super. */
static struct xattr_handler *shmem_xattr_handlers[] = {
#ifdef CONFIG_TMPFS_SECURITY
	&shmem_xattr_security_handler,
#endif
	NULL
};

#endif	/* CONFIG_TMPFS_XATTR */

/* tmpfs has no backing device: mount as a nodev filesystem. */
static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return get_sb_nodev(fs_type, flags, data, shmem_fill_super);
}

static struct file_system_type tmpfs_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "tmpfs",
	.get_sb		= shmem_get_sb,
	.kill_sb	= kill_litter_super,
};
/* the internal (unlimited, MS_NOUSER) mount used by shmem_file_setup */
static struct vfsmount *shm_mnt;

/*
 * Module init: create the inode cache, register tmpfs, and set up the
 * internal mount.  On failure, unwind in reverse order via gotos and
 * poison shm_mnt with the error so shmem_file_setup can report it.
 */
static int __init init_tmpfs(void)
{
	int error;

	error = init_inodecache();
	if (error)
		goto out3;

	error = register_filesystem(&tmpfs_fs_type);
	if (error) {
		printk(KERN_ERR "Could not register tmpfs\n");
		goto out2;
	}
#ifdef CONFIG_TMPFS
	devfs_mk_dir("shm");
#endif
	shm_mnt = do_kern_mount(tmpfs_fs_type.name, MS_NOUSER,
				tmpfs_fs_type.name, NULL);
	if (IS_ERR(shm_mnt)) {
		error = PTR_ERR(shm_mnt);
		printk(KERN_ERR "Could not kern_mount tmpfs\n");
		goto out1;
	}
	return 0;

out1:
	unregister_filesystem(&tmpfs_fs_type);
out2:
	destroy_inodecache();
out3:
	shm_mnt = ERR_PTR(error);
	return error;
}
module_init(init_tmpfs)

/*
 * shmem_file_setup - get an unlinked file living in tmpfs
 *
 * @name: name for dentry (to be seen in /proc/<pid>/maps
 * @size: size to be set for the file
 *
 * Returns a struct file * on the internal tmpfs mount, or an ERR_PTR.
 * The inode has i_nlink == 0, so it disappears when the file is closed.
 */
struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
{
	int error;
	struct file *file;
	struct inode *inode;
	struct dentry *dentry, *root;
	struct qstr this;

	/* init_tmpfs stored an ERR_PTR here if the mount failed */
	if (IS_ERR(shm_mnt))
		return (void *)shm_mnt;

	if (size < 0 || size > SHMEM_MAX_BYTES)
		return ERR_PTR(-EINVAL);

	/* charge the size against the caller's memory accounting */
	if (shmem_acct_size(flags, size))
		return ERR_PTR(-ENOMEM);

	error = -ENOMEM;
	this.name = name;
	this.len = strlen(name);
	this.hash = 0; /* will go */
	root = shm_mnt->mnt_root;
	dentry = d_alloc(root, &this);
	if (!dentry)
		goto put_memory;

	error = -ENFILE;
	file = get_empty_filp();
	if (!file)
		goto put_dentry;

	error = -ENOSPC;
	inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
	if (!inode)
		goto close_file;

	SHMEM_I(inode)->flags = flags & VM_ACCOUNT;
	d_instantiate(dentry, inode);
	inode->i_size = size;
	inode->i_nlink = 0;	/* It is unlinked */
	file->f_vfsmnt = mntget(shm_mnt);
	file->f_dentry = dentry;
	file->f_mapping = inode->i_mapping;
	file->f_op = &shmem_file_operations;
	file->f_mode = FMODE_WRITE | FMODE_READ;
	return file;

close_file:
	put_filp(file);
put_dentry:
	dput(dentry);
put_memory:
	shmem_unacct_size(flags, size);
	return ERR_PTR(error);
}

/*
 * shmem_zero_setup - setup a shared anonymous mapping
 *
 * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
 *
 * Back the vma with an unlinked tmpfs file so shared anonymous memory
 * can be swapped.  Replaces any file already attached to the vma.
 */
int shmem_zero_setup(struct vm_area_struct *vma)
{
	struct file *file;
	loff_t size = vma->vm_end - vma->vm_start;

	file = shmem_file_setup("dev/zero", size, vma->vm_flags);
	if (IS_ERR(file))
		return PTR_ERR(file);

	if (vma->vm_file)
		fput(vma->vm_file);	/* drop the previous backing file */
	vma->vm_file = file;
	vma->vm_ops = &shmem_vm_ops;
	return 0;
}