mm/filemap.c: diff between 2f718ffc16c43a435d12919c75dbfad518abd056 (old) and afddba49d18f346e5cc2938b6ed7c512db18ca68 (new); '-' lines removed, '+' lines added, unprefixed lines unchanged.
 /*
  * linux/mm/filemap.c
  *
  * Copyright (C) 1994-1999 Linus Torvalds
  */

 /*
  * This file handles the generic file mmap semantics used by

--- 1728 unchanged lines hidden ---

 void iov_iter_advance(struct iov_iter *i, size_t bytes)
 {
         BUG_ON(i->count < bytes);

         __iov_iter_advance_iov(i, bytes);
         i->count -= bytes;
 }

-int iov_iter_fault_in_readable(struct iov_iter *i)
+/*
+ * Fault in the first iovec of the given iov_iter, to a maximum length
+ * of bytes. Returns 0 on success, or non-zero if the memory could not be
+ * accessed (ie. because it is an invalid address).
+ *
+ * writev-intensive code may want this to prefault several iovecs -- that
+ * would be possible (callers must not rely on the fact that _only_ the
+ * first iovec will be faulted with the current implementation).
+ */
+int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 {
-        size_t seglen = min(i->iov->iov_len - i->iov_offset, i->count);
         char __user *buf = i->iov->iov_base + i->iov_offset;
-        return fault_in_pages_readable(buf, seglen);
+        bytes = min(bytes, i->iov->iov_len - i->iov_offset);
+        return fault_in_pages_readable(buf, bytes);
 }
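A minimal caller-side sketch (hypothetical, not part of filemap.c) of the pattern
this helper exists for: prefault the user buffer while no page lock is held, then
perform the real copy with pagefaults disabled, as generic_perform_write() below
does:

        if (iov_iter_fault_in_readable(i, bytes)) {     /* may sleep and fault */
                status = -EFAULT;
                break;
        }
        /* ... lock the pagecache page ... */
        pagefault_disable();
        copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
        pagefault_enable();
        iov_iter_advance(i, copied);
        /* ... unlock the page ... */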

 /*
  * Return the count of just the current iov_iter segment.
  */
 size_t iov_iter_single_seg_count(struct iov_iter *i)
 {
         const struct iovec *iov = i->iov;

--- 80 unchanged lines hidden ---

 #else
         return -EPERM;
 #endif
         }
         return 0;
 }
 EXPORT_SYMBOL(generic_write_checks);

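The body of generic_write_checks() is elided above. As a hedged sketch (the
signature matches this kernel's usage elsewhere, but the call site below is
illustrative), a write path validates and clamps pos/count before the write
helpers that follow are invoked:

        count = ocount;
        err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
        if (err)
                return err;
        if (count == 0)
                return 0;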
+int pagecache_write_begin(struct file *file, struct address_space *mapping,
+                                loff_t pos, unsigned len, unsigned flags,
+                                struct page **pagep, void **fsdata)
+{
+        const struct address_space_operations *aops = mapping->a_ops;
+
+        if (aops->write_begin) {
+                return aops->write_begin(file, mapping, pos, len, flags,
+                                                        pagep, fsdata);
+        } else {
+                int ret;
+                pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+                unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+                struct inode *inode = mapping->host;
+                struct page *page;
+again:
+                page = __grab_cache_page(mapping, index);
+                *pagep = page;
+                if (!page)
+                        return -ENOMEM;
+
+                if (flags & AOP_FLAG_UNINTERRUPTIBLE && !PageUptodate(page)) {
+                        /*
+                         * There is no way to resolve a short write situation
+                         * for a !Uptodate page (except by double copying in
+                         * the caller done by generic_perform_write_2copy).
+                         *
+                         * Instead, we have to bring it uptodate here.
+                         */
+                        ret = aops->readpage(file, page);
+                        page_cache_release(page);
+                        if (ret) {
+                                if (ret == AOP_TRUNCATED_PAGE)
+                                        goto again;
+                                return ret;
+                        }
+                        goto again;
+                }
+
+                ret = aops->prepare_write(file, page, offset, offset+len);
+                if (ret) {
+                        if (ret != AOP_TRUNCATED_PAGE)
+                                unlock_page(page);
+                        page_cache_release(page);
+                        if (pos + len > inode->i_size)
+                                vmtruncate(inode, inode->i_size);
+                        if (ret == AOP_TRUNCATED_PAGE)
+                                goto again;
+                }
+                return ret;
+        }
+}
+EXPORT_SYMBOL(pagecache_write_begin);
+
+int pagecache_write_end(struct file *file, struct address_space *mapping,
+                                loff_t pos, unsigned len, unsigned copied,
+                                struct page *page, void *fsdata)
+{
+        const struct address_space_operations *aops = mapping->a_ops;
+        int ret;
+
+        if (aops->write_end) {
+                mark_page_accessed(page);
+                ret = aops->write_end(file, mapping, pos, len, copied,
+                                                        page, fsdata);
+        } else {
+                unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+                struct inode *inode = mapping->host;
+
+                flush_dcache_page(page);
+                ret = aops->commit_write(file, page, offset, offset+len);
+                unlock_page(page);
+                mark_page_accessed(page);
+                page_cache_release(page);
+                BUG_ON(ret == AOP_TRUNCATED_PAGE); /* can't deal with */
+
+                if (ret < 0) {
+                        if (pos + len > inode->i_size)
+                                vmtruncate(inode, inode->i_size);
+                } else if (ret > 0)
+                        ret = min_t(size_t, copied, ret);
+                else
+                        ret = copied;
+        }
+
+        return ret;
+}
+EXPORT_SYMBOL(pagecache_write_end);
+
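A hedged sketch (the caller, local names, and error handling are illustrative)
of how these two helpers are meant to be paired: write_begin returns a locked
page plus opaque fsdata, the caller copies into the page, and write_end
commits the copy:

        struct page *page;
        void *fsdata;
        int status;

        status = pagecache_write_begin(file, mapping, pos, bytes,
                                AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
        if (status)
                return status;
        memcpy(kmap(page) + offset, buf, bytes);        /* page is locked here */
        kunmap(page);
        flush_dcache_page(page);
        status = pagecache_write_end(file, mapping, pos, bytes, bytes,
                                page, fsdata);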
 ssize_t
 generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
                 unsigned long *nr_segs, loff_t pos, loff_t *ppos,
                 size_t count, size_t ocount)
 {
         struct file *file = iocb->ki_filp;
         struct address_space *mapping = file->f_mapping;
         struct inode *inode = mapping->host;

--- 27 unchanged lines hidden ---

         return written;
 }
 EXPORT_SYMBOL(generic_file_direct_write);

 /*
  * Find or create a page at the given pagecache position. Return the locked
  * page. This function is specifically for buffered writes.
  */
-static struct page *__grab_cache_page(struct address_space *mapping,
-                                                        pgoff_t index)
+struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index)
 {
         int status;
         struct page *page;
 repeat:
         page = find_lock_page(mapping, index);
         if (likely(page))
                 return page;

--- 4 unchanged lines hidden ---

         if (unlikely(status)) {
                 page_cache_release(page);
                 if (status == -EEXIST)
                         goto repeat;
                 return NULL;
         }
         return page;
 }
+EXPORT_SYMBOL(__grab_cache_page);

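The locking discipline expected by __grab_cache_page(), as the callers below
follow it (sketch; the surrounding code is hypothetical): the returned page is
locked and holds a reference, both of which the caller must drop:

        page = __grab_cache_page(mapping, index);
        if (!page)
                return -ENOMEM;
        /* ... modify the page while it is locked ... */
        unlock_page(page);
        page_cache_release(page);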
-ssize_t
-generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
-                unsigned long nr_segs, loff_t pos, loff_t *ppos,
-                size_t count, ssize_t written)
+static ssize_t generic_perform_write_2copy(struct file *file,
+                                struct iov_iter *i, loff_t pos)
 {
-        struct file *file = iocb->ki_filp;
         struct address_space *mapping = file->f_mapping;
         const struct address_space_operations *a_ops = mapping->a_ops;
         struct inode *inode = mapping->host;
         long status = 0;
-        struct iov_iter i;
+        ssize_t written = 0;

-        iov_iter_init(&i, iov, nr_segs, count, written);
-
         do {
                 struct page *src_page;
                 struct page *page;
                 pgoff_t index;          /* Pagecache index for current page */
                 unsigned long offset;   /* Offset into pagecache page */
                 unsigned long bytes;    /* Bytes to write to page */
                 size_t copied;          /* Bytes copied from user */

                 offset = (pos & (PAGE_CACHE_SIZE - 1));
                 index = pos >> PAGE_CACHE_SHIFT;
                 bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
-                                                iov_iter_count(&i));
+                                                iov_iter_count(i));
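A worked example of the index/offset arithmetic above (assuming the common
PAGE_CACHE_SIZE of 4096, i.e. PAGE_CACHE_SHIFT == 12): for pos = 5000,

        index  = 5000 >> 12  = 1
        offset = 5000 & 4095 = 904
        bytes  = min(4096 - 904, iov_iter_count(i)) = min(3192, count)

so a single iteration never crosses a pagecache page boundary.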

                 /*
                  * a non-NULL src_page indicates that we're doing the
                  * copy via get_user_pages and kmap.
                  */
                 src_page = NULL;

                 /*
                  * Bring in the user page that we will copy from _first_.
                  * Otherwise there's a nasty deadlock on copying from the
                  * same page as we're writing to, without it being marked
                  * up-to-date.
                  *
                  * Not only is this an optimisation, but it is also required
                  * to check that the address is actually valid, when atomic
                  * usercopies are used, below.
                  */
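A concrete instance of that deadlock (hypothetical userspace sketch): the
source buffer is a mapping of the very page being written, so faulting it in
while that pagecache page is held locked could block forever:

        /* src aliases file offset 0; the write targets the same page. */
        char *src = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
        lseek(fd, 0, SEEK_SET);
        write(fd, src, 4096);

Prefaulting src before the page lock is taken avoids servicing that fault with
the lock held.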
-                if (unlikely(iov_iter_fault_in_readable(&i))) {
+                if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
                         status = -EFAULT;
                         break;
                 }

                 page = __grab_cache_page(mapping, index);
                 if (!page) {
                         status = -ENOMEM;
                         break;

--- 14 unchanged lines hidden ---

                         break;
                 }

                 /*
                  * Cannot get_user_pages with a page locked for the
                  * same reason as we can't take a page fault with a
                  * page locked (as explained below).
                  */
-                copied = iov_iter_copy_from_user(src_page, &i,
+                copied = iov_iter_copy_from_user(src_page, i,
                                                         offset, bytes);
                 if (unlikely(copied == 0)) {
                         status = -EFAULT;
                         page_cache_release(page);
                         page_cache_release(src_page);
                         break;
                 }
                 bytes = copied;

--- 8 unchanged lines hidden ---

                  * consistent.
                  */
                 if (unlikely(!page->mapping || PageUptodate(page))) {
                         unlock_page(page);
                         page_cache_release(page);
                         page_cache_release(src_page);
                         continue;
                 }
         }

         status = a_ops->prepare_write(file, page, offset, offset+bytes);
         if (unlikely(status))
                 goto fs_write_aop_error;

         if (!src_page) {
                 /*

--- 5 unchanged lines hidden ---

                  * preempt count, and use _atomic usercopies.
                  *
                  * The page is uptodate so we are OK to encounter a
                  * short copy: if unmodified parts of the page are
                  * marked dirty and written out to disk, it doesn't
                  * really matter.
                  */
                 pagefault_disable();
-                        copied = iov_iter_copy_from_user_atomic(page, &i,
+                        copied = iov_iter_copy_from_user_atomic(page, i,
                                                         offset, bytes);
                         pagefault_enable();
                 } else {
                         void *src, *dst;
                         src = kmap_atomic(src_page, KM_USER0);
                         dst = kmap_atomic(page, KM_USER1);
                         memcpy(dst + offset, src + offset, bytes);
                         kunmap_atomic(dst, KM_USER1);

--- 9 unchanged lines hidden ---

                 copied = min_t(size_t, copied, status);

                 unlock_page(page);
                 mark_page_accessed(page);
                 page_cache_release(page);
                 if (src_page)
                         page_cache_release(src_page);

-                iov_iter_advance(&i, copied);
-                written += copied;
+                iov_iter_advance(i, copied);
                 pos += copied;
+                written += copied;

                 balance_dirty_pages_ratelimited(mapping);
                 cond_resched();
                 continue;

 fs_write_aop_error:
                 if (status != AOP_TRUNCATED_PAGE)
                         unlock_page(page);

--- 7 unchanged lines hidden ---

                  * i_size_read because we hold i_mutex.
                  */
                 if (pos + bytes > inode->i_size)
                         vmtruncate(inode, inode->i_size);
                 if (status == AOP_TRUNCATED_PAGE)
                         continue;
                 else
                         break;
-        } while (iov_iter_count(&i));
-        *ppos = pos;
+        } while (iov_iter_count(i));

-        /*
-         * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC
-         */
+        return written ? written : status;
+}
+
+static ssize_t generic_perform_write(struct file *file,
+                                struct iov_iter *i, loff_t pos)
+{
+        struct address_space *mapping = file->f_mapping;
+        const struct address_space_operations *a_ops = mapping->a_ops;
+        long status = 0;
+        ssize_t written = 0;
+
+        do {
+                struct page *page;
+                pgoff_t index;          /* Pagecache index for current page */
+                unsigned long offset;   /* Offset into pagecache page */
+                unsigned long bytes;    /* Bytes to write to page */
+                size_t copied;          /* Bytes copied from user */
+                void *fsdata;
+
+                offset = (pos & (PAGE_CACHE_SIZE - 1));
+                index = pos >> PAGE_CACHE_SHIFT;
+                bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
+                                                iov_iter_count(i));
+
+again:
+
+                /*
+                 * Bring in the user page that we will copy from _first_.
+                 * Otherwise there's a nasty deadlock on copying from the
+                 * same page as we're writing to, without it being marked
+                 * up-to-date.
+                 *
+                 * Not only is this an optimisation, but it is also required
+                 * to check that the address is actually valid, when atomic
+                 * usercopies are used, below.
+                 */
+                if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
+                        status = -EFAULT;
+                        break;
+                }
+
+                status = a_ops->write_begin(file, mapping, pos, bytes, 0,
+                                                &page, &fsdata);
+                if (unlikely(status))
+                        break;
+
+                pagefault_disable();
+                copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
+                pagefault_enable();
+                flush_dcache_page(page);
+
+                status = a_ops->write_end(file, mapping, pos, bytes, copied,
+                                                page, fsdata);
+                if (unlikely(status < 0))
+                        break;
+                copied = status;
+
+                cond_resched();
+
+                if (unlikely(copied == 0)) {
+                        /*
+                         * If we were unable to copy any data at all, we must
+                         * fall back to a single segment length write.
+                         *
+                         * If we didn't fallback here, we could livelock
+                         * because not all segments in the iov can be copied at
+                         * once without a pagefault.
+                         */
+                        bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
+                                                iov_iter_single_seg_count(i));
+                        goto again;
+                }
+                iov_iter_advance(i, copied);
+                pos += copied;
+                written += copied;
+
+                balance_dirty_pages_ratelimited(mapping);
+
+        } while (iov_iter_count(i));
+
+        return written ? written : status;
+}
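Why the copied == 0 fallback above guarantees forward progress (a hedged worked
example; the segment sizes are hypothetical): suppose iov[0] holds 100 resident
bytes and iov[1] holds 4000 paged-out bytes. iov_iter_fault_in_readable() only
prefaults the first segment, so an atomic copy of bytes = 4096 stops short at
the segment boundary, the short copy cannot be committed into a !Uptodate page,
and copied ends up 0 on every retry. Shrinking bytes to
iov_iter_single_seg_count(i) == 100 lets iov[0] copy completely, the iterator
advances, and the next iteration prefaults iov[1].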
+
+ssize_t
+generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
+                unsigned long nr_segs, loff_t pos, loff_t *ppos,
+                size_t count, ssize_t written)
+{
+        struct file *file = iocb->ki_filp;
+        struct address_space *mapping = file->f_mapping;
+        const struct address_space_operations *a_ops = mapping->a_ops;
+        struct inode *inode = mapping->host;
+        ssize_t status;
+        struct iov_iter i;
+
+        iov_iter_init(&i, iov, nr_segs, count, written);
+        if (a_ops->write_begin)
+                status = generic_perform_write(file, &i, pos);
+        else
+                status = generic_perform_write_2copy(file, &i, pos);
+
         if (likely(status >= 0)) {
+                written += status;
+                *ppos = pos + status;
+
+                /*
+                 * For now, when the user asks for O_SYNC, we'll actually give
+                 * O_DSYNC
+                 */
                 if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
                         if (!a_ops->writepage || !is_sync_kiocb(iocb))
                                 status = generic_osync_inode(inode, mapping,
                                                 OSYNC_METADATA|OSYNC_DATA);
                 }
         }

         /*

--- 254 unchanged lines hidden ---