// SPDX-License-Identifier: GPL-2.0
/*
 * linux/mm/page_io.c
 *
 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * Swap reorganised 29.12.95,
 * Asynchronous swapping added 30.12.95. Stephen Tweedie
 * Removed race in async swapping. 14.4.1996. Bruno Haible
 * Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
 * Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman
 */

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/psi.h>
#include <linux/uio.h>
#include <linux/sched/task.h>
#include <linux/delayacct.h>
#include <linux/zswap.h>
#include "swap.h"

static void __end_swap_bio_write(struct bio *bio)
{
	struct folio *folio = bio_first_folio_all(bio);

	if (bio->bi_status) {
		/*
		 * We failed to write the page out to swap-space.
		 * Re-dirty the page in order to avoid it being reclaimed.
		 * Also print a dire warning that things will go BAD (tm)
		 * very quickly.
		 *
		 * Also clear PG_reclaim to avoid folio_rotate_reclaimable()
		 * moving the folio to the tail of the LRU when writeback
		 * completes.
		 */
		folio_mark_dirty(folio);
		pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n",
				     MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
				     (unsigned long long)bio->bi_iter.bi_sector);
		folio_clear_reclaim(folio);
	}
	folio_end_writeback(folio);
}

static void end_swap_bio_write(struct bio *bio)
{
	__end_swap_bio_write(bio);
	bio_put(bio);
}

static void __end_swap_bio_read(struct bio *bio)
{
	struct folio *folio = bio_first_folio_all(bio);

	if (bio->bi_status) {
		pr_alert_ratelimited("Read-error on swap-device (%u:%u:%llu)\n",
				     MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
				     (unsigned long long)bio->bi_iter.bi_sector);
	} else {
		folio_mark_uptodate(folio);
	}
	folio_unlock(folio);
}

static void end_swap_bio_read(struct bio *bio)
{
	__end_swap_bio_read(bio);
	bio_put(bio);
}

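/*
 * Build the swap extent tree for a swapfile by probing its on-disk block
 * layout with bmap().  Each PAGE_SIZE-sized, PAGE_SIZE-aligned run of
 * contiguous blocks becomes one extent.  Returns the number of extents
 * added, or a negative errno (-EINVAL if the swapfile has holes); *span is
 * set to the span, in pages, between the lowest and highest blocks used.
 */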
int generic_swapfile_activate(struct swap_info_struct *sis,
				struct file *swap_file,
				sector_t *span)
{
	struct address_space *mapping = swap_file->f_mapping;
	struct inode *inode = mapping->host;
	unsigned blocks_per_page;
	unsigned long page_no;
	unsigned blkbits;
	sector_t probe_block;
	sector_t last_block;
	sector_t lowest_block = -1;
	sector_t highest_block = 0;
	int nr_extents = 0;
	int ret;

	blkbits = inode->i_blkbits;
	blocks_per_page = PAGE_SIZE >> blkbits;

	/*
	 * Map all the blocks into the extent tree. This code doesn't try
	 * to be very smart.
	 */
	probe_block = 0;
	page_no = 0;
	last_block = i_size_read(inode) >> blkbits;
	while ((probe_block + blocks_per_page) <= last_block &&
			page_no < sis->max) {
		unsigned block_in_page;
		sector_t first_block;

		cond_resched();

		first_block = probe_block;
		ret = bmap(inode, &first_block);
		if (ret || !first_block)
			goto bad_bmap;

		/*
		 * It must be PAGE_SIZE aligned on-disk
		 */
		if (first_block & (blocks_per_page - 1)) {
			probe_block++;
			goto reprobe;
		}

		for (block_in_page = 1; block_in_page < blocks_per_page;
					block_in_page++) {
			sector_t block;

			block = probe_block + block_in_page;
			ret = bmap(inode, &block);
			if (ret || !block)
				goto bad_bmap;

			if (block != first_block + block_in_page) {
				/* Discontiguity */
				probe_block++;
				goto reprobe;
			}
		}

		first_block >>= (PAGE_SHIFT - blkbits);
		if (page_no) {	/* exclude the header page */
			if (first_block < lowest_block)
				lowest_block = first_block;
			if (first_block > highest_block)
				highest_block = first_block;
		}

		/*
		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
		 */
		ret = add_swap_extent(sis, page_no, 1, first_block);
		if (ret < 0)
			goto out;
		nr_extents += ret;
		page_no++;
		probe_block += blocks_per_page;
reprobe:
		continue;
	}
	ret = nr_extents;
	*span = 1 + highest_block - lowest_block;
	if (page_no == 0)
		page_no = 1;	/* force Empty message */
	sis->max = page_no;
	sis->pages = page_no - 1;
	sis->highest_bit = page_no - 1;
out:
	return ret;
bad_bmap:
	pr_err("swapon: swapfile has holes\n");
	ret = -EINVAL;
	goto out;
}

/*
 * We may have stale swap cache pages in memory: notice
 * them here and get rid of the unnecessary final write.
 */
int swap_writepage(struct page *page, struct writeback_control *wbc)
{
	struct folio *folio = page_folio(page);
	int ret;

	if (folio_free_swap(folio)) {
		folio_unlock(folio);
		return 0;
	}
	/*
	 * Arch code may have to preserve more data than just the page
	 * contents, e.g. memory tags.
	 */
	ret = arch_prepare_to_swap(&folio->page);
	if (ret) {
		folio_mark_dirty(folio);
		folio_unlock(folio);
		return ret;
	}
	if (zswap_store(folio)) {
		folio_start_writeback(folio);
		folio_unlock(folio);
		folio_end_writeback(folio);
		return 0;
	}
	__swap_writepage(&folio->page, wbc);
	return 0;
}

static inline void count_swpout_vm_event(struct folio *folio)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (unlikely(folio_test_pmd_mappable(folio)))
		count_vm_event(THP_SWPOUT);
#endif
	count_vm_events(PSWPOUT, folio_nr_pages(folio));
}

#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
static void bio_associate_blkg_from_page(struct bio *bio, struct folio *folio)
{
	struct cgroup_subsys_state *css;
	struct mem_cgroup *memcg;

	memcg = folio_memcg(folio);
	if (!memcg)
		return;

	rcu_read_lock();
	css = cgroup_e_css(memcg->css.cgroup, &io_cgrp_subsys);
	bio_associate_blkg_from_css(bio, css);
	rcu_read_unlock();
}
#else
#define bio_associate_blkg_from_page(bio, folio)	do { } while (0)
#endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */

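/*
 * A swap_iocb batches up to SWAP_CLUSTER_MAX pages of SWP_FS_OPS swap I/O
 * into a single ->swap_rw() call; @pages counts the bvec entries in use
 * and @len is the total number of bytes queued.
 */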
struct swap_iocb {
	struct kiocb		iocb;
	struct bio_vec		bvec[SWAP_CLUSTER_MAX];
	int			pages;
	int			len;
};
static mempool_t *sio_pool;

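/*
 * Lazily allocate the shared swap_iocb mempool.  Concurrent callers may
 * race to create it; cmpxchg() ensures only one pool is installed and the
 * losers destroy their copy.
 */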
int sio_pool_init(void)
{
	if (!sio_pool) {
		mempool_t *pool = mempool_create_kmalloc_pool(
			SWAP_CLUSTER_MAX, sizeof(struct swap_iocb));
		if (cmpxchg(&sio_pool, NULL, pool))
			mempool_destroy(pool);
	}
	if (!sio_pool)
		return -ENOMEM;
	return 0;
}

static void sio_write_complete(struct kiocb *iocb, long ret)
{
	struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
	struct page *page = sio->bvec[0].bv_page;
	int p;

	if (ret != sio->len) {
		/*
		 * In the case of swap-over-nfs, this can be a
		 * temporary failure if the system has limited
		 * memory for allocating transmit buffers.
		 * Mark the pages dirty so they are not reclaimed,
		 * clear PG_reclaim to avoid folio_rotate_reclaimable(),
		 * and rate-limit the messages.  Unlike the normal
		 * direct-to-bio case, do not flag PageError, as the
		 * failure may be temporary.
		 */
		pr_err_ratelimited("Write error %ld on dio swapfile (%llu)\n",
				   ret, page_file_offset(page));
		for (p = 0; p < sio->pages; p++) {
			page = sio->bvec[p].bv_page;
			set_page_dirty(page);
			ClearPageReclaim(page);
		}
	} else {
		for (p = 0; p < sio->pages; p++)
			count_swpout_vm_event(page_folio(sio->bvec[p].bv_page));
	}

	for (p = 0; p < sio->pages; p++)
		end_page_writeback(sio->bvec[p].bv_page);

	mempool_free(sio, sio_pool);
}

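/*
 * Write a page to a SWP_FS_OPS swapfile through the filesystem's
 * ->swap_rw() method.  Consecutive pages are gathered into the swap_iocb
 * hanging off wbc->swap_plug and are submitted by swap_write_unplug()
 * once the batch is full or plugging is not in use.
 */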
static void swap_writepage_fs(struct page *page, struct writeback_control *wbc)
{
	struct swap_iocb *sio = NULL;
	struct swap_info_struct *sis = page_swap_info(page);
	struct file *swap_file = sis->swap_file;
	loff_t pos = page_file_offset(page);

	set_page_writeback(page);
	unlock_page(page);
	if (wbc->swap_plug)
		sio = *wbc->swap_plug;
	if (sio) {
		if (sio->iocb.ki_filp != swap_file ||
		    sio->iocb.ki_pos + sio->len != pos) {
			swap_write_unplug(sio);
			sio = NULL;
		}
	}
	if (!sio) {
		sio = mempool_alloc(sio_pool, GFP_NOIO);
		init_sync_kiocb(&sio->iocb, swap_file);
		sio->iocb.ki_complete = sio_write_complete;
		sio->iocb.ki_pos = pos;
		sio->pages = 0;
		sio->len = 0;
	}
	bvec_set_page(&sio->bvec[sio->pages], page, thp_size(page), 0);
	sio->len += thp_size(page);
	sio->pages += 1;
	if (sio->pages == ARRAY_SIZE(sio->bvec) || !wbc->swap_plug) {
		swap_write_unplug(sio);
		sio = NULL;
	}
	if (wbc->swap_plug)
		*wbc->swap_plug = sio;
}

static void swap_writepage_bdev_sync(struct page *page,
		struct writeback_control *wbc, struct swap_info_struct *sis)
{
	struct bio_vec bv;
	struct bio bio;
	struct folio *folio = page_folio(page);

	bio_init(&bio, sis->bdev, &bv, 1,
		 REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc));
	bio.bi_iter.bi_sector = swap_page_sector(page);
	__bio_add_page(&bio, page, thp_size(page), 0);

	bio_associate_blkg_from_page(&bio, folio);
	count_swpout_vm_event(folio);

	folio_start_writeback(folio);
	folio_unlock(folio);

	submit_bio_wait(&bio);
	__end_swap_bio_write(&bio);
}

static void swap_writepage_bdev_async(struct page *page,
		struct writeback_control *wbc, struct swap_info_struct *sis)
{
	struct bio *bio;
	struct folio *folio = page_folio(page);

	bio = bio_alloc(sis->bdev, 1,
			REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc),
			GFP_NOIO);
	bio->bi_iter.bi_sector = swap_page_sector(page);
	bio->bi_end_io = end_swap_bio_write;
	__bio_add_page(bio, page, thp_size(page), 0);

	bio_associate_blkg_from_page(bio, folio);
	count_swpout_vm_event(folio);
	folio_start_writeback(folio);
	folio_unlock(folio);
	submit_bio(bio);
}

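/*
 * Submit swap-out I/O for a page, choosing between the filesystem path
 * (SWP_FS_OPS), a synchronous bio for SWP_SYNCHRONOUS_IO devices, and an
 * ordinary asynchronous bio.
 */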
void __swap_writepage(struct page *page, struct writeback_control *wbc)
{
	struct swap_info_struct *sis = page_swap_info(page);

	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
	/*
	 * ->flags can be updated non-atomically (scan_swap_map_slots),
	 * but that will never affect SWP_FS_OPS, so the data_race
	 * is safe.
	 */
	if (data_race(sis->flags & SWP_FS_OPS))
		swap_writepage_fs(page, wbc);
	else if (sis->flags & SWP_SYNCHRONOUS_IO)
		swap_writepage_bdev_sync(page, wbc, sis);
	else
		swap_writepage_bdev_async(page, wbc, sis);
}

void swap_write_unplug(struct swap_iocb *sio)
{
	struct iov_iter from;
	struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
	int ret;

	iov_iter_bvec(&from, ITER_SOURCE, sio->bvec, sio->pages, sio->len);
	ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
	if (ret != -EIOCBQUEUED)
		sio_write_complete(&sio->iocb, ret);
}

static void sio_read_complete(struct kiocb *iocb, long ret)
{
	struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
	int p;

	if (ret == sio->len) {
		for (p = 0; p < sio->pages; p++) {
			struct folio *folio = page_folio(sio->bvec[p].bv_page);

			folio_mark_uptodate(folio);
			folio_unlock(folio);
		}
		count_vm_events(PSWPIN, sio->pages);
	} else {
		for (p = 0; p < sio->pages; p++) {
			struct folio *folio = page_folio(sio->bvec[p].bv_page);

			folio_unlock(folio);
		}
		pr_alert_ratelimited("Read-error on swap-device\n");
	}
	mempool_free(sio, sio_pool);
}

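/*
 * Read a page from a SWP_FS_OPS swapfile via ->swap_rw().  As on the
 * write side, adjacent reads are batched into a plugged swap_iocb and
 * submitted by swap_read_unplug().
 */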
static void swap_readpage_fs(struct page *page,
			     struct swap_iocb **plug)
{
	struct swap_info_struct *sis = page_swap_info(page);
	struct swap_iocb *sio = NULL;
	loff_t pos = page_file_offset(page);

	if (plug)
		sio = *plug;
	if (sio) {
		if (sio->iocb.ki_filp != sis->swap_file ||
		    sio->iocb.ki_pos + sio->len != pos) {
			swap_read_unplug(sio);
			sio = NULL;
		}
	}
	if (!sio) {
		sio = mempool_alloc(sio_pool, GFP_KERNEL);
		init_sync_kiocb(&sio->iocb, sis->swap_file);
		sio->iocb.ki_pos = pos;
		sio->iocb.ki_complete = sio_read_complete;
		sio->pages = 0;
		sio->len = 0;
	}
	bvec_set_page(&sio->bvec[sio->pages], page, thp_size(page), 0);
	sio->len += thp_size(page);
	sio->pages += 1;
	if (sio->pages == ARRAY_SIZE(sio->bvec) || !plug) {
		swap_read_unplug(sio);
		sio = NULL;
	}
	if (plug)
		*plug = sio;
}

static void swap_readpage_bdev_sync(struct page *page,
		struct swap_info_struct *sis)
{
	struct bio_vec bv;
	struct bio bio;

	bio_init(&bio, sis->bdev, &bv, 1, REQ_OP_READ);
	bio.bi_iter.bi_sector = swap_page_sector(page);
	__bio_add_page(&bio, page, thp_size(page), 0);
	/*
	 * Keep this task valid during swap readpage because the oom killer may
	 * attempt to access it in the page fault retry time check.
	 */
	get_task_struct(current);
	count_vm_event(PSWPIN);
	submit_bio_wait(&bio);
	__end_swap_bio_read(&bio);
	put_task_struct(current);
}

static void swap_readpage_bdev_async(struct page *page,
		struct swap_info_struct *sis)
{
	struct bio *bio;

	bio = bio_alloc(sis->bdev, 1, REQ_OP_READ, GFP_KERNEL);
	bio->bi_iter.bi_sector = swap_page_sector(page);
	bio->bi_end_io = end_swap_bio_read;
	__bio_add_page(bio, page, thp_size(page), 0);
	count_vm_event(PSWPIN);
	submit_bio(bio);
}

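/*
 * Read a page back from swap: try zswap first, then fall back to the
 * filesystem or block device path.  The page must be locked, and in the
 * swap cache unless @synchronous; it is unlocked once the read completes.
 */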
void swap_readpage(struct page *page, bool synchronous, struct swap_iocb **plug)
{
	struct folio *folio = page_folio(page);
	struct swap_info_struct *sis = page_swap_info(page);
	bool workingset = folio_test_workingset(folio);
	unsigned long pflags;
	bool in_thrashing;

	VM_BUG_ON_FOLIO(!folio_test_swapcache(folio) && !synchronous, folio);
	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
	VM_BUG_ON_FOLIO(folio_test_uptodate(folio), folio);

	/*
	 * Count submission time as memory stall and delay. When the device
	 * is congested, or the submitting cgroup IO-throttled, submission
	 * can be a significant part of overall IO time.
	 */
	if (workingset) {
		delayacct_thrashing_start(&in_thrashing);
		psi_memstall_enter(&pflags);
	}
	delayacct_swapin_start();

	if (zswap_load(folio)) {
		folio_mark_uptodate(folio);
		folio_unlock(folio);
	} else if (data_race(sis->flags & SWP_FS_OPS)) {
		swap_readpage_fs(page, plug);
	} else if (synchronous || (sis->flags & SWP_SYNCHRONOUS_IO)) {
		swap_readpage_bdev_sync(page, sis);
	} else {
		swap_readpage_bdev_async(page, sis);
	}

	if (workingset) {
		delayacct_thrashing_end(&in_thrashing);
		psi_memstall_leave(&pflags);
	}
	delayacct_swapin_end();
}

void __swap_read_unplug(struct swap_iocb *sio)
{
	struct iov_iter from;
	struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
	int ret;

	iov_iter_bvec(&from, ITER_DEST, sio->bvec, sio->pages, sio->len);
	ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
	if (ret != -EIOCBQUEUED)
		sio_read_complete(&sio->iocb, ret);
}