// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/mm/page_io.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Swap reorganised 29.12.95,
 *  Asynchronous swapping added 30.12.95. Stephen Tweedie
 *  Removed race in async swapping. 14.4.1996. Bruno Haible
 *  Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
 *  Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman
 */

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/psi.h>
#include <linux/uio.h>
#include <linux/sched/task.h>
#include <linux/delayacct.h>
#include <linux/zswap.h>
#include "swap.h"

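/*
 * Completion handling for swap-out bios: on error, re-dirty the folio so it
 * is not reclaimed and clear PG_reclaim, then end writeback.
 * end_swap_bio_write() additionally drops the bio reference for the
 * asynchronous path.
 */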
static void __end_swap_bio_write(struct bio *bio)
{
	struct folio *folio = bio_first_folio_all(bio);

	if (bio->bi_status) {
		/*
		 * We failed to write the page out to swap-space.
		 * Re-dirty the page in order to avoid it being reclaimed.
		 * Also print a dire warning that things will go BAD (tm)
		 * very quickly.
		 *
		 * Also clear PG_reclaim to avoid folio_rotate_reclaimable()
		 */
		folio_mark_dirty(folio);
		pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n",
				     MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
				     (unsigned long long)bio->bi_iter.bi_sector);
		folio_clear_reclaim(folio);
	}
	folio_end_writeback(folio);
}

static void end_swap_bio_write(struct bio *bio)
{
	__end_swap_bio_write(bio);
	bio_put(bio);
}

static void __end_swap_bio_read(struct bio *bio)
{
	struct folio *folio = bio_first_folio_all(bio);

	if (bio->bi_status) {
		pr_alert_ratelimited("Read-error on swap-device (%u:%u:%llu)\n",
				     MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
				     (unsigned long long)bio->bi_iter.bi_sector);
	} else {
		folio_mark_uptodate(folio);
	}
	folio_unlock(folio);
}

static void end_swap_bio_read(struct bio *bio)
{
	__end_swap_bio_read(bio);
	bio_put(bio);
}

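/*
 * Walk the swap file with bmap() and build swap extents for every
 * PAGE_SIZE-aligned, physically contiguous run of blocks.  Returns the
 * number of extents added, or a negative error (-EINVAL if the file
 * has holes).
 */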
int generic_swapfile_activate(struct swap_info_struct *sis,
				struct file *swap_file,
				sector_t *span)
{
	struct address_space *mapping = swap_file->f_mapping;
	struct inode *inode = mapping->host;
	unsigned blocks_per_page;
	unsigned long page_no;
	unsigned blkbits;
	sector_t probe_block;
	sector_t last_block;
	sector_t lowest_block = -1;
	sector_t highest_block = 0;
	int nr_extents = 0;
	int ret;

	blkbits = inode->i_blkbits;
	blocks_per_page = PAGE_SIZE >> blkbits;

	/*
	 * Map all the blocks into the extent tree.  This code doesn't try
	 * to be very smart.
	 */
	probe_block = 0;
	page_no = 0;
	last_block = i_size_read(inode) >> blkbits;
	while ((probe_block + blocks_per_page) <= last_block &&
			page_no < sis->max) {
		unsigned block_in_page;
		sector_t first_block;

		cond_resched();

		first_block = probe_block;
		ret = bmap(inode, &first_block);
		if (ret || !first_block)
			goto bad_bmap;

		/*
		 * It must be PAGE_SIZE aligned on-disk
		 */
		if (first_block & (blocks_per_page - 1)) {
			probe_block++;
			goto reprobe;
		}

		for (block_in_page = 1; block_in_page < blocks_per_page;
					block_in_page++) {
			sector_t block;

			block = probe_block + block_in_page;
			ret = bmap(inode, &block);
			if (ret || !block)
				goto bad_bmap;

			if (block != first_block + block_in_page) {
				/* Discontiguity */
				probe_block++;
				goto reprobe;
			}
		}

		first_block >>= (PAGE_SHIFT - blkbits);
		if (page_no) {	/* exclude the header page */
			if (first_block < lowest_block)
				lowest_block = first_block;
			if (first_block > highest_block)
				highest_block = first_block;
		}

		/*
		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
		 */
		ret = add_swap_extent(sis, page_no, 1, first_block);
		if (ret < 0)
			goto out;
		nr_extents += ret;
		page_no++;
		probe_block += blocks_per_page;
reprobe:
		continue;
	}
	ret = nr_extents;
	*span = 1 + highest_block - lowest_block;
	if (page_no == 0)
		page_no = 1;	/* force Empty message */
	sis->max = page_no;
	sis->pages = page_no - 1;
	sis->highest_bit = page_no - 1;
out:
	return ret;
bad_bmap:
	pr_err("swapon: swapfile has holes\n");
	ret = -EINVAL;
	goto out;
}

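/*
 * Return true if every word of every page in the folio is zero.
 */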
static bool is_folio_zero_filled(struct folio *folio)
{
	unsigned int pos, last_pos;
	unsigned long *data;
	unsigned int i;

	last_pos = PAGE_SIZE / sizeof(*data) - 1;
	for (i = 0; i < folio_nr_pages(folio); i++) {
		data = kmap_local_folio(folio, i * PAGE_SIZE);
		/*
		 * Check the last word first, in case the page is zero-filled
		 * at the start and has non-zero data at the end, which is
		 * common in real-world workloads.
		 */
		if (data[last_pos]) {
			kunmap_local(data);
			return false;
		}
		for (pos = 0; pos < last_pos; pos++) {
			if (data[pos]) {
				kunmap_local(data);
				return false;
			}
		}
		kunmap_local(data);
	}

	return true;
}

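/*
 * Mark all swap entries backing this folio as zero-filled in the zeromap,
 * so no IO is issued for them, and account the SWPOUT_ZERO events.
 */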
static void swap_zeromap_folio_set(struct folio *folio)
{
	struct obj_cgroup *objcg = get_obj_cgroup_from_folio(folio);
	struct swap_info_struct *sis = swp_swap_info(folio->swap);
	int nr_pages = folio_nr_pages(folio);
	swp_entry_t entry;
	unsigned int i;

	for (i = 0; i < folio_nr_pages(folio); i++) {
		entry = page_swap_entry(folio_page(folio, i));
		set_bit(swp_offset(entry), sis->zeromap);
	}

	count_vm_events(SWPOUT_ZERO, nr_pages);
	if (objcg) {
		count_objcg_events(objcg, SWPOUT_ZERO, nr_pages);
		obj_cgroup_put(objcg);
	}
}

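/*
 * Clear the zeromap bits for all swap entries backing this folio.
 */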
static void swap_zeromap_folio_clear(struct folio *folio)
{
	struct swap_info_struct *sis = swp_swap_info(folio->swap);
	swp_entry_t entry;
	unsigned int i;

	for (i = 0; i < folio_nr_pages(folio); i++) {
		entry = page_swap_entry(folio_page(folio, i));
		clear_bit(swp_offset(entry), sis->zeromap);
	}
}

/*
 * We may have stale swap cache pages in memory: notice
 * them here and get rid of the unnecessary final write.
 */
int swap_writepage(struct page *page, struct writeback_control *wbc)
{
	struct folio *folio = page_folio(page);
	int ret;

	if (folio_free_swap(folio)) {
		folio_unlock(folio);
		return 0;
	}
	/*
	 * Arch code may have to preserve more data than just the page
	 * contents, e.g. memory tags.
	 */
	ret = arch_prepare_to_swap(folio);
	if (ret) {
		folio_mark_dirty(folio);
		folio_unlock(folio);
		return ret;
	}

	/*
	 * Use a bitmap (zeromap) to avoid doing IO for zero-filled pages.
	 * The bits in zeromap are protected by the locked swapcache folio
	 * and atomic updates are used to protect against read-modify-write
	 * corruption due to other zero swap entries seeing concurrent updates.
	 */
	if (is_folio_zero_filled(folio)) {
		swap_zeromap_folio_set(folio);
		folio_unlock(folio);
		return 0;
	} else {
		/*
		 * Clear bits this folio occupies in the zeromap to prevent
		 * zero data being read in from any previous zero writes that
		 * occupied the same swap entries.
		 */
		swap_zeromap_folio_clear(folio);
	}
	if (zswap_store(folio)) {
		folio_unlock(folio);
		return 0;
	}
	if (!mem_cgroup_zswap_writeback_enabled(folio_memcg(folio))) {
		folio_mark_dirty(folio);
		return AOP_WRITEPAGE_ACTIVATE;
	}

	__swap_writepage(folio, wbc);
	return 0;
}

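/*
 * Account a swap-out at folio granularity, including the THP/mTHP
 * counters when the folio is large.
 */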
static inline void count_swpout_vm_event(struct folio *folio)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (unlikely(folio_test_pmd_mappable(folio))) {
		count_memcg_folio_events(folio, THP_SWPOUT, 1);
		count_vm_event(THP_SWPOUT);
	}
	count_mthp_stat(folio_order(folio), MTHP_STAT_SWPOUT);
#endif
	count_vm_events(PSWPOUT, folio_nr_pages(folio));
}

#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
static void bio_associate_blkg_from_page(struct bio *bio, struct folio *folio)
{
	struct cgroup_subsys_state *css;
	struct mem_cgroup *memcg;

	memcg = folio_memcg(folio);
	if (!memcg)
		return;

	rcu_read_lock();
	css = cgroup_e_css(memcg->css.cgroup, &io_cgrp_subsys);
	bio_associate_blkg_from_css(bio, css);
	rcu_read_unlock();
}
#else
#define bio_associate_blkg_from_page(bio, folio)		do { } while (0)
#endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */

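/*
 * A swap_iocb batches up to SWAP_CLUSTER_MAX contiguous folio segments
 * into a single ->swap_rw() call for swap files using SWP_FS_OPS.  The
 * structures come from sio_pool, which is set up lazily on first use.
 */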
struct swap_iocb {
	struct kiocb		iocb;
	struct bio_vec		bvec[SWAP_CLUSTER_MAX];
	int			pages;
	int			len;
};
static mempool_t *sio_pool;

int sio_pool_init(void)
{
	if (!sio_pool) {
		mempool_t *pool = mempool_create_kmalloc_pool(
			SWAP_CLUSTER_MAX, sizeof(struct swap_iocb));
		if (cmpxchg(&sio_pool, NULL, pool))
			mempool_destroy(pool);
	}
	if (!sio_pool)
		return -ENOMEM;
	return 0;
}

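/*
 * Completion callback for batched swap_rw() writes: on a short write,
 * re-dirty every page in the batch, then end writeback and release the
 * swap_iocb.
 */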
static void sio_write_complete(struct kiocb *iocb, long ret)
{
	struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
	struct page *page = sio->bvec[0].bv_page;
	int p;

	if (ret != sio->len) {
		/*
		 * In the case of swap-over-nfs, this can be a
		 * temporary failure if the system has limited
		 * memory for allocating transmit buffers.
		 * Mark the pages dirty, clear PG_reclaim to avoid
		 * folio_rotate_reclaimable(), and rate-limit the
		 * error messages.
		 */
		pr_err_ratelimited("Write error %ld on dio swapfile (%llu)\n",
				   ret, swap_dev_pos(page_swap_entry(page)));
		for (p = 0; p < sio->pages; p++) {
			page = sio->bvec[p].bv_page;
			set_page_dirty(page);
			ClearPageReclaim(page);
		}
	}

	for (p = 0; p < sio->pages; p++)
		end_page_writeback(sio->bvec[p].bv_page);

	mempool_free(sio, sio_pool);
}

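/*
 * Write a folio to a swapfile with SWP_FS_OPS, batching adjacent folios
 * into the swap_iocb plug (if any) and flushing it when the batch is full
 * or when no plug is supplied.
 */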
static void swap_writepage_fs(struct folio *folio, struct writeback_control *wbc)
{
	struct swap_iocb *sio = NULL;
	struct swap_info_struct *sis = swp_swap_info(folio->swap);
	struct file *swap_file = sis->swap_file;
	loff_t pos = swap_dev_pos(folio->swap);

	count_swpout_vm_event(folio);
	folio_start_writeback(folio);
	folio_unlock(folio);
	if (wbc->swap_plug)
		sio = *wbc->swap_plug;
	if (sio) {
		if (sio->iocb.ki_filp != swap_file ||
		    sio->iocb.ki_pos + sio->len != pos) {
			swap_write_unplug(sio);
			sio = NULL;
		}
	}
	if (!sio) {
		sio = mempool_alloc(sio_pool, GFP_NOIO);
		init_sync_kiocb(&sio->iocb, swap_file);
		sio->iocb.ki_complete = sio_write_complete;
		sio->iocb.ki_pos = pos;
		sio->pages = 0;
		sio->len = 0;
	}
	bvec_set_folio(&sio->bvec[sio->pages], folio, folio_size(folio), 0);
	sio->len += folio_size(folio);
	sio->pages += 1;
	if (sio->pages == ARRAY_SIZE(sio->bvec) || !wbc->swap_plug) {
		swap_write_unplug(sio);
		sio = NULL;
	}
	if (wbc->swap_plug)
		*wbc->swap_plug = sio;
}

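/*
 * Write a folio to a block device synchronously, using an on-stack bio
 * and waiting for the IO to complete.
 */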
static void swap_writepage_bdev_sync(struct folio *folio,
		struct writeback_control *wbc, struct swap_info_struct *sis)
{
	struct bio_vec bv;
	struct bio bio;

	bio_init(&bio, sis->bdev, &bv, 1,
		 REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc));
	bio.bi_iter.bi_sector = swap_folio_sector(folio);
	bio_add_folio_nofail(&bio, folio, folio_size(folio), 0);

	bio_associate_blkg_from_page(&bio, folio);
	count_swpout_vm_event(folio);

	folio_start_writeback(folio);
	folio_unlock(folio);

	submit_bio_wait(&bio);
	__end_swap_bio_write(&bio);
}

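/*
 * Write a folio to a block device asynchronously; completion is handled
 * by end_swap_bio_write().
 */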
static void swap_writepage_bdev_async(struct folio *folio,
		struct writeback_control *wbc, struct swap_info_struct *sis)
{
	struct bio *bio;

	bio = bio_alloc(sis->bdev, 1,
			REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc),
			GFP_NOIO);
	bio->bi_iter.bi_sector = swap_folio_sector(folio);
	bio->bi_end_io = end_swap_bio_write;
	bio_add_folio_nofail(bio, folio, folio_size(folio), 0);

	bio_associate_blkg_from_page(bio, folio);
	count_swpout_vm_event(folio);
	folio_start_writeback(folio);
	folio_unlock(folio);
	submit_bio(bio);
}

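/*
 * Dispatch the swap-out IO for a folio to the appropriate backend:
 * SWP_FS_OPS, synchronous block device, or asynchronous block device.
 */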
void __swap_writepage(struct folio *folio, struct writeback_control *wbc)
{
	struct swap_info_struct *sis = swp_swap_info(folio->swap);

	VM_BUG_ON_FOLIO(!folio_test_swapcache(folio), folio);
	/*
	 * ->flags can be updated non-atomically (scan_swap_map_slots),
	 * but that will never affect SWP_FS_OPS, so the data_race
	 * is safe.
	 */
	if (data_race(sis->flags & SWP_FS_OPS))
		swap_writepage_fs(folio, wbc);
	/*
	 * ->flags can be updated non-atomically (scan_swap_map_slots),
	 * but that will never affect SWP_SYNCHRONOUS_IO, so the data_race
	 * is safe.
	 */
	else if (data_race(sis->flags & SWP_SYNCHRONOUS_IO))
		swap_writepage_bdev_sync(folio, wbc, sis);
	else
		swap_writepage_bdev_async(folio, wbc, sis);
}

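/*
 * Submit a batched swap_iocb write via ->swap_rw() and complete it
 * immediately unless the IO was queued asynchronously.
 */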
void swap_write_unplug(struct swap_iocb *sio)
{
	struct iov_iter from;
	struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
	int ret;

	iov_iter_bvec(&from, ITER_SOURCE, sio->bvec, sio->pages, sio->len);
	ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
	if (ret != -EIOCBQUEUED)
		sio_write_complete(&sio->iocb, ret);
}

static void sio_read_complete(struct kiocb *iocb, long ret)
{
	struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
	int p;

	if (ret == sio->len) {
		for (p = 0; p < sio->pages; p++) {
			struct folio *folio = page_folio(sio->bvec[p].bv_page);

			folio_mark_uptodate(folio);
			folio_unlock(folio);
		}
		count_vm_events(PSWPIN, sio->pages);
	} else {
		for (p = 0; p < sio->pages; p++) {
			struct folio *folio = page_folio(sio->bvec[p].bv_page);

			folio_unlock(folio);
		}
		pr_alert_ratelimited("Read-error on swap-device\n");
	}
	mempool_free(sio, sio_pool);
}

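/*
 * Handle a swap-in from the zeromap: if all entries backing the folio are
 * marked zero-filled, fill the folio with zeroes instead of doing IO.
 * Returns true if the swap-in was handled here (or must fail), false if
 * the caller should read the data from the backend.
 */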
static bool swap_read_folio_zeromap(struct folio *folio)
{
	int nr_pages = folio_nr_pages(folio);
	struct obj_cgroup *objcg;
	bool is_zeromap;

	/*
	 * Swapping in a large folio that is partially in the zeromap is not
	 * currently handled. Return true without marking the folio uptodate so
	 * that an IO error is emitted (e.g. do_swap_page() will sigbus).
	 */
	if (WARN_ON_ONCE(swap_zeromap_batch(folio->swap, nr_pages,
			&is_zeromap) != nr_pages))
		return true;

	if (!is_zeromap)
		return false;

	objcg = get_obj_cgroup_from_folio(folio);
	count_vm_events(SWPIN_ZERO, nr_pages);
	if (objcg) {
		count_objcg_events(objcg, SWPIN_ZERO, nr_pages);
		obj_cgroup_put(objcg);
	}

	folio_zero_range(folio, 0, folio_size(folio));
	folio_mark_uptodate(folio);
	return true;
}

static void swap_read_folio_fs(struct folio *folio, struct swap_iocb **plug)
{
	struct swap_info_struct *sis = swp_swap_info(folio->swap);
	struct swap_iocb *sio = NULL;
	loff_t pos = swap_dev_pos(folio->swap);

	if (plug)
		sio = *plug;
	if (sio) {
		if (sio->iocb.ki_filp != sis->swap_file ||
		    sio->iocb.ki_pos + sio->len != pos) {
			swap_read_unplug(sio);
			sio = NULL;
		}
	}
	if (!sio) {
		sio = mempool_alloc(sio_pool, GFP_KERNEL);
		init_sync_kiocb(&sio->iocb, sis->swap_file);
		sio->iocb.ki_pos = pos;
		sio->iocb.ki_complete = sio_read_complete;
		sio->pages = 0;
		sio->len = 0;
	}
	bvec_set_folio(&sio->bvec[sio->pages], folio, folio_size(folio), 0);
	sio->len += folio_size(folio);
	sio->pages += 1;
	if (sio->pages == ARRAY_SIZE(sio->bvec) || !plug) {
		swap_read_unplug(sio);
		sio = NULL;
	}
	if (plug)
		*plug = sio;
}

static void swap_read_folio_bdev_sync(struct folio *folio,
		struct swap_info_struct *sis)
{
	struct bio_vec bv;
	struct bio bio;

	bio_init(&bio, sis->bdev, &bv, 1, REQ_OP_READ);
	bio.bi_iter.bi_sector = swap_folio_sector(folio);
	bio_add_folio_nofail(&bio, folio, folio_size(folio), 0);
	/*
	 * Keep this task valid during swap readpage because the oom killer may
	 * attempt to access it in the page fault retry time check.
	 */
	get_task_struct(current);
	count_vm_events(PSWPIN, folio_nr_pages(folio));
	submit_bio_wait(&bio);
	__end_swap_bio_read(&bio);
	put_task_struct(current);
}

static void swap_read_folio_bdev_async(struct folio *folio,
		struct swap_info_struct *sis)
{
	struct bio *bio;

	bio = bio_alloc(sis->bdev, 1, REQ_OP_READ, GFP_KERNEL);
	bio->bi_iter.bi_sector = swap_folio_sector(folio);
	bio->bi_end_io = end_swap_bio_read;
	bio_add_folio_nofail(bio, folio, folio_size(folio), 0);
	count_vm_events(PSWPIN, folio_nr_pages(folio));
	submit_bio(bio);
}

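/*
 * Read a folio back from swap: try the zeromap and zswap first, then fall
 * back to the swapfile or block device.  Accounts PSI memstall and
 * delayacct thrashing/swapin time around submission.
 */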
void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
{
	struct swap_info_struct *sis = swp_swap_info(folio->swap);
	bool synchronous = sis->flags & SWP_SYNCHRONOUS_IO;
	bool workingset = folio_test_workingset(folio);
	unsigned long pflags;
	bool in_thrashing;

	VM_BUG_ON_FOLIO(!folio_test_swapcache(folio) && !synchronous, folio);
	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
	VM_BUG_ON_FOLIO(folio_test_uptodate(folio), folio);

	/*
	 * Count submission time as memory stall and delay. When the device
	 * is congested, or the submitting cgroup IO-throttled, submission
	 * can be a significant part of overall IO time.
	 */
	if (workingset) {
		delayacct_thrashing_start(&in_thrashing);
		psi_memstall_enter(&pflags);
	}
	delayacct_swapin_start();

	if (swap_read_folio_zeromap(folio)) {
		folio_unlock(folio);
		goto finish;
	} else if (zswap_load(folio)) {
		folio_unlock(folio);
		goto finish;
	}

	/* We have to read from slower devices. Increase zswap protection. */
	zswap_folio_swapin(folio);

	if (data_race(sis->flags & SWP_FS_OPS)) {
		swap_read_folio_fs(folio, plug);
	} else if (synchronous) {
		swap_read_folio_bdev_sync(folio, sis);
	} else {
		swap_read_folio_bdev_async(folio, sis);
	}

finish:
	if (workingset) {
		delayacct_thrashing_end(&in_thrashing);
		psi_memstall_leave(&pflags);
	}
	delayacct_swapin_end();
}

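/*
 * Submit a batched swap_iocb read via ->swap_rw() and complete it
 * immediately unless the IO was queued asynchronously.
 */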
void __swap_read_unplug(struct swap_iocb *sio)
{
	struct iov_iter from;
	struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
	int ret;

	iov_iter_bvec(&from, ITER_DEST, sio->bvec, sio->pages, sio->len);
	ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
	if (ret != -EIOCBQUEUED)
		sio_read_complete(&sio->iocb, ret);
}