// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem read subrequest result collection, assessment and
 * retrying.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

/*
 * Clear the unread part of an I/O request.
 */
static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
{
	netfs_reset_iter(subreq);
	WARN_ON_ONCE(subreq->len - subreq->transferred != iov_iter_count(&subreq->io_iter));
	iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter);
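	/* If the cleared region runs up to or beyond the current file size,
	 * note that the read hit the EOF.
	 */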
	if (subreq->start + subreq->transferred >= subreq->rreq->i_size)
		__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
}

/*
 * Flush, mark and unlock a folio that's now completely read.  If we want to
 * cache the folio, we set the group to NETFS_FOLIO_COPY_TO_CACHE, mark it
 * dirty and let writeback handle it.
 */
static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
				    struct netfs_io_request *rreq,
				    struct folio_queue *folioq,
				    int slot)
{
	struct netfs_folio *finfo;
	struct folio *folio = folioq_folio(folioq, slot);

	flush_dcache_folio(folio);
	folio_mark_uptodate(folio);

	if (!test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
		finfo = netfs_folio_info(folio);
		if (finfo) {
			trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
			if (finfo->netfs_group)
				folio_change_private(folio, finfo->netfs_group);
			else
				folio_detach_private(folio);
			kfree(finfo);
		}

		if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
			if (!WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
				folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
				folio_mark_dirty(folio);
			}
		} else {
			trace_netfs_folio(folio, netfs_folio_trace_read_done);
		}
	} else {
		// TODO: Use of PG_private_2 is deprecated.
		if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
			netfs_pgpriv2_mark_copy_to_cache(subreq, rreq, folioq, slot);
	}

	if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
		if (folio->index == rreq->no_unlock_folio &&
		    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
			_debug("no unlock");
		} else {
			trace_netfs_folio(folio, netfs_folio_trace_read_unlock);
			folio_unlock(folio);
		}
	}

	folioq_clear(folioq, slot);
}

/*
 * Unlock any folios that are now completely read.  Returns true if the
 * subrequest is removed from the list.
 */
static bool netfs_consume_read_data(struct netfs_io_subrequest *subreq, bool was_async)
{
	struct netfs_io_subrequest *prev, *next;
	struct netfs_io_request *rreq = subreq->rreq;
	struct folio_queue *folioq = subreq->curr_folioq;
	size_t avail, prev_donated, next_donated, fsize, part, excess;
	loff_t fpos, start;
	loff_t fend;
	int slot = subreq->curr_folioq_slot;

	if (WARN(subreq->transferred > subreq->len,
		 "Subreq overread: R%x[%x] %zu > %zu",
		 rreq->debug_id, subreq->debug_index,
		 subreq->transferred, subreq->len))
		subreq->transferred = subreq->len;

next_folio:
	fsize = PAGE_SIZE << subreq->curr_folio_order;
	fpos = round_down(subreq->start + subreq->consumed, fsize);
	fend = fpos + fsize;

	if (WARN_ON_ONCE(!folioq) ||
	    WARN_ON_ONCE(!folioq_folio(folioq, slot)) ||
	    WARN_ON_ONCE(folioq_folio(folioq, slot)->index != fpos / PAGE_SIZE)) {
		pr_err("R=%08x[%x] s=%llx-%llx ctl=%zx/%zx/%zx sl=%u\n",
		       rreq->debug_id, subreq->debug_index,
		       subreq->start, subreq->start + subreq->transferred - 1,
		       subreq->consumed, subreq->transferred, subreq->len,
		       slot);
		if (folioq) {
			struct folio *folio = folioq_folio(folioq, slot);

			pr_err("folioq: orders=%02x%02x%02x%02x\n",
			       folioq->orders[0], folioq->orders[1],
			       folioq->orders[2], folioq->orders[3]);
			if (folio)
				pr_err("folio: %llx-%llx ix=%llx o=%u qo=%u\n",
				       fpos, fend - 1, folio_pos(folio), folio_order(folio),
				       folioq_folio_order(folioq, slot));
		}
	}

donation_changed:
	/* Try to consume the current folio if we've hit or passed the end of
	 * it.  There's a possibility that this subreq doesn't start at the
	 * beginning of the folio, in which case we need to donate to/from the
	 * preceding subreq.
	 *
	 * We also need to include any potential donation back from the
	 * following subreq.
	 */
	prev_donated = READ_ONCE(subreq->prev_donated);
	next_donated = READ_ONCE(subreq->next_donated);
	if (prev_donated || next_donated) {
		spin_lock_bh(&rreq->lock);
		prev_donated = subreq->prev_donated;
		next_donated = subreq->next_donated;
		subreq->start -= prev_donated;
		subreq->len += prev_donated;
		subreq->transferred += prev_donated;
		prev_donated = subreq->prev_donated = 0;
		if (subreq->transferred == subreq->len) {
			subreq->len += next_donated;
			subreq->transferred += next_donated;
			next_donated = subreq->next_donated = 0;
		}
		trace_netfs_sreq(subreq, netfs_sreq_trace_add_donations);
		spin_unlock_bh(&rreq->lock);
	}

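	/* Work out how much unconsumed data this subreq has available (plus
	 * any donations it has received), where that data starts in the file
	 * and how much of it can be applied to the current folio.
	 */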
	avail = subreq->transferred;
	if (avail == subreq->len)
		avail += next_donated;
	start = subreq->start;
	if (subreq->consumed == 0) {
		start -= prev_donated;
		avail += prev_donated;
	} else {
		start += subreq->consumed;
		avail -= subreq->consumed;
	}
	part = umin(avail, fsize);

	trace_netfs_progress(subreq, start, avail, part);

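	/* If the available data runs to or beyond the end of the current
	 * folio, that folio can be finished with.
	 */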
	if (start + avail >= fend) {
		if (fpos == start) {
			/* Flush, unlock and mark for caching any folio we've just read. */
			subreq->consumed = fend - subreq->start;
			netfs_unlock_read_folio(subreq, rreq, folioq, slot);
			folioq_mark2(folioq, slot);
			if (subreq->consumed >= subreq->len)
				goto remove_subreq;
		} else if (fpos < start) {
			excess = fend - subreq->start;

			spin_lock_bh(&rreq->lock);
			/* If we complete first on a folio split with the
			 * preceding subreq, donate to that subreq - otherwise
			 * we get the responsibility.
			 */
			if (subreq->prev_donated != prev_donated) {
				spin_unlock_bh(&rreq->lock);
				goto donation_changed;
			}

			if (list_is_first(&subreq->rreq_link, &rreq->subrequests)) {
				spin_unlock_bh(&rreq->lock);
				pr_err("Can't donate prior to front\n");
				goto bad;
			}

			prev = list_prev_entry(subreq, rreq_link);
			WRITE_ONCE(prev->next_donated, prev->next_donated + excess);
			subreq->start += excess;
			subreq->len -= excess;
			subreq->transferred -= excess;
			trace_netfs_donate(rreq, subreq, prev, excess,
					   netfs_trace_donate_tail_to_prev);
			trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev);

			if (subreq->consumed >= subreq->len)
				goto remove_subreq_locked;
			spin_unlock_bh(&rreq->lock);
		} else {
			pr_err("fpos > start\n");
			goto bad;
		}

		/* Advance the rolling buffer to the next folio. */
		slot++;
		if (slot >= folioq_nr_slots(folioq)) {
			slot = 0;
			folioq = folioq->next;
			subreq->curr_folioq = folioq;
		}
		subreq->curr_folioq_slot = slot;
		if (folioq && folioq_folio(folioq, slot))
			subreq->curr_folio_order = folioq->orders[slot];
		if (!was_async)
			cond_resched();
		goto next_folio;
	}

	/* Deal with partial progress. */
	if (subreq->transferred < subreq->len)
		return false;

	/* Donate the remaining downloaded data to one of the neighbouring
	 * subrequests.  Note that we may race with them doing the same thing.
	 */
	spin_lock_bh(&rreq->lock);

	if (subreq->prev_donated != prev_donated ||
	    subreq->next_donated != next_donated) {
		spin_unlock_bh(&rreq->lock);
		cond_resched();
		goto donation_changed;
	}

	/* Deal with the trickiest case: that this subreq is in the middle of a
	 * folio, not touching either edge, but finishes first.  In such a
	 * case, we donate to the previous subreq, if there is one, so that the
	 * donation is only handled when that completes - and remove this
	 * subreq from the list.
	 *
	 * If the previous subreq finished first, we will have acquired their
	 * donation and should be able to unlock folios and/or donate nextwards.
	 */
	if (!subreq->consumed &&
	    !prev_donated &&
	    !list_is_first(&subreq->rreq_link, &rreq->subrequests)) {
		prev = list_prev_entry(subreq, rreq_link);
		WRITE_ONCE(prev->next_donated, prev->next_donated + subreq->len);
		subreq->start += subreq->len;
		trace_netfs_donate(rreq, subreq, prev, subreq->len,
				   netfs_trace_donate_to_prev);
		subreq->len = 0;
		subreq->transferred = 0;
		trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev);
		goto remove_subreq_locked;
	}

	/* If we can't donate down the chain, donate up the chain instead. */
	excess = subreq->len - subreq->consumed + next_donated;

	if (!subreq->consumed)
		excess += prev_donated;

	if (list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
		rreq->prev_donated = excess;
		trace_netfs_donate(rreq, subreq, NULL, excess,
				   netfs_trace_donate_to_deferred_next);
	} else {
		next = list_next_entry(subreq, rreq_link);
		WRITE_ONCE(next->prev_donated, excess);
		trace_netfs_donate(rreq, subreq, next, excess,
				   netfs_trace_donate_to_next);
	}
	trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_next);
	subreq->len = subreq->consumed;
	subreq->transferred = subreq->consumed;
	goto remove_subreq_locked;

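	/* Mark the subreq fully consumed, remove it from the request's list
	 * and drop a reference on it.
	 */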
remove_subreq:
	spin_lock_bh(&rreq->lock);
remove_subreq_locked:
	subreq->consumed = subreq->len;
	list_del(&subreq->rreq_link);
	spin_unlock_bh(&rreq->lock);
	netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_consumed);
	return true;

bad:
	/* Errr... prev and next both donated to us, but insufficient to finish
	 * the folio.
	 */
	printk("R=%08x[%x] s=%llx-%llx %zx/%zx/%zx\n",
	       rreq->debug_id, subreq->debug_index,
	       subreq->start, subreq->start + subreq->transferred - 1,
	       subreq->consumed, subreq->transferred, subreq->len);
	printk("folio: %llx-%llx\n", fpos, fend - 1);
	printk("donated: prev=%zx next=%zx\n", prev_donated, next_donated);
	printk("s=%llx av=%zx part=%zx\n", start, avail, part);
	BUG();
}

/*
 * Do page flushing and suchlike after DIO.
 */
static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;
	unsigned int i;

	/* Collect unbuffered reads and direct reads, adding up the transfer
	 * sizes until we find the first short or failed subrequest.
	 */
	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		rreq->transferred += subreq->transferred;

		if (subreq->transferred < subreq->len ||
		    test_bit(NETFS_SREQ_FAILED, &subreq->flags)) {
			rreq->error = subreq->error;
			break;
		}
	}

	if (rreq->origin == NETFS_DIO_READ) {
		for (i = 0; i < rreq->direct_bv_count; i++) {
			flush_dcache_page(rreq->direct_bv[i].bv_page);
			// TODO: cifs marks pages in the destination buffer
			// dirty under some circumstances after a read.  Do we
			// need to do that too?
			set_page_dirty(rreq->direct_bv[i].bv_page);
		}
	}

	if (rreq->iocb) {
		rreq->iocb->ki_pos += rreq->transferred;
		if (rreq->iocb->ki_complete)
			rreq->iocb->ki_complete(
				rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
	}
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
	if (rreq->origin == NETFS_DIO_READ)
		inode_dio_end(rreq->inode);
}

/*
 * Assess the state of a read request and decide what to do next.
 *
 * Note that we're in normal kernel thread context at this point, possibly
 * running on a workqueue.
 */
static void netfs_rreq_assess(struct netfs_io_request *rreq)
{
	trace_netfs_rreq(rreq, netfs_rreq_trace_assess);

	//netfs_rreq_is_still_valid(rreq);

	if (test_and_clear_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags)) {
		netfs_retry_reads(rreq);
		return;
	}

	if (rreq->origin == NETFS_DIO_READ ||
	    rreq->origin == NETFS_READ_GAPS)
		netfs_rreq_assess_dio(rreq);
	task_io_account_read(rreq->transferred);

	trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip);
	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
	wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);

	trace_netfs_rreq(rreq, netfs_rreq_trace_done);
	netfs_clear_subrequests(rreq, false);
	netfs_unlock_abandoned_read_pages(rreq);
	if (unlikely(test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)))
		netfs_pgpriv2_write_to_the_cache(rreq);
}

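/*
 * Workqueue handler: assess the request once all subrequest I/O has completed
 * and then drop the reference that was taken when the work item was queued.
 */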
void netfs_read_termination_worker(struct work_struct *work)
{
	struct netfs_io_request *rreq =
		container_of(work, struct netfs_io_request, work);
	netfs_see_request(rreq, netfs_rreq_trace_see_work);
	netfs_rreq_assess(rreq);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_work_complete);
}

/*
 * Handle the completion of all outstanding I/O operations on a read request.
 * We inherit a ref from the caller.
 */
void netfs_rreq_terminated(struct netfs_io_request *rreq, bool was_async)
{
	if (!was_async)
		return netfs_rreq_assess(rreq);
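	/* We may be in softirq or interrupt context, so punt the assessment
	 * to a workqueue, taking a ref on the request for the worker to drop.
	 */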
	if (!work_pending(&rreq->work)) {
		netfs_get_request(rreq, netfs_rreq_trace_get_work);
		if (!queue_work(system_unbound_wq, &rreq->work))
			netfs_put_request(rreq, was_async, netfs_rreq_trace_put_work_nq);
	}
}

/**
 * netfs_read_subreq_progress - Note progress of a read operation.
 * @subreq: The read request that has made progress.
 * @was_async: True if we're in an asynchronous context.
 *
 * This tells the read side of netfs lib that a contributory I/O operation has
 * made some progress and that it may be possible to unlock some folios.
 *
 * Before calling, the filesystem should update subreq->transferred to track
 * the amount of data copied into the output buffer.
 *
 * If @was_async is true, the caller might be running in softirq or interrupt
 * context and we can't sleep.
 */
void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq,
				bool was_async)
{
	struct netfs_io_request *rreq = subreq->rreq;

	trace_netfs_sreq(subreq, netfs_sreq_trace_progress);

	if (subreq->transferred > subreq->consumed &&
	    (rreq->origin == NETFS_READAHEAD ||
	     rreq->origin == NETFS_READPAGE ||
	     rreq->origin == NETFS_READ_FOR_WRITE)) {
		netfs_consume_read_data(subreq, was_async);
		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	}
}
EXPORT_SYMBOL(netfs_read_subreq_progress);
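
/*
 * Example (illustrative sketch only, not taken from a real filesystem): a
 * netfs that receives data in chunks might report each chunk as it lands so
 * that folios can be unlocked early.  The myfs_* names are hypothetical.
 *
 *	static void myfs_rx_chunk(struct netfs_io_subrequest *subreq,
 *				  size_t chunk_len)
 *	{
 *		subreq->transferred += chunk_len;
 *		netfs_read_subreq_progress(subreq, false);
 *	}
 */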

/**
 * netfs_read_subreq_terminated - Note the termination of an I/O operation.
 * @subreq: The I/O request that has terminated.
 * @error: Error code indicating type of completion.
 * @was_async: The termination was asynchronous
 *
 * This tells the read helper that a contributory I/O operation has terminated,
 * one way or another, and that it should integrate the results.
 *
 * The caller indicates the outcome of the operation through @error, supplying
 * 0 to indicate a successful or retryable transfer (if NETFS_SREQ_NEED_RETRY
 * is set) or a negative error code.  The helper will look after reissuing I/O
 * operations as appropriate and writing downloaded data to the cache.
 *
 * Before calling, the filesystem should update subreq->transferred to track
 * the amount of data copied into the output buffer.
 *
 * If @was_async is true, the caller might be running in softirq or interrupt
 * context and we can't sleep.
 */
void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
				  int error, bool was_async)
{
	struct netfs_io_request *rreq = subreq->rreq;

	switch (subreq->source) {
	case NETFS_READ_FROM_CACHE:
		netfs_stat(&netfs_n_rh_read_done);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_stat(&netfs_n_rh_download_done);
		break;
	default:
		break;
	}

	if (rreq->origin != NETFS_DIO_READ) {
		/* Collect buffered reads.
		 *
		 * If the read completed validly short, then we can clear the
		 * tail before going on to unlock the folios.
		 */
		if (error == 0 && subreq->transferred < subreq->len &&
		    (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags) ||
		     test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags))) {
			netfs_clear_unread(subreq);
			subreq->transferred = subreq->len;
			trace_netfs_sreq(subreq, netfs_sreq_trace_clear);
		}
		if (subreq->transferred > subreq->consumed &&
		    (rreq->origin == NETFS_READAHEAD ||
		     rreq->origin == NETFS_READPAGE ||
		     rreq->origin == NETFS_READ_FOR_WRITE)) {
			netfs_consume_read_data(subreq, was_async);
			__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
		}
		rreq->transferred += subreq->transferred;
	}

	/* Deal with retry requests, short reads and errors.  If we retry
	 * but don't make progress, we abandon the attempt.
	 */
	if (!error && subreq->transferred < subreq->len) {
		if (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_hit_eof);
		} else {
			trace_netfs_sreq(subreq, netfs_sreq_trace_short);
			if (subreq->transferred > subreq->consumed) {
				__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
				__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
				set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
			} else if (!__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
				__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
				set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
			} else {
				__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
				error = -ENODATA;
			}
		}
	}

	subreq->error = error;
	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

	if (unlikely(error < 0)) {
		trace_netfs_failure(rreq, subreq, error, netfs_fail_read);
		if (subreq->source == NETFS_READ_FROM_CACHE) {
			netfs_stat(&netfs_n_rh_read_failed);
		} else {
			netfs_stat(&netfs_n_rh_download_failed);
			set_bit(NETFS_RREQ_FAILED, &rreq->flags);
			rreq->error = subreq->error;
		}
	}

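	/* If this was the last outstanding subrequest, collect and assess the
	 * results of the whole request.
	 */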
	if (atomic_dec_and_test(&rreq->nr_outstanding))
		netfs_rreq_terminated(rreq, was_async);

	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_read_subreq_terminated);
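
/*
 * Example (illustrative sketch only, not taken from a real filesystem): a
 * transport completion callback might finish off a subrequest like this,
 * recording how much data arrived and then handing the result to netfs.  The
 * myfs_* names are hypothetical.
 *
 *	static void myfs_read_done(struct netfs_io_subrequest *subreq,
 *				   ssize_t ret)
 *	{
 *		if (ret > 0)
 *			subreq->transferred += ret;
 *		netfs_read_subreq_terminated(subreq, ret < 0 ? ret : 0, false);
 *	}
 */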