1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Network filesystem high-level buffered read support.
3 *
4 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
6 */
7
8 #include <linux/export.h>
9 #include <linux/task_io_accounting_ops.h>
10 #include "internal.h"
11
netfs_cache_expand_readahead(struct netfs_io_request * rreq,unsigned long long * _start,unsigned long long * _len,unsigned long long i_size)12 static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
13 unsigned long long *_start,
14 unsigned long long *_len,
15 unsigned long long i_size)
16 {
17 struct netfs_cache_resources *cres = &rreq->cache_resources;
18
19 if (cres->ops && cres->ops->expand_readahead)
20 cres->ops->expand_readahead(cres, _start, _len, i_size);
21 }
22
netfs_rreq_expand(struct netfs_io_request * rreq,struct readahead_control * ractl)23 static void netfs_rreq_expand(struct netfs_io_request *rreq,
24 struct readahead_control *ractl)
25 {
26 /* Give the cache a chance to change the request parameters. The
27 * resultant request must contain the original region.
28 */
29 netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);
30
31 /* Give the netfs a chance to change the request parameters. The
32 * resultant request must contain the original region.
33 */
34 if (rreq->netfs_ops->expand_readahead)
35 rreq->netfs_ops->expand_readahead(rreq);
36
37 /* Expand the request if the cache wants it to start earlier. Note
38 * that the expansion may get further extended if the VM wishes to
39 * insert THPs and the preferred start and/or end wind up in the middle
40 * of THPs.
41 *
42 * If this is the case, however, the THP size should be an integer
43 * multiple of the cache granule size, so we get a whole number of
44 * granules to deal with.
45 */
46 if (rreq->start != readahead_pos(ractl) ||
47 rreq->len != readahead_length(ractl)) {
48 readahead_expand(ractl, rreq->start, rreq->len);
49 rreq->start = readahead_pos(ractl);
50 rreq->len = readahead_length(ractl);
51
52 trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
53 netfs_read_trace_expanded);
54 }
55 }
56
57 /*
58 * Begin an operation, and fetch the stored zero point value from the cookie if
59 * available.
60 */
netfs_begin_cache_read(struct netfs_io_request * rreq,struct netfs_inode * ctx)61 static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
62 {
63 return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
64 }
65
66 /*
67 * Decant the list of folios to read into a rolling buffer.
68 */
netfs_load_buffer_from_ra(struct netfs_io_request * rreq,struct folio_queue * folioq,struct folio_batch * put_batch)69 static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq,
70 struct folio_queue *folioq,
71 struct folio_batch *put_batch)
72 {
73 unsigned int order, nr;
74 size_t size = 0;
75
76 nr = __readahead_batch(rreq->ractl, (struct page **)folioq->vec.folios,
77 ARRAY_SIZE(folioq->vec.folios));
78 folioq->vec.nr = nr;
79 for (int i = 0; i < nr; i++) {
80 struct folio *folio = folioq_folio(folioq, i);
81
82 trace_netfs_folio(folio, netfs_folio_trace_read);
83 order = folio_order(folio);
84 folioq->orders[i] = order;
85 size += PAGE_SIZE << order;
86
87 if (!folio_batch_add(put_batch, folio))
88 folio_batch_release(put_batch);
89 }
90
91 for (int i = nr; i < folioq_nr_slots(folioq); i++)
92 folioq_clear(folioq, i);
93
94 return size;
95 }
96
97 /*
98 * netfs_prepare_read_iterator - Prepare the subreq iterator for I/O
99 * @subreq: The subrequest to be set up
100 *
101 * Prepare the I/O iterator representing the read buffer on a subrequest for
102 * the filesystem to use for I/O (it can be passed directly to a socket). This
103 * is intended to be called from the ->issue_read() method once the filesystem
104 * has trimmed the request to the size it wants.
105 *
106 * Returns the limited size if successful and -ENOMEM if insufficient memory
107 * available.
108 *
109 * [!] NOTE: This must be run in the same thread as ->issue_read() was called
110 * in as we access the readahead_control struct.
111 */
netfs_prepare_read_iterator(struct netfs_io_subrequest * subreq)112 static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
113 {
114 struct netfs_io_request *rreq = subreq->rreq;
115 size_t rsize = subreq->len;
116
117 if (subreq->source == NETFS_DOWNLOAD_FROM_SERVER)
118 rsize = umin(rsize, rreq->io_streams[0].sreq_max_len);
119
120 if (rreq->ractl) {
121 /* If we don't have sufficient folios in the rolling buffer,
122 * extract a folioq's worth from the readahead region at a time
123 * into the buffer. Note that this acquires a ref on each page
124 * that we will need to release later - but we don't want to do
125 * that until after we've started the I/O.
126 */
127 struct folio_batch put_batch;
128
129 folio_batch_init(&put_batch);
130 while (rreq->submitted < subreq->start + rsize) {
131 struct folio_queue *tail = rreq->buffer_tail, *new;
132 size_t added;
133
134 new = kmalloc(sizeof(*new), GFP_NOFS);
135 if (!new)
136 return -ENOMEM;
137 netfs_stat(&netfs_n_folioq);
138 folioq_init(new);
139 new->prev = tail;
140 tail->next = new;
141 rreq->buffer_tail = new;
142 added = netfs_load_buffer_from_ra(rreq, new, &put_batch);
143 rreq->iter.count += added;
144 rreq->submitted += added;
145 }
146 folio_batch_release(&put_batch);
147 }
148
149 subreq->len = rsize;
150 if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
151 size_t limit = netfs_limit_iter(&rreq->iter, 0, rsize,
152 rreq->io_streams[0].sreq_max_segs);
153
154 if (limit < rsize) {
155 subreq->len = limit;
156 trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
157 }
158 }
159
160 subreq->io_iter = rreq->iter;
161
162 if (iov_iter_is_folioq(&subreq->io_iter)) {
163 if (subreq->io_iter.folioq_slot >= folioq_nr_slots(subreq->io_iter.folioq)) {
164 subreq->io_iter.folioq = subreq->io_iter.folioq->next;
165 subreq->io_iter.folioq_slot = 0;
166 }
167 subreq->curr_folioq = (struct folio_queue *)subreq->io_iter.folioq;
168 subreq->curr_folioq_slot = subreq->io_iter.folioq_slot;
169 subreq->curr_folio_order = subreq->curr_folioq->orders[subreq->curr_folioq_slot];
170 }
171
172 iov_iter_truncate(&subreq->io_iter, subreq->len);
173 iov_iter_advance(&rreq->iter, subreq->len);
174 return subreq->len;
175 }
176
/*
 * Ask the cache where a subrequest's data should come from.  If no cache
 * operations are attached to the request, the answer is always the server.
 */
static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rreq,
						     struct netfs_io_subrequest *subreq,
						     loff_t i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops)
		return cres->ops->prepare_read(subreq, i_size);

	return NETFS_DOWNLOAD_FROM_SERVER;
}
187
/*
 * Completion callback handed to the cache's ->read() operation.  @priv is the
 * subrequest.  A negative @transferred_or_error is passed on as the error; a
 * positive value is added to the subrequest's transferred count and the
 * subrequest is then terminated with no error.
 */
static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
					bool was_async)
{
	struct netfs_io_subrequest *subreq = priv;
	ssize_t error = 0;

	if (transferred_or_error < 0)
		error = transferred_or_error;
	else if (transferred_or_error > 0)
		subreq->transferred += transferred_or_error;

	netfs_read_subreq_terminated(subreq, error, was_async);
}
202
203 /*
204 * Issue a read against the cache.
205 * - Eats the caller's ref on subreq.
206 */
netfs_read_cache_to_pagecache(struct netfs_io_request * rreq,struct netfs_io_subrequest * subreq)207 static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
208 struct netfs_io_subrequest *subreq)
209 {
210 struct netfs_cache_resources *cres = &rreq->cache_resources;
211
212 netfs_stat(&netfs_n_rh_read);
213 cres->ops->read(cres, subreq->start, &subreq->io_iter, NETFS_READ_HOLE_IGNORE,
214 netfs_cache_read_terminated, subreq);
215 }
216
217 /*
218 * Perform a read to the pagecache from a series of sources of different types,
219 * slicing up the region to be read according to available cache blocks and
220 * network rsize.
221 */
netfs_read_to_pagecache(struct netfs_io_request * rreq)222 static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
223 {
224 struct netfs_inode *ictx = netfs_inode(rreq->inode);
225 unsigned long long start = rreq->start;
226 ssize_t size = rreq->len;
227 int ret = 0;
228
229 atomic_inc(&rreq->nr_outstanding);
230
231 do {
232 struct netfs_io_subrequest *subreq;
233 enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER;
234 ssize_t slice;
235
236 subreq = netfs_alloc_subrequest(rreq);
237 if (!subreq) {
238 ret = -ENOMEM;
239 break;
240 }
241
242 subreq->start = start;
243 subreq->len = size;
244
245 atomic_inc(&rreq->nr_outstanding);
246 spin_lock_bh(&rreq->lock);
247 list_add_tail(&subreq->rreq_link, &rreq->subrequests);
248 subreq->prev_donated = rreq->prev_donated;
249 rreq->prev_donated = 0;
250 trace_netfs_sreq(subreq, netfs_sreq_trace_added);
251 spin_unlock_bh(&rreq->lock);
252
253 source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
254 subreq->source = source;
255 if (source == NETFS_DOWNLOAD_FROM_SERVER) {
256 unsigned long long zp = umin(ictx->zero_point, rreq->i_size);
257 size_t len = subreq->len;
258
259 if (subreq->start >= zp) {
260 subreq->source = source = NETFS_FILL_WITH_ZEROES;
261 goto fill_with_zeroes;
262 }
263
264 if (len > zp - subreq->start)
265 len = zp - subreq->start;
266 if (len == 0) {
267 pr_err("ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx",
268 rreq->debug_id, subreq->debug_index,
269 subreq->len, size,
270 subreq->start, ictx->zero_point, rreq->i_size);
271 break;
272 }
273 subreq->len = len;
274
275 netfs_stat(&netfs_n_rh_download);
276 if (rreq->netfs_ops->prepare_read) {
277 ret = rreq->netfs_ops->prepare_read(subreq);
278 if (ret < 0) {
279 atomic_dec(&rreq->nr_outstanding);
280 netfs_put_subrequest(subreq, false,
281 netfs_sreq_trace_put_cancel);
282 break;
283 }
284 trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
285 }
286
287 slice = netfs_prepare_read_iterator(subreq);
288 if (slice < 0) {
289 atomic_dec(&rreq->nr_outstanding);
290 netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
291 ret = slice;
292 break;
293 }
294
295 rreq->netfs_ops->issue_read(subreq);
296 goto done;
297 }
298
299 fill_with_zeroes:
300 if (source == NETFS_FILL_WITH_ZEROES) {
301 subreq->source = NETFS_FILL_WITH_ZEROES;
302 trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
303 netfs_stat(&netfs_n_rh_zero);
304 slice = netfs_prepare_read_iterator(subreq);
305 __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
306 netfs_read_subreq_terminated(subreq, 0, false);
307 goto done;
308 }
309
310 if (source == NETFS_READ_FROM_CACHE) {
311 trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
312 slice = netfs_prepare_read_iterator(subreq);
313 netfs_read_cache_to_pagecache(rreq, subreq);
314 goto done;
315 }
316
317 pr_err("Unexpected read source %u\n", source);
318 WARN_ON_ONCE(1);
319 break;
320
321 done:
322 size -= slice;
323 start += slice;
324 cond_resched();
325 } while (size > 0);
326
327 if (atomic_dec_and_test(&rreq->nr_outstanding))
328 netfs_rreq_terminated(rreq, false);
329
330 /* Defer error return as we may need to wait for outstanding I/O. */
331 cmpxchg(&rreq->error, 0, ret);
332 }
333
334 /*
335 * Wait for the read operation to complete, successfully or otherwise.
336 */
netfs_wait_for_read(struct netfs_io_request * rreq)337 static int netfs_wait_for_read(struct netfs_io_request *rreq)
338 {
339 int ret;
340
341 trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
342 wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
343 ret = rreq->error;
344 if (ret == 0 && rreq->submitted < rreq->len) {
345 trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
346 ret = -EIO;
347 }
348
349 return ret;
350 }
351
352 /*
353 * Set up the initial folioq of buffer folios in the rolling buffer and set the
354 * iterator to refer to it.
355 */
netfs_prime_buffer(struct netfs_io_request * rreq)356 static int netfs_prime_buffer(struct netfs_io_request *rreq)
357 {
358 struct folio_queue *folioq;
359 struct folio_batch put_batch;
360 size_t added;
361
362 folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
363 if (!folioq)
364 return -ENOMEM;
365 netfs_stat(&netfs_n_folioq);
366 folioq_init(folioq);
367 rreq->buffer = folioq;
368 rreq->buffer_tail = folioq;
369 rreq->submitted = rreq->start;
370 iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, 0);
371
372 folio_batch_init(&put_batch);
373 added = netfs_load_buffer_from_ra(rreq, folioq, &put_batch);
374 folio_batch_release(&put_batch);
375 rreq->iter.count += added;
376 rreq->submitted += added;
377 return 0;
378 }
379
380 /**
381 * netfs_readahead - Helper to manage a read request
382 * @ractl: The description of the readahead request
383 *
384 * Fulfil a readahead request by drawing data from the cache if possible, or
385 * the netfs if not. Space beyond the EOF is zero-filled. Multiple I/O
386 * requests from different sources will get munged together. If necessary, the
387 * readahead window can be expanded in either direction to a more convenient
388 * alighment for RPC efficiency or to make storage in the cache feasible.
389 *
390 * The calling netfs must initialise a netfs context contiguous to the vfs
391 * inode before calling this.
392 *
393 * This is usable whether or not caching is enabled.
394 */
netfs_readahead(struct readahead_control * ractl)395 void netfs_readahead(struct readahead_control *ractl)
396 {
397 struct netfs_io_request *rreq;
398 struct netfs_inode *ictx = netfs_inode(ractl->mapping->host);
399 unsigned long long start = readahead_pos(ractl);
400 size_t size = readahead_length(ractl);
401 int ret;
402
403 rreq = netfs_alloc_request(ractl->mapping, ractl->file, start, size,
404 NETFS_READAHEAD);
405 if (IS_ERR(rreq))
406 return;
407
408 ret = netfs_begin_cache_read(rreq, ictx);
409 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
410 goto cleanup_free;
411
412 netfs_stat(&netfs_n_rh_readahead);
413 trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
414 netfs_read_trace_readahead);
415
416 netfs_rreq_expand(rreq, ractl);
417
418 rreq->ractl = ractl;
419 if (netfs_prime_buffer(rreq) < 0)
420 goto cleanup_free;
421 netfs_read_to_pagecache(rreq);
422
423 netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
424 return;
425
426 cleanup_free:
427 netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
428 return;
429 }
430 EXPORT_SYMBOL(netfs_readahead);
431
432 /*
433 * Create a rolling buffer with a single occupying folio.
434 */
netfs_create_singular_buffer(struct netfs_io_request * rreq,struct folio * folio)435 static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio)
436 {
437 struct folio_queue *folioq;
438
439 folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
440 if (!folioq)
441 return -ENOMEM;
442
443 netfs_stat(&netfs_n_folioq);
444 folioq_init(folioq);
445 folioq_append(folioq, folio);
446 BUG_ON(folioq_folio(folioq, 0) != folio);
447 BUG_ON(folioq_folio_order(folioq, 0) != folio_order(folio));
448 rreq->buffer = folioq;
449 rreq->buffer_tail = folioq;
450 rreq->submitted = rreq->start + rreq->len;
451 iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, rreq->len);
452 rreq->ractl = (struct readahead_control *)1UL;
453 return 0;
454 }
455
456 /*
457 * Read into gaps in a folio partially filled by a streaming write.
458 */
netfs_read_gaps(struct file * file,struct folio * folio)459 static int netfs_read_gaps(struct file *file, struct folio *folio)
460 {
461 struct netfs_io_request *rreq;
462 struct address_space *mapping = folio->mapping;
463 struct netfs_folio *finfo = netfs_folio_info(folio);
464 struct netfs_inode *ctx = netfs_inode(mapping->host);
465 struct folio *sink = NULL;
466 struct bio_vec *bvec;
467 unsigned int from = finfo->dirty_offset;
468 unsigned int to = from + finfo->dirty_len;
469 unsigned int off = 0, i = 0;
470 size_t flen = folio_size(folio);
471 size_t nr_bvec = flen / PAGE_SIZE + 2;
472 size_t part;
473 int ret;
474
475 _enter("%lx", folio->index);
476
477 rreq = netfs_alloc_request(mapping, file, folio_pos(folio), flen, NETFS_READ_GAPS);
478 if (IS_ERR(rreq)) {
479 ret = PTR_ERR(rreq);
480 goto alloc_error;
481 }
482
483 ret = netfs_begin_cache_read(rreq, ctx);
484 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
485 goto discard;
486
487 netfs_stat(&netfs_n_rh_read_folio);
488 trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_read_gaps);
489
490 /* Fiddle the buffer so that a gap at the beginning and/or a gap at the
491 * end get copied to, but the middle is discarded.
492 */
493 ret = -ENOMEM;
494 bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL);
495 if (!bvec)
496 goto discard;
497
498 sink = folio_alloc(GFP_KERNEL, 0);
499 if (!sink) {
500 kfree(bvec);
501 goto discard;
502 }
503
504 trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
505
506 rreq->direct_bv = bvec;
507 rreq->direct_bv_count = nr_bvec;
508 if (from > 0) {
509 bvec_set_folio(&bvec[i++], folio, from, 0);
510 off = from;
511 }
512 while (off < to) {
513 part = min_t(size_t, to - off, PAGE_SIZE);
514 bvec_set_folio(&bvec[i++], sink, part, 0);
515 off += part;
516 }
517 if (to < flen)
518 bvec_set_folio(&bvec[i++], folio, flen - to, to);
519 iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len);
520 rreq->submitted = rreq->start + flen;
521
522 netfs_read_to_pagecache(rreq);
523
524 if (sink)
525 folio_put(sink);
526
527 ret = netfs_wait_for_read(rreq);
528 if (ret == 0) {
529 flush_dcache_folio(folio);
530 folio_mark_uptodate(folio);
531 }
532 folio_unlock(folio);
533 netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
534 return ret < 0 ? ret : 0;
535
536 discard:
537 netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
538 alloc_error:
539 folio_unlock(folio);
540 return ret;
541 }
542
543 /**
544 * netfs_read_folio - Helper to manage a read_folio request
545 * @file: The file to read from
546 * @folio: The folio to read
547 *
548 * Fulfil a read_folio request by drawing data from the cache if
549 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
550 * Multiple I/O requests from different sources will get munged together.
551 *
552 * The calling netfs must initialise a netfs context contiguous to the vfs
553 * inode before calling this.
554 *
555 * This is usable whether or not caching is enabled.
556 */
netfs_read_folio(struct file * file,struct folio * folio)557 int netfs_read_folio(struct file *file, struct folio *folio)
558 {
559 struct address_space *mapping = folio->mapping;
560 struct netfs_io_request *rreq;
561 struct netfs_inode *ctx = netfs_inode(mapping->host);
562 int ret;
563
564 if (folio_test_dirty(folio)) {
565 trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
566 return netfs_read_gaps(file, folio);
567 }
568
569 _enter("%lx", folio->index);
570
571 rreq = netfs_alloc_request(mapping, file,
572 folio_pos(folio), folio_size(folio),
573 NETFS_READPAGE);
574 if (IS_ERR(rreq)) {
575 ret = PTR_ERR(rreq);
576 goto alloc_error;
577 }
578
579 ret = netfs_begin_cache_read(rreq, ctx);
580 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
581 goto discard;
582
583 netfs_stat(&netfs_n_rh_read_folio);
584 trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
585
586 /* Set up the output buffer */
587 ret = netfs_create_singular_buffer(rreq, folio);
588 if (ret < 0)
589 goto discard;
590
591 netfs_read_to_pagecache(rreq);
592 ret = netfs_wait_for_read(rreq);
593 netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
594 return ret < 0 ? ret : 0;
595
596 discard:
597 netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
598 alloc_error:
599 folio_unlock(folio);
600 return ret;
601 }
602 EXPORT_SYMBOL(netfs_read_folio);
603
604 /*
605 * Prepare a folio for writing without reading first
606 * @folio: The folio being prepared
607 * @pos: starting position for the write
608 * @len: length of write
609 * @always_fill: T if the folio should always be completely filled/cleared
610 *
611 * In some cases, write_begin doesn't need to read at all:
612 * - full folio write
613 * - write that lies in a folio that is completely beyond EOF
614 * - write that covers the folio from start to EOF or beyond it
615 *
616 * If any of these criteria are met, then zero out the unwritten parts
617 * of the folio and return true. Otherwise, return false.
618 */
netfs_skip_folio_read(struct folio * folio,loff_t pos,size_t len,bool always_fill)619 static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
620 bool always_fill)
621 {
622 struct inode *inode = folio_inode(folio);
623 loff_t i_size = i_size_read(inode);
624 size_t offset = offset_in_folio(folio, pos);
625 size_t plen = folio_size(folio);
626
627 if (unlikely(always_fill)) {
628 if (pos - offset + len <= i_size)
629 return false; /* Page entirely before EOF */
630 zero_user_segment(&folio->page, 0, plen);
631 folio_mark_uptodate(folio);
632 return true;
633 }
634
635 /* Full folio write */
636 if (offset == 0 && len >= plen)
637 return true;
638
639 /* Page entirely beyond the end of the file */
640 if (pos - offset >= i_size)
641 goto zero_out;
642
643 /* Write that covers from the start of the folio to EOF or beyond */
644 if (offset == 0 && (pos + len) >= i_size)
645 goto zero_out;
646
647 return false;
648 zero_out:
649 zero_user_segments(&folio->page, 0, offset, offset + len, plen);
650 return true;
651 }
652
653 /**
654 * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
655 * @ctx: The netfs context
656 * @file: The file to read from
657 * @mapping: The mapping to read from
658 * @pos: File position at which the write will begin
659 * @len: The length of the write (may extend beyond the end of the folio chosen)
660 * @_folio: Where to put the resultant folio
661 * @_fsdata: Place for the netfs to store a cookie
662 *
663 * Pre-read data for a write-begin request by drawing data from the cache if
664 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
665 * Multiple I/O requests from different sources will get munged together.
666 *
667 * The calling netfs must provide a table of operations, only one of which,
668 * issue_read, is mandatory.
669 *
670 * The check_write_begin() operation can be provided to check for and flush
671 * conflicting writes once the folio is grabbed and locked. It is passed a
672 * pointer to the fsdata cookie that gets returned to the VM to be passed to
673 * write_end. It is permitted to sleep. It should return 0 if the request
674 * should go ahead or it may return an error. It may also unlock and put the
675 * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
676 * will cause the folio to be re-got and the process to be retried.
677 *
678 * The calling netfs must initialise a netfs context contiguous to the vfs
679 * inode before calling this.
680 *
681 * This is usable whether or not caching is enabled.
682 *
683 * Note that this should be considered deprecated and netfs_perform_write()
684 * used instead.
685 */
netfs_write_begin(struct netfs_inode * ctx,struct file * file,struct address_space * mapping,loff_t pos,unsigned int len,struct folio ** _folio,void ** _fsdata)686 int netfs_write_begin(struct netfs_inode *ctx,
687 struct file *file, struct address_space *mapping,
688 loff_t pos, unsigned int len, struct folio **_folio,
689 void **_fsdata)
690 {
691 struct netfs_io_request *rreq;
692 struct folio *folio;
693 pgoff_t index = pos >> PAGE_SHIFT;
694 int ret;
695
696 retry:
697 folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
698 mapping_gfp_mask(mapping));
699 if (IS_ERR(folio))
700 return PTR_ERR(folio);
701
702 if (ctx->ops->check_write_begin) {
703 /* Allow the netfs (eg. ceph) to flush conflicts. */
704 ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
705 if (ret < 0) {
706 trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
707 goto error;
708 }
709 if (!folio)
710 goto retry;
711 }
712
713 if (folio_test_uptodate(folio))
714 goto have_folio;
715
716 /* If the page is beyond the EOF, we want to clear it - unless it's
717 * within the cache granule containing the EOF, in which case we need
718 * to preload the granule.
719 */
720 if (!netfs_is_cache_enabled(ctx) &&
721 netfs_skip_folio_read(folio, pos, len, false)) {
722 netfs_stat(&netfs_n_rh_write_zskip);
723 goto have_folio_no_wait;
724 }
725
726 rreq = netfs_alloc_request(mapping, file,
727 folio_pos(folio), folio_size(folio),
728 NETFS_READ_FOR_WRITE);
729 if (IS_ERR(rreq)) {
730 ret = PTR_ERR(rreq);
731 goto error;
732 }
733 rreq->no_unlock_folio = folio->index;
734 __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
735
736 ret = netfs_begin_cache_read(rreq, ctx);
737 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
738 goto error_put;
739
740 netfs_stat(&netfs_n_rh_write_begin);
741 trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);
742
743 /* Set up the output buffer */
744 ret = netfs_create_singular_buffer(rreq, folio);
745 if (ret < 0)
746 goto error_put;
747
748 netfs_read_to_pagecache(rreq);
749 ret = netfs_wait_for_read(rreq);
750 if (ret < 0)
751 goto error;
752 netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
753
754 have_folio:
755 ret = folio_wait_private_2_killable(folio);
756 if (ret < 0)
757 goto error;
758 have_folio_no_wait:
759 *_folio = folio;
760 _leave(" = 0");
761 return 0;
762
763 error_put:
764 netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
765 error:
766 if (folio) {
767 folio_unlock(folio);
768 folio_put(folio);
769 }
770 _leave(" = %d", ret);
771 return ret;
772 }
773 EXPORT_SYMBOL(netfs_write_begin);
774
775 /*
776 * Preload the data into a page we're proposing to write into.
777 */
netfs_prefetch_for_write(struct file * file,struct folio * folio,size_t offset,size_t len)778 int netfs_prefetch_for_write(struct file *file, struct folio *folio,
779 size_t offset, size_t len)
780 {
781 struct netfs_io_request *rreq;
782 struct address_space *mapping = folio->mapping;
783 struct netfs_inode *ctx = netfs_inode(mapping->host);
784 unsigned long long start = folio_pos(folio);
785 size_t flen = folio_size(folio);
786 int ret;
787
788 _enter("%zx @%llx", flen, start);
789
790 ret = -ENOMEM;
791
792 rreq = netfs_alloc_request(mapping, file, start, flen,
793 NETFS_READ_FOR_WRITE);
794 if (IS_ERR(rreq)) {
795 ret = PTR_ERR(rreq);
796 goto error;
797 }
798
799 rreq->no_unlock_folio = folio->index;
800 __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
801 ret = netfs_begin_cache_read(rreq, ctx);
802 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
803 goto error_put;
804
805 netfs_stat(&netfs_n_rh_write_begin);
806 trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);
807
808 /* Set up the output buffer */
809 ret = netfs_create_singular_buffer(rreq, folio);
810 if (ret < 0)
811 goto error_put;
812
813 folioq_mark2(rreq->buffer, 0);
814 netfs_read_to_pagecache(rreq);
815 ret = netfs_wait_for_read(rreq);
816 netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
817 return ret;
818
819 error_put:
820 netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
821 error:
822 _leave(" = %d", ret);
823 return ret;
824 }
825
826 /**
827 * netfs_buffered_read_iter - Filesystem buffered I/O read routine
828 * @iocb: kernel I/O control block
829 * @iter: destination for the data read
830 *
831 * This is the ->read_iter() routine for all filesystems that can use the page
832 * cache directly.
833 *
834 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
835 * returned when no data can be read without waiting for I/O requests to
836 * complete; it doesn't prevent readahead.
837 *
838 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
839 * shall be made for the read or for readahead. When no data can be read,
840 * -EAGAIN shall be returned. When readahead would be triggered, a partial,
841 * possibly empty read shall be returned.
842 *
843 * Return:
844 * * number of bytes copied, even for partial reads
845 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
846 */
netfs_buffered_read_iter(struct kiocb * iocb,struct iov_iter * iter)847 ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
848 {
849 struct inode *inode = file_inode(iocb->ki_filp);
850 struct netfs_inode *ictx = netfs_inode(inode);
851 ssize_t ret;
852
853 if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) ||
854 test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)))
855 return -EINVAL;
856
857 ret = netfs_start_io_read(inode);
858 if (ret == 0) {
859 ret = filemap_read(iocb, iter, 0);
860 netfs_end_io_read(inode);
861 }
862 return ret;
863 }
864 EXPORT_SYMBOL(netfs_buffered_read_iter);
865
866 /**
867 * netfs_file_read_iter - Generic filesystem read routine
868 * @iocb: kernel I/O control block
869 * @iter: destination for the data read
870 *
871 * This is the ->read_iter() routine for all filesystems that can use the page
872 * cache directly.
873 *
874 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
875 * returned when no data can be read without waiting for I/O requests to
876 * complete; it doesn't prevent readahead.
877 *
878 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
879 * shall be made for the read or for readahead. When no data can be read,
880 * -EAGAIN shall be returned. When readahead would be triggered, a partial,
881 * possibly empty read shall be returned.
882 *
883 * Return:
884 * * number of bytes copied, even for partial reads
885 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
886 */
netfs_file_read_iter(struct kiocb * iocb,struct iov_iter * iter)887 ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
888 {
889 struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host);
890
891 if ((iocb->ki_flags & IOCB_DIRECT) ||
892 test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
893 return netfs_unbuffered_read_iter(iocb, iter);
894
895 return netfs_buffered_read_iter(iocb, iter);
896 }
897 EXPORT_SYMBOL(netfs_file_read_iter);
898