1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * Copyright (C) 2007 Jens Axboe <jens.axboe@oracle.com>
4   *
5   * Scatterlist handling helpers.
6   */
7  #include <linux/export.h>
8  #include <linux/slab.h>
9  #include <linux/scatterlist.h>
10  #include <linux/highmem.h>
11  #include <linux/kmemleak.h>
12  #include <linux/bvec.h>
13  #include <linux/uio.h>
14  #include <linux/folio_queue.h>
15  
16  /**
17   * sg_next - return the next scatterlist entry in a list
18   * @sg:		The current sg entry
19   *
20   * Description:
21   *   Usually the next entry will be @sg@ + 1, but if this sg element is part
22   *   of a chained scatterlist, it could jump to the start of a new
23   *   scatterlist array.
24   *
25   **/
sg_next(struct scatterlist * sg)26  struct scatterlist *sg_next(struct scatterlist *sg)
27  {
28  	if (sg_is_last(sg))
29  		return NULL;
30  
31  	sg++;
32  	if (unlikely(sg_is_chain(sg)))
33  		sg = sg_chain_ptr(sg);
34  
35  	return sg;
36  }
37  EXPORT_SYMBOL(sg_next);
38  
39  /**
40   * sg_nents - return total count of entries in scatterlist
41   * @sg:		The scatterlist
42   *
43   * Description:
44   * Allows to know how many entries are in sg, taking into account
45   * chaining as well
46   *
47   **/
int sg_nents(struct scatterlist *sg)
{
	int count = 0;

	/* Step with sg_next() so chain links are followed rather than counted. */
	while (sg) {
		count++;
		sg = sg_next(sg);
	}

	return count;
}
EXPORT_SYMBOL(sg_nents);
56  
57  /**
58   * sg_nents_for_len - return total count of entries in scatterlist
59   *                    needed to satisfy the supplied length
60   * @sg:		The scatterlist
61   * @len:	The total required length
62   *
63   * Description:
64   * Determines the number of entries in sg that are required to meet
65   * the supplied length, taking into account chaining as well
66   *
67   * Returns:
68   *   the number of sg entries needed, negative error on failure
69   *
70   **/
int sg_nents_for_len(struct scatterlist *sg, u64 len)
{
	u64 covered = 0;
	int count = 0;

	/* A zero-length request needs no entries at all. */
	if (len == 0)
		return 0;

	while (sg) {
		count++;
		covered += sg->length;
		/* Stop at the first entry that brings the total up to @len. */
		if (covered >= len)
			return count;
		sg = sg_next(sg);
	}

	/* The list ended before @len bytes were covered. */
	return -EINVAL;
}
EXPORT_SYMBOL(sg_nents_for_len);
89  
90  /**
91   * sg_last - return the last scatterlist entry in a list
92   * @sgl:	First entry in the scatterlist
93   * @nents:	Number of entries in the scatterlist
94   *
95   * Description:
96   *   Should only be used casually, it (currently) scans the entire list
97   *   to get the last entry.
98   *
99   *   Note that the @sgl@ pointer passed in need not be the first one,
100   *   the important bit is that @nents@ denotes the number of entries that
101   *   exist from @sgl@.
102   *
103   **/
sg_last(struct scatterlist * sgl,unsigned int nents)104  struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
105  {
106  	struct scatterlist *sg, *ret = NULL;
107  	unsigned int i;
108  
109  	for_each_sg(sgl, sg, nents, i)
110  		ret = sg;
111  
112  	BUG_ON(!sg_is_last(ret));
113  	return ret;
114  }
115  EXPORT_SYMBOL(sg_last);
116  
117  /**
118   * sg_init_table - Initialize SG table
119   * @sgl:	   The SG table
120   * @nents:	   Number of entries in table
121   *
122   * Notes:
123   *   If this is part of a chained sg table, sg_mark_end() should be
124   *   used only on the last table part.
125   *
126   **/
sg_init_table(struct scatterlist * sgl,unsigned int nents)127  void sg_init_table(struct scatterlist *sgl, unsigned int nents)
128  {
129  	memset(sgl, 0, sizeof(*sgl) * nents);
130  	sg_init_marker(sgl, nents);
131  }
132  EXPORT_SYMBOL(sg_init_table);
133  
134  /**
135   * sg_init_one - Initialize a single entry sg list
136   * @sg:		 SG entry
137   * @buf:	 Virtual address for IO
138   * @buflen:	 IO length
139   *
140   **/
void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
{
	const unsigned int single_entry = 1;

	/* Set up a one-entry table, then point that entry at @buf. */
	sg_init_table(sg, single_entry);
	sg_set_buf(sg, buf, buflen);
}
EXPORT_SYMBOL(sg_init_one);
147  
148  /*
149   * The default behaviour of sg_alloc_table() is to use these kmalloc/kfree
150   * helpers.
151   */
static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
{
	void *page_mem;

	/* Anything other than a full-size chunk comes from kmalloc. */
	if (nents != SG_MAX_SINGLE_ALLOC)
		return kmalloc_array(nents, sizeof(struct scatterlist),
				     gfp_mask);

	/*
	 * A full-size chunk is backed by a raw page.  Kmemleak doesn't track
	 * page allocations as they are not commonly used (in a raw form) for
	 * kernel data structures.  Since a list of such pages is chained
	 * together and ends in a normal kmalloc (which kmemleak does track),
	 * kmemleak must be told about every intermediate allocation;
	 * otherwise that last allocation would look unreferenced and be
	 * reported as a false positive.
	 */
	page_mem = (void *)__get_free_page(gfp_mask);
	kmemleak_alloc(page_mem, PAGE_SIZE, 1, gfp_mask);
	return page_mem;
}
171  
sg_kfree(struct scatterlist * sg,unsigned int nents)172  static void sg_kfree(struct scatterlist *sg, unsigned int nents)
173  {
174  	if (nents == SG_MAX_SINGLE_ALLOC) {
175  		kmemleak_free(sg);
176  		free_page((unsigned long) sg);
177  	} else
178  		kfree(sg);
179  }
180  
181  /**
182   * __sg_free_table - Free a previously mapped sg table
183   * @table:	The sg table header to use
184   * @max_ents:	The maximum number of entries per single scatterlist
 * @nents_first_chunk: Number of entries in the (preallocated) first
186   * 	scatterlist chunk, 0 means no such preallocated first chunk
187   * @free_fn:	Free function
188   * @num_ents:	Number of entries in the table
189   *
190   *  Description:
191   *    Free an sg table previously allocated and setup with
192   *    __sg_alloc_table().  The @max_ents value must be identical to
193   *    that previously used with __sg_alloc_table().
194   *
195   **/
void __sg_free_table(struct sg_table *table, unsigned int max_ents,
		     unsigned int nents_first_chunk, sg_free_fn *free_fn,
		     unsigned int num_ents)
{
	struct scatterlist *chunk, *following;
	/* Capacity of the chunk being visited; the first one may be special. */
	unsigned int chunk_cap = nents_first_chunk ? nents_first_chunk : max_ents;

	if (unlikely(!table->sgl))
		return;

	for (chunk = table->sgl; num_ents; chunk = following) {
		unsigned int alloc_size, sg_size;

		if (num_ents > chunk_cap) {
			/*
			 * More entries remain than this chunk can hold, so
			 * its last slot is a chain pointer to the next chunk.
			 * That slot was allocated but carries no data entry.
			 */
			following = sg_chain_ptr(&chunk[chunk_cap - 1]);
			alloc_size = chunk_cap;
			sg_size = alloc_size - 1;
		} else {
			/* Final chunk: every remaining entry lives here. */
			following = NULL;
			alloc_size = num_ents;
			sg_size = num_ents;
		}

		num_ents -= sg_size;

		/*
		 * A preallocated first chunk is not passed to @free_fn; it is
		 * only skipped, after which the flag is cleared so that all
		 * later chunks are freed.
		 */
		if (nents_first_chunk)
			nents_first_chunk = 0;
		else
			free_fn(chunk, alloc_size);

		/* Every chunk after the first holds up to @max_ents slots. */
		chunk_cap = max_ents;
	}

	table->sgl = NULL;
}
EXPORT_SYMBOL(__sg_free_table);
238  
239  /**
240   * sg_free_append_table - Free a previously allocated append sg table.
241   * @table:	 The mapped sg append table header
242   *
243   **/
sg_free_append_table(struct sg_append_table * table)244  void sg_free_append_table(struct sg_append_table *table)
245  {
246  	__sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
247  			table->total_nents);
248  }
249  EXPORT_SYMBOL(sg_free_append_table);
250  
251  
252  /**
253   * sg_free_table - Free a previously allocated sg table
254   * @table:	The mapped sg table header
255   *
256   **/
sg_free_table(struct sg_table * table)257  void sg_free_table(struct sg_table *table)
258  {
259  	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
260  			table->orig_nents);
261  }
262  EXPORT_SYMBOL(sg_free_table);
263  
264  /**
265   * __sg_alloc_table - Allocate and initialize an sg table with given allocator
266   * @table:	The sg table header to use
267   * @nents:	Number of entries in sg list
268   * @max_ents:	The maximum number of entries the allocator returns per call
269   * @first_chunk: first SGL if preallocated (may be %NULL)
270   * @nents_first_chunk: Number of entries in the (preallocated) first
271   * 	scatterlist chunk, 0 means no such preallocated chunk provided by user
272   * @gfp_mask:	GFP allocation mask
273   * @alloc_fn:	Allocator to use
274   *
275   * Description:
276   *   This function returns a @table @nents long. The allocator is
277   *   defined to return scatterlist chunks of maximum size @max_ents.
278   *   Thus if @nents is bigger than @max_ents, the scatterlists will be
279   *   chained in units of @max_ents.
280   *
281   * Notes:
282   *   If this function returns non-0 (eg failure), the caller must call
283   *   __sg_free_table() to cleanup any leftover allocations.
284   *
285   **/
int __sg_alloc_table(struct sg_table *table, unsigned int nents,
		     unsigned int max_ents, struct scatterlist *first_chunk,
		     unsigned int nents_first_chunk, gfp_t gfp_mask,
		     sg_alloc_fn *alloc_fn)
{
	struct scatterlist *chunk, *prev_chunk = NULL;
	unsigned int remaining = nents;
	/* Capacity of the chunk being set up; the first one may be special. */
	unsigned int chunk_cap = nents_first_chunk ? nents_first_chunk : max_ents;
	unsigned int prev_cap = 0;

	memset(table, 0, sizeof(*table));

	if (!nents)
		return -EINVAL;
#ifdef CONFIG_ARCH_NO_SG_CHAIN
	if (WARN_ON_ONCE(nents > max_ents))
		return -EINVAL;
#endif

	/* @nents is non-zero here, so the body runs at least once. */
	while (remaining) {
		unsigned int alloc_size, sg_size;

		if (remaining > chunk_cap) {
			/* Full chunk: its last slot is kept for the chain link. */
			alloc_size = chunk_cap;
			sg_size = chunk_cap - 1;
		} else {
			alloc_size = remaining;
			sg_size = remaining;
		}
		remaining -= sg_size;

		/* Use the caller's preallocated chunk once, then allocate. */
		if (first_chunk) {
			chunk = first_chunk;
			first_chunk = NULL;
		} else {
			chunk = alloc_fn(alloc_size, gfp_mask);
		}

		if (unlikely(!chunk)) {
			/*
			 * Adjust entry count to reflect that the last
			 * entry of the previous table won't be used for
			 * linkage.  Without this, sg_kfree() may get
			 * confused.
			 */
			if (prev_chunk) {
				table->orig_nents++;
				table->nents = table->orig_nents;
			}

			return -ENOMEM;
		}

		sg_init_table(chunk, alloc_size);
		table->orig_nents += sg_size;
		table->nents = table->orig_nents;

		/*
		 * The first chunk becomes the table head; every later chunk
		 * is chained onto the one before it.
		 */
		if (prev_chunk)
			sg_chain(prev_chunk, prev_cap, chunk);
		else
			table->sgl = chunk;

		/* Nothing left to place: terminate the list here. */
		if (!remaining)
			sg_mark_end(&chunk[sg_size - 1]);

		prev_chunk = chunk;
		prev_cap = chunk_cap;
		chunk_cap = max_ents;
	}

	return 0;
}
EXPORT_SYMBOL(__sg_alloc_table);
363  
364  /**
365   * sg_alloc_table - Allocate and initialize an sg table
366   * @table:	The sg table header to use
367   * @nents:	Number of entries in sg list
368   * @gfp_mask:	GFP allocation mask
369   *
370   *  Description:
371   *    Allocate and initialize an sg table. If @nents@ is larger than
372   *    SG_MAX_SINGLE_ALLOC a chained sg table will be setup.
373   *
374   **/
int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
{
	int err = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
				   NULL, 0, gfp_mask, sg_kmalloc);

	if (likely(!err))
		return 0;

	/* Release whatever chunks were set up before the failure. */
	sg_free_table(table);
	return err;
}
EXPORT_SYMBOL(sg_alloc_table);
386  
/*
 * Return the entry to fill after @cur in an append table, allocating and
 * linking a new chunk when needed.  @needed_sges is the number of entries
 * the caller may still need, including the one being requested.
 * Returns ERR_PTR(-ENOMEM) if a new chunk cannot be allocated.
 */
static struct scatterlist *get_next_sg(struct sg_append_table *table,
				       struct scatterlist *cur,
				       unsigned long needed_sges,
				       gfp_t gfp_mask)
{
	struct scatterlist *fresh, *after_cur;
	unsigned int alloc_size;

	if (cur) {
		after_cur = sg_next(cur);
		/*
		 * Check if the last entry should be kept for chaining: it
		 * may be used for data only when it is the final entry
		 * needed.
		 */
		if (!sg_is_last(after_cur) || needed_sges == 1)
			return after_cur;
	}

	alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC);
	fresh = sg_kmalloc(alloc_size, gfp_mask);
	if (!fresh)
		return ERR_PTR(-ENOMEM);
	sg_init_table(fresh, alloc_size);

	if (!cur) {
		/* Very first chunk: it becomes the head of the table. */
		table->sgt.sgl = fresh;
		table->total_nents = alloc_size;
		return fresh;
	}

	/*
	 * The old last slot turns into the chain link, so only
	 * alloc_size - 1 slots are added to the total.
	 */
	table->total_nents += alloc_size - 1;
	__sg_chain(after_cur, fresh);
	return fresh;
}
416  
pages_are_mergeable(struct page * a,struct page * b)417  static bool pages_are_mergeable(struct page *a, struct page *b)
418  {
419  	if (page_to_pfn(a) != page_to_pfn(b) + 1)
420  		return false;
421  	if (!zone_device_pages_have_same_pgmap(a, b))
422  		return false;
423  	return true;
424  }
425  
426  /**
427   * sg_alloc_append_table_from_pages - Allocate and initialize an append sg
428   *                                    table from an array of pages
429   * @sgt_append:  The sg append table to use
430   * @pages:       Pointer to an array of page pointers
431   * @n_pages:     Number of pages in the pages array
432   * @offset:      Offset from start of the first page to the start of a buffer
433   * @size:        Number of valid bytes in the buffer (after offset)
434   * @max_segment: Maximum size of a scatterlist element in bytes
435   * @left_pages:  Left pages caller have to set after this call
436   * @gfp_mask:	 GFP allocation mask
437   *
438   * Description:
 *    In the first call it allocates and initializes an sg table from a list of
440   *    pages, else reuse the scatterlist from sgt_append. Contiguous ranges of
441   *    the pages are squashed into a single scatterlist entry up to the maximum
442   *    size specified in @max_segment.  A user may provide an offset at a start
443   *    and a size of valid data in a buffer specified by the page array. The
444   *    returned sg table is released by sg_free_append_table
445   *
446   * Returns:
447   *   0 on success, negative error on failure
448   *
449   * Notes:
450   *   If this function returns non-0 (eg failure), the caller must call
451   *   sg_free_append_table() to cleanup any leftover allocations.
452   *
 *   In the first call, sgt_append must be initialized.
454   */
int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
		struct page **pages, unsigned int n_pages, unsigned int offset,
		unsigned long size, unsigned int max_segment,
		unsigned int left_pages, gfp_t gfp_mask)
{
	/* Last entry of the table on entry; NULL on the first call. */
	struct scatterlist *prv_ent = sgt_append->prv;
	struct scatterlist *tail = prv_ent;
	unsigned int n_chunks, first, run_len, idx, saved_len = 0;
	unsigned int new_ents = 0;
	struct page *prev_page;

	/*
	 * The algorithm below requires max_segment to be aligned to PAGE_SIZE
	 * otherwise it can overshoot.
	 */
	max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE);
	if (WARN_ON(max_segment < PAGE_SIZE))
		return -EINVAL;

	/* Appending to an existing table is refused when chaining is off. */
	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv_ent)
		return -EOPNOTSUPP;

	if (prv_ent) {
		/* pfn that immediately follows the data of the last entry */
		unsigned long next_pfn = (page_to_phys(sg_page(prv_ent)) +
			prv_ent->offset + prv_ent->length) / PAGE_SIZE;

		/* A start offset is only accepted on the first call. */
		if (WARN_ON(offset))
			return -EINVAL;

		/* Merge contiguous pages into the last SG */
		saved_len = prv_ent->length;
		if (page_to_pfn(pages[0]) == next_pfn) {
			prev_page = pfn_to_page(next_pfn - 1);
			while (n_pages && pages_are_mergeable(pages[0], prev_page)) {
				if (prv_ent->length + PAGE_SIZE > max_segment)
					break;
				prv_ent->length += PAGE_SIZE;
				prev_page = pages[0];
				pages++;
				n_pages--;
			}
			/* Everything was absorbed by the existing entry. */
			if (!n_pages)
				goto out;
		}
	}

	/* compute number of contiguous chunks */
	n_chunks = 1;
	run_len = 0;
	for (idx = 1; idx < n_pages; idx++) {
		run_len += PAGE_SIZE;
		if (run_len >= max_segment ||
		    !pages_are_mergeable(pages[idx], pages[idx - 1])) {
			n_chunks++;
			run_len = 0;
		}
	}

	/* merging chunks and putting them into the scatterlist */
	first = 0;
	for (idx = 0; idx < n_chunks; idx++) {
		unsigned int end, chunk_bytes;

		/* look for the end of the current chunk */
		run_len = 0;
		for (end = first + 1; end < n_pages; end++) {
			run_len += PAGE_SIZE;
			if (run_len >= max_segment ||
			    !pages_are_mergeable(pages[end], pages[end - 1]))
				break;
		}

		/* Pass how many chunks might be left */
		tail = get_next_sg(sgt_append, tail,
				   n_chunks - idx + left_pages, gfp_mask);
		if (IS_ERR(tail)) {
			/*
			 * Adjust entry length to be as before function was
			 * called.
			 */
			if (prv_ent)
				prv_ent->length = saved_len;
			return PTR_ERR(tail);
		}

		/* @offset only applies to the very first chunk. */
		chunk_bytes = ((end - first) << PAGE_SHIFT) - offset;
		sg_set_page(tail, pages[first],
			    min_t(unsigned long, size, chunk_bytes), offset);
		new_ents++;
		size -= chunk_bytes;
		offset = 0;
		first = end;
	}
	sgt_append->sgt.nents += new_ents;
	sgt_append->sgt.orig_nents = sgt_append->sgt.nents;
	sgt_append->prv = tail;
out:
	/* No further pages announced by the caller: terminate the list. */
	if (!left_pages)
		sg_mark_end(tail);
	return 0;
}
EXPORT_SYMBOL(sg_alloc_append_table_from_pages);
555  
556  /**
557   * sg_alloc_table_from_pages_segment - Allocate and initialize an sg table from
558   *                                     an array of pages and given maximum
559   *                                     segment.
560   * @sgt:	 The sg table header to use
561   * @pages:	 Pointer to an array of page pointers
562   * @n_pages:	 Number of pages in the pages array
563   * @offset:      Offset from start of the first page to the start of a buffer
564   * @size:        Number of valid bytes in the buffer (after offset)
565   * @max_segment: Maximum size of a scatterlist element in bytes
566   * @gfp_mask:	 GFP allocation mask
567   *
568   *  Description:
569   *    Allocate and initialize an sg table from a list of pages. Contiguous
570   *    ranges of the pages are squashed into a single scatterlist node up to the
571   *    maximum size specified in @max_segment. A user may provide an offset at a
572   *    start and a size of valid data in a buffer specified by the page array.
573   *
574   *    The returned sg table is released by sg_free_table.
575   *
576   *  Returns:
577   *   0 on success, negative error on failure
578   */
int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages,
				unsigned int n_pages, unsigned int offset,
				unsigned long size, unsigned int max_segment,
				gfp_t gfp_mask)
{
	struct sg_append_table append = {};
	int err;

	/* One-shot build: left_pages == 0 means nothing will be appended. */
	err = sg_alloc_append_table_from_pages(&append, pages, n_pages, offset,
					       size, max_segment, 0, gfp_mask);
	if (!err) {
		/* Hand the finished table header over to the caller. */
		memcpy(sgt, &append.sgt, sizeof(*sgt));
		WARN_ON(append.total_nents != sgt->orig_nents);
		return 0;
	}

	sg_free_append_table(&append);
	return err;
}
EXPORT_SYMBOL(sg_alloc_table_from_pages_segment);
598  
599  #ifdef CONFIG_SGL_ALLOC
600  
601  /**
602   * sgl_alloc_order - allocate a scatterlist and its pages
603   * @length: Length in bytes of the scatterlist. Must be at least one
604   * @order: Second argument for alloc_pages()
605   * @chainable: Whether or not to allocate an extra element in the scatterlist
606   *	for scatterlist chaining purposes
607   * @gfp: Memory allocation flags
608   * @nent_p: [out] Number of entries in the scatterlist that have pages
609   *
610   * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
611   */
struct scatterlist *sgl_alloc_order(unsigned long long length,
				    unsigned int order, bool chainable,
				    gfp_t gfp, unsigned int *nent_p)
{
	struct scatterlist *sgl, *sg;
	struct page *page;
	unsigned int nent, nalloc;
	u32 elem_len;

	/* Number of (PAGE_SIZE << order)-byte elements needed for @length. */
	nent = round_up(length, PAGE_SIZE << order) >> (PAGE_SHIFT + order);
	/*
	 * Check for integer overflow.
	 * NOTE(review): nent is unsigned int, so this shift is evaluated in
	 * unsigned int as well; a request whose rounded-up size does not fit
	 * in that type is rejected here.
	 */
	if (length > (nent << (PAGE_SHIFT + order)))
		return NULL;
	nalloc = nent;
	if (chainable) {
		/* Check for integer overflow */
		if (nalloc + 1 < nalloc)
			return NULL;
		nalloc++;
	}
	/*
	 * @length must be at least one.  A zero-length, non-chainable request
	 * would otherwise ask kmalloc_array() for zero entries and then run
	 * sg_init_table() on a table with no entries.
	 */
	if (!nalloc)
		return NULL;
	sgl = kmalloc_array(nalloc, sizeof(struct scatterlist),
			    gfp & ~GFP_DMA);
	if (!sgl)
		return NULL;

	sg_init_table(sgl, nalloc);
	sg = sgl;
	while (length) {
		/* Every element is full-sized except possibly the last one. */
		elem_len = min_t(u64, length, PAGE_SIZE << order);
		page = alloc_pages(gfp, order);
		if (!page) {
			/* Frees the pages attached so far and the table. */
			sgl_free_order(sgl, order);
			return NULL;
		}

		sg_set_page(sg, page, elem_len, 0);
		length -= elem_len;
		sg = sg_next(sg);
	}
	WARN_ONCE(length, "length = %llu\n", length);
	if (nent_p)
		*nent_p = nent;
	return sgl;
}
EXPORT_SYMBOL(sgl_alloc_order);
657  
658  /**
659   * sgl_alloc - allocate a scatterlist and its pages
660   * @length: Length in bytes of the scatterlist
661   * @gfp: Memory allocation flags
662   * @nent_p: [out] Number of entries in the scatterlist
663   *
664   * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
665   */
struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp,
			      unsigned int *nent_p)
{
	const unsigned int order = 0;	/* order-0 page allocations */
	const bool chainable = false;	/* no spare entry for chaining */

	return sgl_alloc_order(length, order, chainable, gfp, nent_p);
}
EXPORT_SYMBOL(sgl_alloc);
672  
673  /**
674   * sgl_free_n_order - free a scatterlist and its pages
675   * @sgl: Scatterlist with one or more elements
676   * @nents: Maximum number of elements to free
677   * @order: Second argument for __free_pages()
678   *
679   * Notes:
680   * - If several scatterlists have been chained and each chain element is
681   *   freed separately then it's essential to set nents correctly to avoid that a
682   *   page would get freed twice.
683   * - All pages in a chained scatterlist can be freed at once by setting @nents
684   *   to a high number.
685   */
sgl_free_n_order(struct scatterlist * sgl,int nents,int order)686  void sgl_free_n_order(struct scatterlist *sgl, int nents, int order)
687  {
688  	struct scatterlist *sg;
689  	struct page *page;
690  	int i;
691  
692  	for_each_sg(sgl, sg, nents, i) {
693  		if (!sg)
694  			break;
695  		page = sg_page(sg);
696  		if (page)
697  			__free_pages(page, order);
698  	}
699  	kfree(sgl);
700  }
701  EXPORT_SYMBOL(sgl_free_n_order);
702  
703  /**
704   * sgl_free_order - free a scatterlist and its pages
705   * @sgl: Scatterlist with one or more elements
706   * @order: Second argument for __free_pages()
707   */
sgl_free_order(struct scatterlist * sgl,int order)708  void sgl_free_order(struct scatterlist *sgl, int order)
709  {
710  	sgl_free_n_order(sgl, INT_MAX, order);
711  }
712  EXPORT_SYMBOL(sgl_free_order);
713  
714  /**
715   * sgl_free - free a scatterlist and its pages
716   * @sgl: Scatterlist with one or more elements
717   */
void sgl_free(struct scatterlist *sgl)
{
	const int order = 0;	/* pages are released as order-0 allocations */

	sgl_free_order(sgl, order);
}
EXPORT_SYMBOL(sgl_free);
723  
724  #endif /* CONFIG_SGL_ALLOC */
725  
__sg_page_iter_start(struct sg_page_iter * piter,struct scatterlist * sglist,unsigned int nents,unsigned long pgoffset)726  void __sg_page_iter_start(struct sg_page_iter *piter,
727  			  struct scatterlist *sglist, unsigned int nents,
728  			  unsigned long pgoffset)
729  {
730  	piter->__pg_advance = 0;
731  	piter->__nents = nents;
732  
733  	piter->sg = sglist;
734  	piter->sg_pgoffset = pgoffset;
735  }
736  EXPORT_SYMBOL(__sg_page_iter_start);
737  
sg_page_count(struct scatterlist * sg)738  static int sg_page_count(struct scatterlist *sg)
739  {
740  	return PAGE_ALIGN(sg->offset + sg->length) >> PAGE_SHIFT;
741  }
742  
__sg_page_iter_next(struct sg_page_iter * piter)743  bool __sg_page_iter_next(struct sg_page_iter *piter)
744  {
745  	if (!piter->__nents || !piter->sg)
746  		return false;
747  
748  	piter->sg_pgoffset += piter->__pg_advance;
749  	piter->__pg_advance = 1;
750  
751  	while (piter->sg_pgoffset >= sg_page_count(piter->sg)) {
752  		piter->sg_pgoffset -= sg_page_count(piter->sg);
753  		piter->sg = sg_next(piter->sg);
754  		if (!--piter->__nents || !piter->sg)
755  			return false;
756  	}
757  
758  	return true;
759  }
760  EXPORT_SYMBOL(__sg_page_iter_next);
761  
sg_dma_page_count(struct scatterlist * sg)762  static int sg_dma_page_count(struct scatterlist *sg)
763  {
764  	return PAGE_ALIGN(sg->offset + sg_dma_len(sg)) >> PAGE_SHIFT;
765  }
766  
__sg_page_iter_dma_next(struct sg_dma_page_iter * dma_iter)767  bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter)
768  {
769  	struct sg_page_iter *piter = &dma_iter->base;
770  
771  	if (!piter->__nents || !piter->sg)
772  		return false;
773  
774  	piter->sg_pgoffset += piter->__pg_advance;
775  	piter->__pg_advance = 1;
776  
777  	while (piter->sg_pgoffset >= sg_dma_page_count(piter->sg)) {
778  		piter->sg_pgoffset -= sg_dma_page_count(piter->sg);
779  		piter->sg = sg_next(piter->sg);
780  		if (!--piter->__nents || !piter->sg)
781  			return false;
782  	}
783  
784  	return true;
785  }
786  EXPORT_SYMBOL(__sg_page_iter_dma_next);
787  
788  /**
789   * sg_miter_start - start mapping iteration over a sg list
790   * @miter: sg mapping iter to be started
791   * @sgl: sg list to iterate over
792   * @nents: number of sg entries
793   * @flags: sg iterator flags
794   *
795   * Description:
796   *   Starts mapping iterator @miter.
797   *
798   * Context:
799   *   Don't care.
800   */
sg_miter_start(struct sg_mapping_iter * miter,struct scatterlist * sgl,unsigned int nents,unsigned int flags)801  void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl,
802  		    unsigned int nents, unsigned int flags)
803  {
804  	memset(miter, 0, sizeof(struct sg_mapping_iter));
805  
806  	__sg_page_iter_start(&miter->piter, sgl, nents, 0);
807  	WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG)));
808  	miter->__flags = flags;
809  }
810  EXPORT_SYMBOL(sg_miter_start);
811  
sg_miter_get_next_page(struct sg_mapping_iter * miter)812  static bool sg_miter_get_next_page(struct sg_mapping_iter *miter)
813  {
814  	if (!miter->__remaining) {
815  		struct scatterlist *sg;
816  
817  		if (!__sg_page_iter_next(&miter->piter))
818  			return false;
819  
820  		sg = miter->piter.sg;
821  
822  		miter->__offset = miter->piter.sg_pgoffset ? 0 : sg->offset;
823  		miter->piter.sg_pgoffset += miter->__offset >> PAGE_SHIFT;
824  		miter->__offset &= PAGE_SIZE - 1;
825  		miter->__remaining = sg->offset + sg->length -
826  				     (miter->piter.sg_pgoffset << PAGE_SHIFT) -
827  				     miter->__offset;
828  		miter->__remaining = min_t(unsigned long, miter->__remaining,
829  					   PAGE_SIZE - miter->__offset);
830  	}
831  
832  	return true;
833  }
834  
835  /**
836   * sg_miter_skip - reposition mapping iterator
837   * @miter: sg mapping iter to be skipped
 * @offset: number of bytes to advance from the current location
839   *
840   * Description:
841   *   Sets the offset of @miter to its current location plus @offset bytes.
842   *   If mapping iterator @miter has been proceeded by sg_miter_next(), this
843   *   stops @miter.
844   *
845   * Context:
846   *   Don't care.
847   *
848   * Returns:
849   *   true if @miter contains the valid mapping.  false if end of sg
850   *   list is reached.
851   */
sg_miter_skip(struct sg_mapping_iter * miter,off_t offset)852  bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset)
853  {
854  	sg_miter_stop(miter);
855  
856  	while (offset) {
857  		off_t consumed;
858  
859  		if (!sg_miter_get_next_page(miter))
860  			return false;
861  
862  		consumed = min_t(off_t, offset, miter->__remaining);
863  		miter->__offset += consumed;
864  		miter->__remaining -= consumed;
865  		offset -= consumed;
866  	}
867  
868  	return true;
869  }
870  EXPORT_SYMBOL(sg_miter_skip);
871  
872  /**
873   * sg_miter_next - proceed mapping iterator to the next mapping
874   * @miter: sg mapping iter to proceed
875   *
876   * Description:
877   *   Proceeds @miter to the next mapping.  @miter should have been started
878   *   using sg_miter_start().  On successful return, @miter->page,
879   *   @miter->addr and @miter->length point to the current mapping.
880   *
881   * Context:
882   *   May sleep if !SG_MITER_ATOMIC.
883   *
884   * Returns:
885   *   true if @miter contains the next mapping.  false if end of sg
886   *   list is reached.
887   */
sg_miter_next(struct sg_mapping_iter * miter)888  bool sg_miter_next(struct sg_mapping_iter *miter)
889  {
890  	sg_miter_stop(miter);
891  
892  	/*
893  	 * Get to the next page if necessary.
894  	 * __remaining, __offset is adjusted by sg_miter_stop
895  	 */
896  	if (!sg_miter_get_next_page(miter))
897  		return false;
898  
899  	miter->page = sg_page_iter_page(&miter->piter);
900  	miter->consumed = miter->length = miter->__remaining;
901  
902  	if (miter->__flags & SG_MITER_ATOMIC)
903  		miter->addr = kmap_atomic(miter->page) + miter->__offset;
904  	else
905  		miter->addr = kmap(miter->page) + miter->__offset;
906  
907  	return true;
908  }
909  EXPORT_SYMBOL(sg_miter_next);
910  
911  /**
912   * sg_miter_stop - stop mapping iteration
913   * @miter: sg mapping iter to be stopped
914   *
915   * Description:
916   *   Stops mapping iterator @miter.  @miter should have been started
917   *   using sg_miter_start().  A stopped iteration can be resumed by
918   *   calling sg_miter_next() on it.  This is useful when resources (kmap)
919   *   need to be released during iteration.
920   *
921   * Context:
922   *   Don't care otherwise.
923   */
sg_miter_stop(struct sg_mapping_iter * miter)924  void sg_miter_stop(struct sg_mapping_iter *miter)
925  {
926  	WARN_ON(miter->consumed > miter->length);
927  
928  	/* drop resources from the last iteration */
929  	if (miter->addr) {
930  		miter->__offset += miter->consumed;
931  		miter->__remaining -= miter->consumed;
932  
933  		if (miter->__flags & SG_MITER_TO_SG)
934  			flush_dcache_page(miter->page);
935  
936  		if (miter->__flags & SG_MITER_ATOMIC) {
937  			WARN_ON_ONCE(!pagefault_disabled());
938  			kunmap_atomic(miter->addr);
939  		} else
940  			kunmap(miter->page);
941  
942  		miter->page = NULL;
943  		miter->addr = NULL;
944  		miter->length = 0;
945  		miter->consumed = 0;
946  	}
947  }
948  EXPORT_SYMBOL(sg_miter_stop);
949  
950  /**
951   * sg_copy_buffer - Copy data between a linear buffer and an SG list
952   * @sgl:		 The SG list
953   * @nents:		 Number of SG entries
954   * @buf:		 Where to copy from
955   * @buflen:		 The number of bytes to copy
956   * @skip:		 Number of bytes to skip before copying
957   * @to_buffer:		 transfer direction (true == from an sg list to a
958   *			 buffer, false == from a buffer to an sg list)
959   *
960   * Returns the number of copied bytes.
961   *
962   **/
sg_copy_buffer(struct scatterlist * sgl,unsigned int nents,void * buf,size_t buflen,off_t skip,bool to_buffer)963  size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
964  		      size_t buflen, off_t skip, bool to_buffer)
965  {
966  	unsigned int offset = 0;
967  	struct sg_mapping_iter miter;
968  	unsigned int sg_flags = SG_MITER_ATOMIC;
969  
970  	if (to_buffer)
971  		sg_flags |= SG_MITER_FROM_SG;
972  	else
973  		sg_flags |= SG_MITER_TO_SG;
974  
975  	sg_miter_start(&miter, sgl, nents, sg_flags);
976  
977  	if (!sg_miter_skip(&miter, skip))
978  		return 0;
979  
980  	while ((offset < buflen) && sg_miter_next(&miter)) {
981  		unsigned int len;
982  
983  		len = min(miter.length, buflen - offset);
984  
985  		if (to_buffer)
986  			memcpy(buf + offset, miter.addr, len);
987  		else
988  			memcpy(miter.addr, buf + offset, len);
989  
990  		offset += len;
991  	}
992  
993  	sg_miter_stop(&miter);
994  
995  	return offset;
996  }
997  EXPORT_SYMBOL(sg_copy_buffer);
998  
999  /**
1000   * sg_copy_from_buffer - Copy from a linear buffer to an SG list
1001   * @sgl:		 The SG list
1002   * @nents:		 Number of SG entries
1003   * @buf:		 Where to copy from
1004   * @buflen:		 The number of bytes to copy
1005   *
1006   * Returns the number of copied bytes.
1007   *
1008   **/
sg_copy_from_buffer(struct scatterlist * sgl,unsigned int nents,const void * buf,size_t buflen)1009  size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents,
1010  			   const void *buf, size_t buflen)
1011  {
1012  	return sg_copy_buffer(sgl, nents, (void *)buf, buflen, 0, false);
1013  }
1014  EXPORT_SYMBOL(sg_copy_from_buffer);
1015  
1016  /**
1017   * sg_copy_to_buffer - Copy from an SG list to a linear buffer
1018   * @sgl:		 The SG list
1019   * @nents:		 Number of SG entries
1020   * @buf:		 Where to copy to
1021   * @buflen:		 The number of bytes to copy
1022   *
1023   * Returns the number of copied bytes.
1024   *
1025   **/
sg_copy_to_buffer(struct scatterlist * sgl,unsigned int nents,void * buf,size_t buflen)1026  size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents,
1027  			 void *buf, size_t buflen)
1028  {
1029  	return sg_copy_buffer(sgl, nents, buf, buflen, 0, true);
1030  }
1031  EXPORT_SYMBOL(sg_copy_to_buffer);
1032  
1033  /**
1034   * sg_pcopy_from_buffer - Copy from a linear buffer to an SG list
1035   * @sgl:		 The SG list
1036   * @nents:		 Number of SG entries
1037   * @buf:		 Where to copy from
1038   * @buflen:		 The number of bytes to copy
1039   * @skip:		 Number of bytes to skip before copying
1040   *
1041   * Returns the number of copied bytes.
1042   *
1043   **/
sg_pcopy_from_buffer(struct scatterlist * sgl,unsigned int nents,const void * buf,size_t buflen,off_t skip)1044  size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents,
1045  			    const void *buf, size_t buflen, off_t skip)
1046  {
1047  	return sg_copy_buffer(sgl, nents, (void *)buf, buflen, skip, false);
1048  }
1049  EXPORT_SYMBOL(sg_pcopy_from_buffer);
1050  
1051  /**
1052   * sg_pcopy_to_buffer - Copy from an SG list to a linear buffer
1053   * @sgl:		 The SG list
1054   * @nents:		 Number of SG entries
1055   * @buf:		 Where to copy to
1056   * @buflen:		 The number of bytes to copy
1057   * @skip:		 Number of bytes to skip before copying
1058   *
1059   * Returns the number of copied bytes.
1060   *
1061   **/
sg_pcopy_to_buffer(struct scatterlist * sgl,unsigned int nents,void * buf,size_t buflen,off_t skip)1062  size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
1063  			  void *buf, size_t buflen, off_t skip)
1064  {
1065  	return sg_copy_buffer(sgl, nents, buf, buflen, skip, true);
1066  }
1067  EXPORT_SYMBOL(sg_pcopy_to_buffer);
1068  
1069  /**
1070   * sg_zero_buffer - Zero-out a part of a SG list
1071   * @sgl:		 The SG list
1072   * @nents:		 Number of SG entries
1073   * @buflen:		 The number of bytes to zero out
1074   * @skip:		 Number of bytes to skip before zeroing
1075   *
1076   * Returns the number of bytes zeroed.
1077   **/
sg_zero_buffer(struct scatterlist * sgl,unsigned int nents,size_t buflen,off_t skip)1078  size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
1079  		       size_t buflen, off_t skip)
1080  {
1081  	unsigned int offset = 0;
1082  	struct sg_mapping_iter miter;
1083  	unsigned int sg_flags = SG_MITER_ATOMIC | SG_MITER_TO_SG;
1084  
1085  	sg_miter_start(&miter, sgl, nents, sg_flags);
1086  
1087  	if (!sg_miter_skip(&miter, skip))
1088  		return false;
1089  
1090  	while (offset < buflen && sg_miter_next(&miter)) {
1091  		unsigned int len;
1092  
1093  		len = min(miter.length, buflen - offset);
1094  		memset(miter.addr, 0, len);
1095  
1096  		offset += len;
1097  	}
1098  
1099  	sg_miter_stop(&miter);
1100  	return offset;
1101  }
1102  EXPORT_SYMBOL(sg_zero_buffer);
1103  
1104  /*
1105   * Extract and pin a list of up to sg_max pages from UBUF- or IOVEC-class
1106   * iterators, and add them to the scatterlist.
1107   */
extract_user_to_sg(struct iov_iter * iter,ssize_t maxsize,struct sg_table * sgtable,unsigned int sg_max,iov_iter_extraction_t extraction_flags)1108  static ssize_t extract_user_to_sg(struct iov_iter *iter,
1109  				  ssize_t maxsize,
1110  				  struct sg_table *sgtable,
1111  				  unsigned int sg_max,
1112  				  iov_iter_extraction_t extraction_flags)
1113  {
1114  	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1115  	struct page **pages;
1116  	unsigned int npages;
1117  	ssize_t ret = 0, res;
1118  	size_t len, off;
1119  
1120  	/* We decant the page list into the tail of the scatterlist */
1121  	pages = (void *)sgtable->sgl +
1122  		array_size(sg_max, sizeof(struct scatterlist));
1123  	pages -= sg_max;
1124  
1125  	do {
1126  		res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max,
1127  					     extraction_flags, &off);
1128  		if (res <= 0)
1129  			goto failed;
1130  
1131  		len = res;
1132  		maxsize -= len;
1133  		ret += len;
1134  		npages = DIV_ROUND_UP(off + len, PAGE_SIZE);
1135  		sg_max -= npages;
1136  
1137  		for (; npages > 0; npages--) {
1138  			struct page *page = *pages;
1139  			size_t seg = min_t(size_t, PAGE_SIZE - off, len);
1140  
1141  			*pages++ = NULL;
1142  			sg_set_page(sg, page, seg, off);
1143  			sgtable->nents++;
1144  			sg++;
1145  			len -= seg;
1146  			off = 0;
1147  		}
1148  	} while (maxsize > 0 && sg_max > 0);
1149  
1150  	return ret;
1151  
1152  failed:
1153  	while (sgtable->nents > sgtable->orig_nents)
1154  		unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents]));
1155  	return res;
1156  }
1157  
1158  /*
1159   * Extract up to sg_max pages from a BVEC-type iterator and add them to the
1160   * scatterlist.  The pages are not pinned.
1161   */
extract_bvec_to_sg(struct iov_iter * iter,ssize_t maxsize,struct sg_table * sgtable,unsigned int sg_max,iov_iter_extraction_t extraction_flags)1162  static ssize_t extract_bvec_to_sg(struct iov_iter *iter,
1163  				  ssize_t maxsize,
1164  				  struct sg_table *sgtable,
1165  				  unsigned int sg_max,
1166  				  iov_iter_extraction_t extraction_flags)
1167  {
1168  	const struct bio_vec *bv = iter->bvec;
1169  	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1170  	unsigned long start = iter->iov_offset;
1171  	unsigned int i;
1172  	ssize_t ret = 0;
1173  
1174  	for (i = 0; i < iter->nr_segs; i++) {
1175  		size_t off, len;
1176  
1177  		len = bv[i].bv_len;
1178  		if (start >= len) {
1179  			start -= len;
1180  			continue;
1181  		}
1182  
1183  		len = min_t(size_t, maxsize, len - start);
1184  		off = bv[i].bv_offset + start;
1185  
1186  		sg_set_page(sg, bv[i].bv_page, len, off);
1187  		sgtable->nents++;
1188  		sg++;
1189  		sg_max--;
1190  
1191  		ret += len;
1192  		maxsize -= len;
1193  		if (maxsize <= 0 || sg_max == 0)
1194  			break;
1195  		start = 0;
1196  	}
1197  
1198  	if (ret > 0)
1199  		iov_iter_advance(iter, ret);
1200  	return ret;
1201  }
1202  
1203  /*
1204   * Extract up to sg_max pages from a KVEC-type iterator and add them to the
1205   * scatterlist.  This can deal with vmalloc'd buffers as well as kmalloc'd or
1206   * static buffers.  The pages are not pinned.
1207   */
extract_kvec_to_sg(struct iov_iter * iter,ssize_t maxsize,struct sg_table * sgtable,unsigned int sg_max,iov_iter_extraction_t extraction_flags)1208  static ssize_t extract_kvec_to_sg(struct iov_iter *iter,
1209  				  ssize_t maxsize,
1210  				  struct sg_table *sgtable,
1211  				  unsigned int sg_max,
1212  				  iov_iter_extraction_t extraction_flags)
1213  {
1214  	const struct kvec *kv = iter->kvec;
1215  	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1216  	unsigned long start = iter->iov_offset;
1217  	unsigned int i;
1218  	ssize_t ret = 0;
1219  
1220  	for (i = 0; i < iter->nr_segs; i++) {
1221  		struct page *page;
1222  		unsigned long kaddr;
1223  		size_t off, len, seg;
1224  
1225  		len = kv[i].iov_len;
1226  		if (start >= len) {
1227  			start -= len;
1228  			continue;
1229  		}
1230  
1231  		kaddr = (unsigned long)kv[i].iov_base + start;
1232  		off = kaddr & ~PAGE_MASK;
1233  		len = min_t(size_t, maxsize, len - start);
1234  		kaddr &= PAGE_MASK;
1235  
1236  		maxsize -= len;
1237  		ret += len;
1238  		do {
1239  			seg = min_t(size_t, len, PAGE_SIZE - off);
1240  			if (is_vmalloc_or_module_addr((void *)kaddr))
1241  				page = vmalloc_to_page((void *)kaddr);
1242  			else
1243  				page = virt_to_page((void *)kaddr);
1244  
1245  			sg_set_page(sg, page, len, off);
1246  			sgtable->nents++;
1247  			sg++;
1248  			sg_max--;
1249  
1250  			len -= seg;
1251  			kaddr += PAGE_SIZE;
1252  			off = 0;
1253  		} while (len > 0 && sg_max > 0);
1254  
1255  		if (maxsize <= 0 || sg_max == 0)
1256  			break;
1257  		start = 0;
1258  	}
1259  
1260  	if (ret > 0)
1261  		iov_iter_advance(iter, ret);
1262  	return ret;
1263  }
1264  
1265  /*
1266   * Extract up to sg_max folios from an FOLIOQ-type iterator and add them to
1267   * the scatterlist.  The pages are not pinned.
1268   */
extract_folioq_to_sg(struct iov_iter * iter,ssize_t maxsize,struct sg_table * sgtable,unsigned int sg_max,iov_iter_extraction_t extraction_flags)1269  static ssize_t extract_folioq_to_sg(struct iov_iter *iter,
1270  				   ssize_t maxsize,
1271  				   struct sg_table *sgtable,
1272  				   unsigned int sg_max,
1273  				   iov_iter_extraction_t extraction_flags)
1274  {
1275  	const struct folio_queue *folioq = iter->folioq;
1276  	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1277  	unsigned int slot = iter->folioq_slot;
1278  	ssize_t ret = 0;
1279  	size_t offset = iter->iov_offset;
1280  
1281  	BUG_ON(!folioq);
1282  
1283  	if (slot >= folioq_nr_slots(folioq)) {
1284  		folioq = folioq->next;
1285  		if (WARN_ON_ONCE(!folioq))
1286  			return 0;
1287  		slot = 0;
1288  	}
1289  
1290  	do {
1291  		struct folio *folio = folioq_folio(folioq, slot);
1292  		size_t fsize = folioq_folio_size(folioq, slot);
1293  
1294  		if (offset < fsize) {
1295  			size_t part = umin(maxsize - ret, fsize - offset);
1296  
1297  			sg_set_page(sg, folio_page(folio, 0), part, offset);
1298  			sgtable->nents++;
1299  			sg++;
1300  			sg_max--;
1301  			offset += part;
1302  			ret += part;
1303  		}
1304  
1305  		if (offset >= fsize) {
1306  			offset = 0;
1307  			slot++;
1308  			if (slot >= folioq_nr_slots(folioq)) {
1309  				if (!folioq->next) {
1310  					WARN_ON_ONCE(ret < iter->count);
1311  					break;
1312  				}
1313  				folioq = folioq->next;
1314  				slot = 0;
1315  			}
1316  		}
1317  	} while (sg_max > 0 && ret < maxsize);
1318  
1319  	iter->folioq = folioq;
1320  	iter->folioq_slot = slot;
1321  	iter->iov_offset = offset;
1322  	iter->count -= ret;
1323  	return ret;
1324  }
1325  
1326  /*
1327   * Extract up to sg_max folios from an XARRAY-type iterator and add them to
1328   * the scatterlist.  The pages are not pinned.
1329   */
extract_xarray_to_sg(struct iov_iter * iter,ssize_t maxsize,struct sg_table * sgtable,unsigned int sg_max,iov_iter_extraction_t extraction_flags)1330  static ssize_t extract_xarray_to_sg(struct iov_iter *iter,
1331  				    ssize_t maxsize,
1332  				    struct sg_table *sgtable,
1333  				    unsigned int sg_max,
1334  				    iov_iter_extraction_t extraction_flags)
1335  {
1336  	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1337  	struct xarray *xa = iter->xarray;
1338  	struct folio *folio;
1339  	loff_t start = iter->xarray_start + iter->iov_offset;
1340  	pgoff_t index = start / PAGE_SIZE;
1341  	ssize_t ret = 0;
1342  	size_t offset, len;
1343  	XA_STATE(xas, xa, index);
1344  
1345  	rcu_read_lock();
1346  
1347  	xas_for_each(&xas, folio, ULONG_MAX) {
1348  		if (xas_retry(&xas, folio))
1349  			continue;
1350  		if (WARN_ON(xa_is_value(folio)))
1351  			break;
1352  		if (WARN_ON(folio_test_hugetlb(folio)))
1353  			break;
1354  
1355  		offset = offset_in_folio(folio, start);
1356  		len = min_t(size_t, maxsize, folio_size(folio) - offset);
1357  
1358  		sg_set_page(sg, folio_page(folio, 0), len, offset);
1359  		sgtable->nents++;
1360  		sg++;
1361  		sg_max--;
1362  
1363  		maxsize -= len;
1364  		ret += len;
1365  		if (maxsize <= 0 || sg_max == 0)
1366  			break;
1367  	}
1368  
1369  	rcu_read_unlock();
1370  	if (ret > 0)
1371  		iov_iter_advance(iter, ret);
1372  	return ret;
1373  }
1374  
1375  /**
1376   * extract_iter_to_sg - Extract pages from an iterator and add to an sglist
1377   * @iter: The iterator to extract from
1378   * @maxsize: The amount of iterator to copy
1379   * @sgtable: The scatterlist table to fill in
1380   * @sg_max: Maximum number of elements in @sgtable that may be filled
1381   * @extraction_flags: Flags to qualify the request
1382   *
1383   * Extract the page fragments from the given amount of the source iterator and
1384   * add them to a scatterlist that refers to all of those bits, to a maximum
1385   * addition of @sg_max elements.
1386   *
1387   * The pages referred to by UBUF- and IOVEC-type iterators are extracted and
1388   * pinned; BVEC-, KVEC-, FOLIOQ- and XARRAY-type are extracted but aren't
1389   * pinned; DISCARD-type is not supported.
1390   *
1391   * No end mark is placed on the scatterlist; that's left to the caller.
1392   *
1393   * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
1394   * be allowed on the pages extracted.
1395   *
1396   * If successful, @sgtable->nents is updated to include the number of elements
1397   * added and the number of bytes added is returned.  @sgtable->orig_nents is
1398   * left unaltered.
1399   *
1400   * The iov_iter_extract_mode() function should be used to query how cleanup
1401   * should be performed.
1402   */
extract_iter_to_sg(struct iov_iter * iter,size_t maxsize,struct sg_table * sgtable,unsigned int sg_max,iov_iter_extraction_t extraction_flags)1403  ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t maxsize,
1404  			   struct sg_table *sgtable, unsigned int sg_max,
1405  			   iov_iter_extraction_t extraction_flags)
1406  {
1407  	if (maxsize == 0)
1408  		return 0;
1409  
1410  	switch (iov_iter_type(iter)) {
1411  	case ITER_UBUF:
1412  	case ITER_IOVEC:
1413  		return extract_user_to_sg(iter, maxsize, sgtable, sg_max,
1414  					  extraction_flags);
1415  	case ITER_BVEC:
1416  		return extract_bvec_to_sg(iter, maxsize, sgtable, sg_max,
1417  					  extraction_flags);
1418  	case ITER_KVEC:
1419  		return extract_kvec_to_sg(iter, maxsize, sgtable, sg_max,
1420  					  extraction_flags);
1421  	case ITER_FOLIOQ:
1422  		return extract_folioq_to_sg(iter, maxsize, sgtable, sg_max,
1423  					    extraction_flags);
1424  	case ITER_XARRAY:
1425  		return extract_xarray_to_sg(iter, maxsize, sgtable, sg_max,
1426  					    extraction_flags);
1427  	default:
1428  		pr_err("%s(%u) unsupported\n", __func__, iov_iter_type(iter));
1429  		WARN_ON_ONCE(1);
1430  		return -EIO;
1431  	}
1432  }
1433  EXPORT_SYMBOL_GPL(extract_iter_to_sg);
1434