// SPDX-License-Identifier: GPL-2.0
/*
 * Bad block management
 *
 * - Heavily based on MD badblocks code from Neil Brown
 *
 * Copyright (c) 2015, Intel Corporation.
 */

#include <linux/badblocks.h>
#include <linux/seqlock.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/slab.h>

/*
 * The purpose of badblocks set/clear is to manage bad block ranges which
 * are identified by LBA addresses.
 *
 * When the caller of badblocks_set() wants to set a range of bad blocks, the
 * setting range can be acked or unacked. The setting range may merge with,
 * overwrite, or skip an overlapping already set range, depending on how the
 * ranges overlap or are adjacent, and on the acknowledgment type of the
 * ranges. It gets more complicated when the setting range covers multiple
 * already set bad block ranges, with the restrictions of the maximum length
 * of each bad range and the limited bad table space.
 *
 * It is difficult and unnecessary to take care of all the possible
 * situations at once. For setting a large range of bad blocks, we can handle
 * it by dividing the large range into smaller ones whenever we encounter an
 * overlap, the max range length, or a full bad table. Each time only a
 * smaller piece of the bad range is handled, with a limited number of
 * conditions for how it interacts with possibly overlapping or adjacent
 * already set bad block ranges. Then the hard, complicated problem becomes
 * much simpler to handle properly (see the loop sketch after this comment
 * block).
 *
 * When setting a range of bad blocks in the bad table, the simplified
 * situations to be considered are (the already set bad block ranges are
 * named with prefix E, and the setting bad block range is named with
 * prefix S):
 *
 * 1) A setting range is neither overlapped with nor adjacent to any other
 *    already set bad block range.
 *                         +--------+
 *                         |    S   |
 *                         +--------+
 *        +-------------+               +-------------+
 *        |      E1     |               |      E2     |
 *        +-------------+               +-------------+
 *    For this situation, if the bad blocks table is not full, just allocate a
 *    free slot from the bad blocks table to mark the setting range S. The
 *    result is,
 *        +-------------+  +--------+   +-------------+
 *        |      E1     |  |    S   |   |      E2     |
 *        +-------------+  +--------+   +-------------+
 * 2) A setting range starts exactly at the start LBA of an already set bad
 *    blocks range.
 * 2.1) The setting range size < already set range size
 *        +--------+
 *        |    S   |
 *        +--------+
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 2.1.1) If S and E are both acked or both unacked ranges, the setting range
 *    S can be merged into the existing bad range E. The result is,
 *        +-------------+
 *        |      S      |
 *        +-------------+
 * 2.1.2) If S is unacked and E is acked, the setting will be denied, and
 *    the result is,
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 2.1.3) If S is acked and E is unacked, range S can overwrite E. An extra
 *    slot from the bad blocks table will be allocated for S, and the head
 *    of E will move to the end of the inserted range S. The result is,
 *        +--------+----+
 *        |    S   | E  |
 *        +--------+----+
 * 2.2) The setting range size == already set range size
 * 2.2.1) If S and E are both acked or both unacked ranges, the setting range
 *    S can be merged into the existing bad range E. The result is,
 *        +-------------+
 *        |      S      |
 *        +-------------+
 * 2.2.2) If S is unacked and E is acked, the setting will be denied, and
 *    the result is,
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 2.2.3) If S is acked and E is unacked, range S can overwrite all of the
 *    bad blocks range E. The result is,
 *        +-------------+
 *        |      S      |
 *        +-------------+
 * 2.3) The setting range size > already set range size
 *        +-------------------+
 *        |          S        |
 *        +-------------------+
 *        +-------------+
 *        |      E      |
 *        +-------------+
 *    For such a situation, the setting range S can be treated as two parts:
 *    the first part (S1) has the same size as the already set range E, and
 *    the second part (S2) is the rest of the setting range.
 *        +-------------+-----+        +-------------+       +-----+
 *        |    S1       | S2  |        |     S1      |       | S2  |
 *        +-------------+-----+  ===>  +-------------+       +-----+
 *        +-------------+              +-------------+
 *        |      E      |              |      E      |
 *        +-------------+              +-------------+
 *    Now we only focus on how to handle the setting range S1 and the already
 *    set range E, which is already explained in 2.2); the rest part S2 will
 *    be handled later in the next loop.
 * 3) A setting range starts before the start LBA of an already set bad blocks
 *    range.
 *        +-------------+
 *        |      S      |
 *        +-------------+
 *             +-------------+
 *             |      E      |
 *             +-------------+
 *    For this situation, the setting range S can be divided into two parts:
 *    the first part (S1) ends at the start LBA of the already set range E,
 *    and the second part (S2) starts exactly at the start LBA of the already
 *    set range E.
 *        +----+---------+             +----+      +---------+
 *        | S1 |    S2   |             | S1 |      |    S2   |
 *        +----+---------+      ===>   +----+      +---------+
 *             +-------------+                     +-------------+
 *             |      E      |                     |      E      |
 *             +-------------+                     +-------------+
 *    Now only the first part S1 should be handled in this loop, which is in
 *    a similar condition to 1). The rest part S2 has exactly the same start
 *    LBA as the already set range E, so it will be handled in the next loop
 *    as one of the situations in 2).
 * 4) A setting range starts after the start LBA of an already set bad blocks
 *    range.
 * 4.1) If the setting range S exactly matches the tail part of the already
 *    set bad blocks range E, as the following chart shows,
 *            +---------+
 *            |   S     |
 *            +---------+
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 4.1.1) If ranges S and E have the same acknowledge value (both acked or
 *    both unacked), they will be merged into one. The result is,
 *        +-------------+
 *        |      S      |
 *        +-------------+
 * 4.1.2) If range E is acked and the setting range S is unacked, the setting
 *    request of S will be rejected. The result is,
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 4.1.3) If range E is unacked and the setting range S is acked, then S may
 *    overwrite the overlapped range of E. The result is,
 *        +---+---------+
 *        | E |    S    |
 *        +---+---------+
 * 4.2) If the setting range S stays in the middle of an already set range E,
 *    as the following chart shows,
 *             +----+
 *             | S  |
 *             +----+
 *        +--------------+
 *        |       E      |
 *        +--------------+
 * 4.2.1) If ranges S and E have the same acknowledge value (both acked or
 *    both unacked), they will be merged into one. The result is,
 *        +--------------+
 *        |       S      |
 *        +--------------+
 * 4.2.2) If range E is acked and the setting range S is unacked, the setting
 *    request of S will be rejected. The result is also,
 *        +--------------+
 *        |       E      |
 *        +--------------+
 * 4.2.3) If range E is unacked and the setting range S is acked, then S will
 *    be inserted into the middle of E and split the previous range E into
 *    two parts (E1 and E2). The result is,
 *        +----+----+----+
 *        | E1 |  S | E2 |
 *        +----+----+----+
 * 4.3) If the setting bad blocks range S is overlapped with an already set
 *    bad blocks range E, such that S starts after the start LBA of E and
 *    ends after the end LBA of E, as the following chart shows,
 *            +-------------------+
 *            |          S        |
 *            +-------------------+
 *        +-------------+
 *        |      E      |
 *        +-------------+
 *    For this situation, the range S can be divided into two parts: the
 *    first part (S1) ends at the end LBA of range E, and the second part
 *    (S2) is the rest of the original S.
 *            +---------+---------+            +---------+      +---------+
 *            |    S1   |    S2   |            |    S1   |      |    S2   |
 *            +---------+---------+  ===>      +---------+      +---------+
 *        +-------------+                  +-------------+
 *        |      E      |                  |      E      |
 *        +-------------+                  +-------------+
 *     Now in this loop the setting range S1 and the already set range E can
 *     be handled as in situation 4.1); the rest range S2 will be handled in
 *     the next loop and is ignored in this loop.
 * 5) A setting bad blocks range S is adjacent to one or more already set bad
 *    blocks range(s), and they are all acked or all unacked ranges.
 * 5.1) Front merge: the already set bad blocks range E is before the setting
 *    range S and they are adjacent,
 *                +------+
 *                |  S   |
 *                +------+
 *        +-------+
 *        |   E   |
 *        +-------+
 * 5.1.1) When the total size of ranges S and E <= BB_MAX_LEN, and their
 *    acknowledge values are the same, the setting range S can be front
 *    merged into range E. The result is,
 *        +--------------+
 *        |       S      |
 *        +--------------+
 * 5.1.2) Otherwise these two ranges cannot merge; just insert the setting
 *    range S right after the already set range E into the bad blocks table.
 *    The result is,
 *        +--------+------+
 *        |   E    |   S  |
 *        +--------+------+
 * 6) Special cases which the above conditions cannot handle
 * 6.1) Multiple already set ranges may merge into fewer ones in a full bad
 *    table
 *        +-------------------------------------------------------+
 *        |                           S                           |
 *        +-------------------------------------------------------+
 *        |<----- BB_MAX_LEN ----->|
 *                                 +-----+     +-----+   +-----+
 *                                 | E1  |     | E2  |   | E3  |
 *                                 +-----+     +-----+   +-----+
 *     In the above example, when the bad blocks table is full, inserting the
 *     first part of the setting range S will fail because no free slot can
 *     be allocated from the bad blocks table. In this situation a proper
 *     setting method should go through the whole setting bad blocks range
 *     and look for chances to merge already set ranges into fewer ones. When
 *     there is a free slot in the bad blocks table, retry to handle as many
 *     of the remaining setting bad blocks ranges as possible.
 *        +------------------------+
 *        |          S3            |
 *        +------------------------+
 *        |<----- BB_MAX_LEN ----->|
 *                                 +-----+-----+-----+---+-----+--+
 *                                 |       S1        |     S2     |
 *                                 +-----+-----+-----+---+-----+--+
 *     The above chart shows that although the first part (S3) cannot be
 *     inserted due to no space in the bad blocks table, the following ranges
 *     E1, E2 and E3 can be merged with the rest of S into the two ranges S1
 *     and S2. Now there is one free slot in the bad blocks table.
 *        +------------------------+-----+-----+-----+---+-----+--+
 *        |           S3           |       S1        |     S2     |
 *        +------------------------+-----+-----+-----+---+-----+--+
 *     Since the bad blocks table is not full anymore, retry the original
 *     setting range S. Now the setting range S3 can be inserted into the
 *     bad blocks table with the slot freed by the multiple-range merge.
 * 6.2) Front merge after overwrite
 *    In the following example, in the bad blocks table, E1 is an acked bad
 *    blocks range and E2 is an unacked bad blocks range, therefore they are
 *    not able to be merged into a larger range. The setting bad blocks range
 *    S is acked, therefore part of E2 can be overwritten by S.
 *                      +--------+
 *                      |    S   |                             acknowledged
 *                      +--------+                         S:       1
 *              +-------+-------------+                   E1:       1
 *              |   E1  |    E2       |                   E2:       0
 *              +-------+-------------+
 *     With the previous simplified routines, after overwriting part of E2
 *     with S, the bad blocks table should be (E3 is the remaining part of
 *     E2 which is not overwritten by S),
 *                                                             acknowledged
 *              +-------+--------+----+                    S:       1
 *              |   E1  |    S   | E3 |                   E1:       1
 *              +-------+--------+----+                   E3:       0
 *     The above result is correct but not perfect. Ranges E1 and S in the
 *     bad blocks table are both acked; merging them into one larger range
 *     occupies less bad blocks table space and makes badblocks_check()
 *     faster. Therefore in such a situation, after overwriting with range S,
 *     the previous range E1 should be checked for a possible front
 *     combination. Then the ideal result can be,
 *              +----------------+----+                        acknowledged
 *              |       E1       | E3 |                   E1:       1
 *              +----------------+----+                   E3:       0
 * 6.3) Behind merge: the already set bad blocks range E is behind the setting
 *    range S and they are adjacent. Normally we don't need to care about
 *    this, because the front merge handles it while going through range S
 *    from head to tail, except for the tail part of range S. When the
 *    setting range S is fully handled, none of the above simplified routines
 *    check whether the tail LBA of range S is adjacent to the next already
 *    set range, so the two are not merged even when it is possible.
 *        +------+
 *        |  S   |
 *        +------+
 *               +-------+
 *               |   E   |
 *               +-------+
 *    For the above special situation, when the setting range S is fully
 *    handled and the loop ends, an extra check is necessary for whether the
 *    next already set range E starts right after S and is mergeable.
 * 6.3.1) When the total size of ranges E and S <= BB_MAX_LEN, and their
 *    acknowledge values are the same, the setting range S can be merged
 *    behind into range E. The result is,
 *        +--------------+
 *        |       S      |
 *        +--------------+
 * 6.3.2) Otherwise these two ranges cannot merge; just insert the setting
 *     range S in front of the already set range E in the bad blocks table.
 *     The result is,
 *        +------+-------+
 *        |  S   |   E   |
 *        +------+-------+
 *
 * The above 5 simplified situations and 3 special cases may cover 99%+ of
 * the bad block range setting conditions. There may be some rare corner
 * cases which are not considered and optimized; it won't hurt if
 * badblocks_set() fails due to no space, or if some ranges are not merged
 * to save bad blocks table space.
 *
 * Inside badblocks_set() each loop iteration starts by jumping to the
 * re_insert label; at the start of every iteration prev_badblocks() is
 * called to find an already set range which starts before or at the current
 * setting range. Since the setting bad blocks range is handled from head to
 * tail, in most cases it is unnecessary to do the binary search inside
 * prev_badblocks(); it is possible to provide a hint to prev_badblocks()
 * for a fast path, and then the expensive binary search can be avoided. In
 * my test with the hint to prev_badblocks(), except for the first loop
 * iteration, all remaining calls to prev_badblocks() go into the fast path
 * and return the correct bad blocks table index immediately.
 *
 * Clearing a bad blocks range from the bad block table follows a similar
 * idea to setting, but is much simpler. The only thing that needs to be
 * noticed is that when the clearing range hits the middle of a bad block
 * range, the existing bad block range will split into two, and one more
 * item should be added into the bad block table. The simplified situations
 * to be considered are (the already set bad blocks ranges in the bad block
 * table are named with prefix E, and the clearing bad blocks range is named
 * with prefix C):
 *
 * 1) A clearing range is not overlapped with any already set range in the
 *    bad block table.
 *    +-----+         |          +-----+         |          +-----+
 *    |  C  |         |          |  C  |         |          |  C  |
 *    +-----+         or         +-----+         or         +-----+
 *            +---+   |   +----+         +----+  |  +---+
 *            | E |   |   | E1 |         | E2 |  |  | E |
 *            +---+   |   +----+         +----+  |  +---+
 *    For the above situations, no bad blocks are cleared and no failure
 *    happens; simply return 0.
 * 2) The clearing range hits the middle of an already set bad blocks range
 *    in the bad block table.
 *            +---+
 *            | C |
 *            +---+
 *     +-----------------+
 *     |         E       |
 *     +-----------------+
 *    In this situation, if the bad block table is not full, the range E will
 *    be split into two ranges E1 and E2. The result is,
 *     +------+   +------+
 *     |  E1  |   |  E2  |
 *     +------+   +------+
 * 3) The clearing range starts exactly at the same LBA as an already set
 *    bad block range from the bad block table.
 * 3.1) Partially covered at the head part
 *         +------------+
 *         |     C      |
 *         +------------+
 *         +-----------------+
 *         |         E       |
 *         +-----------------+
 *    For this situation, the overlapped already set range will update its
 *    start LBA to the end of C and shrink its length to
 *    BB_LEN(E) - BB_LEN(C). No item is deleted from the bad block table.
 *    The result is,
 *                      +----+
 *                      | E1 |
 *                      +----+
 * 3.2) Exactly fully covered
 *         +-----------------+
 *         |         C       |
 *         +-----------------+
 *         +-----------------+
 *         |         E       |
 *         +-----------------+
 *    For this situation, the whole bad blocks range E will be cleared and
 *    its corresponding item is deleted from the bad block table.
 * 4) The clearing range ends exactly at the same LBA as an already set bad
 *    block range.
 *                   +-------+
 *                   |   C   |
 *                   +-------+
 *         +-----------------+
 *         |         E       |
 *         +-----------------+
 *    For the above situation, the already set range E is updated to shrink
 *    its end to the start of C, reducing its length to
 *    BB_LEN(E) - BB_LEN(C). The result is,
 *         +---------+
 *         |    E    |
 *         +---------+
 * 5) The clearing range is partially overlapped with an already set bad
 *    block range from the bad block table.
 * 5.1) The already set bad block range is front overlapped with the clearing
 *    range.
 *         +----------+
 *         |     C    |
 *         +----------+
 *              +------------+
 *              |      E     |
 *              +------------+
 *   For such a situation, the clearing range C can be treated as two parts:
 *   the first part ends at the start LBA of range E, and the second part
 *   starts at the same LBA as range E.
 *         +----+-----+               +----+   +-----+
 *         | C1 | C2  |               | C1 |   | C2  |
 *         +----+-----+         ===>  +----+   +-----+
 *              +------------+                 +------------+
 *              |      E     |                 |      E     |
 *              +------------+                 +------------+
 *   Now the first part C1 can be handled as condition 1), and the second
 *   part C2 can be handled as condition 3.1) in the next loop.
 * 5.2) The already set bad block range is behind overlapped with the
 *   clearing range.
 *                 +----------+
 *                 |     C    |
 *                 +----------+
 *         +------------+
 *         |      E     |
 *         +------------+
 *   For such a situation, the clearing range C can be treated as two parts:
 *   the first part C1 ends at the same end LBA as range E, and the second
 *   part C2 starts at the end LBA of range E.
 *                 +----+-----+                 +----+    +-----+
 *                 | C1 | C2  |                 | C1 |    | C2  |
 *                 +----+-----+  ===>           +----+    +-----+
 *         +------------+               +------------+
 *         |      E     |               |      E     |
 *         +------------+               +------------+
 *   Now the first part clearing range C1 can be handled as condition 4),
 *   and the second part clearing range C2 can be handled as condition 1)
 *   in the next loop.
 *
 *   All bad blocks range clearing can be simplified into the above 5
 *   situations by only handling the head part of the clearing range in each
 *   run of the while-loop. The idea is similar to bad blocks range setting
 *   but much simpler.
 */
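
/*
 * A minimal sketch of the loop structure described above. Here
 * handle_one_piece() is hypothetical and stands in for the case analysis
 * that _badblocks_set() performs per iteration; this is illustration only,
 * not the in-kernel routine:
 *
 *	sector_t s = start;
 *	sector_t sectors = len;
 *
 *	while (sectors > 0) {
 *		// Resolve exactly one of the simplified situations 1) - 5)
 *		// for the head of [s, s + sectors) and return how many
 *		// sectors were consumed.
 *		int done = handle_one_piece(bb, s, sectors, ack);
 *
 *		if (done <= 0)
 *			break;	// table full and nothing mergeable
 *		s += done;
 *		sectors -= done;
 *	}
 *
 * Each iteration only deals with the head piece, which is why the case
 * analysis stays small even though one request may overlap many existing
 * ranges.
 */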

/*
 * Find the range that starts at-or-before 's' from the bad table. The
 * search starts from index 'hint' and stops before index 'hint_end' of
 * the bad table.
 */
static int prev_by_hint(struct badblocks *bb, sector_t s, int hint)
{
	int hint_end = hint + 2;
	u64 *p = bb->page;
	int ret = -1;

	while ((hint < hint_end) && ((hint + 1) <= bb->count) &&
	       (BB_OFFSET(p[hint]) <= s)) {
		if ((hint + 1) == bb->count || BB_OFFSET(p[hint + 1]) > s) {
			ret = hint;
			break;
		}
		hint++;
	}

	return ret;
}

/*
 * Find the range that starts at-or-before bad->start. If 'hint' is
 * provided (hint >= 0) then search the bad table from 'hint' first. It
 * is quite likely that the wanted bad range can be found from the hint
 * index, and then the expensive binary search can be avoided.
 */
static int prev_badblocks(struct badblocks *bb, struct badblocks_context *bad,
			  int hint)
{
	sector_t s = bad->start;
	int ret = -1;
	int lo, hi;
	u64 *p;

	if (!bb->count)
		goto out;

	if (hint >= 0) {
		ret = prev_by_hint(bb, s, hint);
		if (ret >= 0)
			goto out;
	}

	lo = 0;
	hi = bb->count;
	p = bb->page;

	/* The following bisect search might be unnecessary */
	if (BB_OFFSET(p[lo]) > s)
		return -1;
	if (BB_OFFSET(p[hi - 1]) <= s)
		return hi - 1;

	/* Do bisect search in bad table */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a == s) {
			ret = mid;
			goto out;
		}

		if (a < s)
			lo = mid;
		else
			hi = mid;
	}

	if (BB_OFFSET(p[lo]) <= s)
		ret = lo;
out:
	return ret;
}
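
/*
 * A small worked example of the lookup above. Suppose the bad table holds
 * three ranges, by start offset: p[0] = [8, 16), p[1] = [32, 40) and
 * p[2] = [64, 80). Then:
 *
 *	prev_badblocks(bb, {start = 4},  -1) == -1  (before all ranges)
 *	prev_badblocks(bb, {start = 32}, -1) ==  1  (exact start match)
 *	prev_badblocks(bb, {start = 50}, -1) ==  1  (p[1] is the last range
 *						     starting at-or-before 50)
 *	prev_badblocks(bb, {start = 90}, -1) ==  2  (after all ranges)
 *
 * When a caller walks a setting range from head to tail and passes the
 * previous return value back in as 'hint', prev_by_hint() usually answers
 * without the bisection.
 */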

/*
 * Return 'true' if the range indicated by 'bad' can be backward merged
 * with the bad range (from the bad table) indexed by 'behind'.
 */
static bool can_merge_behind(struct badblocks *bb,
			     struct badblocks_context *bad, int behind)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;

	if ((s < BB_OFFSET(p[behind])) &&
	    ((s + sectors) >= BB_OFFSET(p[behind])) &&
	    ((BB_END(p[behind]) - s) <= BB_MAX_LEN) &&
	    BB_ACK(p[behind]) == bad->ack)
		return true;
	return false;
}

/*
 * Do backward merge for the range indicated by 'bad' and the bad range
 * (from the bad table) indexed by 'behind'. The return value is the
 * number of sectors merged from bad->len.
 */
static int behind_merge(struct badblocks *bb, struct badblocks_context *bad,
			int behind)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;
	int merged = 0;

	WARN_ON(s >= BB_OFFSET(p[behind]));
	WARN_ON((s + sectors) < BB_OFFSET(p[behind]));

	if (s < BB_OFFSET(p[behind])) {
		merged = BB_OFFSET(p[behind]) - s;
		p[behind] = BB_MAKE(s, BB_LEN(p[behind]) + merged, bad->ack);

		WARN_ON((BB_LEN(p[behind]) + merged) >= BB_MAX_LEN);
	}

	return merged;
}

/*
 * Return 'true' if the range indicated by 'bad' can be forward
 * merged with the bad range (from the bad table) indexed by 'prev'.
 */
static bool can_merge_front(struct badblocks *bb, int prev,
			    struct badblocks_context *bad)
{
	sector_t s = bad->start;
	u64 *p = bb->page;

	if (BB_ACK(p[prev]) == bad->ack &&
	    (s < BB_END(p[prev]) ||
	     (s == BB_END(p[prev]) && (BB_LEN(p[prev]) < BB_MAX_LEN))))
		return true;
	return false;
}

/*
 * Do forward merge for the range indicated by 'bad' and the bad range
 * (from the bad table) indexed by 'prev'. The return value is the
 * number of sectors merged from bad->len. See the example after
 * front_merge() below.
 */
static int front_merge(struct badblocks *bb, int prev,
		       struct badblocks_context *bad)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;
	int merged = 0;

	WARN_ON(s > BB_END(p[prev]));

	if (s < BB_END(p[prev])) {
		merged = min_t(sector_t, sectors, BB_END(p[prev]) - s);
	} else {
		merged = min_t(sector_t, sectors, BB_MAX_LEN - BB_LEN(p[prev]));
		if ((prev + 1) < bb->count &&
		    merged > (BB_OFFSET(p[prev + 1]) - BB_END(p[prev]))) {
			merged = BB_OFFSET(p[prev + 1]) - BB_END(p[prev]);
		}

		p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
				  BB_LEN(p[prev]) + merged, bad->ack);
	}

	return merged;
}
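
/*
 * An example of the two front_merge() branches above (BB_MAX_LEN is 512):
 * with p[prev] = [100, 200) and bad = {start = 150, len = 100}, the ranges
 * overlap, so only the overlapped part is consumed,
 *
 *	merged = min(100, BB_END(p[prev]) - 150) = 50
 *
 * and p[prev] is left unchanged. With p[prev] = [100, 200) and
 * bad = {start = 200, len = 400}, the ranges are adjacent, so p[prev] is
 * extended in place, capped by BB_MAX_LEN and by the start of p[prev + 1]
 * (if any),
 *
 *	merged = min(400, 512 - 100) = 400, p[prev] becomes [100, 600)
 */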

/*
 * 'Combine' is a special case that can_merge_front() is not able to
 * handle: if a bad range (indexed by 'prev' in the bad table) starts
 * exactly at bad->start, the bad range ahead of 'prev' (indexed by
 * 'prev - 1' in the bad table) ends exactly where 'prev' starts, and
 * the sum of their lengths does not exceed the BB_MAX_LEN limitation,
 * then these two bad ranges (from the bad table) can be combined.
 *
 * Return 'true' if the bad ranges indexed by 'prev' and 'prev - 1' in
 * the bad table can be combined.
 */
static bool can_combine_front(struct badblocks *bb, int prev,
			      struct badblocks_context *bad)
{
	u64 *p = bb->page;

	if ((prev > 0) &&
	    (BB_OFFSET(p[prev]) == bad->start) &&
	    (BB_END(p[prev - 1]) == BB_OFFSET(p[prev])) &&
	    (BB_LEN(p[prev - 1]) + BB_LEN(p[prev]) <= BB_MAX_LEN) &&
	    (BB_ACK(p[prev - 1]) == BB_ACK(p[prev])))
		return true;
	return false;
}

/*
 * Combine the bad ranges indexed by 'prev' and 'prev - 1' (from the bad
 * table) into one larger bad range, and the new range is indexed by
 * 'prev - 1'.
 * The caller of front_combine() will decrease bb->count, therefore
 * it is unnecessary to clear p[prev] after the front combine.
 */
static void front_combine(struct badblocks *bb, int prev)
{
	u64 *p = bb->page;

	p[prev - 1] = BB_MAKE(BB_OFFSET(p[prev - 1]),
			      BB_LEN(p[prev - 1]) + BB_LEN(p[prev]),
			      BB_ACK(p[prev]));
	if ((prev + 1) < bb->count)
		memmove(p + prev, p + prev + 1, (bb->count - prev - 1) * 8);
}

/*
 * Return 'true' if the range indicated by 'bad' is exactly forward
 * overlapped with the bad range (from the bad table) indexed by 'front'.
 * Exactly forward overlap means the bad range (from the bad table) indexed
 * by 'front' does not cover the whole range indicated by 'bad'.
 */
static bool overlap_front(struct badblocks *bb, int front,
			  struct badblocks_context *bad)
{
	u64 *p = bb->page;

	if (bad->start >= BB_OFFSET(p[front]) &&
	    bad->start < BB_END(p[front]))
		return true;
	return false;
}

/*
 * Return 'true' if the range indicated by 'bad' is exactly backward
 * overlapped with the bad range (from the bad table) indexed by 'behind'.
 */
static bool overlap_behind(struct badblocks *bb, struct badblocks_context *bad,
			   int behind)
{
	u64 *p = bb->page;

	if (bad->start < BB_OFFSET(p[behind]) &&
	    (bad->start + bad->len) > BB_OFFSET(p[behind]))
		return true;
	return false;
}

/*
 * Return 'true' if the range indicated by 'bad' can overwrite the bad
 * range (from the bad table) indexed by 'prev'.
 *
 * The range indicated by 'bad' can overwrite the bad range indexed by
 * 'prev' when,
 * 1) The whole range indicated by 'bad' covers part or all of the bad
 *    range (from the bad table) indexed by 'prev'.
 * 2) The ack value of 'bad' is greater than or equal to the ack value
 *    of the bad range 'prev'.
 *
 * If the overwrite doesn't cover the whole bad range (from the bad table)
 * indexed by 'prev', new range(s) might be split from the existing bad
 * range,
 * 1) The overwrite covers the head or tail part of the existing bad
 *    range: 1 extra bad range will be split off and added into the bad
 *    table.
 * 2) The overwrite covers the middle of the existing bad range: 2 extra
 *    bad ranges will be split off (ahead of and after the overwritten
 *    range) and added into the bad table.
 * The number of extra split ranges of the overwrite is stored in 'extra'
 * and returned to the caller. See the examples after can_front_overwrite()
 * below.
 */
static bool can_front_overwrite(struct badblocks *bb, int prev,
				struct badblocks_context *bad, int *extra)
{
	u64 *p = bb->page;
	int len;

	WARN_ON(!overlap_front(bb, prev, bad));

	if (BB_ACK(p[prev]) >= bad->ack)
		return false;

	if (BB_END(p[prev]) <= (bad->start + bad->len)) {
		len = BB_END(p[prev]) - bad->start;
		if (BB_OFFSET(p[prev]) == bad->start)
			*extra = 0;
		else
			*extra = 1;

		bad->len = len;
	} else {
		if (BB_OFFSET(p[prev]) == bad->start)
			*extra = 1;
		else
			/*
			 * The prev range will be split into two: besides
			 * the overwritten one, an extra slot is needed
			 * from the bad table.
			 */
			*extra = 2;
	}

	if ((bb->count + (*extra)) > MAX_BADBLOCKS)
		return false;

	return true;
}
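
/*
 * The 'extra' accounting above, illustrated with p[prev] = [100, 200):
 *
 *	bad = {start = 100, len = 100}: whole range overwritten, extra = 0
 *	bad = {start = 150, len = 100}: tail overwritten, the head [100, 150)
 *				survives as one split range, extra = 1
 *	bad = {start = 100, len =  50}: head overwritten, the tail [150, 200)
 *				survives as one split range, extra = 1
 *	bad = {start = 120, len =  30}: middle overwritten, both [100, 120)
 *				and [150, 200) survive, extra = 2
 */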

/*
 * Do the overwrite from the range indicated by 'bad' to the bad range
 * (from the bad table) indexed by 'prev'.
 * The previously called can_front_overwrite() tells how many extra bad
 * range(s) may be split off and added into the bad table. All the
 * splitting cases of the bad table are handled here.
 */
static int front_overwrite(struct badblocks *bb, int prev,
			   struct badblocks_context *bad, int extra)
{
	u64 *p = bb->page;
	sector_t orig_end = BB_END(p[prev]);
	int orig_ack = BB_ACK(p[prev]);

	switch (extra) {
	case 0:
		p[prev] = BB_MAKE(BB_OFFSET(p[prev]), BB_LEN(p[prev]),
				  bad->ack);
		break;
	case 1:
		if (BB_OFFSET(p[prev]) == bad->start) {
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
					  bad->len, bad->ack);
			memmove(p + prev + 2, p + prev + 1,
				(bb->count - prev - 1) * 8);
			p[prev + 1] = BB_MAKE(bad->start + bad->len,
					      orig_end - BB_END(p[prev]),
					      orig_ack);
		} else {
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
					  bad->start - BB_OFFSET(p[prev]),
					  orig_ack);
			/*
			 * prev + 2 -> prev + 1 + 1, which is for,
			 * 1) prev + 1: the slot index of the previous one
			 * 2) + 1: one more slot for extra being 1.
			 */
			memmove(p + prev + 2, p + prev + 1,
				(bb->count - prev - 1) * 8);
			p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
		}
		break;
	case 2:
		p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
				  bad->start - BB_OFFSET(p[prev]),
				  orig_ack);
		/*
		 * prev + 3 -> prev + 1 + 2, which is for,
		 * 1) prev + 1: the slot index of the previous one
		 * 2) + 2: two more slots for extra being 2.
		 */
		memmove(p + prev + 3, p + prev + 1,
			(bb->count - prev - 1) * 8);
		p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
		p[prev + 2] = BB_MAKE(BB_END(p[prev + 1]),
				      orig_end - BB_END(p[prev + 1]),
				      orig_ack);
		break;
	default:
		break;
	}

	return bad->len;
}

/*
 * Explicitly insert a range indicated by 'bad' into the bad table, where
 * the location is indexed by 'at'.
 */
static int insert_at(struct badblocks *bb, int at, struct badblocks_context *bad)
{
	u64 *p = bb->page;
	int len;

	WARN_ON(badblocks_full(bb));

	len = min_t(sector_t, bad->len, BB_MAX_LEN);
	if (at < bb->count)
		memmove(p + at + 1, p + at, (bb->count - at) * 8);
	p[at] = BB_MAKE(bad->start, len, bad->ack);

	return len;
}

static void badblocks_update_acked(struct badblocks *bb)
{
	bool unacked = false;
	u64 *p = bb->page;
	int i;

	if (!bb->unacked_exist)
		return;

	for (i = 0; i < bb->count; i++) {
		if (!BB_ACK(p[i])) {
			unacked = true;
			break;
		}
	}

	if (!unacked)
		bb->unacked_exist = 0;
}

/* Do the exact work to set a bad block range into the bad block table */
static int _badblocks_set(struct badblocks *bb, sector_t s, int sectors,
			  int acknowledged)
{
	int retried = 0, space_desired = 0;
	int orig_len, len = 0, added = 0;
	struct badblocks_context bad;
	int prev = -1, hint = -1;
	sector_t orig_start;
	unsigned long flags;
	int rv = 0;
	u64 *p;

	if (bb->shift < 0)
		/* badblocks are disabled */
		return 1;

	if (sectors == 0)
		/* Invalid sectors number */
		return 1;

	if (bb->shift) {
		/* round the start down, and the end up */
		sector_t next = s + sectors;

		s = rounddown(s, 1 << bb->shift);
		next = roundup(next, 1 << bb->shift);
		sectors = next - s;
	}

	write_seqlock_irqsave(&bb->lock, flags);

	orig_start = s;
	orig_len = sectors;
	bad.ack = acknowledged;
	p = bb->page;

re_insert:
	bad.start = s;
	bad.len = sectors;
	len = 0;

	if (badblocks_empty(bb)) {
		len = insert_at(bb, 0, &bad);
		bb->count++;
		added++;
		goto update_sectors;
	}

	prev = prev_badblocks(bb, &bad, hint);

	/* start before all badblocks */
	if (prev < 0) {
		if (!badblocks_full(bb)) {
			/* insert on the first */
			if (bad.len > (BB_OFFSET(p[0]) - bad.start))
				bad.len = BB_OFFSET(p[0]) - bad.start;
			len = insert_at(bb, 0, &bad);
			bb->count++;
			added++;
			hint = 0;
			goto update_sectors;
		}

		/* No space, try to merge */
		if (overlap_behind(bb, &bad, 0)) {
			if (can_merge_behind(bb, &bad, 0)) {
				len = behind_merge(bb, &bad, 0);
				added++;
			} else {
				len = BB_OFFSET(p[0]) - s;
				space_desired = 1;
			}
			hint = 0;
			goto update_sectors;
		}

		/* no table space and give up */
		goto out;
	}

	/* in case p[prev-1] can be merged with p[prev] */
	if (can_combine_front(bb, prev, &bad)) {
		front_combine(bb, prev);
		bb->count--;
		added++;
		hint = prev;
		goto update_sectors;
	}

	if (overlap_front(bb, prev, &bad)) {
		if (can_merge_front(bb, prev, &bad)) {
			len = front_merge(bb, prev, &bad);
			added++;
		} else {
			int extra = 0;

			if (!can_front_overwrite(bb, prev, &bad, &extra)) {
				len = min_t(sector_t,
					    BB_END(p[prev]) - s, sectors);
				hint = prev;
				goto update_sectors;
			}

			len = front_overwrite(bb, prev, &bad, extra);
			added++;
			bb->count += extra;

			if (can_combine_front(bb, prev, &bad)) {
				front_combine(bb, prev);
				bb->count--;
			}
		}
		hint = prev;
		goto update_sectors;
	}

	if (can_merge_front(bb, prev, &bad)) {
		len = front_merge(bb, prev, &bad);
		added++;
		hint = prev;
		goto update_sectors;
	}

	/* if no space in table, still try to merge in the covered range */
	if (badblocks_full(bb)) {
		/* skip the cannot-merge range */
		if (((prev + 1) < bb->count) &&
		    overlap_behind(bb, &bad, prev + 1) &&
		    ((s + sectors) >= BB_END(p[prev + 1]))) {
			len = BB_END(p[prev + 1]) - s;
			hint = prev + 1;
			goto update_sectors;
		}

		/* no retry any more */
		len = sectors;
		space_desired = 1;
		hint = -1;
		goto update_sectors;
	}

	/* cannot merge and there is space in bad table */
	if ((prev + 1) < bb->count &&
	    overlap_behind(bb, &bad, prev + 1))
		bad.len = min_t(sector_t,
				bad.len, BB_OFFSET(p[prev + 1]) - bad.start);

	len = insert_at(bb, prev + 1, &bad);
	bb->count++;
	added++;
	hint = prev + 1;

update_sectors:
	s += len;
	sectors -= len;

	if (sectors > 0)
		goto re_insert;

	WARN_ON(sectors < 0);

	/*
	 * Check whether the following already set range can be
	 * merged. (prev < 0) condition is not handled here,
	 * because it's already complicated enough.
	 */
	if (prev >= 0 &&
	    (prev + 1) < bb->count &&
	    BB_END(p[prev]) == BB_OFFSET(p[prev + 1]) &&
	    (BB_LEN(p[prev]) + BB_LEN(p[prev + 1])) <= BB_MAX_LEN &&
	    BB_ACK(p[prev]) == BB_ACK(p[prev + 1])) {
		p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
				  BB_LEN(p[prev]) + BB_LEN(p[prev + 1]),
				  BB_ACK(p[prev]));

		if ((prev + 2) < bb->count)
			memmove(p + prev + 1, p + prev + 2,
				(bb->count - (prev + 2)) * 8);
		bb->count--;
	}

	if (space_desired && !badblocks_full(bb)) {
		s = orig_start;
		sectors = orig_len;
		space_desired = 0;
		if (retried++ < 3)
			goto re_insert;
	}

out:
	if (added) {
		set_changed(bb);

		if (!acknowledged)
			bb->unacked_exist = 1;
		else
			badblocks_update_acked(bb);
	}

	write_sequnlock_irqrestore(&bb->lock, flags);

	if (!added)
		rv = 1;

	return rv;
}

/*
 * Clear the bad block range from the bad block table which is front
 * overlapped with the clearing range. The return value is how many
 * sectors from an already set bad block range are cleared. If the whole
 * bad block range is covered by the clearing range and fully cleared,
 * 'deleted' is set to 1 for the caller to reduce bb->count.
 */
static int front_clear(struct badblocks *bb, int prev,
		       struct badblocks_context *bad, int *deleted)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;
	int cleared = 0;

	*deleted = 0;
	if (s == BB_OFFSET(p[prev])) {
		if (BB_LEN(p[prev]) > sectors) {
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]) + sectors,
					  BB_LEN(p[prev]) - sectors,
					  BB_ACK(p[prev]));
			cleared = sectors;
		} else {
			/* BB_LEN(p[prev]) <= sectors */
			cleared = BB_LEN(p[prev]);
			if ((prev + 1) < bb->count)
				memmove(p + prev, p + prev + 1,
					(bb->count - prev - 1) * 8);
			*deleted = 1;
		}
	} else if (s > BB_OFFSET(p[prev])) {
		if (BB_END(p[prev]) <= (s + sectors)) {
			cleared = BB_END(p[prev]) - s;
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
					  s - BB_OFFSET(p[prev]),
					  BB_ACK(p[prev]));
		} else {
			/* Splitting is handled in front_splitting_clear() */
			BUG();
		}
	}

	return cleared;
}
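
/*
 * front_clear() by example, with p[prev] = [100, 200):
 *
 *	bad = {start = 100, len =  50}: head cleared, p[prev] shrinks to
 *				[150, 200), cleared = 50
 *	bad = {start = 100, len = 150}: fully covered, p[prev] is deleted
 *				(*deleted = 1), cleared = 100
 *	bad = {start = 150, len = 100}: tail cleared, p[prev] shrinks to
 *				[100, 150), cleared = 50
 *
 * A clearing range strictly inside p[prev] never reaches here; it goes to
 * front_splitting_clear() below.
 */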

/*
 * Handle the condition that the clearing range hits the middle of an
 * already set bad block range from the bad block table. In this condition
 * the existing bad block range is split into two after the middle part is
 * cleared.
 */
static int front_splitting_clear(struct badblocks *bb, int prev,
				 struct badblocks_context *bad)
{
	u64 *p = bb->page;
	u64 end = BB_END(p[prev]);
	int ack = BB_ACK(p[prev]);
	sector_t sectors = bad->len;
	sector_t s = bad->start;

	p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
			  s - BB_OFFSET(p[prev]),
			  ack);
	memmove(p + prev + 2, p + prev + 1, (bb->count - prev - 1) * 8);
	p[prev + 1] = BB_MAKE(s + sectors, end - s - sectors, ack);
	return sectors;
}
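
/*
 * front_splitting_clear() by example: with p[prev] = [100, 200) and
 * bad = {start = 120, len = 30}, the middle [120, 150) is cleared and the
 * survivors are written back as two table entries,
 *
 *	p[prev]     = [100, 120)	(shrunken original)
 *	p[prev + 1] = [150, 200)	(newly inserted tail)
 *
 * The caller accounts for the extra slot by incrementing bb->count.
 */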

/* Do the exact work to clear a bad block range from the bad block table */
static int _badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
	struct badblocks_context bad;
	int prev = -1, hint = -1;
	int len = 0, cleared = 0;
	int rv = 0;
	u64 *p;

	if (bb->shift < 0)
		/* badblocks are disabled */
		return 1;

	if (sectors == 0)
		/* Invalid sectors number */
		return 1;

	if (bb->shift) {
		sector_t target;

		/* When clearing we round the start up and the end down.
		 * This should not matter as the shift should align with
		 * the block size and no rounding should ever be needed.
		 * However it is better to think a block is bad when it
		 * isn't than to think a block is not bad when it is.
		 */
		target = s + sectors;
		s = roundup(s, 1 << bb->shift);
		target = rounddown(target, 1 << bb->shift);
		sectors = target - s;
	}

	write_seqlock_irq(&bb->lock);

	bad.ack = true;
	p = bb->page;

re_clear:
	bad.start = s;
	bad.len = sectors;

	if (badblocks_empty(bb)) {
		len = sectors;
		cleared++;
		goto update_sectors;
	}

	prev = prev_badblocks(bb, &bad, hint);

	/* Start before all badblocks */
	if (prev < 0) {
		if (overlap_behind(bb, &bad, 0)) {
			len = BB_OFFSET(p[0]) - s;
			hint = 0;
		} else {
			len = sectors;
		}
		/*
		 * Both situations clear a non-bad range and
		 * should be treated as successful
		 */
		cleared++;
		goto update_sectors;
	}

	/* Start after all badblocks */
	if ((prev + 1) >= bb->count && !overlap_front(bb, prev, &bad)) {
		len = sectors;
		cleared++;
		goto update_sectors;
	}

	/* Clear will split a bad record but the table is full */
	if (badblocks_full(bb) && (BB_OFFSET(p[prev]) < bad.start) &&
	    (BB_END(p[prev]) > (bad.start + sectors))) {
		len = sectors;
		goto update_sectors;
	}

	if (overlap_front(bb, prev, &bad)) {
		if ((BB_OFFSET(p[prev]) < bad.start) &&
		    (BB_END(p[prev]) > (bad.start + bad.len))) {
			/* Splitting */
			if ((bb->count + 1) <= MAX_BADBLOCKS) {
				len = front_splitting_clear(bb, prev, &bad);
				bb->count += 1;
				cleared++;
			} else {
				/* No space to split, give up */
				len = sectors;
			}
		} else {
			int deleted = 0;

			len = front_clear(bb, prev, &bad, &deleted);
			bb->count -= deleted;
			cleared++;
			hint = prev;
		}

		goto update_sectors;
	}

	/* Not front overlap, but behind overlap */
	if ((prev + 1) < bb->count && overlap_behind(bb, &bad, prev + 1)) {
		len = BB_OFFSET(p[prev + 1]) - bad.start;
		hint = prev + 1;
		/* Clearing a non-bad range should be treated as successful */
		cleared++;
		goto update_sectors;
	}

	/* Does not cover any badblocks range in the table */
	len = sectors;
	/* Clearing a non-bad range should be treated as successful */
	cleared++;

update_sectors:
	s += len;
	sectors -= len;

	if (sectors > 0)
		goto re_clear;

	WARN_ON(sectors < 0);

	if (cleared) {
		badblocks_update_acked(bb);
		set_changed(bb);
	}

	write_sequnlock_irq(&bb->lock);

	if (!cleared)
		rv = 1;

	return rv;
}

/* Do the exact work to check a bad block range against the bad block table */
static int _badblocks_check(struct badblocks *bb, sector_t s, int sectors,
			    sector_t *first_bad, int *bad_sectors)
{
	int unacked_badblocks, acked_badblocks;
	int prev = -1, hint = -1, set = 0;
	struct badblocks_context bad;
	unsigned int seq;
	int len, rv;
	u64 *p;

	WARN_ON(bb->shift < 0 || sectors == 0);

	if (bb->shift > 0) {
		sector_t target;

		/* round the start down, and the end up */
		target = s + sectors;
		s = rounddown(s, 1 << bb->shift);
		target = roundup(target, 1 << bb->shift);
		sectors = target - s;
	}

retry:
	seq = read_seqbegin(&bb->lock);

	p = bb->page;
	unacked_badblocks = 0;
	acked_badblocks = 0;

re_check:
	bad.start = s;
	bad.len = sectors;

	if (badblocks_empty(bb)) {
		len = sectors;
		goto update_sectors;
	}

	prev = prev_badblocks(bb, &bad, hint);

	/* start after all badblocks */
	if ((prev >= 0) &&
	    ((prev + 1) >= bb->count) && !overlap_front(bb, prev, &bad)) {
		len = sectors;
		goto update_sectors;
	}

	/* Overlapped with front badblocks record */
	if ((prev >= 0) && overlap_front(bb, prev, &bad)) {
		if (BB_ACK(p[prev]))
			acked_badblocks++;
		else
			unacked_badblocks++;

		if (BB_END(p[prev]) >= (s + sectors))
			len = sectors;
		else
			len = BB_END(p[prev]) - s;

		if (set == 0) {
			*first_bad = BB_OFFSET(p[prev]);
			*bad_sectors = BB_LEN(p[prev]);
			set = 1;
		}
		goto update_sectors;
	}

	/* Not front overlap, but behind overlap */
	if ((prev + 1) < bb->count && overlap_behind(bb, &bad, prev + 1)) {
		len = BB_OFFSET(p[prev + 1]) - bad.start;
		hint = prev + 1;
		goto update_sectors;
	}

	/* Does not cover any badblocks range in the table */
	len = sectors;

update_sectors:
	s += len;
	sectors -= len;

	if (sectors > 0)
		goto re_check;

	WARN_ON(sectors < 0);

	if (unacked_badblocks > 0)
		rv = -1;
	else if (acked_badblocks > 0)
		rv = 1;
	else
		rv = 0;

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return rv;
}

/**
 * badblocks_check() - check a given range for bad sectors
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		sector (start) at which to check for badblocks
 * @sectors:	number of sectors to check for badblocks
 * @first_bad:	pointer to store location of the first badblock
 * @bad_sectors: pointer to store number of badblocks after @first_bad
 *
 * We can record which blocks on each device are 'bad' and so just
 * fail those blocks, or that stripe, rather than the whole device.
 * Entries in the bad-block table are 64bits wide.  This comprises:
 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 *  A 'shift' can be set so that larger blocks are tracked and
 *  consequently larger devices can be covered.
 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 *
 * Locking of the bad-block table uses a seqlock so badblocks_check
 * might need to retry if it is very unlucky.
 * We will sometimes want to check for bad blocks in a bi_end_io function,
 * so we use the write_seqlock_irq variant.
 *
 * When looking for a bad block we specify a range and want to
 * know if any block in the range is bad.  So we binary-search
 * to the last range that starts at-or-before the given endpoint,
 * (or "before the sector after the target range")
 * then see if it ends after the given start.
 *
 * Return:
 *  0: there are no known bad blocks in the range
 *  1: there are known bad blocks which are all acknowledged
 * -1: there are bad blocks which have not yet been acknowledged in metadata.
 * plus the start/length of the first bad section we overlap.
 */
int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
			sector_t *first_bad, int *bad_sectors)
{
	return _badblocks_check(bb, s, sectors, first_bad, bad_sectors);
}
EXPORT_SYMBOL_GPL(badblocks_check);

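/*
 * For reference, the 64-bit entry layout described above maps to the BB_*()
 * helpers used throughout this file. They are defined in
 * include/linux/badblocks.h roughly as follows (reproduced here only for
 * illustration; the header is authoritative):
 *
 *	#define BB_OFFSET(x)	(((x) & BB_OFFSET_MASK) >> 9)
 *	#define BB_LEN(x)	(((x) & BB_LEN_MASK) + 1)
 *	#define BB_ACK(x)	(!!((x) & BB_ACK_MASK))
 *	#define BB_MAKE(a, l, ack) \
 *		(((a) << 9) | ((l) - 1) | ((u64)(!!(ack)) << 63))
 *
 * i.e. bits 0-8 store the length minus one (1-512 sectors), bits 9-62 store
 * the start sector, and bit 63 is the 'acknowledged' flag.
 */
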
/**
 * badblocks_set() - Add a range of bad blocks to the table.
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		first sector to mark as bad
 * @sectors:	number of sectors to mark as bad
 * @acknowledged: whether to mark the bad sectors as acknowledged
 *
 * This might extend the table, or might contract it if two adjacent ranges
 * can be merged. We binary-search to find the 'insertion' point, then
 * decide how best to handle it.
 *
 * Return:
 *  0: success
 *  1: failed to set badblocks (out of space)
 */
int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
			int acknowledged)
{
	return _badblocks_set(bb, s, sectors, acknowledged);
}
EXPORT_SYMBOL_GPL(badblocks_set);

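/*
 * A minimal usage sketch for the exported API (a hypothetical caller;
 * error handling elided):
 *
 *	struct badblocks bb;
 *	sector_t first_bad;
 *	int bad_sectors, rv;
 *
 *	if (badblocks_init(&bb, 1))		// 1: enable accounting
 *		return;
 *	badblocks_set(&bb, 512, 8, 1);		// sectors 512..519, acked
 *	rv = badblocks_check(&bb, 508, 16, &first_bad, &bad_sectors);
 *	// rv == 1 (known and acknowledged), first_bad == 512,
 *	// bad_sectors == 8
 *	badblocks_clear(&bb, 512, 8);
 *	badblocks_exit(&bb);
 */
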
/**
 * badblocks_clear() - Remove a range of bad blocks from the table.
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		first sector to clear
 * @sectors:	number of sectors to clear
 *
 * This may involve extending the table if we split a region,
 * but it must not fail.  So if the table becomes full, we just
 * drop the remove request.
 *
 * Return:
 *  0: success
 *  1: failed to clear badblocks
 */
int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
	return _badblocks_clear(bb, s, sectors);
}
EXPORT_SYMBOL_GPL(badblocks_clear);

/**
 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
 * @bb:		the badblocks structure that holds all badblock information
 *
 * This only succeeds if ->changed is clear.  It is used by
 * in-kernel metadata updates.
 */
void ack_all_badblocks(struct badblocks *bb)
{
	if (bb->page == NULL || bb->changed)
		/* no point even trying */
		return;
	write_seqlock_irq(&bb->lock);

	if (bb->changed == 0 && bb->unacked_exist) {
		u64 *p = bb->page;
		int i;

		for (i = 0; i < bb->count; i++) {
			if (!BB_ACK(p[i])) {
				sector_t start = BB_OFFSET(p[i]);
				int len = BB_LEN(p[i]);

				p[i] = BB_MAKE(start, len, 1);
			}
		}
		bb->unacked_exist = 0;
	}
	write_sequnlock_irq(&bb->lock);
}
EXPORT_SYMBOL_GPL(ack_all_badblocks);

/**
 * badblocks_show() - sysfs access to bad-blocks list
 * @bb:		the badblocks structure that holds all badblock information
 * @page:	buffer received from sysfs
 * @unack:	whether to show unacknowledged badblocks
 *
 * Return:
 *  Length of returned data
 */
ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
{
	size_t len;
	int i;
	u64 *p = bb->page;
	unsigned int seq;

	if (bb->shift < 0)
		return 0;

retry:
	seq = read_seqbegin(&bb->lock);

	len = 0;
	i = 0;

	while (len < PAGE_SIZE && i < bb->count) {
		sector_t s = BB_OFFSET(p[i]);
		unsigned int length = BB_LEN(p[i]);
		int ack = BB_ACK(p[i]);

		i++;

		if (unack && ack)
			continue;

		len += snprintf(page + len, PAGE_SIZE - len, "%llu %u\n",
				(unsigned long long)s << bb->shift,
				length << bb->shift);
	}
	if (unack && len == 0)
		bb->unacked_exist = 0;

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return len;
}
EXPORT_SYMBOL_GPL(badblocks_show);

/**
 * badblocks_store() - sysfs access to bad-blocks list
 * @bb:		the badblocks structure that holds all badblock information
 * @page:	buffer received from sysfs
 * @len:	length of data received from sysfs
 * @unack:	whether to store the badblocks as unacknowledged
 *
 * Return:
 *  Length of the buffer processed or -ve error.
 */
ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
			int unack)
{
	unsigned long long sector;
	int length;
	char newline;

	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
	case 3:
		if (newline != '\n')
			return -EINVAL;
		fallthrough;
	case 2:
		if (length <= 0)
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (badblocks_set(bb, sector, length, !unack))
		return -ENOSPC;
	else
		return len;
}
EXPORT_SYMBOL_GPL(badblocks_store);

static int __badblocks_init(struct device *dev, struct badblocks *bb,
		int enable)
{
	bb->dev = dev;
	bb->count = 0;
	if (enable)
		bb->shift = 0;
	else
		bb->shift = -1;
	if (dev)
		bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
	else
		bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!bb->page) {
		bb->shift = -1;
		return -ENOMEM;
	}
	seqlock_init(&bb->lock);

	return 0;
}

/**
 * badblocks_init() - initialize the badblocks structure
 * @bb:		the badblocks structure that holds all badblock information
 * @enable:	whether to enable badblocks accounting
 *
 * Return:
 *  0: success
 *  -ve errno: on error
 */
int badblocks_init(struct badblocks *bb, int enable)
{
	return __badblocks_init(NULL, bb, enable);
}
EXPORT_SYMBOL_GPL(badblocks_init);

int devm_init_badblocks(struct device *dev, struct badblocks *bb)
{
	if (!bb)
		return -EINVAL;
	return __badblocks_init(dev, bb, 1);
}
EXPORT_SYMBOL_GPL(devm_init_badblocks);

/**
 * badblocks_exit() - free the badblocks structure
 * @bb:		the badblocks structure that holds all badblock information
 */
void badblocks_exit(struct badblocks *bb)
{
	if (!bb)
		return;
	if (bb->dev)
		devm_kfree(bb->dev, bb->page);
	else
		kfree(bb->page);
	bb->page = NULL;
}
EXPORT_SYMBOL_GPL(badblocks_exit);