// SPDX-License-Identifier: GPL-2.0-only
/*
 * This file is part of UBIFS.
 *
 * Copyright (C) 2006-2008 Nokia Corporation.
 *
 * Authors: Adrian Hunter
 *          Artem Bityutskiy (Битюцкий Артём)
 */

/* This file implements TNC functions for committing */

#include <linux/random.h>
#include "ubifs.h"

/**
 * make_idx_node - make an index node for fill-the-gaps method of TNC commit.
 * @c: UBIFS file-system description object
 * @idx: buffer in which to place new index node
 * @znode: znode from which to make new index node
 * @lnum: LEB number where new index node will be written
 * @offs: offset where new index node will be written
 * @len: length of new index node
 */
static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
			 struct ubifs_znode *znode, int lnum, int offs, int len)
{
	struct ubifs_znode *zp;
	u8 hash[UBIFS_HASH_ARR_SZ];
	int i, err;

	/* Make index node */
	idx->ch.node_type = UBIFS_IDX_NODE;
	idx->child_cnt = cpu_to_le16(znode->child_cnt);
	idx->level = cpu_to_le16(znode->level);
	for (i = 0; i < znode->child_cnt; i++) {
		struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
		struct ubifs_zbranch *zbr = &znode->zbranch[i];

		key_write_idx(c, &zbr->key, &br->key);
		br->lnum = cpu_to_le32(zbr->lnum);
		br->offs = cpu_to_le32(zbr->offs);
		br->len = cpu_to_le32(zbr->len);
		ubifs_copy_hash(c, zbr->hash, ubifs_branch_hash(c, br));
		if (!zbr->lnum || !zbr->len) {
			ubifs_err(c, "bad ref in znode");
			ubifs_dump_znode(c, znode);
			if (zbr->znode)
				ubifs_dump_znode(c, zbr->znode);

			return -EINVAL;
		}
	}
	ubifs_prepare_node(c, idx, len, 0);
	ubifs_node_calc_hash(c, idx, hash);

	znode->lnum = lnum;
	znode->offs = offs;
	znode->len = len;

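	/*
	 * Record the znode's previous on-flash position in the old-index
	 * RB-tree before the parent's zbranch (or @c->zroot) is updated to
	 * point at the new location below.
	 */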
	err = insert_old_idx_znode(c, znode);

	/* Update the parent */
	zp = znode->parent;
	if (zp) {
		struct ubifs_zbranch *zbr;

		zbr = &zp->zbranch[znode->iip];
		zbr->lnum = lnum;
		zbr->offs = offs;
		zbr->len = len;
		ubifs_copy_hash(c, hash, zbr->hash);
	} else {
		c->zroot.lnum = lnum;
		c->zroot.offs = offs;
		c->zroot.len = len;
		ubifs_copy_hash(c, hash, c->zroot.hash);
	}
	c->calc_idx_sz += ALIGN(len, 8);

	atomic_long_dec(&c->dirty_zn_cnt);

	ubifs_assert(c, ubifs_zn_dirty(znode));
	ubifs_assert(c, ubifs_zn_cow(znode));

	/*
	 * Note, unlike 'write_index()' we do not add memory barriers here
	 * because this function is called with @c->tnc_mutex locked.
	 */
	__clear_bit(DIRTY_ZNODE, &znode->flags);
	__clear_bit(COW_ZNODE, &znode->flags);

	return err;
}

/**
 * fill_gap - make index nodes in gaps in dirty index LEBs.
 * @c: UBIFS file-system description object
 * @lnum: LEB number that gap appears in
 * @gap_start: offset of start of gap
 * @gap_end: offset of end of gap
 * @dirt: adds dirty space to this
 *
 * This function returns the number of index nodes written into the gap.
 */
static int fill_gap(struct ubifs_info *c, int lnum, int gap_start, int gap_end,
		    int *dirt)
{
	int len, gap_remains, gap_pos, written, pad_len;

	ubifs_assert(c, (gap_start & 7) == 0);
	ubifs_assert(c, (gap_end & 7) == 0);
	ubifs_assert(c, gap_end >= gap_start);

	gap_remains = gap_end - gap_start;
	if (!gap_remains)
		return 0;
	gap_pos = gap_start;
	written = 0;
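	/*
	 * Consume znodes from the commit list (@c->enext) in commit order and
	 * place an index node for each one into the gap for as long as the
	 * next node still fits.  The strict '<' check below, together with
	 * the 8-byte alignment asserted above, guarantees that the aligned
	 * length also fits.
	 */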
	while (c->enext) {
		len = ubifs_idx_node_sz(c, c->enext->child_cnt);
		if (len < gap_remains) {
			struct ubifs_znode *znode = c->enext;
			const int alen = ALIGN(len, 8);
			int err;

			ubifs_assert(c, alen <= gap_remains);
			err = make_idx_node(c, c->ileb_buf + gap_pos, znode,
					    lnum, gap_pos, len);
			if (err)
				return err;
			gap_remains -= alen;
			gap_pos += alen;
			c->enext = znode->cnext;
			if (c->enext == c->cnext)
				c->enext = NULL;
			written += 1;
		} else
			break;
	}
	if (gap_end == c->leb_size) {
		c->ileb_len = ALIGN(gap_pos, c->min_io_size);
		/* Pad to end of min_io_size */
		pad_len = c->ileb_len - gap_pos;
	} else
		/* Pad to end of gap */
		pad_len = gap_remains;
	dbg_gc("LEB %d:%d to %d len %d nodes written %d wasted bytes %d",
	       lnum, gap_start, gap_end, gap_end - gap_start, written, pad_len);
	ubifs_pad(c, c->ileb_buf + gap_pos, pad_len);
	*dirt += pad_len;
	return written;
}

/**
 * find_old_idx - find an index node obsoleted since the last commit start.
 * @c: UBIFS file-system description object
 * @lnum: LEB number of obsoleted index node
 * @offs: offset of obsoleted index node
 *
 * Returns %1 if found and %0 otherwise.
 */
static int find_old_idx(struct ubifs_info *c, int lnum, int offs)
{
	struct ubifs_old_idx *o;
	struct rb_node *p;

	p = c->old_idx.rb_node;
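	/*
	 * Standard RB-tree lookup: entries in @c->old_idx are ordered by LEB
	 * number first and by offset second, which is exactly the comparison
	 * sequence used below.
	 */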
	while (p) {
		o = rb_entry(p, struct ubifs_old_idx, rb);
		if (lnum < o->lnum)
			p = p->rb_left;
		else if (lnum > o->lnum)
			p = p->rb_right;
		else if (offs < o->offs)
			p = p->rb_left;
		else if (offs > o->offs)
			p = p->rb_right;
		else
			return 1;
	}
	return 0;
}

/**
 * is_idx_node_in_use - determine if an index node can be overwritten.
 * @c: UBIFS file-system description object
 * @key: key of index node
 * @level: index node level
 * @lnum: LEB number of index node
 * @offs: offset of index node
 *
 * If @key / @lnum / @offs identify an index node that was not part of the old
 * index, then this function returns %0 (obsolete).  Else, if the index node
 * was part of the old index but is now dirty, %1 is returned; if it is clean,
 * %2 is returned.  A negative error code is returned on failure.
 */
static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key,
			      int level, int lnum, int offs)
{
	int ret;

	ret = is_idx_node_in_tnc(c, key, level, lnum, offs);
	if (ret < 0)
		return ret; /* Error code */
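	/*
	 * Not in the current TNC.  If the old-index tree records this
	 * location, the node was part of the index at the start of the commit
	 * and has only just been obsoleted, so report it as dirty (%1) rather
	 * than obsolete (%0).
	 */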
	if (ret == 0)
		if (find_old_idx(c, lnum, offs))
			return 1;
	return ret;
}

/**
 * layout_leb_in_gaps - layout index nodes using in-the-gaps method.
 * @c: UBIFS file-system description object
 * @p: return LEB number in @c->gap_lebs[p]
 *
 * This function lays out new index nodes for dirty znodes using the
 * in-the-gaps method of TNC commit.  It merely puts the next znode into the
 * next gap, making no attempt to maximise the number of znodes that fit.
 *
 * This function returns the number of index nodes written into the gaps, or a
 * negative error code on failure.
 */
static int layout_leb_in_gaps(struct ubifs_info *c, int p)
{
	struct ubifs_scan_leb *sleb;
	struct ubifs_scan_node *snod;
	int lnum, dirt = 0, gap_start, gap_end, err, written, tot_written;

	tot_written = 0;
	/* Get an index LEB with lots of obsolete index nodes */
	lnum = ubifs_find_dirty_idx_leb(c);
	if (lnum < 0)
		/*
		 * There may also be dirt in the index head that could be
		 * filled; however, we do not check there at present.
		 */
		return lnum; /* Error code */
	c->gap_lebs[p] = lnum;
	dbg_gc("LEB %d", lnum);
	/*
	 * Scan the index LEB.  We use the generic scan for this even though
	 * it is more comprehensive and less efficient than is needed for this
	 * purpose.
	 */
	sleb = ubifs_scan(c, lnum, 0, c->ileb_buf, 0);
	c->ileb_len = 0;
	if (IS_ERR(sleb))
		return PTR_ERR(sleb);
	gap_start = 0;
	list_for_each_entry(snod, &sleb->nodes, list) {
		struct ubifs_idx_node *idx;
		int in_use, level;

		ubifs_assert(c, snod->type == UBIFS_IDX_NODE);
		idx = snod->node;
		key_read(c, ubifs_idx_key(c, idx), &snod->key);
		level = le16_to_cpu(idx->level);
		/* Determine if the index node is in use (not obsolete) */
		in_use = is_idx_node_in_use(c, &snod->key, level, lnum,
					    snod->offs);
		if (in_use < 0) {
			ubifs_scan_destroy(sleb);
			return in_use; /* Error code */
		}
		if (in_use) {
			if (in_use == 1)
				dirt += ALIGN(snod->len, 8);
			/*
			 * The obsolete index nodes form gaps that can be
			 * overwritten.  This gap has ended because we have
			 * found an index node that is still in use,
			 * i.e. not obsolete.
			 */
			gap_end = snod->offs;
			/* Try to fill gap */
			written = fill_gap(c, lnum, gap_start, gap_end, &dirt);
			if (written < 0) {
				ubifs_scan_destroy(sleb);
				return written; /* Error code */
			}
			tot_written += written;
			gap_start = ALIGN(snod->offs + snod->len, 8);
		}
	}
	ubifs_scan_destroy(sleb);
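	/* The last gap runs from the final in-use node to the end of the LEB */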
	c->ileb_len = c->leb_size;
	gap_end = c->leb_size;
	/* Try to fill gap */
	written = fill_gap(c, lnum, gap_start, gap_end, &dirt);
	if (written < 0)
		return written; /* Error code */
	tot_written += written;
	if (tot_written == 0) {
		struct ubifs_lprops lp;

		dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written);
		err = ubifs_read_one_lp(c, lnum, &lp);
		if (err)
			return err;
		if (lp.free == c->leb_size) {
			/*
			 * We must have snatched this LEB from the idx_gc list
			 * so we need to correct the free and dirty space.
			 */
			err = ubifs_change_one_lp(c, lnum,
						  c->leb_size - c->ileb_len,
						  dirt, 0, 0, 0);
			if (err)
				return err;
		}
		return 0;
	}
	err = ubifs_change_one_lp(c, lnum, c->leb_size - c->ileb_len, dirt,
				  0, 0, 0);
	if (err)
		return err;
	err = ubifs_leb_change(c, lnum, c->ileb_buf, c->ileb_len);
	if (err)
		return err;
	dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written);
	return tot_written;
}

/**
 * get_leb_cnt - calculate the number of empty LEBs needed to commit.
 * @c: UBIFS file-system description object
 * @cnt: number of znodes to commit
 *
 * This function returns the number of empty LEBs needed to commit @cnt znodes
 * to the current index head.  The number is not exact and may be more than
 * needed.
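 *
 * As an illustration (the numbers are made up): with a 128 KiB LEB, an index
 * head at offset 0 and a maximum index node size of 512 bytes, the head can
 * take 256 more index nodes, so committing 1000 znodes needs roughly
 * DIV_ROUND_UP(1000 - 256, 256) = 3 additional empty LEBs.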
 */
static int get_leb_cnt(struct ubifs_info *c, int cnt)
{
	int d;

	/* Assume maximum index node size (i.e. overestimate space needed) */
	cnt -= (c->leb_size - c->ihead_offs) / c->max_idx_node_sz;
	if (cnt < 0)
		cnt = 0;
	d = c->leb_size / c->max_idx_node_sz;
	return DIV_ROUND_UP(cnt, d);
}

/**
 * layout_in_gaps - in-the-gaps method of committing TNC.
 * @c: UBIFS file-system description object
 * @cnt: number of dirty znodes to commit.
 *
 * This function lays out new index nodes for dirty znodes using the
 * in-the-gaps method of TNC commit.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
static int layout_in_gaps(struct ubifs_info *c, int cnt)
{
	int err, leb_needed_cnt, written, p = 0, old_idx_lebs, *gap_lebs;

	dbg_gc("%d znodes to write", cnt);

	c->gap_lebs = kmalloc_array(c->lst.idx_lebs + 1, sizeof(int),
				    GFP_NOFS);
	if (!c->gap_lebs)
		return -ENOMEM;

	old_idx_lebs = c->lst.idx_lebs;
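	/*
	 * Keep filling dirty index LEBs, one per iteration, until the znodes
	 * that remain are few enough to fit into the empty LEBs that were
	 * already allocated (@c->ileb_cnt).
	 */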
	do {
		ubifs_assert(c, p < c->lst.idx_lebs);
		written = layout_leb_in_gaps(c, p);
		if (written < 0) {
			err = written;
			if (err != -ENOSPC) {
				kfree(c->gap_lebs);
				c->gap_lebs = NULL;
				return err;
			}
			if (!dbg_is_chk_index(c)) {
				/*
				 * Do not print scary warnings if the debugging
				 * option which forces in-the-gaps is enabled.
				 */
				ubifs_warn(c, "out of space");
				ubifs_dump_budg(c, &c->bi);
				ubifs_dump_lprops(c);
			}
			/* Try to commit anyway */
			break;
		}
		p++;
		cnt -= written;
		leb_needed_cnt = get_leb_cnt(c, cnt);
		dbg_gc("%d znodes remaining, need %d LEBs, have %d", cnt,
		       leb_needed_cnt, c->ileb_cnt);
		/*
		 * Dynamically change the size of @c->gap_lebs to prevent an
		 * out-of-bounds access, because @c->lst.idx_lebs could be
		 * increased by function @get_idx_gc_leb (called by
		 * layout_leb_in_gaps->ubifs_find_dirty_idx_leb) during the
		 * loop. Only enlarge @c->gap_lebs when needed.
		 */
		if (leb_needed_cnt > c->ileb_cnt && p >= old_idx_lebs &&
		    old_idx_lebs < c->lst.idx_lebs) {
			old_idx_lebs = c->lst.idx_lebs;
			gap_lebs = krealloc(c->gap_lebs, sizeof(int) *
					       (old_idx_lebs + 1), GFP_NOFS);
			if (!gap_lebs) {
				kfree(c->gap_lebs);
				c->gap_lebs = NULL;
				return -ENOMEM;
			}
			c->gap_lebs = gap_lebs;
		}
	} while (leb_needed_cnt > c->ileb_cnt);

	c->gap_lebs[p] = -1;
	return 0;
}

/**
 * layout_in_empty_space - layout index nodes in empty space.
 * @c: UBIFS file-system description object
 *
 * This function lays out new index nodes for dirty znodes using empty LEBs.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
static int layout_in_empty_space(struct ubifs_info *c)
{
	struct ubifs_znode *znode, *cnext, *zp;
	int lnum, offs, len, next_len, buf_len, buf_offs, used, avail;
	int wlen, blen, err;

	cnext = c->enext;
	if (!cnext)
		return 0;

	lnum = c->ihead_lnum;
	buf_offs = c->ihead_offs;

	buf_len = ubifs_idx_node_sz(c, c->fanout);
	buf_len = ALIGN(buf_len, c->min_io_size);
	used = 0;
	avail = buf_len;
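	/*
	 * Nothing is written here: @buf_len, @used and @avail only simulate
	 * the write-buffer that 'write_index()' will use later, so that the
	 * LEB and offset assigned to each znode match what will actually be
	 * written.
	 */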

	/* Ensure there is enough room for first write */
	next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
	if (buf_offs + next_len > c->leb_size)
		lnum = -1;

	while (1) {
		znode = cnext;

		len = ubifs_idx_node_sz(c, znode->child_cnt);

		/* Determine the index node position */
		if (lnum == -1) {
			if (c->ileb_nxt >= c->ileb_cnt) {
				ubifs_err(c, "out of space");
				return -ENOSPC;
			}
			lnum = c->ilebs[c->ileb_nxt++];
			buf_offs = 0;
			used = 0;
			avail = buf_len;
		}

		offs = buf_offs + used;

		znode->lnum = lnum;
		znode->offs = offs;
		znode->len = len;

		/* Update the parent */
		zp = znode->parent;
		if (zp) {
			struct ubifs_zbranch *zbr;
			int i;

			i = znode->iip;
			zbr = &zp->zbranch[i];
			zbr->lnum = lnum;
			zbr->offs = offs;
			zbr->len = len;
		} else {
			c->zroot.lnum = lnum;
			c->zroot.offs = offs;
			c->zroot.len = len;
		}
		c->calc_idx_sz += ALIGN(len, 8);

		/*
		 * Once lprops is updated, we can decrease the dirty znode count
		 * but it is easier to just do it here.
		 */
		atomic_long_dec(&c->dirty_zn_cnt);

		/*
		 * Calculate the next index node length to see if there is
		 * enough room for it
		 */
		cnext = znode->cnext;
		if (cnext == c->cnext)
			next_len = 0;
		else
			next_len = ubifs_idx_node_sz(c, cnext->child_cnt);

		/* Update buffer positions */
		wlen = used + len;
		used += ALIGN(len, 8);
		avail -= ALIGN(len, 8);

		if (next_len != 0 &&
		    buf_offs + used + next_len <= c->leb_size &&
		    avail > 0)
			continue;

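		/*
		 * The simulated buffer has to be flushed.  If it is full and
		 * the next index node still fits in this LEB, a whole
		 * buffer's worth of space is consumed; otherwise only the
		 * bytes written so far, rounded up to the minimum I/O unit,
		 * are consumed.
		 */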
		if (avail <= 0 && next_len &&
		    buf_offs + used + next_len <= c->leb_size)
			blen = buf_len;
		else
			blen = ALIGN(wlen, c->min_io_size);

		/* The buffer is full or there are no more znodes to do */
		buf_offs += blen;
		if (next_len) {
			if (buf_offs + next_len > c->leb_size) {
				err = ubifs_update_one_lp(c, lnum,
					c->leb_size - buf_offs, blen - used,
					0, 0);
				if (err)
					return err;
				lnum = -1;
			}
			used -= blen;
			if (used < 0)
				used = 0;
			avail = buf_len - used;
			continue;
		}
		err = ubifs_update_one_lp(c, lnum, c->leb_size - buf_offs,
					  blen - used, 0, 0);
		if (err)
			return err;
		break;
	}

	c->dbg->new_ihead_lnum = lnum;
	c->dbg->new_ihead_offs = buf_offs;

	return 0;
}

/**
 * layout_commit - determine positions of index nodes to commit.
 * @c: UBIFS file-system description object
 * @no_space: indicates that insufficient empty LEBs were allocated
 * @cnt: number of znodes to commit
 *
 * Calculate and update the positions of index nodes to commit.  If an
 * insufficient number of empty LEBs was allocated, then index nodes are placed
 * into the gaps created by obsolete index nodes in non-empty index LEBs.  For
 * this purpose, an obsolete index node is one that was not in the index as at
 * the end of the last commit.  To write "in-the-gaps" requires that those index
 * LEBs are updated atomically in-place.
 */
static int layout_commit(struct ubifs_info *c, int no_space, int cnt)
{
	int err;

	if (no_space) {
		err = layout_in_gaps(c, cnt);
		if (err)
			return err;
	}
	err = layout_in_empty_space(c);
	return err;
}

/**
 * find_first_dirty - find first dirty znode.
 * @znode: znode to begin searching from
 */
static struct ubifs_znode *find_first_dirty(struct ubifs_znode *znode)
{
	int i, cont;

	if (!znode)
		return NULL;

	while (1) {
		if (znode->level == 0) {
			if (ubifs_zn_dirty(znode))
				return znode;
			return NULL;
		}
		cont = 0;
		for (i = 0; i < znode->child_cnt; i++) {
			struct ubifs_zbranch *zbr = &znode->zbranch[i];

			if (zbr->znode && ubifs_zn_dirty(zbr->znode)) {
				znode = zbr->znode;
				cont = 1;
				break;
			}
		}
		if (!cont) {
			if (ubifs_zn_dirty(znode))
				return znode;
			return NULL;
		}
	}
}

/**
 * find_next_dirty - find next dirty znode.
 * @znode: znode to begin searching from
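 *
 * Together with 'find_first_dirty()' this yields a post-order walk of the
 * dirty part of the TNC: a dirty sibling subtree to the right is descended
 * first, otherwise the parent is returned, so children are always visited
 * before their parent.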
 */
static struct ubifs_znode *find_next_dirty(struct ubifs_znode *znode)
{
	int n = znode->iip + 1;

	znode = znode->parent;
	if (!znode)
		return NULL;
	for (; n < znode->child_cnt; n++) {
		struct ubifs_zbranch *zbr = &znode->zbranch[n];

		if (zbr->znode && ubifs_zn_dirty(zbr->znode))
			return find_first_dirty(zbr->znode);
	}
	return znode;
}

/**
 * get_znodes_to_commit - create list of dirty znodes to commit.
 * @c: UBIFS file-system description object
 *
 * This function returns the number of znodes to commit.
 */
static int get_znodes_to_commit(struct ubifs_info *c)
{
	struct ubifs_znode *znode, *cnext;
	int cnt = 0;

	c->cnext = find_first_dirty(c->zroot.znode);
	znode = c->enext = c->cnext;
	if (!znode) {
		dbg_cmt("no znodes to commit");
		return 0;
	}
	cnt += 1;
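	/*
	 * Link the dirty znodes into a commit list via ->cnext, in the order
	 * in which they will be written; the last znode points back to
	 * @c->cnext, closing the list into a ring.
	 */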
	while (1) {
		ubifs_assert(c, !ubifs_zn_cow(znode));
		__set_bit(COW_ZNODE, &znode->flags);
		znode->alt = 0;
		cnext = find_next_dirty(znode);
		if (!cnext) {
			znode->cnext = c->cnext;
			break;
		}
		znode->cparent = znode->parent;
		znode->ciip = znode->iip;
		znode->cnext = cnext;
		znode = cnext;
		cnt += 1;
	}
	dbg_cmt("committing %d znodes", cnt);
	ubifs_assert(c, cnt == atomic_long_read(&c->dirty_zn_cnt));
	return cnt;
}

/**
 * alloc_idx_lebs - allocate empty LEBs to be used to commit.
 * @c: UBIFS file-system description object
 * @cnt: number of znodes to commit
 *
 * This function returns %-ENOSPC if it cannot allocate a sufficient number of
 * empty LEBs.  %0 is returned on success, otherwise a negative error code
 * is returned.
 */
static int alloc_idx_lebs(struct ubifs_info *c, int cnt)
{
	int i, leb_cnt, lnum;

	c->ileb_cnt = 0;
	c->ileb_nxt = 0;
	leb_cnt = get_leb_cnt(c, cnt);
	dbg_cmt("need about %d empty LEBs for TNC commit", leb_cnt);
	if (!leb_cnt)
		return 0;
	c->ilebs = kmalloc_array(leb_cnt, sizeof(int), GFP_NOFS);
	if (!c->ilebs)
		return -ENOMEM;
	for (i = 0; i < leb_cnt; i++) {
		lnum = ubifs_find_free_leb_for_idx(c);
		if (lnum < 0)
			return lnum;
		c->ilebs[c->ileb_cnt++] = lnum;
		dbg_cmt("LEB %d", lnum);
	}
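	/*
	 * When index-checking debugging is enabled, occasionally pretend the
	 * allocation failed so that the in-the-gaps commit path gets
	 * exercised.
	 */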
	if (dbg_is_chk_index(c) && !get_random_u32_below(8))
		return -ENOSPC;
	return 0;
}

/**
 * free_unused_idx_lebs - free unused LEBs that were allocated for the commit.
 * @c: UBIFS file-system description object
 *
 * It is possible that we allocate more empty LEBs for the commit than we need.
 * This function frees the surplus.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
static int free_unused_idx_lebs(struct ubifs_info *c)
{
	int i, err = 0, lnum, er;

	for (i = c->ileb_nxt; i < c->ileb_cnt; i++) {
		lnum = c->ilebs[i];
		dbg_cmt("LEB %d", lnum);
		er = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
					 LPROPS_INDEX | LPROPS_TAKEN, 0);
		if (!err)
			err = er;
	}
	return err;
}

/**
 * free_idx_lebs - free unused LEBs after commit end.
 * @c: UBIFS file-system description object
 *
 * This function returns %0 on success and a negative error code on failure.
 */
static int free_idx_lebs(struct ubifs_info *c)
{
	int err;

	err = free_unused_idx_lebs(c);
	kfree(c->ilebs);
	c->ilebs = NULL;
	return err;
}

/**
 * ubifs_tnc_start_commit - start TNC commit.
 * @c: UBIFS file-system description object
 * @zroot: new index root position is returned here
 *
 * This function prepares the list of indexing nodes to commit and lays out
 * their positions on flash. If there is not enough free space, it uses the
 * in-the-gaps commit method. Returns zero in case of success and a negative
 * error code in case of failure.
 */
int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
{
	int err = 0, cnt;

	mutex_lock(&c->tnc_mutex);
	err = dbg_check_tnc(c, 1);
	if (err)
		goto out;
	cnt = get_znodes_to_commit(c);
	if (cnt != 0) {
		int no_space = 0;

		err = alloc_idx_lebs(c, cnt);
		if (err == -ENOSPC)
			no_space = 1;
		else if (err)
			goto out_free;
		err = layout_commit(c, no_space, cnt);
		if (err)
			goto out_free;
		ubifs_assert(c, atomic_long_read(&c->dirty_zn_cnt) == 0);
		err = free_unused_idx_lebs(c);
		if (err)
			goto out;
	}
	destroy_old_idx(c);
	memcpy(zroot, &c->zroot, sizeof(struct ubifs_zbranch));

	err = ubifs_save_dirty_idx_lnums(c);
	if (err)
		goto out;

	spin_lock(&c->space_lock);
	/*
	 * Although we have not finished committing yet, update the size of
	 * the committed index ('c->bi.old_idx_sz') and zero out the index
	 * growth budget. It is OK to do this now, because we've reserved all
	 * the space which is needed to commit the index, and it is safe for
	 * the budgeting subsystem to assume the index is already committed,
	 * even though it is not.
	 */
	ubifs_assert(c, c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
	c->bi.old_idx_sz = c->calc_idx_sz;
	c->bi.uncommitted_idx = 0;
	c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
	spin_unlock(&c->space_lock);
	mutex_unlock(&c->tnc_mutex);

	dbg_cmt("number of index LEBs %d", c->lst.idx_lebs);
	dbg_cmt("size of index %llu", c->calc_idx_sz);
	return err;

out_free:
	free_idx_lebs(c);
out:
	mutex_unlock(&c->tnc_mutex);
	return err;
}

/**
 * write_index - write index nodes.
 * @c: UBIFS file-system description object
 *
 * This function writes the index nodes whose positions were laid out in the
 * layout_in_empty_space function.
 */
static int write_index(struct ubifs_info *c)
{
	struct ubifs_idx_node *idx;
	struct ubifs_znode *znode, *cnext;
	int i, lnum, offs, len, next_len, buf_len, buf_offs, used;
	int avail, wlen, err, lnum_pos = 0, blen, nxt_offs;

	cnext = c->enext;
	if (!cnext)
		return 0;

	/*
	 * Always write index nodes to the index head so that index nodes and
	 * other types of nodes are never mixed in the same erase block.
	 */
	lnum = c->ihead_lnum;
	buf_offs = c->ihead_offs;

	/* Allocate commit buffer */
	buf_len = ALIGN(c->max_idx_node_sz, c->min_io_size);
	used = 0;
	avail = buf_len;

	/* Ensure there is enough room for first write */
	next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
	if (buf_offs + next_len > c->leb_size) {
		err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0, 0,
					  LPROPS_TAKEN);
		if (err)
			return err;
		lnum = -1;
	}

	while (1) {
		u8 hash[UBIFS_HASH_ARR_SZ];

		cond_resched();

		znode = cnext;
		idx = c->cbuf + used;

		/* Make index node */
		idx->ch.node_type = UBIFS_IDX_NODE;
		idx->child_cnt = cpu_to_le16(znode->child_cnt);
		idx->level = cpu_to_le16(znode->level);
		for (i = 0; i < znode->child_cnt; i++) {
			struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
			struct ubifs_zbranch *zbr = &znode->zbranch[i];

			key_write_idx(c, &zbr->key, &br->key);
			br->lnum = cpu_to_le32(zbr->lnum);
			br->offs = cpu_to_le32(zbr->offs);
			br->len = cpu_to_le32(zbr->len);
			ubifs_copy_hash(c, zbr->hash, ubifs_branch_hash(c, br));
			if (!zbr->lnum || !zbr->len) {
				ubifs_err(c, "bad ref in znode");
				ubifs_dump_znode(c, znode);
				if (zbr->znode)
					ubifs_dump_znode(c, zbr->znode);

				return -EINVAL;
			}
		}
		len = ubifs_idx_node_sz(c, znode->child_cnt);
		ubifs_prepare_node(c, idx, len, 0);
		ubifs_node_calc_hash(c, idx, hash);

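		/*
		 * Record the new hash in the commit-time parent's zbranch,
		 * and also in the current parent's zbranch (or @c->zroot for
		 * the root) unless the znode has become obsolete in the
		 * meantime. @c->tnc_mutex is taken because the in-memory tree
		 * may be modified concurrently while the commit runs.
		 */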
		mutex_lock(&c->tnc_mutex);

		if (znode->cparent)
			ubifs_copy_hash(c, hash,
					znode->cparent->zbranch[znode->ciip].hash);

		if (znode->parent) {
			if (!ubifs_zn_obsolete(znode))
				ubifs_copy_hash(c, hash,
					znode->parent->zbranch[znode->iip].hash);
		} else {
			ubifs_copy_hash(c, hash, c->zroot.hash);
		}

		mutex_unlock(&c->tnc_mutex);

		/* Determine the index node position */
		if (lnum == -1) {
			lnum = c->ilebs[lnum_pos++];
			buf_offs = 0;
			used = 0;
			avail = buf_len;
		}
		offs = buf_offs + used;

		if (lnum != znode->lnum || offs != znode->offs ||
		    len != znode->len) {
			ubifs_err(c, "inconsistent znode posn");
			return -EINVAL;
		}

		/* Grab some stuff from znode while we still can */
		cnext = znode->cnext;

		ubifs_assert(c, ubifs_zn_dirty(znode));
		ubifs_assert(c, ubifs_zn_cow(znode));

		/*
		 * It is important that other threads should see %DIRTY_ZNODE
		 * flag cleared before %COW_ZNODE. Specifically, it matters in
		 * the 'dirty_cow_znode()' function. This is the reason for the
		 * first barrier. Also, we want the bit changes to be seen by
		 * other threads ASAP, to avoid unnecessary copying, which is
		 * the reason for the second barrier.
		 */
		clear_bit(DIRTY_ZNODE, &znode->flags);
		smp_mb__before_atomic();
		clear_bit(COW_ZNODE, &znode->flags);
		smp_mb__after_atomic();

		/*
		 * We have marked the znode as clean but have not updated the
		 * @c->clean_zn_cnt counter. If this znode becomes dirty again
		 * before 'free_obsolete_znodes()' is called, then
		 * @c->clean_zn_cnt will be decremented before it gets
		 * incremented (resulting in 2 decrements for the same znode).
		 * This means that @c->clean_zn_cnt may become negative for a
		 * while.
		 *
		 * Q: why can we not increment @c->clean_zn_cnt?
		 * A: because we do not have the @c->tnc_mutex locked, and the
		 *    following code would be racy and buggy:
		 *
		 *    if (!ubifs_zn_obsolete(znode)) {
		 *            atomic_long_inc(&c->clean_zn_cnt);
		 *            atomic_long_inc(&ubifs_clean_zn_cnt);
		 *    }
		 *
		 *    Thus, we just delay the @c->clean_zn_cnt update until we
		 *    have the mutex locked.
		 */

		/* Do not access znode from this point on */

		/* Update buffer positions */
		wlen = used + len;
		used += ALIGN(len, 8);
		avail -= ALIGN(len, 8);

		/*
		 * Calculate the next index node length to see if there is
		 * enough room for it
		 */
		if (cnext == c->cnext)
			next_len = 0;
		else
			next_len = ubifs_idx_node_sz(c, cnext->child_cnt);

		nxt_offs = buf_offs + used + next_len;
		if (next_len && nxt_offs <= c->leb_size) {
			if (avail > 0)
				continue;
			else
				blen = buf_len;
		} else {
			wlen = ALIGN(wlen, 8);
			blen = ALIGN(wlen, c->min_io_size);
			ubifs_pad(c, c->cbuf + wlen, blen - wlen);
		}

		/* The buffer is full or there are no more znodes to do */
		err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, blen);
		if (err)
			return err;
		buf_offs += blen;
		if (next_len) {
			if (nxt_offs > c->leb_size) {
				err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0,
							  0, LPROPS_TAKEN);
				if (err)
					return err;
				lnum = -1;
			}
			used -= blen;
			if (used < 0)
				used = 0;
			avail = buf_len - used;
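			/*
			 * Any bytes of the last index node that spilled past
			 * the flushed part of the buffer have not been written
			 * yet; move them to the start of the buffer so they
			 * go out with the next write.
			 */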
			memmove(c->cbuf, c->cbuf + blen, used);
			continue;
		}
		break;
	}

	if (lnum != c->dbg->new_ihead_lnum ||
	    buf_offs != c->dbg->new_ihead_offs) {
		ubifs_err(c, "inconsistent ihead");
		return -EINVAL;
	}

	c->ihead_lnum = lnum;
	c->ihead_offs = buf_offs;

	return 0;
}

/**
 * free_obsolete_znodes - free obsolete znodes.
 * @c: UBIFS file-system description object
 *
 * At the end of the commit, obsolete znodes are freed.
 */
static void free_obsolete_znodes(struct ubifs_info *c)
{
	struct ubifs_znode *znode, *cnext;

	cnext = c->cnext;
	do {
		znode = cnext;
		cnext = znode->cnext;
		if (ubifs_zn_obsolete(znode))
			kfree(znode);
		else {
			znode->cnext = NULL;
			atomic_long_inc(&c->clean_zn_cnt);
			atomic_long_inc(&ubifs_clean_zn_cnt);
		}
	} while (cnext != c->cnext);
}

/**
 * return_gap_lebs - return LEBs used by the in-the-gaps commit method.
 * @c: UBIFS file-system description object
 *
 * This function clears the "taken" flag for the LEBs which were used by the
 * "commit in-the-gaps" method.
 */
static int return_gap_lebs(struct ubifs_info *c)
{
	int *p, err;

	if (!c->gap_lebs)
		return 0;

	dbg_cmt("");
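	/* @c->gap_lebs is terminated with -1 by layout_in_gaps() */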
	for (p = c->gap_lebs; *p != -1; p++) {
		err = ubifs_change_one_lp(c, *p, LPROPS_NC, LPROPS_NC, 0,
					  LPROPS_TAKEN, 0);
		if (err)
			return err;
	}

	kfree(c->gap_lebs);
	c->gap_lebs = NULL;
	return 0;
}

/**
 * ubifs_tnc_end_commit - update the TNC for commit end.
 * @c: UBIFS file-system description object
 *
 * Write the dirty znodes.
 */
int ubifs_tnc_end_commit(struct ubifs_info *c)
{
	int err;

	if (!c->cnext)
		return 0;

	err = return_gap_lebs(c);
	if (err)
		return err;

	err = write_index(c);
	if (err)
		return err;

	mutex_lock(&c->tnc_mutex);

	dbg_cmt("TNC height is %d", c->zroot.znode->level + 1);

	free_obsolete_znodes(c);

	c->cnext = NULL;
	kfree(c->ilebs);
	c->ilebs = NULL;

	mutex_unlock(&c->tnc_mutex);

	return 0;
}