1  /*
2   *  Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3   */
4  
5  /*
6   *  Written by Anatoly P. Pinchuk pap@namesys.botik.ru
7   *  Programm System Institute
8   *  Pereslavl-Zalessky Russia
9   */
10  
11  #include <linux/time.h>
12  #include <linux/string.h>
13  #include <linux/pagemap.h>
14  #include <linux/bio.h>
15  #include "reiserfs.h"
16  #include <linux/buffer_head.h>
17  #include <linux/quotaops.h>
18  
19  /* Does the buffer contain a disk block which is in the tree. */
B_IS_IN_TREE(const struct buffer_head * bh)20  inline int B_IS_IN_TREE(const struct buffer_head *bh)
21  {
22  
23  	RFALSE(B_LEVEL(bh) > MAX_HEIGHT,
24  	       "PAP-1010: block (%b) has too big level (%z)", bh, bh);
25  
26  	return (B_LEVEL(bh) != FREE_LEVEL);
27  }
28  
29  /* to get item head in le form */
copy_item_head(struct item_head * to,const struct item_head * from)30  inline void copy_item_head(struct item_head *to,
31  			   const struct item_head *from)
32  {
33  	memcpy(to, from, IH_SIZE);
34  }
35  
36  /*
37   * k1 is pointer to on-disk structure which is stored in little-endian
38   * form. k2 is pointer to cpu variable. For key of items of the same
39   * object this returns 0.
40   * Returns: -1 if key1 < key2
41   * 0 if key1 == key2
42   * 1 if key1 > key2
43   */
comp_short_keys(const struct reiserfs_key * le_key,const struct cpu_key * cpu_key)44  inline int comp_short_keys(const struct reiserfs_key *le_key,
45  			   const struct cpu_key *cpu_key)
46  {
47  	__u32 n;
48  	n = le32_to_cpu(le_key->k_dir_id);
49  	if (n < cpu_key->on_disk_key.k_dir_id)
50  		return -1;
51  	if (n > cpu_key->on_disk_key.k_dir_id)
52  		return 1;
53  	n = le32_to_cpu(le_key->k_objectid);
54  	if (n < cpu_key->on_disk_key.k_objectid)
55  		return -1;
56  	if (n > cpu_key->on_disk_key.k_objectid)
57  		return 1;
58  	return 0;
59  }
60  
61  /*
62   * k1 is pointer to on-disk structure which is stored in little-endian
63   * form. k2 is pointer to cpu variable.
64   * Compare keys using all 4 key fields.
65   * Returns: -1 if key1 < key2 0
66   * if key1 = key2 1 if key1 > key2
67   */
comp_keys(const struct reiserfs_key * le_key,const struct cpu_key * cpu_key)68  static inline int comp_keys(const struct reiserfs_key *le_key,
69  			    const struct cpu_key *cpu_key)
70  {
71  	int retval;
72  
73  	retval = comp_short_keys(le_key, cpu_key);
74  	if (retval)
75  		return retval;
76  	if (le_key_k_offset(le_key_version(le_key), le_key) <
77  	    cpu_key_k_offset(cpu_key))
78  		return -1;
79  	if (le_key_k_offset(le_key_version(le_key), le_key) >
80  	    cpu_key_k_offset(cpu_key))
81  		return 1;
82  
83  	if (cpu_key->key_length == 3)
84  		return 0;
85  
86  	/* this part is needed only when tail conversion is in progress */
87  	if (le_key_k_type(le_key_version(le_key), le_key) <
88  	    cpu_key_k_type(cpu_key))
89  		return -1;
90  
91  	if (le_key_k_type(le_key_version(le_key), le_key) >
92  	    cpu_key_k_type(cpu_key))
93  		return 1;
94  
95  	return 0;
96  }
97  
comp_short_le_keys(const struct reiserfs_key * key1,const struct reiserfs_key * key2)98  inline int comp_short_le_keys(const struct reiserfs_key *key1,
99  			      const struct reiserfs_key *key2)
100  {
101  	__u32 *k1_u32, *k2_u32;
102  	int key_length = REISERFS_SHORT_KEY_LEN;
103  
104  	k1_u32 = (__u32 *) key1;
105  	k2_u32 = (__u32 *) key2;
106  	for (; key_length--; ++k1_u32, ++k2_u32) {
107  		if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32))
108  			return -1;
109  		if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32))
110  			return 1;
111  	}
112  	return 0;
113  }
114  
le_key2cpu_key(struct cpu_key * to,const struct reiserfs_key * from)115  inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
116  {
117  	int version;
118  	to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
119  	to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
120  
121  	/* find out version of the key */
122  	version = le_key_version(from);
123  	to->version = version;
124  	to->on_disk_key.k_offset = le_key_k_offset(version, from);
125  	to->on_disk_key.k_type = le_key_k_type(version, from);
126  }
127  
128  /*
129   * this does not say which one is bigger, it only returns 1 if keys
130   * are not equal, 0 otherwise
131   */
comp_le_keys(const struct reiserfs_key * k1,const struct reiserfs_key * k2)132  inline int comp_le_keys(const struct reiserfs_key *k1,
133  			const struct reiserfs_key *k2)
134  {
135  	return memcmp(k1, k2, sizeof(struct reiserfs_key));
136  }
137  
138  /**************************************************************************
139   *  Binary search toolkit function                                        *
140   *  Search for an item in the array by the item key                       *
141   *  Returns:    1 if found,  0 if not found;                              *
142   *        *pos = number of the searched element if found, else the        *
143   *        number of the first element that is larger than key.            *
144   **************************************************************************/
145  /*
146   * For those not familiar with binary search: lbound is the leftmost item
147   * that it could be, rbound the rightmost item that it could be.  We examine
148   * the item halfway between lbound and rbound, and that tells us either
149   * that we can increase lbound, or decrease rbound, or that we have found it,
150   * or if lbound <= rbound that there are no possible items, and we have not
151   * found it. With each examination we cut the number of possible items it
152   * could be by one more than half rounded down, or we find it.
153   */
bin_search(const void * key,const void * base,int num,int width,int * pos)154  static inline int bin_search(const void *key,	/* Key to search for. */
155  			     const void *base,	/* First item in the array. */
156  			     int num,	/* Number of items in the array. */
157  			     /*
158  			      * Item size in the array.  searched. Lest the
159  			      * reader be confused, note that this is crafted
160  			      * as a general function, and when it is applied
161  			      * specifically to the array of item headers in a
162  			      * node, width is actually the item header size
163  			      * not the item size.
164  			      */
165  			     int width,
166  			     int *pos /* Number of the searched for element. */
167      )
168  {
169  	int rbound, lbound, j;
170  
171  	for (j = ((rbound = num - 1) + (lbound = 0)) / 2;
172  	     lbound <= rbound; j = (rbound + lbound) / 2)
173  		switch (comp_keys
174  			((struct reiserfs_key *)((char *)base + j * width),
175  			 (struct cpu_key *)key)) {
176  		case -1:
177  			lbound = j + 1;
178  			continue;
179  		case 1:
180  			rbound = j - 1;
181  			continue;
182  		case 0:
183  			*pos = j;
184  			return ITEM_FOUND;	/* Key found in the array.  */
185  		}
186  
187  	/*
188  	 * bin_search did not find given key, it returns position of key,
189  	 * that is minimal and greater than the given one.
190  	 */
191  	*pos = lbound;
192  	return ITEM_NOT_FOUND;
193  }
194  
195  
196  /* Minimal possible key. It is never in the tree. */
197  const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
198  
199  /* Maximal possible key. It is never in the tree. */
200  static const struct reiserfs_key MAX_KEY = {
201  	cpu_to_le32(0xffffffff),
202  	cpu_to_le32(0xffffffff),
203  	{{cpu_to_le32(0xffffffff),
204  	  cpu_to_le32(0xffffffff)},}
205  };
206  
207  /*
208   * Get delimiting key of the buffer by looking for it in the buffers in the
209   * path, starting from the bottom of the path, and going upwards.  We must
210   * check the path's validity at each step.  If the key is not in the path,
211   * there is no delimiting key in the tree (buffer is first or last buffer
212   * in tree), and in this case we return a special key, either MIN_KEY or
213   * MAX_KEY.
214   */
get_lkey(const struct treepath * chk_path,const struct super_block * sb)215  static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
216  						  const struct super_block *sb)
217  {
218  	int position, path_offset = chk_path->path_length;
219  	struct buffer_head *parent;
220  
221  	RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
222  	       "PAP-5010: invalid offset in the path");
223  
224  	/* While not higher in path than first element. */
225  	while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
226  
227  		RFALSE(!buffer_uptodate
228  		       (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
229  		       "PAP-5020: parent is not uptodate");
230  
231  		/* Parent at the path is not in the tree now. */
232  		if (!B_IS_IN_TREE
233  		    (parent =
234  		     PATH_OFFSET_PBUFFER(chk_path, path_offset)))
235  			return &MAX_KEY;
236  		/* Check whether position in the parent is correct. */
237  		if ((position =
238  		     PATH_OFFSET_POSITION(chk_path,
239  					  path_offset)) >
240  		    B_NR_ITEMS(parent))
241  			return &MAX_KEY;
242  		/* Check whether parent at the path really points to the child. */
243  		if (B_N_CHILD_NUM(parent, position) !=
244  		    PATH_OFFSET_PBUFFER(chk_path,
245  					path_offset + 1)->b_blocknr)
246  			return &MAX_KEY;
247  		/*
248  		 * Return delimiting key if position in the parent
249  		 * is not equal to zero.
250  		 */
251  		if (position)
252  			return internal_key(parent, position - 1);
253  	}
254  	/* Return MIN_KEY if we are in the root of the buffer tree. */
255  	if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
256  	    b_blocknr == SB_ROOT_BLOCK(sb))
257  		return &MIN_KEY;
258  	return &MAX_KEY;
259  }
260  
261  /* Get delimiting key of the buffer at the path and its right neighbor. */
get_rkey(const struct treepath * chk_path,const struct super_block * sb)262  inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
263  					   const struct super_block *sb)
264  {
265  	int position, path_offset = chk_path->path_length;
266  	struct buffer_head *parent;
267  
268  	RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
269  	       "PAP-5030: invalid offset in the path");
270  
271  	while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
272  
273  		RFALSE(!buffer_uptodate
274  		       (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
275  		       "PAP-5040: parent is not uptodate");
276  
277  		/* Parent at the path is not in the tree now. */
278  		if (!B_IS_IN_TREE
279  		    (parent =
280  		     PATH_OFFSET_PBUFFER(chk_path, path_offset)))
281  			return &MIN_KEY;
282  		/* Check whether position in the parent is correct. */
283  		if ((position =
284  		     PATH_OFFSET_POSITION(chk_path,
285  					  path_offset)) >
286  		    B_NR_ITEMS(parent))
287  			return &MIN_KEY;
288  		/*
289  		 * Check whether parent at the path really points
290  		 * to the child.
291  		 */
292  		if (B_N_CHILD_NUM(parent, position) !=
293  		    PATH_OFFSET_PBUFFER(chk_path,
294  					path_offset + 1)->b_blocknr)
295  			return &MIN_KEY;
296  
297  		/*
298  		 * Return delimiting key if position in the parent
299  		 * is not the last one.
300  		 */
301  		if (position != B_NR_ITEMS(parent))
302  			return internal_key(parent, position);
303  	}
304  
305  	/* Return MAX_KEY if we are in the root of the buffer tree. */
306  	if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
307  	    b_blocknr == SB_ROOT_BLOCK(sb))
308  		return &MAX_KEY;
309  	return &MIN_KEY;
310  }
311  
312  /*
313   * Check whether a key is contained in the tree rooted from a buffer at a path.
314   * This works by looking at the left and right delimiting keys for the buffer
315   * in the last path_element in the path.  These delimiting keys are stored
316   * at least one level above that buffer in the tree. If the buffer is the
317   * first or last node in the tree order then one of the delimiting keys may
318   * be absent, and in this case get_lkey and get_rkey return a special key
319   * which is MIN_KEY or MAX_KEY.
320   */
key_in_buffer(struct treepath * chk_path,const struct cpu_key * key,struct super_block * sb)321  static inline int key_in_buffer(
322  				/* Path which should be checked. */
323  				struct treepath *chk_path,
324  				/* Key which should be checked. */
325  				const struct cpu_key *key,
326  				struct super_block *sb
327      )
328  {
329  
330  	RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
331  	       || chk_path->path_length > MAX_HEIGHT,
332  	       "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
333  	       key, chk_path->path_length);
334  	RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev,
335  	       "PAP-5060: device must not be NODEV");
336  
337  	if (comp_keys(get_lkey(chk_path, sb), key) == 1)
338  		/* left delimiting key is bigger, that the key we look for */
339  		return 0;
340  	/*  if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */
341  	if (comp_keys(get_rkey(chk_path, sb), key) != 1)
342  		/* key must be less than right delimitiing key */
343  		return 0;
344  	return 1;
345  }
346  
reiserfs_check_path(struct treepath * p)347  int reiserfs_check_path(struct treepath *p)
348  {
349  	RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
350  	       "path not properly relsed");
351  	return 0;
352  }
353  
354  /*
355   * Drop the reference to each buffer in a path and restore
356   * dirty bits clean when preparing the buffer for the log.
357   * This version should only be called from fix_nodes()
358   */
pathrelse_and_restore(struct super_block * sb,struct treepath * search_path)359  void pathrelse_and_restore(struct super_block *sb,
360  			   struct treepath *search_path)
361  {
362  	int path_offset = search_path->path_length;
363  
364  	RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
365  	       "clm-4000: invalid path offset");
366  
367  	while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
368  		struct buffer_head *bh;
369  		bh = PATH_OFFSET_PBUFFER(search_path, path_offset--);
370  		reiserfs_restore_prepared_buffer(sb, bh);
371  		brelse(bh);
372  	}
373  	search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
374  }
375  
376  /* Drop the reference to each buffer in a path */
pathrelse(struct treepath * search_path)377  void pathrelse(struct treepath *search_path)
378  {
379  	int path_offset = search_path->path_length;
380  
381  	RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
382  	       "PAP-5090: invalid path offset");
383  
384  	while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
385  		brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--));
386  
387  	search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
388  }
389  
has_valid_deh_location(struct buffer_head * bh,struct item_head * ih)390  static int has_valid_deh_location(struct buffer_head *bh, struct item_head *ih)
391  {
392  	struct reiserfs_de_head *deh;
393  	int i;
394  
395  	deh = B_I_DEH(bh, ih);
396  	for (i = 0; i < ih_entry_count(ih); i++) {
397  		if (deh_location(&deh[i]) > ih_item_len(ih)) {
398  			reiserfs_warning(NULL, "reiserfs-5094",
399  					 "directory entry location seems wrong %h",
400  					 &deh[i]);
401  			return 0;
402  		}
403  	}
404  
405  	return 1;
406  }
407  
is_leaf(char * buf,int blocksize,struct buffer_head * bh)408  static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
409  {
410  	struct block_head *blkh;
411  	struct item_head *ih;
412  	int used_space;
413  	int prev_location;
414  	int i;
415  	int nr;
416  
417  	blkh = (struct block_head *)buf;
418  	if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
419  		reiserfs_warning(NULL, "reiserfs-5080",
420  				 "this should be caught earlier");
421  		return 0;
422  	}
423  
424  	nr = blkh_nr_item(blkh);
425  	if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
426  		/* item number is too big or too small */
427  		reiserfs_warning(NULL, "reiserfs-5081",
428  				 "nr_item seems wrong: %z", bh);
429  		return 0;
430  	}
431  	ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
432  	used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
433  
434  	/* free space does not match to calculated amount of use space */
435  	if (used_space != blocksize - blkh_free_space(blkh)) {
436  		reiserfs_warning(NULL, "reiserfs-5082",
437  				 "free space seems wrong: %z", bh);
438  		return 0;
439  	}
440  	/*
441  	 * FIXME: it is_leaf will hit performance too much - we may have
442  	 * return 1 here
443  	 */
444  
445  	/* check tables of item heads */
446  	ih = (struct item_head *)(buf + BLKH_SIZE);
447  	prev_location = blocksize;
448  	for (i = 0; i < nr; i++, ih++) {
449  		if (le_ih_k_type(ih) == TYPE_ANY) {
450  			reiserfs_warning(NULL, "reiserfs-5083",
451  					 "wrong item type for item %h",
452  					 ih);
453  			return 0;
454  		}
455  		if (ih_location(ih) >= blocksize
456  		    || ih_location(ih) < IH_SIZE * nr) {
457  			reiserfs_warning(NULL, "reiserfs-5084",
458  					 "item location seems wrong: %h",
459  					 ih);
460  			return 0;
461  		}
462  		if (ih_item_len(ih) < 1
463  		    || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
464  			reiserfs_warning(NULL, "reiserfs-5085",
465  					 "item length seems wrong: %h",
466  					 ih);
467  			return 0;
468  		}
469  		if (prev_location - ih_location(ih) != ih_item_len(ih)) {
470  			reiserfs_warning(NULL, "reiserfs-5086",
471  					 "item location seems wrong "
472  					 "(second one): %h", ih);
473  			return 0;
474  		}
475  		if (is_direntry_le_ih(ih)) {
476  			if (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE)) {
477  				reiserfs_warning(NULL, "reiserfs-5093",
478  						 "item entry count seems wrong %h",
479  						 ih);
480  				return 0;
481  			}
482  			return has_valid_deh_location(bh, ih);
483  		}
484  		prev_location = ih_location(ih);
485  	}
486  
487  	/* one may imagine many more checks */
488  	return 1;
489  }
490  
491  /* returns 1 if buf looks like an internal node, 0 otherwise */
is_internal(char * buf,int blocksize,struct buffer_head * bh)492  static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
493  {
494  	struct block_head *blkh;
495  	int nr;
496  	int used_space;
497  
498  	blkh = (struct block_head *)buf;
499  	nr = blkh_level(blkh);
500  	if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
501  		/* this level is not possible for internal nodes */
502  		reiserfs_warning(NULL, "reiserfs-5087",
503  				 "this should be caught earlier");
504  		return 0;
505  	}
506  
507  	nr = blkh_nr_item(blkh);
508  	/* for internal which is not root we might check min number of keys */
509  	if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
510  		reiserfs_warning(NULL, "reiserfs-5088",
511  				 "number of key seems wrong: %z", bh);
512  		return 0;
513  	}
514  
515  	used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
516  	if (used_space != blocksize - blkh_free_space(blkh)) {
517  		reiserfs_warning(NULL, "reiserfs-5089",
518  				 "free space seems wrong: %z", bh);
519  		return 0;
520  	}
521  
522  	/* one may imagine many more checks */
523  	return 1;
524  }
525  
526  /*
527   * make sure that bh contains formatted node of reiserfs tree of
528   * 'level'-th level
529   */
is_tree_node(struct buffer_head * bh,int level)530  static int is_tree_node(struct buffer_head *bh, int level)
531  {
532  	if (B_LEVEL(bh) != level) {
533  		reiserfs_warning(NULL, "reiserfs-5090", "node level %d does "
534  				 "not match to the expected one %d",
535  				 B_LEVEL(bh), level);
536  		return 0;
537  	}
538  	if (level == DISK_LEAF_NODE_LEVEL)
539  		return is_leaf(bh->b_data, bh->b_size, bh);
540  
541  	return is_internal(bh->b_data, bh->b_size, bh);
542  }
543  
544  #define SEARCH_BY_KEY_READA 16
545  
546  /*
547   * The function is NOT SCHEDULE-SAFE!
548   * It might unlock the write lock if we needed to wait for a block
549   * to be read. Note that in this case it won't recover the lock to avoid
550   * high contention resulting from too much lock requests, especially
551   * the caller (search_by_key) will perform other schedule-unsafe
552   * operations just after calling this function.
553   *
554   * @return depth of lock to be restored after read completes
555   */
search_by_key_reada(struct super_block * s,struct buffer_head ** bh,b_blocknr_t * b,int num)556  static int search_by_key_reada(struct super_block *s,
557  				struct buffer_head **bh,
558  				b_blocknr_t *b, int num)
559  {
560  	int i, j;
561  	int depth = -1;
562  
563  	for (i = 0; i < num; i++) {
564  		bh[i] = sb_getblk(s, b[i]);
565  	}
566  	/*
567  	 * We are going to read some blocks on which we
568  	 * have a reference. It's safe, though we might be
569  	 * reading blocks concurrently changed if we release
570  	 * the lock. But it's still fine because we check later
571  	 * if the tree changed
572  	 */
573  	for (j = 0; j < i; j++) {
574  		/*
575  		 * note, this needs attention if we are getting rid of the BKL
576  		 * you have to make sure the prepared bit isn't set on this
577  		 * buffer
578  		 */
579  		if (!buffer_uptodate(bh[j])) {
580  			if (depth == -1)
581  				depth = reiserfs_write_unlock_nested(s);
582  			bh_readahead(bh[j], REQ_RAHEAD);
583  		}
584  		brelse(bh[j]);
585  	}
586  	return depth;
587  }
588  
589  /*
590   * This function fills up the path from the root to the leaf as it
591   * descends the tree looking for the key.  It uses reiserfs_bread to
592   * try to find buffers in the cache given their block number.  If it
593   * does not find them in the cache it reads them from disk.  For each
594   * node search_by_key finds using reiserfs_bread it then uses
595   * bin_search to look through that node.  bin_search will find the
596   * position of the block_number of the next node if it is looking
597   * through an internal node.  If it is looking through a leaf node
598   * bin_search will find the position of the item which has key either
599   * equal to given key, or which is the maximal key less than the given
600   * key.  search_by_key returns a path that must be checked for the
601   * correctness of the top of the path but need not be checked for the
602   * correctness of the bottom of the path
603   */
604  /*
605   * search_by_key - search for key (and item) in stree
606   * @sb: superblock
607   * @key: pointer to key to search for
608   * @search_path: Allocated and initialized struct treepath; Returned filled
609   *		 on success.
610   * @stop_level: How far down the tree to search, Use DISK_LEAF_NODE_LEVEL to
611   *		stop at leaf level.
612   *
613   * The function is NOT SCHEDULE-SAFE!
614   */
search_by_key(struct super_block * sb,const struct cpu_key * key,struct treepath * search_path,int stop_level)615  int search_by_key(struct super_block *sb, const struct cpu_key *key,
616  		  struct treepath *search_path, int stop_level)
617  {
618  	b_blocknr_t block_number;
619  	int expected_level;
620  	struct buffer_head *bh;
621  	struct path_element *last_element;
622  	int node_level, retval;
623  	int fs_gen;
624  	struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
625  	b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
626  	int reada_count = 0;
627  
628  #ifdef CONFIG_REISERFS_CHECK
629  	int repeat_counter = 0;
630  #endif
631  
632  	PROC_INFO_INC(sb, search_by_key);
633  
634  	/*
635  	 * As we add each node to a path we increase its count.  This means
636  	 * that we must be careful to release all nodes in a path before we
637  	 * either discard the path struct or re-use the path struct, as we
638  	 * do here.
639  	 */
640  
641  	pathrelse(search_path);
642  
643  	/*
644  	 * With each iteration of this loop we search through the items in the
645  	 * current node, and calculate the next current node(next path element)
646  	 * for the next iteration of this loop..
647  	 */
648  	block_number = SB_ROOT_BLOCK(sb);
649  	expected_level = -1;
650  	while (1) {
651  
652  #ifdef CONFIG_REISERFS_CHECK
653  		if (!(++repeat_counter % 50000))
654  			reiserfs_warning(sb, "PAP-5100",
655  					 "%s: there were %d iterations of "
656  					 "while loop looking for key %K",
657  					 current->comm, repeat_counter,
658  					 key);
659  #endif
660  
661  		/* prep path to have another element added to it. */
662  		last_element =
663  		    PATH_OFFSET_PELEMENT(search_path,
664  					 ++search_path->path_length);
665  		fs_gen = get_generation(sb);
666  
667  		/*
668  		 * Read the next tree node, and set the last element
669  		 * in the path to have a pointer to it.
670  		 */
671  		if ((bh = last_element->pe_buffer =
672  		     sb_getblk(sb, block_number))) {
673  
674  			/*
675  			 * We'll need to drop the lock if we encounter any
676  			 * buffers that need to be read. If all of them are
677  			 * already up to date, we don't need to drop the lock.
678  			 */
679  			int depth = -1;
680  
681  			if (!buffer_uptodate(bh) && reada_count > 1)
682  				depth = search_by_key_reada(sb, reada_bh,
683  						    reada_blocks, reada_count);
684  
685  			if (!buffer_uptodate(bh) && depth == -1)
686  				depth = reiserfs_write_unlock_nested(sb);
687  
688  			bh_read_nowait(bh, 0);
689  			wait_on_buffer(bh);
690  
691  			if (depth != -1)
692  				reiserfs_write_lock_nested(sb, depth);
693  			if (!buffer_uptodate(bh))
694  				goto io_error;
695  		} else {
696  io_error:
697  			search_path->path_length--;
698  			pathrelse(search_path);
699  			return IO_ERROR;
700  		}
701  		reada_count = 0;
702  		if (expected_level == -1)
703  			expected_level = SB_TREE_HEIGHT(sb);
704  		expected_level--;
705  
706  		/*
707  		 * It is possible that schedule occurred. We must check
708  		 * whether the key to search is still in the tree rooted
709  		 * from the current buffer. If not then repeat search
710  		 * from the root.
711  		 */
712  		if (fs_changed(fs_gen, sb) &&
713  		    (!B_IS_IN_TREE(bh) ||
714  		     B_LEVEL(bh) != expected_level ||
715  		     !key_in_buffer(search_path, key, sb))) {
716  			PROC_INFO_INC(sb, search_by_key_fs_changed);
717  			PROC_INFO_INC(sb, search_by_key_restarted);
718  			PROC_INFO_INC(sb,
719  				      sbk_restarted[expected_level - 1]);
720  			pathrelse(search_path);
721  
722  			/*
723  			 * Get the root block number so that we can
724  			 * repeat the search starting from the root.
725  			 */
726  			block_number = SB_ROOT_BLOCK(sb);
727  			expected_level = -1;
728  
729  			/* repeat search from the root */
730  			continue;
731  		}
732  
733  		/*
734  		 * only check that the key is in the buffer if key is not
735  		 * equal to the MAX_KEY. Latter case is only possible in
736  		 * "finish_unfinished()" processing during mount.
737  		 */
738  		RFALSE(comp_keys(&MAX_KEY, key) &&
739  		       !key_in_buffer(search_path, key, sb),
740  		       "PAP-5130: key is not in the buffer");
741  #ifdef CONFIG_REISERFS_CHECK
742  		if (REISERFS_SB(sb)->cur_tb) {
743  			print_cur_tb("5140");
744  			reiserfs_panic(sb, "PAP-5140",
745  				       "schedule occurred in do_balance!");
746  		}
747  #endif
748  
749  		/*
750  		 * make sure, that the node contents look like a node of
751  		 * certain level
752  		 */
753  		if (!is_tree_node(bh, expected_level)) {
754  			reiserfs_error(sb, "vs-5150",
755  				       "invalid format found in block %ld. "
756  				       "Fsck?", bh->b_blocknr);
757  			pathrelse(search_path);
758  			return IO_ERROR;
759  		}
760  
761  		/* ok, we have acquired next formatted node in the tree */
762  		node_level = B_LEVEL(bh);
763  
764  		PROC_INFO_BH_STAT(sb, bh, node_level - 1);
765  
766  		RFALSE(node_level < stop_level,
767  		       "vs-5152: tree level (%d) is less than stop level (%d)",
768  		       node_level, stop_level);
769  
770  		retval = bin_search(key, item_head(bh, 0),
771  				      B_NR_ITEMS(bh),
772  				      (node_level ==
773  				       DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
774  				      KEY_SIZE,
775  				      &last_element->pe_position);
776  		if (node_level == stop_level) {
777  			return retval;
778  		}
779  
780  		/* we are not in the stop level */
781  		/*
782  		 * item has been found, so we choose the pointer which
783  		 * is to the right of the found one
784  		 */
785  		if (retval == ITEM_FOUND)
786  			last_element->pe_position++;
787  
788  		/*
789  		 * if item was not found we choose the position which is to
790  		 * the left of the found item. This requires no code,
791  		 * bin_search did it already.
792  		 */
793  
794  		/*
795  		 * So we have chosen a position in the current node which is
796  		 * an internal node.  Now we calculate child block number by
797  		 * position in the node.
798  		 */
799  		block_number =
800  		    B_N_CHILD_NUM(bh, last_element->pe_position);
801  
802  		/*
803  		 * if we are going to read leaf nodes, try for read
804  		 * ahead as well
805  		 */
806  		if ((search_path->reada & PATH_READA) &&
807  		    node_level == DISK_LEAF_NODE_LEVEL + 1) {
808  			int pos = last_element->pe_position;
809  			int limit = B_NR_ITEMS(bh);
810  			struct reiserfs_key *le_key;
811  
812  			if (search_path->reada & PATH_READA_BACK)
813  				limit = 0;
814  			while (reada_count < SEARCH_BY_KEY_READA) {
815  				if (pos == limit)
816  					break;
817  				reada_blocks[reada_count++] =
818  				    B_N_CHILD_NUM(bh, pos);
819  				if (search_path->reada & PATH_READA_BACK)
820  					pos--;
821  				else
822  					pos++;
823  
824  				/*
825  				 * check to make sure we're in the same object
826  				 */
827  				le_key = internal_key(bh, pos);
828  				if (le32_to_cpu(le_key->k_objectid) !=
829  				    key->on_disk_key.k_objectid) {
830  					break;
831  				}
832  			}
833  		}
834  	}
835  }
836  
837  /*
838   * Form the path to an item and position in this item which contains
839   * file byte defined by key. If there is no such item
840   * corresponding to the key, we point the path to the item with
841   * maximal key less than key, and *pos_in_item is set to one
842   * past the last entry/byte in the item.  If searching for entry in a
843   * directory item, and it is not found, *pos_in_item is set to one
844   * entry more than the entry with maximal key which is less than the
845   * sought key.
846   *
847   * Note that if there is no entry in this same node which is one more,
848   * then we point to an imaginary entry.  for direct items, the
849   * position is in units of bytes, for indirect items the position is
850   * in units of blocknr entries, for directory items the position is in
851   * units of directory entries.
852   */
853  /* The function is NOT SCHEDULE-SAFE! */
search_for_position_by_key(struct super_block * sb,const struct cpu_key * p_cpu_key,struct treepath * search_path)854  int search_for_position_by_key(struct super_block *sb,
855  			       /* Key to search (cpu variable) */
856  			       const struct cpu_key *p_cpu_key,
857  			       /* Filled up by this function. */
858  			       struct treepath *search_path)
859  {
860  	struct item_head *p_le_ih;	/* pointer to on-disk structure */
861  	int blk_size;
862  	loff_t item_offset, offset;
863  	struct reiserfs_dir_entry de;
864  	int retval;
865  
866  	/* If searching for directory entry. */
867  	if (is_direntry_cpu_key(p_cpu_key))
868  		return search_by_entry_key(sb, p_cpu_key, search_path,
869  					   &de);
870  
871  	/* If not searching for directory entry. */
872  
873  	/* If item is found. */
874  	retval = search_item(sb, p_cpu_key, search_path);
875  	if (retval == IO_ERROR)
876  		return retval;
877  	if (retval == ITEM_FOUND) {
878  
879  		RFALSE(!ih_item_len
880  		       (item_head
881  			(PATH_PLAST_BUFFER(search_path),
882  			 PATH_LAST_POSITION(search_path))),
883  		       "PAP-5165: item length equals zero");
884  
885  		pos_in_item(search_path) = 0;
886  		return POSITION_FOUND;
887  	}
888  
889  	RFALSE(!PATH_LAST_POSITION(search_path),
890  	       "PAP-5170: position equals zero");
891  
892  	/* Item is not found. Set path to the previous item. */
893  	p_le_ih =
894  	    item_head(PATH_PLAST_BUFFER(search_path),
895  			   --PATH_LAST_POSITION(search_path));
896  	blk_size = sb->s_blocksize;
897  
898  	if (comp_short_keys(&p_le_ih->ih_key, p_cpu_key))
899  		return FILE_NOT_FOUND;
900  
901  	/* FIXME: quite ugly this far */
902  
903  	item_offset = le_ih_k_offset(p_le_ih);
904  	offset = cpu_key_k_offset(p_cpu_key);
905  
906  	/* Needed byte is contained in the item pointed to by the path. */
907  	if (item_offset <= offset &&
908  	    item_offset + op_bytes_number(p_le_ih, blk_size) > offset) {
909  		pos_in_item(search_path) = offset - item_offset;
910  		if (is_indirect_le_ih(p_le_ih)) {
911  			pos_in_item(search_path) /= blk_size;
912  		}
913  		return POSITION_FOUND;
914  	}
915  
916  	/*
917  	 * Needed byte is not contained in the item pointed to by the
918  	 * path. Set pos_in_item out of the item.
919  	 */
920  	if (is_indirect_le_ih(p_le_ih))
921  		pos_in_item(search_path) =
922  		    ih_item_len(p_le_ih) / UNFM_P_SIZE;
923  	else
924  		pos_in_item(search_path) = ih_item_len(p_le_ih);
925  
926  	return POSITION_NOT_FOUND;
927  }
928  
929  /* Compare given item and item pointed to by the path. */
comp_items(const struct item_head * stored_ih,const struct treepath * path)930  int comp_items(const struct item_head *stored_ih, const struct treepath *path)
931  {
932  	struct buffer_head *bh = PATH_PLAST_BUFFER(path);
933  	struct item_head *ih;
934  
935  	/* Last buffer at the path is not in the tree. */
936  	if (!B_IS_IN_TREE(bh))
937  		return 1;
938  
939  	/* Last path position is invalid. */
940  	if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh))
941  		return 1;
942  
943  	/* we need only to know, whether it is the same item */
944  	ih = tp_item_head(path);
945  	return memcmp(stored_ih, ih, IH_SIZE);
946  }
947  
948  /* prepare for delete or cut of direct item */
prepare_for_direct_item(struct treepath * path,struct item_head * le_ih,struct inode * inode,loff_t new_file_length,int * cut_size)949  static inline int prepare_for_direct_item(struct treepath *path,
950  					  struct item_head *le_ih,
951  					  struct inode *inode,
952  					  loff_t new_file_length, int *cut_size)
953  {
954  	loff_t round_len;
955  
956  	if (new_file_length == max_reiserfs_offset(inode)) {
957  		/* item has to be deleted */
958  		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
959  		return M_DELETE;
960  	}
961  	/* new file gets truncated */
962  	if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
963  		round_len = ROUND_UP(new_file_length);
964  		/* this was new_file_length < le_ih ... */
965  		if (round_len < le_ih_k_offset(le_ih)) {
966  			*cut_size = -(IH_SIZE + ih_item_len(le_ih));
967  			return M_DELETE;	/* Delete this item. */
968  		}
969  		/* Calculate first position and size for cutting from item. */
970  		pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
971  		*cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
972  
973  		return M_CUT;	/* Cut from this item. */
974  	}
975  
976  	/* old file: items may have any length */
977  
978  	if (new_file_length < le_ih_k_offset(le_ih)) {
979  		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
980  		return M_DELETE;	/* Delete this item. */
981  	}
982  
983  	/* Calculate first position and size for cutting from item. */
984  	*cut_size = -(ih_item_len(le_ih) -
985  		      (pos_in_item(path) =
986  		       new_file_length + 1 - le_ih_k_offset(le_ih)));
987  	return M_CUT;		/* Cut from this item. */
988  }
989  
prepare_for_direntry_item(struct treepath * path,struct item_head * le_ih,struct inode * inode,loff_t new_file_length,int * cut_size)990  static inline int prepare_for_direntry_item(struct treepath *path,
991  					    struct item_head *le_ih,
992  					    struct inode *inode,
993  					    loff_t new_file_length,
994  					    int *cut_size)
995  {
996  	if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
997  	    new_file_length == max_reiserfs_offset(inode)) {
998  		RFALSE(ih_entry_count(le_ih) != 2,
999  		       "PAP-5220: incorrect empty directory item (%h)", le_ih);
1000  		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
1001  		/* Delete the directory item containing "." and ".." entry. */
1002  		return M_DELETE;
1003  	}
1004  
1005  	if (ih_entry_count(le_ih) == 1) {
1006  		/*
1007  		 * Delete the directory item such as there is one record only
1008  		 * in this item
1009  		 */
1010  		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
1011  		return M_DELETE;
1012  	}
1013  
1014  	/* Cut one record from the directory item. */
1015  	*cut_size =
1016  	    -(DEH_SIZE +
1017  	      entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
1018  	return M_CUT;
1019  }
1020  
1021  #define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
1022  
1023  /*
1024   * If the path points to a directory or direct item, calculate mode
1025   * and the size cut, for balance.
1026   * If the path points to an indirect item, remove some number of its
1027   * unformatted nodes.
1028   * In case of file truncate calculate whether this item must be
1029   * deleted/truncated or last unformatted node of this item will be
1030   * converted to a direct item.
1031   * This function returns a determination of what balance mode the
1032   * calling function should employ.
1033   */
prepare_for_delete_or_cut(struct reiserfs_transaction_handle * th,struct inode * inode,struct treepath * path,const struct cpu_key * item_key,int * removed,int * cut_size,unsigned long long new_file_length)1034  static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th,
1035  				      struct inode *inode,
1036  				      struct treepath *path,
1037  				      const struct cpu_key *item_key,
1038  				      /*
1039  				       * Number of unformatted nodes
1040  				       * which were removed from end
1041  				       * of the file.
1042  				       */
1043  				      int *removed,
1044  				      int *cut_size,
1045  				      /* MAX_KEY_OFFSET in case of delete. */
1046  				      unsigned long long new_file_length
1047      )
1048  {
1049  	struct super_block *sb = inode->i_sb;
1050  	struct item_head *p_le_ih = tp_item_head(path);
1051  	struct buffer_head *bh = PATH_PLAST_BUFFER(path);
1052  
1053  	BUG_ON(!th->t_trans_id);
1054  
1055  	/* Stat_data item. */
1056  	if (is_statdata_le_ih(p_le_ih)) {
1057  
1058  		RFALSE(new_file_length != max_reiserfs_offset(inode),
1059  		       "PAP-5210: mode must be M_DELETE");
1060  
1061  		*cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
1062  		return M_DELETE;
1063  	}
1064  
1065  	/* Directory item. */
1066  	if (is_direntry_le_ih(p_le_ih))
1067  		return prepare_for_direntry_item(path, p_le_ih, inode,
1068  						 new_file_length,
1069  						 cut_size);
1070  
1071  	/* Direct item. */
1072  	if (is_direct_le_ih(p_le_ih))
1073  		return prepare_for_direct_item(path, p_le_ih, inode,
1074  					       new_file_length, cut_size);
1075  
1076  	/* Case of an indirect item. */
1077  	{
1078  	    int blk_size = sb->s_blocksize;
1079  	    struct item_head s_ih;
1080  	    int need_re_search;
1081  	    int delete = 0;
1082  	    int result = M_CUT;
1083  	    int pos = 0;
1084  
1085  	    if ( new_file_length == max_reiserfs_offset (inode) ) {
1086  		/*
1087  		 * prepare_for_delete_or_cut() is called by
1088  		 * reiserfs_delete_item()
1089  		 */
1090  		new_file_length = 0;
1091  		delete = 1;
1092  	    }
1093  
1094  	    do {
1095  		need_re_search = 0;
1096  		*cut_size = 0;
1097  		bh = PATH_PLAST_BUFFER(path);
1098  		copy_item_head(&s_ih, tp_item_head(path));
1099  		pos = I_UNFM_NUM(&s_ih);
1100  
1101  		while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) {
1102  		    __le32 *unfm;
1103  		    __u32 block;
1104  
1105  		    /*
1106  		     * Each unformatted block deletion may involve
1107  		     * one additional bitmap block into the transaction,
1108  		     * thereby the initial journal space reservation
1109  		     * might not be enough.
1110  		     */
1111  		    if (!delete && (*cut_size) != 0 &&
1112  			reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
1113  			break;
1114  
1115  		    unfm = (__le32 *)ih_item_body(bh, &s_ih) + pos - 1;
1116  		    block = get_block_num(unfm, 0);
1117  
1118  		    if (block != 0) {
1119  			reiserfs_prepare_for_journal(sb, bh, 1);
1120  			put_block_num(unfm, 0, 0);
1121  			journal_mark_dirty(th, bh);
1122  			reiserfs_free_block(th, inode, block, 1);
1123  		    }
1124  
1125  		    reiserfs_cond_resched(sb);
1126  
1127  		    if (item_moved (&s_ih, path))  {
1128  			need_re_search = 1;
1129  			break;
1130  		    }
1131  
1132  		    pos --;
1133  		    (*removed)++;
1134  		    (*cut_size) -= UNFM_P_SIZE;
1135  
1136  		    if (pos == 0) {
1137  			(*cut_size) -= IH_SIZE;
1138  			result = M_DELETE;
1139  			break;
1140  		    }
1141  		}
1142  		/*
1143  		 * a trick.  If the buffer has been logged, this will
1144  		 * do nothing.  If we've broken the loop without logging
1145  		 * it, it will restore the buffer
1146  		 */
1147  		reiserfs_restore_prepared_buffer(sb, bh);
1148  	    } while (need_re_search &&
1149  		     search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
1150  	    pos_in_item(path) = pos * UNFM_P_SIZE;
1151  
1152  	    if (*cut_size == 0) {
1153  		/*
1154  		 * Nothing was cut. maybe convert last unformatted node to the
1155  		 * direct item?
1156  		 */
1157  		result = M_CONVERT;
1158  	    }
1159  	    return result;
1160  	}
1161  }
1162  
1163  /* Calculate number of bytes which will be deleted or cut during balance */
calc_deleted_bytes_number(struct tree_balance * tb,char mode)1164  static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
1165  {
1166  	int del_size;
1167  	struct item_head *p_le_ih = tp_item_head(tb->tb_path);
1168  
1169  	if (is_statdata_le_ih(p_le_ih))
1170  		return 0;
1171  
1172  	del_size =
1173  	    (mode ==
1174  	     M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
1175  	if (is_direntry_le_ih(p_le_ih)) {
1176  		/*
1177  		 * return EMPTY_DIR_SIZE; We delete emty directories only.
1178  		 * we can't use EMPTY_DIR_SIZE, as old format dirs have a
1179  		 * different empty size.  ick. FIXME, is this right?
1180  		 */
1181  		return del_size;
1182  	}
1183  
1184  	if (is_indirect_le_ih(p_le_ih))
1185  		del_size = (del_size / UNFM_P_SIZE) *
1186  				(PATH_PLAST_BUFFER(tb->tb_path)->b_size);
1187  	return del_size;
1188  }
1189  
init_tb_struct(struct reiserfs_transaction_handle * th,struct tree_balance * tb,struct super_block * sb,struct treepath * path,int size)1190  static void init_tb_struct(struct reiserfs_transaction_handle *th,
1191  			   struct tree_balance *tb,
1192  			   struct super_block *sb,
1193  			   struct treepath *path, int size)
1194  {
1195  
1196  	BUG_ON(!th->t_trans_id);
1197  
1198  	memset(tb, '\0', sizeof(struct tree_balance));
1199  	tb->transaction_handle = th;
1200  	tb->tb_sb = sb;
1201  	tb->tb_path = path;
1202  	PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1203  	PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1204  	tb->insert_size[0] = size;
1205  }
1206  
padd_item(char * item,int total_length,int length)1207  void padd_item(char *item, int total_length, int length)
1208  {
1209  	int i;
1210  
1211  	for (i = total_length; i > length;)
1212  		item[--i] = 0;
1213  }
1214  
1215  #ifdef REISERQUOTA_DEBUG
key2type(struct reiserfs_key * ih)1216  char key2type(struct reiserfs_key *ih)
1217  {
1218  	if (is_direntry_le_key(2, ih))
1219  		return 'd';
1220  	if (is_direct_le_key(2, ih))
1221  		return 'D';
1222  	if (is_indirect_le_key(2, ih))
1223  		return 'i';
1224  	if (is_statdata_le_key(2, ih))
1225  		return 's';
1226  	return 'u';
1227  }
1228  
head2type(struct item_head * ih)1229  char head2type(struct item_head *ih)
1230  {
1231  	if (is_direntry_le_ih(ih))
1232  		return 'd';
1233  	if (is_direct_le_ih(ih))
1234  		return 'D';
1235  	if (is_indirect_le_ih(ih))
1236  		return 'i';
1237  	if (is_statdata_le_ih(ih))
1238  		return 's';
1239  	return 'u';
1240  }
1241  #endif
1242  
1243  /*
1244   * Delete object item.
1245   * th       - active transaction handle
1246   * path     - path to the deleted item
1247   * item_key - key to search for the deleted item
1248   * indode   - used for updating i_blocks and quotas
1249   * un_bh    - NULL or unformatted node pointer
1250   */
reiserfs_delete_item(struct reiserfs_transaction_handle * th,struct treepath * path,const struct cpu_key * item_key,struct inode * inode,struct buffer_head * un_bh)1251  int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1252  			 struct treepath *path, const struct cpu_key *item_key,
1253  			 struct inode *inode, struct buffer_head *un_bh)
1254  {
1255  	struct super_block *sb = inode->i_sb;
1256  	struct tree_balance s_del_balance;
1257  	struct item_head s_ih;
1258  	struct item_head *q_ih;
1259  	int quota_cut_bytes;
1260  	int ret_value, del_size, removed;
1261  	int depth;
1262  
1263  #ifdef CONFIG_REISERFS_CHECK
1264  	char mode;
1265  #endif
1266  
1267  	BUG_ON(!th->t_trans_id);
1268  
1269  	init_tb_struct(th, &s_del_balance, sb, path,
1270  		       0 /*size is unknown */ );
1271  
1272  	while (1) {
1273  		removed = 0;
1274  
1275  #ifdef CONFIG_REISERFS_CHECK
1276  		mode =
1277  #endif
1278  		    prepare_for_delete_or_cut(th, inode, path,
1279  					      item_key, &removed,
1280  					      &del_size,
1281  					      max_reiserfs_offset(inode));
1282  
1283  		RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1284  
1285  		copy_item_head(&s_ih, tp_item_head(path));
1286  		s_del_balance.insert_size[0] = del_size;
1287  
1288  		ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1289  		if (ret_value != REPEAT_SEARCH)
1290  			break;
1291  
1292  		PROC_INFO_INC(sb, delete_item_restarted);
1293  
1294  		/* file system changed, repeat search */
1295  		ret_value =
1296  		    search_for_position_by_key(sb, item_key, path);
1297  		if (ret_value == IO_ERROR)
1298  			break;
1299  		if (ret_value == FILE_NOT_FOUND) {
1300  			reiserfs_warning(sb, "vs-5340",
1301  					 "no items of the file %K found",
1302  					 item_key);
1303  			break;
1304  		}
1305  	}			/* while (1) */
1306  
1307  	if (ret_value != CARRY_ON) {
1308  		unfix_nodes(&s_del_balance);
1309  		return 0;
1310  	}
1311  
1312  	/* reiserfs_delete_item returns item length when success */
1313  	ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1314  	q_ih = tp_item_head(path);
1315  	quota_cut_bytes = ih_item_len(q_ih);
1316  
1317  	/*
1318  	 * hack so the quota code doesn't have to guess if the file has a
1319  	 * tail.  On tail insert, we allocate quota for 1 unformatted node.
1320  	 * We test the offset because the tail might have been
1321  	 * split into multiple items, and we only want to decrement for
1322  	 * the unfm node once
1323  	 */
1324  	if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
1325  		if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
1326  			quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1327  		} else {
1328  			quota_cut_bytes = 0;
1329  		}
1330  	}
1331  
1332  	if (un_bh) {
1333  		int off;
1334  		char *data;
1335  
1336  		/*
1337  		 * We are in direct2indirect conversion, so move tail contents
1338  		 * to the unformatted node
1339  		 */
1340  		/*
1341  		 * note, we do the copy before preparing the buffer because we
1342  		 * don't care about the contents of the unformatted node yet.
1343  		 * the only thing we really care about is the direct item's
1344  		 * data is in the unformatted node.
1345  		 *
1346  		 * Otherwise, we would have to call
1347  		 * reiserfs_prepare_for_journal on the unformatted node,
1348  		 * which might schedule, meaning we'd have to loop all the
1349  		 * way back up to the start of the while loop.
1350  		 *
1351  		 * The unformatted node must be dirtied later on.  We can't be
1352  		 * sure here if the entire tail has been deleted yet.
1353  		 *
1354  		 * un_bh is from the page cache (all unformatted nodes are
1355  		 * from the page cache) and might be a highmem page.  So, we
1356  		 * can't use un_bh->b_data.
1357  		 * -clm
1358  		 */
1359  
1360  		data = kmap_atomic(un_bh->b_page);
1361  		off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_SIZE - 1));
1362  		memcpy(data + off,
1363  		       ih_item_body(PATH_PLAST_BUFFER(path), &s_ih),
1364  		       ret_value);
1365  		kunmap_atomic(data);
1366  	}
1367  
1368  	/* Perform balancing after all resources have been collected at once. */
1369  	do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1370  
1371  #ifdef REISERQUOTA_DEBUG
1372  	reiserfs_debug(sb, REISERFS_DEBUG_CODE,
1373  		       "reiserquota delete_item(): freeing %u, id=%u type=%c",
1374  		       quota_cut_bytes, inode->i_uid, head2type(&s_ih));
1375  #endif
1376  	depth = reiserfs_write_unlock_nested(inode->i_sb);
1377  	dquot_free_space_nodirty(inode, quota_cut_bytes);
1378  	reiserfs_write_lock_nested(inode->i_sb, depth);
1379  
1380  	/* Return deleted body length */
1381  	return ret_value;
1382  }
1383  
1384  /*
1385   * Summary Of Mechanisms For Handling Collisions Between Processes:
1386   *
1387   *  deletion of the body of the object is performed by iput(), with the
1388   *  result that if multiple processes are operating on a file, the
1389   *  deletion of the body of the file is deferred until the last process
1390   *  that has an open inode performs its iput().
1391   *
1392   *  writes and truncates are protected from collisions by use of
1393   *  semaphores.
1394   *
1395   *  creates, linking, and mknod are protected from collisions with other
1396   *  processes by making the reiserfs_add_entry() the last step in the
1397   *  creation, and then rolling back all changes if there was a collision.
1398   *  - Hans
1399  */
1400  
1401  /* this deletes item which never gets split */
reiserfs_delete_solid_item(struct reiserfs_transaction_handle * th,struct inode * inode,struct reiserfs_key * key)1402  void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1403  				struct inode *inode, struct reiserfs_key *key)
1404  {
1405  	struct super_block *sb = th->t_super;
1406  	struct tree_balance tb;
1407  	INITIALIZE_PATH(path);
1408  	int item_len = 0;
1409  	int tb_init = 0;
1410  	struct cpu_key cpu_key = {};
1411  	int retval;
1412  	int quota_cut_bytes = 0;
1413  
1414  	BUG_ON(!th->t_trans_id);
1415  
1416  	le_key2cpu_key(&cpu_key, key);
1417  
1418  	while (1) {
1419  		retval = search_item(th->t_super, &cpu_key, &path);
1420  		if (retval == IO_ERROR) {
1421  			reiserfs_error(th->t_super, "vs-5350",
1422  				       "i/o failure occurred trying "
1423  				       "to delete %K", &cpu_key);
1424  			break;
1425  		}
1426  		if (retval != ITEM_FOUND) {
1427  			pathrelse(&path);
1428  			/*
1429  			 * No need for a warning, if there is just no free
1430  			 * space to insert '..' item into the
1431  			 * newly-created subdir
1432  			 */
1433  			if (!
1434  			    ((unsigned long long)
1435  			     GET_HASH_VALUE(le_key_k_offset
1436  					    (le_key_version(key), key)) == 0
1437  			     && (unsigned long long)
1438  			     GET_GENERATION_NUMBER(le_key_k_offset
1439  						   (le_key_version(key),
1440  						    key)) == 1))
1441  				reiserfs_warning(th->t_super, "vs-5355",
1442  						 "%k not found", key);
1443  			break;
1444  		}
1445  		if (!tb_init) {
1446  			tb_init = 1;
1447  			item_len = ih_item_len(tp_item_head(&path));
1448  			init_tb_struct(th, &tb, th->t_super, &path,
1449  				       -(IH_SIZE + item_len));
1450  		}
1451  		quota_cut_bytes = ih_item_len(tp_item_head(&path));
1452  
1453  		retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
1454  		if (retval == REPEAT_SEARCH) {
1455  			PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
1456  			continue;
1457  		}
1458  
1459  		if (retval == CARRY_ON) {
1460  			do_balance(&tb, NULL, NULL, M_DELETE);
1461  			/*
1462  			 * Should we count quota for item? (we don't
1463  			 * count quotas for save-links)
1464  			 */
1465  			if (inode) {
1466  				int depth;
1467  #ifdef REISERQUOTA_DEBUG
1468  				reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
1469  					       "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
1470  					       quota_cut_bytes, inode->i_uid,
1471  					       key2type(key));
1472  #endif
1473  				depth = reiserfs_write_unlock_nested(sb);
1474  				dquot_free_space_nodirty(inode,
1475  							 quota_cut_bytes);
1476  				reiserfs_write_lock_nested(sb, depth);
1477  			}
1478  			break;
1479  		}
1480  
1481  		/* IO_ERROR, NO_DISK_SPACE, etc */
1482  		reiserfs_warning(th->t_super, "vs-5360",
1483  				 "could not delete %K due to fix_nodes failure",
1484  				 &cpu_key);
1485  		unfix_nodes(&tb);
1486  		break;
1487  	}
1488  
1489  	reiserfs_check_path(&path);
1490  }
1491  
reiserfs_delete_object(struct reiserfs_transaction_handle * th,struct inode * inode)1492  int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
1493  			   struct inode *inode)
1494  {
1495  	int err;
1496  	inode->i_size = 0;
1497  	BUG_ON(!th->t_trans_id);
1498  
1499  	/* for directory this deletes item containing "." and ".." */
1500  	err =
1501  	    reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
1502  	if (err)
1503  		return err;
1504  
1505  #if defined( USE_INODE_GENERATION_COUNTER )
1506  	if (!old_format_only(th->t_super)) {
1507  		__le32 *inode_generation;
1508  
1509  		inode_generation =
1510  		    &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
1511  		le32_add_cpu(inode_generation, 1);
1512  	}
1513  /* USE_INODE_GENERATION_COUNTER */
1514  #endif
1515  	reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1516  
1517  	return err;
1518  }
1519  
unmap_buffers(struct page * page,loff_t pos)1520  static void unmap_buffers(struct page *page, loff_t pos)
1521  {
1522  	struct buffer_head *bh;
1523  	struct buffer_head *head;
1524  	struct buffer_head *next;
1525  	unsigned long tail_index;
1526  	unsigned long cur_index;
1527  
1528  	if (page) {
1529  		if (page_has_buffers(page)) {
1530  			tail_index = pos & (PAGE_SIZE - 1);
1531  			cur_index = 0;
1532  			head = page_buffers(page);
1533  			bh = head;
1534  			do {
1535  				next = bh->b_this_page;
1536  
1537  				/*
1538  				 * we want to unmap the buffers that contain
1539  				 * the tail, and all the buffers after it
1540  				 * (since the tail must be at the end of the
1541  				 * file).  We don't want to unmap file data
1542  				 * before the tail, since it might be dirty
1543  				 * and waiting to reach disk
1544  				 */
1545  				cur_index += bh->b_size;
1546  				if (cur_index > tail_index) {
1547  					reiserfs_unmap_buffer(bh);
1548  				}
1549  				bh = next;
1550  			} while (bh != head);
1551  		}
1552  	}
1553  }
1554  
maybe_indirect_to_direct(struct reiserfs_transaction_handle * th,struct inode * inode,struct page * page,struct treepath * path,const struct cpu_key * item_key,loff_t new_file_size,char * mode)1555  static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1556  				    struct inode *inode,
1557  				    struct page *page,
1558  				    struct treepath *path,
1559  				    const struct cpu_key *item_key,
1560  				    loff_t new_file_size, char *mode)
1561  {
1562  	struct super_block *sb = inode->i_sb;
1563  	int block_size = sb->s_blocksize;
1564  	int cut_bytes;
1565  	BUG_ON(!th->t_trans_id);
1566  	BUG_ON(new_file_size != inode->i_size);
1567  
1568  	/*
1569  	 * the page being sent in could be NULL if there was an i/o error
1570  	 * reading in the last block.  The user will hit problems trying to
1571  	 * read the file, but for now we just skip the indirect2direct
1572  	 */
1573  	if (atomic_read(&inode->i_count) > 1 ||
1574  	    !tail_has_to_be_packed(inode) ||
1575  	    !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
1576  		/* leave tail in an unformatted node */
1577  		*mode = M_SKIP_BALANCING;
1578  		cut_bytes =
1579  		    block_size - (new_file_size & (block_size - 1));
1580  		pathrelse(path);
1581  		return cut_bytes;
1582  	}
1583  
1584  	/* Perform the conversion to a direct_item. */
1585  	return indirect2direct(th, inode, page, path, item_key,
1586  			       new_file_size, mode);
1587  }
1588  
1589  /*
1590   * we did indirect_to_direct conversion. And we have inserted direct
1591   * item successesfully, but there were no disk space to cut unfm
1592   * pointer being converted. Therefore we have to delete inserted
1593   * direct item(s)
1594   */
indirect_to_direct_roll_back(struct reiserfs_transaction_handle * th,struct inode * inode,struct treepath * path)1595  static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1596  					 struct inode *inode, struct treepath *path)
1597  {
1598  	struct cpu_key tail_key;
1599  	int tail_len;
1600  	int removed;
1601  	BUG_ON(!th->t_trans_id);
1602  
1603  	make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);
1604  	tail_key.key_length = 4;
1605  
1606  	tail_len =
1607  	    (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
1608  	while (tail_len) {
1609  		/* look for the last byte of the tail */
1610  		if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
1611  		    POSITION_NOT_FOUND)
1612  			reiserfs_panic(inode->i_sb, "vs-5615",
1613  				       "found invalid item");
1614  		RFALSE(path->pos_in_item !=
1615  		       ih_item_len(tp_item_head(path)) - 1,
1616  		       "vs-5616: appended bytes found");
1617  		PATH_LAST_POSITION(path)--;
1618  
1619  		removed =
1620  		    reiserfs_delete_item(th, path, &tail_key, inode,
1621  					 NULL /*unbh not needed */ );
1622  		RFALSE(removed <= 0
1623  		       || removed > tail_len,
1624  		       "vs-5617: there was tail %d bytes, removed item length %d bytes",
1625  		       tail_len, removed);
1626  		tail_len -= removed;
1627  		set_cpu_key_k_offset(&tail_key,
1628  				     cpu_key_k_offset(&tail_key) - removed);
1629  	}
1630  	reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
1631  			 "conversion has been rolled back due to "
1632  			 "lack of disk space");
1633  	mark_inode_dirty(inode);
1634  }
1635  
1636  /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
reiserfs_cut_from_item(struct reiserfs_transaction_handle * th,struct treepath * path,struct cpu_key * item_key,struct inode * inode,struct page * page,loff_t new_file_size)1637  int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1638  			   struct treepath *path,
1639  			   struct cpu_key *item_key,
1640  			   struct inode *inode,
1641  			   struct page *page, loff_t new_file_size)
1642  {
1643  	struct super_block *sb = inode->i_sb;
1644  	/*
1645  	 * Every function which is going to call do_balance must first
1646  	 * create a tree_balance structure.  Then it must fill up this
1647  	 * structure by using the init_tb_struct and fix_nodes functions.
1648  	 * After that we can make tree balancing.
1649  	 */
1650  	struct tree_balance s_cut_balance;
1651  	struct item_head *p_le_ih;
1652  	int cut_size = 0;	/* Amount to be cut. */
1653  	int ret_value = CARRY_ON;
1654  	int removed = 0;	/* Number of the removed unformatted nodes. */
1655  	int is_inode_locked = 0;
1656  	char mode;		/* Mode of the balance. */
1657  	int retval2 = -1;
1658  	int quota_cut_bytes;
1659  	loff_t tail_pos = 0;
1660  	int depth;
1661  
1662  	BUG_ON(!th->t_trans_id);
1663  
1664  	init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
1665  		       cut_size);
1666  
1667  	/*
1668  	 * Repeat this loop until we either cut the item without needing
1669  	 * to balance, or we fix_nodes without schedule occurring
1670  	 */
1671  	while (1) {
1672  		/*
1673  		 * Determine the balance mode, position of the first byte to
1674  		 * be cut, and size to be cut.  In case of the indirect item
1675  		 * free unformatted nodes which are pointed to by the cut
1676  		 * pointers.
1677  		 */
1678  
1679  		mode =
1680  		    prepare_for_delete_or_cut(th, inode, path,
1681  					      item_key, &removed,
1682  					      &cut_size, new_file_size);
1683  		if (mode == M_CONVERT) {
1684  			/*
1685  			 * convert last unformatted node to direct item or
1686  			 * leave tail in the unformatted node
1687  			 */
1688  			RFALSE(ret_value != CARRY_ON,
1689  			       "PAP-5570: can not convert twice");
1690  
1691  			ret_value =
1692  			    maybe_indirect_to_direct(th, inode, page,
1693  						     path, item_key,
1694  						     new_file_size, &mode);
1695  			if (mode == M_SKIP_BALANCING)
1696  				/* tail has been left in the unformatted node */
1697  				return ret_value;
1698  
1699  			is_inode_locked = 1;
1700  
1701  			/*
1702  			 * removing of last unformatted node will
1703  			 * change value we have to return to truncate.
1704  			 * Save it
1705  			 */
1706  			retval2 = ret_value;
1707  
1708  			/*
1709  			 * So, we have performed the first part of the
1710  			 * conversion:
1711  			 * inserting the new direct item.  Now we are
1712  			 * removing the last unformatted node pointer.
1713  			 * Set key to search for it.
1714  			 */
1715  			set_cpu_key_k_type(item_key, TYPE_INDIRECT);
1716  			item_key->key_length = 4;
1717  			new_file_size -=
1718  			    (new_file_size & (sb->s_blocksize - 1));
1719  			tail_pos = new_file_size;
1720  			set_cpu_key_k_offset(item_key, new_file_size + 1);
1721  			if (search_for_position_by_key
1722  			    (sb, item_key,
1723  			     path) == POSITION_NOT_FOUND) {
1724  				print_block(PATH_PLAST_BUFFER(path), 3,
1725  					    PATH_LAST_POSITION(path) - 1,
1726  					    PATH_LAST_POSITION(path) + 1);
1727  				reiserfs_panic(sb, "PAP-5580", "item to "
1728  					       "convert does not exist (%K)",
1729  					       item_key);
1730  			}
1731  			continue;
1732  		}
1733  		if (cut_size == 0) {
1734  			pathrelse(path);
1735  			return 0;
1736  		}
1737  
1738  		s_cut_balance.insert_size[0] = cut_size;
1739  
1740  		ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL);
1741  		if (ret_value != REPEAT_SEARCH)
1742  			break;
1743  
1744  		PROC_INFO_INC(sb, cut_from_item_restarted);
1745  
1746  		ret_value =
1747  		    search_for_position_by_key(sb, item_key, path);
1748  		if (ret_value == POSITION_FOUND)
1749  			continue;
1750  
1751  		reiserfs_warning(sb, "PAP-5610", "item %K not found",
1752  				 item_key);
1753  		unfix_nodes(&s_cut_balance);
1754  		return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
1755  	}			/* while */
1756  
1757  	/* check fix_nodes results (IO_ERROR or NO_DISK_SPACE) */
1758  	if (ret_value != CARRY_ON) {
1759  		if (is_inode_locked) {
1760  			/*
1761  			 * FIXME: this seems to be not needed: we are always
1762  			 * able to cut item
1763  			 */
1764  			indirect_to_direct_roll_back(th, inode, path);
1765  		}
1766  		if (ret_value == NO_DISK_SPACE)
1767  			reiserfs_warning(sb, "reiserfs-5092",
1768  					 "NO_DISK_SPACE");
1769  		unfix_nodes(&s_cut_balance);
1770  		return -EIO;
1771  	}
1772  
1773  	/* go ahead and perform balancing */
1774  
1775  	RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode");
1776  
1777  	/* Calculate number of bytes that need to be cut from the item. */
1778  	quota_cut_bytes =
1779  	    (mode ==
1780  	     M_DELETE) ? ih_item_len(tp_item_head(path)) : -s_cut_balance.
1781  	    insert_size[0];
1782  	if (retval2 == -1)
1783  		ret_value = calc_deleted_bytes_number(&s_cut_balance, mode);
1784  	else
1785  		ret_value = retval2;
1786  
1787  	/*
1788  	 * For direct items, we only change the quota when deleting the last
1789  	 * item.
1790  	 */
1791  	p_le_ih = tp_item_head(s_cut_balance.tb_path);
1792  	if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1793  		if (mode == M_DELETE &&
1794  		    (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
1795  		    1) {
1796  			/* FIXME: this is to keep 3.5 happy */
1797  			REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
1798  			quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1799  		} else {
1800  			quota_cut_bytes = 0;
1801  		}
1802  	}
1803  #ifdef CONFIG_REISERFS_CHECK
1804  	if (is_inode_locked) {
1805  		struct item_head *le_ih =
1806  		    tp_item_head(s_cut_balance.tb_path);
1807  		/*
1808  		 * we are going to complete indirect2direct conversion. Make
1809  		 * sure, that we exactly remove last unformatted node pointer
1810  		 * of the item
1811  		 */
1812  		if (!is_indirect_le_ih(le_ih))
1813  			reiserfs_panic(sb, "vs-5652",
1814  				       "item must be indirect %h", le_ih);
1815  
1816  		if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
1817  			reiserfs_panic(sb, "vs-5653", "completing "
1818  				       "indirect2direct conversion indirect "
1819  				       "item %h being deleted must be of "
1820  				       "4 byte long", le_ih);
1821  
1822  		if (mode == M_CUT
1823  		    && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
1824  			reiserfs_panic(sb, "vs-5654", "can not complete "
1825  				       "indirect2direct conversion of %h "
1826  				       "(CUT, insert_size==%d)",
1827  				       le_ih, s_cut_balance.insert_size[0]);
1828  		}
1829  		/*
1830  		 * it would be useful to make sure, that right neighboring
1831  		 * item is direct item of this file
1832  		 */
1833  	}
1834  #endif
1835  
1836  	do_balance(&s_cut_balance, NULL, NULL, mode);
1837  	if (is_inode_locked) {
1838  		/*
1839  		 * we've done an indirect->direct conversion.  when the
1840  		 * data block was freed, it was removed from the list of
1841  		 * blocks that must be flushed before the transaction
1842  		 * commits, make sure to unmap and invalidate it
1843  		 */
1844  		unmap_buffers(page, tail_pos);
1845  		REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
1846  	}
1847  #ifdef REISERQUOTA_DEBUG
1848  	reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1849  		       "reiserquota cut_from_item(): freeing %u id=%u type=%c",
1850  		       quota_cut_bytes, inode->i_uid, '?');
1851  #endif
1852  	depth = reiserfs_write_unlock_nested(sb);
1853  	dquot_free_space_nodirty(inode, quota_cut_bytes);
1854  	reiserfs_write_lock_nested(sb, depth);
1855  	return ret_value;
1856  }
1857  
truncate_directory(struct reiserfs_transaction_handle * th,struct inode * inode)1858  static void truncate_directory(struct reiserfs_transaction_handle *th,
1859  			       struct inode *inode)
1860  {
1861  	BUG_ON(!th->t_trans_id);
1862  	if (inode->i_nlink)
1863  		reiserfs_error(inode->i_sb, "vs-5655", "link count != 0");
1864  
1865  	set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
1866  	set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
1867  	reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1868  	reiserfs_update_sd(th, inode);
1869  	set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
1870  	set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
1871  }
1872  
1873  /*
1874   * Truncate file to the new size. Note, this must be called with a
1875   * transaction already started
1876   */
reiserfs_do_truncate(struct reiserfs_transaction_handle * th,struct inode * inode,struct page * page,int update_timestamps)1877  int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1878  			 struct inode *inode,	/* ->i_size contains new size */
1879  			 struct page *page,	/* up to date for last block */
1880  			 /*
1881  			  * when it is called by file_release to convert
1882  			  * the tail - no timestamps should be updated
1883  			  */
1884  			 int update_timestamps
1885      )
1886  {
1887  	INITIALIZE_PATH(s_search_path);	/* Path to the current object item. */
1888  	struct item_head *p_le_ih;	/* Pointer to an item header. */
1889  
1890  	/* Key to search for a previous file item. */
1891  	struct cpu_key s_item_key;
1892  	loff_t file_size,	/* Old file size. */
1893  	 new_file_size;	/* New file size. */
1894  	int deleted;		/* Number of deleted or truncated bytes. */
1895  	int retval;
1896  	int err = 0;
1897  
1898  	BUG_ON(!th->t_trans_id);
1899  	if (!
1900  	    (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
1901  	     || S_ISLNK(inode->i_mode)))
1902  		return 0;
1903  
1904  	/* deletion of directory - no need to update timestamps */
1905  	if (S_ISDIR(inode->i_mode)) {
1906  		truncate_directory(th, inode);
1907  		return 0;
1908  	}
1909  
1910  	/* Get new file size. */
1911  	new_file_size = inode->i_size;
1912  
1913  	/* FIXME: note, that key type is unimportant here */
1914  	make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
1915  		     TYPE_DIRECT, 3);
1916  
1917  	retval =
1918  	    search_for_position_by_key(inode->i_sb, &s_item_key,
1919  				       &s_search_path);
1920  	if (retval == IO_ERROR) {
1921  		reiserfs_error(inode->i_sb, "vs-5657",
1922  			       "i/o failure occurred trying to truncate %K",
1923  			       &s_item_key);
1924  		err = -EIO;
1925  		goto out;
1926  	}
1927  	if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1928  		reiserfs_error(inode->i_sb, "PAP-5660",
1929  			       "wrong result %d of search for %K", retval,
1930  			       &s_item_key);
1931  
1932  		err = -EIO;
1933  		goto out;
1934  	}
1935  
1936  	s_search_path.pos_in_item--;
1937  
1938  	/* Get real file size (total length of all file items) */
1939  	p_le_ih = tp_item_head(&s_search_path);
1940  	if (is_statdata_le_ih(p_le_ih))
1941  		file_size = 0;
1942  	else {
1943  		loff_t offset = le_ih_k_offset(p_le_ih);
1944  		int bytes =
1945  		    op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
1946  
1947  		/*
1948  		 * this may mismatch with real file size: if last direct item
1949  		 * had no padding zeros and last unformatted node had no free
1950  		 * space, this file would have this file size
1951  		 */
1952  		file_size = offset + bytes - 1;
1953  	}
1954  	/*
1955  	 * are we doing a full truncate or delete, if so
1956  	 * kick in the reada code
1957  	 */
1958  	if (new_file_size == 0)
1959  		s_search_path.reada = PATH_READA | PATH_READA_BACK;
1960  
1961  	if (file_size == 0 || file_size < new_file_size) {
1962  		goto update_and_out;
1963  	}
1964  
1965  	/* Update key to search for the last file item. */
1966  	set_cpu_key_k_offset(&s_item_key, file_size);
1967  
1968  	do {
1969  		/* Cut or delete file item. */
1970  		deleted =
1971  		    reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
1972  					   inode, page, new_file_size);
1973  		if (deleted < 0) {
1974  			reiserfs_warning(inode->i_sb, "vs-5665",
1975  					 "reiserfs_cut_from_item failed");
1976  			reiserfs_check_path(&s_search_path);
1977  			return 0;
1978  		}
1979  
1980  		RFALSE(deleted > file_size,
1981  		       "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1982  		       deleted, file_size, &s_item_key);
1983  
1984  		/* Change key to search the last file item. */
1985  		file_size -= deleted;
1986  
1987  		set_cpu_key_k_offset(&s_item_key, file_size);
1988  
1989  		/*
1990  		 * While there are bytes to truncate and previous
1991  		 * file item is presented in the tree.
1992  		 */
1993  
1994  		/*
1995  		 * This loop could take a really long time, and could log
1996  		 * many more blocks than a transaction can hold.  So, we do
1997  		 * a polite journal end here, and if the transaction needs
1998  		 * ending, we make sure the file is consistent before ending
1999  		 * the current trans and starting a new one
2000  		 */
2001  		if (journal_transaction_should_end(th, 0) ||
2002  		    reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
2003  			pathrelse(&s_search_path);
2004  
2005  			if (update_timestamps) {
2006  				inode_set_mtime_to_ts(inode,
2007  						      current_time(inode));
2008  				inode_set_ctime_current(inode);
2009  			}
2010  			reiserfs_update_sd(th, inode);
2011  
2012  			err = journal_end(th);
2013  			if (err)
2014  				goto out;
2015  			err = journal_begin(th, inode->i_sb,
2016  					    JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
2017  			if (err)
2018  				goto out;
2019  			reiserfs_update_inode_transaction(inode);
2020  		}
2021  	} while (file_size > ROUND_UP(new_file_size) &&
2022  		 search_for_position_by_key(inode->i_sb, &s_item_key,
2023  					    &s_search_path) == POSITION_FOUND);
2024  
2025  	RFALSE(file_size > ROUND_UP(new_file_size),
2026  	       "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d",
2027  	       new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
2028  
2029  update_and_out:
2030  	if (update_timestamps) {
2031  		/* this is truncate, not file closing */
2032  		inode_set_mtime_to_ts(inode, current_time(inode));
2033  		inode_set_ctime_current(inode);
2034  	}
2035  	reiserfs_update_sd(th, inode);
2036  
2037  out:
2038  	pathrelse(&s_search_path);
2039  	return err;
2040  }
2041  
2042  #ifdef CONFIG_REISERFS_CHECK
2043  /* this makes sure, that we __append__, not overwrite or add holes */
check_research_for_paste(struct treepath * path,const struct cpu_key * key)2044  static void check_research_for_paste(struct treepath *path,
2045  				     const struct cpu_key *key)
2046  {
2047  	struct item_head *found_ih = tp_item_head(path);
2048  
2049  	if (is_direct_le_ih(found_ih)) {
2050  		if (le_ih_k_offset(found_ih) +
2051  		    op_bytes_number(found_ih,
2052  				    get_last_bh(path)->b_size) !=
2053  		    cpu_key_k_offset(key)
2054  		    || op_bytes_number(found_ih,
2055  				       get_last_bh(path)->b_size) !=
2056  		    pos_in_item(path))
2057  			reiserfs_panic(NULL, "PAP-5720", "found direct item "
2058  				       "%h or position (%d) does not match "
2059  				       "to key %K", found_ih,
2060  				       pos_in_item(path), key);
2061  	}
2062  	if (is_indirect_le_ih(found_ih)) {
2063  		if (le_ih_k_offset(found_ih) +
2064  		    op_bytes_number(found_ih,
2065  				    get_last_bh(path)->b_size) !=
2066  		    cpu_key_k_offset(key)
2067  		    || I_UNFM_NUM(found_ih) != pos_in_item(path)
2068  		    || get_ih_free_space(found_ih) != 0)
2069  			reiserfs_panic(NULL, "PAP-5730", "found indirect "
2070  				       "item (%h) or position (%d) does not "
2071  				       "match to key (%K)",
2072  				       found_ih, pos_in_item(path), key);
2073  	}
2074  }
2075  #endif				/* config reiserfs check */
2076  
2077  /*
2078   * Paste bytes to the existing item.
2079   * Returns bytes number pasted into the item.
2080   */
reiserfs_paste_into_item(struct reiserfs_transaction_handle * th,struct treepath * search_path,const struct cpu_key * key,struct inode * inode,const char * body,int pasted_size)2081  int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th,
2082  			     /* Path to the pasted item. */
2083  			     struct treepath *search_path,
2084  			     /* Key to search for the needed item. */
2085  			     const struct cpu_key *key,
2086  			     /* Inode item belongs to */
2087  			     struct inode *inode,
2088  			     /* Pointer to the bytes to paste. */
2089  			     const char *body,
2090  			     /* Size of pasted bytes. */
2091  			     int pasted_size)
2092  {
2093  	struct super_block *sb = inode->i_sb;
2094  	struct tree_balance s_paste_balance;
2095  	int retval;
2096  	int fs_gen;
2097  	int depth;
2098  
2099  	BUG_ON(!th->t_trans_id);
2100  
2101  	fs_gen = get_generation(inode->i_sb);
2102  
2103  #ifdef REISERQUOTA_DEBUG
2104  	reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2105  		       "reiserquota paste_into_item(): allocating %u id=%u type=%c",
2106  		       pasted_size, inode->i_uid,
2107  		       key2type(&key->on_disk_key));
2108  #endif
2109  
2110  	depth = reiserfs_write_unlock_nested(sb);
2111  	retval = dquot_alloc_space_nodirty(inode, pasted_size);
2112  	reiserfs_write_lock_nested(sb, depth);
2113  	if (retval) {
2114  		pathrelse(search_path);
2115  		return retval;
2116  	}
2117  	init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
2118  		       pasted_size);
2119  #ifdef DISPLACE_NEW_PACKING_LOCALITIES
2120  	s_paste_balance.key = key->on_disk_key;
2121  #endif
2122  
2123  	/* DQUOT_* can schedule, must check before the fix_nodes */
2124  	if (fs_changed(fs_gen, inode->i_sb)) {
2125  		goto search_again;
2126  	}
2127  
2128  	while ((retval =
2129  		fix_nodes(M_PASTE, &s_paste_balance, NULL,
2130  			  body)) == REPEAT_SEARCH) {
2131  search_again:
2132  		/* file system changed while we were in the fix_nodes */
2133  		PROC_INFO_INC(th->t_super, paste_into_item_restarted);
2134  		retval =
2135  		    search_for_position_by_key(th->t_super, key,
2136  					       search_path);
2137  		if (retval == IO_ERROR) {
2138  			retval = -EIO;
2139  			goto error_out;
2140  		}
2141  		if (retval == POSITION_FOUND) {
2142  			reiserfs_warning(inode->i_sb, "PAP-5710",
2143  					 "entry or pasted byte (%K) exists",
2144  					 key);
2145  			retval = -EEXIST;
2146  			goto error_out;
2147  		}
2148  #ifdef CONFIG_REISERFS_CHECK
2149  		check_research_for_paste(search_path, key);
2150  #endif
2151  	}
2152  
2153  	/*
2154  	 * Perform balancing after all resources are collected by fix_nodes,
2155  	 * and accessing them will not risk triggering schedule.
2156  	 */
2157  	if (retval == CARRY_ON) {
2158  		do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
2159  		return 0;
2160  	}
2161  	retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2162  error_out:
2163  	/* this also releases the path */
2164  	unfix_nodes(&s_paste_balance);
2165  #ifdef REISERQUOTA_DEBUG
2166  	reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2167  		       "reiserquota paste_into_item(): freeing %u id=%u type=%c",
2168  		       pasted_size, inode->i_uid,
2169  		       key2type(&key->on_disk_key));
2170  #endif
2171  	depth = reiserfs_write_unlock_nested(sb);
2172  	dquot_free_space_nodirty(inode, pasted_size);
2173  	reiserfs_write_lock_nested(sb, depth);
2174  	return retval;
2175  }
2176  
2177  /*
2178   * Insert new item into the buffer at the path.
2179   * th   - active transaction handle
2180   * path - path to the inserted item
2181   * ih   - pointer to the item header to insert
2182   * body - pointer to the bytes to insert
2183   */
reiserfs_insert_item(struct reiserfs_transaction_handle * th,struct treepath * path,const struct cpu_key * key,struct item_head * ih,struct inode * inode,const char * body)2184  int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2185  			 struct treepath *path, const struct cpu_key *key,
2186  			 struct item_head *ih, struct inode *inode,
2187  			 const char *body)
2188  {
2189  	struct tree_balance s_ins_balance;
2190  	int retval;
2191  	int fs_gen = 0;
2192  	int quota_bytes = 0;
2193  
2194  	BUG_ON(!th->t_trans_id);
2195  
2196  	if (inode) {		/* Do we count quotas for item? */
2197  		int depth;
2198  		fs_gen = get_generation(inode->i_sb);
2199  		quota_bytes = ih_item_len(ih);
2200  
2201  		/*
2202  		 * hack so the quota code doesn't have to guess
2203  		 * if the file has a tail, links are always tails,
2204  		 * so there's no guessing needed
2205  		 */
2206  		if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
2207  			quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
2208  #ifdef REISERQUOTA_DEBUG
2209  		reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2210  			       "reiserquota insert_item(): allocating %u id=%u type=%c",
2211  			       quota_bytes, inode->i_uid, head2type(ih));
2212  #endif
2213  		/*
2214  		 * We can't dirty inode here. It would be immediately
2215  		 * written but appropriate stat item isn't inserted yet...
2216  		 */
2217  		depth = reiserfs_write_unlock_nested(inode->i_sb);
2218  		retval = dquot_alloc_space_nodirty(inode, quota_bytes);
2219  		reiserfs_write_lock_nested(inode->i_sb, depth);
2220  		if (retval) {
2221  			pathrelse(path);
2222  			return retval;
2223  		}
2224  	}
2225  	init_tb_struct(th, &s_ins_balance, th->t_super, path,
2226  		       IH_SIZE + ih_item_len(ih));
2227  #ifdef DISPLACE_NEW_PACKING_LOCALITIES
2228  	s_ins_balance.key = key->on_disk_key;
2229  #endif
2230  	/*
2231  	 * DQUOT_* can schedule, must check to be sure calling
2232  	 * fix_nodes is safe
2233  	 */
2234  	if (inode && fs_changed(fs_gen, inode->i_sb)) {
2235  		goto search_again;
2236  	}
2237  
2238  	while ((retval =
2239  		fix_nodes(M_INSERT, &s_ins_balance, ih,
2240  			  body)) == REPEAT_SEARCH) {
2241  search_again:
2242  		/* file system changed while we were in the fix_nodes */
2243  		PROC_INFO_INC(th->t_super, insert_item_restarted);
2244  		retval = search_item(th->t_super, key, path);
2245  		if (retval == IO_ERROR) {
2246  			retval = -EIO;
2247  			goto error_out;
2248  		}
2249  		if (retval == ITEM_FOUND) {
2250  			reiserfs_warning(th->t_super, "PAP-5760",
2251  					 "key %K already exists in the tree",
2252  					 key);
2253  			retval = -EEXIST;
2254  			goto error_out;
2255  		}
2256  	}
2257  
2258  	/* make balancing after all resources will be collected at a time */
2259  	if (retval == CARRY_ON) {
2260  		do_balance(&s_ins_balance, ih, body, M_INSERT);
2261  		return 0;
2262  	}
2263  
2264  	retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2265  error_out:
2266  	/* also releases the path */
2267  	unfix_nodes(&s_ins_balance);
2268  #ifdef REISERQUOTA_DEBUG
2269  	if (inode)
2270  		reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
2271  		       "reiserquota insert_item(): freeing %u id=%u type=%c",
2272  		       quota_bytes, inode->i_uid, head2type(ih));
2273  #endif
2274  	if (inode) {
2275  		int depth = reiserfs_write_unlock_nested(inode->i_sb);
2276  		dquot_free_space_nodirty(inode, quota_bytes);
2277  		reiserfs_write_lock_nested(inode->i_sb, depth);
2278  	}
2279  	return retval;
2280  }
2281