// SPDX-License-Identifier: GPL-2.0+
/*
 * NILFS inode operations.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * Written by Ryusuke Konishi.
 *
 */

#include <linux/buffer_head.h>
#include <linux/gfp.h>
#include <linux/mpage.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/uio.h>
#include <linux/fiemap.h>
#include <linux/random.h>
#include "nilfs.h"
#include "btnode.h"
#include "segment.h"
#include "page.h"
#include "mdt.h"
#include "cpfile.h"
#include "ifile.h"

/**
 * struct nilfs_iget_args - arguments used during comparison between inodes
 * @ino: inode number
 * @cno: checkpoint number
 * @root: pointer to the NILFS root object (mounted checkpoint)
 * @type: inode type
 */
struct nilfs_iget_args {
	u64 ino;
	__u64 cno;
	struct nilfs_root *root;
	unsigned int type;
};

static int nilfs_iget_test(struct inode *inode, void *opaque);

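/**
 * nilfs_inode_add_blocks - add blocks to an inode's block count
 * @inode: inode of the file whose block count is incremented
 * @n: number of blocks newly allocated
 *
 * nilfs_inode_add_blocks() adds @n blocks worth of bytes to @inode and,
 * if the inode is attached to a NILFS root object (mounted checkpoint),
 * also updates the per-checkpoint blocks_count statistic.
 */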
void nilfs_inode_add_blocks(struct inode *inode, int n)
{
	struct nilfs_root *root = NILFS_I(inode)->i_root;

	inode_add_bytes(inode, i_blocksize(inode) * n);
	if (root)
		atomic64_add(n, &root->blocks_count);
}

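/**
 * nilfs_inode_sub_blocks - subtract blocks from an inode's block count
 * @inode: inode of the file whose block count is decremented
 * @n: number of blocks released
 *
 * Counterpart of nilfs_inode_add_blocks(): subtracts @n blocks worth of
 * bytes from @inode and from the per-checkpoint blocks_count statistic.
 */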
void nilfs_inode_sub_blocks(struct inode *inode, int n)
{
	struct nilfs_root *root = NILFS_I(inode)->i_root;

	inode_sub_bytes(inode, i_blocksize(inode) * n);
	if (root)
		atomic64_sub(n, &root->blocks_count);
}

/**
 * nilfs_get_block() - get a file block on the filesystem (callback function)
 * @inode: inode struct of the target file
 * @blkoff: file block number
 * @bh_result: buffer head to be mapped on
 * @create: indicate whether to allocate the block if it has not been
 *      allocated yet
 *
 * This function does not issue an actual read request for the specified
 * data block; that is done by the VFS.
 *
 * Return: 0 on success, or a negative error code on failure.
 */
int nilfs_get_block(struct inode *inode, sector_t blkoff,
		    struct buffer_head *bh_result, int create)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	__u64 blknum = 0;
	int err = 0, ret;
	unsigned int maxblocks = bh_result->b_size >> inode->i_blkbits;

	down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	if (ret >= 0) {	/* found */
		map_bh(bh_result, inode->i_sb, blknum);
		if (ret > 0)
			bh_result->b_size = (ret << inode->i_blkbits);
		goto out;
	}
	/* data block was not found */
	if (ret == -ENOENT && create) {
		struct nilfs_transaction_info ti;

		bh_result->b_blocknr = 0;
		err = nilfs_transaction_begin(inode->i_sb, &ti, 1);
		if (unlikely(err))
			goto out;
		err = nilfs_bmap_insert(ii->i_bmap, blkoff,
					(unsigned long)bh_result);
		if (unlikely(err != 0)) {
			if (err == -EEXIST) {
				/*
				 * get_block() may be called from multiple
				 * callers for the same inode.  However, the
				 * page containing this block must be locked
				 * in that case.
				 */
				nilfs_warn(inode->i_sb,
					   "%s (ino=%lu): a race condition while inserting a data block at offset=%llu",
					   __func__, inode->i_ino,
					   (unsigned long long)blkoff);
				err = -EAGAIN;
			}
			nilfs_transaction_abort(inode->i_sb);
			goto out;
		}
		nilfs_mark_inode_dirty_sync(inode);
		nilfs_transaction_commit(inode->i_sb); /* never fails */
		/* Error handling should be detailed */
		set_buffer_new(bh_result);
		set_buffer_delay(bh_result);
		map_bh(bh_result, inode->i_sb, 0);
		/* Disk block number must be changed to proper value */

	} else if (ret == -ENOENT) {
		/*
		 * Not finding a block is not an error (e.g. a hole); we
		 * must return without setting the mapped state flag.
		 */
		;
	} else {
		err = ret;
	}

 out:
	return err;
}

/**
 * nilfs_read_folio() - implement read_folio() method of nilfs_aops
 * address_space_operations
 * @file: file struct of the file to be read
 * @folio: the folio to be read
 */
static int nilfs_read_folio(struct file *file, struct folio *folio)
{
	return mpage_read_folio(folio, nilfs_get_block);
}

static void nilfs_readahead(struct readahead_control *rac)
{
	mpage_readahead(rac, nilfs_get_block);
}

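/*
 * nilfs_writepages() implements the writepages() method.  On a read-only
 * filesystem, dirty pages are discarded and -EROFS is returned.  For
 * WB_SYNC_ALL writeback, a data-sync segment is constructed over the
 * requested range; other writeback modes do nothing here and are left to
 * the log writer.
 */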
static int nilfs_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	int err = 0;

	if (sb_rdonly(inode->i_sb)) {
		nilfs_clear_dirty_pages(mapping);
		return -EROFS;
	}

	if (wbc->sync_mode == WB_SYNC_ALL)
		err = nilfs_construct_dsync_segment(inode->i_sb, inode,
						    wbc->range_start,
						    wbc->range_end);
	return err;
}

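/*
 * nilfs_writepage() implements the writepage() method.  The folio is never
 * written out directly from here; it is redirtied and unlocked, and the
 * actual flush is delegated to the segment constructor (synchronously for
 * WB_SYNC_ALL, via nilfs_flush_segment() when writing back for reclaim).
 */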
static int nilfs_writepage(struct page *page, struct writeback_control *wbc)
{
	struct folio *folio = page_folio(page);
	struct inode *inode = folio->mapping->host;
	int err;

	if (sb_rdonly(inode->i_sb)) {
		/*
		 * This means that the filesystem was remounted read-only
		 * because of an error or metadata corruption, but dirty
		 * pages are still being flushed in the background.  So,
		 * simply discard this dirty folio here.
		 */
		nilfs_clear_folio_dirty(folio);
		folio_unlock(folio);
		return -EROFS;
	}

	folio_redirty_for_writepage(wbc, folio);
	folio_unlock(folio);

	if (wbc->sync_mode == WB_SYNC_ALL) {
		err = nilfs_construct_segment(inode->i_sb);
		if (unlikely(err))
			return err;
	} else if (wbc->for_reclaim)
		nilfs_flush_segment(inode->i_sb, inode->i_ino);

	return 0;
}

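/*
 * nilfs_dirty_folio() implements the dirty_folio() method.  In addition to
 * dirtying the folio in the page cache, it marks each mapped, still-clean
 * buffer dirty and reports the number of newly dirtied blocks to
 * nilfs_set_file_dirty() so that the inode gets queued for log writing.
 */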
static bool nilfs_dirty_folio(struct address_space *mapping,
		struct folio *folio)
{
	struct inode *inode = mapping->host;
	struct buffer_head *head;
	unsigned int nr_dirty = 0;
	bool ret = filemap_dirty_folio(mapping, folio);

	/*
	 * The folio may not be locked, e.g. if called from
	 * try_to_unmap_one().
	 */
	spin_lock(&mapping->i_private_lock);
	head = folio_buffers(folio);
	if (head) {
		struct buffer_head *bh = head;

		do {
			/* Do not mark hole blocks dirty */
			if (buffer_dirty(bh) || !buffer_mapped(bh))
				continue;

			set_buffer_dirty(bh);
			nr_dirty++;
		} while (bh = bh->b_this_page, bh != head);
	} else if (ret) {
		nr_dirty = 1 << (folio_shift(folio) - inode->i_blkbits);
	}
	spin_unlock(&mapping->i_private_lock);

	if (nr_dirty)
		nilfs_set_file_dirty(inode, nr_dirty);
	return ret;
}

void nilfs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;

	if (to > inode->i_size) {
		truncate_pagecache(inode, inode->i_size);
		nilfs_truncate(inode);
	}
}

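/*
 * nilfs_write_begin() implements the write_begin() method.  Every buffered
 * write is wrapped in a NILFS transaction; the transaction opened here is
 * committed in nilfs_write_end(), or aborted if the block preparation
 * fails.
 */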
static int nilfs_write_begin(struct file *file, struct address_space *mapping,
			     loff_t pos, unsigned len,
			     struct folio **foliop, void **fsdata)
{
	struct inode *inode = mapping->host;
	int err = nilfs_transaction_begin(inode->i_sb, NULL, 1);

	if (unlikely(err))
		return err;

	err = block_write_begin(mapping, pos, len, foliop, nilfs_get_block);
	if (unlikely(err)) {
		nilfs_write_failed(mapping, pos + len);
		nilfs_transaction_abort(inode->i_sb);
	}
	return err;
}

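/*
 * nilfs_write_end() implements the write_end() method.  It counts the
 * buffers newly dirtied by the copy, completes the write through
 * generic_write_end(), registers the dirty blocks with the log writer,
 * and commits the transaction begun in nilfs_write_begin().
 */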
static int nilfs_write_end(struct file *file, struct address_space *mapping,
			   loff_t pos, unsigned len, unsigned copied,
			   struct folio *folio, void *fsdata)
{
	struct inode *inode = mapping->host;
	unsigned int start = pos & (PAGE_SIZE - 1);
	unsigned int nr_dirty;
	int err;

	nr_dirty = nilfs_page_count_clean_buffers(&folio->page, start,
						  start + copied);
	copied = generic_write_end(file, mapping, pos, len, copied, folio,
				   fsdata);
	nilfs_set_file_dirty(inode, nr_dirty);
	err = nilfs_transaction_commit(inode->i_sb);
	return err ? : copied;
}

static ssize_t
nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	if (iov_iter_rw(iter) == WRITE)
		return 0;

	/* Needs synchronization with the cleaner */
	return blockdev_direct_IO(iocb, inode, iter, nilfs_get_block);
}

const struct address_space_operations nilfs_aops = {
	.writepage		= nilfs_writepage,
	.read_folio		= nilfs_read_folio,
	.writepages		= nilfs_writepages,
	.dirty_folio		= nilfs_dirty_folio,
	.readahead		= nilfs_readahead,
	.write_begin		= nilfs_write_begin,
	.write_end		= nilfs_write_end,
	.invalidate_folio	= block_invalidate_folio,
	.direct_IO		= nilfs_direct_IO,
	.is_partially_uptodate  = block_is_partially_uptodate,
};

static int nilfs_insert_inode_locked(struct inode *inode,
				     struct nilfs_root *root,
				     unsigned long ino)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
	};

	return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
}

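/**
 * nilfs_new_inode - create a new inode on a NILFS volume
 * @dir: inode of the parent directory
 * @mode: file type and access mode of the new inode
 *
 * nilfs_new_inode() allocates a fresh inode number from the ifile of the
 * root (checkpoint) that @dir belongs to, initializes the in-memory inode
 * including its bmap, inherited flags, and generation number, and hashes
 * it into the inode cache in locked state.
 *
 * Return: a pointer to the new inode on success, or an ERR_PTR() on
 * failure.
 */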
struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
{
	struct super_block *sb = dir->i_sb;
	struct inode *inode;
	struct nilfs_inode_info *ii;
	struct nilfs_root *root;
	struct buffer_head *bh;
	int err = -ENOMEM;
	ino_t ino;

	inode = new_inode(sb);
	if (unlikely(!inode))
		goto failed;

	mapping_set_gfp_mask(inode->i_mapping,
			   mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));

	root = NILFS_I(dir)->i_root;
	ii = NILFS_I(inode);
	ii->i_state = BIT(NILFS_I_NEW);
	ii->i_type = NILFS_I_TYPE_NORMAL;
	ii->i_root = root;

	err = nilfs_ifile_create_inode(root->ifile, &ino, &bh);
	if (unlikely(err))
		goto failed_ifile_create_inode;
	/* reference count of i_bh inherits from nilfs_mdt_read_block() */
	ii->i_bh = bh;

	atomic64_inc(&root->inodes_count);
	inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
	inode->i_ino = ino;
	simple_inode_init_ts(inode);

	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
		err = nilfs_bmap_read(ii->i_bmap, NULL);
		if (err < 0)
			goto failed_after_creation;

		set_bit(NILFS_I_BMAP, &ii->i_state);
		/* No lock is needed; iget() ensures it. */
	}

	ii->i_flags = nilfs_mask_flags(
		mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED);

	/* ii->i_file_acl = 0; */
	/* ii->i_dir_acl = 0; */
	ii->i_dir_start_lookup = 0;
	nilfs_set_inode_flags(inode);
	inode->i_generation = get_random_u32();
	if (nilfs_insert_inode_locked(inode, root, ino) < 0) {
		err = -EIO;
		goto failed_after_creation;
	}

	err = nilfs_init_acl(inode, dir);
	if (unlikely(err))
		/*
		 * This should never occur.  When nilfs_init_acl() is
		 * actually supported, proper cancellation of the jobs
		 * above must be considered.
		 */
		goto failed_after_creation;

	return inode;

 failed_after_creation:
	clear_nlink(inode);
	if (inode->i_state & I_NEW)
		unlock_new_inode(inode);
	iput(inode);  /*
		       * raw_inode will be deleted through
		       * nilfs_evict_inode().
		       */
	goto failed;

 failed_ifile_create_inode:
	make_bad_inode(inode);
	iput(inode);
 failed:
	return ERR_PTR(err);
}

void nilfs_set_inode_flags(struct inode *inode)
{
	unsigned int flags = NILFS_I(inode)->i_flags;
	unsigned int new_fl = 0;

	if (flags & FS_SYNC_FL)
		new_fl |= S_SYNC;
	if (flags & FS_APPEND_FL)
		new_fl |= S_APPEND;
	if (flags & FS_IMMUTABLE_FL)
		new_fl |= S_IMMUTABLE;
	if (flags & FS_NOATIME_FL)
		new_fl |= S_NOATIME;
	if (flags & FS_DIRSYNC_FL)
		new_fl |= S_DIRSYNC;
	inode_set_flags(inode, new_fl, S_SYNC | S_APPEND | S_IMMUTABLE |
			S_NOATIME | S_DIRSYNC);
}

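/**
 * nilfs_read_inode_common - fill an in-memory inode from an on-disk inode
 * @inode:     inode object to fill
 * @raw_inode: on-disk inode to read from
 *
 * This function copies the standard attributes (mode, ownership, size,
 * timestamps, flags, generation) from @raw_inode, and reads in the bmap
 * data for regular files, directories, and symlinks.
 *
 * Return: 0 on success, or a negative error code on failure (%-EIO for a
 * metadata file inode with a non-regular mode, %-ESTALE for a deleted
 * inode).
 */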
int nilfs_read_inode_common(struct inode *inode,
			    struct nilfs_inode *raw_inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int err;

	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
	i_uid_write(inode, le32_to_cpu(raw_inode->i_uid));
	i_gid_write(inode, le32_to_cpu(raw_inode->i_gid));
	set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
	inode->i_size = le64_to_cpu(raw_inode->i_size);
	inode_set_atime(inode, le64_to_cpu(raw_inode->i_mtime),
			le32_to_cpu(raw_inode->i_mtime_nsec));
	inode_set_ctime(inode, le64_to_cpu(raw_inode->i_ctime),
			le32_to_cpu(raw_inode->i_ctime_nsec));
	inode_set_mtime(inode, le64_to_cpu(raw_inode->i_mtime),
			le32_to_cpu(raw_inode->i_mtime_nsec));
	if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode))
		return -EIO; /* this inode is for metadata and corrupted */
	if (inode->i_nlink == 0)
		return -ESTALE; /* this inode is deleted */

	inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
	ii->i_flags = le32_to_cpu(raw_inode->i_flags);
#if 0
	ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
	ii->i_dir_acl = S_ISREG(inode->i_mode) ?
		0 : le32_to_cpu(raw_inode->i_dir_acl);
#endif
	ii->i_dir_start_lookup = 0;
	inode->i_generation = le32_to_cpu(raw_inode->i_generation);

	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
	    S_ISLNK(inode->i_mode)) {
		err = nilfs_bmap_read(ii->i_bmap, raw_inode);
		if (err < 0)
			return err;
		set_bit(NILFS_I_BMAP, &ii->i_state);
		/* No lock is needed; iget() ensures it. */
	}
	return 0;
}

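/*
 * __nilfs_read_inode() reads the on-disk inode identified by @ino from the
 * ifile of @root, fills @inode from it, and installs the inode, file, and
 * address space operations matching the file type.  The DAT metadata file
 * semaphore is read-locked across the lookup so that the raw inode block
 * stays valid while it is mapped.
 */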
static int __nilfs_read_inode(struct super_block *sb,
			      struct nilfs_root *root, unsigned long ino,
			      struct inode *inode)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct buffer_head *bh;
	struct nilfs_inode *raw_inode;
	int err;

	down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh);
	if (unlikely(err))
		goto bad_inode;

	raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh);

	err = nilfs_read_inode_common(inode, raw_inode);
	if (err)
		goto failed_unmap;

	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &nilfs_file_inode_operations;
		inode->i_fop = &nilfs_file_operations;
		inode->i_mapping->a_ops = &nilfs_aops;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &nilfs_dir_inode_operations;
		inode->i_fop = &nilfs_dir_operations;
		inode->i_mapping->a_ops = &nilfs_aops;
	} else if (S_ISLNK(inode->i_mode)) {
		inode->i_op = &nilfs_symlink_inode_operations;
		inode_nohighmem(inode);
		inode->i_mapping->a_ops = &nilfs_aops;
	} else {
		inode->i_op = &nilfs_special_inode_operations;
		init_special_inode(
			inode, inode->i_mode,
			huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
	}
	nilfs_ifile_unmap_inode(raw_inode);
	brelse(bh);
	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	nilfs_set_inode_flags(inode);
	mapping_set_gfp_mask(inode->i_mapping,
			   mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
	return 0;

 failed_unmap:
	nilfs_ifile_unmap_inode(raw_inode);
	brelse(bh);

 bad_inode:
	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	return err;
}

static int nilfs_iget_test(struct inode *inode, void *opaque)
{
	struct nilfs_iget_args *args = opaque;
	struct nilfs_inode_info *ii;

	if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root)
		return 0;

	ii = NILFS_I(inode);
	if (ii->i_type != args->type)
		return 0;

	return !(args->type & NILFS_I_TYPE_GC) || args->cno == ii->i_cno;
}

static int nilfs_iget_set(struct inode *inode, void *opaque)
{
	struct nilfs_iget_args *args = opaque;

	inode->i_ino = args->ino;
	NILFS_I(inode)->i_cno = args->cno;
	NILFS_I(inode)->i_root = args->root;
	NILFS_I(inode)->i_type = args->type;
	if (args->root && args->ino == NILFS_ROOT_INO)
		nilfs_get_root(args->root);
	return 0;
}

struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
			    unsigned long ino)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
	};

	return ilookup5(sb, ino, nilfs_iget_test, &args);
}

struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
				unsigned long ino)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
	};

	return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
}

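/**
 * nilfs_iget - obtain an inode in a mounted checkpoint
 * @sb: super block instance
 * @root: NILFS root object (mounted checkpoint) the inode belongs to
 * @ino: inode number
 *
 * nilfs_iget() returns the inode located by @root and @ino from the inode
 * cache, reading it in from the ifile first if it is not cached yet.
 *
 * Return: a pointer to the inode on success, or an ERR_PTR() on failure.
 */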
struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root,
			 unsigned long ino)
{
	struct inode *inode;
	int err;

	inode = nilfs_iget_locked(sb, root, ino);
	if (unlikely(!inode))
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;

	err = __nilfs_read_inode(sb, root, ino, inode);
	if (unlikely(err)) {
		iget_failed(inode);
		return ERR_PTR(err);
	}
	unlock_new_inode(inode);
	return inode;
}

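/**
 * nilfs_iget_for_gc - obtain a work inode for garbage collection
 * @sb: super block instance
 * @ino: inode number of the file being collected
 * @cno: checkpoint number that the blocks to be moved belong to
 *
 * nilfs_iget_for_gc() returns a dedicated GC inode, distinguished from
 * regular inodes by @cno and the %NILFS_I_TYPE_GC type, which holds the
 * page cache used while relocating live blocks during garbage collection.
 *
 * Return: a pointer to the GC inode on success, or an ERR_PTR() on
 * failure.
 */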
struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
				__u64 cno)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = NULL, .cno = cno, .type = NILFS_I_TYPE_GC
	};
	struct inode *inode;
	int err;

	inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
	if (unlikely(!inode))
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;

	err = nilfs_init_gcinode(inode);
	if (unlikely(err)) {
		iget_failed(inode);
		return ERR_PTR(err);
	}
	unlock_new_inode(inode);
	return inode;
}

/**
 * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode
 * @inode: inode object
 *
 * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode,
 * or does nothing if the inode already has it.  This function allocates
 * an additional inode to maintain a page cache of B-tree nodes one-on-one.
 *
 * Return Value: On success, 0 is returned. On errors, one of the following
 * negative error codes is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 */
int nilfs_attach_btree_node_cache(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct inode *btnc_inode;
	struct nilfs_iget_args args;

	if (ii->i_assoc_inode)
		return 0;

	args.ino = inode->i_ino;
	args.root = ii->i_root;
	args.cno = ii->i_cno;
	args.type = ii->i_type | NILFS_I_TYPE_BTNC;

	btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
				  nilfs_iget_set, &args);
	if (unlikely(!btnc_inode))
		return -ENOMEM;
	if (btnc_inode->i_state & I_NEW) {
		nilfs_init_btnc_inode(btnc_inode);
		unlock_new_inode(btnc_inode);
	}
	NILFS_I(btnc_inode)->i_assoc_inode = inode;
	NILFS_I(btnc_inode)->i_bmap = ii->i_bmap;
	ii->i_assoc_inode = btnc_inode;

	return 0;
}

/**
 * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode
 * @inode: inode object
 *
 * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its
 * holder inode bound to @inode, or does nothing if @inode doesn't have it.
 */
void nilfs_detach_btree_node_cache(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct inode *btnc_inode = ii->i_assoc_inode;

	if (btnc_inode) {
		NILFS_I(btnc_inode)->i_assoc_inode = NULL;
		ii->i_assoc_inode = NULL;
		iput(btnc_inode);
	}
}

674  
675  /**
676   * nilfs_iget_for_shadow - obtain inode for shadow mapping
677   * @inode: inode object that uses shadow mapping
678   *
679   * nilfs_iget_for_shadow() allocates a pair of inodes that holds page
680   * caches for shadow mapping.  The page cache for data pages is set up
681   * in one inode and the one for b-tree node pages is set up in the
682   * other inode, which is attached to the former inode.
683   *
684   * Return Value: On success, a pointer to the inode for data pages is
685   * returned. On errors, one of the following negative error code is returned
686   * in a pointer type.
687   *
688   * %-ENOMEM - Insufficient memory available.
689   */
nilfs_iget_for_shadow(struct inode * inode)690  struct inode *nilfs_iget_for_shadow(struct inode *inode)
691  {
692  	struct nilfs_iget_args args = {
693  		.ino = inode->i_ino, .root = NULL, .cno = 0,
694  		.type = NILFS_I_TYPE_SHADOW
695  	};
696  	struct inode *s_inode;
697  	int err;
698  
699  	s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
700  			       nilfs_iget_set, &args);
701  	if (unlikely(!s_inode))
702  		return ERR_PTR(-ENOMEM);
703  	if (!(s_inode->i_state & I_NEW))
704  		return inode;
705  
706  	NILFS_I(s_inode)->i_flags = 0;
707  	memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap));
708  	mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS);
709  
710  	err = nilfs_attach_btree_node_cache(s_inode);
711  	if (unlikely(err)) {
712  		iget_failed(s_inode);
713  		return ERR_PTR(err);
714  	}
715  	unlock_new_inode(s_inode);
716  	return s_inode;
717  }

/**
 * nilfs_write_inode_common - export common inode information to on-disk inode
 * @inode:     inode object
 * @raw_inode: on-disk inode
 *
 * This function writes standard information from the on-memory inode @inode
 * to @raw_inode on ifile, cpfile or a super root block.  Since inode bmap
 * data is not exported, nilfs_bmap_write() must be called separately during
 * log writing.
 */
void nilfs_write_inode_common(struct inode *inode,
			      struct nilfs_inode *raw_inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);

	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
	raw_inode->i_uid = cpu_to_le32(i_uid_read(inode));
	raw_inode->i_gid = cpu_to_le32(i_gid_read(inode));
	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
	raw_inode->i_size = cpu_to_le64(inode->i_size);
	raw_inode->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
	raw_inode->i_mtime = cpu_to_le64(inode_get_mtime_sec(inode));
	raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
	raw_inode->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode));
	raw_inode->i_blocks = cpu_to_le64(inode->i_blocks);

	raw_inode->i_flags = cpu_to_le32(ii->i_flags);
	raw_inode->i_generation = cpu_to_le32(inode->i_generation);

	/*
	 * When extending the inode format, nilfs->ns_inode_size should be
	 * checked before accessing any appended fields.
	 */
}

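/*
 * nilfs_update_inode() maps the on-disk inode for @inode on the ifile
 * buffer @ibh and refreshes it from the current in-memory state, zeroing
 * the entry first if the inode is newly created and recording the device
 * code for special files.
 */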
void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags)
{
	ino_t ino = inode->i_ino;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct inode *ifile = ii->i_root->ifile;
	struct nilfs_inode *raw_inode;

	raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh);

	if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state))
		memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size);
	if (flags & I_DIRTY_DATASYNC)
		set_bit(NILFS_I_INODE_SYNC, &ii->i_state);

	nilfs_write_inode_common(inode, raw_inode);

	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		raw_inode->i_device_code =
			cpu_to_le64(huge_encode_dev(inode->i_rdev));

	nilfs_ifile_unmap_inode(raw_inode);
}

#define NILFS_MAX_TRUNCATE_BLOCKS	16384  /* 64MB for 4KB block */

static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
				unsigned long from)
{
	__u64 b;
	int ret;

	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
		return;
repeat:
	ret = nilfs_bmap_last_key(ii->i_bmap, &b);
	if (ret == -ENOENT)
		return;
	else if (ret < 0)
		goto failed;

	if (b < from)
		return;

	b -= min_t(__u64, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
	ret = nilfs_bmap_truncate(ii->i_bmap, b);
	nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb);
	if (!ret || (ret == -ENOMEM &&
		     nilfs_bmap_truncate(ii->i_bmap, b) == 0))
		goto repeat;

failed:
	nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%lu)",
		   ret, ii->vfs_inode.i_ino);
}

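/*
 * nilfs_truncate() implements the truncate method.  It zeroes the tail of
 * the last remaining block, truncates the bmap down to the new end-of-file
 * block, and registers the change with the log writer, all inside a
 * transaction.
 */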
void nilfs_truncate(struct inode *inode)
{
	unsigned long blkoff;
	unsigned int blocksize;
	struct nilfs_transaction_info ti;
	struct super_block *sb = inode->i_sb;
	struct nilfs_inode_info *ii = NILFS_I(inode);

	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
		return;
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
		return;

	blocksize = sb->s_blocksize;
	blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits;
	nilfs_transaction_begin(sb, &ti, 0); /* never fails */

	block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block);

	nilfs_truncate_bmap(ii, blkoff);

	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
	if (IS_SYNC(inode))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);

	nilfs_mark_inode_dirty(inode);
	nilfs_set_file_dirty(inode, 0);
	nilfs_transaction_commit(sb);
	/*
	 * May construct a logical segment and may fail in sync mode.
	 * But truncate has no return value.
	 */
}

static void nilfs_clear_inode(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);

	/*
	 * Free resources allocated in nilfs_read_inode(), here.
	 */
	BUG_ON(!list_empty(&ii->i_dirty));
	brelse(ii->i_bh);
	ii->i_bh = NULL;

	if (nilfs_is_metadata_file_inode(inode))
		nilfs_mdt_clear(inode);

	if (test_bit(NILFS_I_BMAP, &ii->i_state))
		nilfs_bmap_clear(ii->i_bmap);

	if (!(ii->i_type & NILFS_I_TYPE_BTNC))
		nilfs_detach_btree_node_cache(inode);

	if (ii->i_root && inode->i_ino == NILFS_ROOT_INO)
		nilfs_put_root(ii->i_root);
}

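/*
 * nilfs_evict_inode() implements the evict_inode method.  For inodes that
 * are still linked, lack a root object, or are bad, only the page cache
 * and in-memory state are dropped.  Otherwise, the whole file body is
 * truncated away and the on-disk inode is deleted from the ifile within a
 * transaction, unless the filesystem has degraded to read-only, in which
 * case no change that would cause a write is made.
 */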
void nilfs_evict_inode(struct inode *inode)
{
	struct nilfs_transaction_info ti;
	struct super_block *sb = inode->i_sb;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs;
	int ret;

	if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
		truncate_inode_pages_final(&inode->i_data);
		clear_inode(inode);
		nilfs_clear_inode(inode);
		return;
	}
	nilfs_transaction_begin(sb, &ti, 0); /* never fails */

	truncate_inode_pages_final(&inode->i_data);

	nilfs = sb->s_fs_info;
	if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) {
		/*
		 * If this inode is about to be disposed after the file system
		 * has been degraded to read-only due to file system corruption
		 * or after the writer has been detached, do not make any
		 * changes that cause writes, just clear it.
		 * Do this check after read-locking ns_segctor_sem by
		 * nilfs_transaction_begin() in order to avoid a race with
		 * the writer detach operation.
		 */
		clear_inode(inode);
		nilfs_clear_inode(inode);
		nilfs_transaction_abort(sb);
		return;
	}

	/* TODO: some of the following operations may fail.  */
	nilfs_truncate_bmap(ii, 0);
	nilfs_mark_inode_dirty(inode);
	clear_inode(inode);

	ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
	if (!ret)
		atomic64_dec(&ii->i_root->inodes_count);

	nilfs_clear_inode(inode);

	if (IS_SYNC(inode))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);
	nilfs_transaction_commit(sb);
	/*
	 * May construct a logical segment and may fail in sync mode.
	 * But delete_inode has no return value.
	 */
}

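/*
 * nilfs_setattr() implements the setattr method.  Attribute changes,
 * including size changes that require truncation, are applied inside a
 * NILFS transaction so they are recorded by the log writer.
 */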
int nilfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
		  struct iattr *iattr)
{
	struct nilfs_transaction_info ti;
	struct inode *inode = d_inode(dentry);
	struct super_block *sb = inode->i_sb;
	int err;

	err = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
	if (err)
		return err;

	err = nilfs_transaction_begin(sb, &ti, 0);
	if (unlikely(err))
		return err;

	if ((iattr->ia_valid & ATTR_SIZE) &&
	    iattr->ia_size != i_size_read(inode)) {
		inode_dio_wait(inode);
		truncate_setsize(inode, iattr->ia_size);
		nilfs_truncate(inode);
	}

	setattr_copy(&nop_mnt_idmap, inode, iattr);
	mark_inode_dirty(inode);

	if (iattr->ia_valid & ATTR_MODE) {
		err = nilfs_acl_chmod(inode);
		if (unlikely(err))
			goto out_err;
	}

	return nilfs_transaction_commit(sb);

out_err:
	nilfs_transaction_abort(sb);
	return err;
}

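/*
 * nilfs_permission() implements the permission method.  Write access is
 * refused for inodes that belong to a snapshot, which is read-only by
 * definition; everything else is deferred to generic_permission().
 */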
int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
		     int mask)
{
	struct nilfs_root *root = NILFS_I(inode)->i_root;

	if ((mask & MAY_WRITE) && root &&
	    root->cno != NILFS_CPTREE_CURRENT_CNO)
		return -EROFS; /* snapshot is not writable */

	return generic_permission(&nop_mnt_idmap, inode, mask);
}

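/*
 * nilfs_load_inode_block() gets the ifile buffer that carries the on-disk
 * inode of @inode and caches it in ii->i_bh.  The buffer is looked up with
 * ns_inode_lock dropped, so the cached pointer is re-checked (and a stale,
 * non-uptodate buffer replaced) once the lock is retaken.  A reference is
 * taken on the buffer returned through @pbh.
 */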
int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
{
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int err;

	spin_lock(&nilfs->ns_inode_lock);
	if (ii->i_bh == NULL || unlikely(!buffer_uptodate(ii->i_bh))) {
		spin_unlock(&nilfs->ns_inode_lock);
		err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
						  inode->i_ino, pbh);
		if (unlikely(err))
			return err;
		spin_lock(&nilfs->ns_inode_lock);
		if (ii->i_bh == NULL)
			ii->i_bh = *pbh;
		else if (unlikely(!buffer_uptodate(ii->i_bh))) {
			__brelse(ii->i_bh);
			ii->i_bh = *pbh;
		} else {
			brelse(*pbh);
			*pbh = ii->i_bh;
		}
	} else
		*pbh = ii->i_bh;

	get_bh(*pbh);
	spin_unlock(&nilfs->ns_inode_lock);
	return 0;
}

int nilfs_inode_dirty(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	int ret = 0;

	if (!list_empty(&ii->i_dirty)) {
		spin_lock(&nilfs->ns_inode_lock);
		ret = test_bit(NILFS_I_DIRTY, &ii->i_state) ||
			test_bit(NILFS_I_BUSY, &ii->i_state);
		spin_unlock(&nilfs->ns_inode_lock);
	}
	return ret;
}

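/*
 * nilfs_set_file_dirty() accounts @nr_dirty newly dirtied blocks and, on
 * the clean-to-dirty transition of the inode, grabs a reference to it and
 * queues it on the ns_dirty_files list for the segment constructor.
 * Returns -EINVAL if the inode is already being freed.
 */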
int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;

	atomic_add(nr_dirty, &nilfs->ns_ndirtyblks);

	if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state))
		return 0;

	spin_lock(&nilfs->ns_inode_lock);
	if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
	    !test_bit(NILFS_I_BUSY, &ii->i_state)) {
		/*
		 * Because this routine may race with nilfs_dispose_list(),
		 * we have to check NILFS_I_QUEUED here, too.
		 */
		if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
			/*
			 * This will happen when somebody is freeing
			 * this inode.
			 */
			nilfs_warn(inode->i_sb,
				   "cannot set file dirty (ino=%lu): the file is being freed",
				   inode->i_ino);
			spin_unlock(&nilfs->ns_inode_lock);
			return -EINVAL; /*
					 * NILFS_I_DIRTY may remain for
					 * freeing inode.
					 */
		}
		list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
		set_bit(NILFS_I_QUEUED, &ii->i_state);
	}
	spin_unlock(&nilfs->ns_inode_lock);
	return 0;
}

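/*
 * __nilfs_mark_inode_dirty() copies the in-memory state of @inode into its
 * ifile block and marks both that buffer and the ifile itself dirty, so
 * that the change is picked up by the next segment construction.
 */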
int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
{
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	struct buffer_head *ibh;
	int err;

	/*
	 * Do not dirty inodes after the log writer has been detached
	 * and its nilfs_root struct has been freed.
	 */
	if (unlikely(nilfs_purging(nilfs)))
		return 0;

	err = nilfs_load_inode_block(inode, &ibh);
	if (unlikely(err)) {
		nilfs_warn(inode->i_sb,
			   "cannot mark inode dirty (ino=%lu): error %d loading inode block",
			   inode->i_ino, err);
		return err;
	}
	nilfs_update_inode(inode, ibh, flags);
	mark_buffer_dirty(ibh);
	nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile);
	brelse(ibh);
	return 0;
}

/**
 * nilfs_dirty_inode - reflect changes on given inode to an inode block.
 * @inode: inode of the file to be registered.
 * @flags: flags to determine the dirty state of the inode
 *
 * nilfs_dirty_inode() loads an inode block containing the specified
 * @inode and copies data from a nilfs_inode to a corresponding inode
 * entry in the inode block.  This operation is excluded from the segment
 * construction.  This function can be called both as a single operation
 * and as a part of indivisible file operations.
 */
void nilfs_dirty_inode(struct inode *inode, int flags)
{
	struct nilfs_transaction_info ti;
	struct nilfs_mdt_info *mdi = NILFS_MDT(inode);

	if (is_bad_inode(inode)) {
		nilfs_warn(inode->i_sb,
			   "tried to mark bad_inode dirty. ignored.");
		dump_stack();
		return;
	}
	if (mdi) {
		nilfs_mdt_mark_dirty(inode);
		return;
	}
	nilfs_transaction_begin(inode->i_sb, &ti, 0);
	__nilfs_mark_inode_dirty(inode, flags);
	nilfs_transaction_commit(inode->i_sb); /* never fails */
}

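/*
 * nilfs_fiemap() implements the fiemap method.  It walks the file block by
 * block, merging contiguous mapped runs found through the bmap with
 * delayed-allocation extents that are still uncommitted in the page cache,
 * and reports each resulting extent to user space.
 */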
int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 __u64 start, __u64 len)
{
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	__u64 logical = 0, phys = 0, size = 0;
	__u32 flags = 0;
	loff_t isize;
	sector_t blkoff, end_blkoff;
	sector_t delalloc_blkoff;
	unsigned long delalloc_blklen;
	unsigned int blkbits = inode->i_blkbits;
	int ret, n;

	ret = fiemap_prep(inode, fieinfo, start, &len, 0);
	if (ret)
		return ret;

	inode_lock(inode);

	isize = i_size_read(inode);

	blkoff = start >> blkbits;
	end_blkoff = (start + len - 1) >> blkbits;

	delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff,
							&delalloc_blkoff);

	do {
		__u64 blkphy;
		unsigned int maxblocks;

		if (delalloc_blklen && blkoff == delalloc_blkoff) {
			if (size) {
				/* End of the current extent */
				ret = fiemap_fill_next_extent(
					fieinfo, logical, phys, size, flags);
				if (ret)
					break;
			}
			if (blkoff > end_blkoff)
				break;

			flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC;
			logical = blkoff << blkbits;
			phys = 0;
			size = delalloc_blklen << blkbits;

			blkoff = delalloc_blkoff + delalloc_blklen;
			delalloc_blklen = nilfs_find_uncommitted_extent(
				inode, blkoff, &delalloc_blkoff);
			continue;
		}

		/*
		 * Limit the number of blocks that we look up so as
		 * not to get into the next delayed allocation extent.
		 */
		maxblocks = INT_MAX;
		if (delalloc_blklen)
			maxblocks = min_t(sector_t, delalloc_blkoff - blkoff,
					  maxblocks);
		blkphy = 0;

		down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
		n = nilfs_bmap_lookup_contig(
			NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks);
		up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);

		if (n < 0) {
			int past_eof;

			if (unlikely(n != -ENOENT))
				break; /* error */

			/* HOLE */
			blkoff++;
			past_eof = ((blkoff << blkbits) >= isize);

			if (size) {
				/* End of the current extent */

				if (past_eof)
					flags |= FIEMAP_EXTENT_LAST;

				ret = fiemap_fill_next_extent(
					fieinfo, logical, phys, size, flags);
				if (ret)
					break;
				size = 0;
			}
			if (blkoff > end_blkoff || past_eof)
				break;
		} else {
			if (size) {
				if (phys && blkphy << blkbits == phys + size) {
					/* The current extent goes on */
					size += n << blkbits;
				} else {
					/* Terminate the current extent */
					ret = fiemap_fill_next_extent(
						fieinfo, logical, phys, size,
						flags);
					if (ret || blkoff > end_blkoff)
						break;

					/* Start another extent */
					flags = FIEMAP_EXTENT_MERGED;
					logical = blkoff << blkbits;
					phys = blkphy << blkbits;
					size = n << blkbits;
				}
			} else {
				/* Start a new extent */
				flags = FIEMAP_EXTENT_MERGED;
				logical = blkoff << blkbits;
				phys = blkphy << blkbits;
				size = n << blkbits;
			}
			blkoff += n;
		}
		cond_resched();
	} while (true);

	/* If ret is 1 then we just hit the end of the extent array */
	if (ret == 1)
		ret = 0;

	inode_unlock(inode);
	return ret;
}