1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  linux/fs/ext2/file.c
4  *
5  * Copyright (C) 1992, 1993, 1994, 1995
6  * Remy Card (card@masi.ibp.fr)
7  * Laboratoire MASI - Institut Blaise Pascal
8  * Universite Pierre et Marie Curie (Paris VI)
9  *
10  *  from
11  *
12  *  linux/fs/minix/file.c
13  *
14  *  Copyright (C) 1991, 1992  Linus Torvalds
15  *
16  *  ext2 fs regular file handling primitives
17  *
18  *  64-bit file support on 64-bit platforms by Jakub Jelinek
19  * 	(jj@sunsite.ms.mff.cuni.cz)
20  */
21 
22 #include <linux/time.h>
23 #include <linux/pagemap.h>
24 #include <linux/dax.h>
25 #include <linux/quotaops.h>
26 #include <linux/iomap.h>
27 #include <linux/uio.h>
28 #include <linux/buffer_head.h>
29 #include "ext2.h"
30 #include "xattr.h"
31 #include "acl.h"
32 #include "trace.h"
33 
34 #ifdef CONFIG_FS_DAX
ext2_dax_read_iter(struct kiocb * iocb,struct iov_iter * to)35 static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
36 {
37 	struct inode *inode = iocb->ki_filp->f_mapping->host;
38 	ssize_t ret;
39 
40 	if (!iov_iter_count(to))
41 		return 0; /* skip atime */
42 
43 	inode_lock_shared(inode);
44 	ret = dax_iomap_rw(iocb, to, &ext2_iomap_ops);
45 	inode_unlock_shared(inode);
46 
47 	file_accessed(iocb->ki_filp);
48 	return ret;
49 }
50 
ext2_dax_write_iter(struct kiocb * iocb,struct iov_iter * from)51 static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
52 {
53 	struct file *file = iocb->ki_filp;
54 	struct inode *inode = file->f_mapping->host;
55 	ssize_t ret;
56 
57 	inode_lock(inode);
58 	ret = generic_write_checks(iocb, from);
59 	if (ret <= 0)
60 		goto out_unlock;
61 	ret = file_remove_privs(file);
62 	if (ret)
63 		goto out_unlock;
64 	ret = file_update_time(file);
65 	if (ret)
66 		goto out_unlock;
67 
68 	ret = dax_iomap_rw(iocb, from, &ext2_iomap_ops);
69 	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
70 		i_size_write(inode, iocb->ki_pos);
71 		mark_inode_dirty(inode);
72 	}
73 
74 out_unlock:
75 	inode_unlock(inode);
76 	if (ret > 0)
77 		ret = generic_write_sync(iocb, ret);
78 	return ret;
79 }
80 
81 /*
82  * The lock ordering for ext2 DAX fault paths is:
83  *
84  * mmap_lock (MM)
85  *   sb_start_pagefault (vfs, freeze)
86  *     address_space->invalidate_lock
87  *       address_space->i_mmap_rwsem or page_lock (mutually exclusive in DAX)
88  *         ext2_inode_info->truncate_mutex
89  *
90  * The default page_lock and i_size verification done by non-DAX fault paths
91  * is sufficient because ext2 doesn't support hole punching.
92  */
ext2_dax_fault(struct vm_fault * vmf)93 static vm_fault_t ext2_dax_fault(struct vm_fault *vmf)
94 {
95 	struct inode *inode = file_inode(vmf->vma->vm_file);
96 	vm_fault_t ret;
97 	bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
98 		(vmf->vma->vm_flags & VM_SHARED);
99 
100 	if (write) {
101 		sb_start_pagefault(inode->i_sb);
102 		file_update_time(vmf->vma->vm_file);
103 	}
104 	filemap_invalidate_lock_shared(inode->i_mapping);
105 
106 	ret = dax_iomap_fault(vmf, 0, NULL, NULL, &ext2_iomap_ops);
107 
108 	filemap_invalidate_unlock_shared(inode->i_mapping);
109 	if (write)
110 		sb_end_pagefault(inode->i_sb);
111 	return ret;
112 }
113 
114 static const struct vm_operations_struct ext2_dax_vm_ops = {
115 	.fault		= ext2_dax_fault,
116 	/*
117 	 * .huge_fault is not supported for DAX because allocation in ext2
118 	 * cannot be reliably aligned to huge page sizes and so pmd faults
119 	 * will always fail and fail back to regular faults.
120 	 */
121 	.page_mkwrite	= ext2_dax_fault,
122 	.pfn_mkwrite	= ext2_dax_fault,
123 };
124 
ext2_file_mmap(struct file * file,struct vm_area_struct * vma)125 static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma)
126 {
127 	if (!IS_DAX(file_inode(file)))
128 		return generic_file_mmap(file, vma);
129 
130 	file_accessed(file);
131 	vma->vm_ops = &ext2_dax_vm_ops;
132 	return 0;
133 }
134 #else
/* Without CONFIG_FS_DAX the generic mmap handler is used directly. */
#define ext2_file_mmap generic_file_mmap
136 #endif
137 
138 /*
139  * Called when filp is released. This happens when all file descriptors
140  * for a single struct file are closed. Note that different open() calls
141  * for the same file yield different struct file structures.
142  */
ext2_release_file(struct inode * inode,struct file * filp)143 static int ext2_release_file (struct inode * inode, struct file * filp)
144 {
145 	if (filp->f_mode & FMODE_WRITE) {
146 		mutex_lock(&EXT2_I(inode)->truncate_mutex);
147 		ext2_discard_reservation(inode);
148 		mutex_unlock(&EXT2_I(inode)->truncate_mutex);
149 	}
150 	return 0;
151 }
152 
/*
 * fsync handler: delegates to generic_buffers_fsync() and, when that
 * reports -EIO, additionally raises an ext2 error on the superblock.
 * The result of generic_buffers_fsync() is returned unchanged.
 */
int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct super_block *sb = file->f_mapping->host->i_sb;
	int err;

	err = generic_buffers_fsync(file, start, end, datasync);
	if (err == -EIO) {
		/* The exact location of the I/O error is not known here. */
		ext2_error(sb, __func__,
			   "detected IO error when writing metadata buffers");
	}

	return err;
}
165 
ext2_dio_read_iter(struct kiocb * iocb,struct iov_iter * to)166 static ssize_t ext2_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
167 {
168 	struct file *file = iocb->ki_filp;
169 	struct inode *inode = file->f_mapping->host;
170 	ssize_t ret;
171 
172 	trace_ext2_dio_read_begin(iocb, to, 0);
173 	inode_lock_shared(inode);
174 	ret = iomap_dio_rw(iocb, to, &ext2_iomap_ops, NULL, 0, NULL, 0);
175 	inode_unlock_shared(inode);
176 	trace_ext2_dio_read_end(iocb, to, ret);
177 
178 	return ret;
179 }
180 
/*
 * Completion callback for direct-I/O writes (installed through
 * ext2_dio_write_ops).
 *
 * On success, grows i_size to iocb->ki_pos + size when the write ended past
 * the current size. The tracepoint fires in every case and @error is
 * returned unchanged. @flags is unused.
 */
static int ext2_dio_write_end_io(struct kiocb *iocb, ssize_t size,
				 int error, unsigned int flags)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	if (!error) {
		/*
		 * When extending the file, i_size has to be updated here,
		 * before iomap_dio_rw() invalidates the page cache; otherwise
		 * a racing buffered read could zero out too much of a page
		 * cache page. Extending writes always run synchronously with
		 * the inode lock held by ext2_dio_write_iter(), so updating
		 * the size from this callback is safe.
		 */
		loff_t end_pos = iocb->ki_pos + size;

		if (end_pos > i_size_read(inode)) {
			i_size_write(inode, end_pos);
			mark_inode_dirty(inode);
		}
	}

	trace_ext2_dio_write_endio(iocb, size, error);
	return error;
}
207 
208 static const struct iomap_dio_ops ext2_dio_write_ops = {
209 	.end_io = ext2_dio_write_end_io,
210 };
211 
ext2_dio_write_iter(struct kiocb * iocb,struct iov_iter * from)212 static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
213 {
214 	struct file *file = iocb->ki_filp;
215 	struct inode *inode = file->f_mapping->host;
216 	ssize_t ret;
217 	unsigned int flags = 0;
218 	unsigned long blocksize = inode->i_sb->s_blocksize;
219 	loff_t offset = iocb->ki_pos;
220 	loff_t count = iov_iter_count(from);
221 	ssize_t status = 0;
222 
223 	trace_ext2_dio_write_begin(iocb, from, 0);
224 	inode_lock(inode);
225 	ret = generic_write_checks(iocb, from);
226 	if (ret <= 0)
227 		goto out_unlock;
228 
229 	ret = kiocb_modified(iocb);
230 	if (ret)
231 		goto out_unlock;
232 
233 	/* use IOMAP_DIO_FORCE_WAIT for unaligned or extending writes */
234 	if (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode) ||
235 	   (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(from), blocksize)))
236 		flags |= IOMAP_DIO_FORCE_WAIT;
237 
238 	ret = iomap_dio_rw(iocb, from, &ext2_iomap_ops, &ext2_dio_write_ops,
239 			   flags, NULL, 0);
240 
241 	/* ENOTBLK is magic return value for fallback to buffered-io */
242 	if (ret == -ENOTBLK)
243 		ret = 0;
244 
245 	if (ret < 0 && ret != -EIOCBQUEUED)
246 		ext2_write_failed(inode->i_mapping, offset + count);
247 
248 	/* handle case for partial write and for fallback to buffered write */
249 	if (ret >= 0 && iov_iter_count(from)) {
250 		loff_t pos, endbyte;
251 		int ret2;
252 
253 		iocb->ki_flags &= ~IOCB_DIRECT;
254 		pos = iocb->ki_pos;
255 		status = generic_perform_write(iocb, from);
256 		if (unlikely(status < 0)) {
257 			ret = status;
258 			goto out_unlock;
259 		}
260 
261 		ret += status;
262 		endbyte = pos + status - 1;
263 		ret2 = filemap_write_and_wait_range(inode->i_mapping, pos,
264 						    endbyte);
265 		if (!ret2)
266 			invalidate_mapping_pages(inode->i_mapping,
267 						 pos >> PAGE_SHIFT,
268 						 endbyte >> PAGE_SHIFT);
269 		if (ret > 0)
270 			generic_write_sync(iocb, ret);
271 	}
272 
273 out_unlock:
274 	inode_unlock(inode);
275 	if (status)
276 		trace_ext2_dio_write_buff_end(iocb, from, status);
277 	trace_ext2_dio_write_end(iocb, from, ret);
278 	return ret;
279 }
280 
ext2_file_read_iter(struct kiocb * iocb,struct iov_iter * to)281 static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
282 {
283 #ifdef CONFIG_FS_DAX
284 	if (IS_DAX(iocb->ki_filp->f_mapping->host))
285 		return ext2_dax_read_iter(iocb, to);
286 #endif
287 	if (iocb->ki_flags & IOCB_DIRECT)
288 		return ext2_dio_read_iter(iocb, to);
289 
290 	return generic_file_read_iter(iocb, to);
291 }
292 
ext2_file_write_iter(struct kiocb * iocb,struct iov_iter * from)293 static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
294 {
295 #ifdef CONFIG_FS_DAX
296 	if (IS_DAX(iocb->ki_filp->f_mapping->host))
297 		return ext2_dax_write_iter(iocb, from);
298 #endif
299 	if (iocb->ki_flags & IOCB_DIRECT)
300 		return ext2_dio_write_iter(iocb, from);
301 
302 	return generic_file_write_iter(iocb, from);
303 }
304 
ext2_file_open(struct inode * inode,struct file * filp)305 static int ext2_file_open(struct inode *inode, struct file *filp)
306 {
307 	filp->f_mode |= FMODE_CAN_ODIRECT;
308 	return dquot_file_open(inode, filp);
309 }
310 
311 const struct file_operations ext2_file_operations = {
312 	.llseek		= generic_file_llseek,
313 	.read_iter	= ext2_file_read_iter,
314 	.write_iter	= ext2_file_write_iter,
315 	.unlocked_ioctl = ext2_ioctl,
316 #ifdef CONFIG_COMPAT
317 	.compat_ioctl	= ext2_compat_ioctl,
318 #endif
319 	.mmap		= ext2_file_mmap,
320 	.open		= ext2_file_open,
321 	.release	= ext2_release_file,
322 	.fsync		= ext2_fsync,
323 	.get_unmapped_area = thp_get_unmapped_area,
324 	.splice_read	= filemap_splice_read,
325 	.splice_write	= iter_file_splice_write,
326 };
327 
328 const struct inode_operations ext2_file_inode_operations = {
329 	.listxattr	= ext2_listxattr,
330 	.getattr	= ext2_getattr,
331 	.setattr	= ext2_setattr,
332 	.get_inode_acl	= ext2_get_acl,
333 	.set_acl	= ext2_set_acl,
334 	.fiemap		= ext2_fiemap,
335 	.fileattr_get	= ext2_fileattr_get,
336 	.fileattr_set	= ext2_fileattr_set,
337 };
338