1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * Copyright 2017 Omnibond Systems, L.L.C.
4   */
5  
6  #include "protocol.h"
7  #include "orangefs-kernel.h"
8  #include "orangefs-bufmap.h"
9  
/*
 * One chunk of directory data received from userspace.  This header is
 * written over the start of the readdir trailer buffer, so the encoded
 * entries are found sizeof(struct orangefs_readdir_response_s) bytes
 * past the address of the part itself.
 */
struct orangefs_dir_part {
	/* Next part in the list; NULL for the most recently received part. */
	struct orangefs_dir_part *next;
	/* Bytes of entry data, not counting the response header. */
	size_t len;
};
14  
/* Per-open-file directory state, kept in file->private_data. */
struct orangefs_dir {
	/* Token sent with the next readdir request; updated from each reply. */
	__u64 token;
	/* Head of the list of parts received so far; NULL when none. */
	struct orangefs_dir_part *part;
	/* Position just past the last part received (part number << PART_SHIFT). */
	loff_t end;
	/* Last error recorded; once nonzero, iterate returns it immediately. */
	int error;
};
21  
/*
 * Layout of a directory position: the bits at and above PART_SHIFT hold
 * the part number, the bits below hold the byte offset inside the part.
 * All three constants have type int; PART_MASK is negative so that it
 * sign-extends to cover the high bits of a 64-bit position.
 */
#define PART_SHIFT (24)
#define PART_SIZE (1 << PART_SHIFT)
#define PART_MASK (~(PART_SIZE - 1))
25  
26  /*
27   * There can be up to 512 directory entries.  Each entry is encoded as
28   * follows:
29   * 4 bytes: string size (n)
30   * n bytes: string
31   * 1 byte: trailing zero
32   * padding to 8 bytes
33   * 16 bytes: khandle
34   * padding to 8 bytes
35   *
36   * The trailer_buf starts with a struct orangefs_readdir_response_s
37   * which must be skipped to get to the directory data.
38   *
39   * The data which is received from the userspace daemon is termed a
40   * part and is stored in a linked list in case more than one part is
41   * needed for a large directory.
42   *
43   * The position pointer (ctx->pos) encodes the part and offset on which
44   * to begin reading at.  Bits above PART_SHIFT encode the part and bits
45   * below PART_SHIFT encode the offset.  Parts are stored in a linked
46   * list which grows as data is received from the server.  The overhead
47   * associated with managing the list is presumed to be small compared to
48   * the overhead of communicating with the server.
49   *
50   * As data is received from the server, it is placed at the end of the
51   * part list.  Data is parsed from the current position as it is needed.
52   * When data is determined to be corrupt, it is either because the
53   * userspace component has sent back corrupt data or because the file
54   * pointer has been moved to an invalid location.  Since the two cannot
55   * be differentiated, return EIO.
56   *
 * Part zero is synthesized to contain `.' and `..'.  Part one is the
58   * first part of the part list.
59   */
60  
do_readdir(struct orangefs_dir * od,struct inode * inode,struct orangefs_kernel_op_s * op)61  static int do_readdir(struct orangefs_dir *od, struct inode *inode,
62      struct orangefs_kernel_op_s *op)
63  {
64  	struct orangefs_inode_s *oi = ORANGEFS_I(inode);
65  	struct orangefs_readdir_response_s *resp;
66  	int bufi, r;
67  
68  	/*
69  	 * Despite the badly named field, readdir does not use shared
70  	 * memory.  However, there are a limited number of readdir
71  	 * slots, which must be allocated here.  This flag simply tells
72  	 * the op scheduler to return the op here for retry.
73  	 */
74  	op->uses_shared_memory = 1;
75  	op->upcall.req.readdir.refn = oi->refn;
76  	op->upcall.req.readdir.token = od->token;
77  	op->upcall.req.readdir.max_dirent_count =
78  	    ORANGEFS_MAX_DIRENT_COUNT_READDIR;
79  
80  again:
81  	bufi = orangefs_readdir_index_get();
82  	if (bufi < 0) {
83  		od->error = bufi;
84  		return bufi;
85  	}
86  
87  	op->upcall.req.readdir.buf_index = bufi;
88  
89  	r = service_operation(op, "orangefs_readdir",
90  	    get_interruptible_flag(inode));
91  
92  	orangefs_readdir_index_put(bufi);
93  
94  	if (op_state_purged(op)) {
95  		if (r == -EAGAIN) {
96  			vfree(op->downcall.trailer_buf);
97  			goto again;
98  		} else if (r == -EIO) {
99  			vfree(op->downcall.trailer_buf);
100  			od->error = r;
101  			return r;
102  		}
103  	}
104  
105  	if (r < 0) {
106  		vfree(op->downcall.trailer_buf);
107  		od->error = r;
108  		return r;
109  	} else if (op->downcall.status) {
110  		vfree(op->downcall.trailer_buf);
111  		od->error = op->downcall.status;
112  		return op->downcall.status;
113  	}
114  
115  	/*
116  	 * The maximum size is size per entry times the 512 entries plus
117  	 * the header.  This is well under the limit.
118  	 */
119  	if (op->downcall.trailer_size > PART_SIZE) {
120  		vfree(op->downcall.trailer_buf);
121  		od->error = -EIO;
122  		return -EIO;
123  	}
124  
125  	resp = (struct orangefs_readdir_response_s *)
126  	    op->downcall.trailer_buf;
127  	od->token = resp->token;
128  	return 0;
129  }
130  
parse_readdir(struct orangefs_dir * od,struct orangefs_kernel_op_s * op)131  static int parse_readdir(struct orangefs_dir *od,
132      struct orangefs_kernel_op_s *op)
133  {
134  	struct orangefs_dir_part *part, *new;
135  	size_t count;
136  
137  	count = 1;
138  	part = od->part;
139  	while (part) {
140  		count++;
141  		if (part->next)
142  			part = part->next;
143  		else
144  			break;
145  	}
146  
147  	new = (void *)op->downcall.trailer_buf;
148  	new->next = NULL;
149  	new->len = op->downcall.trailer_size -
150  	    sizeof(struct orangefs_readdir_response_s);
151  	if (!od->part)
152  		od->part = new;
153  	else
154  		part->next = new;
155  	count++;
156  	od->end = count << PART_SHIFT;
157  
158  	return 0;
159  }
160  
orangefs_dir_more(struct orangefs_dir * od,struct inode * inode)161  static int orangefs_dir_more(struct orangefs_dir *od, struct inode *inode)
162  {
163  	struct orangefs_kernel_op_s *op;
164  	int r;
165  
166  	op = op_alloc(ORANGEFS_VFS_OP_READDIR);
167  	if (!op) {
168  		od->error = -ENOMEM;
169  		return -ENOMEM;
170  	}
171  	r = do_readdir(od, inode, op);
172  	if (r) {
173  		od->error = r;
174  		goto out;
175  	}
176  	r = parse_readdir(od, op);
177  	if (r) {
178  		od->error = r;
179  		goto out;
180  	}
181  
182  	od->error = 0;
183  out:
184  	op_release(op);
185  	return od->error;
186  }
187  
fill_from_part(struct orangefs_dir_part * part,struct dir_context * ctx)188  static int fill_from_part(struct orangefs_dir_part *part,
189      struct dir_context *ctx)
190  {
191  	const int offset = sizeof(struct orangefs_readdir_response_s);
192  	struct orangefs_khandle *khandle;
193  	__u32 *len, padlen;
194  	loff_t i;
195  	char *s;
196  	i = ctx->pos & ~PART_MASK;
197  
198  	/* The file offset from userspace is too large. */
199  	if (i > part->len)
200  		return 1;
201  
202  	/*
203  	 * If the seek pointer is positioned just before an entry it
204  	 * should find the next entry.
205  	 */
206  	if (i % 8)
207  		i = i + (8 - i%8)%8;
208  
209  	while (i < part->len) {
210  		if (part->len < i + sizeof *len)
211  			break;
212  		len = (void *)part + offset + i;
213  		/*
214  		 * len is the size of the string itself.  padlen is the
215  		 * total size of the encoded string.
216  		 */
217  		padlen = (sizeof *len + *len + 1) +
218  		    (8 - (sizeof *len + *len + 1)%8)%8;
219  		if (part->len < i + padlen + sizeof *khandle)
220  			goto next;
221  		s = (void *)part + offset + i + sizeof *len;
222  		if (s[*len] != 0)
223  			goto next;
224  		khandle = (void *)part + offset + i + padlen;
225  		if (!dir_emit(ctx, s, *len,
226  		    orangefs_khandle_to_ino(khandle),
227  		    DT_UNKNOWN))
228  			return 0;
229  		i += padlen + sizeof *khandle;
230  		i = i + (8 - i%8)%8;
231  		BUG_ON(i > part->len);
232  		ctx->pos = (ctx->pos & PART_MASK) | i;
233  		continue;
234  next:
235  		i += 8;
236  	}
237  	return 1;
238  }
239  
orangefs_dir_fill(struct orangefs_dir * od,struct dir_context * ctx)240  static int orangefs_dir_fill(struct orangefs_dir *od, struct dir_context *ctx)
241  {
242  	struct orangefs_dir_part *part;
243  	size_t count;
244  
245  	count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1;
246  
247  	part = od->part;
248  	while (part->next && count) {
249  		count--;
250  		part = part->next;
251  	}
252  	/* This means the userspace file offset is invalid. */
253  	if (count) {
254  		od->error = -EIO;
255  		return -EIO;
256  	}
257  
258  	while (part && part->len) {
259  		int r;
260  		r = fill_from_part(part, ctx);
261  		if (r < 0) {
262  			od->error = r;
263  			return r;
264  		} else if (r == 0) {
265  			/* Userspace buffer is full. */
266  			break;
267  		} else {
268  			/*
269  			 * The part ran out of data.  Move to the next
270  			 * part. */
271  			ctx->pos = (ctx->pos & PART_MASK) +
272  			    (1 << PART_SHIFT);
273  			part = part->next;
274  		}
275  	}
276  	return 0;
277  }
278  
/*
 * Seek within a directory.
 *
 * When whence is 0 and the target lies before the end of the data read
 * so far, all stored parts are freed and the state is reset to the
 * start of iteration, so the next read fetches the directory again and
 * userspace sees new entries.  The position itself is then set by
 * default_llseek().
 */
static loff_t orangefs_dir_llseek(struct file *file, loff_t offset,
    int whence)
{
	struct orangefs_dir *od = file->private_data;

	if (whence == 0 && offset < od->end) {
		struct orangefs_dir_part *cur, *following;

		/* Read the link before freeing the element that holds it. */
		for (cur = od->part; cur; cur = following) {
			following = cur->next;
			vfree(cur);
		}
		od->part = NULL;
		od->token = ORANGEFS_ITERATE_START;
		od->end = 1 << PART_SHIFT;
	}
	return default_llseek(file, offset, whence);
}
300  
orangefs_dir_iterate(struct file * file,struct dir_context * ctx)301  static int orangefs_dir_iterate(struct file *file,
302      struct dir_context *ctx)
303  {
304  	struct orangefs_dir *od = file->private_data;
305  	struct inode *inode = file_inode(file);
306  	int r;
307  
308  	if (od->error)
309  		return od->error;
310  
311  	if (ctx->pos == 0) {
312  		if (!dir_emit_dot(file, ctx))
313  			return 0;
314  		ctx->pos++;
315  	}
316  	if (ctx->pos == 1) {
317  		if (!dir_emit_dotdot(file, ctx))
318  			return 0;
319  		ctx->pos = 1 << PART_SHIFT;
320  	}
321  
322  	/*
323  	 * The seek position is in the first synthesized part but is not
324  	 * valid.
325  	 */
326  	if ((ctx->pos & PART_MASK) == 0)
327  		return -EIO;
328  
329  	r = 0;
330  
331  	/*
332  	 * Must read more if the user has sought past what has been read
333  	 * so far.  Stop a user who has sought past the end.
334  	 */
335  	while (od->token != ORANGEFS_ITERATE_END &&
336  	    ctx->pos > od->end) {
337  		r = orangefs_dir_more(od, inode);
338  		if (r)
339  			return r;
340  	}
341  	if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end)
342  		return -EIO;
343  
344  	/* Then try to fill if there's any left in the buffer. */
345  	if (ctx->pos < od->end) {
346  		r = orangefs_dir_fill(od, ctx);
347  		if (r)
348  			return r;
349  	}
350  
351  	/* Finally get some more and try to fill. */
352  	if (od->token != ORANGEFS_ITERATE_END) {
353  		r = orangefs_dir_more(od, inode);
354  		if (r)
355  			return r;
356  		r = orangefs_dir_fill(od, ctx);
357  	}
358  
359  	return r;
360  }
361  
orangefs_dir_open(struct inode * inode,struct file * file)362  static int orangefs_dir_open(struct inode *inode, struct file *file)
363  {
364  	struct orangefs_dir *od;
365  	file->private_data = kmalloc(sizeof(struct orangefs_dir),
366  	    GFP_KERNEL);
367  	if (!file->private_data)
368  		return -ENOMEM;
369  	od = file->private_data;
370  	od->token = ORANGEFS_ITERATE_START;
371  	od->part = NULL;
372  	od->end = 1 << PART_SHIFT;
373  	od->error = 0;
374  	return 0;
375  }
376  
orangefs_dir_release(struct inode * inode,struct file * file)377  static int orangefs_dir_release(struct inode *inode, struct file *file)
378  {
379  	struct orangefs_dir *od = file->private_data;
380  	struct orangefs_dir_part *part = od->part;
381  	while (part) {
382  		struct orangefs_dir_part *next = part->next;
383  		vfree(part);
384  		part = next;
385  	}
386  	kfree(od);
387  	return 0;
388  }
389  
390  const struct file_operations orangefs_dir_operations = {
391  	.llseek = orangefs_dir_llseek,
392  	.read = generic_read_dir,
393  	.iterate_shared = orangefs_dir_iterate,
394  	.open = orangefs_dir_open,
395  	.release = orangefs_dir_release
396  };
397