1  // SPDX-License-Identifier: GPL-2.0
2  #define _GNU_SOURCE
3  #include <sched.h>
4  #include <stdio.h>
5  #include <errno.h>
6  #include <pthread.h>
7  #include <string.h>
8  #include <sys/stat.h>
9  #include <sys/types.h>
10  #include <sys/mount.h>
11  #include <sys/wait.h>
12  #include <sys/vfs.h>
13  #include <sys/statvfs.h>
14  #include <sys/sysinfo.h>
15  #include <stdlib.h>
16  #include <unistd.h>
17  #include <fcntl.h>
18  #include <grp.h>
19  #include <stdbool.h>
20  #include <stdarg.h>
21  #include <linux/mount.h>
22  
23  #include "../kselftest_harness.h"
24  
25  #ifndef CLONE_NEWNS
26  #define CLONE_NEWNS 0x00020000
27  #endif
28  
29  #ifndef CLONE_NEWUSER
30  #define CLONE_NEWUSER 0x10000000
31  #endif
32  
33  #ifndef MS_REC
34  #define MS_REC 16384
35  #endif
36  
37  #ifndef MS_RELATIME
38  #define MS_RELATIME (1 << 21)
39  #endif
40  
41  #ifndef MS_STRICTATIME
42  #define MS_STRICTATIME (1 << 24)
43  #endif
44  
45  #ifndef MOUNT_ATTR_RDONLY
46  #define MOUNT_ATTR_RDONLY 0x00000001
47  #endif
48  
49  #ifndef MOUNT_ATTR_NOSUID
50  #define MOUNT_ATTR_NOSUID 0x00000002
51  #endif
52  
53  #ifndef MOUNT_ATTR_NOEXEC
54  #define MOUNT_ATTR_NOEXEC 0x00000008
55  #endif
56  
57  #ifndef MOUNT_ATTR_NODIRATIME
58  #define MOUNT_ATTR_NODIRATIME 0x00000080
59  #endif
60  
61  #ifndef MOUNT_ATTR__ATIME
62  #define MOUNT_ATTR__ATIME 0x00000070
63  #endif
64  
65  #ifndef MOUNT_ATTR_RELATIME
66  #define MOUNT_ATTR_RELATIME 0x00000000
67  #endif
68  
69  #ifndef MOUNT_ATTR_NOATIME
70  #define MOUNT_ATTR_NOATIME 0x00000010
71  #endif
72  
73  #ifndef MOUNT_ATTR_STRICTATIME
74  #define MOUNT_ATTR_STRICTATIME 0x00000020
75  #endif
76  
77  #ifndef AT_RECURSIVE
78  #define AT_RECURSIVE 0x8000
79  #endif
80  
81  #ifndef MS_SHARED
82  #define MS_SHARED (1 << 20)
83  #endif
84  
85  #define DEFAULT_THREADS 4
86  #define ptr_to_int(p) ((int)((intptr_t)(p)))
87  #define int_to_ptr(u) ((void *)((intptr_t)(u)))
88  
89  #ifndef __NR_mount_setattr
90  	#if defined __alpha__
91  		#define __NR_mount_setattr 552
92  	#elif defined _MIPS_SIM
93  		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
94  			#define __NR_mount_setattr (442 + 4000)
95  		#endif
96  		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
97  			#define __NR_mount_setattr (442 + 6000)
98  		#endif
99  		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
100  			#define __NR_mount_setattr (442 + 5000)
101  		#endif
102  	#elif defined __ia64__
103  		#define __NR_mount_setattr (442 + 1024)
104  	#else
105  		#define __NR_mount_setattr 442
106  	#endif
107  #endif
108  
109  #ifndef __NR_open_tree
110  	#if defined __alpha__
111  		#define __NR_open_tree 538
112  	#elif defined _MIPS_SIM
113  		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
114  			#define __NR_open_tree 4428
115  		#endif
116  		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
117  			#define __NR_open_tree 6428
118  		#endif
119  		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
120  			#define __NR_open_tree 5428
121  		#endif
122  	#elif defined __ia64__
123  		#define __NR_open_tree (428 + 1024)
124  	#else
125  		#define __NR_open_tree 428
126  	#endif
127  #endif
128  
129  #ifndef MOUNT_ATTR_IDMAP
130  #define MOUNT_ATTR_IDMAP 0x00100000
131  #endif
132  
133  #ifndef MOUNT_ATTR_NOSYMFOLLOW
134  #define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
135  #endif
136  
/*
 * Thin wrapper that issues the mount_setattr system call directly.
 *
 * All arguments are forwarded to syscall() unchanged and the value that
 * syscall() returns is handed back converted to int. Callers in this file
 * treat a non-zero result as failure and then inspect errno.
 */
static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
				    struct mount_attr *attr, size_t size)
{
	long rc;

	rc = syscall(__NR_mount_setattr, dfd, path, flags, attr, size);

	return rc;
}
142  
143  #ifndef OPEN_TREE_CLONE
144  #define OPEN_TREE_CLONE 1
145  #endif
146  
147  #ifndef OPEN_TREE_CLOEXEC
148  #define OPEN_TREE_CLOEXEC O_CLOEXEC
149  #endif
150  
151  #ifndef AT_RECURSIVE
152  #define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
153  #endif
154  
/*
 * Thin wrapper that issues the open_tree system call directly.
 *
 * @dfd, @filename and @flags are forwarded to syscall() unchanged; the value
 * that syscall() returns is handed back converted to int.
 */
static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
{
	long rc;

	rc = syscall(__NR_open_tree, dfd, filename, flags);

	return rc;
}
159  
/*
 * write() wrapper that restarts the call for as long as it fails with EINTR.
 *
 * Returns the result of the last write() call: the number of bytes written,
 * or a negative value with errno set to something other than EINTR.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t nwritten = write(fd, buf, count);

		/* Anything except an interrupted call is final. */
		if (nwritten >= 0 || errno != EINTR)
			return nwritten;
	}
}
170  
/*
 * Write @count bytes from @buf to the file at @path.
 *
 * The file is opened write-only without O_CREAT, so it has to exist already;
 * O_NOFOLLOW is passed so a symlink as the last path component is not
 * followed.
 *
 * Returns 0 when exactly @count bytes were written and -1 otherwise. On
 * failure errno describes the open()/write() error (EIO for a short write).
 * errno is preserved across the close() because callers inspect it after a
 * failure.
 */
static int write_file(const char *path, const void *buf, size_t count)
{
	int fd, saved_errno;
	ssize_t ret;

	fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
	if (fd < 0)
		return -1;

	ret = write_nointr(fd, buf, count);
	if (ret >= 0 && (size_t)ret != count) {
		/* A short write does not set errno; report it explicitly. */
		errno = EIO;
		ret = -1;
	}

	/* close() may overwrite errno, so keep the value the caller needs. */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;

	return ret < 0 ? -1 : 0;
}
187  
/*
 * Move the calling process into a new user namespace in which the caller's
 * current uid and gid are mapped to 0, then switch to gid 0 and uid 0.
 *
 * Returns 0 on success and -1 as soon as any step fails.
 */
static int create_and_enter_userns(void)
{
	uid_t uid;
	gid_t gid;
	char map[100];

	/* Record the ids first: they are the source side of the maps below. */
	uid = getuid();
	gid = getgid();

	if (unshare(CLONE_NEWUSER))
		return -1;

	/* A missing setgroups file (ENOENT) is tolerated. */
	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
	    errno != ENOENT)
		return -1;

	/*
	 * uid_t and gid_t are unsigned, so format them with %u; %d would
	 * print large ids as negative numbers.
	 */
	snprintf(map, sizeof(map), "0 %u 1", (unsigned int)uid);
	if (write_file("/proc/self/uid_map", map, strlen(map)))
		return -1;

	snprintf(map, sizeof(map), "0 %u 1", (unsigned int)gid);
	if (write_file("/proc/self/gid_map", map, strlen(map)))
		return -1;

	if (setgid(0))
		return -1;

	if (setuid(0))
		return -1;

	return 0;
}
221  
/*
 * Enter a fresh user namespace, unshare the mount namespace and remount "/"
 * recursively as MS_PRIVATE.
 *
 * The steps run in that order and stop at the first failure. Returns 0 when
 * all of them succeed, -1 otherwise.
 */
static int prepare_unpriv_mountns(void)
{
	int failed;

	/* || short-circuits, so a failing step skips the remaining ones. */
	failed = create_and_enter_userns() ||
		 unshare(CLONE_NEWNS) ||
		 mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL);

	return failed ? -1 : 0;
}
235  
#ifndef ST_NOSYMFOLLOW
#define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
#endif

/*
 * Translate the statvfs() flags of the filesystem containing @path into a
 * word of mount flags.
 *
 * Returns the translated flag word, or -EINVAL when statvfs() fails or
 * reports a flag that is not in the table below. Note that the error value
 * becomes a large positive number when a caller stores the result in an
 * unsigned variable.
 */
static int read_mnt_flags(const char *path)
{
	static const struct {
		unsigned long st_flag;
		unsigned int mnt_flag;
	} flag_map[] = {
		{ ST_RDONLY,		MS_RDONLY },
		{ ST_NOSUID,		MS_NOSUID },
		{ ST_NODEV,		MS_NODEV },
		{ ST_NOEXEC,		MS_NOEXEC },
		{ ST_NOATIME,		MS_NOATIME },
		{ ST_NODIRATIME,	MS_NODIRATIME },
		{ ST_RELATIME,		MS_RELATIME },
		{ ST_SYNCHRONOUS,	MS_SYNCHRONOUS },
		{ ST_MANDLOCK,		MS_MANDLOCK },
		/*
		 * Deliberately reported with its ST_ value, as before.
		 * NOTE(review): callers presumably compare against
		 * ST_NOSYMFOLLOW -- confirm before switching to an MS_ name.
		 */
		{ ST_NOSYMFOLLOW,	ST_NOSYMFOLLOW },
	};
	struct statvfs stat;
	unsigned long known = 0;
	unsigned int mnt_flags = 0;
	size_t i;

	if (statvfs(path, &stat) != 0)
		return -EINVAL;

	for (i = 0; i < sizeof(flag_map) / sizeof(flag_map[0]); i++) {
		known |= flag_map[i].st_flag;
		if (stat.f_flag & flag_map[i].st_flag)
			mnt_flags |= flag_map[i].mnt_flag;
	}

	/* Refuse to silently drop a flag this table knows nothing about. */
	if (stat.f_flag & ~known)
		return -EINVAL;

	return mnt_flags;
}
279  
/*
 * Skip @nfields fields in @src, where a field ends at a single space or tab,
 * and return a pointer to what follows.
 *
 * Separators are not coalesced: every space or tab terminates one field.
 * When the string ends first, a pointer to its terminating NUL is returned;
 * the result is never NULL. For @nfields <= 0, @src itself is returned.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int remaining;

	for (remaining = nfields; remaining > 0; remaining--) {
		/* Jump to the separator that ends the current field. */
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			break;

		/* Step over that one separator. */
		cur++;
	}

	return cur;
}
297  
/*
 * Terminate @word in place at its first space or tab.
 *
 * When @word contains neither, the NUL is written over the existing
 * terminator, so the buffer must be writable in either case.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
304  
/*
 * Report whether a mount at @path is listed with a "shared:" tag in
 * /proc/self/mountinfo.
 *
 * Each line is split on single spaces/tabs: field 4 is compared against
 * @path and the word two fields later is searched for "shared:". Every
 * entry whose field 4 equals @path is examined until one is shared.
 *
 * Returns true when such an entry is found, false otherwise (including when
 * mountinfo cannot be opened).
 */
static bool is_shared_mount(const char *path)
{
	size_t len = 0;
	char *line = NULL;
	bool shared = false;
	FILE *f;

	f = fopen("/proc/self/mountinfo", "re");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *opts, *target;

		/*
		 * get_field() never returns NULL; on a short line it yields
		 * an empty string, which simply fails the comparison below.
		 */
		target = get_field(line, 4);

		/* Locate opts before target is NUL-terminated in place. */
		opts = get_field(target, 2);

		null_endofword(target);
		if (strcmp(target, path) != 0)
			continue;

		null_endofword(opts);
		if (strstr(opts, "shared:")) {
			shared = true;
			break;
		}
	}

	/* Single exit so the line buffer and the stream are always released. */
	free(line);
	fclose(f);

	return shared;
}
341  
mount_setattr_thread(void * data)342  static void *mount_setattr_thread(void *data)
343  {
344  	struct mount_attr attr = {
345  		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
346  		.attr_clr	= 0,
347  		.propagation	= MS_SHARED,
348  	};
349  
350  	if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
351  		pthread_exit(int_to_ptr(-1));
352  
353  	pthread_exit(int_to_ptr(0));
354  }
355  
356  /* Attempt to de-conflict with the selftests tree. */
357  #ifndef SKIP
358  #define SKIP(s, ...)	XFAIL(s, ##__VA_ARGS__)
359  #endif
360  
/*
 * Probe for the mount_setattr system call with a call that passes an
 * invalid dfd, an empty path and a NULL attribute pointer.
 *
 * Returns false only when the probe fails with ENOSYS; any other outcome,
 * including a different error, counts as "supported".
 */
static bool mount_setattr_supported(void)
{
	if (sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0) >= 0)
		return true;

	return errno != ENOSYS;
}
371  
FIXTURE(mount_setattr)372  FIXTURE(mount_setattr) {
373  };
374  
375  #define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
376  #define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
377  
FIXTURE_SETUP(mount_setattr)378  FIXTURE_SETUP(mount_setattr)
379  {
380  	int fd = -EBADF;
381  
382  	if (!mount_setattr_supported())
383  		SKIP(return, "mount_setattr syscall not supported");
384  
385  	ASSERT_EQ(prepare_unpriv_mountns(), 0);
386  
387  	(void)umount2("/mnt", MNT_DETACH);
388  	(void)umount2("/tmp", MNT_DETACH);
389  
390  	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
391  			"size=100000,mode=700"), 0);
392  
393  	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
394  
395  	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
396  			"size=100000,mode=700"), 0);
397  
398  	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
399  
400  	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
401  			"size=100000,mode=700"), 0);
402  
403  	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
404  			"size=100000,mode=700"), 0);
405  
406  	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
407  
408  	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
409  			"size=100000,mode=700"), 0);
410  
411  	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
412  
413  	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
414  
415  	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
416  
417  	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
418  			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
419  
420  	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
421  
422  	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
423  			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
424  
425  	fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
426  	ASSERT_GT(fd, 0);
427  	ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
428  	ASSERT_EQ(close(fd), 0);
429  }
430  
FIXTURE_TEARDOWN(mount_setattr)431  FIXTURE_TEARDOWN(mount_setattr)
432  {
433  	if (!mount_setattr_supported())
434  		SKIP(return, "mount_setattr syscall not supported");
435  
436  	(void)umount2("/mnt/A", MNT_DETACH);
437  	(void)umount2("/tmp", MNT_DETACH);
438  }
439  
TEST_F(mount_setattr,invalid_attributes)440  TEST_F(mount_setattr, invalid_attributes)
441  {
442  	struct mount_attr invalid_attr = {
443  		.attr_set = (1U << 31),
444  	};
445  
446  	if (!mount_setattr_supported())
447  		SKIP(return, "mount_setattr syscall not supported");
448  
449  	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
450  				    sizeof(invalid_attr)), 0);
451  
452  	invalid_attr.attr_set	= 0;
453  	invalid_attr.attr_clr	= (1U << 31);
454  	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
455  				    sizeof(invalid_attr)), 0);
456  
457  	invalid_attr.attr_clr		= 0;
458  	invalid_attr.propagation	= (1U << 31);
459  	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
460  				    sizeof(invalid_attr)), 0);
461  
462  	invalid_attr.attr_set		= (1U << 31);
463  	invalid_attr.attr_clr		= (1U << 31);
464  	invalid_attr.propagation	= (1U << 31);
465  	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
466  				    sizeof(invalid_attr)), 0);
467  
468  	ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
469  				    sizeof(invalid_attr)), 0);
470  }
471  
TEST_F(mount_setattr,extensibility)472  TEST_F(mount_setattr, extensibility)
473  {
474  	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
475  	char *s = "dummy";
476  	struct mount_attr invalid_attr = {};
477  	struct mount_attr_large {
478  		struct mount_attr attr1;
479  		struct mount_attr attr2;
480  		struct mount_attr attr3;
481  	} large_attr = {};
482  
483  	if (!mount_setattr_supported())
484  		SKIP(return, "mount_setattr syscall not supported");
485  
486  	old_flags = read_mnt_flags("/mnt/A");
487  	ASSERT_GT(old_flags, 0);
488  
489  	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
490  				    sizeof(invalid_attr)), 0);
491  	ASSERT_EQ(errno, EFAULT);
492  
493  	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
494  				    sizeof(invalid_attr)), 0);
495  	ASSERT_EQ(errno, EINVAL);
496  
497  	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
498  	ASSERT_EQ(errno, EINVAL);
499  
500  	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
501  				    sizeof(invalid_attr) / 2), 0);
502  	ASSERT_EQ(errno, EINVAL);
503  
504  	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
505  				    sizeof(invalid_attr) / 2), 0);
506  	ASSERT_EQ(errno, EINVAL);
507  
508  	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
509  				    (void *)&large_attr, sizeof(large_attr)), 0);
510  
511  	large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
512  	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
513  				    (void *)&large_attr, sizeof(large_attr)), 0);
514  
515  	large_attr.attr3.attr_set = 0;
516  	large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
517  	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
518  				    (void *)&large_attr, sizeof(large_attr)), 0);
519  
520  	expected_flags = old_flags;
521  	expected_flags |= MS_RDONLY;
522  
523  	new_flags = read_mnt_flags("/mnt/A");
524  	ASSERT_EQ(new_flags, expected_flags);
525  
526  	new_flags = read_mnt_flags("/mnt/A/AA");
527  	ASSERT_EQ(new_flags, expected_flags);
528  
529  	new_flags = read_mnt_flags("/mnt/A/AA/B");
530  	ASSERT_EQ(new_flags, expected_flags);
531  
532  	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
533  	ASSERT_EQ(new_flags, expected_flags);
534  }
535  
TEST_F(mount_setattr,basic)536  TEST_F(mount_setattr, basic)
537  {
538  	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
539  	struct mount_attr attr = {
540  		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
541  		.attr_clr	= MOUNT_ATTR__ATIME,
542  	};
543  
544  	if (!mount_setattr_supported())
545  		SKIP(return, "mount_setattr syscall not supported");
546  
547  	old_flags = read_mnt_flags("/mnt/A");
548  	ASSERT_GT(old_flags, 0);
549  
550  	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);
551  
552  	expected_flags = old_flags;
553  	expected_flags |= MS_RDONLY;
554  	expected_flags |= MS_NOEXEC;
555  	expected_flags &= ~MS_NOATIME;
556  	expected_flags |= MS_RELATIME;
557  
558  	new_flags = read_mnt_flags("/mnt/A");
559  	ASSERT_EQ(new_flags, expected_flags);
560  
561  	new_flags = read_mnt_flags("/mnt/A/AA");
562  	ASSERT_EQ(new_flags, old_flags);
563  
564  	new_flags = read_mnt_flags("/mnt/A/AA/B");
565  	ASSERT_EQ(new_flags, old_flags);
566  
567  	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
568  	ASSERT_EQ(new_flags, old_flags);
569  }
570  
TEST_F(mount_setattr,basic_recursive)571  TEST_F(mount_setattr, basic_recursive)
572  {
573  	int fd;
574  	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
575  	struct mount_attr attr = {
576  		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
577  		.attr_clr	= MOUNT_ATTR__ATIME,
578  	};
579  
580  	if (!mount_setattr_supported())
581  		SKIP(return, "mount_setattr syscall not supported");
582  
583  	old_flags = read_mnt_flags("/mnt/A");
584  	ASSERT_GT(old_flags, 0);
585  
586  	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
587  
588  	expected_flags = old_flags;
589  	expected_flags |= MS_RDONLY;
590  	expected_flags |= MS_NOEXEC;
591  	expected_flags &= ~MS_NOATIME;
592  	expected_flags |= MS_RELATIME;
593  
594  	new_flags = read_mnt_flags("/mnt/A");
595  	ASSERT_EQ(new_flags, expected_flags);
596  
597  	new_flags = read_mnt_flags("/mnt/A/AA");
598  	ASSERT_EQ(new_flags, expected_flags);
599  
600  	new_flags = read_mnt_flags("/mnt/A/AA/B");
601  	ASSERT_EQ(new_flags, expected_flags);
602  
603  	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
604  	ASSERT_EQ(new_flags, expected_flags);
605  
606  	memset(&attr, 0, sizeof(attr));
607  	attr.attr_clr = MOUNT_ATTR_RDONLY;
608  	attr.propagation = MS_SHARED;
609  	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
610  
611  	expected_flags &= ~MS_RDONLY;
612  	new_flags = read_mnt_flags("/mnt/A");
613  	ASSERT_EQ(new_flags, expected_flags);
614  
615  	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
616  
617  	new_flags = read_mnt_flags("/mnt/A/AA");
618  	ASSERT_EQ(new_flags, expected_flags);
619  
620  	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
621  
622  	new_flags = read_mnt_flags("/mnt/A/AA/B");
623  	ASSERT_EQ(new_flags, expected_flags);
624  
625  	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
626  
627  	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
628  	ASSERT_EQ(new_flags, expected_flags);
629  
630  	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
631  
632  	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
633  	ASSERT_GE(fd, 0);
634  
635  	/*
636  	 * We're holding a fd open for writing so this needs to fail somewhere
637  	 * in the middle and the mount options need to be unchanged.
638  	 */
639  	attr.attr_set = MOUNT_ATTR_RDONLY;
640  	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
641  
642  	new_flags = read_mnt_flags("/mnt/A");
643  	ASSERT_EQ(new_flags, expected_flags);
644  
645  	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
646  
647  	new_flags = read_mnt_flags("/mnt/A/AA");
648  	ASSERT_EQ(new_flags, expected_flags);
649  
650  	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
651  
652  	new_flags = read_mnt_flags("/mnt/A/AA/B");
653  	ASSERT_EQ(new_flags, expected_flags);
654  
655  	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
656  
657  	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
658  	ASSERT_EQ(new_flags, expected_flags);
659  
660  	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
661  
662  	EXPECT_EQ(close(fd), 0);
663  }
664  
TEST_F(mount_setattr,mount_has_writers)665  TEST_F(mount_setattr, mount_has_writers)
666  {
667  	int fd, dfd;
668  	unsigned int old_flags = 0, new_flags = 0;
669  	struct mount_attr attr = {
670  		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
671  		.attr_clr	= MOUNT_ATTR__ATIME,
672  		.propagation	= MS_SHARED,
673  	};
674  
675  	if (!mount_setattr_supported())
676  		SKIP(return, "mount_setattr syscall not supported");
677  
678  	old_flags = read_mnt_flags("/mnt/A");
679  	ASSERT_GT(old_flags, 0);
680  
681  	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
682  	ASSERT_GE(fd, 0);
683  
684  	/*
685  	 * We're holding a fd open to a mount somwhere in the middle so this
686  	 * needs to fail somewhere in the middle. After this the mount options
687  	 * need to be unchanged.
688  	 */
689  	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
690  
691  	new_flags = read_mnt_flags("/mnt/A");
692  	ASSERT_EQ(new_flags, old_flags);
693  
694  	ASSERT_EQ(is_shared_mount("/mnt/A"), false);
695  
696  	new_flags = read_mnt_flags("/mnt/A/AA");
697  	ASSERT_EQ(new_flags, old_flags);
698  
699  	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);
700  
701  	new_flags = read_mnt_flags("/mnt/A/AA/B");
702  	ASSERT_EQ(new_flags, old_flags);
703  
704  	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);
705  
706  	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
707  	ASSERT_EQ(new_flags, old_flags);
708  
709  	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);
710  
711  	dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
712  	ASSERT_GE(dfd, 0);
713  	EXPECT_EQ(fsync(dfd), 0);
714  	EXPECT_EQ(close(dfd), 0);
715  
716  	EXPECT_EQ(fsync(fd), 0);
717  	EXPECT_EQ(close(fd), 0);
718  
719  	/* All writers are gone so this should succeed. */
720  	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
721  }
722  
TEST_F(mount_setattr,mixed_mount_options)723  TEST_F(mount_setattr, mixed_mount_options)
724  {
725  	unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
726  	struct mount_attr attr = {
727  		.attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
728  		.attr_set = MOUNT_ATTR_RELATIME,
729  	};
730  
731  	if (!mount_setattr_supported())
732  		SKIP(return, "mount_setattr syscall not supported");
733  
734  	old_flags1 = read_mnt_flags("/mnt/B");
735  	ASSERT_GT(old_flags1, 0);
736  
737  	old_flags2 = read_mnt_flags("/mnt/B/BB");
738  	ASSERT_GT(old_flags2, 0);
739  
740  	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
741  
742  	expected_flags = old_flags2;
743  	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
744  	expected_flags |= MS_RELATIME;
745  
746  	new_flags = read_mnt_flags("/mnt/B");
747  	ASSERT_EQ(new_flags, expected_flags);
748  
749  	expected_flags = old_flags2;
750  	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
751  	expected_flags |= MS_RELATIME;
752  
753  	new_flags = read_mnt_flags("/mnt/B/BB");
754  	ASSERT_EQ(new_flags, expected_flags);
755  }
756  
TEST_F(mount_setattr,time_changes)757  TEST_F(mount_setattr, time_changes)
758  {
759  	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
760  	struct mount_attr attr = {
761  		.attr_set	= MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
762  	};
763  
764  	if (!mount_setattr_supported())
765  		SKIP(return, "mount_setattr syscall not supported");
766  
767  	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
768  
769  	attr.attr_set = MOUNT_ATTR_STRICTATIME;
770  	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
771  
772  	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
773  	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
774  
775  	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
776  	attr.attr_clr = MOUNT_ATTR__ATIME;
777  	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
778  
779  	attr.attr_set = 0;
780  	attr.attr_clr = MOUNT_ATTR_STRICTATIME;
781  	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
782  
783  	attr.attr_clr = MOUNT_ATTR_NOATIME;
784  	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
785  
786  	old_flags = read_mnt_flags("/mnt/A");
787  	ASSERT_GT(old_flags, 0);
788  
789  	attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
790  	attr.attr_clr = MOUNT_ATTR__ATIME;
791  	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
792  
793  	expected_flags = old_flags;
794  	expected_flags |= MS_NOATIME;
795  	expected_flags |= MS_NODIRATIME;
796  
797  	new_flags = read_mnt_flags("/mnt/A");
798  	ASSERT_EQ(new_flags, expected_flags);
799  
800  	new_flags = read_mnt_flags("/mnt/A/AA");
801  	ASSERT_EQ(new_flags, expected_flags);
802  
803  	new_flags = read_mnt_flags("/mnt/A/AA/B");
804  	ASSERT_EQ(new_flags, expected_flags);
805  
806  	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
807  	ASSERT_EQ(new_flags, expected_flags);
808  
809  	memset(&attr, 0, sizeof(attr));
810  	attr.attr_set &= ~MOUNT_ATTR_NOATIME;
811  	attr.attr_set |= MOUNT_ATTR_RELATIME;
812  	attr.attr_clr |= MOUNT_ATTR__ATIME;
813  	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
814  
815  	expected_flags &= ~MS_NOATIME;
816  	expected_flags |= MS_RELATIME;
817  
818  	new_flags = read_mnt_flags("/mnt/A");
819  	ASSERT_EQ(new_flags, expected_flags);
820  
821  	new_flags = read_mnt_flags("/mnt/A/AA");
822  	ASSERT_EQ(new_flags, expected_flags);
823  
824  	new_flags = read_mnt_flags("/mnt/A/AA/B");
825  	ASSERT_EQ(new_flags, expected_flags);
826  
827  	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
828  	ASSERT_EQ(new_flags, expected_flags);
829  
830  	memset(&attr, 0, sizeof(attr));
831  	attr.attr_set &= ~MOUNT_ATTR_RELATIME;
832  	attr.attr_set |= MOUNT_ATTR_STRICTATIME;
833  	attr.attr_clr |= MOUNT_ATTR__ATIME;
834  	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
835  
836  	expected_flags &= ~MS_RELATIME;
837  
838  	new_flags = read_mnt_flags("/mnt/A");
839  	ASSERT_EQ(new_flags, expected_flags);
840  
841  	new_flags = read_mnt_flags("/mnt/A/AA");
842  	ASSERT_EQ(new_flags, expected_flags);
843  
844  	new_flags = read_mnt_flags("/mnt/A/AA/B");
845  	ASSERT_EQ(new_flags, expected_flags);
846  
847  	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
848  	ASSERT_EQ(new_flags, expected_flags);
849  
850  	memset(&attr, 0, sizeof(attr));
851  	attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
852  	attr.attr_set |= MOUNT_ATTR_NOATIME;
853  	attr.attr_clr |= MOUNT_ATTR__ATIME;
854  	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
855  
856  	expected_flags |= MS_NOATIME;
857  	new_flags = read_mnt_flags("/mnt/A");
858  	ASSERT_EQ(new_flags, expected_flags);
859  
860  	new_flags = read_mnt_flags("/mnt/A/AA");
861  	ASSERT_EQ(new_flags, expected_flags);
862  
863  	new_flags = read_mnt_flags("/mnt/A/AA/B");
864  	ASSERT_EQ(new_flags, expected_flags);
865  
866  	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
867  	ASSERT_EQ(new_flags, expected_flags);
868  
869  	memset(&attr, 0, sizeof(attr));
870  	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
871  
872  	new_flags = read_mnt_flags("/mnt/A");
873  	ASSERT_EQ(new_flags, expected_flags);
874  
875  	new_flags = read_mnt_flags("/mnt/A/AA");
876  	ASSERT_EQ(new_flags, expected_flags);
877  
878  	new_flags = read_mnt_flags("/mnt/A/AA/B");
879  	ASSERT_EQ(new_flags, expected_flags);
880  
881  	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
882  	ASSERT_EQ(new_flags, expected_flags);
883  
884  	memset(&attr, 0, sizeof(attr));
885  	attr.attr_clr = MOUNT_ATTR_NODIRATIME;
886  	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
887  
888  	expected_flags &= ~MS_NODIRATIME;
889  
890  	new_flags = read_mnt_flags("/mnt/A");
891  	ASSERT_EQ(new_flags, expected_flags);
892  
893  	new_flags = read_mnt_flags("/mnt/A/AA");
894  	ASSERT_EQ(new_flags, expected_flags);
895  
896  	new_flags = read_mnt_flags("/mnt/A/AA/B");
897  	ASSERT_EQ(new_flags, expected_flags);
898  
899  	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
900  	ASSERT_EQ(new_flags, expected_flags);
901  }
902  
TEST_F(mount_setattr,multi_threaded)903  TEST_F(mount_setattr, multi_threaded)
904  {
905  	int i, j, nthreads, ret = 0;
906  	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
907  	pthread_attr_t pattr;
908  	pthread_t threads[DEFAULT_THREADS];
909  
910  	if (!mount_setattr_supported())
911  		SKIP(return, "mount_setattr syscall not supported");
912  
913  	old_flags = read_mnt_flags("/mnt/A");
914  	ASSERT_GT(old_flags, 0);
915  
916  	/* Try to change mount options from multiple threads. */
917  	nthreads = get_nprocs_conf();
918  	if (nthreads > DEFAULT_THREADS)
919  		nthreads = DEFAULT_THREADS;
920  
921  	pthread_attr_init(&pattr);
922  	for (i = 0; i < nthreads; i++)
923  		ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);
924  
925  	for (j = 0; j < i; j++) {
926  		void *retptr = NULL;
927  
928  		EXPECT_EQ(pthread_join(threads[j], &retptr), 0);
929  
930  		ret += ptr_to_int(retptr);
931  		EXPECT_EQ(ret, 0);
932  	}
933  	pthread_attr_destroy(&pattr);
934  
935  	ASSERT_EQ(ret, 0);
936  
937  	expected_flags = old_flags;
938  	expected_flags |= MS_RDONLY;
939  	expected_flags |= MS_NOSUID;
940  	new_flags = read_mnt_flags("/mnt/A");
941  	ASSERT_EQ(new_flags, expected_flags);
942  
943  	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
944  
945  	new_flags = read_mnt_flags("/mnt/A/AA");
946  	ASSERT_EQ(new_flags, expected_flags);
947  
948  	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
949  
950  	new_flags = read_mnt_flags("/mnt/A/AA/B");
951  	ASSERT_EQ(new_flags, expected_flags);
952  
953  	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
954  
955  	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
956  	ASSERT_EQ(new_flags, expected_flags);
957  
958  	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
959  }
960  
TEST_F(mount_setattr,wrong_user_namespace)961  TEST_F(mount_setattr, wrong_user_namespace)
962  {
963  	int ret;
964  	struct mount_attr attr = {
965  		.attr_set = MOUNT_ATTR_RDONLY,
966  	};
967  
968  	if (!mount_setattr_supported())
969  		SKIP(return, "mount_setattr syscall not supported");
970  
971  	EXPECT_EQ(create_and_enter_userns(), 0);
972  	ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
973  	ASSERT_LT(ret, 0);
974  	ASSERT_EQ(errno, EPERM);
975  }
976  
TEST_F(mount_setattr,wrong_mount_namespace)977  TEST_F(mount_setattr, wrong_mount_namespace)
978  {
979  	int fd, ret;
980  	struct mount_attr attr = {
981  		.attr_set = MOUNT_ATTR_RDONLY,
982  	};
983  
984  	if (!mount_setattr_supported())
985  		SKIP(return, "mount_setattr syscall not supported");
986  
987  	fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
988  	ASSERT_GE(fd, 0);
989  
990  	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
991  
992  	ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
993  	ASSERT_LT(ret, 0);
994  	ASSERT_EQ(errno, EINVAL);
995  }
996  
FIXTURE(mount_setattr_idmapped) {
};

/*
 * Build the mount tree for the idmapped-mount tests.
 *
 * Unlike the plain mount_setattr fixture, no user namespace is entered here;
 * only the mount namespace is unshared and "/" made recursively private.
 * The tree then consists of:
 *   - tmpfs on /tmp, /tmp/B and /tmp/B/BB; a regular file "b" owned by 0:0
 *     is created in /tmp/B and /tmp/B/BB before the next tmpfs is mounted
 *     on top of that directory
 *   - tmpfs on /mnt and /mnt/A, a recursive bind mount of /tmp on /mnt/A/AA
 *   - ramfs on /mnt/B, devpts on /tmp/B/BB
 *   - a 2 MiB ext4 image in /mnt/C that is loop-mounted on /mnt/D using the
 *     external mkfs.ext4 and mount commands
 */
FIXTURE_SETUP(mount_setattr_idmapped)
{
	const unsigned long tmpfs_flags = MS_NOATIME | MS_NODEV;
	const char *tmpfs_opts = "size=100000,mode=700";
	int img_fd = -EBADF;

	ASSERT_EQ(unshare(CLONE_NEWNS), 0);

	ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL), 0);

	/* Best effort: nothing may be mounted there in the first place. */
	(void)umount2("/mnt", MNT_DETACH);
	(void)umount2("/tmp", MNT_DETACH);

	/* /tmp side of the tree */
	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", tmpfs_flags, tmpfs_opts), 0);

	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
	ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);

	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", tmpfs_flags, tmpfs_opts), 0);

	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
	ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);

	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", tmpfs_flags, tmpfs_opts), 0);

	/* /mnt side of the tree */
	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", tmpfs_flags, tmpfs_opts), 0);

	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);

	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", tmpfs_flags, tmpfs_opts), 0);

	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);

	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);

	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);

	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
			MS_NOATIME | MS_NODEV | MS_NOSUID, NULL), 0);

	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);

	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
			MS_RELATIME | MS_NOEXEC | MS_RDONLY, NULL), 0);

	/* ext4 image, loop-mounted on /mnt/D */
	ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
	ASSERT_EQ(mkdir("/mnt/D", 0777), 0);

	img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
	ASSERT_GE(img_fd, 0);
	ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0);
	ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
	ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
	ASSERT_EQ(close(img_fd), 0);
}
1059  
/*
 * Lazily detach the /mnt/A and /tmp mounts. Failures are deliberately
 * ignored (best-effort cleanup).
 */
FIXTURE_TEARDOWN(mount_setattr_idmapped)
{
	(void)umount2("/mnt/A", MNT_DETACH);
	(void)umount2("/tmp", MNT_DETACH);
}
1065  
1066  /**
1067   * Validate that negative fd values are rejected.
1068   */
TEST_F(mount_setattr_idmapped,invalid_fd_negative)1069  TEST_F(mount_setattr_idmapped, invalid_fd_negative)
1070  {
1071  	struct mount_attr attr = {
1072  		.attr_set	= MOUNT_ATTR_IDMAP,
1073  		.userns_fd	= -EBADF,
1074  	};
1075  
1076  	if (!mount_setattr_supported())
1077  		SKIP(return, "mount_setattr syscall not supported");
1078  
1079  	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1080  		TH_LOG("failure: created idmapped mount with negative fd");
1081  	}
1082  }
1083  
1084  /**
1085   * Validate that excessively large fd values are rejected.
1086   */
TEST_F(mount_setattr_idmapped,invalid_fd_large)1087  TEST_F(mount_setattr_idmapped, invalid_fd_large)
1088  {
1089  	struct mount_attr attr = {
1090  		.attr_set	= MOUNT_ATTR_IDMAP,
1091  		.userns_fd	= INT64_MAX,
1092  	};
1093  
1094  	if (!mount_setattr_supported())
1095  		SKIP(return, "mount_setattr syscall not supported");
1096  
1097  	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1098  		TH_LOG("failure: created idmapped mount with too large fd value");
1099  	}
1100  }
1101  
1102  /**
1103   * Validate that closed fd values are rejected.
1104   */
TEST_F(mount_setattr_idmapped,invalid_fd_closed)1105  TEST_F(mount_setattr_idmapped, invalid_fd_closed)
1106  {
1107  	int fd;
1108  	struct mount_attr attr = {
1109  		.attr_set = MOUNT_ATTR_IDMAP,
1110  	};
1111  
1112  	if (!mount_setattr_supported())
1113  		SKIP(return, "mount_setattr syscall not supported");
1114  
1115  	fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
1116  	ASSERT_GE(fd, 0);
1117  	ASSERT_GE(close(fd), 0);
1118  
1119  	attr.userns_fd = fd;
1120  	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1121  		TH_LOG("failure: created idmapped mount with closed fd");
1122  	}
1123  }
1124  
1125  /**
1126   * Validate that the initial user namespace is rejected.
1127   */
TEST_F(mount_setattr_idmapped,invalid_fd_initial_userns)1128  TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
1129  {
1130  	int open_tree_fd = -EBADF;
1131  	struct mount_attr attr = {
1132  		.attr_set = MOUNT_ATTR_IDMAP,
1133  	};
1134  
1135  	if (!mount_setattr_supported())
1136  		SKIP(return, "mount_setattr syscall not supported");
1137  
1138  	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1139  				     AT_NO_AUTOMOUNT |
1140  				     AT_SYMLINK_NOFOLLOW |
1141  				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1142  	ASSERT_GE(open_tree_fd, 0);
1143  
1144  	attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
1145  	ASSERT_GE(attr.userns_fd, 0);
1146  	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1147  	ASSERT_EQ(errno, EPERM);
1148  	ASSERT_EQ(close(attr.userns_fd), 0);
1149  	ASSERT_EQ(close(open_tree_fd), 0);
1150  }
1151  
/*
 * Install one identical ID mapping extent ("<nsid> <hostid> <range>") in both
 * /proc/<pid>/uid_map and /proc/<pid>/gid_map.
 *
 * Returns 0 when both files were written, -1 as soon as write_file() reports
 * a failure (the gid map is not attempted if the uid map failed).
 */
static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
		   unsigned long range)
{
	static const char *const map_names[] = { "uid_map", "gid_map" };
	char map[100], procfile[256];
	size_t i;

	/* The mapping text is the same for both files, so format it once. */
	snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range);

	for (i = 0; i < sizeof(map_names) / sizeof(map_names[0]); i++) {
		snprintf(procfile, sizeof(procfile), "/proc/%d/%s", pid,
			 map_names[i]);
		if (write_file(procfile, map, strlen(map)))
			return -1;
	}

	return 0;
}
1170  
#define __STACK_SIZE (8 * 1024 * 1024)
/*
 * Start a child process that runs fn(arg) on a freshly allocated stack.
 *
 * @fn:    entry point executed by the child
 * @arg:   opaque argument handed to @fn
 * @flags: clone flags; SIGCHLD is always added as the exit signal
 *
 * Returns the child's pid on success. On failure a negative value is returned
 * and errno describes the error (ENOMEM if the stack could not be allocated).
 */
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
	char *stack;
	pid_t pid;
	int saved_errno;

	stack = malloc(__STACK_SIZE);
	if (!stack) {
		/* Callers inspect errno after a negative return. */
		errno = ENOMEM;
		return -ENOMEM;
	}

#ifdef __ia64__
	pid = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#else
	/* clone() expects the topmost address of the child's stack. */
	pid = clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#endif

	/*
	 * Unless the child shares our address space (CLONE_VM) it runs on its
	 * own copy of the stack, so the parent's allocation can be released.
	 * The same holds when no child was created at all. With CLONE_VM the
	 * child executes on this very memory, so it has to stay allocated.
	 */
	if (pid < 0 || !(flags & CLONE_VM)) {
		saved_errno = errno;
		free(stack);
		errno = saved_errno;
	}

	return pid;
}
1186  
/*
 * Child entry point handed to do_clone() by get_userns_fd(): the child sends
 * SIGSTOP to itself and does nothing else. @data is unused.
 */
static int get_userns_fd_cb(void *data)
{
	const pid_t self = getpid();

	(void)data;

	return kill(self, SIGSTOP);
}
1191  
/*
 * Reap child @pid, restarting waitpid() whenever it is interrupted by a
 * signal.
 *
 * Returns the child's exit status if it exited normally, or -1 if waiting
 * failed or the child did not terminate through a normal exit.
 */
static int wait_for_pid(pid_t pid)
{
	int status;

	for (;;) {
		if (waitpid(pid, &status, 0) != -1)
			break;

		/* Only an interrupted wait is retried. */
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(status) ? WEXITSTATUS(status) : -1;
}
1210  
get_userns_fd(unsigned long nsid,unsigned long hostid,unsigned long range)1211  static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
1212  {
1213  	int ret;
1214  	pid_t pid;
1215  	char path[256];
1216  
1217  	pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
1218  	if (pid < 0)
1219  		return -errno;
1220  
1221  	ret = map_ids(pid, nsid, hostid, range);
1222  	if (ret < 0)
1223  		return ret;
1224  
1225  	snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
1226  	ret = open(path, O_RDONLY | O_CLOEXEC);
1227  	kill(pid, SIGKILL);
1228  	wait_for_pid(pid);
1229  	return ret;
1230  }
1231  
1232  /**
1233   * Validate that an attached mount in our mount namespace cannot be idmapped.
1234   * (The kernel enforces that the mount's mount namespace and the caller's mount
1235   *  namespace match.)
1236   */
TEST_F(mount_setattr_idmapped,attached_mount_inside_current_mount_namespace)1237  TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
1238  {
1239  	int open_tree_fd = -EBADF;
1240  	struct mount_attr attr = {
1241  		.attr_set = MOUNT_ATTR_IDMAP,
1242  	};
1243  
1244  	if (!mount_setattr_supported())
1245  		SKIP(return, "mount_setattr syscall not supported");
1246  
1247  	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1248  				     AT_EMPTY_PATH |
1249  				     AT_NO_AUTOMOUNT |
1250  				     AT_SYMLINK_NOFOLLOW |
1251  				     OPEN_TREE_CLOEXEC);
1252  	ASSERT_GE(open_tree_fd, 0);
1253  
1254  	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1255  	ASSERT_GE(attr.userns_fd, 0);
1256  	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1257  	ASSERT_EQ(close(attr.userns_fd), 0);
1258  	ASSERT_EQ(close(open_tree_fd), 0);
1259  }
1260  
1261  /**
1262   * Validate that idmapping a mount is rejected if the mount's mount namespace
1263   * and our mount namespace don't match.
1264   * (The kernel enforces that the mount's mount namespace and the caller's mount
1265   *  namespace match.)
1266   */
TEST_F(mount_setattr_idmapped,attached_mount_outside_current_mount_namespace)1267  TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
1268  {
1269  	int open_tree_fd = -EBADF;
1270  	struct mount_attr attr = {
1271  		.attr_set = MOUNT_ATTR_IDMAP,
1272  	};
1273  
1274  	if (!mount_setattr_supported())
1275  		SKIP(return, "mount_setattr syscall not supported");
1276  
1277  	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1278  				     AT_EMPTY_PATH |
1279  				     AT_NO_AUTOMOUNT |
1280  				     AT_SYMLINK_NOFOLLOW |
1281  				     OPEN_TREE_CLOEXEC);
1282  	ASSERT_GE(open_tree_fd, 0);
1283  
1284  	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1285  
1286  	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1287  	ASSERT_GE(attr.userns_fd, 0);
1288  	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
1289  				    sizeof(attr)), 0);
1290  	ASSERT_EQ(close(attr.userns_fd), 0);
1291  	ASSERT_EQ(close(open_tree_fd), 0);
1292  }
1293  
1294  /**
1295   * Validate that an attached mount in our mount namespace can be idmapped.
1296   */
TEST_F(mount_setattr_idmapped,detached_mount_inside_current_mount_namespace)1297  TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
1298  {
1299  	int open_tree_fd = -EBADF;
1300  	struct mount_attr attr = {
1301  		.attr_set = MOUNT_ATTR_IDMAP,
1302  	};
1303  
1304  	if (!mount_setattr_supported())
1305  		SKIP(return, "mount_setattr syscall not supported");
1306  
1307  	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1308  				     AT_EMPTY_PATH |
1309  				     AT_NO_AUTOMOUNT |
1310  				     AT_SYMLINK_NOFOLLOW |
1311  				     OPEN_TREE_CLOEXEC |
1312  				     OPEN_TREE_CLONE);
1313  	ASSERT_GE(open_tree_fd, 0);
1314  
1315  	/* Changing mount properties on a detached mount. */
1316  	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1317  	ASSERT_GE(attr.userns_fd, 0);
1318  	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1319  				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1320  	ASSERT_EQ(close(attr.userns_fd), 0);
1321  	ASSERT_EQ(close(open_tree_fd), 0);
1322  }
1323  
1324  /**
1325   * Validate that a detached mount not in our mount namespace can be idmapped.
1326   */
TEST_F(mount_setattr_idmapped,detached_mount_outside_current_mount_namespace)1327  TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
1328  {
1329  	int open_tree_fd = -EBADF;
1330  	struct mount_attr attr = {
1331  		.attr_set = MOUNT_ATTR_IDMAP,
1332  	};
1333  
1334  	if (!mount_setattr_supported())
1335  		SKIP(return, "mount_setattr syscall not supported");
1336  
1337  	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1338  				     AT_EMPTY_PATH |
1339  				     AT_NO_AUTOMOUNT |
1340  				     AT_SYMLINK_NOFOLLOW |
1341  				     OPEN_TREE_CLOEXEC |
1342  				     OPEN_TREE_CLONE);
1343  	ASSERT_GE(open_tree_fd, 0);
1344  
1345  	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1346  
1347  	/* Changing mount properties on a detached mount. */
1348  	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1349  	ASSERT_GE(attr.userns_fd, 0);
1350  	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1351  				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1352  	ASSERT_EQ(close(attr.userns_fd), 0);
1353  	ASSERT_EQ(close(open_tree_fd), 0);
1354  }
1355  
1356  /**
1357   * Validate that currently changing the idmapping of an idmapped mount fails.
1358   */
TEST_F(mount_setattr_idmapped,change_idmapping)1359  TEST_F(mount_setattr_idmapped, change_idmapping)
1360  {
1361  	int open_tree_fd = -EBADF;
1362  	struct mount_attr attr = {
1363  		.attr_set = MOUNT_ATTR_IDMAP,
1364  	};
1365  
1366  	if (!mount_setattr_supported())
1367  		SKIP(return, "mount_setattr syscall not supported");
1368  
1369  	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1370  				     AT_EMPTY_PATH |
1371  				     AT_NO_AUTOMOUNT |
1372  				     AT_SYMLINK_NOFOLLOW |
1373  				     OPEN_TREE_CLOEXEC |
1374  				     OPEN_TREE_CLONE);
1375  	ASSERT_GE(open_tree_fd, 0);
1376  
1377  	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1378  	ASSERT_GE(attr.userns_fd, 0);
1379  	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1380  				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1381  	ASSERT_EQ(close(attr.userns_fd), 0);
1382  
1383  	/* Change idmapping on a detached mount that is already idmapped. */
1384  	attr.userns_fd	= get_userns_fd(0, 20000, 10000);
1385  	ASSERT_GE(attr.userns_fd, 0);
1386  	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1387  	ASSERT_EQ(close(attr.userns_fd), 0);
1388  	ASSERT_EQ(close(open_tree_fd), 0);
1389  }
1390  
/*
 * Report whether @path, resolved relative to @dfd with the fstatat() @flags,
 * is owned by @expected_uid and @expected_gid.
 *
 * Returns false as well when the file cannot be stat'ed at all.
 */
static bool expected_uid_gid(int dfd, const char *path, int flags,
			     uid_t expected_uid, gid_t expected_gid)
{
	struct stat sb;

	if (fstatat(dfd, path, &sb, flags) < 0)
		return false;

	if (sb.st_uid != expected_uid)
		return false;

	return sb.st_gid == expected_gid;
}
1403  
TEST_F(mount_setattr_idmapped,idmap_mount_tree_invalid)1404  TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
1405  {
1406  	int open_tree_fd = -EBADF;
1407  	struct mount_attr attr = {
1408  		.attr_set = MOUNT_ATTR_IDMAP,
1409  	};
1410  
1411  	if (!mount_setattr_supported())
1412  		SKIP(return, "mount_setattr syscall not supported");
1413  
1414  	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1415  	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1416  
1417  	ASSERT_EQ(mount("testing", "/mnt/A", "ramfs", MS_NOATIME | MS_NODEV,
1418  			"size=100000,mode=700"), 0);
1419  
1420  	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
1421  
1422  	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
1423  
1424  	open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
1425  				     AT_RECURSIVE |
1426  				     AT_EMPTY_PATH |
1427  				     AT_NO_AUTOMOUNT |
1428  				     AT_SYMLINK_NOFOLLOW |
1429  				     OPEN_TREE_CLOEXEC |
1430  				     OPEN_TREE_CLONE);
1431  	ASSERT_GE(open_tree_fd, 0);
1432  
1433  	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1434  	ASSERT_GE(attr.userns_fd, 0);
1435  	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1436  	ASSERT_EQ(close(attr.userns_fd), 0);
1437  	ASSERT_EQ(close(open_tree_fd), 0);
1438  
1439  	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1440  	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1441  	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
1442  	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
1443  
1444  	(void)umount2("/mnt/A", MNT_DETACH);
1445  }
1446  
TEST_F(mount_setattr,mount_attr_nosymfollow)1447  TEST_F(mount_setattr, mount_attr_nosymfollow)
1448  {
1449  	int fd;
1450  	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
1451  	struct mount_attr attr = {
1452  		.attr_set	= MOUNT_ATTR_NOSYMFOLLOW,
1453  	};
1454  
1455  	if (!mount_setattr_supported())
1456  		SKIP(return, "mount_setattr syscall not supported");
1457  
1458  	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1459  	ASSERT_GT(fd, 0);
1460  	ASSERT_EQ(close(fd), 0);
1461  
1462  	old_flags = read_mnt_flags("/mnt/A");
1463  	ASSERT_GT(old_flags, 0);
1464  
1465  	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1466  
1467  	expected_flags = old_flags;
1468  	expected_flags |= ST_NOSYMFOLLOW;
1469  
1470  	new_flags = read_mnt_flags("/mnt/A");
1471  	ASSERT_EQ(new_flags, expected_flags);
1472  
1473  	new_flags = read_mnt_flags("/mnt/A/AA");
1474  	ASSERT_EQ(new_flags, expected_flags);
1475  
1476  	new_flags = read_mnt_flags("/mnt/A/AA/B");
1477  	ASSERT_EQ(new_flags, expected_flags);
1478  
1479  	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1480  	ASSERT_EQ(new_flags, expected_flags);
1481  
1482  	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1483  	ASSERT_LT(fd, 0);
1484  	ASSERT_EQ(errno, ELOOP);
1485  
1486  	attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
1487  	attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
1488  
1489  	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1490  
1491  	expected_flags &= ~ST_NOSYMFOLLOW;
1492  	new_flags = read_mnt_flags("/mnt/A");
1493  	ASSERT_EQ(new_flags, expected_flags);
1494  
1495  	new_flags = read_mnt_flags("/mnt/A/AA");
1496  	ASSERT_EQ(new_flags, expected_flags);
1497  
1498  	new_flags = read_mnt_flags("/mnt/A/AA/B");
1499  	ASSERT_EQ(new_flags, expected_flags);
1500  
1501  	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1502  	ASSERT_EQ(new_flags, expected_flags);
1503  
1504  	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1505  	ASSERT_GT(fd, 0);
1506  	ASSERT_EQ(close(fd), 0);
1507  }
1508  
/*
 * Macro from kselftest_harness.h; presumably expands to the program entry
 * point that runs the tests declared above (see the harness header).
 */
TEST_HARNESS_MAIN
1510