1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #define __EXPORTED_HEADERS__
4 
5 #include <errno.h>
6 #include <inttypes.h>
7 #include <limits.h>
8 #include <linux/falloc.h>
9 #include <fcntl.h>
10 #include <linux/memfd.h>
11 #include <sched.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <signal.h>
15 #include <string.h>
16 #include <sys/mman.h>
17 #include <sys/stat.h>
18 #include <sys/syscall.h>
19 #include <sys/wait.h>
20 #include <unistd.h>
21 #include <ctype.h>
22 
23 #include "common.h"
24 
/*
 * Banner prefixes for the test output.  memfd_str below starts out as
 * MEMFD_STR and is printed in front of every test name.
 */
#define MEMFD_STR	"memfd:"
#define MEMFD_HUGE_STR	"memfd-hugetlb:"
#define SHARED_FT_STR	"(shared file-table)"

/*
 * MFD_DEF_SIZE is the initial value of mfd_def_size; STACK_SIZE is the
 * size of the stack allocated for children created by spawn_thread().
 */
#define MFD_DEF_SIZE 8192
#define STACK_SIZE 65536

/* NOTE(review): presumably defined locally for older uapi headers - confirm. */
#define F_SEAL_EXEC	0x0020

/*
 * Seal set test_exec_seal() expects to find after adding F_SEAL_EXEC to a
 * memfd that still has executable mode bits.
 */
#define F_WX_SEALS (F_SEAL_SHRINK | \
		    F_SEAL_GROW | \
		    F_SEAL_WRITE | \
		    F_SEAL_FUTURE_WRITE | \
		    F_SEAL_EXEC)

/* memfd_create() flag used by the noexec tests in this file. */
#define MFD_NOEXEC_SEAL	0x0008U

/*
 * Default is not to test hugetlbfs: the helpers use mfd_def_size as the
 * file/mapping size and memfd_str as the output prefix.
 */
static size_t mfd_def_size = MFD_DEF_SIZE;
static const char *memfd_str = MEMFD_STR;
47 
fd2name(int fd,char * buf,size_t bufsize)48 static ssize_t fd2name(int fd, char *buf, size_t bufsize)
49 {
50 	char buf1[PATH_MAX];
51 	int size;
52 	ssize_t nbytes;
53 
54 	size = snprintf(buf1, PATH_MAX, "/proc/self/fd/%d", fd);
55 	if (size < 0) {
56 		printf("snprintf(%d) failed on %m\n", fd);
57 		abort();
58 	}
59 
60 	/*
61 	 * reserver one byte for string termination.
62 	 */
63 	nbytes = readlink(buf1, buf, bufsize-1);
64 	if (nbytes == -1) {
65 		printf("readlink(%s) failed %m\n", buf1);
66 		abort();
67 	}
68 	buf[nbytes] = '\0';
69 	return nbytes;
70 }
71 
/*
 * Create a memfd named @name with @flags and resize it to @sz bytes.
 * Returns the new file descriptor; aborts if creation or ftruncate()
 * fails.
 */
static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
{
	int fd = sys_memfd_create(name, flags);

	if (fd < 0) {
		printf("memfd_create(\"%s\", %u) failed: %m\n",
		       name, flags);
		abort();
	}

	if (ftruncate(fd, sz) < 0) {
		printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
		abort();
	}

	return fd;
}
91 
sysctl_assert_write(const char * val)92 static void sysctl_assert_write(const char *val)
93 {
94 	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
95 
96 	if (fd < 0) {
97 		printf("open sysctl failed: %m\n");
98 		abort();
99 	}
100 
101 	if (write(fd, val, strlen(val)) < 0) {
102 		printf("write sysctl %s failed: %m\n", val);
103 		abort();
104 	}
105 }
106 
sysctl_fail_write(const char * val)107 static void sysctl_fail_write(const char *val)
108 {
109 	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
110 
111 	if (fd < 0) {
112 		printf("open sysctl failed: %m\n");
113 		abort();
114 	}
115 
116 	if (write(fd, val, strlen(val)) >= 0) {
117 		printf("write sysctl %s succeeded, but failure expected\n",
118 				val);
119 		abort();
120 	}
121 }
122 
sysctl_assert_equal(const char * val)123 static void sysctl_assert_equal(const char *val)
124 {
125 	char *p, buf[128] = {};
126 	int fd = open("/proc/sys/vm/memfd_noexec", O_RDONLY | O_CLOEXEC);
127 
128 	if (fd < 0) {
129 		printf("open sysctl failed: %m\n");
130 		abort();
131 	}
132 
133 	if (read(fd, buf, sizeof(buf)) < 0) {
134 		printf("read sysctl failed: %m\n");
135 		abort();
136 	}
137 
138 	/* Strip trailing whitespace. */
139 	p = buf;
140 	while (!isspace(*p))
141 		p++;
142 	*p = '\0';
143 
144 	if (strcmp(buf, val) != 0) {
145 		printf("unexpected sysctl value: expected %s, got %s\n", val, buf);
146 		abort();
147 	}
148 }
149 
/*
 * Open a second, read-write file description for @fd_in by going
 * through its /proc/self/fd entry.  Returns the new descriptor and
 * aborts if the open fails.
 */
static int mfd_assert_reopen_fd(int fd_in)
{
	char proc_path[100];
	int new_fd;

	sprintf(proc_path, "/proc/self/fd/%d", fd_in);

	new_fd = open(proc_path, O_RDWR);
	if (new_fd >= 0)
		return new_fd;

	printf("re-open of existing fd %d failed\n", fd_in);
	abort();
}
165 
/*
 * Expect memfd_create(@name, @flags) to be rejected.  If the kernel
 * hands back a descriptor anyway, close it and abort.
 */
static void mfd_fail_new(const char *name, unsigned int flags)
{
	int fd = sys_memfd_create(name, flags);

	if (fd < 0)
		return;

	printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
	       name, flags);
	close(fd);
	abort();
}
178 
mfd_assert_get_seals(int fd)179 static unsigned int mfd_assert_get_seals(int fd)
180 {
181 	int r;
182 
183 	r = fcntl(fd, F_GET_SEALS);
184 	if (r < 0) {
185 		printf("GET_SEALS(%d) failed: %m\n", fd);
186 		abort();
187 	}
188 
189 	return (unsigned int)r;
190 }
191 
mfd_assert_has_seals(int fd,unsigned int seals)192 static void mfd_assert_has_seals(int fd, unsigned int seals)
193 {
194 	char buf[PATH_MAX];
195 	unsigned int s;
196 	fd2name(fd, buf, PATH_MAX);
197 
198 	s = mfd_assert_get_seals(fd);
199 	if (s != seals) {
200 		printf("%u != %u = GET_SEALS(%s)\n", seals, s, buf);
201 		abort();
202 	}
203 }
204 
mfd_assert_add_seals(int fd,unsigned int seals)205 static void mfd_assert_add_seals(int fd, unsigned int seals)
206 {
207 	int r;
208 	unsigned int s;
209 
210 	s = mfd_assert_get_seals(fd);
211 	r = fcntl(fd, F_ADD_SEALS, seals);
212 	if (r < 0) {
213 		printf("ADD_SEALS(%d, %u -> %u) failed: %m\n", fd, s, seals);
214 		abort();
215 	}
216 }
217 
mfd_fail_add_seals(int fd,unsigned int seals)218 static void mfd_fail_add_seals(int fd, unsigned int seals)
219 {
220 	int r;
221 	unsigned int s;
222 
223 	r = fcntl(fd, F_GET_SEALS);
224 	if (r < 0)
225 		s = 0;
226 	else
227 		s = (unsigned int)r;
228 
229 	r = fcntl(fd, F_ADD_SEALS, seals);
230 	if (r >= 0) {
231 		printf("ADD_SEALS(%d, %u -> %u) didn't fail as expected\n",
232 				fd, s, seals);
233 		abort();
234 	}
235 }
236 
/*
 * Abort unless fstat() reports that @fd is exactly @size bytes long.
 */
static void mfd_assert_size(int fd, size_t size)
{
	struct stat st;

	if (fstat(fd, &st) < 0) {
		printf("fstat(%d) failed: %m\n", fd);
		abort();
	}

	if (st.st_size != size) {
		printf("wrong file size %lld, but expected %lld\n",
		       (long long)st.st_size, (long long)size);
		abort();
	}
}
252 
/*
 * Duplicate @fd with dup() and return the new descriptor; abort on
 * failure.
 */
static int mfd_assert_dup(int fd)
{
	int copy = dup(fd);

	if (copy < 0) {
		printf("dup(%d) failed: %m\n", fd);
		abort();
	}

	return copy;
}
265 
mfd_assert_mmap_shared(int fd)266 static void *mfd_assert_mmap_shared(int fd)
267 {
268 	void *p;
269 
270 	p = mmap(NULL,
271 		 mfd_def_size,
272 		 PROT_READ | PROT_WRITE,
273 		 MAP_SHARED,
274 		 fd,
275 		 0);
276 	if (p == MAP_FAILED) {
277 		printf("mmap() failed: %m\n");
278 		abort();
279 	}
280 
281 	return p;
282 }
283 
mfd_assert_mmap_private(int fd)284 static void *mfd_assert_mmap_private(int fd)
285 {
286 	void *p;
287 
288 	p = mmap(NULL,
289 		 mfd_def_size,
290 		 PROT_READ,
291 		 MAP_PRIVATE,
292 		 fd,
293 		 0);
294 	if (p == MAP_FAILED) {
295 		printf("mmap() failed: %m\n");
296 		abort();
297 	}
298 
299 	return p;
300 }
301 
/*
 * Open /proc/self/fd/<fd> with the given @flags and @mode.  Returns the
 * new descriptor and aborts if open() fails.
 */
static int mfd_assert_open(int fd, int flags, mode_t mode)
{
	char path[512];
	int new_fd;

	sprintf(path, "/proc/self/fd/%d", fd);

	new_fd = open(path, flags, mode);
	if (new_fd >= 0)
		return new_fd;

	printf("open(%s) failed: %m\n", path);
	abort();
}
316 
/*
 * Expect opening /proc/self/fd/<fd> with @flags and @mode to fail;
 * abort if the open succeeds.
 */
static void mfd_fail_open(int fd, int flags, mode_t mode)
{
	char path[512];

	sprintf(path, "/proc/self/fd/%d", fd);

	if (open(path, flags, mode) < 0)
		return;

	printf("open(%s) didn't fail as expected\n", path);
	abort();
}
329 
mfd_assert_read(int fd)330 static void mfd_assert_read(int fd)
331 {
332 	char buf[16];
333 	void *p;
334 	ssize_t l;
335 
336 	l = read(fd, buf, sizeof(buf));
337 	if (l != sizeof(buf)) {
338 		printf("read() failed: %m\n");
339 		abort();
340 	}
341 
342 	/* verify PROT_READ *is* allowed */
343 	p = mmap(NULL,
344 		 mfd_def_size,
345 		 PROT_READ,
346 		 MAP_PRIVATE,
347 		 fd,
348 		 0);
349 	if (p == MAP_FAILED) {
350 		printf("mmap() failed: %m\n");
351 		abort();
352 	}
353 	munmap(p, mfd_def_size);
354 
355 	/* verify MAP_PRIVATE is *always* allowed (even writable) */
356 	p = mmap(NULL,
357 		 mfd_def_size,
358 		 PROT_READ | PROT_WRITE,
359 		 MAP_PRIVATE,
360 		 fd,
361 		 0);
362 	if (p == MAP_FAILED) {
363 		printf("mmap() failed: %m\n");
364 		abort();
365 	}
366 	munmap(p, mfd_def_size);
367 }
368 
369 /* Test that PROT_READ + MAP_SHARED mappings work. */
mfd_assert_read_shared(int fd)370 static void mfd_assert_read_shared(int fd)
371 {
372 	void *p;
373 
374 	/* verify PROT_READ and MAP_SHARED *is* allowed */
375 	p = mmap(NULL,
376 		 mfd_def_size,
377 		 PROT_READ,
378 		 MAP_SHARED,
379 		 fd,
380 		 0);
381 	if (p == MAP_FAILED) {
382 		printf("mmap() failed: %m\n");
383 		abort();
384 	}
385 	munmap(p, mfd_def_size);
386 }
387 
mfd_assert_fork_private_write(int fd)388 static void mfd_assert_fork_private_write(int fd)
389 {
390 	int *p;
391 	pid_t pid;
392 
393 	p = mmap(NULL,
394 		 mfd_def_size,
395 		 PROT_READ | PROT_WRITE,
396 		 MAP_PRIVATE,
397 		 fd,
398 		 0);
399 	if (p == MAP_FAILED) {
400 		printf("mmap() failed: %m\n");
401 		abort();
402 	}
403 
404 	p[0] = 22;
405 
406 	pid = fork();
407 	if (pid == 0) {
408 		p[0] = 33;
409 		exit(0);
410 	} else {
411 		waitpid(pid, NULL, 0);
412 
413 		if (p[0] != 22) {
414 			printf("MAP_PRIVATE copy-on-write failed: %m\n");
415 			abort();
416 		}
417 	}
418 
419 	munmap(p, mfd_def_size);
420 }
421 
mfd_assert_write(int fd)422 static void mfd_assert_write(int fd)
423 {
424 	ssize_t l;
425 	void *p;
426 	int r;
427 
428 	/*
429 	 * huegtlbfs does not support write, but we want to
430 	 * verify everything else here.
431 	 */
432 	if (!hugetlbfs_test) {
433 		/* verify write() succeeds */
434 		l = write(fd, "\0\0\0\0", 4);
435 		if (l != 4) {
436 			printf("write() failed: %m\n");
437 			abort();
438 		}
439 	}
440 
441 	/* verify PROT_READ | PROT_WRITE is allowed */
442 	p = mmap(NULL,
443 		 mfd_def_size,
444 		 PROT_READ | PROT_WRITE,
445 		 MAP_SHARED,
446 		 fd,
447 		 0);
448 	if (p == MAP_FAILED) {
449 		printf("mmap() failed: %m\n");
450 		abort();
451 	}
452 	*(char *)p = 0;
453 	munmap(p, mfd_def_size);
454 
455 	/* verify PROT_WRITE is allowed */
456 	p = mmap(NULL,
457 		 mfd_def_size,
458 		 PROT_WRITE,
459 		 MAP_SHARED,
460 		 fd,
461 		 0);
462 	if (p == MAP_FAILED) {
463 		printf("mmap() failed: %m\n");
464 		abort();
465 	}
466 	*(char *)p = 0;
467 	munmap(p, mfd_def_size);
468 
469 	/* verify PROT_READ with MAP_SHARED is allowed and a following
470 	 * mprotect(PROT_WRITE) allows writing */
471 	p = mmap(NULL,
472 		 mfd_def_size,
473 		 PROT_READ,
474 		 MAP_SHARED,
475 		 fd,
476 		 0);
477 	if (p == MAP_FAILED) {
478 		printf("mmap() failed: %m\n");
479 		abort();
480 	}
481 
482 	r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
483 	if (r < 0) {
484 		printf("mprotect() failed: %m\n");
485 		abort();
486 	}
487 
488 	*(char *)p = 0;
489 	munmap(p, mfd_def_size);
490 
491 	/* verify PUNCH_HOLE works */
492 	r = fallocate(fd,
493 		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
494 		      0,
495 		      mfd_def_size);
496 	if (r < 0) {
497 		printf("fallocate(PUNCH_HOLE) failed: %m\n");
498 		abort();
499 	}
500 }
501 
mfd_fail_write(int fd)502 static void mfd_fail_write(int fd)
503 {
504 	ssize_t l;
505 	void *p;
506 	int r;
507 
508 	/* verify write() fails */
509 	l = write(fd, "data", 4);
510 	if (l != -EPERM) {
511 		printf("expected EPERM on write(), but got %d: %m\n", (int)l);
512 		abort();
513 	}
514 
515 	/* verify PROT_READ | PROT_WRITE is not allowed */
516 	p = mmap(NULL,
517 		 mfd_def_size,
518 		 PROT_READ | PROT_WRITE,
519 		 MAP_SHARED,
520 		 fd,
521 		 0);
522 	if (p != MAP_FAILED) {
523 		printf("mmap() didn't fail as expected\n");
524 		abort();
525 	}
526 
527 	/* verify PROT_WRITE is not allowed */
528 	p = mmap(NULL,
529 		 mfd_def_size,
530 		 PROT_WRITE,
531 		 MAP_SHARED,
532 		 fd,
533 		 0);
534 	if (p != MAP_FAILED) {
535 		printf("mmap() didn't fail as expected\n");
536 		abort();
537 	}
538 
539 	/* Verify PROT_READ with MAP_SHARED with a following mprotect is not
540 	 * allowed. Note that for r/w the kernel already prevents the mmap. */
541 	p = mmap(NULL,
542 		 mfd_def_size,
543 		 PROT_READ,
544 		 MAP_SHARED,
545 		 fd,
546 		 0);
547 	if (p != MAP_FAILED) {
548 		r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
549 		if (r >= 0) {
550 			printf("mmap()+mprotect() didn't fail as expected\n");
551 			abort();
552 		}
553 		munmap(p, mfd_def_size);
554 	}
555 
556 	/* verify PUNCH_HOLE fails */
557 	r = fallocate(fd,
558 		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
559 		      0,
560 		      mfd_def_size);
561 	if (r >= 0) {
562 		printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
563 		abort();
564 	}
565 }
566 
mfd_assert_shrink(int fd)567 static void mfd_assert_shrink(int fd)
568 {
569 	int r, fd2;
570 
571 	r = ftruncate(fd, mfd_def_size / 2);
572 	if (r < 0) {
573 		printf("ftruncate(SHRINK) failed: %m\n");
574 		abort();
575 	}
576 
577 	mfd_assert_size(fd, mfd_def_size / 2);
578 
579 	fd2 = mfd_assert_open(fd,
580 			      O_RDWR | O_CREAT | O_TRUNC,
581 			      S_IRUSR | S_IWUSR);
582 	close(fd2);
583 
584 	mfd_assert_size(fd, 0);
585 }
586 
mfd_fail_shrink(int fd)587 static void mfd_fail_shrink(int fd)
588 {
589 	int r;
590 
591 	r = ftruncate(fd, mfd_def_size / 2);
592 	if (r >= 0) {
593 		printf("ftruncate(SHRINK) didn't fail as expected\n");
594 		abort();
595 	}
596 
597 	mfd_fail_open(fd,
598 		      O_RDWR | O_CREAT | O_TRUNC,
599 		      S_IRUSR | S_IWUSR);
600 }
601 
mfd_assert_grow(int fd)602 static void mfd_assert_grow(int fd)
603 {
604 	int r;
605 
606 	r = ftruncate(fd, mfd_def_size * 2);
607 	if (r < 0) {
608 		printf("ftruncate(GROW) failed: %m\n");
609 		abort();
610 	}
611 
612 	mfd_assert_size(fd, mfd_def_size * 2);
613 
614 	r = fallocate(fd,
615 		      0,
616 		      0,
617 		      mfd_def_size * 4);
618 	if (r < 0) {
619 		printf("fallocate(ALLOC) failed: %m\n");
620 		abort();
621 	}
622 
623 	mfd_assert_size(fd, mfd_def_size * 4);
624 }
625 
mfd_fail_grow(int fd)626 static void mfd_fail_grow(int fd)
627 {
628 	int r;
629 
630 	r = ftruncate(fd, mfd_def_size * 2);
631 	if (r >= 0) {
632 		printf("ftruncate(GROW) didn't fail as expected\n");
633 		abort();
634 	}
635 
636 	r = fallocate(fd,
637 		      0,
638 		      0,
639 		      mfd_def_size * 4);
640 	if (r >= 0) {
641 		printf("fallocate(ALLOC) didn't fail as expected\n");
642 		abort();
643 	}
644 }
645 
mfd_assert_grow_write(int fd)646 static void mfd_assert_grow_write(int fd)
647 {
648 	static char *buf;
649 	ssize_t l;
650 
651 	/* hugetlbfs does not support write */
652 	if (hugetlbfs_test)
653 		return;
654 
655 	buf = malloc(mfd_def_size * 8);
656 	if (!buf) {
657 		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
658 		abort();
659 	}
660 
661 	l = pwrite(fd, buf, mfd_def_size * 8, 0);
662 	if (l != (mfd_def_size * 8)) {
663 		printf("pwrite() failed: %m\n");
664 		abort();
665 	}
666 
667 	mfd_assert_size(fd, mfd_def_size * 8);
668 }
669 
mfd_fail_grow_write(int fd)670 static void mfd_fail_grow_write(int fd)
671 {
672 	static char *buf;
673 	ssize_t l;
674 
675 	/* hugetlbfs does not support write */
676 	if (hugetlbfs_test)
677 		return;
678 
679 	buf = malloc(mfd_def_size * 8);
680 	if (!buf) {
681 		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
682 		abort();
683 	}
684 
685 	l = pwrite(fd, buf, mfd_def_size * 8, 0);
686 	if (l == (mfd_def_size * 8)) {
687 		printf("pwrite() didn't fail as expected\n");
688 		abort();
689 	}
690 }
691 
mfd_assert_mode(int fd,int mode)692 static void mfd_assert_mode(int fd, int mode)
693 {
694 	struct stat st;
695 	char buf[PATH_MAX];
696 
697 	fd2name(fd, buf, PATH_MAX);
698 
699 	if (fstat(fd, &st) < 0) {
700 		printf("fstat(%s) failed: %m\n", buf);
701 		abort();
702 	}
703 
704 	if ((st.st_mode & 07777) != mode) {
705 		printf("fstat(%s) wrong file mode 0%04o, but expected 0%04o\n",
706 		       buf, (int)st.st_mode & 07777, mode);
707 		abort();
708 	}
709 }
710 
mfd_assert_chmod(int fd,int mode)711 static void mfd_assert_chmod(int fd, int mode)
712 {
713 	char buf[PATH_MAX];
714 
715 	fd2name(fd, buf, PATH_MAX);
716 
717 	if (fchmod(fd, mode) < 0) {
718 		printf("fchmod(%s, 0%04o) failed: %m\n", buf, mode);
719 		abort();
720 	}
721 
722 	mfd_assert_mode(fd, mode);
723 }
724 
mfd_fail_chmod(int fd,int mode)725 static void mfd_fail_chmod(int fd, int mode)
726 {
727 	struct stat st;
728 	char buf[PATH_MAX];
729 
730 	fd2name(fd, buf, PATH_MAX);
731 
732 	if (fstat(fd, &st) < 0) {
733 		printf("fstat(%s) failed: %m\n", buf);
734 		abort();
735 	}
736 
737 	if (fchmod(fd, mode) == 0) {
738 		printf("fchmod(%s, 0%04o) didn't fail as expected\n",
739 		       buf, mode);
740 		abort();
741 	}
742 
743 	/* verify that file mode bits did not change */
744 	mfd_assert_mode(fd, st.st_mode & 07777);
745 }
746 
idle_thread_fn(void * arg)747 static int idle_thread_fn(void *arg)
748 {
749 	sigset_t set;
750 	int sig;
751 
752 	/* dummy waiter; SIGTERM terminates us anyway */
753 	sigemptyset(&set);
754 	sigaddset(&set, SIGTERM);
755 	sigwait(&set, &sig);
756 
757 	return 0;
758 }
759 
spawn_thread(unsigned int flags,int (* fn)(void *),void * arg)760 static pid_t spawn_thread(unsigned int flags, int (*fn)(void *), void *arg)
761 {
762 	uint8_t *stack;
763 	pid_t pid;
764 
765 	stack = malloc(STACK_SIZE);
766 	if (!stack) {
767 		printf("malloc(STACK_SIZE) failed: %m\n");
768 		abort();
769 	}
770 
771 	pid = clone(fn, stack + STACK_SIZE, SIGCHLD | flags, arg);
772 	if (pid < 0) {
773 		printf("clone() failed: %m\n");
774 		abort();
775 	}
776 
777 	return pid;
778 }
779 
/*
 * Wait for child @pid and abort unless it ended cleanly: waitpid()
 * must succeed, and the child must neither have been killed by a
 * signal nor have exited with a non-zero status.
 */
static void join_thread(pid_t pid)
{
	int status;

	if (waitpid(pid, &status, 0) < 0) {
		printf("newpid thread: waitpid() failed: %m\n");
		abort();
	}

	/* "signalled" and "exited" are mutually exclusive outcomes. */
	if (WIFSIGNALED(status)) {
		printf("newpid thread: killed by signal %d\n",
		       WTERMSIG(status));
		abort();
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
		printf("newpid thread: exited with non-zero error code %d\n",
		       WEXITSTATUS(status));
		abort();
	}
}
801 
spawn_idle_thread(unsigned int flags)802 static pid_t spawn_idle_thread(unsigned int flags)
803 {
804 	return spawn_thread(flags, idle_thread_fn, NULL);
805 }
806 
/*
 * Stop an idle child: send SIGTERM to @pid and wait for it.  Neither
 * call's result is checked.
 */
static void join_idle_thread(pid_t pid)
{
	/* Signal first, then collect the child. */
	kill(pid, SIGTERM);
	waitpid(pid, NULL, 0);
}
812 
813 /*
814  * Test memfd_create() syscall
815  * Verify syscall-argument validation, including name checks, flag validation
816  * and more.
817  */
test_create(void)818 static void test_create(void)
819 {
820 	char buf[2048];
821 	int fd;
822 
823 	printf("%s CREATE\n", memfd_str);
824 
825 	/* test NULL name */
826 	mfd_fail_new(NULL, 0);
827 
828 	/* test over-long name (not zero-terminated) */
829 	memset(buf, 0xff, sizeof(buf));
830 	mfd_fail_new(buf, 0);
831 
832 	/* test over-long zero-terminated name */
833 	memset(buf, 0xff, sizeof(buf));
834 	buf[sizeof(buf) - 1] = 0;
835 	mfd_fail_new(buf, 0);
836 
837 	/* verify "" is a valid name */
838 	fd = mfd_assert_new("", 0, 0);
839 	close(fd);
840 
841 	/* verify invalid O_* open flags */
842 	mfd_fail_new("", 0x0100);
843 	mfd_fail_new("", ~MFD_CLOEXEC);
844 	mfd_fail_new("", ~MFD_ALLOW_SEALING);
845 	mfd_fail_new("", ~0);
846 	mfd_fail_new("", 0x80000000U);
847 
848 	/* verify EXEC and NOEXEC_SEAL can't both be set */
849 	mfd_fail_new("", MFD_EXEC | MFD_NOEXEC_SEAL);
850 
851 	/* verify MFD_CLOEXEC is allowed */
852 	fd = mfd_assert_new("", 0, MFD_CLOEXEC);
853 	close(fd);
854 
855 	/* verify MFD_ALLOW_SEALING is allowed */
856 	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
857 	close(fd);
858 
859 	/* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
860 	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
861 	close(fd);
862 }
863 
864 /*
865  * Test basic sealing
866  * A very basic sealing test to see whether setting/retrieving seals works.
867  */
test_basic(void)868 static void test_basic(void)
869 {
870 	int fd;
871 
872 	printf("%s BASIC\n", memfd_str);
873 
874 	fd = mfd_assert_new("kern_memfd_basic",
875 			    mfd_def_size,
876 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
877 
878 	/* add basic seals */
879 	mfd_assert_has_seals(fd, 0);
880 	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
881 				 F_SEAL_WRITE);
882 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
883 				 F_SEAL_WRITE);
884 
885 	/* add them again */
886 	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
887 				 F_SEAL_WRITE);
888 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
889 				 F_SEAL_WRITE);
890 
891 	/* add more seals and seal against sealing */
892 	mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
893 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
894 				 F_SEAL_GROW |
895 				 F_SEAL_WRITE |
896 				 F_SEAL_SEAL);
897 
898 	/* verify that sealing no longer works */
899 	mfd_fail_add_seals(fd, F_SEAL_GROW);
900 	mfd_fail_add_seals(fd, 0);
901 
902 	close(fd);
903 
904 	/* verify sealing does not work without MFD_ALLOW_SEALING */
905 	fd = mfd_assert_new("kern_memfd_basic",
906 			    mfd_def_size,
907 			    MFD_CLOEXEC);
908 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
909 	mfd_fail_add_seals(fd, F_SEAL_SHRINK |
910 			       F_SEAL_GROW |
911 			       F_SEAL_WRITE);
912 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
913 	close(fd);
914 }
915 
916 /*
917  * Test SEAL_WRITE
918  * Test whether SEAL_WRITE actually prevents modifications.
919  */
test_seal_write(void)920 static void test_seal_write(void)
921 {
922 	int fd;
923 
924 	printf("%s SEAL-WRITE\n", memfd_str);
925 
926 	fd = mfd_assert_new("kern_memfd_seal_write",
927 			    mfd_def_size,
928 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
929 	mfd_assert_has_seals(fd, 0);
930 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
931 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
932 
933 	mfd_assert_read(fd);
934 	mfd_fail_write(fd);
935 	mfd_assert_shrink(fd);
936 	mfd_assert_grow(fd);
937 	mfd_fail_grow_write(fd);
938 
939 	close(fd);
940 }
941 
942 /*
943  * Test SEAL_FUTURE_WRITE
944  * Test whether SEAL_FUTURE_WRITE actually prevents modifications.
945  */
test_seal_future_write(void)946 static void test_seal_future_write(void)
947 {
948 	int fd, fd2;
949 	void *p;
950 
951 	printf("%s SEAL-FUTURE-WRITE\n", memfd_str);
952 
953 	fd = mfd_assert_new("kern_memfd_seal_future_write",
954 			    mfd_def_size,
955 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
956 
957 	p = mfd_assert_mmap_shared(fd);
958 
959 	mfd_assert_has_seals(fd, 0);
960 
961 	mfd_assert_add_seals(fd, F_SEAL_FUTURE_WRITE);
962 	mfd_assert_has_seals(fd, F_SEAL_FUTURE_WRITE);
963 
964 	/* read should pass, writes should fail */
965 	mfd_assert_read(fd);
966 	mfd_assert_read_shared(fd);
967 	mfd_fail_write(fd);
968 
969 	fd2 = mfd_assert_reopen_fd(fd);
970 	/* read should pass, writes should still fail */
971 	mfd_assert_read(fd2);
972 	mfd_assert_read_shared(fd2);
973 	mfd_fail_write(fd2);
974 
975 	mfd_assert_fork_private_write(fd);
976 
977 	munmap(p, mfd_def_size);
978 	close(fd2);
979 	close(fd);
980 }
981 
982 /*
983  * Test SEAL_SHRINK
984  * Test whether SEAL_SHRINK actually prevents shrinking
985  */
test_seal_shrink(void)986 static void test_seal_shrink(void)
987 {
988 	int fd;
989 
990 	printf("%s SEAL-SHRINK\n", memfd_str);
991 
992 	fd = mfd_assert_new("kern_memfd_seal_shrink",
993 			    mfd_def_size,
994 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
995 	mfd_assert_has_seals(fd, 0);
996 	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
997 	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
998 
999 	mfd_assert_read(fd);
1000 	mfd_assert_write(fd);
1001 	mfd_fail_shrink(fd);
1002 	mfd_assert_grow(fd);
1003 	mfd_assert_grow_write(fd);
1004 
1005 	close(fd);
1006 }
1007 
1008 /*
1009  * Test SEAL_GROW
1010  * Test whether SEAL_GROW actually prevents growing
1011  */
test_seal_grow(void)1012 static void test_seal_grow(void)
1013 {
1014 	int fd;
1015 
1016 	printf("%s SEAL-GROW\n", memfd_str);
1017 
1018 	fd = mfd_assert_new("kern_memfd_seal_grow",
1019 			    mfd_def_size,
1020 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1021 	mfd_assert_has_seals(fd, 0);
1022 	mfd_assert_add_seals(fd, F_SEAL_GROW);
1023 	mfd_assert_has_seals(fd, F_SEAL_GROW);
1024 
1025 	mfd_assert_read(fd);
1026 	mfd_assert_write(fd);
1027 	mfd_assert_shrink(fd);
1028 	mfd_fail_grow(fd);
1029 	mfd_fail_grow_write(fd);
1030 
1031 	close(fd);
1032 }
1033 
1034 /*
1035  * Test SEAL_SHRINK | SEAL_GROW
1036  * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
1037  */
test_seal_resize(void)1038 static void test_seal_resize(void)
1039 {
1040 	int fd;
1041 
1042 	printf("%s SEAL-RESIZE\n", memfd_str);
1043 
1044 	fd = mfd_assert_new("kern_memfd_seal_resize",
1045 			    mfd_def_size,
1046 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1047 	mfd_assert_has_seals(fd, 0);
1048 	mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1049 	mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1050 
1051 	mfd_assert_read(fd);
1052 	mfd_assert_write(fd);
1053 	mfd_fail_shrink(fd);
1054 	mfd_fail_grow(fd);
1055 	mfd_fail_grow_write(fd);
1056 
1057 	close(fd);
1058 }
1059 
1060 /*
1061  * Test SEAL_EXEC
1062  * Test fd is created with exec and allow sealing.
1063  * chmod() cannot change x bits after sealing.
1064  */
test_exec_seal(void)1065 static void test_exec_seal(void)
1066 {
1067 	int fd;
1068 
1069 	printf("%s SEAL-EXEC\n", memfd_str);
1070 
1071 	printf("%s	Apply SEAL_EXEC\n", memfd_str);
1072 	fd = mfd_assert_new("kern_memfd_seal_exec",
1073 			    mfd_def_size,
1074 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1075 
1076 	mfd_assert_mode(fd, 0777);
1077 	mfd_assert_chmod(fd, 0644);
1078 
1079 	mfd_assert_has_seals(fd, 0);
1080 	mfd_assert_add_seals(fd, F_SEAL_EXEC);
1081 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1082 
1083 	mfd_assert_chmod(fd, 0600);
1084 	mfd_fail_chmod(fd, 0777);
1085 	mfd_fail_chmod(fd, 0670);
1086 	mfd_fail_chmod(fd, 0605);
1087 	mfd_fail_chmod(fd, 0700);
1088 	mfd_fail_chmod(fd, 0100);
1089 	mfd_assert_chmod(fd, 0666);
1090 	mfd_assert_write(fd);
1091 	close(fd);
1092 
1093 	printf("%s	Apply ALL_SEALS\n", memfd_str);
1094 	fd = mfd_assert_new("kern_memfd_seal_exec",
1095 			    mfd_def_size,
1096 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1097 
1098 	mfd_assert_mode(fd, 0777);
1099 	mfd_assert_chmod(fd, 0700);
1100 
1101 	mfd_assert_has_seals(fd, 0);
1102 	mfd_assert_add_seals(fd, F_SEAL_EXEC);
1103 	mfd_assert_has_seals(fd, F_WX_SEALS);
1104 
1105 	mfd_fail_chmod(fd, 0711);
1106 	mfd_fail_chmod(fd, 0600);
1107 	mfd_fail_write(fd);
1108 	close(fd);
1109 }
1110 
1111 /*
1112  * Test EXEC_NO_SEAL
1113  * Test fd is created with exec and not allow sealing.
1114  */
test_exec_no_seal(void)1115 static void test_exec_no_seal(void)
1116 {
1117 	int fd;
1118 
1119 	printf("%s EXEC_NO_SEAL\n", memfd_str);
1120 
1121 	/* Create with EXEC but without ALLOW_SEALING */
1122 	fd = mfd_assert_new("kern_memfd_exec_no_sealing",
1123 			    mfd_def_size,
1124 			    MFD_CLOEXEC | MFD_EXEC);
1125 	mfd_assert_mode(fd, 0777);
1126 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1127 	mfd_assert_chmod(fd, 0666);
1128 	close(fd);
1129 }
1130 
1131 /*
1132  * Test memfd_create with MFD_NOEXEC flag
1133  */
test_noexec_seal(void)1134 static void test_noexec_seal(void)
1135 {
1136 	int fd;
1137 
1138 	printf("%s NOEXEC_SEAL\n", memfd_str);
1139 
1140 	/* Create with NOEXEC and ALLOW_SEALING */
1141 	fd = mfd_assert_new("kern_memfd_noexec",
1142 			    mfd_def_size,
1143 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_NOEXEC_SEAL);
1144 	mfd_assert_mode(fd, 0666);
1145 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1146 	mfd_fail_chmod(fd, 0777);
1147 	close(fd);
1148 
1149 	/* Create with NOEXEC but without ALLOW_SEALING */
1150 	fd = mfd_assert_new("kern_memfd_noexec",
1151 			    mfd_def_size,
1152 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL);
1153 	mfd_assert_mode(fd, 0666);
1154 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1155 	mfd_fail_chmod(fd, 0777);
1156 	close(fd);
1157 }
1158 
test_sysctl_sysctl0(void)1159 static void test_sysctl_sysctl0(void)
1160 {
1161 	int fd;
1162 
1163 	sysctl_assert_equal("0");
1164 
1165 	fd = mfd_assert_new("kern_memfd_sysctl_0_dfl",
1166 			    mfd_def_size,
1167 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1168 	mfd_assert_mode(fd, 0777);
1169 	mfd_assert_has_seals(fd, 0);
1170 	mfd_assert_chmod(fd, 0644);
1171 	close(fd);
1172 }
1173 
/*
 * Write "0" to the memfd_noexec sysctl, then run the checks for that
 * level.
 */
static void test_sysctl_set_sysctl0(void)
{
	/* switch the level first, then verify the resulting behaviour */
	sysctl_assert_write("0");

	test_sysctl_sysctl0();
}
1179 
test_sysctl_sysctl1(void)1180 static void test_sysctl_sysctl1(void)
1181 {
1182 	int fd;
1183 
1184 	sysctl_assert_equal("1");
1185 
1186 	fd = mfd_assert_new("kern_memfd_sysctl_1_dfl",
1187 			    mfd_def_size,
1188 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1189 	mfd_assert_mode(fd, 0666);
1190 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1191 	mfd_fail_chmod(fd, 0777);
1192 	close(fd);
1193 
1194 	fd = mfd_assert_new("kern_memfd_sysctl_1_exec",
1195 			    mfd_def_size,
1196 			    MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
1197 	mfd_assert_mode(fd, 0777);
1198 	mfd_assert_has_seals(fd, 0);
1199 	mfd_assert_chmod(fd, 0644);
1200 	close(fd);
1201 
1202 	fd = mfd_assert_new("kern_memfd_sysctl_1_noexec",
1203 			    mfd_def_size,
1204 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
1205 	mfd_assert_mode(fd, 0666);
1206 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1207 	mfd_fail_chmod(fd, 0777);
1208 	close(fd);
1209 }
1210 
/*
 * Write "1" to the memfd_noexec sysctl, then run the checks for that
 * level.
 */
static void test_sysctl_set_sysctl1(void)
{
	/* switch the level first, then verify the resulting behaviour */
	sysctl_assert_write("1");

	test_sysctl_sysctl1();
}
1216 
test_sysctl_sysctl2(void)1217 static void test_sysctl_sysctl2(void)
1218 {
1219 	int fd;
1220 
1221 	sysctl_assert_equal("2");
1222 
1223 	fd = mfd_assert_new("kern_memfd_sysctl_2_dfl",
1224 			    mfd_def_size,
1225 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1226 	mfd_assert_mode(fd, 0666);
1227 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1228 	mfd_fail_chmod(fd, 0777);
1229 	close(fd);
1230 
1231 	mfd_fail_new("kern_memfd_sysctl_2_exec",
1232 		     MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
1233 
1234 	fd = mfd_assert_new("kern_memfd_sysctl_2_noexec",
1235 			    mfd_def_size,
1236 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
1237 	mfd_assert_mode(fd, 0666);
1238 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1239 	mfd_fail_chmod(fd, 0777);
1240 	close(fd);
1241 }
1242 
/*
 * Write "2" to the memfd_noexec sysctl, then run the checks for that
 * level.
 */
static void test_sysctl_set_sysctl2(void)
{
	/* switch the level first, then verify the resulting behaviour */
	sysctl_assert_write("2");

	test_sysctl_sysctl2();
}
1248 
sysctl_simple_child(void * arg)1249 static int sysctl_simple_child(void *arg)
1250 {
1251 	printf("%s sysctl 0\n", memfd_str);
1252 	test_sysctl_set_sysctl0();
1253 
1254 	printf("%s sysctl 1\n", memfd_str);
1255 	test_sysctl_set_sysctl1();
1256 
1257 	printf("%s sysctl 0\n", memfd_str);
1258 	test_sysctl_set_sysctl0();
1259 
1260 	printf("%s sysctl 2\n", memfd_str);
1261 	test_sysctl_set_sysctl2();
1262 
1263 	printf("%s sysctl 1\n", memfd_str);
1264 	test_sysctl_set_sysctl1();
1265 
1266 	printf("%s sysctl 0\n", memfd_str);
1267 	test_sysctl_set_sysctl0();
1268 
1269 	return 0;
1270 }
1271 
1272 /*
1273  * Test sysctl
1274  * A very basic test to make sure the core sysctl semantics work.
1275  */
test_sysctl_simple(void)1276 static void test_sysctl_simple(void)
1277 {
1278 	int pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
1279 
1280 	join_thread(pid);
1281 }
1282 
/*
 * clone() entry point that treats @arg as a void (*)(void) function,
 * calls it and returns 0.
 */
static int sysctl_nested(void *arg)
{
	void (*test_fn)(void);

	test_fn = arg;
	test_fn();

	return 0;
}
1290 
/*
 * Like sysctl_nested(), but the child first stops itself with SIGSTOP
 * and only runs the function in @arg after it has been continued.
 */
static int sysctl_nested_wait(void *arg)
{
	pid_t self = getpid();

	/* Wait for a SIGCONT. */
	kill(self, SIGSTOP);

	return sysctl_nested(arg);
}
1297 
/*
 * Check via sysctl_fail_write() that writing "0" to the sysctl is rejected,
 * then run the test_sysctl_sysctl1() checks.
 */
static void test_sysctl_sysctl1_failset(void)
{
	sysctl_fail_write("0");
	test_sysctl_sysctl1();
}
1303 
/*
 * Check via sysctl_fail_write() that writing "1" and then "0" to the sysctl
 * is rejected. The test_sysctl_sysctl2() checks are run after each rejected
 * write.
 */
static void test_sysctl_sysctl2_failset(void)
{
	sysctl_fail_write("1");
	test_sysctl_sysctl2();

	sysctl_fail_write("0");
	test_sysctl_sysctl2();
}
1312 
sysctl_nested_child(void * arg)1313 static int sysctl_nested_child(void *arg)
1314 {
1315 	int pid;
1316 
1317 	printf("%s nested sysctl 0\n", memfd_str);
1318 	sysctl_assert_write("0");
1319 	/* A further nested pidns works the same. */
1320 	pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
1321 	join_thread(pid);
1322 
1323 	printf("%s nested sysctl 1\n", memfd_str);
1324 	sysctl_assert_write("1");
1325 	/* Child inherits our setting. */
1326 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl1);
1327 	join_thread(pid);
1328 	/* Child cannot raise the setting. */
1329 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1330 			   test_sysctl_sysctl1_failset);
1331 	join_thread(pid);
1332 	/* Child can lower the setting. */
1333 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1334 			   test_sysctl_set_sysctl2);
1335 	join_thread(pid);
1336 	/* Child lowering the setting has no effect on our setting. */
1337 	test_sysctl_sysctl1();
1338 
1339 	printf("%s nested sysctl 2\n", memfd_str);
1340 	sysctl_assert_write("2");
1341 	/* Child inherits our setting. */
1342 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl2);
1343 	join_thread(pid);
1344 	/* Child cannot raise the setting. */
1345 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1346 			   test_sysctl_sysctl2_failset);
1347 	join_thread(pid);
1348 
1349 	/* Verify that the rules are actually inherited after fork. */
1350 	printf("%s nested sysctl 0 -> 1 after fork\n", memfd_str);
1351 	sysctl_assert_write("0");
1352 
1353 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1354 			   test_sysctl_sysctl1_failset);
1355 	sysctl_assert_write("1");
1356 	kill(pid, SIGCONT);
1357 	join_thread(pid);
1358 
1359 	printf("%s nested sysctl 0 -> 2 after fork\n", memfd_str);
1360 	sysctl_assert_write("0");
1361 
1362 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1363 			   test_sysctl_sysctl2_failset);
1364 	sysctl_assert_write("2");
1365 	kill(pid, SIGCONT);
1366 	join_thread(pid);
1367 
1368 	/*
1369 	 * Verify that the current effective setting is saved on fork, meaning
1370 	 * that the parent lowering the sysctl doesn't affect already-forked
1371 	 * children.
1372 	 */
1373 	printf("%s nested sysctl 2 -> 1 after fork\n", memfd_str);
1374 	sysctl_assert_write("2");
1375 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1376 			   test_sysctl_sysctl2);
1377 	sysctl_assert_write("1");
1378 	kill(pid, SIGCONT);
1379 	join_thread(pid);
1380 
1381 	printf("%s nested sysctl 2 -> 0 after fork\n", memfd_str);
1382 	sysctl_assert_write("2");
1383 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1384 			   test_sysctl_sysctl2);
1385 	sysctl_assert_write("0");
1386 	kill(pid, SIGCONT);
1387 	join_thread(pid);
1388 
1389 	printf("%s nested sysctl 1 -> 0 after fork\n", memfd_str);
1390 	sysctl_assert_write("1");
1391 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1392 			   test_sysctl_sysctl1);
1393 	sysctl_assert_write("0");
1394 	kill(pid, SIGCONT);
1395 	join_thread(pid);
1396 
1397 	return 0;
1398 }
1399 
1400 /*
1401  * Test sysctl with nested pid namespaces
1402  * Make sure that the sysctl nesting semantics work correctly.
1403  */
test_sysctl_nested(void)1404 static void test_sysctl_nested(void)
1405 {
1406 	int pid = spawn_thread(CLONE_NEWPID, sysctl_nested_child, NULL);
1407 
1408 	join_thread(pid);
1409 }
1410 
1411 /*
1412  * Test sharing via dup()
1413  * Test that seals are shared between dupped FDs and they're all equal.
1414  */
test_share_dup(char * banner,char * b_suffix)1415 static void test_share_dup(char *banner, char *b_suffix)
1416 {
1417 	int fd, fd2;
1418 
1419 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1420 
1421 	fd = mfd_assert_new("kern_memfd_share_dup",
1422 			    mfd_def_size,
1423 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1424 	mfd_assert_has_seals(fd, 0);
1425 
1426 	fd2 = mfd_assert_dup(fd);
1427 	mfd_assert_has_seals(fd2, 0);
1428 
1429 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1430 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1431 	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1432 
1433 	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1434 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1435 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1436 
1437 	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1438 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1439 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1440 
1441 	mfd_fail_add_seals(fd, F_SEAL_GROW);
1442 	mfd_fail_add_seals(fd2, F_SEAL_GROW);
1443 	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1444 	mfd_fail_add_seals(fd2, F_SEAL_SEAL);
1445 
1446 	close(fd2);
1447 
1448 	mfd_fail_add_seals(fd, F_SEAL_GROW);
1449 	close(fd);
1450 }
1451 
1452 /*
1453  * Test sealing with active mmap()s
1454  * Modifying seals is only allowed if no other mmap() refs exist.
1455  */
test_share_mmap(char * banner,char * b_suffix)1456 static void test_share_mmap(char *banner, char *b_suffix)
1457 {
1458 	int fd;
1459 	void *p;
1460 
1461 	printf("%s %s %s\n", memfd_str,  banner, b_suffix);
1462 
1463 	fd = mfd_assert_new("kern_memfd_share_mmap",
1464 			    mfd_def_size,
1465 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1466 	mfd_assert_has_seals(fd, 0);
1467 
1468 	/* shared/writable ref prevents sealing WRITE, but allows others */
1469 	p = mfd_assert_mmap_shared(fd);
1470 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1471 	mfd_assert_has_seals(fd, 0);
1472 	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
1473 	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
1474 	munmap(p, mfd_def_size);
1475 
1476 	/* readable ref allows sealing */
1477 	p = mfd_assert_mmap_private(fd);
1478 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1479 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1480 	munmap(p, mfd_def_size);
1481 
1482 	close(fd);
1483 }
1484 
1485 /*
1486  * Test sealing with open(/proc/self/fd/%d)
1487  * Via /proc we can get access to a separate file-context for the same memfd.
1488  * This is *not* like dup(), but like a real separate open(). Make sure the
1489  * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
1490  */
test_share_open(char * banner,char * b_suffix)1491 static void test_share_open(char *banner, char *b_suffix)
1492 {
1493 	int fd, fd2;
1494 
1495 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1496 
1497 	fd = mfd_assert_new("kern_memfd_share_open",
1498 			    mfd_def_size,
1499 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1500 	mfd_assert_has_seals(fd, 0);
1501 
1502 	fd2 = mfd_assert_open(fd, O_RDWR, 0);
1503 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1504 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1505 	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1506 
1507 	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1508 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1509 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1510 
1511 	close(fd);
1512 	fd = mfd_assert_open(fd2, O_RDONLY, 0);
1513 
1514 	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1515 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1516 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1517 
1518 	close(fd2);
1519 	fd2 = mfd_assert_open(fd, O_RDWR, 0);
1520 
1521 	mfd_assert_add_seals(fd2, F_SEAL_SEAL);
1522 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1523 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1524 
1525 	close(fd2);
1526 	close(fd);
1527 }
1528 
1529 /*
1530  * Test sharing via fork()
1531  * Test whether seal-modifications work as expected with forked children.
1532  */
test_share_fork(char * banner,char * b_suffix)1533 static void test_share_fork(char *banner, char *b_suffix)
1534 {
1535 	int fd;
1536 	pid_t pid;
1537 
1538 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1539 
1540 	fd = mfd_assert_new("kern_memfd_share_fork",
1541 			    mfd_def_size,
1542 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1543 	mfd_assert_has_seals(fd, 0);
1544 
1545 	pid = spawn_idle_thread(0);
1546 	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1547 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1548 
1549 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1550 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1551 
1552 	join_idle_thread(pid);
1553 
1554 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1555 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1556 
1557 	close(fd);
1558 }
1559 
main(int argc,char ** argv)1560 int main(int argc, char **argv)
1561 {
1562 	pid_t pid;
1563 
1564 	if (argc == 2) {
1565 		if (!strcmp(argv[1], "hugetlbfs")) {
1566 			unsigned long hpage_size = default_huge_page_size();
1567 
1568 			if (!hpage_size) {
1569 				printf("Unable to determine huge page size\n");
1570 				abort();
1571 			}
1572 
1573 			hugetlbfs_test = 1;
1574 			memfd_str = MEMFD_HUGE_STR;
1575 			mfd_def_size = hpage_size * 2;
1576 		} else {
1577 			printf("Unknown option: %s\n", argv[1]);
1578 			abort();
1579 		}
1580 	}
1581 
1582 	test_create();
1583 	test_basic();
1584 	test_exec_seal();
1585 	test_exec_no_seal();
1586 	test_noexec_seal();
1587 
1588 	test_seal_write();
1589 	test_seal_future_write();
1590 	test_seal_shrink();
1591 	test_seal_grow();
1592 	test_seal_resize();
1593 
1594 	test_sysctl_simple();
1595 	test_sysctl_nested();
1596 
1597 	test_share_dup("SHARE-DUP", "");
1598 	test_share_mmap("SHARE-MMAP", "");
1599 	test_share_open("SHARE-OPEN", "");
1600 	test_share_fork("SHARE-FORK", "");
1601 
1602 	/* Run test-suite in a multi-threaded environment with a shared
1603 	 * file-table. */
1604 	pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
1605 	test_share_dup("SHARE-DUP", SHARED_FT_STR);
1606 	test_share_mmap("SHARE-MMAP", SHARED_FT_STR);
1607 	test_share_open("SHARE-OPEN", SHARED_FT_STR);
1608 	test_share_fork("SHARE-FORK", SHARED_FT_STR);
1609 	join_idle_thread(pid);
1610 
1611 	printf("memfd: DONE\n");
1612 
1613 	return 0;
1614 }
1615