1 // SPDX-License-Identifier: GPL-2.0
2
3 #define _GNU_SOURCE
4 #include <errno.h>
5 #include <fcntl.h>
6 #include <linux/kernel.h>
7 #include <limits.h>
8 #include <stdbool.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <syscall.h>
13 #include <unistd.h>
14 #include <sys/resource.h>
15 #include <linux/close_range.h>
16
17 #include "../kselftest_harness.h"
18 #include "../clone3/clone3_selftests.h"
19
20
21 #ifndef F_LINUX_SPECIFIC_BASE
22 #define F_LINUX_SPECIFIC_BASE 1024
23 #endif
24
25 #ifndef F_DUPFD_QUERY
26 #define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3)
27 #endif
28
29 #ifndef F_CREATED_QUERY
30 #define F_CREATED_QUERY (F_LINUX_SPECIFIC_BASE + 4)
31 #endif
32
/*
 * Thin wrapper around the raw close_range() system call.
 *
 * @fd, @max_fd and @flags are handed to syscall(__NR_close_range, ...)
 * unchanged; the syscall() result is returned converted to int.
 */
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
				  unsigned int flags)
{
	long rc = syscall(__NR_close_range, fd, max_fd, flags);

	return (int)rc;
}
38
TEST(core_close_range)39 TEST(core_close_range)
40 {
41 int i, ret;
42 int open_fds[101];
43
44 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
45 int fd;
46
47 fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
48 ASSERT_GE(fd, 0) {
49 if (errno == ENOENT)
50 SKIP(return, "Skipping test since /dev/null does not exist");
51 }
52
53 open_fds[i] = fd;
54 }
55
56 EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) {
57 if (errno == ENOSYS)
58 SKIP(return, "close_range() syscall not supported");
59 }
60
61 for (i = 0; i < 100; i++) {
62 ret = fcntl(open_fds[i], F_DUPFD_QUERY, open_fds[i + 1]);
63 if (ret < 0) {
64 EXPECT_EQ(errno, EINVAL);
65 } else {
66 EXPECT_EQ(ret, 0);
67 }
68 }
69
70 EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
71
72 for (i = 0; i <= 50; i++)
73 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
74
75 for (i = 51; i <= 100; i++)
76 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
77
78 /* create a couple of gaps */
79 close(57);
80 close(78);
81 close(81);
82 close(82);
83 close(84);
84 close(90);
85
86 EXPECT_EQ(0, sys_close_range(open_fds[51], open_fds[92], 0));
87
88 for (i = 51; i <= 92; i++)
89 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
90
91 for (i = 93; i <= 100; i++)
92 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
93
94 /* test that the kernel caps and still closes all fds */
95 EXPECT_EQ(0, sys_close_range(open_fds[93], open_fds[99], 0));
96
97 for (i = 93; i <= 99; i++)
98 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
99
100 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
101
102 EXPECT_EQ(0, sys_close_range(open_fds[100], open_fds[100], 0));
103
104 EXPECT_EQ(-1, fcntl(open_fds[100], F_GETFL));
105 }
106
TEST(close_range_unshare)107 TEST(close_range_unshare)
108 {
109 int i, ret, status;
110 pid_t pid;
111 int open_fds[101];
112 struct __clone_args args = {
113 .flags = CLONE_FILES,
114 .exit_signal = SIGCHLD,
115 };
116
117 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
118 int fd;
119
120 fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
121 ASSERT_GE(fd, 0) {
122 if (errno == ENOENT)
123 SKIP(return, "Skipping test since /dev/null does not exist");
124 }
125
126 open_fds[i] = fd;
127 }
128
129 pid = sys_clone3(&args, sizeof(args));
130 ASSERT_GE(pid, 0);
131
132 if (pid == 0) {
133 ret = sys_close_range(open_fds[0], open_fds[50],
134 CLOSE_RANGE_UNSHARE);
135 if (ret)
136 exit(EXIT_FAILURE);
137
138 for (i = 0; i <= 50; i++)
139 if (fcntl(open_fds[i], F_GETFL) != -1)
140 exit(EXIT_FAILURE);
141
142 for (i = 51; i <= 100; i++)
143 if (fcntl(open_fds[i], F_GETFL) == -1)
144 exit(EXIT_FAILURE);
145
146 /* create a couple of gaps */
147 close(57);
148 close(78);
149 close(81);
150 close(82);
151 close(84);
152 close(90);
153
154 ret = sys_close_range(open_fds[51], open_fds[92],
155 CLOSE_RANGE_UNSHARE);
156 if (ret)
157 exit(EXIT_FAILURE);
158
159 for (i = 51; i <= 92; i++)
160 if (fcntl(open_fds[i], F_GETFL) != -1)
161 exit(EXIT_FAILURE);
162
163 for (i = 93; i <= 100; i++)
164 if (fcntl(open_fds[i], F_GETFL) == -1)
165 exit(EXIT_FAILURE);
166
167 /* test that the kernel caps and still closes all fds */
168 ret = sys_close_range(open_fds[93], open_fds[99],
169 CLOSE_RANGE_UNSHARE);
170 if (ret)
171 exit(EXIT_FAILURE);
172
173 for (i = 93; i <= 99; i++)
174 if (fcntl(open_fds[i], F_GETFL) != -1)
175 exit(EXIT_FAILURE);
176
177 if (fcntl(open_fds[100], F_GETFL) == -1)
178 exit(EXIT_FAILURE);
179
180 ret = sys_close_range(open_fds[100], open_fds[100],
181 CLOSE_RANGE_UNSHARE);
182 if (ret)
183 exit(EXIT_FAILURE);
184
185 if (fcntl(open_fds[100], F_GETFL) != -1)
186 exit(EXIT_FAILURE);
187
188 exit(EXIT_SUCCESS);
189 }
190
191 EXPECT_EQ(waitpid(pid, &status, 0), pid);
192 EXPECT_EQ(true, WIFEXITED(status));
193 EXPECT_EQ(0, WEXITSTATUS(status));
194 }
195
TEST(close_range_unshare_capped)196 TEST(close_range_unshare_capped)
197 {
198 int i, ret, status;
199 pid_t pid;
200 int open_fds[101];
201 struct __clone_args args = {
202 .flags = CLONE_FILES,
203 .exit_signal = SIGCHLD,
204 };
205
206 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
207 int fd;
208
209 fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
210 ASSERT_GE(fd, 0) {
211 if (errno == ENOENT)
212 SKIP(return, "Skipping test since /dev/null does not exist");
213 }
214
215 open_fds[i] = fd;
216 }
217
218 pid = sys_clone3(&args, sizeof(args));
219 ASSERT_GE(pid, 0);
220
221 if (pid == 0) {
222 ret = sys_close_range(open_fds[0], UINT_MAX,
223 CLOSE_RANGE_UNSHARE);
224 if (ret)
225 exit(EXIT_FAILURE);
226
227 for (i = 0; i <= 100; i++)
228 if (fcntl(open_fds[i], F_GETFL) != -1)
229 exit(EXIT_FAILURE);
230
231 exit(EXIT_SUCCESS);
232 }
233
234 EXPECT_EQ(waitpid(pid, &status, 0), pid);
235 EXPECT_EQ(true, WIFEXITED(status));
236 EXPECT_EQ(0, WEXITSTATUS(status));
237 }
238
TEST(close_range_cloexec)239 TEST(close_range_cloexec)
240 {
241 int i, ret;
242 int open_fds[101];
243 struct rlimit rlimit;
244
245 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
246 int fd;
247
248 fd = open("/dev/null", O_RDONLY);
249 ASSERT_GE(fd, 0) {
250 if (errno == ENOENT)
251 SKIP(return, "Skipping test since /dev/null does not exist");
252 }
253
254 open_fds[i] = fd;
255 }
256
257 ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
258 if (ret < 0) {
259 if (errno == ENOSYS)
260 SKIP(return, "close_range() syscall not supported");
261 if (errno == EINVAL)
262 SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
263 }
264
265 /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */
266 ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
267 rlimit.rlim_cur = 25;
268 ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
269
270 /* Set close-on-exec for two ranges: [0-50] and [75-100]. */
271 ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC);
272 ASSERT_EQ(0, ret);
273 ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC);
274 ASSERT_EQ(0, ret);
275
276 for (i = 0; i <= 50; i++) {
277 int flags = fcntl(open_fds[i], F_GETFD);
278
279 EXPECT_GT(flags, -1);
280 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
281 }
282
283 for (i = 51; i <= 74; i++) {
284 int flags = fcntl(open_fds[i], F_GETFD);
285
286 EXPECT_GT(flags, -1);
287 EXPECT_EQ(flags & FD_CLOEXEC, 0);
288 }
289
290 for (i = 75; i <= 100; i++) {
291 int flags = fcntl(open_fds[i], F_GETFD);
292
293 EXPECT_GT(flags, -1);
294 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
295 }
296
297 /* Test a common pattern. */
298 ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC);
299 for (i = 0; i <= 100; i++) {
300 int flags = fcntl(open_fds[i], F_GETFD);
301
302 EXPECT_GT(flags, -1);
303 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
304 }
305 }
306
TEST(close_range_cloexec_unshare)307 TEST(close_range_cloexec_unshare)
308 {
309 int i, ret;
310 int open_fds[101];
311 struct rlimit rlimit;
312
313 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
314 int fd;
315
316 fd = open("/dev/null", O_RDONLY);
317 ASSERT_GE(fd, 0) {
318 if (errno == ENOENT)
319 SKIP(return, "Skipping test since /dev/null does not exist");
320 }
321
322 open_fds[i] = fd;
323 }
324
325 ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
326 if (ret < 0) {
327 if (errno == ENOSYS)
328 SKIP(return, "close_range() syscall not supported");
329 if (errno == EINVAL)
330 SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
331 }
332
333 /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */
334 ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
335 rlimit.rlim_cur = 25;
336 ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
337
338 /* Set close-on-exec for two ranges: [0-50] and [75-100]. */
339 ret = sys_close_range(open_fds[0], open_fds[50],
340 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
341 ASSERT_EQ(0, ret);
342 ret = sys_close_range(open_fds[75], open_fds[100],
343 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
344 ASSERT_EQ(0, ret);
345
346 for (i = 0; i <= 50; i++) {
347 int flags = fcntl(open_fds[i], F_GETFD);
348
349 EXPECT_GT(flags, -1);
350 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
351 }
352
353 for (i = 51; i <= 74; i++) {
354 int flags = fcntl(open_fds[i], F_GETFD);
355
356 EXPECT_GT(flags, -1);
357 EXPECT_EQ(flags & FD_CLOEXEC, 0);
358 }
359
360 for (i = 75; i <= 100; i++) {
361 int flags = fcntl(open_fds[i], F_GETFD);
362
363 EXPECT_GT(flags, -1);
364 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
365 }
366
367 /* Test a common pattern. */
368 ret = sys_close_range(3, UINT_MAX,
369 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
370 for (i = 0; i <= 100; i++) {
371 int flags = fcntl(open_fds[i], F_GETFD);
372
373 EXPECT_GT(flags, -1);
374 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
375 }
376 }
377
378 /*
379 * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
380 */
TEST(close_range_cloexec_syzbot)381 TEST(close_range_cloexec_syzbot)
382 {
383 int fd1, fd2, fd3, fd4, flags, ret, status;
384 pid_t pid;
385 struct __clone_args args = {
386 .flags = CLONE_FILES,
387 .exit_signal = SIGCHLD,
388 };
389
390 /* Create a huge gap in the fd table. */
391 fd1 = open("/dev/null", O_RDWR);
392 EXPECT_GT(fd1, 0);
393
394 fd2 = dup2(fd1, 1000);
395 EXPECT_GT(fd2, 0);
396
397 flags = fcntl(fd1, F_DUPFD_QUERY, fd2);
398 if (flags < 0) {
399 EXPECT_EQ(errno, EINVAL);
400 } else {
401 EXPECT_EQ(flags, 1);
402 }
403
404 pid = sys_clone3(&args, sizeof(args));
405 ASSERT_GE(pid, 0);
406
407 if (pid == 0) {
408 ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC);
409 if (ret)
410 exit(EXIT_FAILURE);
411
412 /*
413 * We now have a private file descriptor table and all
414 * our open fds should still be open but made
415 * close-on-exec.
416 */
417 flags = fcntl(fd1, F_GETFD);
418 EXPECT_GT(flags, -1);
419 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
420
421 flags = fcntl(fd2, F_GETFD);
422 EXPECT_GT(flags, -1);
423 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
424
425 fd3 = dup2(fd1, 42);
426 EXPECT_GT(fd3, 0);
427
428 flags = fcntl(fd1, F_DUPFD_QUERY, fd3);
429 if (flags < 0) {
430 EXPECT_EQ(errno, EINVAL);
431 } else {
432 EXPECT_EQ(flags, 1);
433 }
434
435
436
437 /*
438 * Duplicating the file descriptor must remove the
439 * FD_CLOEXEC flag.
440 */
441 flags = fcntl(fd3, F_GETFD);
442 EXPECT_GT(flags, -1);
443 EXPECT_EQ(flags & FD_CLOEXEC, 0);
444
445 exit(EXIT_SUCCESS);
446 }
447
448 EXPECT_EQ(waitpid(pid, &status, 0), pid);
449 EXPECT_EQ(true, WIFEXITED(status));
450 EXPECT_EQ(0, WEXITSTATUS(status));
451
452 /*
453 * We had a shared file descriptor table before along with requesting
454 * close-on-exec so the original fds must not be close-on-exec.
455 */
456 flags = fcntl(fd1, F_GETFD);
457 EXPECT_GT(flags, -1);
458 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
459
460 flags = fcntl(fd2, F_GETFD);
461 EXPECT_GT(flags, -1);
462 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
463
464 fd3 = dup2(fd1, 42);
465 EXPECT_GT(fd3, 0);
466
467 flags = fcntl(fd1, F_DUPFD_QUERY, fd3);
468 if (flags < 0) {
469 EXPECT_EQ(errno, EINVAL);
470 } else {
471 EXPECT_EQ(flags, 1);
472 }
473
474 fd4 = open("/dev/null", O_RDWR);
475 EXPECT_GT(fd4, 0);
476
477 /* Same inode, different file pointers. */
478 flags = fcntl(fd1, F_DUPFD_QUERY, fd4);
479 if (flags < 0) {
480 EXPECT_EQ(errno, EINVAL);
481 } else {
482 EXPECT_EQ(flags, 0);
483 }
484
485 flags = fcntl(fd3, F_GETFD);
486 EXPECT_GT(flags, -1);
487 EXPECT_EQ(flags & FD_CLOEXEC, 0);
488
489 EXPECT_EQ(close(fd1), 0);
490 EXPECT_EQ(close(fd2), 0);
491 EXPECT_EQ(close(fd3), 0);
492 EXPECT_EQ(close(fd4), 0);
493 }
494
495 /*
496 * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
497 */
TEST(close_range_cloexec_unshare_syzbot)498 TEST(close_range_cloexec_unshare_syzbot)
499 {
500 int i, fd1, fd2, fd3, flags, ret, status;
501 pid_t pid;
502 struct __clone_args args = {
503 .flags = CLONE_FILES,
504 .exit_signal = SIGCHLD,
505 };
506
507 /*
508 * Create a huge gap in the fd table. When we now call
509 * CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper
510 * bound the kernel will only copy up to fd1 file descriptors into the
511 * new fd table. If the kernel is buggy and doesn't handle
512 * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file
513 * descriptors and we will oops!
514 *
515 * On a buggy kernel this should immediately oops. But let's loop just
516 * to be sure.
517 */
518 fd1 = open("/dev/null", O_RDWR);
519 EXPECT_GT(fd1, 0);
520
521 fd2 = dup2(fd1, 1000);
522 EXPECT_GT(fd2, 0);
523
524 for (i = 0; i < 100; i++) {
525
526 pid = sys_clone3(&args, sizeof(args));
527 ASSERT_GE(pid, 0);
528
529 if (pid == 0) {
530 ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE |
531 CLOSE_RANGE_CLOEXEC);
532 if (ret)
533 exit(EXIT_FAILURE);
534
535 /*
536 * We now have a private file descriptor table and all
537 * our open fds should still be open but made
538 * close-on-exec.
539 */
540 flags = fcntl(fd1, F_GETFD);
541 EXPECT_GT(flags, -1);
542 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
543
544 flags = fcntl(fd2, F_GETFD);
545 EXPECT_GT(flags, -1);
546 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
547
548 fd3 = dup2(fd1, 42);
549 EXPECT_GT(fd3, 0);
550
551 /*
552 * Duplicating the file descriptor must remove the
553 * FD_CLOEXEC flag.
554 */
555 flags = fcntl(fd3, F_GETFD);
556 EXPECT_GT(flags, -1);
557 EXPECT_EQ(flags & FD_CLOEXEC, 0);
558
559 EXPECT_EQ(close(fd1), 0);
560 EXPECT_EQ(close(fd2), 0);
561 EXPECT_EQ(close(fd3), 0);
562
563 exit(EXIT_SUCCESS);
564 }
565
566 EXPECT_EQ(waitpid(pid, &status, 0), pid);
567 EXPECT_EQ(true, WIFEXITED(status));
568 EXPECT_EQ(0, WEXITSTATUS(status));
569 }
570
571 /*
572 * We created a private file descriptor table before along with
573 * requesting close-on-exec so the original fds must not be
574 * close-on-exec.
575 */
576 flags = fcntl(fd1, F_GETFD);
577 EXPECT_GT(flags, -1);
578 EXPECT_EQ(flags & FD_CLOEXEC, 0);
579
580 flags = fcntl(fd2, F_GETFD);
581 EXPECT_GT(flags, -1);
582 EXPECT_EQ(flags & FD_CLOEXEC, 0);
583
584 fd3 = dup2(fd1, 42);
585 EXPECT_GT(fd3, 0);
586
587 flags = fcntl(fd3, F_GETFD);
588 EXPECT_GT(flags, -1);
589 EXPECT_EQ(flags & FD_CLOEXEC, 0);
590
591 EXPECT_EQ(close(fd1), 0);
592 EXPECT_EQ(close(fd2), 0);
593 EXPECT_EQ(close(fd3), 0);
594 }
595
TEST(close_range_bitmap_corruption)596 TEST(close_range_bitmap_corruption)
597 {
598 pid_t pid;
599 int status;
600 struct __clone_args args = {
601 .flags = CLONE_FILES,
602 .exit_signal = SIGCHLD,
603 };
604
605 /* get the first 128 descriptors open */
606 for (int i = 2; i < 128; i++)
607 EXPECT_GE(dup2(0, i), 0);
608
609 /* get descriptor table shared */
610 pid = sys_clone3(&args, sizeof(args));
611 ASSERT_GE(pid, 0);
612
613 if (pid == 0) {
614 /* unshare and truncate descriptor table down to 64 */
615 if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE))
616 exit(EXIT_FAILURE);
617
618 ASSERT_EQ(fcntl(64, F_GETFD), -1);
619 /* ... and verify that the range 64..127 is not
620 stuck "fully used" according to secondary bitmap */
621 EXPECT_EQ(dup(0), 64)
622 exit(EXIT_FAILURE);
623 exit(EXIT_SUCCESS);
624 }
625
626 EXPECT_EQ(waitpid(pid, &status, 0), pid);
627 EXPECT_EQ(true, WIFEXITED(status));
628 EXPECT_EQ(0, WEXITSTATUS(status));
629 }
630
TEST(fcntl_created)631 TEST(fcntl_created)
632 {
633 for (int i = 0; i < 101; i++) {
634 int fd;
635 char path[PATH_MAX];
636
637 fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
638 ASSERT_GE(fd, 0) {
639 if (errno == ENOENT)
640 SKIP(return,
641 "Skipping test since /dev/null does not exist");
642 }
643
644 /* We didn't create "/dev/null". */
645 EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0);
646 close(fd);
647
648 sprintf(path, "aaaa_%d", i);
649 fd = open(path, O_CREAT | O_RDONLY | O_CLOEXEC, 0600);
650 ASSERT_GE(fd, 0);
651
652 /* We created "aaaa_%d". */
653 EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 1);
654 close(fd);
655
656 fd = open(path, O_RDONLY | O_CLOEXEC);
657 ASSERT_GE(fd, 0);
658
659 /* We're opening it again, so no positive creation check. */
660 EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0);
661 close(fd);
662 unlink(path);
663 }
664 }
665
666 TEST_HARNESS_MAIN
667