1 /* SPDX-License-Identifier: GPL-2.0 */
2 
3 #define _GNU_SOURCE
4 
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <linux/limits.h>
8 #include <poll.h>
9 #include <signal.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <sys/inotify.h>
14 #include <sys/stat.h>
15 #include <sys/types.h>
16 #include <sys/wait.h>
17 #include <unistd.h>
18 
19 #include "cgroup_util.h"
20 #include "../clone3/clone3_selftests.h"
21 
/*
 * Read up to max_len - 1 bytes from the start of @path into @buf and
 * NUL-terminate the result.
 *
 * Returns the number of bytes read on success, or -errno on failure
 * (-EINVAL when @max_len is 0, as there is no room for the terminator).
 */
static ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int fd, err = 0;

	if (!max_len)
		return -EINVAL;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -errno;

	len = read(fd, buf, max_len - 1);
	if (len < 0)
		err = errno;	/* save before close() can overwrite it */
	else
		buf[len] = 0;

	close(fd);
	return len < 0 ? -err : len;
}
40 
/*
 * Append @len bytes from @buf to the existing file at @path.
 *
 * Returns the number of bytes written on success, or -errno on failure.
 */
static ssize_t write_text(const char *path, const char *buf, ssize_t len)
{
	ssize_t written;
	int fd, err = 0;

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return -errno;

	written = write(fd, buf, len);
	if (written < 0)
		err = errno;	/* save before close() can overwrite it */

	close(fd);
	return written < 0 ? -err : written;
}
54 
/*
 * Build the path "<root>/<name>" in a freshly allocated buffer.
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_name(const char *root, const char *name)
{
	size_t len = strlen(root) + strlen(name) + 2;	/* '/' and NUL */
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", root, name);

	return ret;
}
64 
/*
 * Build the path "<root>/<name>_<index>" in a freshly allocated buffer.
 *
 * The buffer is sized from the formatted length, so any int index
 * (including negative ones) is rendered without truncation.
 *
 * Returns the new string, which the caller must free(), or NULL if
 * formatting or allocation fails.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
	int need = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
	size_t len;
	char *ret;

	if (need < 0)
		return NULL;

	len = (size_t)need + 1;		/* room for the NUL */
	ret = malloc(len);
	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s_%d", root, name, index);

	return ret;
}
74 
/*
 * Build the path "<cgroup>/<control>" in a freshly allocated buffer.
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_control(const char *cgroup, const char *control)
{
	size_t len = strlen(cgroup) + strlen(control) + 2;	/* '/' and NUL */
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", cgroup, control);

	return ret;
}
84 
/*
 * Read the file <cgroup>/<control> into @buf as a NUL-terminated string,
 * passing @len to read_text() as the buffer size.
 *
 * Returns 0 on success, or -errno on failure.
 */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
	char path[PATH_MAX];
	ssize_t nread;

	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
	nread = read_text(path, buf, len);
	if (nread < 0)
		return nread;

	return 0;
}
96 
/*
 * Compare the beginning of <cgroup>/<control> with @expected.
 *
 * The read buffer is strlen(expected) + 1 bytes, so at most
 * strlen(expected) bytes of the file take part in the comparison.
 *
 * Returns the strcmp() result (0 on a match), or -1 if @expected is NULL,
 * the allocation fails or the file cannot be read.
 */
int cg_read_strcmp(const char *cgroup, const char *control,
		   const char *expected)
{
	size_t bufsz;
	char *data;
	int cmp = -1;

	if (!expected)
		return -1;

	bufsz = strlen(expected) + 1;
	data = malloc(bufsz);
	if (!data)
		return -1;

	if (!cg_read(cgroup, control, data, bufsz))
		cmp = strcmp(expected, data);

	free(data);
	return cmp;
}
123 
cg_read_strstr(const char * cgroup,const char * control,const char * needle)124 int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
125 {
126 	char buf[PAGE_SIZE];
127 
128 	if (cg_read(cgroup, control, buf, sizeof(buf)))
129 		return -1;
130 
131 	return strstr(buf, needle) ? 0 : -1;
132 }
133 
/*
 * Read <cgroup>/<control> and parse its contents with atol().
 *
 * Returns the parsed value, or -1 if the read fails (which cannot be
 * told apart from a file that actually contains -1).
 */
long cg_read_long(const char *cgroup, const char *control)
{
	char text[128];
	int err;

	err = cg_read(cgroup, control, text, sizeof(text));
	if (err)
		return -1;

	return atol(text);
}
143 
/*
 * Read from offset 0 of the already-open descriptor @fd and parse the
 * data with atol().
 *
 * Returns the parsed value, or -1 if nothing could be read.
 */
long cg_read_long_fd(int fd)
{
	char buf[128];
	ssize_t len;

	/* Leave one byte free: pread() does not NUL-terminate. */
	len = pread(fd, buf, sizeof(buf) - 1, 0);
	if (len <= 0)
		return -1;

	buf[len] = '\0';
	return atol(buf);
}
153 
cg_read_key_long(const char * cgroup,const char * control,const char * key)154 long cg_read_key_long(const char *cgroup, const char *control, const char *key)
155 {
156 	char buf[PAGE_SIZE];
157 	char *ptr;
158 
159 	if (cg_read(cgroup, control, buf, sizeof(buf)))
160 		return -1;
161 
162 	ptr = strstr(buf, key);
163 	if (!ptr)
164 		return -1;
165 
166 	return atol(ptr + strlen(key));
167 }
168 
cg_read_lc(const char * cgroup,const char * control)169 long cg_read_lc(const char *cgroup, const char *control)
170 {
171 	char buf[PAGE_SIZE];
172 	const char delim[] = "\n";
173 	char *line;
174 	long cnt = 0;
175 
176 	if (cg_read(cgroup, control, buf, sizeof(buf)))
177 		return -1;
178 
179 	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
180 		cnt++;
181 
182 	return cnt;
183 }
184 
/*
 * Write the NUL-terminated string @buf to the file <cgroup>/<control>.
 *
 * Returns 0 when the whole string was written, -errno if the write
 * failed, or -EIO if fewer bytes than requested were written.
 */
int cg_write(const char *cgroup, const char *control, char *buf)
{
	char path[PATH_MAX];
	ssize_t len = strlen(buf), ret;

	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
	ret = write_text(path, buf, len);
	if (ret < 0)
		return ret;

	/* A short write is a failure too; never hand back a byte count. */
	return ret == len ? 0 : -EIO;
}
195 
/*
 * Open the file <cgroup>/<control> with the given open() flags.
 *
 * Returns the file descriptor on success, or -1 on failure.
 * (the descriptor should be closed with close() as usual)
 */
int cg_open(const char *cgroup, const char *control, int flags)
{
	char path[PATH_MAX];
	int fd;

	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
	fd = open(path, flags);

	return fd;
}
207 
/*
 * Format @value as a decimal number and write it to <cgroup>/<control>.
 *
 * Returns the cg_write() result, or a negative value if formatting fails.
 */
int cg_write_numeric(const char *cgroup, const char *control, long value)
{
	char buf[64];
	int ret;

	/* @value is a signed long, so the matching conversion is %ld. */
	ret = snprintf(buf, sizeof(buf), "%ld", value);
	if (ret < 0)
		return ret;

	return cg_write(cgroup, control, buf);
}
219 
cg_find_unified_root(char * root,size_t len,bool * nsdelegate)220 int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
221 {
222 	char buf[10 * PAGE_SIZE];
223 	char *fs, *mount, *type, *options;
224 	const char delim[] = "\n\t ";
225 
226 	if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
227 		return -1;
228 
229 	/*
230 	 * Example:
231 	 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
232 	 */
233 	for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
234 		mount = strtok(NULL, delim);
235 		type = strtok(NULL, delim);
236 		options = strtok(NULL, delim);
237 		strtok(NULL, delim);
238 		strtok(NULL, delim);
239 
240 		if (strcmp(type, "cgroup2") == 0) {
241 			strncpy(root, mount, len);
242 			if (nsdelegate)
243 				*nsdelegate = !!strstr(options, "nsdelegate");
244 			return 0;
245 		}
246 	}
247 
248 	return -1;
249 }
250 
/*
 * Create the directory @cgroup with mode 0755.
 *
 * Returns the mkdir() result: 0 on success, -1 on failure.
 */
int cg_create(const char *cgroup)
{
	const mode_t mode = 0755;

	return mkdir(cgroup, mode);
}
255 
cg_wait_for_proc_count(const char * cgroup,int count)256 int cg_wait_for_proc_count(const char *cgroup, int count)
257 {
258 	char buf[10 * PAGE_SIZE] = {0};
259 	int attempts;
260 	char *ptr;
261 
262 	for (attempts = 10; attempts >= 0; attempts--) {
263 		int nr = 0;
264 
265 		if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
266 			break;
267 
268 		for (ptr = buf; *ptr; ptr++)
269 			if (*ptr == '\n')
270 				nr++;
271 
272 		if (nr >= count)
273 			return 0;
274 
275 		usleep(100000);
276 	}
277 
278 	return -1;
279 }
280 
cg_killall(const char * cgroup)281 int cg_killall(const char *cgroup)
282 {
283 	char buf[PAGE_SIZE];
284 	char *ptr = buf;
285 
286 	/* If cgroup.kill exists use it. */
287 	if (!cg_write(cgroup, "cgroup.kill", "1"))
288 		return 0;
289 
290 	if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
291 		return -1;
292 
293 	while (ptr < buf + sizeof(buf)) {
294 		int pid = strtol(ptr, &ptr, 10);
295 
296 		if (pid == 0)
297 			break;
298 		if (*ptr)
299 			ptr++;
300 		else
301 			break;
302 		if (kill(pid, SIGKILL))
303 			return -1;
304 	}
305 
306 	return 0;
307 }
308 
/*
 * Remove the directory @cgroup.
 *
 * While rmdir() fails with EBUSY, the cgroup's processes are killed via
 * cg_killall() and the removal is retried after a 100 us sleep. There is
 * no retry limit.
 *
 * Returns 0 on success, if @cgroup is NULL, or if the directory does not
 * exist; otherwise the failing rmdir() result.
 */
int cg_destroy(const char *cgroup)
{
	int ret;

	if (!cgroup)
		return 0;

	for (;;) {
		ret = rmdir(cgroup);
		if (!ret || errno != EBUSY)
			break;

		cg_killall(cgroup);
		usleep(100);
	}

	/* An already-missing cgroup counts as destroyed. */
	if (ret && errno == ENOENT)
		return 0;

	return ret;
}
328 
/*
 * Write @pid, formatted as decimal, to <cgroup>/cgroup.procs.
 *
 * Returns the cg_write() result.
 */
int cg_enter(const char *cgroup, int pid)
{
	char pid_str[64];
	int ret;

	snprintf(pid_str, sizeof(pid_str), "%d", pid);
	ret = cg_write(cgroup, "cgroup.procs", pid_str);

	return ret;
}
336 
/*
 * Write "0" to <cgroup>/cgroup.procs.
 * NOTE(review): presumably "0" selects the calling process - confirm
 * against the cgroup v2 interface documentation.
 *
 * Returns the cg_write() result.
 */
int cg_enter_current(const char *cgroup)
{
	int ret = cg_write(cgroup, "cgroup.procs", "0");

	return ret;
}
341 
/*
 * Write "0" to <cgroup>/cgroup.threads.
 * NOTE(review): presumably "0" selects the calling thread - confirm
 * against the cgroup v2 interface documentation.
 *
 * Returns the cg_write() result.
 */
int cg_enter_current_thread(const char *cgroup)
{
	int ret = cg_write(cgroup, "cgroup.threads", "0");

	return ret;
}
346 
/*
 * Fork a child that writes its own pid to <cgroup>/cgroup.procs, runs
 * fn(cgroup, arg) and exits with its return value; the parent waits for
 * the child to finish.
 *
 * Returns the child's exit status if it exited normally, the negative
 * fork() result if forking failed, or -1 if waiting failed or the child
 * did not exit normally. A child that cannot enter the cgroup exits with
 * EXIT_FAILURE.
 */
int cg_run(const char *cgroup,
	   int (*fn)(const char *cgroup, void *arg),
	   void *arg)
{
	int pid, retcode;

	pid = fork();
	if (pid < 0)
		return pid;

	if (pid == 0) {
		char buf[64];

		snprintf(buf, sizeof(buf), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", buf))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	/*
	 * Retry when interrupted by a signal; on any other failure retcode
	 * was never filled in and must not be inspected.
	 */
	while (waitpid(pid, &retcode, 0) < 0) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(retcode) ? WEXITSTATUS(retcode) : -1;
}
371 
/*
 * Create a child process directly inside the cgroup referred to by
 * @cgroup_fd, using sys_clone3() with CLONE_INTO_CGROUP.
 *
 * Returns the sys_clone3() result when the call is usable. If the
 * feature is unavailable (CLONE_ARGS_SIZE_VER2 not defined at build
 * time, or the call failed with ENOSYS or E2BIG), errno is set to
 * ENOSYS and -ENOSYS is returned.
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
	struct __clone_args args = {
		.flags = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup = cgroup_fd,
	};
	pid_t child;

	child = sys_clone3(&args, sizeof(struct __clone_args));

	/*
	 * Only two errors mean "not supported" rather than a genuine
	 * failure:
	 * ENOSYS -> clone3() not available
	 * E2BIG  -> CLONE_INTO_CGROUP not available
	 * Every other outcome is handed back to the caller unchanged.
	 */
	if (child >= 0 || (errno != ENOSYS && errno != E2BIG))
		return child;
#endif
	errno = ENOSYS;
	return -ENOSYS;
}
399 
/*
 * Wait for a state change of @pid with waitid(), restarting the call
 * whenever it is interrupted by a signal. @options is passed through
 * with __WALL and __WNOTHREAD added.
 *
 * The result is derived from info.si_status, checked only for the
 * state classes requested in @options:
 *   WEXITED    -> WEXITSTATUS() if WIFEXITED()
 *   WSTOPPED   -> WSTOPSIG() if WIFSTOPPED()
 *   WCONTINUED -> 0 if WIFCONTINUED()
 * Returns -1 if waitid() fails or none of the checks match.
 *
 * NOTE(review): the W* macros are applied to siginfo's si_status here;
 * confirm this matches the value waitid() stores in that field.
 */
int clone_reap(pid_t pid, int options)
{
	siginfo_t info = {
		.si_signo = 0,
	};
	int ret;

	do {
		ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0)
		return -1;

	if ((options & WEXITED) && WIFEXITED(info.si_status))
		return WEXITSTATUS(info.si_status);

	if ((options & WSTOPPED) && WIFSTOPPED(info.si_status))
		return WSTOPSIG(info.si_status);

	if ((options & WCONTINUED) && WIFCONTINUED(info.si_status))
		return 0;

	return -1;
}
432 
/*
 * Open @dir with O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH.
 *
 * Returns the open() result: a file descriptor, or -1 on failure.
 */
int dirfd_open_opath(const char *dir)
{
	const int flags = O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH;

	return open(dir, flags);
}
437 
/*
 * Close @fd if it is non-negative, leaving errno exactly as it was
 * before the call. Wrapped in do { } while (0) so the macro expands to a
 * single statement and is safe inside unbraced if/else bodies.
 */
#define close_prot_errno(fd)                                                   \
	do {                                                                   \
		if ((fd) >= 0) {                                               \
			int _e_ = errno;                                       \
			close(fd);                                             \
			errno = _e_;                                           \
		}                                                              \
	} while (0)
444 
/*
 * Start a child inside @cgroup via clone_into_cgroup() and have it run
 * fn(cgroup, arg), exiting with fn's return value. The parent does not
 * wait for the child.
 *
 * Returns the clone_into_cgroup() result in the parent (child pid, or a
 * negative value on failure), or -1 if @cgroup cannot be opened.
 */
static int clone_into_cgroup_run_nowait(const char *cgroup,
					int (*fn)(const char *cgroup, void *arg),
					void *arg)
{
	pid_t child;
	int dirfd = dirfd_open_opath(cgroup);

	if (dirfd < 0)
		return -1;

	child = clone_into_cgroup(dirfd);
	/* The descriptor is only needed for the clone; keep errno intact. */
	close_prot_errno(dirfd);

	if (!child)
		exit(fn(cgroup, arg));

	return child;
}
463 
/*
 * Run fn(cgroup, arg) in a child process inside @cgroup without waiting
 * for it to finish.
 *
 * The child is created with clone_into_cgroup_run_nowait() when that
 * works; if it reports ENOSYS, a plain fork() is used and the child
 * writes its own pid to cgroup.procs before calling @fn.
 *
 * Returns the child's pid in the parent, or a negative value on failure.
 */
int cg_run_nowait(const char *cgroup,
		  int (*fn)(const char *cgroup, void *arg),
		  void *arg)
{
	char self[64];
	int child;

	child = clone_into_cgroup_run_nowait(cgroup, fn, arg);
	if (child > 0)
		return child;

	/* Anything other than "not supported" is a genuine test failure. */
	if (child < 0 && errno != ENOSYS)
		return -1;

	child = fork();
	if (child != 0)
		return child;

	/* Child side of the fork() fallback. */
	snprintf(self, sizeof(self), "%d", getpid());
	if (cg_write(cgroup, "cgroup.procs", self))
		exit(EXIT_FAILURE);
	exit(fn(cgroup, arg));
}
490 
/*
 * Open an unnamed temporary file in the current directory with
 * O_TMPFILE | O_RDWR | O_EXCL.
 *
 * Returns the open() result: a file descriptor, or -1 on failure.
 */
int get_temp_fd(void)
{
	/*
	 * open() requires a mode argument whenever O_TMPFILE is given;
	 * without one the file mode is taken from arbitrary stack bytes.
	 */
	return open(".", O_TMPFILE | O_RDWR | O_EXCL, 0600);
}
495 
/*
 * Grow the file behind @fd by @size bytes with ftruncate(), then issue
 * PAGE_SIZE-sized read() calls on @fd, one per PAGE_SIZE step of the new
 * file size. The reads start at the descriptor's current offset.
 *
 * The read() results are deliberately ignored; presumably the calls are
 * made only to pull the file into the page cache, as the name suggests.
 *
 * Returns 0 on success, or -1 if fstat() or ftruncate() fails.
 */
int alloc_pagecache(int fd, size_t size)
{
	char buf[PAGE_SIZE];
	struct stat st;
	size_t off;

	if (fstat(fd, &st))
		return -1;

	size += st.st_size;

	if (ftruncate(fd, size))
		return -1;

	/* size_t index: an int would overflow for sizes of 2 GiB and up. */
	for (off = 0; off < size; off += sizeof(buf))
		read(fd, buf, sizeof(buf));

	return 0;
}
518 
/*
 * Allocate a heap buffer whose size in bytes is carried in @arg (cast to
 * an integer), write one byte at every PAGE_SIZE offset, then free it.
 * @cgroup is unused; the signature matches the callbacks taken by
 * cg_run().
 *
 * Returns 0 on success, or -1 if the allocation fails.
 */
int alloc_anon(const char *cgroup, void *arg)
{
	size_t size = (unsigned long)arg;
	size_t off;
	char *buf;

	buf = malloc(size);
	if (!buf && size)
		return -1;

	/* Index by offset so no pointer is ever formed past the buffer. */
	for (off = 0; off < size; off += PAGE_SIZE)
		buf[off] = 0;

	free(buf);
	return 0;
}
531 
is_swap_enabled(void)532 int is_swap_enabled(void)
533 {
534 	char buf[PAGE_SIZE];
535 	const char delim[] = "\n";
536 	int cnt = 0;
537 	char *line;
538 
539 	if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
540 		return -1;
541 
542 	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
543 		cnt++;
544 
545 	return cnt > 1;
546 }
547 
/*
 * Write @score, formatted as decimal, to /proc/<pid>/oom_score_adj.
 *
 * Returns 0 on success, or the negative result of the failing open() or
 * dprintf() call.
 */
int set_oom_adj_score(int pid, int score)
{
	char path[PATH_MAX];
	int fd, written, ret;

	sprintf(path, "/proc/%d/oom_score_adj", pid);

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return fd;

	written = dprintf(fd, "%d", score);
	ret = written < 0 ? written : 0;

	close(fd);
	return ret;
}
568 
proc_mount_contains(const char * option)569 int proc_mount_contains(const char *option)
570 {
571 	char buf[4 * PAGE_SIZE];
572 	ssize_t read;
573 
574 	read = read_text("/proc/mounts", buf, sizeof(buf));
575 	if (read < 0)
576 		return read;
577 
578 	return strstr(buf, option) != NULL;
579 }
580 
/*
 * Read /proc/<pid>/<item> into @buf as a NUL-terminated string. When
 * @pid is 0 the path is /proc/thread-self/<item> if @thread is true and
 * /proc/self/<item> otherwise.
 *
 * Returns the number of bytes read, or -1 on failure.
 */
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
	char path[PATH_MAX];
	ssize_t nread;

	if (pid) {
		snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
	} else {
		const char *self = thread ? "thread-self" : "self";

		snprintf(path, sizeof(path), "/proc/%s/%s", self, item);
	}

	nread = read_text(path, buf, size);
	if (nread < 0)
		return -1;

	return nread;
}
595 
proc_read_strstr(int pid,bool thread,const char * item,const char * needle)596 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
597 {
598 	char buf[PAGE_SIZE];
599 
600 	if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
601 		return -1;
602 
603 	return strstr(buf, needle) ? 0 : -1;
604 }
605 
clone_into_cgroup_run_wait(const char * cgroup)606 int clone_into_cgroup_run_wait(const char *cgroup)
607 {
608 	int cgroup_fd;
609 	pid_t pid;
610 
611 	cgroup_fd =  dirfd_open_opath(cgroup);
612 	if (cgroup_fd < 0)
613 		return -1;
614 
615 	pid = clone_into_cgroup(cgroup_fd);
616 	close_prot_errno(cgroup_fd);
617 	if (pid < 0)
618 		return -1;
619 
620 	if (pid == 0)
621 		exit(EXIT_SUCCESS);
622 
623 	/*
624 	 * We don't care whether this fails. We only care whether the initial
625 	 * clone succeeded.
626 	 */
627 	(void)clone_reap(pid, WEXITED);
628 	return 0;
629 }
630 
/*
 * Create an inotify instance that watches <cgroup>/<filename> for
 * IN_MODIFY events.
 *
 * The path is built in a stack buffer, so nothing is allocated and
 * nothing can leak.
 *
 * Returns the inotify file descriptor (to be closed by the caller), or
 * -1 if the instance or the watch cannot be set up.
 */
static int __prepare_for_wait(const char *cgroup, const char *filename)
{
	char path[PATH_MAX];
	int fd;

	fd = inotify_init1(0);
	if (fd == -1)
		return -1;

	snprintf(path, sizeof(path), "%s/%s", cgroup, filename);
	if (inotify_add_watch(fd, path, IN_MODIFY) == -1) {
		close(fd);
		return -1;
	}

	return fd;
}
647 
/*
 * Set up an inotify watch on <cgroup>/cgroup.events.
 *
 * Returns the __prepare_for_wait() result: an inotify descriptor, or -1.
 */
int cg_prepare_for_wait(const char *cgroup)
{
	static const char events_file[] = "cgroup.events";

	return __prepare_for_wait(cgroup, events_file);
}
652 
/*
 * Set up an inotify watch on <cgroup>/memory.events.
 *
 * Returns the __prepare_for_wait() result: an inotify descriptor, or -1.
 */
int memcg_prepare_for_wait(const char *cgroup)
{
	static const char events_file[] = "memory.events";

	return __prepare_for_wait(cgroup, events_file);
}
657 
/*
 * Block until @fd becomes readable (POLLIN).
 *
 * poll() is called with a 10 second timeout, but a timeout, an EINTR
 * interruption, or a wakeup without POLLIN simply leads to another
 * poll(); the function only returns on POLLIN or on a real poll() error.
 *
 * Returns 0 once POLLIN is reported, or -1 if poll() fails.
 */
int cg_wait_for(int fd)
{
	struct pollfd pfd = {
		.fd = fd,
		.events = POLLIN,
	};

	for (;;) {
		int n = poll(&pfd, 1, 10000);

		if (n > 0 && (pfd.revents & POLLIN))
			return 0;

		if (n == -1 && errno != EINTR)
			return -1;
	}
}
684