1 // SPDX-License-Identifier: GPL-2.0
2
3 #define _GNU_SOURCE
4 #include <linux/limits.h>
5 #include <sys/sysinfo.h>
6 #include <sys/wait.h>
7 #include <errno.h>
8 #include <pthread.h>
9 #include <stdio.h>
10 #include <time.h>
11
12 #include "../kselftest.h"
13 #include "cgroup_util.h"
14
/* Selects which clock hog_cpus_timed() measures its run interval against. */
enum hog_clock_type {
	// Count elapsed time using the CLOCK_PROCESS_CPUTIME_ID clock.
	CPU_HOG_CLOCK_PROCESS,
	// Count elapsed wallclock time (measured with CLOCK_MONOTONIC from the
	// moment the hog starts).
	CPU_HOG_CLOCK_WALL,
};
21
/* Bookkeeping for one CPU-hogging child process and the cgroup it runs in. */
struct cpu_hogger {
	char *cgroup;	/* cgroup path string; released with free() by the owning test */
	pid_t pid;	/* pid returned when the hog process was spawned */
	long usage;	/* usage_usec read from the cgroup's cpu.stat once the hog exited */
};
27
/* Argument block handed to hog_cpus_timed() through its void * parameter. */
struct cpu_hog_func_param {
	int nprocs;			/* number of busy-loop threads to create */
	struct timespec ts;		/* how long the hog should keep running */
	enum hog_clock_type clock_type;	/* clock against which ts is measured */
};
33
34 /*
35 * This test creates two nested cgroups with and without enabling
36 * the cpu controller.
37 */
test_cpucg_subtree_control(const char * root)38 static int test_cpucg_subtree_control(const char *root)
39 {
40 char *parent = NULL, *child = NULL, *parent2 = NULL, *child2 = NULL;
41 int ret = KSFT_FAIL;
42
43 // Create two nested cgroups with the cpu controller enabled.
44 parent = cg_name(root, "cpucg_test_0");
45 if (!parent)
46 goto cleanup;
47
48 if (cg_create(parent))
49 goto cleanup;
50
51 if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
52 goto cleanup;
53
54 child = cg_name(parent, "cpucg_test_child");
55 if (!child)
56 goto cleanup;
57
58 if (cg_create(child))
59 goto cleanup;
60
61 if (cg_read_strstr(child, "cgroup.controllers", "cpu"))
62 goto cleanup;
63
64 // Create two nested cgroups without enabling the cpu controller.
65 parent2 = cg_name(root, "cpucg_test_1");
66 if (!parent2)
67 goto cleanup;
68
69 if (cg_create(parent2))
70 goto cleanup;
71
72 child2 = cg_name(parent2, "cpucg_test_child");
73 if (!child2)
74 goto cleanup;
75
76 if (cg_create(child2))
77 goto cleanup;
78
79 if (!cg_read_strstr(child2, "cgroup.controllers", "cpu"))
80 goto cleanup;
81
82 ret = KSFT_PASS;
83
84 cleanup:
85 cg_destroy(child);
86 free(child);
87 cg_destroy(child2);
88 free(child2);
89 cg_destroy(parent);
90 free(parent);
91 cg_destroy(parent2);
92 free(parent2);
93
94 return ret;
95 }
96
/*
 * Thread entry point that busy-loops forever; it never returns on its own.
 * @arg is ignored.
 */
static void *hog_cpu_thread_func(void *arg)
{
	(void)arg;

	for (;;)
		;

	return NULL;
}
104
/*
 * Computes @lhs - @rhs, clamped at zero: whenever @rhs is later than @lhs
 * the result is { 0, 0 } instead of a negative interval.
 */
static struct timespec
timespec_sub(const struct timespec *lhs, const struct timespec *rhs)
{
	struct timespec diff = {
		.tv_sec = 0,
		.tv_nsec = 0,
	};

	if (lhs->tv_sec < rhs->tv_sec)
		return diff;

	/* No borrow needed: subtract field by field. */
	if (lhs->tv_nsec >= rhs->tv_nsec) {
		diff.tv_sec = lhs->tv_sec - rhs->tv_sec;
		diff.tv_nsec = lhs->tv_nsec - rhs->tv_nsec;
		return diff;
	}

	/* A borrow is needed; with equal seconds that means rhs is later. */
	if (lhs->tv_sec == rhs->tv_sec)
		return diff;

	diff.tv_sec = lhs->tv_sec - rhs->tv_sec - 1;
	diff.tv_nsec = NSEC_PER_SEC - rhs->tv_nsec + lhs->tv_nsec;

	return diff;
}
130
hog_cpus_timed(const char * cgroup,void * arg)131 static int hog_cpus_timed(const char *cgroup, void *arg)
132 {
133 const struct cpu_hog_func_param *param =
134 (struct cpu_hog_func_param *)arg;
135 struct timespec ts_run = param->ts;
136 struct timespec ts_remaining = ts_run;
137 struct timespec ts_start;
138 int i, ret;
139
140 ret = clock_gettime(CLOCK_MONOTONIC, &ts_start);
141 if (ret != 0)
142 return ret;
143
144 for (i = 0; i < param->nprocs; i++) {
145 pthread_t tid;
146
147 ret = pthread_create(&tid, NULL, &hog_cpu_thread_func, NULL);
148 if (ret != 0)
149 return ret;
150 }
151
152 while (ts_remaining.tv_sec > 0 || ts_remaining.tv_nsec > 0) {
153 struct timespec ts_total;
154
155 ret = nanosleep(&ts_remaining, NULL);
156 if (ret && errno != EINTR)
157 return ret;
158
159 if (param->clock_type == CPU_HOG_CLOCK_PROCESS) {
160 ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts_total);
161 if (ret != 0)
162 return ret;
163 } else {
164 struct timespec ts_current;
165
166 ret = clock_gettime(CLOCK_MONOTONIC, &ts_current);
167 if (ret != 0)
168 return ret;
169
170 ts_total = timespec_sub(&ts_current, &ts_start);
171 }
172
173 ts_remaining = timespec_sub(&ts_run, &ts_total);
174 }
175
176 return 0;
177 }
178
179 /*
180 * Creates a cpu cgroup, burns a CPU for a few quanta, and verifies that
181 * cpu.stat shows the expected output.
182 */
test_cpucg_stats(const char * root)183 static int test_cpucg_stats(const char *root)
184 {
185 int ret = KSFT_FAIL;
186 long usage_usec, user_usec, system_usec;
187 long usage_seconds = 2;
188 long expected_usage_usec = usage_seconds * USEC_PER_SEC;
189 char *cpucg;
190
191 cpucg = cg_name(root, "cpucg_test");
192 if (!cpucg)
193 goto cleanup;
194
195 if (cg_create(cpucg))
196 goto cleanup;
197
198 usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
199 user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
200 system_usec = cg_read_key_long(cpucg, "cpu.stat", "system_usec");
201 if (usage_usec != 0 || user_usec != 0 || system_usec != 0)
202 goto cleanup;
203
204 struct cpu_hog_func_param param = {
205 .nprocs = 1,
206 .ts = {
207 .tv_sec = usage_seconds,
208 .tv_nsec = 0,
209 },
210 .clock_type = CPU_HOG_CLOCK_PROCESS,
211 };
212 if (cg_run(cpucg, hog_cpus_timed, (void *)¶m))
213 goto cleanup;
214
215 usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
216 user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
217 if (user_usec <= 0)
218 goto cleanup;
219
220 if (!values_close(usage_usec, expected_usage_usec, 1))
221 goto cleanup;
222
223 ret = KSFT_PASS;
224
225 cleanup:
226 cg_destroy(cpucg);
227 free(cpucg);
228
229 return ret;
230 }
231
232 static int
run_cpucg_weight_test(const char * root,pid_t (* spawn_child)(const struct cpu_hogger * child),int (* validate)(const struct cpu_hogger * children,int num_children))233 run_cpucg_weight_test(
234 const char *root,
235 pid_t (*spawn_child)(const struct cpu_hogger *child),
236 int (*validate)(const struct cpu_hogger *children, int num_children))
237 {
238 int ret = KSFT_FAIL, i;
239 char *parent = NULL;
240 struct cpu_hogger children[3] = {};
241
242 parent = cg_name(root, "cpucg_test_0");
243 if (!parent)
244 goto cleanup;
245
246 if (cg_create(parent))
247 goto cleanup;
248
249 if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
250 goto cleanup;
251
252 for (i = 0; i < ARRAY_SIZE(children); i++) {
253 children[i].cgroup = cg_name_indexed(parent, "cpucg_child", i);
254 if (!children[i].cgroup)
255 goto cleanup;
256
257 if (cg_create(children[i].cgroup))
258 goto cleanup;
259
260 if (cg_write_numeric(children[i].cgroup, "cpu.weight",
261 50 * (i + 1)))
262 goto cleanup;
263 }
264
265 for (i = 0; i < ARRAY_SIZE(children); i++) {
266 pid_t pid = spawn_child(&children[i]);
267 if (pid <= 0)
268 goto cleanup;
269 children[i].pid = pid;
270 }
271
272 for (i = 0; i < ARRAY_SIZE(children); i++) {
273 int retcode;
274
275 waitpid(children[i].pid, &retcode, 0);
276 if (!WIFEXITED(retcode))
277 goto cleanup;
278 if (WEXITSTATUS(retcode))
279 goto cleanup;
280 }
281
282 for (i = 0; i < ARRAY_SIZE(children); i++)
283 children[i].usage = cg_read_key_long(children[i].cgroup,
284 "cpu.stat", "usage_usec");
285
286 if (validate(children, ARRAY_SIZE(children)))
287 goto cleanup;
288
289 ret = KSFT_PASS;
290 cleanup:
291 for (i = 0; i < ARRAY_SIZE(children); i++) {
292 cg_destroy(children[i].cgroup);
293 free(children[i].cgroup);
294 }
295 cg_destroy(parent);
296 free(parent);
297
298 return ret;
299 }
300
weight_hog_ncpus(const struct cpu_hogger * child,int ncpus)301 static pid_t weight_hog_ncpus(const struct cpu_hogger *child, int ncpus)
302 {
303 long usage_seconds = 10;
304 struct cpu_hog_func_param param = {
305 .nprocs = ncpus,
306 .ts = {
307 .tv_sec = usage_seconds,
308 .tv_nsec = 0,
309 },
310 .clock_type = CPU_HOG_CLOCK_WALL,
311 };
312 return cg_run_nowait(child->cgroup, hog_cpus_timed, (void *)¶m);
313 }
314
/* Spawn callback: hog with one thread per processor reported by get_nprocs(). */
static pid_t weight_hog_all_cpus(const struct cpu_hogger *child)
{
	const int ncpus = get_nprocs();

	return weight_hog_ncpus(child, ncpus);
}
319
320 static int
overprovision_validate(const struct cpu_hogger * children,int num_children)321 overprovision_validate(const struct cpu_hogger *children, int num_children)
322 {
323 int ret = KSFT_FAIL, i;
324
325 for (i = 0; i < num_children - 1; i++) {
326 long delta;
327
328 if (children[i + 1].usage <= children[i].usage)
329 goto cleanup;
330
331 delta = children[i + 1].usage - children[i].usage;
332 if (!values_close(delta, children[0].usage, 35))
333 goto cleanup;
334 }
335
336 ret = KSFT_PASS;
337 cleanup:
338 return ret;
339 }
340
341 /*
342 * First, this test creates the following hierarchy:
343 * A
344 * A/B cpu.weight = 50
345 * A/C cpu.weight = 100
346 * A/D cpu.weight = 150
347 *
348 * A separate process is then created for each child cgroup which spawns as
349 * many threads as there are cores, and hogs each CPU as much as possible
350 * for some time interval.
351 *
352 * Once all of the children have exited, we verify that each child cgroup
353 * was given proportional runtime as informed by their cpu.weight.
354 */
test_cpucg_weight_overprovisioned(const char * root)355 static int test_cpucg_weight_overprovisioned(const char *root)
356 {
357 return run_cpucg_weight_test(root, weight_hog_all_cpus,
358 overprovision_validate);
359 }
360
/* Spawn callback: hog with a single busy-loop thread. */
static pid_t weight_hog_one_cpu(const struct cpu_hogger *child)
{
	const int ncpus = 1;

	return weight_hog_ncpus(child, ncpus);
}
365
366 static int
underprovision_validate(const struct cpu_hogger * children,int num_children)367 underprovision_validate(const struct cpu_hogger *children, int num_children)
368 {
369 int ret = KSFT_FAIL, i;
370
371 for (i = 0; i < num_children - 1; i++) {
372 if (!values_close(children[i + 1].usage, children[0].usage, 15))
373 goto cleanup;
374 }
375
376 ret = KSFT_PASS;
377 cleanup:
378 return ret;
379 }
380
381 /*
382 * First, this test creates the following hierarchy:
383 * A
384 * A/B cpu.weight = 50
385 * A/C cpu.weight = 100
386 * A/D cpu.weight = 150
387 *
388 * A separate process is then created for each child cgroup which spawns a
389 * single thread that hogs a CPU. The testcase is only run on systems that
390 * have at least one core per-thread in the child processes.
391 *
392 * Once all of the children have exited, we verify that each child cgroup
393 * had roughly the same runtime despite having different cpu.weight.
394 */
test_cpucg_weight_underprovisioned(const char * root)395 static int test_cpucg_weight_underprovisioned(const char *root)
396 {
397 // Only run the test if there are enough cores to avoid overprovisioning
398 // the system.
399 if (get_nprocs() < 4)
400 return KSFT_SKIP;
401
402 return run_cpucg_weight_test(root, weight_hog_one_cpu,
403 underprovision_validate);
404 }
405
406 static int
run_cpucg_nested_weight_test(const char * root,bool overprovisioned)407 run_cpucg_nested_weight_test(const char *root, bool overprovisioned)
408 {
409 int ret = KSFT_FAIL, i;
410 char *parent = NULL, *child = NULL;
411 struct cpu_hogger leaf[3] = {};
412 long nested_leaf_usage, child_usage;
413 int nprocs = get_nprocs();
414
415 if (!overprovisioned) {
416 if (nprocs < 4)
417 /*
418 * Only run the test if there are enough cores to avoid overprovisioning
419 * the system.
420 */
421 return KSFT_SKIP;
422 nprocs /= 4;
423 }
424
425 parent = cg_name(root, "cpucg_test");
426 child = cg_name(parent, "cpucg_child");
427 if (!parent || !child)
428 goto cleanup;
429
430 if (cg_create(parent))
431 goto cleanup;
432 if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
433 goto cleanup;
434
435 if (cg_create(child))
436 goto cleanup;
437 if (cg_write(child, "cgroup.subtree_control", "+cpu"))
438 goto cleanup;
439 if (cg_write(child, "cpu.weight", "1000"))
440 goto cleanup;
441
442 for (i = 0; i < ARRAY_SIZE(leaf); i++) {
443 const char *ancestor;
444 long weight;
445
446 if (i == 0) {
447 ancestor = parent;
448 weight = 1000;
449 } else {
450 ancestor = child;
451 weight = 5000;
452 }
453 leaf[i].cgroup = cg_name_indexed(ancestor, "cpucg_leaf", i);
454 if (!leaf[i].cgroup)
455 goto cleanup;
456
457 if (cg_create(leaf[i].cgroup))
458 goto cleanup;
459
460 if (cg_write_numeric(leaf[i].cgroup, "cpu.weight", weight))
461 goto cleanup;
462 }
463
464 for (i = 0; i < ARRAY_SIZE(leaf); i++) {
465 pid_t pid;
466 struct cpu_hog_func_param param = {
467 .nprocs = nprocs,
468 .ts = {
469 .tv_sec = 10,
470 .tv_nsec = 0,
471 },
472 .clock_type = CPU_HOG_CLOCK_WALL,
473 };
474
475 pid = cg_run_nowait(leaf[i].cgroup, hog_cpus_timed,
476 (void *)¶m);
477 if (pid <= 0)
478 goto cleanup;
479 leaf[i].pid = pid;
480 }
481
482 for (i = 0; i < ARRAY_SIZE(leaf); i++) {
483 int retcode;
484
485 waitpid(leaf[i].pid, &retcode, 0);
486 if (!WIFEXITED(retcode))
487 goto cleanup;
488 if (WEXITSTATUS(retcode))
489 goto cleanup;
490 }
491
492 for (i = 0; i < ARRAY_SIZE(leaf); i++) {
493 leaf[i].usage = cg_read_key_long(leaf[i].cgroup,
494 "cpu.stat", "usage_usec");
495 if (leaf[i].usage <= 0)
496 goto cleanup;
497 }
498
499 nested_leaf_usage = leaf[1].usage + leaf[2].usage;
500 if (overprovisioned) {
501 if (!values_close(leaf[0].usage, nested_leaf_usage, 15))
502 goto cleanup;
503 } else if (!values_close(leaf[0].usage * 2, nested_leaf_usage, 15))
504 goto cleanup;
505
506
507 child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec");
508 if (child_usage <= 0)
509 goto cleanup;
510 if (!values_close(child_usage, nested_leaf_usage, 1))
511 goto cleanup;
512
513 ret = KSFT_PASS;
514 cleanup:
515 for (i = 0; i < ARRAY_SIZE(leaf); i++) {
516 cg_destroy(leaf[i].cgroup);
517 free(leaf[i].cgroup);
518 }
519 cg_destroy(child);
520 free(child);
521 cg_destroy(parent);
522 free(parent);
523
524 return ret;
525 }
526
/*
 * Builds this hierarchy:
 * A
 * A/B     cpu.weight = 1000
 * A/C     cpu.weight = 1000
 * A/C/D   cpu.weight = 5000
 * A/C/E   cpu.weight = 5000
 *
 * One process per leaf (B, D and E) is started; each spawns as many hog
 * threads as get_nprocs() reports and burns CPU for a few seconds.
 *
 * After all processes have exited, the usage of B must roughly equal the
 * combined usage of D and E, and the usage reported for C must match that
 * combined usage.
 */
static int
test_cpucg_nested_weight_overprovisioned(const char *root)
{
	const bool overprovisioned = true;

	return run_cpucg_nested_weight_test(root, overprovisioned);
}
546
/*
 * Builds this hierarchy:
 * A
 * A/B     cpu.weight = 1000
 * A/C     cpu.weight = 1000
 * A/C/D   cpu.weight = 5000
 * A/C/E   cpu.weight = 5000
 *
 * One process per leaf (B, D and E) is started; each spawns
 * get_nprocs() / 4 hog threads that burn CPU for a few seconds. The test is
 * skipped when fewer than 4 processors are available.
 *
 * After all processes have exited, twice the usage of B must roughly equal
 * the combined usage of D and E (i.e. the leaves had about the same usage),
 * and the usage reported for C must match that combined usage.
 */
static int
test_cpucg_nested_weight_underprovisioned(const char *root)
{
	const bool overprovisioned = false;

	return run_cpucg_nested_weight_test(root, overprovisioned);
}
566
567 /*
568 * This test creates a cgroup with some maximum value within a period, and
569 * verifies that a process in the cgroup is not overscheduled.
570 */
test_cpucg_max(const char * root)571 static int test_cpucg_max(const char *root)
572 {
573 int ret = KSFT_FAIL;
574 long usage_usec, user_usec;
575 long usage_seconds = 1;
576 long expected_usage_usec = usage_seconds * USEC_PER_SEC;
577 char *cpucg;
578
579 cpucg = cg_name(root, "cpucg_test");
580 if (!cpucg)
581 goto cleanup;
582
583 if (cg_create(cpucg))
584 goto cleanup;
585
586 if (cg_write(cpucg, "cpu.max", "1000"))
587 goto cleanup;
588
589 struct cpu_hog_func_param param = {
590 .nprocs = 1,
591 .ts = {
592 .tv_sec = usage_seconds,
593 .tv_nsec = 0,
594 },
595 .clock_type = CPU_HOG_CLOCK_WALL,
596 };
597 if (cg_run(cpucg, hog_cpus_timed, (void *)¶m))
598 goto cleanup;
599
600 usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
601 user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
602 if (user_usec <= 0)
603 goto cleanup;
604
605 if (user_usec >= expected_usage_usec)
606 goto cleanup;
607
608 if (values_close(usage_usec, expected_usage_usec, 95))
609 goto cleanup;
610
611 ret = KSFT_PASS;
612
613 cleanup:
614 cg_destroy(cpucg);
615 free(cpucg);
616
617 return ret;
618 }
619
620 /*
621 * This test verifies that a process inside of a nested cgroup whose parent
622 * group has a cpu.max value set, is properly throttled.
623 */
test_cpucg_max_nested(const char * root)624 static int test_cpucg_max_nested(const char *root)
625 {
626 int ret = KSFT_FAIL;
627 long usage_usec, user_usec;
628 long usage_seconds = 1;
629 long expected_usage_usec = usage_seconds * USEC_PER_SEC;
630 char *parent, *child;
631
632 parent = cg_name(root, "cpucg_parent");
633 child = cg_name(parent, "cpucg_child");
634 if (!parent || !child)
635 goto cleanup;
636
637 if (cg_create(parent))
638 goto cleanup;
639
640 if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
641 goto cleanup;
642
643 if (cg_create(child))
644 goto cleanup;
645
646 if (cg_write(parent, "cpu.max", "1000"))
647 goto cleanup;
648
649 struct cpu_hog_func_param param = {
650 .nprocs = 1,
651 .ts = {
652 .tv_sec = usage_seconds,
653 .tv_nsec = 0,
654 },
655 .clock_type = CPU_HOG_CLOCK_WALL,
656 };
657 if (cg_run(child, hog_cpus_timed, (void *)¶m))
658 goto cleanup;
659
660 usage_usec = cg_read_key_long(child, "cpu.stat", "usage_usec");
661 user_usec = cg_read_key_long(child, "cpu.stat", "user_usec");
662 if (user_usec <= 0)
663 goto cleanup;
664
665 if (user_usec >= expected_usage_usec)
666 goto cleanup;
667
668 if (values_close(usage_usec, expected_usage_usec, 95))
669 goto cleanup;
670
671 ret = KSFT_PASS;
672
673 cleanup:
674 cg_destroy(child);
675 free(child);
676 cg_destroy(parent);
677 free(parent);
678
679 return ret;
680 }
681
682 #define T(x) { x, #x }
683 struct cpucg_test {
684 int (*fn)(const char *root);
685 const char *name;
686 } tests[] = {
687 T(test_cpucg_subtree_control),
688 T(test_cpucg_stats),
689 T(test_cpucg_weight_overprovisioned),
690 T(test_cpucg_weight_underprovisioned),
691 T(test_cpucg_nested_weight_overprovisioned),
692 T(test_cpucg_nested_weight_underprovisioned),
693 T(test_cpucg_max),
694 T(test_cpucg_max_nested),
695 };
696 #undef T
697
main(int argc,char * argv[])698 int main(int argc, char *argv[])
699 {
700 char root[PATH_MAX];
701 int i, ret = EXIT_SUCCESS;
702
703 if (cg_find_unified_root(root, sizeof(root), NULL))
704 ksft_exit_skip("cgroup v2 isn't mounted\n");
705
706 if (cg_read_strstr(root, "cgroup.subtree_control", "cpu"))
707 if (cg_write(root, "cgroup.subtree_control", "+cpu"))
708 ksft_exit_skip("Failed to set cpu controller\n");
709
710 for (i = 0; i < ARRAY_SIZE(tests); i++) {
711 switch (tests[i].fn(root)) {
712 case KSFT_PASS:
713 ksft_test_result_pass("%s\n", tests[i].name);
714 break;
715 case KSFT_SKIP:
716 ksft_test_result_skip("%s\n", tests[i].name);
717 break;
718 default:
719 ret = EXIT_FAILURE;
720 ksft_test_result_fail("%s\n", tests[i].name);
721 break;
722 }
723 }
724
725 return ret;
726 }
727