// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */

#include <argp.h>

#include <sys/prctl.h>
#include "local_storage_rcu_tasks_trace_bench.skel.h"
#include "bench.h"

#include <signal.h>

static struct {
	__u32 nr_procs;
	__u32 kthread_pid;
} args = {
	.nr_procs = 1000,
	.kthread_pid = 0,
};

enum {
	ARG_NR_PROCS = 7000,
	ARG_KTHREAD_PID = 7001,
};

static const struct argp_option opts[] = {
	{ "nr_procs", ARG_NR_PROCS, "NR_PROCS", 0,
		"Set number of user processes to spin up"},
	{ "kthread_pid", ARG_KTHREAD_PID, "PID", 0,
		"Pid of rcu_tasks_trace kthread for ticks tracking"},
	{},
};

static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
	long ret;

	switch (key) {
	case ARG_NR_PROCS:
		ret = strtol(arg, NULL, 10);
		if (ret < 1 || ret > UINT_MAX) {
			fprintf(stderr, "invalid nr_procs\n");
			argp_usage(state);
		}
		args.nr_procs = ret;
		break;
	case ARG_KTHREAD_PID:
		ret = strtol(arg, NULL, 10);
		if (ret < 1) {
			fprintf(stderr, "invalid kthread_pid\n");
			argp_usage(state);
		}
		args.kthread_pid = ret;
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}

	return 0;
}

const struct argp bench_local_storage_rcu_tasks_trace_argp = {
	.options = opts,
	.parser = parse_arg,
};

#define MAX_SLEEP_PROCS 150000

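/* Only a single producer thread and no consumer threads are supported, and
 * the number of forked sleeper processes is capped at MAX_SLEEP_PROCS.
 */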
static void validate(void)
{
	if (env.producer_cnt != 1) {
		fprintf(stderr, "benchmark doesn't support multi-producer!\n");
		exit(1);
	}
	if (env.consumer_cnt != 0) {
		fprintf(stderr, "benchmark doesn't support consumer!\n");
		exit(1);
	}

	if (args.nr_procs > MAX_SLEEP_PROCS) {
		fprintf(stderr, "benchmark supports up to %u sleeper procs!\n",
			MAX_SLEEP_PROCS);
		exit(1);
	}
}

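/* Read the rcu_tasks_trace kthread's cumulative system time from
 * /proc/PID/stat (field 15, stime, in clock ticks). Returns -1 if no
 * kthread_pid was provided on the command line.
 */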
static long kthread_pid_ticks(void)
{
	char procfs_path[100];
	long stime;
	FILE *f;

	if (!args.kthread_pid)
		return -1;

	sprintf(procfs_path, "/proc/%u/stat", args.kthread_pid);
	f = fopen(procfs_path, "r");
	if (!f) {
		fprintf(stderr, "couldn't open %s, exiting\n", procfs_path);
		goto err_out;
	}
	if (fscanf(f, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld", &stime) != 1) {
		fprintf(stderr, "fscanf of %s failed, exiting\n", procfs_path);
		goto err_out;
	}
	fclose(f);
	return stime;

err_out:
	if (f)
		fclose(f);
	exit(1);
	return 0;
}

static struct {
	struct local_storage_rcu_tasks_trace_bench *skel;
	long prev_kthread_stime;
} ctx;

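/* Sleeper children wake up every few seconds and issue a getpgid() syscall.
 * Per the benchmark description at the bottom of this file, these wakeups
 * drive the creation/destruction of BPF task-local storage.
 */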
static void sleep_and_loop(void)
{
	while (true) {
		sleep(rand() % 4);
		syscall(__NR_getpgid);
	}
}

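/* Fork args.nr_procs sleeper children (each set up to be SIGKILLed if the
 * runner dies), then open/load the skeleton and attach the measurement progs.
 */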
static void local_storage_tasks_trace_setup(void)
{
	int i, err, forkret, runner_pid;

	runner_pid = getpid();

	for (i = 0; i < args.nr_procs; i++) {
		forkret = fork();
		if (forkret < 0) {
			fprintf(stderr, "Error forking sleeper proc %u of %u, exiting\n", i,
				args.nr_procs);
			goto err_out;
		}

		if (!forkret) {
			err = prctl(PR_SET_PDEATHSIG, SIGKILL);
			if (err < 0) {
				fprintf(stderr, "prctl failed with err %d, exiting\n", errno);
				goto err_out;
			}

			if (getppid() != runner_pid) {
				fprintf(stderr, "Runner died while spinning up procs, exiting\n");
				goto err_out;
			}
			sleep_and_loop();
		}
	}
	printf("Spun up %u procs (our pid %d)\n", args.nr_procs, runner_pid);

	setup_libbpf();

	ctx.skel = local_storage_rcu_tasks_trace_bench__open_and_load();
	if (!ctx.skel) {
		fprintf(stderr, "Error doing open_and_load, exiting\n");
		goto err_out;
	}

	ctx.prev_kthread_stime = kthread_pid_ticks();

	if (!bpf_program__attach(ctx.skel->progs.get_local)) {
		fprintf(stderr, "Error attaching bpf program\n");
		goto err_out;
	}

	if (!bpf_program__attach(ctx.skel->progs.pregp_step)) {
		fprintf(stderr, "Error attaching bpf program\n");
		goto err_out;
	}

	if (!bpf_program__attach(ctx.skel->progs.postgp)) {
		fprintf(stderr, "Error attaching bpf program\n");
		goto err_out;
	}

	return;
err_out:
	exit(1);
}

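/* Snapshot and reset the grace-period hit count and cumulative grace-period
 * time accumulated by the BPF programs since the last interval, and compute
 * how many kthread ticks were consumed over the same interval.
 */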
static void measure(struct bench_res *res)
{
	long ticks;

	res->gp_ct = atomic_swap(&ctx.skel->bss->gp_hits, 0);
	res->gp_ns = atomic_swap(&ctx.skel->bss->gp_times, 0);
	ticks = kthread_pid_ticks();
	res->stime = ticks - ctx.prev_kthread_stime;
	ctx.prev_kthread_stime = ticks;
}

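/* The single producer thread spins on the same getpgid() syscall the sleeper
 * children issue.
 */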
static void *producer(void *input)
{
	while (true)
		syscall(__NR_getpgid);
	return NULL;
}

static void report_progress(int iter, struct bench_res *res, long delta_ns)
{
	if (ctx.skel->bss->unexpected) {
		fprintf(stderr, "Error: Unexpected order of bpf prog calls (postgp after pregp). ");
		fprintf(stderr, "Data can't be trusted, exiting\n");
		exit(1);
	}

	if (env.quiet)
		return;

	printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n",
	       iter, res->gp_ns / (double)res->gp_ct);
	printf("Iter %d\t avg ticks per tasks_trace grace period\t%lf\n",
	       iter, res->stime / (double)res->gp_ct);
}

static void report_final(struct bench_res res[], int res_cnt)
{
	struct basic_stats gp_stat;

	grace_period_latency_basic_stats(res, res_cnt, &gp_stat);
	printf("SUMMARY tasks_trace grace period latency");
	printf("\tavg %.3lf us\tstddev %.3lf us\n", gp_stat.mean, gp_stat.stddev);
	grace_period_ticks_basic_stats(res, res_cnt, &gp_stat);
	printf("SUMMARY ticks per tasks_trace grace period");
	printf("\tavg %.3lf\tstddev %.3lf\n", gp_stat.mean, gp_stat.stddev);
}

/* local-storage-tasks-trace: Benchmark performance of BPF local_storage's use
 * of RCU Tasks Trace.
 *
 * Stress RCU Tasks Trace by forking many tasks, all of which do no work aside
 * from a sleep() loop, and creating/destroying BPF task-local storage on
 * wakeup. The number of forked tasks is configurable.
 *
 * Exercising code paths which call call_rcu_tasks_trace() while there are many
 * thousands of tasks on the system should result in RCU Tasks Trace having to
 * do a noticeable amount of work.
 *
 * This should be observable by measuring rcu_tasks_trace_kthread CPU usage
 * after the grace period has ended, or by measuring grace period latency.
 *
 * This benchmark uses both approaches, attaching to the
 * rcu_tasks_trace_pregp_step and rcu_tasks_trace_postgp functions to measure
 * grace period latency, and using /proc/PID/stat to measure
 * rcu_tasks_trace_kthread kernel ticks.
 */
const struct bench bench_local_storage_tasks_trace = {
	.name = "local-storage-tasks-trace",
	.argp = &bench_local_storage_rcu_tasks_trace_argp,
	.validate = validate,
	.setup = local_storage_tasks_trace_setup,
	.producer_thread = producer,
	.measure = measure,
	.report_progress = report_progress,
	.report_final = report_final,
};