1 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
2 // Copyright (c) 2022, Huawei
3 
4 #include "vmlinux.h"
5 #include <bpf/bpf_helpers.h>
6 #include <bpf/bpf_tracing.h>
7 #include <bpf/bpf_core_read.h>
8 
/*
 * Classes of kernel work accounted by this program; the value is stored in
 * work_key.type.
 *
 * This should be in sync with "util/kwork.h", so the numeric values are
 * spelled out explicitly.
 */
enum kwork_class_type {
	KWORK_CLASS_IRQ       = 0,
	KWORK_CLASS_SOFTIRQ   = 1,
	KWORK_CLASS_WORKQUEUE = 2,
	KWORK_CLASS_SCHED     = 3,
	KWORK_CLASS_MAX       = 4,
};
19 
/* Capacity of the hash maps keyed by work_key / task_key. */
#define MAX_ENTRIES     102400
/* Capacity of the CPU filter map (one slot per CPU id). */
#define MAX_NR_CPUS     2048
/*
 * Bit tested in task_struct::flags to set task_data.is_kthread.
 * NOTE(review): presumably mirrors the kernel's PF_KTHREAD value - confirm.
 */
#define PF_KTHREAD      0x00200000
/* Size in bytes of task_data.comm. */
#define MAX_COMMAND_LEN 16
24 
25 struct time_data {
26 	__u64 timestamp;
27 };
28 
29 struct work_data {
30 	__u64 runtime;
31 };
32 
33 struct task_data {
34 	__u32 tgid;
35 	__u32 is_kthread;
36 	char comm[MAX_COMMAND_LEN];
37 };
38 
39 struct work_key {
40 	__u32 type;
41 	__u32 pid;
42 	__u64 task_p;
43 };
44 
45 struct task_key {
46 	__u32 pid;
47 	__u32 cpu;
48 };
49 
50 struct {
51 	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
52 	__uint(map_flags, BPF_F_NO_PREALLOC);
53 	__type(key, int);
54 	__type(value, struct time_data);
55 } kwork_top_task_time SEC(".maps");
56 
57 struct {
58 	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
59 	__uint(key_size, sizeof(struct work_key));
60 	__uint(value_size, sizeof(struct time_data));
61 	__uint(max_entries, MAX_ENTRIES);
62 } kwork_top_irq_time SEC(".maps");
63 
64 struct {
65 	__uint(type, BPF_MAP_TYPE_HASH);
66 	__uint(key_size, sizeof(struct task_key));
67 	__uint(value_size, sizeof(struct task_data));
68 	__uint(max_entries, MAX_ENTRIES);
69 } kwork_top_tasks SEC(".maps");
70 
71 struct {
72 	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
73 	__uint(key_size, sizeof(struct work_key));
74 	__uint(value_size, sizeof(struct work_data));
75 	__uint(max_entries, MAX_ENTRIES);
76 } kwork_top_works SEC(".maps");
77 
78 struct {
79 	__uint(type, BPF_MAP_TYPE_HASH);
80 	__uint(key_size, sizeof(u32));
81 	__uint(value_size, sizeof(u8));
82 	__uint(max_entries, MAX_NR_CPUS);
83 } kwork_top_cpu_filter SEC(".maps");
84 
85 int enabled = 0;
86 
87 const volatile int has_cpu_filter = 0;
88 
89 __u64 from_timestamp = 0;
90 __u64 to_timestamp = 0;
91 
cpu_is_filtered(__u32 cpu)92 static __always_inline int cpu_is_filtered(__u32 cpu)
93 {
94 	__u8 *cpu_val;
95 
96 	if (has_cpu_filter) {
97 		cpu_val = bpf_map_lookup_elem(&kwork_top_cpu_filter, &cpu);
98 		if (!cpu_val)
99 			return 1;
100 	}
101 
102 	return 0;
103 }
104 
update_task_info(struct task_struct * task,__u32 cpu)105 static __always_inline void update_task_info(struct task_struct *task, __u32 cpu)
106 {
107 	struct task_key key = {
108 		.pid = task->pid,
109 		.cpu = cpu,
110 	};
111 
112 	if (!bpf_map_lookup_elem(&kwork_top_tasks, &key)) {
113 		struct task_data data = {
114 			.tgid = task->tgid,
115 			.is_kthread = task->flags & PF_KTHREAD ? 1 : 0,
116 		};
117 		BPF_CORE_READ_STR_INTO(&data.comm, task, comm);
118 
119 		bpf_map_update_elem(&kwork_top_tasks, &key, &data, BPF_ANY);
120 	}
121 }
122 
update_work(struct work_key * key,__u64 delta)123 static __always_inline void update_work(struct work_key *key, __u64 delta)
124 {
125 	struct work_data *data;
126 
127 	data = bpf_map_lookup_elem(&kwork_top_works, key);
128 	if (data) {
129 		data->runtime += delta;
130 	} else {
131 		struct work_data new_data = {
132 			.runtime = delta,
133 		};
134 
135 		bpf_map_update_elem(&kwork_top_works, key, &new_data, BPF_ANY);
136 	}
137 }
138 
/*
 * Account the time @task just spent running, up to @ts, as KWORK_CLASS_SCHED
 * work and make sure the task is described in kwork_top_tasks for @cpu.
 *
 * The interval starts at the timestamp saved by on_sched_in(); when the task
 * has no storage yet, from_timestamp is used instead.
 */
static void on_sched_out(struct task_struct *task, __u64 ts, __u32 cpu)
{
	struct work_key key = {
		.type = KWORK_CLASS_SCHED,
		.pid = task->pid,
		.task_p = (__u64)task,
	};
	struct time_data *last_in;
	__u64 start;

	/* flags == 0: look up only, never create the storage here */
	last_in = bpf_task_storage_get(&kwork_top_task_time, task, NULL, 0);
	start = last_in ? last_in->timestamp : from_timestamp;

	update_work(&key, ts - start);
	update_task_info(task, cpu);
}
159 
/*
 * Save @ts in the task-local storage of @task, creating the storage when it
 * does not exist. Nothing is written if the storage cannot be obtained.
 */
static void on_sched_in(struct task_struct *task, __u64 ts)
{
	struct time_data *slot;

	slot = bpf_task_storage_get(&kwork_top_task_time, task, NULL,
				    BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (!slot)
		return;

	slot->timestamp = ts;
}
169 
170 SEC("tp_btf/sched_switch")
on_switch(u64 * ctx)171 int on_switch(u64 *ctx)
172 {
173 	struct task_struct *prev, *next;
174 
175 	prev = (struct task_struct *)ctx[1];
176 	next = (struct task_struct *)ctx[2];
177 
178 	if (!enabled)
179 		return 0;
180 
181 	__u32 cpu = bpf_get_smp_processor_id();
182 
183 	if (cpu_is_filtered(cpu))
184 		return 0;
185 
186 	__u64 ts = bpf_ktime_get_ns();
187 
188 	on_sched_out(prev, ts, cpu);
189 	on_sched_in(next, ts);
190 
191 	return 0;
192 }
193 
194 SEC("tp_btf/irq_handler_entry")
on_irq_handler_entry(u64 * cxt)195 int on_irq_handler_entry(u64 *cxt)
196 {
197 	struct task_struct *task;
198 
199 	if (!enabled)
200 		return 0;
201 
202 	__u32 cpu = bpf_get_smp_processor_id();
203 
204 	if (cpu_is_filtered(cpu))
205 		return 0;
206 
207 	__u64 ts = bpf_ktime_get_ns();
208 
209 	task = (struct task_struct *)bpf_get_current_task();
210 	if (!task)
211 		return 0;
212 
213 	struct work_key key = {
214 		.type = KWORK_CLASS_IRQ,
215 		.pid = BPF_CORE_READ(task, pid),
216 		.task_p = (__u64)task,
217 	};
218 
219 	struct time_data data = {
220 		.timestamp = ts,
221 	};
222 
223 	bpf_map_update_elem(&kwork_top_irq_time, &key, &data, BPF_ANY);
224 
225 	return 0;
226 }
227 
228 SEC("tp_btf/irq_handler_exit")
on_irq_handler_exit(u64 * cxt)229 int on_irq_handler_exit(u64 *cxt)
230 {
231 	__u64 delta;
232 	struct task_struct *task;
233 	struct time_data *pelem;
234 
235 	if (!enabled)
236 		return 0;
237 
238 	__u32 cpu = bpf_get_smp_processor_id();
239 
240 	if (cpu_is_filtered(cpu))
241 		return 0;
242 
243 	__u64 ts = bpf_ktime_get_ns();
244 
245 	task = (struct task_struct *)bpf_get_current_task();
246 	if (!task)
247 		return 0;
248 
249 	struct work_key key = {
250 		.type = KWORK_CLASS_IRQ,
251 		.pid = BPF_CORE_READ(task, pid),
252 		.task_p = (__u64)task,
253 	};
254 
255 	pelem = bpf_map_lookup_elem(&kwork_top_irq_time, &key);
256 	if (pelem && pelem->timestamp != 0)
257 		delta = ts - pelem->timestamp;
258 	else
259 		delta = ts - from_timestamp;
260 
261 	update_work(&key, delta);
262 
263 	return 0;
264 }
265 
266 SEC("tp_btf/softirq_entry")
on_softirq_entry(u64 * cxt)267 int on_softirq_entry(u64 *cxt)
268 {
269 	struct task_struct *task;
270 
271 	if (!enabled)
272 		return 0;
273 
274 	__u32 cpu = bpf_get_smp_processor_id();
275 
276 	if (cpu_is_filtered(cpu))
277 		return 0;
278 
279 	__u64 ts = bpf_ktime_get_ns();
280 
281 	task = (struct task_struct *)bpf_get_current_task();
282 	if (!task)
283 		return 0;
284 
285 	struct work_key key = {
286 		.type = KWORK_CLASS_SOFTIRQ,
287 		.pid = BPF_CORE_READ(task, pid),
288 		.task_p = (__u64)task,
289 	};
290 
291 	struct time_data data = {
292 		.timestamp = ts,
293 	};
294 
295 	bpf_map_update_elem(&kwork_top_irq_time, &key, &data, BPF_ANY);
296 
297 	return 0;
298 }
299 
300 SEC("tp_btf/softirq_exit")
on_softirq_exit(u64 * cxt)301 int on_softirq_exit(u64 *cxt)
302 {
303 	__u64 delta;
304 	struct task_struct *task;
305 	struct time_data *pelem;
306 
307 	if (!enabled)
308 		return 0;
309 
310 	__u32 cpu = bpf_get_smp_processor_id();
311 
312 	if (cpu_is_filtered(cpu))
313 		return 0;
314 
315 	__u64 ts = bpf_ktime_get_ns();
316 
317 	task = (struct task_struct *)bpf_get_current_task();
318 	if (!task)
319 		return 0;
320 
321 	struct work_key key = {
322 		.type = KWORK_CLASS_SOFTIRQ,
323 		.pid = BPF_CORE_READ(task, pid),
324 		.task_p = (__u64)task,
325 	};
326 
327 	pelem = bpf_map_lookup_elem(&kwork_top_irq_time, &key);
328 	if (pelem)
329 		delta = ts - pelem->timestamp;
330 	else
331 		delta = ts - from_timestamp;
332 
333 	update_work(&key, delta);
334 
335 	return 0;
336 }
337 
338 char LICENSE[] SEC("license") = "Dual BSD/GPL";
339