// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
3 #include <linux/sched.h>
4 #include <linux/ptrace.h>
5 #include <stdint.h>
6 #include <stddef.h>
7 #include <stdbool.h>
8 #include <linux/bpf.h>
9 #include <bpf/bpf_helpers.h>
10 #include "bpf_misc.h"
11 #include "bpf_compiler.h"
12 
/* Sizes, in bytes, of the fixed-length character buffers declared below. */
#define FUNCTION_NAME_LEN 64	/* Symbol.name */
#define FILE_NAME_LEN 128	/* Symbol.file */
#define TASK_COMM_LEN 16	/* Event.comm */
16 
/*
 * Byte offsets of the fields this program reads out of the traced process.
 * Each offset is added to a user-space pointer before the result is handed
 * to bpf_probe_read_user() / bpf_probe_read_user_str().
 */
typedef struct {
	int PyThreadState_frame;	/* thread state -> current frame pointer */
	int PyThreadState_thread;	/* thread state -> id compared against pthread_self */
	int PyFrameObject_back;		/* frame -> previous frame pointer */
	int PyFrameObject_code;		/* frame -> code object pointer */
	int PyFrameObject_lineno;	/* not referenced by the code visible here */
	int PyCodeObject_filename;	/* code object -> file name string object */
	int PyCodeObject_name;		/* code object -> function name string object */
	int String_data;		/* string object -> character data */
	int String_size;		/* not referenced by the code visible here */
} OffsetConfig;
28 
29 typedef struct {
30 	uintptr_t current_state_addr;
31 	uintptr_t tls_key_addr;
32 	OffsetConfig offsets;
33 	bool use_tls;
34 } PidData;
35 
/* Counters kept in statsmap. */
typedef struct {
	uint32_t success;	/* incremented once per event that reaches the output step */
} Stats;
39 
40 typedef struct {
41 	char name[FUNCTION_NAME_LEN];
42 	char file[FILE_NAME_LEN];
43 } Symbol;
44 
45 typedef struct {
46 	uint32_t pid;
47 	uint32_t tid;
48 	char comm[TASK_COMM_LEN];
49 	int32_t kernel_stack_id;
50 	int32_t user_stack_id;
51 	bool thread_current;
52 	bool pthread_match;
53 	bool stack_complete;
54 	int16_t stack_len;
55 	int32_t stack[STACK_MAX_LEN];
56 
57 	int has_meta;
58 	int metadata;
59 	char dummy_safeguard;
60 } Event;
61 
62 
/* Local process-id type used for the pidmap key and the Event pid/tid casts. */
typedef int pid_t;
64 
/* User-space pointers collected for one frame by get_frame_data(). */
typedef struct {
	void *f_back;		/* PyFrameObject.f_back, previous frame */
	void *f_code;		/* PyFrameObject.f_code, pointer to PyCodeObject */
	void *co_filename;	/* PyCodeObject.co_filename */
	void *co_name;		/* PyCodeObject.co_name */
} FrameData;
71 
72 #ifdef SUBPROGS
73 __noinline
74 #else
75 __always_inline
76 #endif
get_thread_state(void * tls_base,PidData * pidData)77 static void *get_thread_state(void *tls_base, PidData *pidData)
78 {
79 	void* thread_state;
80 	int key;
81 
82 	bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
83 	bpf_probe_read_user(&thread_state, sizeof(thread_state),
84 			    tls_base + 0x310 + key * 0x10 + 0x08);
85 	return thread_state;
86 }
87 
get_frame_data(void * frame_ptr,PidData * pidData,FrameData * frame,Symbol * symbol)88 static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
89 					   FrameData *frame, Symbol *symbol)
90 {
91 	// read data from PyFrameObject
92 	bpf_probe_read_user(&frame->f_back,
93 			    sizeof(frame->f_back),
94 			    frame_ptr + pidData->offsets.PyFrameObject_back);
95 	bpf_probe_read_user(&frame->f_code,
96 			    sizeof(frame->f_code),
97 			    frame_ptr + pidData->offsets.PyFrameObject_code);
98 
99 	// read data from PyCodeObject
100 	if (!frame->f_code)
101 		return false;
102 	bpf_probe_read_user(&frame->co_filename,
103 			    sizeof(frame->co_filename),
104 			    frame->f_code + pidData->offsets.PyCodeObject_filename);
105 	bpf_probe_read_user(&frame->co_name,
106 			    sizeof(frame->co_name),
107 			    frame->f_code + pidData->offsets.PyCodeObject_name);
108 	// read actual names into symbol
109 	if (frame->co_filename)
110 		bpf_probe_read_user_str(&symbol->file,
111 					sizeof(symbol->file),
112 					frame->co_filename +
113 					pidData->offsets.String_data);
114 	if (frame->co_name)
115 		bpf_probe_read_user_str(&symbol->name,
116 					sizeof(symbol->name),
117 					frame->co_name +
118 					pidData->offsets.String_data);
119 	return true;
120 }
121 
122 struct {
123 	__uint(type, BPF_MAP_TYPE_HASH);
124 	__uint(max_entries, 1);
125 	__type(key, int);
126 	__type(value, PidData);
127 } pidmap SEC(".maps");
128 
129 struct {
130 	__uint(type, BPF_MAP_TYPE_HASH);
131 	__uint(max_entries, 1);
132 	__type(key, int);
133 	__type(value, Event);
134 } eventmap SEC(".maps");
135 
136 struct {
137 	__uint(type, BPF_MAP_TYPE_HASH);
138 	__uint(max_entries, 1);
139 	__type(key, Symbol);
140 	__type(value, int);
141 } symbolmap SEC(".maps");
142 
143 struct {
144 	__uint(type, BPF_MAP_TYPE_ARRAY);
145 	__uint(max_entries, 1);
146 	__type(key, int);
147 	__type(value, Stats);
148 } statsmap SEC(".maps");
149 
150 struct {
151 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
152 	__uint(max_entries, 32);
153 	__uint(key_size, sizeof(int));
154 	__uint(value_size, sizeof(int));
155 } perfmap SEC(".maps");
156 
157 struct {
158 	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
159 	__uint(max_entries, 1000);
160 	__uint(key_size, sizeof(int));
161 	__uint(value_size, sizeof(long long) * 127);
162 } stackmap SEC(".maps");
163 
#ifdef USE_BPF_LOOP
/* State handed to process_frame_callback() through bpf_loop(). */
struct process_frame_ctx {
	int cur_cpu;			/* CPU number mixed into new symbol ids */
	int32_t *symbol_counter;	/* counter entry inside symbolmap */
	void *frame_ptr;		/* frame to examine on the next iteration */
	FrameData *frame;		/* scratch space for get_frame_data() */
	PidData *pidData;
	Symbol *sym;			/* scratch symbolmap key */
	Event *event;			/* event whose stack[] is being filled */
	bool done;			/* set when a symbol id could not be obtained */
};

/*
 * bpf_loop() callback: handle the frame at ctx->frame_ptr and record its
 * symbol id in event->stack[i].
 *
 * Returns 1 to stop the loop (symbol id unavailable, or i out of range)
 * and 0 to continue.
 */
static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
{
	int zero = 0;
	void *frame_ptr = ctx->frame_ptr;
	PidData *pidData = ctx->pidData;
	FrameData *frame = ctx->frame;
	int32_t *symbol_counter = ctx->symbol_counter;
	int cur_cpu = ctx->cur_cpu;
	Event *event = ctx->event;
	Symbol *sym = ctx->sym;

	if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) {
		int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
		int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym);

		if (!symbol_id) {
			/* unknown symbol: insert it, then fetch the stored entry */
			bpf_map_update_elem(&symbolmap, sym, &zero, 0);
			symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
			if (!symbol_id) {
				ctx->done = true;
				return 1;
			}
		}
		if (*symbol_id == new_symbol_id)
			(*symbol_counter)++;

		/* explicit bound check on the index before the array store */
		barrier_var(i);
		if (i >= STACK_MAX_LEN)
			return 1;

		event->stack[i] = *symbol_id;
		event->stack_len = i + 1;

		/*
		 * Store the parent frame in the context. Assigning only the
		 * local copy would make every iteration re-read the same frame.
		 */
		ctx->frame_ptr = frame->f_back;
	}
	return 0;
}
#endif /* USE_BPF_LOOP */
214 
215 #ifdef GLOBAL_FUNC
216 __noinline
217 #elif defined(SUBPROGS)
218 static __noinline
219 #else
220 static __always_inline
221 #endif
__on_event(struct bpf_raw_tracepoint_args * ctx)222 int __on_event(struct bpf_raw_tracepoint_args *ctx)
223 {
224 	uint64_t pid_tgid = bpf_get_current_pid_tgid();
225 	pid_t pid = (pid_t)(pid_tgid >> 32);
226 	PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
227 	if (!pidData)
228 		return 0;
229 
230 	int zero = 0;
231 	Event* event = bpf_map_lookup_elem(&eventmap, &zero);
232 	if (!event)
233 		return 0;
234 
235 	event->pid = pid;
236 
237 	event->tid = (pid_t)pid_tgid;
238 	bpf_get_current_comm(&event->comm, sizeof(event->comm));
239 
240 	event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
241 	event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);
242 
243 	void* thread_state_current = (void*)0;
244 	bpf_probe_read_user(&thread_state_current,
245 			    sizeof(thread_state_current),
246 			    (void*)(long)pidData->current_state_addr);
247 
248 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
249 	void* tls_base = (void*)task;
250 
251 	void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData)
252 		: thread_state_current;
253 	event->thread_current = thread_state == thread_state_current;
254 
255 	if (pidData->use_tls) {
256 		uint64_t pthread_created;
257 		uint64_t pthread_self;
258 		bpf_probe_read_user(&pthread_self, sizeof(pthread_self),
259 				    tls_base + 0x10);
260 
261 		bpf_probe_read_user(&pthread_created,
262 				    sizeof(pthread_created),
263 				    thread_state +
264 				    pidData->offsets.PyThreadState_thread);
265 		event->pthread_match = pthread_created == pthread_self;
266 	} else {
267 		event->pthread_match = 1;
268 	}
269 
270 	if (event->pthread_match || !pidData->use_tls) {
271 		void* frame_ptr;
272 		FrameData frame;
273 		Symbol sym = {};
274 		int cur_cpu = bpf_get_smp_processor_id();
275 
276 		bpf_probe_read_user(&frame_ptr,
277 				    sizeof(frame_ptr),
278 				    thread_state +
279 				    pidData->offsets.PyThreadState_frame);
280 
281 		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
282 		if (symbol_counter == NULL)
283 			return 0;
284 #ifdef USE_BPF_LOOP
285 	struct process_frame_ctx ctx = {
286 		.cur_cpu = cur_cpu,
287 		.symbol_counter = symbol_counter,
288 		.frame_ptr = frame_ptr,
289 		.frame = &frame,
290 		.pidData = pidData,
291 		.sym = &sym,
292 		.event = event,
293 	};
294 
295 	bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0);
296 	if (ctx.done)
297 		return 0;
298 #else
299 #if defined(USE_ITER)
300 /* no for loop, no unrolling */
301 #elif defined(NO_UNROLL)
302 	__pragma_loop_no_unroll
303 #elif defined(UNROLL_COUNT)
304 	__pragma_loop_unroll_count(UNROLL_COUNT)
305 #else
306 	__pragma_loop_unroll_full
307 #endif /* NO_UNROLL */
308 		/* Unwind python stack */
309 #ifdef USE_ITER
310 		int i;
311 		bpf_for(i, 0, STACK_MAX_LEN) {
312 #else /* !USE_ITER */
313 		for (int i = 0; i < STACK_MAX_LEN; ++i) {
314 #endif
315 			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
316 				int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
317 				int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
318 				if (!symbol_id) {
319 					bpf_map_update_elem(&symbolmap, &sym, &zero, 0);
320 					symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
321 					if (!symbol_id)
322 						return 0;
323 				}
324 				if (*symbol_id == new_symbol_id)
325 					(*symbol_counter)++;
326 				event->stack[i] = *symbol_id;
327 				event->stack_len = i + 1;
328 				frame_ptr = frame.f_back;
329 			}
330 		}
331 #endif /* USE_BPF_LOOP */
332 		event->stack_complete = frame_ptr == NULL;
333 	} else {
334 		event->stack_complete = 1;
335 	}
336 
337 	Stats* stats = bpf_map_lookup_elem(&statsmap, &zero);
338 	if (stats)
339 		stats->success++;
340 
341 	event->has_meta = 0;
342 	bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata));
343 	return 0;
344 }
345 
346 SEC("raw_tracepoint/kfree_skb")
347 int on_event(struct bpf_raw_tracepoint_args* ctx)
348 {
349 	int ret = 0;
350 	ret |= __on_event(ctx);
351 	ret |= __on_event(ctx);
352 	ret |= __on_event(ctx);
353 	ret |= __on_event(ctx);
354 	ret |= __on_event(ctx);
355 	return ret;
356 }
357 
358 char _license[] SEC("license") = "GPL";
359