1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2020 Facebook */
3 #include <vmlinux.h>
4 #include <bpf/bpf_core_read.h>
5 #include <bpf/bpf_helpers.h>
6 #include <bpf/bpf_tracing.h>
7 
8 #include "profiler.h"
9 #include "err.h"
10 #include "bpf_experimental.h"
11 #include "bpf_compiler.h"
12 #include "bpf_misc.h"
13 
/* Fallback null-pointer constant, used only when nothing else defined it. */
#ifndef NULL
#define NULL 0
#endif

/* File open flag bits (octal); tested against file->f_flags in this file. */
#define O_WRONLY	00000001
#define O_RDWR		00000002
#define O_DIRECTORY	00200000
#define __O_TMPFILE	020000000
#define O_TMPFILE	(__O_TMPFILE | O_DIRECTORY)

/* Inode mode bits (octal): S_IFMT masks the file-type field of a mode. */
#define S_IFMT		00170000
#define S_IFSOCK	0140000
#define S_IFLNK		0120000
#define S_IFREG		0100000
#define S_IFBLK		0060000
#define S_IFDIR		0040000
#define S_IFCHR		0020000
#define S_IFIFO		0010000
#define S_ISUID		0004000
#define S_ISGID		0002000
#define S_ISVTX		0001000

/* File-type predicates: compare the masked type field to one S_IF* value. */
#define S_ISLNK(m)	(((m) & S_IFMT) == S_IFLNK)
#define S_ISDIR(m)	(((m) & S_IFMT) == S_IFDIR)
#define S_ISCHR(m)	(((m) & S_IFMT) == S_IFCHR)
#define S_ISBLK(m)	(((m) & S_IFMT) == S_IFBLK)
#define S_ISFIFO(m)	(((m) & S_IFMT) == S_IFIFO)
#define S_ISSOCK(m)	(((m) & S_IFMT) == S_IFSOCK)
40 
41 #define KILL_DATA_ARRAY_SIZE 8
42 
43 struct var_kill_data_arr_t {
44 	struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
45 };
46 
47 union any_profiler_data_t {
48 	struct var_exec_data_t var_exec;
49 	struct var_kill_data_t var_kill;
50 	struct var_sysctl_data_t var_sysctl;
51 	struct var_filemod_data_t var_filemod;
52 	struct var_fork_data_t var_fork;
53 	struct var_kill_data_arr_t var_kill_data_arr;
54 };
55 
56 volatile struct profiler_config_struct bpf_config = {};
57 
58 #define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
59 #define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
60 #define CGROUP_LOGIN_SESSION_INODE \
61 	(bpf_config.cgroup_login_session_inode)
62 #define KILL_SIGNALS (bpf_config.kill_signals_mask)
63 #define STALE_INFO (bpf_config.stale_info_secs)
64 #define INODE_FILTER (bpf_config.inode_filter)
65 #define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
66 #define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)
67 
68 struct kernfs_iattrs___52 {
69 	struct iattr ia_iattr;
70 };
71 
72 struct kernfs_node___52 {
73 	union /* kernfs_node_id */ {
74 		struct {
75 			u32 ino;
76 			u32 generation;
77 		};
78 		u64 id;
79 	} id;
80 };
81 
82 struct {
83 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
84 	__uint(max_entries, 1);
85 	__type(key, u32);
86 	__type(value, union any_profiler_data_t);
87 } data_heap SEC(".maps");
88 
89 struct {
90 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
91 	__uint(key_size, sizeof(int));
92 	__uint(value_size, sizeof(int));
93 } events SEC(".maps");
94 
95 struct {
96 	__uint(type, BPF_MAP_TYPE_HASH);
97 	__uint(max_entries, KILL_DATA_ARRAY_SIZE);
98 	__type(key, u32);
99 	__type(value, struct var_kill_data_arr_t);
100 } var_tpid_to_data SEC(".maps");
101 
102 struct {
103 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
104 	__uint(max_entries, profiler_bpf_max_function_id);
105 	__type(key, u32);
106 	__type(value, struct bpf_func_stats_data);
107 } bpf_func_stats SEC(".maps");
108 
109 struct {
110 	__uint(type, BPF_MAP_TYPE_HASH);
111 	__type(key, u32);
112 	__type(value, bool);
113 	__uint(max_entries, 16);
114 } allowed_devices SEC(".maps");
115 
116 struct {
117 	__uint(type, BPF_MAP_TYPE_HASH);
118 	__type(key, u64);
119 	__type(value, bool);
120 	__uint(max_entries, 1024);
121 } allowed_file_inodes SEC(".maps");
122 
123 struct {
124 	__uint(type, BPF_MAP_TYPE_HASH);
125 	__type(key, u64);
126 	__type(value, bool);
127 	__uint(max_entries, 1024);
128 } allowed_directory_inodes SEC(".maps");
129 
130 struct {
131 	__uint(type, BPF_MAP_TYPE_HASH);
132 	__type(key, u32);
133 	__type(value, bool);
134 	__uint(max_entries, 16);
135 } disallowed_exec_inodes SEC(".maps");
136 
IS_ERR(const void * ptr)137 static INLINE bool IS_ERR(const void* ptr)
138 {
139 	return IS_ERR_VALUE((unsigned long)ptr);
140 }
141 
get_userspace_pid()142 static INLINE u32 get_userspace_pid()
143 {
144 	return bpf_get_current_pid_tgid() >> 32;
145 }
146 
is_init_process(u32 tgid)147 static INLINE bool is_init_process(u32 tgid)
148 {
149 	return tgid == 1 || tgid == 0;
150 }
151 
152 static INLINE unsigned long
probe_read_lim(void * dst,void * src,unsigned long len,unsigned long max)153 probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
154 {
155 	len = len < max ? len : max;
156 	if (len > 1) {
157 		if (bpf_probe_read_kernel(dst, len, src))
158 			return 0;
159 	} else if (len == 1) {
160 		if (bpf_probe_read_kernel(dst, 1, src))
161 			return 0;
162 	}
163 	return len;
164 }
165 
get_var_spid_index(struct var_kill_data_arr_t * arr_struct,int spid)166 static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
167 				     int spid)
168 {
169 #ifdef UNROLL
170 	__pragma_loop_unroll
171 #endif
172 	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
173 		if (arr_struct->array[i].meta.pid == spid)
174 			return i;
175 	return -1;
176 }
177 
populate_ancestors(struct task_struct * task,struct ancestors_data_t * ancestors_data)178 static INLINE void populate_ancestors(struct task_struct* task,
179 				      struct ancestors_data_t* ancestors_data)
180 {
181 	struct task_struct* parent = task;
182 	u32 num_ancestors, ppid;
183 
184 	ancestors_data->num_ancestors = 0;
185 #ifdef UNROLL
186 	__pragma_loop_unroll
187 #endif
188 	for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
189 		parent = BPF_CORE_READ(parent, real_parent);
190 		if (parent == NULL)
191 			break;
192 		ppid = BPF_CORE_READ(parent, tgid);
193 		if (is_init_process(ppid))
194 			break;
195 		ancestors_data->ancestor_pids[num_ancestors] = ppid;
196 		ancestors_data->ancestor_exec_ids[num_ancestors] =
197 			BPF_CORE_READ(parent, self_exec_id);
198 		ancestors_data->ancestor_start_times[num_ancestors] =
199 			BPF_CORE_READ(parent, start_time);
200 		ancestors_data->num_ancestors = num_ancestors;
201 	}
202 }
203 
read_full_cgroup_path(struct kernfs_node * cgroup_node,struct kernfs_node * cgroup_root_node,void * payload,int * root_pos)204 static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
205 					  struct kernfs_node* cgroup_root_node,
206 					  void* payload,
207 					  int* root_pos)
208 {
209 	void* payload_start = payload;
210 	size_t filepart_length;
211 
212 #ifdef UNROLL
213 	__pragma_loop_unroll
214 #endif
215 	for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
216 		filepart_length =
217 			bpf_probe_read_kernel_str(payload, MAX_PATH,
218 						  BPF_CORE_READ(cgroup_node, name));
219 		if (!cgroup_node)
220 			return payload;
221 		if (cgroup_node == cgroup_root_node)
222 			*root_pos = payload - payload_start;
223 		if (bpf_cmp_likely(filepart_length, <=, MAX_PATH)) {
224 			payload += filepart_length;
225 		}
226 		cgroup_node = BPF_CORE_READ(cgroup_node, parent);
227 	}
228 	return payload;
229 }
230 
get_inode_from_kernfs(struct kernfs_node * node)231 static ino_t get_inode_from_kernfs(struct kernfs_node* node)
232 {
233 	struct kernfs_node___52* node52 = (void*)node;
234 
235 	if (bpf_core_field_exists(node52->id.ino)) {
236 		barrier_var(node52);
237 		return BPF_CORE_READ(node52, id.ino);
238 	} else {
239 		barrier_var(node);
240 		return (u64)BPF_CORE_READ(node, id);
241 	}
242 }
243 
244 extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
245 enum cgroup_subsys_id___local {
246 	pids_cgrp_id___local = 123, /* value doesn't matter */
247 };
248 
populate_cgroup_info(struct cgroup_data_t * cgroup_data,struct task_struct * task,void * payload)249 static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
250 					 struct task_struct* task,
251 					 void* payload)
252 {
253 	struct kernfs_node* root_kernfs =
254 		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
255 	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
256 
257 #if __has_builtin(__builtin_preserve_enum_value)
258 	if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
259 		int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
260 						  pids_cgrp_id___local);
261 #ifdef UNROLL
262 		__pragma_loop_unroll
263 #endif
264 		for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
265 			struct cgroup_subsys_state* subsys =
266 				BPF_CORE_READ(task, cgroups, subsys[i]);
267 			if (subsys != NULL) {
268 				int subsys_id = BPF_CORE_READ(subsys, ss, id);
269 				if (subsys_id == cgrp_id) {
270 					proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
271 					root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
272 					break;
273 				}
274 			}
275 		}
276 	}
277 #endif
278 
279 	cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
280 	cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);
281 
282 	if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
283 		cgroup_data->cgroup_root_mtime =
284 			BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
285 		cgroup_data->cgroup_proc_mtime =
286 			BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
287 	} else {
288 		struct kernfs_iattrs___52* root_iattr =
289 			(struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
290 		cgroup_data->cgroup_root_mtime =
291 			BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);
292 
293 		struct kernfs_iattrs___52* proc_iattr =
294 			(struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
295 		cgroup_data->cgroup_proc_mtime =
296 			BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
297 	}
298 
299 	cgroup_data->cgroup_root_length = 0;
300 	cgroup_data->cgroup_proc_length = 0;
301 	cgroup_data->cgroup_full_length = 0;
302 
303 	size_t cgroup_root_length =
304 		bpf_probe_read_kernel_str(payload, MAX_PATH,
305 					  BPF_CORE_READ(root_kernfs, name));
306 	if (bpf_cmp_likely(cgroup_root_length, <=, MAX_PATH)) {
307 		cgroup_data->cgroup_root_length = cgroup_root_length;
308 		payload += cgroup_root_length;
309 	}
310 
311 	size_t cgroup_proc_length =
312 		bpf_probe_read_kernel_str(payload, MAX_PATH,
313 					  BPF_CORE_READ(proc_kernfs, name));
314 	if (bpf_cmp_likely(cgroup_proc_length, <=, MAX_PATH)) {
315 		cgroup_data->cgroup_proc_length = cgroup_proc_length;
316 		payload += cgroup_proc_length;
317 	}
318 
319 	if (FETCH_CGROUPS_FROM_BPF) {
320 		cgroup_data->cgroup_full_path_root_pos = -1;
321 		void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
322 							      &cgroup_data->cgroup_full_path_root_pos);
323 		cgroup_data->cgroup_full_length = payload_end_pos - payload;
324 		payload = payload_end_pos;
325 	}
326 
327 	return (void*)payload;
328 }
329 
populate_var_metadata(struct var_metadata_t * metadata,struct task_struct * task,u32 pid,void * payload)330 static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
331 					  struct task_struct* task,
332 					  u32 pid, void* payload)
333 {
334 	u64 uid_gid = bpf_get_current_uid_gid();
335 
336 	metadata->uid = (u32)uid_gid;
337 	metadata->gid = uid_gid >> 32;
338 	metadata->pid = pid;
339 	metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
340 	metadata->start_time = BPF_CORE_READ(task, start_time);
341 	metadata->comm_length = 0;
342 
343 	size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
344 	if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) {
345 		metadata->comm_length = comm_length;
346 		payload += comm_length;
347 	}
348 
349 	return (void*)payload;
350 }
351 
352 static INLINE struct var_kill_data_t*
get_var_kill_data(struct pt_regs * ctx,int spid,int tpid,int sig)353 get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
354 {
355 	int zero = 0;
356 	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
357 
358 	if (kill_data == NULL)
359 		return NULL;
360 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
361 
362 	void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
363 	payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
364 	size_t payload_length = payload - (void*)kill_data->payload;
365 	kill_data->payload_length = payload_length;
366 	populate_ancestors(task, &kill_data->ancestors_info);
367 	kill_data->meta.type = KILL_EVENT;
368 	kill_data->kill_target_pid = tpid;
369 	kill_data->kill_sig = sig;
370 	kill_data->kill_count = 1;
371 	kill_data->last_kill_time = bpf_ktime_get_ns();
372 	return kill_data;
373 }
374 
trace_var_sys_kill(void * ctx,int tpid,int sig)375 static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
376 {
377 	if ((KILL_SIGNALS & (1ULL << sig)) == 0)
378 		return 0;
379 
380 	u32 spid = get_userspace_pid();
381 	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
382 
383 	if (arr_struct == NULL) {
384 		struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
385 		int zero = 0;
386 
387 		if (kill_data == NULL)
388 			return 0;
389 		arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
390 		if (arr_struct == NULL)
391 			return 0;
392 		bpf_probe_read_kernel(&arr_struct->array[0],
393 				      sizeof(arr_struct->array[0]), kill_data);
394 	} else {
395 		int index = get_var_spid_index(arr_struct, spid);
396 
397 		if (index == -1) {
398 			struct var_kill_data_t* kill_data =
399 				get_var_kill_data(ctx, spid, tpid, sig);
400 			if (kill_data == NULL)
401 				return 0;
402 #ifdef UNROLL
403 			__pragma_loop_unroll
404 #endif
405 			for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
406 				if (arr_struct->array[i].meta.pid == 0) {
407 					bpf_probe_read_kernel(&arr_struct->array[i],
408 							      sizeof(arr_struct->array[i]),
409 							      kill_data);
410 					bpf_map_update_elem(&var_tpid_to_data, &tpid,
411 							    arr_struct, 0);
412 
413 					return 0;
414 				}
415 			return 0;
416 		}
417 
418 		struct var_kill_data_t* kill_data = &arr_struct->array[index];
419 
420 		u64 delta_sec =
421 			(bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;
422 
423 		if (delta_sec < STALE_INFO) {
424 			kill_data->kill_count++;
425 			kill_data->last_kill_time = bpf_ktime_get_ns();
426 			bpf_probe_read_kernel(&arr_struct->array[index],
427 					      sizeof(arr_struct->array[index]),
428 					      kill_data);
429 		} else {
430 			struct var_kill_data_t* kill_data =
431 				get_var_kill_data(ctx, spid, tpid, sig);
432 			if (kill_data == NULL)
433 				return 0;
434 			bpf_probe_read_kernel(&arr_struct->array[index],
435 					      sizeof(arr_struct->array[index]),
436 					      kill_data);
437 		}
438 	}
439 	bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
440 	return 0;
441 }
442 
bpf_stats_enter(struct bpf_func_stats_ctx * bpf_stat_ctx,enum bpf_function_id func_id)443 static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
444 				   enum bpf_function_id func_id)
445 {
446 	int func_id_key = func_id;
447 
448 	bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
449 	bpf_stat_ctx->bpf_func_stats_data_val =
450 		bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
451 	if (bpf_stat_ctx->bpf_func_stats_data_val)
452 		bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
453 }
454 
bpf_stats_exit(struct bpf_func_stats_ctx * bpf_stat_ctx)455 static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
456 {
457 	if (bpf_stat_ctx->bpf_func_stats_data_val)
458 		bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
459 			bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
460 }
461 
462 static INLINE void
bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx * bpf_stat_ctx,struct var_metadata_t * meta)463 bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
464 				    struct var_metadata_t* meta)
465 {
466 	if (bpf_stat_ctx->bpf_func_stats_data_val) {
467 		bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
468 		meta->bpf_stats_num_perf_events =
469 			bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
470 	}
471 	meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
472 	meta->cpu_id = bpf_get_smp_processor_id();
473 }
474 
475 static INLINE size_t
read_absolute_file_path_from_dentry(struct dentry * filp_dentry,void * payload)476 read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
477 {
478 	size_t length = 0;
479 	size_t filepart_length;
480 	struct dentry* parent_dentry;
481 
482 #ifdef UNROLL
483 	__pragma_loop_unroll
484 #endif
485 	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
486 		filepart_length =
487 			bpf_probe_read_kernel_str(payload, MAX_PATH,
488 						  BPF_CORE_READ(filp_dentry, d_name.name));
489 		bpf_nop_mov(filepart_length);
490 		if (bpf_cmp_unlikely(filepart_length, >, MAX_PATH))
491 			break;
492 		payload += filepart_length;
493 		length += filepart_length;
494 
495 		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
496 		if (filp_dentry == parent_dentry)
497 			break;
498 		filp_dentry = parent_dentry;
499 	}
500 
501 	return length;
502 }
503 
504 static INLINE bool
is_ancestor_in_allowed_inodes(struct dentry * filp_dentry)505 is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
506 {
507 	struct dentry* parent_dentry;
508 #ifdef UNROLL
509 	__pragma_loop_unroll
510 #endif
511 	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
512 		u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
513 		bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);
514 
515 		if (allowed_dir != NULL)
516 			return true;
517 		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
518 		if (filp_dentry == parent_dentry)
519 			break;
520 		filp_dentry = parent_dentry;
521 	}
522 	return false;
523 }
524 
is_dentry_allowed_for_filemod(struct dentry * file_dentry,u32 * device_id,u64 * file_ino)525 static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
526 						 u32* device_id,
527 						 u64* file_ino)
528 {
529 	u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
530 	*device_id = dev_id;
531 	bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);
532 
533 	if (allowed_device == NULL)
534 		return false;
535 
536 	u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
537 	*file_ino = ino;
538 	bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);
539 
540 	if (allowed_file == NULL)
541 		if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
542 			return false;
543 	return true;
544 }
545 
546 SEC("kprobe/proc_sys_write")
BPF_KPROBE(kprobe__proc_sys_write,struct file * filp,const char * buf,size_t count,loff_t * ppos)547 ssize_t BPF_KPROBE(kprobe__proc_sys_write,
548 		   struct file* filp, const char* buf,
549 		   size_t count, loff_t* ppos)
550 {
551 	struct bpf_func_stats_ctx stats_ctx;
552 	bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);
553 
554 	u32 pid = get_userspace_pid();
555 	int zero = 0;
556 	struct var_sysctl_data_t* sysctl_data =
557 		bpf_map_lookup_elem(&data_heap, &zero);
558 	if (!sysctl_data)
559 		goto out;
560 
561 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
562 	sysctl_data->meta.type = SYSCTL_EVENT;
563 	void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
564 	payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);
565 
566 	populate_ancestors(task, &sysctl_data->ancestors_info);
567 
568 	sysctl_data->sysctl_val_length = 0;
569 	sysctl_data->sysctl_path_length = 0;
570 
571 	size_t sysctl_val_length = bpf_probe_read_kernel_str(payload,
572 							     CTL_MAXNAME, buf);
573 	if (bpf_cmp_likely(sysctl_val_length, <=, CTL_MAXNAME)) {
574 		sysctl_data->sysctl_val_length = sysctl_val_length;
575 		payload += sysctl_val_length;
576 	}
577 
578 	size_t sysctl_path_length =
579 		bpf_probe_read_kernel_str(payload, MAX_PATH,
580 					  BPF_CORE_READ(filp, f_path.dentry,
581 							d_name.name));
582 	if (bpf_cmp_likely(sysctl_path_length, <=, MAX_PATH)) {
583 		sysctl_data->sysctl_path_length = sysctl_path_length;
584 		payload += sysctl_path_length;
585 	}
586 
587 	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
588 	unsigned long data_len = payload - (void*)sysctl_data;
589 	data_len = data_len > sizeof(struct var_sysctl_data_t)
590 		? sizeof(struct var_sysctl_data_t)
591 		: data_len;
592 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
593 out:
594 	bpf_stats_exit(&stats_ctx);
595 	return 0;
596 }
597 
598 SEC("tracepoint/syscalls/sys_enter_kill")
tracepoint__syscalls__sys_enter_kill(struct syscall_trace_enter * ctx)599 int tracepoint__syscalls__sys_enter_kill(struct syscall_trace_enter* ctx)
600 {
601 	struct bpf_func_stats_ctx stats_ctx;
602 
603 	bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
604 	int pid = ctx->args[0];
605 	int sig = ctx->args[1];
606 	int ret = trace_var_sys_kill(ctx, pid, sig);
607 	bpf_stats_exit(&stats_ctx);
608 	return ret;
609 };
610 
611 SEC("raw_tracepoint/sched_process_exit")
raw_tracepoint__sched_process_exit(void * ctx)612 int raw_tracepoint__sched_process_exit(void* ctx)
613 {
614 	int zero = 0;
615 	struct bpf_func_stats_ctx stats_ctx;
616 	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);
617 
618 	u32 tpid = get_userspace_pid();
619 
620 	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
621 	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
622 
623 	if (arr_struct == NULL || kill_data == NULL)
624 		goto out;
625 
626 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
627 	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
628 
629 #ifdef UNROLL
630 	__pragma_loop_unroll
631 #endif
632 	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
633 		struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
634 
635 		if (past_kill_data != NULL && past_kill_data->kill_target_pid == (pid_t)tpid) {
636 			bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data),
637 					      past_kill_data);
638 			void* payload = kill_data->payload;
639 			size_t offset = kill_data->payload_length;
640 			if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
641 				return 0;
642 			payload += offset;
643 
644 			kill_data->kill_target_name_length = 0;
645 			kill_data->kill_target_cgroup_proc_length = 0;
646 
647 			size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
648 			if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) {
649 				kill_data->kill_target_name_length = comm_length;
650 				payload += comm_length;
651 			}
652 
653 			size_t cgroup_proc_length =
654 				bpf_probe_read_kernel_str(payload,
655 							  KILL_TARGET_LEN,
656 							  BPF_CORE_READ(proc_kernfs, name));
657 			if (bpf_cmp_likely(cgroup_proc_length, <=, KILL_TARGET_LEN)) {
658 				kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
659 				payload += cgroup_proc_length;
660 			}
661 
662 			bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
663 			unsigned long data_len = (void*)payload - (void*)kill_data;
664 			data_len = data_len > sizeof(struct var_kill_data_t)
665 				? sizeof(struct var_kill_data_t)
666 				: data_len;
667 			bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
668 		}
669 	}
670 	bpf_map_delete_elem(&var_tpid_to_data, &tpid);
671 out:
672 	bpf_stats_exit(&stats_ctx);
673 	return 0;
674 }
675 
676 SEC("raw_tracepoint/sched_process_exec")
raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args * ctx)677 int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
678 {
679 	struct bpf_func_stats_ctx stats_ctx;
680 	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);
681 
682 	struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
683 	u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);
684 
685 	bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
686 	if (should_filter_binprm != NULL)
687 		goto out;
688 
689 	int zero = 0;
690 	struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
691 	if (!proc_exec_data)
692 		goto out;
693 
694 	if (INODE_FILTER && inode != INODE_FILTER)
695 		return 0;
696 
697 	u32 pid = get_userspace_pid();
698 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
699 
700 	proc_exec_data->meta.type = EXEC_EVENT;
701 	proc_exec_data->bin_path_length = 0;
702 	proc_exec_data->cmdline_length = 0;
703 	proc_exec_data->environment_length = 0;
704 	void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
705 					      proc_exec_data->payload);
706 	payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);
707 
708 	struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
709 	proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
710 	proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
711 	proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
712 	proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);
713 
714 	const char* filename = BPF_CORE_READ(bprm, filename);
715 	size_t bin_path_length =
716 		bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename);
717 	if (bpf_cmp_likely(bin_path_length, <=, MAX_FILENAME_LEN)) {
718 		proc_exec_data->bin_path_length = bin_path_length;
719 		payload += bin_path_length;
720 	}
721 
722 	void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
723 	void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
724 	unsigned int cmdline_length = probe_read_lim(payload, arg_start,
725 						     arg_end - arg_start, MAX_ARGS_LEN);
726 
727 	if (bpf_cmp_likely(cmdline_length, <=, MAX_ARGS_LEN)) {
728 		proc_exec_data->cmdline_length = cmdline_length;
729 		payload += cmdline_length;
730 	}
731 
732 	if (READ_ENVIRON_FROM_EXEC) {
733 		void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
734 		void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
735 		unsigned long env_len = probe_read_lim(payload, env_start,
736 						       env_end - env_start, MAX_ENVIRON_LEN);
737 		if (cmdline_length <= MAX_ENVIRON_LEN) {
738 			proc_exec_data->environment_length = env_len;
739 			payload += env_len;
740 		}
741 	}
742 
743 	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
744 	unsigned long data_len = payload - (void*)proc_exec_data;
745 	data_len = data_len > sizeof(struct var_exec_data_t)
746 		? sizeof(struct var_exec_data_t)
747 		: data_len;
748 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
749 out:
750 	bpf_stats_exit(&stats_ctx);
751 	return 0;
752 }
753 
754 SEC("kretprobe/do_filp_open")
kprobe_ret__do_filp_open(struct pt_regs * ctx)755 int kprobe_ret__do_filp_open(struct pt_regs* ctx)
756 {
757 	struct bpf_func_stats_ctx stats_ctx;
758 	bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);
759 
760 	struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);
761 
762 	if (filp == NULL || IS_ERR(filp))
763 		goto out;
764 	unsigned int flags = BPF_CORE_READ(filp, f_flags);
765 	if ((flags & (O_RDWR | O_WRONLY)) == 0)
766 		goto out;
767 	if ((flags & O_TMPFILE) > 0)
768 		goto out;
769 	struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
770 	umode_t mode = BPF_CORE_READ(file_inode, i_mode);
771 	if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
772 	    S_ISSOCK(mode))
773 		goto out;
774 
775 	struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
776 	u32 device_id = 0;
777 	u64 file_ino = 0;
778 	if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
779 		goto out;
780 
781 	int zero = 0;
782 	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
783 	if (!filemod_data)
784 		goto out;
785 
786 	u32 pid = get_userspace_pid();
787 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
788 
789 	filemod_data->meta.type = FILEMOD_EVENT;
790 	filemod_data->fmod_type = FMOD_OPEN;
791 	filemod_data->dst_flags = flags;
792 	filemod_data->src_inode = 0;
793 	filemod_data->dst_inode = file_ino;
794 	filemod_data->src_device_id = 0;
795 	filemod_data->dst_device_id = device_id;
796 	filemod_data->src_filepath_length = 0;
797 	filemod_data->dst_filepath_length = 0;
798 
799 	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
800 					      filemod_data->payload);
801 	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
802 
803 	size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
804 	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
805 		payload += len;
806 		filemod_data->dst_filepath_length = len;
807 	}
808 	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
809 	unsigned long data_len = payload - (void*)filemod_data;
810 	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
811 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
812 out:
813 	bpf_stats_exit(&stats_ctx);
814 	return 0;
815 }
816 
817 SEC("kprobe/vfs_link")
BPF_KPROBE(kprobe__vfs_link,struct dentry * old_dentry,struct mnt_idmap * idmap,struct inode * dir,struct dentry * new_dentry,struct inode ** delegated_inode)818 int BPF_KPROBE(kprobe__vfs_link,
819 	       struct dentry* old_dentry, struct mnt_idmap *idmap,
820 	       struct inode* dir, struct dentry* new_dentry,
821 	       struct inode** delegated_inode)
822 {
823 	struct bpf_func_stats_ctx stats_ctx;
824 	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
825 
826 	u32 src_device_id = 0;
827 	u64 src_file_ino = 0;
828 	u32 dst_device_id = 0;
829 	u64 dst_file_ino = 0;
830 	if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
831 	    !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
832 		goto out;
833 
834 	int zero = 0;
835 	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
836 	if (!filemod_data)
837 		goto out;
838 
839 	u32 pid = get_userspace_pid();
840 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
841 
842 	filemod_data->meta.type = FILEMOD_EVENT;
843 	filemod_data->fmod_type = FMOD_LINK;
844 	filemod_data->dst_flags = 0;
845 	filemod_data->src_inode = src_file_ino;
846 	filemod_data->dst_inode = dst_file_ino;
847 	filemod_data->src_device_id = src_device_id;
848 	filemod_data->dst_device_id = dst_device_id;
849 	filemod_data->src_filepath_length = 0;
850 	filemod_data->dst_filepath_length = 0;
851 
852 	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
853 					      filemod_data->payload);
854 	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
855 
856 	size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
857 	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
858 		payload += len;
859 		filemod_data->src_filepath_length = len;
860 	}
861 
862 	len = read_absolute_file_path_from_dentry(new_dentry, payload);
863 	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
864 		payload += len;
865 		filemod_data->dst_filepath_length = len;
866 	}
867 
868 	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
869 	unsigned long data_len = payload - (void*)filemod_data;
870 	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
871 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
872 out:
873 	bpf_stats_exit(&stats_ctx);
874 	return 0;
875 }
876 
877 SEC("kprobe/vfs_symlink")
BPF_KPROBE(kprobe__vfs_symlink,struct inode * dir,struct dentry * dentry,const char * oldname)878 int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
879 	       const char* oldname)
880 {
881 	struct bpf_func_stats_ctx stats_ctx;
882 	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);
883 
884 	u32 dst_device_id = 0;
885 	u64 dst_file_ino = 0;
886 	if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
887 		goto out;
888 
889 	int zero = 0;
890 	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
891 	if (!filemod_data)
892 		goto out;
893 
894 	u32 pid = get_userspace_pid();
895 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
896 
897 	filemod_data->meta.type = FILEMOD_EVENT;
898 	filemod_data->fmod_type = FMOD_SYMLINK;
899 	filemod_data->dst_flags = 0;
900 	filemod_data->src_inode = 0;
901 	filemod_data->dst_inode = dst_file_ino;
902 	filemod_data->src_device_id = 0;
903 	filemod_data->dst_device_id = dst_device_id;
904 	filemod_data->src_filepath_length = 0;
905 	filemod_data->dst_filepath_length = 0;
906 
907 	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
908 					      filemod_data->payload);
909 	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
910 
911 	size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH,
912 					       oldname);
913 	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
914 		payload += len;
915 		filemod_data->src_filepath_length = len;
916 	}
917 	len = read_absolute_file_path_from_dentry(dentry, payload);
918 	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
919 		payload += len;
920 		filemod_data->dst_filepath_length = len;
921 	}
922 	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
923 	unsigned long data_len = payload - (void*)filemod_data;
924 	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
925 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
926 out:
927 	bpf_stats_exit(&stats_ctx);
928 	return 0;
929 }
930 
931 SEC("raw_tracepoint/sched_process_fork")
raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args * ctx)932 int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
933 {
934 	struct bpf_func_stats_ctx stats_ctx;
935 	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);
936 
937 	int zero = 0;
938 	struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
939 	if (!fork_data)
940 		goto out;
941 
942 	struct task_struct* parent = (struct task_struct*)ctx->args[0];
943 	struct task_struct* child = (struct task_struct*)ctx->args[1];
944 	fork_data->meta.type = FORK_EVENT;
945 
946 	void* payload = populate_var_metadata(&fork_data->meta, child,
947 					      BPF_CORE_READ(child, pid), fork_data->payload);
948 	fork_data->parent_pid = BPF_CORE_READ(parent, pid);
949 	fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
950 	fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
951 	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);
952 
953 	unsigned long data_len = payload - (void*)fork_data;
954 	data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
955 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
956 out:
957 	bpf_stats_exit(&stats_ctx);
958 	return 0;
959 }
960 char _license[] SEC("license") = "GPL";
961