1 /* Copyright (c) 2016 Facebook
2  *
3  * This program is free software; you can redistribute it and/or
4  * modify it under the terms of version 2 of the GNU General Public
5  * License as published by the Free Software Foundation.
6  */
7 #include "vmlinux.h"
8 #include <linux/version.h>
9 #include <bpf/bpf_helpers.h>
10 #include <bpf/bpf_tracing.h>
11 #include <bpf/bpf_core_read.h>
12 
/* Deepest stack recorded per stackmap entry, unless already defined. */
#ifndef PERF_MAX_STACK_DEPTH
#define PERF_MAX_STACK_DEPTH	127
#endif

/* Off-CPU intervals shorter than this many microseconds are not counted. */
#define MINBLOCK_US		1
/* Capacity shared by every map declared in this file. */
#define MAX_ENTRIES		10000
19 
20 struct key_t {
21 	char waker[TASK_COMM_LEN];
22 	char target[TASK_COMM_LEN];
23 	u32 wret;
24 	u32 tret;
25 };
26 
27 struct {
28 	__uint(type, BPF_MAP_TYPE_HASH);
29 	__type(key, struct key_t);
30 	__type(value, u64);
31 	__uint(max_entries, MAX_ENTRIES);
32 } counts SEC(".maps");
33 
34 struct {
35 	__uint(type, BPF_MAP_TYPE_HASH);
36 	__type(key, u32);
37 	__type(value, u64);
38 	__uint(max_entries, MAX_ENTRIES);
39 } start SEC(".maps");
40 
41 struct wokeby_t {
42 	char name[TASK_COMM_LEN];
43 	u32 ret;
44 };
45 
46 struct {
47 	__uint(type, BPF_MAP_TYPE_HASH);
48 	__type(key, u32);
49 	__type(value, struct wokeby_t);
50 	__uint(max_entries, MAX_ENTRIES);
51 } wokeby SEC(".maps");
52 
53 struct {
54 	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
55 	__uint(key_size, sizeof(u32));
56 	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
57 	__uint(max_entries, MAX_ENTRIES);
58 } stackmap SEC(".maps");
59 
/* Flags for bpf_get_stackid(): only BPF_F_FAST_STACK_CMP is set. */
#define STACKID_FLAGS (BPF_F_FAST_STACK_CMP)
61 
62 SEC("kprobe/try_to_wake_up")
waker(struct pt_regs * ctx)63 int waker(struct pt_regs *ctx)
64 {
65 	struct task_struct *p = (void *)PT_REGS_PARM1_CORE(ctx);
66 	u32 pid = BPF_CORE_READ(p, pid);
67 	struct wokeby_t woke;
68 
69 	bpf_get_current_comm(&woke.name, sizeof(woke.name));
70 	woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
71 
72 	bpf_map_update_elem(&wokeby, &pid, &woke, BPF_ANY);
73 	return 0;
74 }
75 
/*
 * Add @delta to the "counts" entry for the current task and its waker.
 *
 * @ctx:   program context, forwarded to bpf_get_stackid()
 * @pid:   pid used to look up (and then consume) the "wokeby" record
 * @delta: amount to add; the caller in this file passes microseconds
 *
 * The key is built from the current task's comm and stack id plus, when a
 * "wokeby" record exists for @pid, the waker's comm and stack id. Without
 * such a record the waker fields stay zeroed.
 *
 * Always returns 0; a failure to create the counter is silently dropped.
 */
static inline int update_counts(void *ctx, u32 pid, u64 delta)
{
	struct wokeby_t *woke;
	u64 zero = 0, *val;
	struct key_t key;

	/* every byte of the key is written: it is hashed as raw memory */
	__builtin_memset(&key.waker, 0, sizeof(key.waker));
	bpf_get_current_comm(&key.target, sizeof(key.target));
	key.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
	key.wret = 0;

	woke = bpf_map_lookup_elem(&wokeby, &pid);
	if (woke) {
		key.wret = woke->ret;
		__builtin_memcpy(&key.waker, woke->name, sizeof(key.waker));
		/* one wakeup record accounts for one off-CPU interval */
		bpf_map_delete_elem(&wokeby, &pid);
	}

	val = bpf_map_lookup_elem(&counts, &key);
	if (!val) {
		/* BPF_NOEXIST: do not reset a counter another CPU just created */
		bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST);
		val = bpf_map_lookup_elem(&counts, &key);
		if (!val)
			return 0;
	}

	/*
	 * "counts" is a shared (non per-CPU) hash map, so several CPUs can
	 * hit the same key at once; a plain "+=" could lose updates.
	 */
	__sync_fetch_and_add(val, delta);
	return 0;
}
104 
105 #if 1
106 /* taken from /sys/kernel/tracing/events/sched/sched_switch/format */
107 SEC("tracepoint/sched/sched_switch")
oncpu(struct trace_event_raw_sched_switch * ctx)108 int oncpu(struct trace_event_raw_sched_switch *ctx)
109 {
110 	/* record previous thread sleep time */
111 	u32 pid = ctx->prev_pid;
112 #else
113 SEC("kprobe.multi/finish_task_switch*")
114 int oncpu(struct pt_regs *ctx)
115 {
116 	struct task_struct *p = (void *)PT_REGS_PARM1_CORE(ctx);
117 	/* record previous thread sleep time */
118 	u32 pid = BPF_CORE_READ(p, pid);
119 #endif
120 	u64 delta, ts, *tsp;
121 
122 	ts = bpf_ktime_get_ns();
123 	bpf_map_update_elem(&start, &pid, &ts, BPF_ANY);
124 
125 	/* calculate current thread's delta time */
126 	pid = bpf_get_current_pid_tgid();
127 	tsp = bpf_map_lookup_elem(&start, &pid);
128 	if (!tsp)
129 		/* missed start or filtered */
130 		return 0;
131 
132 	delta = bpf_ktime_get_ns() - *tsp;
133 	bpf_map_delete_elem(&start, &pid);
134 	delta = delta / 1000;
135 	if (delta < MINBLOCK_US)
136 		return 0;
137 
138 	return update_counts(ctx, pid, delta);
139 }
140 char _license[] SEC("license") = "GPL";
141 u32 _version SEC("version") = LINUX_VERSION_CODE;
142