1  /* SPDX-License-Identifier: GPL-2.0
2   *
3   * Copyright (c) 2019 Facebook
4   *
5   * This program is free software; you can redistribute it and/or
6   * modify it under the terms of version 2 of the GNU General Public
7   * License as published by the Free Software Foundation.
8   *
9   * Include file for sample Host Bandwidth Manager (HBM) BPF programs
10   */
11  #define KBUILD_MODNAME "foo"
12  #include <uapi/linux/bpf.h>
13  #include <uapi/linux/if_ether.h>
14  #include <uapi/linux/if_packet.h>
15  #include <uapi/linux/ip.h>
16  #include <uapi/linux/ipv6.h>
17  #include <uapi/linux/in.h>
18  #include <uapi/linux/tcp.h>
19  #include <uapi/linux/filter.h>
20  #include <uapi/linux/pkt_cls.h>
21  #include <net/ipv6.h>
22  #include <net/inet_ecn.h>
23  #include <bpf/bpf_endian.h>
24  #include <bpf/bpf_helpers.h>
25  #include "hbm.h"
26  
// Packet verdict values. hbm_update_stats() starts from ALLOW_PKT, switches
// to DROP_PKT when drop_flag is set, and tallies the result in
// returnValCount[].
#define DROP_PKT	0
#define ALLOW_PKT	1
// NOTE(review): TCP_ECN_OK is not referenced in the visible part of this
// file -- confirm its users before changing it.
#define TCP_ECN_OK	1
// Same value as the bit hbm_update_stats() ORs into the verdict when
// cwr_flag is set.
#define CWR		2

// Unless HBM_DEBUG is defined, bpf_printk() is redefined to expand to
// nothing, so the debug prints below compile away.
#ifndef HBM_DEBUG  // Define HBM_DEBUG to enable debugging
#undef bpf_printk
#define bpf_printk(fmt, ...)
#endif
36  
// Byte-based credit accounting. All thresholds are multiples of
// MAX_BYTES_PER_PACKET (1500):
//   MARK_THRESH           = 40 * 1500       =  60000
//   DROP_THRESH           = 80 * 5 * 1500   = 600000
//   LARGE_PKT_DROP_THRESH = 600000 - 22500  = 577500
//   MARK_REGION_SIZE      = 577500 - 60000  = 517500
//   MAX_CREDIT            = 100 * 1500      = 150000
//   INIT_CREDIT           = 100 * 1500      = 150000
// LARGE_PKT_THRESH is a packet length in bytes; the EDT comment below treats
// packets shorter than this as "small".
#define INITIAL_CREDIT_PACKETS	100
#define MAX_BYTES_PER_PACKET	1500
#define MARK_THRESH		(40 * MAX_BYTES_PER_PACKET)
#define DROP_THRESH		(80 * 5 * MAX_BYTES_PER_PACKET)
#define LARGE_PKT_DROP_THRESH	(DROP_THRESH - (15 * MAX_BYTES_PER_PACKET))
#define MARK_REGION_SIZE	(LARGE_PKT_DROP_THRESH - MARK_THRESH)
#define LARGE_PKT_THRESH	120
#define MAX_CREDIT		(100 * MAX_BYTES_PER_PACKET)
#define INIT_CREDIT		(INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET)

// Time-based accounting for fq's EDT (Earliest Departure Time); all values
// are in nanoseconds.
#define BURST_SIZE_NS		100000 // 100us
#define MARK_THRESH_NS		50000 // 50us
#define DROP_THRESH_NS		500000 // 500us
// Reserve 20us of queuing for small packets (less than 120 bytes), so
// LARGE_PKT_DROP_THRESH_NS is 480us and MARK_REGION_SIZE_NS is 430us.
#define LARGE_PKT_DROP_THRESH_NS (DROP_THRESH_NS - 20000)
#define MARK_REGION_SIZE_NS	(LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS)
54  
// rate is expressed in bytes per ns, scaled up by 2^20 (hence the >> 20 and
// << 20 below).
// CREDIT_PER_NS and BYTES_PER_NS have identical expansions: both yield
// (delta * rate) >> 20, computed in 64 bits.
// BYTES_TO_NS is the inverse conversion: (bytes << 20) / rate via div64_u64().
#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
#define BYTES_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
#define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate))
59  
/*
 * Cgroup-storage map (BPF_MAP_TYPE_CGROUP_STORAGE) keyed by
 * struct bpf_cgroup_storage_key. Each value is a struct hbm_vqueue, the
 * type whose lasttime/credit/rate fields the init helpers below set up.
 */
struct {
	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
	__type(key, struct bpf_cgroup_storage_key);
	__type(value, struct hbm_vqueue);
} queue_state SEC(".maps");
65  
/*
 * Single-entry array map (u32 key) holding one struct hbm_queue_stats,
 * the type whose counters hbm_update_stats() updates.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, struct hbm_queue_stats);
} queue_stats SEC(".maps");
72  
/* Per-packet information filled in by hbm_get_pkt_info(). */
struct hbm_pkt_info {
	int	cwnd;		/* tp->snd_cwnd, or 0 when unavailable */
	int	rtt;		/* tp->srtt_us >> 3, or 0 when unavailable */
	int	packets_out;	/* tp->packets_out when TCP info is available */
	bool	is_ip;		/* header version field is 4 or 6 */
	bool	is_tcp;		/* protocol / next header is 6 (TCP) */
	short	ecn;		/* ECN bits, masked with INET_ECN_MASK */
};
81  
get_tcp_info(struct __sk_buff * skb,struct hbm_pkt_info * pkti)82  static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti)
83  {
84  	struct bpf_sock *sk;
85  	struct bpf_tcp_sock *tp;
86  
87  	sk = skb->sk;
88  	if (sk) {
89  		sk = bpf_sk_fullsock(sk);
90  		if (sk) {
91  			if (sk->protocol == IPPROTO_TCP) {
92  				tp = bpf_tcp_sock(sk);
93  				if (tp) {
94  					pkti->cwnd = tp->snd_cwnd;
95  					pkti->rtt = tp->srtt_us >> 3;
96  					pkti->packets_out = tp->packets_out;
97  					return 0;
98  				}
99  			}
100  		}
101  	}
102  	pkti->cwnd = 0;
103  	pkti->rtt = 0;
104  	pkti->packets_out = 0;
105  	return 1;
106  }
107  
hbm_get_pkt_info(struct __sk_buff * skb,struct hbm_pkt_info * pkti)108  static void hbm_get_pkt_info(struct __sk_buff *skb,
109  			     struct hbm_pkt_info *pkti)
110  {
111  	struct iphdr iph;
112  	struct ipv6hdr *ip6h;
113  
114  	pkti->cwnd = 0;
115  	pkti->rtt = 0;
116  	bpf_skb_load_bytes(skb, 0, &iph, 12);
117  	if (iph.version == 6) {
118  		ip6h = (struct ipv6hdr *)&iph;
119  		pkti->is_ip = true;
120  		pkti->is_tcp = (ip6h->nexthdr == 6);
121  		pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK;
122  	} else if (iph.version == 4) {
123  		pkti->is_ip = true;
124  		pkti->is_tcp = (iph.protocol == 6);
125  		pkti->ecn = iph.tos & INET_ECN_MASK;
126  	} else {
127  		pkti->is_ip = false;
128  		pkti->is_tcp = false;
129  		pkti->ecn = 0;
130  	}
131  	if (pkti->is_tcp)
132  		get_tcp_info(skb, pkti);
133  }
134  
hbm_init_vqueue(struct hbm_vqueue * qdp,int rate)135  static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate)
136  {
137  	bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
138  	qdp->lasttime = bpf_ktime_get_ns();
139  	qdp->credit = INIT_CREDIT;
140  	qdp->rate = rate * 128;
141  }
142  
hbm_init_edt_vqueue(struct hbm_vqueue * qdp,int rate)143  static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp,
144  						int rate)
145  {
146  	unsigned long long curtime;
147  
148  	curtime = bpf_ktime_get_ns();
149  	bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
150  	qdp->lasttime = curtime - BURST_SIZE_NS;	// support initial burst
151  	qdp->credit = 0;				// not used
152  	qdp->rate = rate * 128;
153  }
154  
hbm_update_stats(struct hbm_queue_stats * qsp,int len,unsigned long long curtime,bool congestion_flag,bool drop_flag,bool cwr_flag,bool ecn_ce_flag,struct hbm_pkt_info * pkti,int credit)155  static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
156  					     int len,
157  					     unsigned long long curtime,
158  					     bool congestion_flag,
159  					     bool drop_flag,
160  					     bool cwr_flag,
161  					     bool ecn_ce_flag,
162  					     struct hbm_pkt_info *pkti,
163  					     int credit)
164  {
165  	int rv = ALLOW_PKT;
166  
167  	if (qsp != NULL) {
168  		// Following is needed for work conserving
169  		__sync_add_and_fetch(&(qsp->bytes_total), len);
170  		if (qsp->stats) {
171  			// Optionally update statistics
172  			if (qsp->firstPacketTime == 0)
173  				qsp->firstPacketTime = curtime;
174  			qsp->lastPacketTime = curtime;
175  			__sync_add_and_fetch(&(qsp->pkts_total), 1);
176  			if (congestion_flag) {
177  				__sync_add_and_fetch(&(qsp->pkts_marked), 1);
178  				__sync_add_and_fetch(&(qsp->bytes_marked), len);
179  			}
180  			if (drop_flag) {
181  				__sync_add_and_fetch(&(qsp->pkts_dropped), 1);
182  				__sync_add_and_fetch(&(qsp->bytes_dropped),
183  						     len);
184  			}
185  			if (ecn_ce_flag)
186  				__sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1);
187  			if (pkti->cwnd) {
188  				__sync_add_and_fetch(&(qsp->sum_cwnd),
189  						     pkti->cwnd);
190  				__sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1);
191  			}
192  			if (pkti->rtt)
193  				__sync_add_and_fetch(&(qsp->sum_rtt),
194  						     pkti->rtt);
195  			__sync_add_and_fetch(&(qsp->sum_credit), credit);
196  
197  			if (drop_flag)
198  				rv = DROP_PKT;
199  			if (cwr_flag)
200  				rv |= 2;
201  			if (rv == DROP_PKT)
202  				__sync_add_and_fetch(&(qsp->returnValCount[0]),
203  						     1);
204  			else if (rv == ALLOW_PKT)
205  				__sync_add_and_fetch(&(qsp->returnValCount[1]),
206  						     1);
207  			else if (rv == 2)
208  				__sync_add_and_fetch(&(qsp->returnValCount[2]),
209  						     1);
210  			else if (rv == 3)
211  				__sync_add_and_fetch(&(qsp->returnValCount[3]),
212  						     1);
213  		}
214  	}
215  }
216