1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2
3 /*
4 * Test suite of lwt_xmit BPF programs that redirect packets
 * The tests check not only that these programs work as expected in the normal
 * case, but also that they handle abnormal situations gracefully.
7 *
8 * WARNING
9 * -------
10 * This test suite may crash the kernel, thus should be run in a VM.
11 *
12 * Setup:
13 * ---------
14 * All tests are performed in a single netns. Two lwt encap routes are setup for
15 * each subtest:
16 *
 * ip route add 10.0.0.0/24 dev link_err encap bpf xmit obj <obj> sec <ingress_sec>
 * ip route add 20.0.0.0/24 dev link_err encap bpf xmit obj <obj> sec <egress_sec>
19 *
20 * Here <obj> is statically defined to test_lwt_redirect.bpf.o, and each section
21 * of this object holds a program entry to test. The BPF object is built from
 * progs/test_lwt_redirect.c. We do not use a generated BPF skeleton because
 * libbpf does not yet support attaching lwt programs.
24 *
25 * For testing, ping commands are run in the test netns:
26 *
 * ping 10.0.0.<ifindex> -c1 -W1 -s <ICMP_PAYLOAD_SIZE>
 * ping 20.0.0.<ifindex> -c1 -W1 -s <ICMP_PAYLOAD_SIZE>
29 *
30 * Scenarios:
31 * --------------------------------
32 * 1. Redirect to a running tap/tun device
33 * 2. Redirect to a down tap/tun device
34 * 3. Redirect to a vlan device with lower layer down
35 *
36 * Case 1, ping packets should be received by packet socket on target device
37 * when redirected to ingress, and by tun/tap fd when redirected to egress.
38 *
39 * Case 2,3 are considered successful as long as they do not crash the kernel
40 * as a regression.
41 *
42 * Case 1,2 use tap device to test redirect to device that requires MAC
43 * header, and tun device to test the case with no MAC header added.
44 */
45 #include <sys/socket.h>
46 #include <net/if.h>
47 #include <linux/if_ether.h>
48 #include <linux/if_packet.h>
49 #include <linux/if_tun.h>
50 #include <arpa/inet.h>
51 #include <unistd.h>
52 #include <errno.h>
53 #include <stdbool.h>
54 #include <stdlib.h>
55
56 #define NETNS "ns_lwt_redirect"
57 #include "lwt_helpers.h"
58 #include "test_progs.h"
59 #include "network_helpers.h"
60
61 #define BPF_OBJECT "test_lwt_redirect.bpf.o"
62 #define INGRESS_SEC(need_mac) ((need_mac) ? "redir_ingress" : "redir_ingress_nomac")
63 #define EGRESS_SEC(need_mac) ((need_mac) ? "redir_egress" : "redir_egress_nomac")
64 #define LOCAL_SRC "10.0.0.1"
65 #define CIDR_TO_INGRESS "10.0.0.0/24"
66 #define CIDR_TO_EGRESS "20.0.0.0/24"
67
68 /* ping to redirect toward given dev, with last byte of dest IP being the target
69 * device index.
70 *
71 * Note: ping command inside BPF-CI is busybox version, so it does not have certain
72 * function, such like -m option to set packet mark.
73 */
ping_dev(const char * dev,bool is_ingress)74 static void ping_dev(const char *dev, bool is_ingress)
75 {
76 int link_index = if_nametoindex(dev);
77 char ip[256];
78
79 if (!ASSERT_GE(link_index, 0, "if_nametoindex"))
80 return;
81
82 if (is_ingress)
83 snprintf(ip, sizeof(ip), "10.0.0.%d", link_index);
84 else
85 snprintf(ip, sizeof(ip), "20.0.0.%d", link_index);
86
87 /* We won't get a reply. Don't fail here */
88 SYS_NOFAIL("ping %s -c1 -W1 -s %d",
89 ip, ICMP_PAYLOAD_SIZE);
90 }
91
/* Open a non-blocking AF_PACKET raw socket bound to @ifname for ETH_P_IP
 * frames, configured to ignore outgoing packets.
 *
 * @ifname: name of the interface to bind to
 *
 * Returns the socket fd on success. On failure the failing step is reported
 * through the ASSERT_* macros, the socket is closed, and -1 is returned.
 */
static int new_packet_sock(const char *ifname)
{
	struct sockaddr_ll addr = {
		.sll_family = AF_PACKET,
		.sll_protocol = htons(ETH_P_IP),
	};
	int ignore_outgoing = 1;
	int ifindex = -1;
	int err = 0;
	int s = -1;

	s = socket(AF_PACKET, SOCK_RAW, 0);
	if (!ASSERT_GE(s, 0, "socket(AF_PACKET)"))
		return -1;

	/* if_nametoindex() reports failure by returning 0, not a negative
	 * value, so anything below 1 means the interface was not found.
	 */
	ifindex = if_nametoindex(ifname);
	if (!ASSERT_GE(ifindex, 1, "if_nametoindex"))
		goto err_close;

	addr.sll_ifindex = ifindex;
	err = bind(s, (struct sockaddr *)&addr, sizeof(addr));
	if (!ASSERT_OK(err, "bind(AF_PACKET)"))
		goto err_close;

	/* Use packet socket to capture only the ingress, so we can detect a
	 * regression that actually redirects the packet to the egress.
	 */
	err = setsockopt(s, SOL_PACKET, PACKET_IGNORE_OUTGOING,
			 &ignore_outgoing, sizeof(ignore_outgoing));
	if (!ASSERT_OK(err, "setsockopt(PACKET_IGNORE_OUTGOING)"))
		goto err_close;

	err = fcntl(s, F_SETFL, O_NONBLOCK);
	if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)"))
		goto err_close;

	return s;

err_close:
	close(s);
	return -1;
}
140
/* Packet filter for frames that start with an Ethernet header.
 *
 * @buf: start of the captured frame
 * @len: number of bytes available in @buf
 *
 * Returns -1 if the frame is shorter than an Ethernet header or its
 * EtherType is not IPv4; otherwise returns whatever __expect_icmp_ipv4()
 * returns for the bytes that follow the Ethernet header.
 */
static int expect_icmp(char *buf, ssize_t len)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)buf;

	/* The length test comes first so h_proto is never read from a
	 * truncated frame.
	 */
	if (len < (ssize_t)sizeof(*mac_hdr) ||
	    mac_hdr->h_proto != htons(ETH_P_IP))
		return -1;

	return __expect_icmp_ipv4(buf + sizeof(*mac_hdr),
				  len - sizeof(*mac_hdr));
}
153
/* Packet filter for packets captured without a MAC header: the buffer is
 * handed to __expect_icmp_ipv4() unchanged and its result is returned.
 *
 * @buf: start of the captured packet
 * @len: number of bytes available in @buf
 */
static int expect_icmp_nomac(char *buf, ssize_t len)
{
	int verdict = __expect_icmp_ipv4(buf, len);

	return verdict;
}
158
send_and_capture_test_packets(const char * test_name,int tap_fd,const char * target_dev,bool need_mac)159 static void send_and_capture_test_packets(const char *test_name, int tap_fd,
160 const char *target_dev, bool need_mac)
161 {
162 int psock = -1;
163 struct timeval timeo = {
164 .tv_sec = 0,
165 .tv_usec = 250000,
166 };
167 int ret = -1;
168
169 filter_t filter = need_mac ? expect_icmp : expect_icmp_nomac;
170
171 ping_dev(target_dev, false);
172
173 ret = wait_for_packet(tap_fd, filter, &timeo);
174 if (!ASSERT_EQ(ret, 1, "wait_for_epacket")) {
175 log_err("%s egress test fails", test_name);
176 goto out;
177 }
178
179 psock = new_packet_sock(target_dev);
180 ping_dev(target_dev, true);
181
182 ret = wait_for_packet(psock, filter, &timeo);
183 if (!ASSERT_EQ(ret, 1, "wait_for_ipacket")) {
184 log_err("%s ingress test fails", test_name);
185 goto out;
186 }
187
188 out:
189 if (psock >= 0)
190 close(psock);
191 }
192
setup_redirect_target(const char * target_dev,bool need_mac)193 static int setup_redirect_target(const char *target_dev, bool need_mac)
194 {
195 int target_index = -1;
196 int tap_fd = -1;
197
198 tap_fd = open_tuntap(target_dev, need_mac);
199 if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
200 goto fail;
201
202 target_index = if_nametoindex(target_dev);
203 if (!ASSERT_GE(target_index, 0, "if_nametoindex"))
204 goto fail;
205
206 SYS(fail, "sysctl -w net.ipv6.conf.all.disable_ipv6=1");
207 SYS(fail, "ip link add link_err type dummy");
208 SYS(fail, "ip link set lo up");
209 SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32");
210 SYS(fail, "ip link set link_err up");
211 SYS(fail, "ip link set %s up", target_dev);
212
213 SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec %s",
214 CIDR_TO_INGRESS, BPF_OBJECT, INGRESS_SEC(need_mac));
215
216 SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec %s",
217 CIDR_TO_EGRESS, BPF_OBJECT, EGRESS_SEC(need_mac));
218
219 return tap_fd;
220
221 fail:
222 if (tap_fd >= 0)
223 close(tap_fd);
224 return -1;
225 }
226
/* Subtest: redirect to a running device named "tap0" with need_mac set, then
 * verify that both the egress and the ingress redirected pings are captured.
 */
static void test_lwt_redirect_normal(void)
{
	const bool with_mac = true;
	const char *dev = "tap0";
	int fd = setup_redirect_target(dev, with_mac);

	if (ASSERT_GE(fd, 0, "setup_redirect_target")) {
		send_and_capture_test_packets(__func__, fd, dev, with_mac);
		close(fd);
	}
}
240
/* Subtest: redirect to a running device named "tun0" with need_mac cleared,
 * then verify that both the egress and the ingress redirected pings are
 * captured.
 */
static void test_lwt_redirect_normal_nomac(void)
{
	const bool with_mac = false;
	const char *dev = "tun0";
	int fd = setup_redirect_target(dev, with_mac);

	if (ASSERT_GE(fd, 0, "setup_redirect_target")) {
		send_and_capture_test_packets(__func__, fd, dev, with_mac);
		close(fd);
	}
}
254
255 /* This test aims to prevent regression of future. As long as the kernel does
256 * not panic, it is considered as success.
257 */
__test_lwt_redirect_dev_down(bool need_mac)258 static void __test_lwt_redirect_dev_down(bool need_mac)
259 {
260 const char *target_dev = "tap0";
261 int tap_fd = -1;
262
263 tap_fd = setup_redirect_target(target_dev, need_mac);
264 if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target"))
265 return;
266
267 SYS(out, "ip link set %s down", target_dev);
268 ping_dev(target_dev, true);
269 ping_dev(target_dev, false);
270
271 out:
272 close(tap_fd);
273 }
274
/* Subtest: down-device redirect with need_mac set. */
static void test_lwt_redirect_dev_down(void)
{
	const bool need_mac = true;

	__test_lwt_redirect_dev_down(need_mac);
}
279
/* Subtest: down-device redirect with need_mac cleared. */
static void test_lwt_redirect_dev_down_nomac(void)
{
	const bool need_mac = false;

	__test_lwt_redirect_dev_down(need_mac);
}
284
285 /* This test aims to prevent regression of future. As long as the kernel does
286 * not panic, it is considered as success.
287 */
test_lwt_redirect_dev_carrier_down(void)288 static void test_lwt_redirect_dev_carrier_down(void)
289 {
290 const char *lower_dev = "tap0";
291 const char *vlan_dev = "vlan100";
292 int tap_fd = -1;
293
294 tap_fd = setup_redirect_target(lower_dev, true);
295 if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target"))
296 return;
297
298 SYS(out, "ip link add vlan100 link %s type vlan id 100", lower_dev);
299 SYS(out, "ip link set %s up", vlan_dev);
300 SYS(out, "ip link set %s down", lower_dev);
301 ping_dev(vlan_dev, true);
302 ping_dev(vlan_dev, false);
303
304 out:
305 close(tap_fd);
306 }
307
test_lwt_redirect_run(void * arg)308 static void *test_lwt_redirect_run(void *arg)
309 {
310 netns_delete();
311 RUN_TEST(lwt_redirect_normal);
312 RUN_TEST(lwt_redirect_normal_nomac);
313 RUN_TEST(lwt_redirect_dev_down);
314 RUN_TEST(lwt_redirect_dev_down_nomac);
315 RUN_TEST(lwt_redirect_dev_carrier_down);
316 return NULL;
317 }
318
test_lwt_redirect(void)319 void test_lwt_redirect(void)
320 {
321 pthread_t test_thread;
322 int err;
323
324 /* Run the tests in their own thread to isolate the namespace changes
325 * so they do not affect the environment of other tests.
326 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
327 */
328 err = pthread_create(&test_thread, NULL, &test_lwt_redirect_run, NULL);
329 if (ASSERT_OK(err, "pthread_create"))
330 ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
331 }
332