1  // SPDX-License-Identifier: GPL-2.0-only
2  #define _GNU_SOURCE
3  
4  #include <errno.h>
5  #include <stdbool.h>
6  #include <stdio.h>
7  #include <string.h>
8  #include <unistd.h>
9  #include <sched.h>
10  
11  #include <arpa/inet.h>
12  #include <sys/mount.h>
13  #include <sys/stat.h>
14  #include <sys/types.h>
15  #include <sys/un.h>
16  #include <sys/eventfd.h>
17  
18  #include <linux/err.h>
19  #include <linux/in.h>
20  #include <linux/in6.h>
21  #include <linux/limits.h>
22  
23  #include <linux/ip.h>
24  #include <linux/udp.h>
25  #include <netinet/tcp.h>
26  #include <net/if.h>
27  
28  #include "bpf_util.h"
29  #include "network_helpers.h"
30  #include "test_progs.h"
31  
32  #ifdef TRAFFIC_MONITOR
33  /* Prevent pcap.h from including pcap/bpf.h and causing conflicts */
34  #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
35  #include <pcap/pcap.h>
36  #include <pcap/dlt.h>
37  #endif
38  
39  #ifndef IPPROTO_MPTCP
40  #define IPPROTO_MPTCP 262
41  #endif
42  
/* Text for the current errno value, or "None" when errno is 0. */
#define clean_errno() (errno == 0 ? "None" : strerror(errno))
/* Print a printf-style message to stderr, prefixed with the source file,
 * line and the current errno text, and terminated with a newline (callers
 * should not add their own "\n"). errno is saved before printing and
 * restored afterwards so that logging does not clobber it.
 */
#define log_err(MSG, ...) ({						\
			int __save = errno;				\
			fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
				__FILE__, __LINE__, clean_errno(),	\
				##__VA_ARGS__);				\
			errno = __save;					\
})
51  
/* Template IPv4/TCP packet: EtherType ETH_P_IP, an IP header with ihl = 5
 * carrying TCP, and a TCP header with doff = 5. tot_len holds MAGIC_BYTES in
 * network byte order; urg_ptr is stored as the raw value 123 (no htons()).
 */
struct ipv4_packet pkt_v4 = {
	.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
	.iph.ihl = 5,
	.iph.protocol = IPPROTO_TCP,
	.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
	.tcp.urg_ptr = 123,
	.tcp.doff = 5,
};
60  
/* Template IPv6/TCP packet: EtherType ETH_P_IPV6, next header TCP, and a TCP
 * header with doff = 5. payload_len holds MAGIC_BYTES in network byte order;
 * urg_ptr is stored as the raw value 123 (no htons()).
 */
struct ipv6_packet pkt_v6 = {
	.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
	.iph.nexthdr = IPPROTO_TCP,
	.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
	.tcp.urg_ptr = 123,
	.tcp.doff = 5,
};
68  
/* Zero-initialized options used by the helpers below when the caller passes
 * a NULL opts pointer.
 */
static const struct network_helper_opts default_opts;
70  
/* Apply one timeout to both the receive and the send side of socket @fd.
 *
 * @fd: socket to configure
 * @timeout_ms: timeout in milliseconds; any value <= 0 selects the
 *              3 second default
 *
 * Returns 0 on success, or -1 (after logging) if either setsockopt() fails.
 */
int settimeo(int fd, int timeout_ms)
{
	struct timeval tv = { 0 };

	if (timeout_ms > 0) {
		tv.tv_sec = timeout_ms / 1000;
		tv.tv_usec = (timeout_ms % 1000) * 1000;
	} else {
		tv.tv_sec = 3;
	}

	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) != 0) {
		log_err("Failed to set SO_RCVTIMEO");
		return -1;
	}

	if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) != 0) {
		log_err("Failed to set SO_SNDTIMEO");
		return -1;
	}

	return 0;
}
94  
/* close(fd) on an error path without letting close() overwrite the errno of
 * the failure being reported: errno is saved before and restored after.
 */
#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
96  
start_server_addr(int type,const struct sockaddr_storage * addr,socklen_t addrlen,const struct network_helper_opts * opts)97  int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
98  		      const struct network_helper_opts *opts)
99  {
100  	int fd;
101  
102  	if (!opts)
103  		opts = &default_opts;
104  
105  	fd = socket(addr->ss_family, type, opts->proto);
106  	if (fd < 0) {
107  		log_err("Failed to create server socket");
108  		return -1;
109  	}
110  
111  	if (settimeo(fd, opts->timeout_ms))
112  		goto error_close;
113  
114  	if (opts->post_socket_cb &&
115  	    opts->post_socket_cb(fd, opts->cb_opts)) {
116  		log_err("Failed to call post_socket_cb");
117  		goto error_close;
118  	}
119  
120  	if (bind(fd, (struct sockaddr *)addr, addrlen) < 0) {
121  		log_err("Failed to bind socket");
122  		goto error_close;
123  	}
124  
125  	if (type == SOCK_STREAM) {
126  		if (listen(fd, opts->backlog ? MAX(opts->backlog, 0) : 1) < 0) {
127  			log_err("Failed to listed on socket");
128  			goto error_close;
129  		}
130  	}
131  
132  	return fd;
133  
134  error_close:
135  	save_errno_close(fd);
136  	return -1;
137  }
138  
/* Start a server on the textual address @addr_str and @port.
 *
 * The address is converted with make_sockaddr() and handed to
 * start_server_addr(); a NULL @opts selects the zeroed defaults.
 *
 * Returns the server fd, or -1 on failure.
 */
int start_server_str(int family, int type, const char *addr_str, __u16 port,
		     const struct network_helper_opts *opts)
{
	const struct network_helper_opts *eff_opts = opts ? opts : &default_opts;
	struct sockaddr_storage ss;
	socklen_t ss_len;

	if (make_sockaddr(family, addr_str, port, &ss, &ss_len) != 0)
		return -1;

	return start_server_addr(type, &ss, ss_len, eff_opts);
}
153  
/* Convenience wrapper around start_server_str() for callers that only need
 * to choose a timeout; every other option keeps its zero default.
 *
 * Returns the server fd, or -1 on failure.
 */
int start_server(int family, int type, const char *addr_str, __u16 port,
		 int timeout_ms)
{
	return start_server_str(family, type, addr_str, port,
				&(struct network_helper_opts){
					.timeout_ms = timeout_ms,
				});
}
163  
reuseport_cb(int fd,void * opts)164  static int reuseport_cb(int fd, void *opts)
165  {
166  	int on = 1;
167  
168  	return setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on));
169  }
170  
/* Start @nr_listens servers that share one address through SO_REUSEPORT.
 *
 * The first server is bound to @addr_str/@port. Its actual address is then
 * read back with getsockname() and reused for the remaining servers, so all
 * of them end up on the address the first one really got.
 *
 * Returns a malloc()ed array of @nr_listens fds to be released with
 * free_fds(), or NULL on failure (including @nr_listens == 0), in which case
 * every socket opened so far has been closed.
 */
int *start_reuseport_server(int family, int type, const char *addr_str,
			    __u16 port, int timeout_ms, unsigned int nr_listens)
{
	struct network_helper_opts opts = {
		.timeout_ms = timeout_ms,
		.post_socket_cb = reuseport_cb,
	};
	struct sockaddr_storage ss;
	unsigned int opened = 0;
	socklen_t ss_len;
	int *fds;

	if (nr_listens == 0)
		return NULL;

	if (make_sockaddr(family, addr_str, port, &ss, &ss_len) != 0)
		return NULL;

	fds = malloc(sizeof(*fds) * nr_listens);
	if (fds == NULL)
		return NULL;

	while (opened < nr_listens) {
		fds[opened] = start_server_addr(type, &ss, ss_len, &opts);
		if (fds[opened] == -1)
			goto close_fds;
		opened++;

		/* After the first server, switch to the address it was given */
		if (opened == 1 &&
		    getsockname(fds[0], (struct sockaddr *)&ss, &ss_len))
			goto close_fds;
	}

	return fds;

close_fds:
	free_fds(fds, opened);
	return NULL;
}
213  
/* Close the first @nr_close_fds descriptors of @fds, last one first, and
 * free the array itself. A NULL @fds is a no-op.
 */
void free_fds(int *fds, unsigned int nr_close_fds)
{
	unsigned int remaining;

	if (fds == NULL)
		return;

	for (remaining = nr_close_fds; remaining > 0; remaining--)
		close(fds[remaining - 1]);

	free(fds);
}
222  
/* Connect to the server behind @server_fd with TCP Fast Open, sending @data
 * in the same call that establishes the connection.
 *
 * @server_fd: server socket; its bound address is the destination
 * @data/@data_len: payload passed to sendto(MSG_FASTOPEN)
 * @timeout_ms: timeout handed to settimeo()
 *
 * Returns the connected client fd, or -1 on failure (including a short
 * send), with the socket closed and errno preserved.
 */
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
		     int timeout_ms)
{
	struct sockaddr_storage addr;
	socklen_t addrlen = sizeof(addr);
	ssize_t ret;
	int fd;

	if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
		log_err("Failed to get server addr");
		return -1;
	}

	fd = socket(addr.ss_family, SOCK_STREAM, 0);
	if (fd < 0) {
		log_err("Failed to create client socket");
		return -1;
	}

	if (settimeo(fd, timeout_ms))
		goto error_close;

	ret = sendto(fd, data, data_len, MSG_FASTOPEN, (struct sockaddr *)&addr,
		     addrlen);
	/* Keep the full ssize_t result and test for failure explicitly, so
	 * that -1 can never compare equal to a large unsigned data_len.
	 */
	if (ret < 0 || (size_t)ret != data_len) {
		/* log_err() already appends the newline */
		log_err("sendto(data, %u) != %zd", data_len, ret);
		goto error_close;
	}

	return fd;

error_close:
	save_errno_close(fd);
	return -1;
}
259  
client_socket(int family,int type,const struct network_helper_opts * opts)260  int client_socket(int family, int type,
261  		  const struct network_helper_opts *opts)
262  {
263  	int fd;
264  
265  	if (!opts)
266  		opts = &default_opts;
267  
268  	fd = socket(family, type, opts->proto);
269  	if (fd < 0) {
270  		log_err("Failed to create client socket");
271  		return -1;
272  	}
273  
274  	if (settimeo(fd, opts->timeout_ms))
275  		goto error_close;
276  
277  	if (opts->post_socket_cb &&
278  	    opts->post_socket_cb(fd, opts->cb_opts))
279  		goto error_close;
280  
281  	return fd;
282  
283  error_close:
284  	save_errno_close(fd);
285  	return -1;
286  }
287  
connect_to_addr(int type,const struct sockaddr_storage * addr,socklen_t addrlen,const struct network_helper_opts * opts)288  int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
289  		    const struct network_helper_opts *opts)
290  {
291  	int fd;
292  
293  	if (!opts)
294  		opts = &default_opts;
295  
296  	fd = client_socket(addr->ss_family, type, opts);
297  	if (fd < 0) {
298  		log_err("Failed to create client socket");
299  		return -1;
300  	}
301  
302  	if (connect(fd, (const struct sockaddr *)addr, addrlen)) {
303  		log_err("Failed to connect to server");
304  		save_errno_close(fd);
305  		return -1;
306  	}
307  
308  	return fd;
309  }
310  
/* Connect to the textual address @addr_str and @port.
 *
 * The address is converted with make_sockaddr() and handed to
 * connect_to_addr(); a NULL @opts selects the zeroed defaults.
 *
 * Returns the connected fd, or -1 on failure.
 */
int connect_to_addr_str(int family, int type, const char *addr_str, __u16 port,
			const struct network_helper_opts *opts)
{
	const struct network_helper_opts *eff_opts = opts ? opts : &default_opts;
	struct sockaddr_storage ss;
	socklen_t ss_len;

	if (make_sockaddr(family, addr_str, port, &ss, &ss_len) != 0)
		return -1;

	return connect_to_addr(type, &ss, ss_len, eff_opts);
}
325  
connect_to_fd_opts(int server_fd,const struct network_helper_opts * opts)326  int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
327  {
328  	struct sockaddr_storage addr;
329  	socklen_t addrlen, optlen;
330  	int type;
331  
332  	if (!opts)
333  		opts = &default_opts;
334  
335  	optlen = sizeof(type);
336  	if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
337  		log_err("getsockopt(SOL_TYPE)");
338  		return -1;
339  	}
340  
341  	addrlen = sizeof(addr);
342  	if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
343  		log_err("Failed to get server addr");
344  		return -1;
345  	}
346  
347  	return connect_to_addr(type, &addr, addrlen, opts);
348  }
349  
connect_to_fd(int server_fd,int timeout_ms)350  int connect_to_fd(int server_fd, int timeout_ms)
351  {
352  	struct network_helper_opts opts = {
353  		.timeout_ms = timeout_ms,
354  	};
355  	socklen_t optlen;
356  	int protocol;
357  
358  	optlen = sizeof(protocol);
359  	if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) {
360  		log_err("getsockopt(SOL_PROTOCOL)");
361  		return -1;
362  	}
363  	opts.proto = protocol;
364  
365  	return connect_to_fd_opts(server_fd, &opts);
366  }
367  
/* Connect the existing socket @client_fd to the address that @server_fd is
 * bound to, after applying @timeout_ms to @client_fd with settimeo().
 *
 * Neither descriptor is closed here, on success or on failure.
 *
 * Returns 0 on success, -1 on failure.
 */
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
{
	struct sockaddr_storage srv_addr;
	socklen_t srv_len = sizeof(srv_addr);

	if (settimeo(client_fd, timeout_ms) != 0)
		return -1;

	if (getsockname(server_fd, (struct sockaddr *)&srv_addr, &srv_len) != 0) {
		log_err("Failed to get server addr");
		return -1;
	}

	if (connect(client_fd, (const struct sockaddr *)&srv_addr, srv_len) != 0) {
		log_err("Failed to connect to server");
		return -1;
	}

	return 0;
}
388  
/* Fill @addr (and optionally *@len) from a textual address and a port.
 *
 * @family: AF_INET, AF_INET6 or AF_UNIX
 * @addr_str: for AF_INET/AF_INET6 a presentation-format address, or NULL to
 *            leave the address part all-zero; for AF_UNIX the name of an
 *            abstract socket (required, at most sizeof(sun_path) - 1 bytes)
 * @port: port in host byte order (ignored for AF_UNIX)
 * @addr: output storage
 * @len: if non-NULL, receives the length of the produced address
 *
 * Returns 0 on success, -1 on an unparsable address, an unsupported family,
 * or a missing/over-long AF_UNIX name.
 */
int make_sockaddr(int family, const char *addr_str, __u16 port,
		  struct sockaddr_storage *addr, socklen_t *len)
{
	if (family == AF_INET) {
		struct sockaddr_in *sin = (void *)addr;

		memset(addr, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_port = htons(port);
		if (addr_str &&
		    inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
			log_err("inet_pton(AF_INET, %s)", addr_str);
			return -1;
		}
		if (len)
			*len = sizeof(*sin);
		return 0;
	} else if (family == AF_INET6) {
		struct sockaddr_in6 *sin6 = (void *)addr;

		memset(addr, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_port = htons(port);
		if (addr_str &&
		    inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
			log_err("inet_pton(AF_INET6, %s)", addr_str);
			return -1;
		}
		if (len)
			*len = sizeof(*sin6);
		return 0;
	} else if (family == AF_UNIX) {
		/* Note that we always use abstract unix sockets to avoid having
		 * to clean up leftover files.
		 */
		struct sockaddr_un *sun = (void *)addr;
		size_t name_len;

		if (!addr_str) {
			errno = EINVAL;
			log_err("AF_UNIX requires an address string");
			return -1;
		}

		/* sun_path[0] stays 0 as the abstract-namespace marker, which
		 * leaves sizeof(sun_path) - 1 bytes for the name itself.
		 */
		name_len = strlen(addr_str);
		if (name_len > sizeof(sun->sun_path) - 1) {
			errno = ENAMETOOLONG;
			log_err("AF_UNIX name too long (%zu bytes)", name_len);
			return -1;
		}

		memset(addr, 0, sizeof(*sun));
		sun->sun_family = family;
		/* Abstract names are length-delimited: no terminating NUL is
		 * needed, so copy exactly name_len bytes.
		 */
		memcpy(sun->sun_path + 1, addr_str, name_len);
		if (len)
			*len = offsetof(struct sockaddr_un, sun_path) + 1 + name_len;
		return 0;
	}
	return -1;
}
436  
/* Name of the ping command to run for @family.
 *
 * Returns a string literal: "ping" for anything but AF_INET6; for AF_INET6
 * "ping6" when `which ping6` succeeds, otherwise "ping -6".
 */
char *ping_command(int family)
{
	if (family != AF_INET6)
		return "ping";

	/* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */
	return system("which ping6 >/dev/null 2>&1") == 0 ? "ping6" : "ping -6";
}
448  
/* Delete the network namespace @name by running `ip netns del`, with the
 * command's output discarded.
 *
 * Returns the system() result, or -1 if the command string could not be
 * allocated.
 */
int remove_netns(const char *name)
{
	char *cmd = NULL;
	int rc;

	if (asprintf(&cmd, "ip netns del %s >/dev/null 2>&1", name) < 0) {
		log_err("Failed to malloc cmd");
		return -1;
	}

	rc = system(cmd);
	free(cmd);

	return rc;
}
464  
/* Create the network namespace @name with `ip netns add` and bring up its
 * loopback interface.
 *
 * Returns 0 on success; otherwise the non-zero system() result of the
 * failing command, or -1 if a command string could not be allocated (the
 * namespace is removed again when that happens for the second command).
 */
int make_netns(const char *name)
{
	char *cmd = NULL;
	int rc;

	if (asprintf(&cmd, "ip netns add %s", name) < 0) {
		log_err("Failed to malloc cmd");
		return -1;
	}

	rc = system(cmd);
	free(cmd);
	if (rc != 0)
		return rc;

	if (asprintf(&cmd, "ip -n %s link set lo up", name) < 0) {
		log_err("Failed to malloc cmd for setting up lo");
		remove_netns(name);
		return -1;
	}

	rc = system(cmd);
	free(cmd);

	return rc;
}
494  
/* Handle returned by open_netns(): holds an fd for the network namespace
 * that was current before the switch, so that close_netns() can return to it.
 */
struct nstoken {
	int orig_netns_fd;
};
498  
open_netns(const char * name)499  struct nstoken *open_netns(const char *name)
500  {
501  	int nsfd;
502  	char nspath[PATH_MAX];
503  	int err;
504  	struct nstoken *token;
505  
506  	token = calloc(1, sizeof(struct nstoken));
507  	if (!token) {
508  		log_err("Failed to malloc token");
509  		return NULL;
510  	}
511  
512  	token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
513  	if (token->orig_netns_fd == -1) {
514  		log_err("Failed to open(/proc/self/ns/net)");
515  		goto fail;
516  	}
517  
518  	snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
519  	nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
520  	if (nsfd == -1) {
521  		log_err("Failed to open(%s)", nspath);
522  		goto fail;
523  	}
524  
525  	err = setns(nsfd, CLONE_NEWNET);
526  	close(nsfd);
527  	if (err) {
528  		log_err("Failed to setns(nsfd)");
529  		goto fail;
530  	}
531  
532  	return token;
533  fail:
534  	if (token->orig_netns_fd != -1)
535  		close(token->orig_netns_fd);
536  	free(token);
537  	return NULL;
538  }
539  
close_netns(struct nstoken * token)540  void close_netns(struct nstoken *token)
541  {
542  	if (!token)
543  		return;
544  
545  	if (setns(token->orig_netns_fd, CLONE_NEWNET))
546  		log_err("Failed to setns(orig_netns_fd)");
547  	close(token->orig_netns_fd);
548  	free(token);
549  }
550  
/* Local port of @sock_fd as stored in its socket address.
 *
 * The value is returned exactly as found in sin_port/sin6_port, i.e. without
 * an ntohs() conversion.
 *
 * Returns the port for AF_INET/AF_INET6 sockets, the negative getsockname()
 * result if that call fails, or -1 for any other address family.
 */
int get_socket_local_port(int sock_fd)
{
	struct sockaddr_storage ss;
	socklen_t ss_len = sizeof(ss);
	int rc;

	rc = getsockname(sock_fd, (struct sockaddr *)&ss, &ss_len);
	if (rc < 0)
		return rc;

	switch (ss.ss_family) {
	case AF_INET:
		return ((struct sockaddr_in *)&ss)->sin_port;
	case AF_INET6:
		return ((struct sockaddr_in6 *)&ss)->sin6_port;
	default:
		return -1;
	}
}
573  
get_hw_ring_size(char * ifname,struct ethtool_ringparam * ring_param)574  int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param)
575  {
576  	struct ifreq ifr = {0};
577  	int sockfd, err;
578  
579  	sockfd = socket(AF_INET, SOCK_DGRAM, 0);
580  	if (sockfd < 0)
581  		return -errno;
582  
583  	memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
584  
585  	ring_param->cmd = ETHTOOL_GRINGPARAM;
586  	ifr.ifr_data = (char *)ring_param;
587  
588  	if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) {
589  		err = errno;
590  		close(sockfd);
591  		return -err;
592  	}
593  
594  	close(sockfd);
595  	return 0;
596  }
597  
set_hw_ring_size(char * ifname,struct ethtool_ringparam * ring_param)598  int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param)
599  {
600  	struct ifreq ifr = {0};
601  	int sockfd, err;
602  
603  	sockfd = socket(AF_INET, SOCK_DGRAM, 0);
604  	if (sockfd < 0)
605  		return -errno;
606  
607  	memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
608  
609  	ring_param->cmd = ETHTOOL_SRINGPARAM;
610  	ifr.ifr_data = (char *)ring_param;
611  
612  	if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) {
613  		err = errno;
614  		close(sockfd);
615  		return -err;
616  	}
617  
618  	close(sockfd);
619  	return 0;
620  }
621  
/* Argument block shared between send_recv_data() and its server thread. */
struct send_recv_arg {
	int		fd;	/* listening socket the server thread accept()s on */
	uint32_t	bytes;	/* number of bytes the server thread must send */
	int		stop;	/* set non-zero by either side to make the other stop */
};
627  
send_recv_server(void * arg)628  static void *send_recv_server(void *arg)
629  {
630  	struct send_recv_arg *a = (struct send_recv_arg *)arg;
631  	ssize_t nr_sent = 0, bytes = 0;
632  	char batch[1500];
633  	int err = 0, fd;
634  
635  	fd = accept(a->fd, NULL, NULL);
636  	while (fd == -1) {
637  		if (errno == EINTR)
638  			continue;
639  		err = -errno;
640  		goto done;
641  	}
642  
643  	if (settimeo(fd, 0)) {
644  		err = -errno;
645  		goto done;
646  	}
647  
648  	while (bytes < a->bytes && !READ_ONCE(a->stop)) {
649  		nr_sent = send(fd, &batch,
650  			       MIN(a->bytes - bytes, sizeof(batch)), 0);
651  		if (nr_sent == -1 && errno == EINTR)
652  			continue;
653  		if (nr_sent == -1) {
654  			err = -errno;
655  			break;
656  		}
657  		bytes += nr_sent;
658  	}
659  
660  	if (bytes != a->bytes) {
661  		log_err("send %zd expected %u", bytes, a->bytes);
662  		if (!err)
663  			err = bytes > a->bytes ? -E2BIG : -EINTR;
664  	}
665  
666  done:
667  	if (fd >= 0)
668  		close(fd);
669  	if (err) {
670  		WRITE_ONCE(a->stop, 1);
671  		return ERR_PTR(err);
672  	}
673  	return NULL;
674  }
675  
send_recv_data(int lfd,int fd,uint32_t total_bytes)676  int send_recv_data(int lfd, int fd, uint32_t total_bytes)
677  {
678  	ssize_t nr_recv = 0, bytes = 0;
679  	struct send_recv_arg arg = {
680  		.fd	= lfd,
681  		.bytes	= total_bytes,
682  		.stop	= 0,
683  	};
684  	pthread_t srv_thread;
685  	void *thread_ret;
686  	char batch[1500];
687  	int err = 0;
688  
689  	err = pthread_create(&srv_thread, NULL, send_recv_server, (void *)&arg);
690  	if (err) {
691  		log_err("Failed to pthread_create");
692  		return err;
693  	}
694  
695  	/* recv total_bytes */
696  	while (bytes < total_bytes && !READ_ONCE(arg.stop)) {
697  		nr_recv = recv(fd, &batch,
698  			       MIN(total_bytes - bytes, sizeof(batch)), 0);
699  		if (nr_recv == -1 && errno == EINTR)
700  			continue;
701  		if (nr_recv == -1) {
702  			err = -errno;
703  			break;
704  		}
705  		bytes += nr_recv;
706  	}
707  
708  	if (bytes != total_bytes) {
709  		log_err("recv %zd expected %u", bytes, total_bytes);
710  		if (!err)
711  			err = bytes > total_bytes ? -E2BIG : -EINTR;
712  	}
713  
714  	WRITE_ONCE(arg.stop, 1);
715  	pthread_join(srv_thread, &thread_ret);
716  	if (IS_ERR(thread_ret)) {
717  		log_err("Failed in thread_ret %ld", PTR_ERR(thread_ret));
718  		err = err ? : PTR_ERR(thread_ret);
719  	}
720  
721  	return err;
722  }
723  
724  #ifdef TRAFFIC_MONITOR
/* State shared between traffic_monitor_start()/traffic_monitor_stop() and
 * the monitor thread.
 */
struct tmonitor_ctx {
	pcap_t *pcap;		/* capture handle from traffic_monitor_prepare_pcap() */
	pcap_dumper_t *dumper;	/* writer for the packet file named by pkt_fname */
	pthread_t thread;	/* thread running traffic_monitor_thread() */
	int wake_fd;		/* eventfd written by traffic_monitor_stop() to wake the thread */

	volatile bool done;	/* set to true to make the thread leave its loop */
	char pkt_fname[PATH_MAX];	/* path of the packet dump file */
	int pcap_fd;		/* selectable fd from pcap_get_selectable_fd() */
};
735  
736  /* Is this packet captured with a Ethernet protocol type? */
is_ethernet(const u_char * packet)737  static bool is_ethernet(const u_char *packet)
738  {
739  	u16 arphdr_type;
740  
741  	memcpy(&arphdr_type, packet + 8, 2);
742  	arphdr_type = ntohs(arphdr_type);
743  
744  	/* Except the following cases, the protocol type contains the
745  	 * Ethernet protocol type for the packet.
746  	 *
747  	 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
748  	 */
749  	switch (arphdr_type) {
750  	case 770: /* ARPHRD_FRAD */
751  	case 778: /* ARPHDR_IPGRE */
752  	case 803: /* ARPHRD_IEEE80211_RADIOTAP */
753  		printf("Packet captured: arphdr_type=%d\n", arphdr_type);
754  		return false;
755  	}
756  	return true;
757  }
758  
/* Short labels for packet types, indexed by the packet type value that
 * pkt_type_str() receives.
 */
static const char * const pkt_types[] = {
	"In",
	"B",			/* Broadcast */
	"M",			/* Multicast */
	"C",			/* Captured with the promiscuous mode */
	"Out",
};
766  
/* Label for @pkt_type from pkt_types[], or "Unknown" when the value lies
 * outside the table.
 */
static const char *pkt_type_str(u16 pkt_type)
{
	return pkt_type < ARRAY_SIZE(pkt_types) ? pkt_types[pkt_type] : "Unknown";
}
773  
774  /* Show the information of the transport layer in the packet */
show_transport(const u_char * packet,u16 len,u32 ifindex,const char * src_addr,const char * dst_addr,u16 proto,bool ipv6,u8 pkt_type)775  static void show_transport(const u_char *packet, u16 len, u32 ifindex,
776  			   const char *src_addr, const char *dst_addr,
777  			   u16 proto, bool ipv6, u8 pkt_type)
778  {
779  	char *ifname, _ifname[IF_NAMESIZE];
780  	const char *transport_str;
781  	u16 src_port, dst_port;
782  	struct udphdr *udp;
783  	struct tcphdr *tcp;
784  
785  	ifname = if_indextoname(ifindex, _ifname);
786  	if (!ifname) {
787  		snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
788  		ifname = _ifname;
789  	}
790  
791  	if (proto == IPPROTO_UDP) {
792  		udp = (struct udphdr *)packet;
793  		src_port = ntohs(udp->source);
794  		dst_port = ntohs(udp->dest);
795  		transport_str = "UDP";
796  	} else if (proto == IPPROTO_TCP) {
797  		tcp = (struct tcphdr *)packet;
798  		src_port = ntohs(tcp->source);
799  		dst_port = ntohs(tcp->dest);
800  		transport_str = "TCP";
801  	} else if (proto == IPPROTO_ICMP) {
802  		printf("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n",
803  		       ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
804  		       packet[0], packet[1]);
805  		return;
806  	} else if (proto == IPPROTO_ICMPV6) {
807  		printf("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n",
808  		       ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
809  		       packet[0], packet[1]);
810  		return;
811  	} else {
812  		printf("%-7s %-3s %s %s > %s: protocol %d\n",
813  		       ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4",
814  		       src_addr, dst_addr, proto);
815  		return;
816  	}
817  
818  	/* TCP or UDP*/
819  
820  	flockfile(stdout);
821  	if (ipv6)
822  		printf("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d",
823  		       ifname, pkt_type_str(pkt_type), src_addr, src_port,
824  		       dst_addr, dst_port, transport_str, len);
825  	else
826  		printf("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d",
827  		       ifname, pkt_type_str(pkt_type), src_addr, src_port,
828  		       dst_addr, dst_port, transport_str, len);
829  
830  	if (proto == IPPROTO_TCP) {
831  		if (tcp->fin)
832  			printf(", FIN");
833  		if (tcp->syn)
834  			printf(", SYN");
835  		if (tcp->rst)
836  			printf(", RST");
837  		if (tcp->ack)
838  			printf(", ACK");
839  	}
840  
841  	printf("\n");
842  	funlockfile(stdout);
843  }
844  
/* Print a summary of the IPv6 packet starting at @packet.
 *
 * Addresses that cannot be formatted are shown as "<invalid>". The transport
 * header is taken to follow the fixed IPv6 header directly, and nexthdr is
 * used as the protocol.
 */
static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
{
	struct ipv6hdr *ip6 = (struct ipv6hdr *)packet;
	char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
	const char *src, *dst;

	src = inet_ntop(AF_INET6, &ip6->saddr, saddr, sizeof(saddr));
	dst = inet_ntop(AF_INET6, &ip6->daddr, daddr, sizeof(daddr));

	show_transport(packet + sizeof(*ip6), ntohs(ip6->payload_len),
		       ifindex,
		       src ? src : "<invalid>",
		       dst ? dst : "<invalid>",
		       ip6->nexthdr, true, pkt_type);
}
863  
/* Print a summary of the IPv4 packet starting at @packet.
 *
 * Addresses that cannot be formatted are shown as "<invalid>". The transport
 * header is located with the header length field (ihl), so packets carrying
 * IP options are parsed at the right offset.
 */
static void show_ipv4_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
{
	char src_buf[INET_ADDRSTRLEN], dst_buf[INET_ADDRSTRLEN];
	struct iphdr *pkt = (struct iphdr *)packet;
	size_t hdr_len = sizeof(struct iphdr);
	const char *src, *dst;
	u_char proto;

	src = inet_ntop(AF_INET, &pkt->saddr, src_buf, sizeof(src_buf));
	if (!src)
		src = "<invalid>";
	dst = inet_ntop(AF_INET, &pkt->daddr, dst_buf, sizeof(dst_buf));
	if (!dst)
		dst = "<invalid>";
	proto = pkt->protocol;

	/* ihl counts 32-bit words. Values above 5 mean that options follow
	 * the fixed header; smaller (malformed) values fall back to the
	 * fixed header size.
	 */
	if (pkt->ihl > 5)
		hdr_len = (size_t)pkt->ihl * 4;

	show_transport(packet + hdr_len,
		       ntohs(pkt->tot_len),
		       ifindex, src, dst, proto, false, pkt_type);
}
882  
traffic_monitor_thread(void * arg)883  static void *traffic_monitor_thread(void *arg)
884  {
885  	char *ifname, _ifname[IF_NAMESIZE];
886  	const u_char *packet, *payload;
887  	struct tmonitor_ctx *ctx = arg;
888  	pcap_dumper_t *dumper = ctx->dumper;
889  	int fd = ctx->pcap_fd, nfds, r;
890  	int wake_fd = ctx->wake_fd;
891  	struct pcap_pkthdr header;
892  	pcap_t *pcap = ctx->pcap;
893  	u32 ifindex;
894  	fd_set fds;
895  	u16 proto;
896  	u8 ptype;
897  
898  	nfds = (fd > wake_fd ? fd : wake_fd) + 1;
899  	FD_ZERO(&fds);
900  
901  	while (!ctx->done) {
902  		FD_SET(fd, &fds);
903  		FD_SET(wake_fd, &fds);
904  		r = select(nfds, &fds, NULL, NULL, NULL);
905  		if (!r)
906  			continue;
907  		if (r < 0) {
908  			if (errno == EINTR)
909  				continue;
910  			log_err("Fail to select on pcap fd and wake fd");
911  			break;
912  		}
913  
914  		/* This instance of pcap is non-blocking */
915  		packet = pcap_next(pcap, &header);
916  		if (!packet)
917  			continue;
918  
919  		/* According to the man page of pcap_dump(), first argument
920  		 * is the pcap_dumper_t pointer even it's argument type is
921  		 * u_char *.
922  		 */
923  		pcap_dump((u_char *)dumper, &header, packet);
924  
925  		/* Not sure what other types of packets look like. Here, we
926  		 * parse only Ethernet and compatible packets.
927  		 */
928  		if (!is_ethernet(packet))
929  			continue;
930  
931  		/* Skip SLL2 header
932  		 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
933  		 *
934  		 * Although the document doesn't mention that, the payload
935  		 * doesn't include the Ethernet header. The payload starts
936  		 * from the first byte of the network layer header.
937  		 */
938  		payload = packet + 20;
939  
940  		memcpy(&proto, packet, 2);
941  		proto = ntohs(proto);
942  		memcpy(&ifindex, packet + 4, 4);
943  		ifindex = ntohl(ifindex);
944  		ptype = packet[10];
945  
946  		if (proto == ETH_P_IPV6) {
947  			show_ipv6_packet(payload, ifindex, ptype);
948  		} else if (proto == ETH_P_IP) {
949  			show_ipv4_packet(payload, ifindex, ptype);
950  		} else {
951  			ifname = if_indextoname(ifindex, _ifname);
952  			if (!ifname) {
953  				snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
954  				ifname = _ifname;
955  			}
956  
957  			printf("%-7s %-3s Unknown network protocol type 0x%x\n",
958  			       ifname, pkt_type_str(ptype), proto);
959  		}
960  	}
961  
962  	return NULL;
963  }
964  
965  /* Prepare the pcap handle to capture packets.
966   *
967   * This pcap is non-blocking and immediate mode is enabled to receive
968   * captured packets as soon as possible.  The snaplen is set to 1024 bytes
969   * to limit the size of captured content. The format of the link-layer
970   * header is set to DLT_LINUX_SLL2 to enable handling various link-layer
971   * technologies.
972   */
traffic_monitor_prepare_pcap(void)973  static pcap_t *traffic_monitor_prepare_pcap(void)
974  {
975  	char errbuf[PCAP_ERRBUF_SIZE];
976  	pcap_t *pcap;
977  	int r;
978  
979  	/* Listen on all NICs in the namespace */
980  	pcap = pcap_create("any", errbuf);
981  	if (!pcap) {
982  		log_err("Failed to open pcap: %s", errbuf);
983  		return NULL;
984  	}
985  	/* Limit the size of the packet (first N bytes) */
986  	r = pcap_set_snaplen(pcap, 1024);
987  	if (r) {
988  		log_err("Failed to set snaplen: %s", pcap_geterr(pcap));
989  		goto error;
990  	}
991  	/* To receive packets as fast as possible */
992  	r = pcap_set_immediate_mode(pcap, 1);
993  	if (r) {
994  		log_err("Failed to set immediate mode: %s", pcap_geterr(pcap));
995  		goto error;
996  	}
997  	r = pcap_setnonblock(pcap, 1, errbuf);
998  	if (r) {
999  		log_err("Failed to set nonblock: %s", errbuf);
1000  		goto error;
1001  	}
1002  	r = pcap_activate(pcap);
1003  	if (r) {
1004  		log_err("Failed to activate pcap: %s", pcap_geterr(pcap));
1005  		goto error;
1006  	}
1007  	/* Determine the format of the link-layer header */
1008  	r = pcap_set_datalink(pcap, DLT_LINUX_SLL2);
1009  	if (r) {
1010  		log_err("Failed to set datalink: %s", pcap_geterr(pcap));
1011  		goto error;
1012  	}
1013  
1014  	return pcap;
1015  error:
1016  	pcap_close(pcap);
1017  	return NULL;
1018  }
1019  
/* Build a file-name friendly test identifier in @buf (capacity @len).
 *
 * The result is "<test_name>__<subtest_name>", or just "<test_name>" when
 * @subtest_name is NULL, truncated to fit and with every '/' and ' '
 * replaced by '_'.
 */
static void encode_test_name(char *buf, size_t len, const char *test_name, const char *subtest_name)
{
	char *p;

	if (subtest_name)
		snprintf(buf, len, "%s__%s", test_name, subtest_name);
	else
		snprintf(buf, len, "%s", test_name);

	for (p = buf; *p != '\0'; p++) {
		if (*p == '/' || *p == ' ')
			*p = '_';
	}
}
1033  
/* Directory in which traffic_monitor_start() creates its packet files. */
#define PCAP_DIR "/tmp/tmon_pcap"
1035  
1036  /* Start to monitor the network traffic in the given network namespace.
1037   *
1038   * netns: the name of the network namespace to monitor. If NULL, the
1039   *        current network namespace is monitored.
1040   * test_name: the name of the running test.
1041   * subtest_name: the name of the running subtest if there is. It should be
1042   *               NULL if it is not a subtest.
1043   *
1044   * This function will start a thread to capture packets going through NICs
1045   * in the give network namespace.
1046   */
traffic_monitor_start(const char * netns,const char * test_name,const char * subtest_name)1047  struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
1048  					   const char *subtest_name)
1049  {
1050  	struct nstoken *nstoken = NULL;
1051  	struct tmonitor_ctx *ctx;
1052  	char test_name_buf[64];
1053  	static int tmon_seq;
1054  	int r;
1055  
1056  	if (netns) {
1057  		nstoken = open_netns(netns);
1058  		if (!nstoken)
1059  			return NULL;
1060  	}
1061  	ctx = malloc(sizeof(*ctx));
1062  	if (!ctx) {
1063  		log_err("Failed to malloc ctx");
1064  		goto fail_ctx;
1065  	}
1066  	memset(ctx, 0, sizeof(*ctx));
1067  
1068  	encode_test_name(test_name_buf, sizeof(test_name_buf), test_name, subtest_name);
1069  	snprintf(ctx->pkt_fname, sizeof(ctx->pkt_fname),
1070  		 PCAP_DIR "/packets-%d-%d-%s-%s.log", getpid(), tmon_seq++,
1071  		 test_name_buf, netns ? netns : "unknown");
1072  
1073  	r = mkdir(PCAP_DIR, 0755);
1074  	if (r && errno != EEXIST) {
1075  		log_err("Failed to create " PCAP_DIR);
1076  		goto fail_pcap;
1077  	}
1078  
1079  	ctx->pcap = traffic_monitor_prepare_pcap();
1080  	if (!ctx->pcap)
1081  		goto fail_pcap;
1082  	ctx->pcap_fd = pcap_get_selectable_fd(ctx->pcap);
1083  	if (ctx->pcap_fd < 0) {
1084  		log_err("Failed to get pcap fd");
1085  		goto fail_dumper;
1086  	}
1087  
1088  	/* Create a packet file */
1089  	ctx->dumper = pcap_dump_open(ctx->pcap, ctx->pkt_fname);
1090  	if (!ctx->dumper) {
1091  		log_err("Failed to open pcap dump: %s", ctx->pkt_fname);
1092  		goto fail_dumper;
1093  	}
1094  
1095  	/* Create an eventfd to wake up the monitor thread */
1096  	ctx->wake_fd = eventfd(0, 0);
1097  	if (ctx->wake_fd < 0) {
1098  		log_err("Failed to create eventfd");
1099  		goto fail_eventfd;
1100  	}
1101  
1102  	r = pthread_create(&ctx->thread, NULL, traffic_monitor_thread, ctx);
1103  	if (r) {
1104  		log_err("Failed to create thread");
1105  		goto fail;
1106  	}
1107  
1108  	close_netns(nstoken);
1109  
1110  	return ctx;
1111  
1112  fail:
1113  	close(ctx->wake_fd);
1114  
1115  fail_eventfd:
1116  	pcap_dump_close(ctx->dumper);
1117  	unlink(ctx->pkt_fname);
1118  
1119  fail_dumper:
1120  	pcap_close(ctx->pcap);
1121  
1122  fail_pcap:
1123  	free(ctx);
1124  
1125  fail_ctx:
1126  	close_netns(nstoken);
1127  
1128  	return NULL;
1129  }
1130  
traffic_monitor_release(struct tmonitor_ctx * ctx)1131  static void traffic_monitor_release(struct tmonitor_ctx *ctx)
1132  {
1133  	pcap_close(ctx->pcap);
1134  	pcap_dump_close(ctx->dumper);
1135  
1136  	close(ctx->wake_fd);
1137  
1138  	free(ctx);
1139  }
1140  
1141  /* Stop the network traffic monitor.
1142   *
1143   * ctx: the context returned by traffic_monitor_start()
1144   */
traffic_monitor_stop(struct tmonitor_ctx * ctx)1145  void traffic_monitor_stop(struct tmonitor_ctx *ctx)
1146  {
1147  	__u64 w = 1;
1148  
1149  	if (!ctx)
1150  		return;
1151  
1152  	/* Stop the monitor thread */
1153  	ctx->done = true;
1154  	/* Wake up the background thread. */
1155  	write(ctx->wake_fd, &w, sizeof(w));
1156  	pthread_join(ctx->thread, NULL);
1157  
1158  	printf("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);
1159  
1160  	traffic_monitor_release(ctx);
1161  }
1162  #endif /* TRAFFIC_MONITOR */
1163