1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * vsock_perf - benchmark utility for vsock.
4  *
5  * Copyright (C) 2022 SberDevices.
6  *
7  * Author: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
8  */
9 #include <getopt.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <stdbool.h>
13 #include <string.h>
14 #include <errno.h>
15 #include <unistd.h>
16 #include <time.h>
17 #include <stdint.h>
18 #include <poll.h>
19 #include <sys/socket.h>
20 #include <linux/vm_sockets.h>
21 #include <sys/mman.h>
22 
23 #include "msg_zerocopy_common.h"
24 
/* Built-in defaults, also printed by the help text. */
#define DEFAULT_BUF_SIZE_BYTES	(128 * 1024)
#define DEFAULT_TO_SEND_BYTES	(64 * 1024)
#define DEFAULT_VSOCK_BUF_BYTES (256 * 1024)
#define DEFAULT_RCVLOWAT_BYTES	1
#define DEFAULT_PORT		1234

/* Unit conversion factors. */
#define BYTES_PER_GB		(1024 * 1024 * 1024ULL)
#define NSEC_PER_SEC		(1000000000ULL)

/* Run-time configuration, initialised to the defaults above. */
static unsigned int port = DEFAULT_PORT;
static unsigned long buf_size_bytes = DEFAULT_BUF_SIZE_BYTES;
static unsigned long vsock_buf_bytes = DEFAULT_VSOCK_BUF_BYTES;
/* True when the sender should transmit with MSG_ZEROCOPY. */
static bool zerocopy;
38 
/*
 * Report a failed call and terminate the program.
 *
 * @s: prefix handed to perror(), which appends the description of the
 *     current errno value.
 *
 * Does not return: the process exits with EXIT_FAILURE.
 */
static void error(const char *s)
{
	perror(s);
	exit(EXIT_FAILURE);
}
44 
current_nsec(void)45 static time_t current_nsec(void)
46 {
47 	struct timespec ts;
48 
49 	if (clock_gettime(CLOCK_REALTIME, &ts))
50 		error("clock_gettime");
51 
52 	return (ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec;
53 }
54 
/*
 * Parse a byte count with an optional binary-unit suffix.
 * Derived from memparse() in lib/cmdline.c.
 *
 * @ptr: string holding a number in any base strtoull() accepts with base 0
 *       (decimal, 0x-prefixed hex, 0-prefixed octal), optionally followed
 *       by one of K, M, G, T, P, E (either case).
 *
 * Each suffix scales the number by a further factor of 1024 (K = 2^10 up
 * to E = 2^60). Characters after the suffix, and any unrecognised suffix,
 * are ignored. A string without leading digits yields 0.
 *
 * Return: the scaled value, converted to unsigned long.
 */
static unsigned long memparse(const char *ptr)
{
	unsigned long long value;
	unsigned int shift;
	char *suffix;

	value = strtoull(ptr, &suffix, 0);

	/* Map the suffix to its power of two instead of cascading shifts. */
	switch (*suffix) {
	case 'E':
	case 'e':
		shift = 60;
		break;
	case 'P':
	case 'p':
		shift = 50;
		break;
	case 'T':
	case 't':
		shift = 40;
		break;
	case 'G':
	case 'g':
		shift = 30;
		break;
	case 'M':
	case 'm':
		shift = 20;
		break;
	case 'K':
	case 'k':
		shift = 10;
		break;
	default:
		shift = 0;
		break;
	}

	return value << shift;
}
88 
vsock_increase_buf_size(int fd)89 static void vsock_increase_buf_size(int fd)
90 {
91 	if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE,
92 		       &vsock_buf_bytes, sizeof(vsock_buf_bytes)))
93 		error("setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)");
94 
95 	if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
96 		       &vsock_buf_bytes, sizeof(vsock_buf_bytes)))
97 		error("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)");
98 }
99 
/*
 * Open a stream vsock socket and connect it to a peer.
 *
 * @cid:  context ID of the peer.
 * @port: port on the peer to connect to.
 *
 * Return: the connected socket descriptor, or -1 after reporting the
 * failing call with perror(). No descriptor is left open on failure.
 */
static int vsock_connect(unsigned int cid, unsigned int port)
{
	/* The union lets the vsock address be passed as a generic sockaddr. */
	union {
		struct sockaddr sa;
		struct sockaddr_vm svm;
	} peer = {
		.svm = {
			.svm_family = AF_VSOCK,
			.svm_port = port,
			.svm_cid = cid,
		},
	};
	int sock = socket(AF_VSOCK, SOCK_STREAM, 0);

	if (sock < 0) {
		perror("socket");
		return -1;
	}

	if (connect(sock, &peer.sa, sizeof(peer.svm)) >= 0)
		return sock;

	/* Report first: close() could otherwise overwrite errno. */
	perror("connect");
	close(sock);

	return -1;
}
129 
get_gbps(unsigned long bits,time_t ns_delta)130 static float get_gbps(unsigned long bits, time_t ns_delta)
131 {
132 	return ((float)bits / 1000000000ULL) /
133 	       ((float)ns_delta / NSEC_PER_SEC);
134 }
135 
run_receiver(unsigned long rcvlowat_bytes)136 static void run_receiver(unsigned long rcvlowat_bytes)
137 {
138 	unsigned int read_cnt;
139 	time_t rx_begin_ns;
140 	time_t in_read_ns;
141 	size_t total_recv;
142 	int client_fd;
143 	char *data;
144 	int fd;
145 	union {
146 		struct sockaddr sa;
147 		struct sockaddr_vm svm;
148 	} addr = {
149 		.svm = {
150 			.svm_family = AF_VSOCK,
151 			.svm_port = port,
152 			.svm_cid = VMADDR_CID_ANY,
153 		},
154 	};
155 	union {
156 		struct sockaddr sa;
157 		struct sockaddr_vm svm;
158 	} clientaddr;
159 
160 	socklen_t clientaddr_len = sizeof(clientaddr.svm);
161 
162 	printf("Run as receiver\n");
163 	printf("Listen port %u\n", port);
164 	printf("RX buffer %lu bytes\n", buf_size_bytes);
165 	printf("vsock buffer %lu bytes\n", vsock_buf_bytes);
166 	printf("SO_RCVLOWAT %lu bytes\n", rcvlowat_bytes);
167 
168 	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
169 
170 	if (fd < 0)
171 		error("socket");
172 
173 	if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0)
174 		error("bind");
175 
176 	if (listen(fd, 1) < 0)
177 		error("listen");
178 
179 	client_fd = accept(fd, &clientaddr.sa, &clientaddr_len);
180 
181 	if (client_fd < 0)
182 		error("accept");
183 
184 	vsock_increase_buf_size(client_fd);
185 
186 	if (setsockopt(client_fd, SOL_SOCKET, SO_RCVLOWAT,
187 		       &rcvlowat_bytes,
188 		       sizeof(rcvlowat_bytes)))
189 		error("setsockopt(SO_RCVLOWAT)");
190 
191 	data = malloc(buf_size_bytes);
192 
193 	if (!data) {
194 		fprintf(stderr, "'malloc()' failed\n");
195 		exit(EXIT_FAILURE);
196 	}
197 
198 	read_cnt = 0;
199 	in_read_ns = 0;
200 	total_recv = 0;
201 	rx_begin_ns = current_nsec();
202 
203 	while (1) {
204 		struct pollfd fds = { 0 };
205 
206 		fds.fd = client_fd;
207 		fds.events = POLLIN | POLLERR |
208 			     POLLHUP | POLLRDHUP;
209 
210 		if (poll(&fds, 1, -1) < 0)
211 			error("poll");
212 
213 		if (fds.revents & POLLERR) {
214 			fprintf(stderr, "'poll()' error\n");
215 			exit(EXIT_FAILURE);
216 		}
217 
218 		if (fds.revents & POLLIN) {
219 			ssize_t bytes_read;
220 			time_t t;
221 
222 			t = current_nsec();
223 			bytes_read = read(fds.fd, data, buf_size_bytes);
224 			in_read_ns += (current_nsec() - t);
225 			read_cnt++;
226 
227 			if (!bytes_read)
228 				break;
229 
230 			if (bytes_read < 0) {
231 				perror("read");
232 				exit(EXIT_FAILURE);
233 			}
234 
235 			total_recv += bytes_read;
236 		}
237 
238 		if (fds.revents & (POLLHUP | POLLRDHUP))
239 			break;
240 	}
241 
242 	printf("total bytes received: %zu\n", total_recv);
243 	printf("rx performance: %f Gbits/s\n",
244 	       get_gbps(total_recv * 8, current_nsec() - rx_begin_ns));
245 	printf("total time in 'read()': %f sec\n", (float)in_read_ns / NSEC_PER_SEC);
246 	printf("average time in 'read()': %f ns\n", (float)in_read_ns / read_cnt);
247 	printf("POLLIN wakeups: %i\n", read_cnt);
248 
249 	free(data);
250 	close(client_fd);
251 	close(fd);
252 }
253 
run_sender(int peer_cid,unsigned long to_send_bytes)254 static void run_sender(int peer_cid, unsigned long to_send_bytes)
255 {
256 	time_t tx_begin_ns;
257 	time_t tx_total_ns;
258 	size_t total_send;
259 	time_t time_in_send;
260 	void *data;
261 	int fd;
262 
263 	if (zerocopy)
264 		printf("Run as sender MSG_ZEROCOPY\n");
265 	else
266 		printf("Run as sender\n");
267 
268 	printf("Connect to %i:%u\n", peer_cid, port);
269 	printf("Send %lu bytes\n", to_send_bytes);
270 	printf("TX buffer %lu bytes\n", buf_size_bytes);
271 
272 	fd = vsock_connect(peer_cid, port);
273 
274 	if (fd < 0)
275 		exit(EXIT_FAILURE);
276 
277 	if (zerocopy) {
278 		enable_so_zerocopy(fd);
279 
280 		data = mmap(NULL, buf_size_bytes, PROT_READ | PROT_WRITE,
281 			    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
282 		if (data == MAP_FAILED) {
283 			perror("mmap");
284 			exit(EXIT_FAILURE);
285 		}
286 	} else {
287 		data = malloc(buf_size_bytes);
288 
289 		if (!data) {
290 			fprintf(stderr, "'malloc()' failed\n");
291 			exit(EXIT_FAILURE);
292 		}
293 	}
294 
295 	memset(data, 0, buf_size_bytes);
296 	total_send = 0;
297 	time_in_send = 0;
298 	tx_begin_ns = current_nsec();
299 
300 	while (total_send < to_send_bytes) {
301 		ssize_t sent;
302 		size_t rest_bytes;
303 		time_t before;
304 
305 		rest_bytes = to_send_bytes - total_send;
306 
307 		before = current_nsec();
308 		sent = send(fd, data, (rest_bytes > buf_size_bytes) ?
309 			    buf_size_bytes : rest_bytes,
310 			    zerocopy ? MSG_ZEROCOPY : 0);
311 		time_in_send += (current_nsec() - before);
312 
313 		if (sent <= 0)
314 			error("write");
315 
316 		total_send += sent;
317 
318 		if (zerocopy) {
319 			struct pollfd fds = { 0 };
320 
321 			fds.fd = fd;
322 
323 			if (poll(&fds, 1, -1) < 0) {
324 				perror("poll");
325 				exit(EXIT_FAILURE);
326 			}
327 
328 			if (!(fds.revents & POLLERR)) {
329 				fprintf(stderr, "POLLERR expected\n");
330 				exit(EXIT_FAILURE);
331 			}
332 
333 			vsock_recv_completion(fd, NULL);
334 		}
335 	}
336 
337 	tx_total_ns = current_nsec() - tx_begin_ns;
338 
339 	printf("total bytes sent: %zu\n", total_send);
340 	printf("tx performance: %f Gbits/s\n",
341 	       get_gbps(total_send * 8, time_in_send));
342 	printf("total time in tx loop: %f sec\n",
343 	       (float)tx_total_ns / NSEC_PER_SEC);
344 	printf("time in 'send()': %f sec\n",
345 	       (float)time_in_send / NSEC_PER_SEC);
346 
347 	close(fd);
348 
349 	if (zerocopy)
350 		munmap(data, buf_size_bytes);
351 	else
352 		free(data);
353 }
354 
/*
 * Command-line option tables for getopt_long().
 *
 * There are no short options. Each long option has a NULL flag, so
 * getopt_long() returns its .val, which main() switches on.
 */
static const char optstring[] = "";
static const struct option longopts[] = {
	{ .name = "help",     .has_arg = no_argument,       .flag = NULL, .val = 'H' },
	{ .name = "sender",   .has_arg = required_argument, .flag = NULL, .val = 'S' },
	{ .name = "port",     .has_arg = required_argument, .flag = NULL, .val = 'P' },
	{ .name = "bytes",    .has_arg = required_argument, .flag = NULL, .val = 'M' },
	{ .name = "buf-size", .has_arg = required_argument, .flag = NULL, .val = 'B' },
	{ .name = "vsk-size", .has_arg = required_argument, .flag = NULL, .val = 'V' },
	{ .name = "rcvlowat", .has_arg = required_argument, .flag = NULL, .val = 'R' },
	{ .name = "zerocopy", .has_arg = no_argument,       .flag = NULL, .val = 'Z' },
	/* All-zero entry terminates the table. */
	{ .name = NULL,       .has_arg = 0,                 .flag = NULL, .val = 0 },
};
399 
usage(void)400 static void usage(void)
401 {
402 	printf("Usage: ./vsock_perf [--help] [options]\n"
403 	       "\n"
404 	       "This is benchmarking utility, to test vsock performance.\n"
405 	       "It runs in two modes: sender or receiver. In sender mode, it\n"
406 	       "connects to the specified CID and starts data transmission.\n"
407 	       "\n"
408 	       "Options:\n"
409 	       "  --help			This message\n"
410 	       "  --sender   <cid>		Sender mode (receiver default)\n"
411 	       "                                <cid> of the receiver to connect to\n"
412 	       "  --zerocopy			Enable zerocopy (for sender mode only)\n"
413 	       "  --port     <port>		Port (default %d)\n"
414 	       "  --bytes    <bytes>KMG		Bytes to send (default %d)\n"
415 	       "  --buf-size <bytes>KMG		Data buffer size (default %d). In sender mode\n"
416 	       "                                it is the buffer size, passed to 'write()'. In\n"
417 	       "                                receiver mode it is the buffer size passed to 'read()'.\n"
418 	       "  --vsk-size <bytes>KMG		Socket buffer size (default %d)\n"
419 	       "  --rcvlowat <bytes>KMG		SO_RCVLOWAT value (default %d)\n"
420 	       "\n", DEFAULT_PORT, DEFAULT_TO_SEND_BYTES,
421 	       DEFAULT_BUF_SIZE_BYTES, DEFAULT_VSOCK_BUF_BYTES,
422 	       DEFAULT_RCVLOWAT_BYTES);
423 	exit(EXIT_FAILURE);
424 }
425 
/*
 * Strictly parse a decimal integer command-line argument.
 *
 * @arg: NUL-terminated string to convert.
 *
 * The whole string must be a valid base-10 number. An empty string,
 * trailing characters, or a value outside the range of 'long' all end
 * the program through usage().
 *
 * Return: the parsed value.
 */
static long strtolx(const char *arg)
{
	long value;
	char *end;

	/* strtol() signals overflow only through errno, so clear it first. */
	errno = 0;
	value = strtol(arg, &end, 10);

	/* end == arg: no digits were consumed (e.g. an empty argument). */
	if (end == arg || *end != '\0' || errno == ERANGE)
		usage();

	return value;
}
438 
main(int argc,char ** argv)439 int main(int argc, char **argv)
440 {
441 	unsigned long to_send_bytes = DEFAULT_TO_SEND_BYTES;
442 	unsigned long rcvlowat_bytes = DEFAULT_RCVLOWAT_BYTES;
443 	int peer_cid = -1;
444 	bool sender = false;
445 
446 	while (1) {
447 		int opt = getopt_long(argc, argv, optstring, longopts, NULL);
448 
449 		if (opt == -1)
450 			break;
451 
452 		switch (opt) {
453 		case 'V': /* Peer buffer size. */
454 			vsock_buf_bytes = memparse(optarg);
455 			break;
456 		case 'R': /* SO_RCVLOWAT value. */
457 			rcvlowat_bytes = memparse(optarg);
458 			break;
459 		case 'P': /* Port to connect to. */
460 			port = strtolx(optarg);
461 			break;
462 		case 'M': /* Bytes to send. */
463 			to_send_bytes = memparse(optarg);
464 			break;
465 		case 'B': /* Size of rx/tx buffer. */
466 			buf_size_bytes = memparse(optarg);
467 			break;
468 		case 'S': /* Sender mode. CID to connect to. */
469 			peer_cid = strtolx(optarg);
470 			sender = true;
471 			break;
472 		case 'H': /* Help. */
473 			usage();
474 			break;
475 		case 'Z': /* Zerocopy. */
476 			zerocopy = true;
477 			break;
478 		default:
479 			usage();
480 		}
481 	}
482 
483 	if (!sender)
484 		run_receiver(rcvlowat_bytes);
485 	else
486 		run_sender(peer_cid, to_send_bytes);
487 
488 	return 0;
489 }
490