1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * KVM userfaultfd util
4  * Adapted from demand_paging_test.c
5  *
6  * Copyright (C) 2018, Red Hat, Inc.
7  * Copyright (C) 2019-2022 Google LLC
8  */
9 #include <inttypes.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <time.h>
13 #include <poll.h>
14 #include <pthread.h>
15 #include <linux/userfaultfd.h>
16 #include <sys/epoll.h>
17 #include <sys/syscall.h>
18 
19 #include "kvm_util.h"
20 #include "test_util.h"
21 #include "memstress.h"
22 #include "userfaultfd_util.h"
23 
24 #ifdef __NR_userfaultfd
25 
uffd_handler_thread_fn(void * arg)26 static void *uffd_handler_thread_fn(void *arg)
27 {
28 	struct uffd_reader_args *reader_args = (struct uffd_reader_args *)arg;
29 	int uffd = reader_args->uffd;
30 	int64_t pages = 0;
31 	struct timespec start;
32 	struct timespec ts_diff;
33 	struct epoll_event evt;
34 	int epollfd;
35 
36 	epollfd = epoll_create(1);
37 	TEST_ASSERT(epollfd >= 0, "Failed to create epollfd.");
38 
39 	evt.events = EPOLLIN | EPOLLEXCLUSIVE;
40 	evt.data.u32 = 0;
41 	TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, uffd, &evt),
42 		    "Failed to add uffd to epollfd");
43 
44 	evt.events = EPOLLIN;
45 	evt.data.u32 = 1;
46 	TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, reader_args->pipe, &evt),
47 		    "Failed to add pipe to epollfd");
48 
49 	clock_gettime(CLOCK_MONOTONIC, &start);
50 	while (1) {
51 		struct uffd_msg msg;
52 		int r;
53 
54 		r = epoll_wait(epollfd, &evt, 1, -1);
55 		TEST_ASSERT(r == 1,
56 			    "Unexpected number of events (%d) from epoll, errno = %d",
57 			    r, errno);
58 
59 		if (evt.data.u32 == 1) {
60 			char tmp_chr;
61 
62 			TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
63 				    "Reader thread received EPOLLERR or EPOLLHUP on pipe.");
64 			r = read(reader_args->pipe, &tmp_chr, 1);
65 			TEST_ASSERT(r == 1,
66 				    "Error reading pipefd in uffd reader thread");
67 			break;
68 		}
69 
70 		TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
71 			    "Reader thread received EPOLLERR or EPOLLHUP on uffd.");
72 
73 		r = read(uffd, &msg, sizeof(msg));
74 		if (r == -1) {
75 			TEST_ASSERT(errno == EAGAIN,
76 				    "Error reading from UFFD: errno = %d", errno);
77 			continue;
78 		}
79 
80 		TEST_ASSERT(r == sizeof(msg),
81 			    "Read on uffd returned unexpected number of bytes (%d)", r);
82 
83 		if (!(msg.event & UFFD_EVENT_PAGEFAULT))
84 			continue;
85 
86 		if (reader_args->delay)
87 			usleep(reader_args->delay);
88 		r = reader_args->handler(reader_args->uffd_mode, uffd, &msg);
89 		TEST_ASSERT(r >= 0,
90 			    "Reader thread handler fn returned negative value %d", r);
91 		pages++;
92 	}
93 
94 	ts_diff = timespec_elapsed(start);
95 	PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
96 		       pages, ts_diff.tv_sec, ts_diff.tv_nsec,
97 		       pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC));
98 
99 	return NULL;
100 }
101 
/*
 * Create a userfaultfd, register [hva, hva + len) with it and start the
 * reader threads that service its faults.
 *
 * @uffd_mode:   mode passed to UFFDIO_REGISTER and forwarded to @handler
 * @delay:       microseconds each reader sleeps before calling @handler
 *               (0 means no sleep)
 * @hva:         start of the range to register
 * @len:         length of the range in bytes
 * @num_readers: number of reader threads to create
 * @handler:     callback invoked by the readers for each page fault
 *
 * Returns a heap-allocated descriptor; hand it to uffd_stop_demand_paging()
 * to stop the readers and free it. Failures are reported via TEST_ASSERT().
 */
struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
					   void *hva, uint64_t len,
					   uint64_t num_readers,
					   uffd_handler_t handler)
{
	struct uffd_desc *uffd_desc;
	bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
	int uffd;
	struct uffdio_api uffdio_api;
	struct uffdio_register uffdio_register;
	uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
	int ret, i;

	PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
		       is_minor ? "MINOR" : "MISSING",
		       is_minor ? "UFFDIO_CONTINUE" : "UFFDIO_COPY");

	uffd_desc = malloc(sizeof(*uffd_desc));
	TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor");

	/* calloc() takes the element count first, then the element size. */
	uffd_desc->pipefds = calloc(num_readers, sizeof(*uffd_desc->pipefds));
	TEST_ASSERT(uffd_desc->pipefds, "Failed to alloc pipes");

	uffd_desc->readers = calloc(num_readers, sizeof(*uffd_desc->readers));
	TEST_ASSERT(uffd_desc->readers, "Failed to alloc reader threads");

	uffd_desc->reader_args = calloc(num_readers,
					sizeof(*uffd_desc->reader_args));
	TEST_ASSERT(uffd_desc->reader_args, "Failed to alloc reader_args");

	uffd_desc->num_readers = num_readers;

	/* Minor mode requires UFFDIO_CONTINUE instead of UFFDIO_COPY. */
	if (is_minor)
		expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;

	/* Non-blocking: the readers see EAGAIN when no message is pending. */
	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);

	uffdio_api.api = UFFD_API;
	uffdio_api.features = 0;
	TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1,
		    "ioctl UFFDIO_API failed: %" PRIu64,
		    (uint64_t)uffdio_api.api);

	uffdio_register.range.start = (uint64_t)hva;
	uffdio_register.range.len = len;
	uffdio_register.mode = uffd_mode;
	TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
		    "ioctl UFFDIO_REGISTER failed");
	/* The registration must offer the ioctl used to resolve faults. */
	TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
		    expected_ioctls, "missing userfaultfd ioctls");

	uffd_desc->uffd = uffd;
	for (i = 0; i < uffd_desc->num_readers; ++i) {
		int pipes[2];

		ret = pipe2(pipes, O_CLOEXEC | O_NONBLOCK);
		TEST_ASSERT(!ret, "Failed to set up pipefd %i for uffd_desc %p",
			    i, uffd_desc);

		/*
		 * The descriptor keeps the write end; the reader thread gets
		 * the read end and exits once a byte arrives on it.
		 */
		uffd_desc->pipefds[i] = pipes[1];

		uffd_desc->reader_args[i].uffd_mode = uffd_mode;
		uffd_desc->reader_args[i].uffd = uffd;
		uffd_desc->reader_args[i].delay = delay;
		uffd_desc->reader_args[i].handler = handler;
		uffd_desc->reader_args[i].pipe = pipes[0];

		/* pthread_create() returns an error number, 0 on success. */
		ret = pthread_create(&uffd_desc->readers[i], NULL,
				     uffd_handler_thread_fn,
				     &uffd_desc->reader_args[i]);
		TEST_ASSERT(!ret,
			    "Failed to create reader thread %i for uffd_desc %p, error = %d",
			    i, uffd_desc, ret);

		PER_VCPU_DEBUG("Created uffd thread %i for HVA range [%p, %p)\n",
			       i, hva, hva + len);
	}

	return uffd_desc;
}
179 
uffd_stop_demand_paging(struct uffd_desc * uffd)180 void uffd_stop_demand_paging(struct uffd_desc *uffd)
181 {
182 	char c = 0;
183 	int i;
184 
185 	for (i = 0; i < uffd->num_readers; ++i)
186 		TEST_ASSERT(write(uffd->pipefds[i], &c, 1) == 1,
187 			    "Unable to write to pipefd %i for uffd_desc %p", i, uffd);
188 
189 	for (i = 0; i < uffd->num_readers; ++i)
190 		TEST_ASSERT(!pthread_join(uffd->readers[i], NULL),
191 			    "Pthread_join failed on reader %i for uffd_desc %p", i, uffd);
192 
193 	close(uffd->uffd);
194 
195 	for (i = 0; i < uffd->num_readers; ++i) {
196 		close(uffd->pipefds[i]);
197 		close(uffd->reader_args[i].pipe);
198 	}
199 
200 	free(uffd->pipefds);
201 	free(uffd->readers);
202 	free(uffd->reader_args);
203 	free(uffd);
204 }
205 
206 #endif /* __NR_userfaultfd */
207