1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   *
4   * Copyright (c) 2009, Microsoft Corporation.
5   *
6   * Authors:
7   *   Haiyang Zhang <haiyangz@microsoft.com>
8   *   Hank Janssen  <hjanssen@microsoft.com>
9   */
10  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11  
12  #include <linux/kernel.h>
13  #include <linux/sched.h>
14  #include <linux/wait.h>
15  #include <linux/delay.h>
16  #include <linux/mm.h>
17  #include <linux/module.h>
18  #include <linux/slab.h>
19  #include <linux/vmalloc.h>
20  #include <linux/hyperv.h>
21  #include <linux/export.h>
22  #include <linux/io.h>
23  #include <linux/set_memory.h>
24  #include <asm/mshyperv.h>
25  
26  #include "hyperv_vmbus.h"
27  
28  
29  struct vmbus_connection vmbus_connection = {
30  	.conn_state		= DISCONNECTED,
31  	.unload_event		= COMPLETION_INITIALIZER(
32  				  vmbus_connection.unload_event),
33  	.next_gpadl_handle	= ATOMIC_INIT(0xE1E10),
34  
35  	.ready_for_suspend_event = COMPLETION_INITIALIZER(
36  				  vmbus_connection.ready_for_suspend_event),
37  	.ready_for_resume_event	= COMPLETION_INITIALIZER(
38  				  vmbus_connection.ready_for_resume_event),
39  };
40  EXPORT_SYMBOL_GPL(vmbus_connection);
41  
42  /*
43   * Negotiated protocol version with the host.
44   */
45  __u32 vmbus_proto_version;
46  EXPORT_SYMBOL_GPL(vmbus_proto_version);
47  
48  /*
49   * Table of VMBus versions listed from newest to oldest.
50   * VERSION_WIN7 and VERSION_WS2008 are no longer supported in
51   * Linux guests and are not listed.
52   */
53  static __u32 vmbus_versions[] = {
54  	VERSION_WIN10_V5_3,
55  	VERSION_WIN10_V5_2,
56  	VERSION_WIN10_V5_1,
57  	VERSION_WIN10_V5,
58  	VERSION_WIN10_V4_1,
59  	VERSION_WIN10,
60  	VERSION_WIN8_1,
61  	VERSION_WIN8
62  };
63  
64  /*
65   * Maximal VMBus protocol version guests can negotiate.  Useful to cap the
66   * VMBus version for testing and debugging purpose.
67   */
68  static uint max_version = VERSION_WIN10_V5_3;
69  
70  module_param(max_version, uint, S_IRUGO);
71  MODULE_PARM_DESC(max_version,
72  		 "Maximal VMBus protocol version which can be negotiated");
73  
/*
 * vmbus_negotiate_version - Offer a single VMBus protocol version to the host
 * @msginfo: caller-provided message tracking structure; its message buffer is
 *           overwritten with the Initiate Contact request and its response
 *           area is examined after the completion has been signalled
 * @version: protocol version to request
 *
 * Return: 0 if the host accepted @version (conn_state is then CONNECTED),
 * -ECONNREFUSED if the host reported the version as unsupported, or the
 * error from vmbus_post_msg() if the request could not be posted.
 */
int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
{
	struct vmbus_channel_initiate_contact *contact =
		(struct vmbus_channel_initiate_contact *)msginfo->msg;
	unsigned long irq_flags;
	int err;

	init_completion(&msginfo->waitevent);

	memset(contact, 0, sizeof(*contact));
	contact->header.msgtype = CHANNELMSG_INITIATE_CONTACT;
	contact->vmbus_version_requested = version;

	/*
	 * Hosts older than VMBus protocol 5.0 (VERSION_WIN10_V5) are always
	 * addressed through VMBUS_MESSAGE_CONNECTION_ID (1) and are given the
	 * interrupt page.
	 *
	 * From protocol 5.0 on, the Initiate Contact Message must go out on
	 * VMBUS_MESSAGE_CONNECTION_ID_4, and subsequent messages must use the
	 * Message Connection ID the host returns in its Version Response
	 * Message.  msg->interrupt_page is not used in that case; instead the
	 * host is told explicitly that VMBUS_MESSAGE_SINT(2) is still in use
	 * for compatibility.
	 */
	if (version < VERSION_WIN10_V5) {
		contact->interrupt_page =
			virt_to_phys(vmbus_connection.int_page);
		vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID;
	} else {
		contact->msg_sint = VMBUS_MESSAGE_SINT;
		contact->msg_vtl = ms_hyperv.vtl;
		vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID_4;
	}

	/*
	 * shared_gpa_boundary is zero in non-SNP VMs, so OR-ing it in
	 * unconditionally is harmless there.
	 */
	contact->monitor_page1 =
		virt_to_phys(vmbus_connection.monitor_pages[0]) |
		ms_hyperv.shared_gpa_boundary;
	contact->monitor_page2 =
		virt_to_phys(vmbus_connection.monitor_pages[1]) |
		ms_hyperv.shared_gpa_boundary;

	contact->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU);

	/*
	 * Queue the request on the pending-message list before posting it:
	 * the response may arrive before this routine returns.
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, irq_flags);
	list_add_tail(&msginfo->msglistentry, &vmbus_connection.chn_msg_list);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, irq_flags);

	err = vmbus_post_msg(contact, sizeof(*contact), true);

	trace_vmbus_negotiate_version(contact, err);

	/* Only wait for the connection response if the request went out. */
	if (!err)
		wait_for_completion(&msginfo->waitevent);

	/* Either way the request is no longer pending. */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, irq_flags);
	list_del(&msginfo->msglistentry);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, irq_flags);

	if (err)
		return err;

	if (!msginfo->response.version_response.version_supported)
		return -ECONNREFUSED;

	vmbus_connection.conn_state = CONNECTED;

	/* Protocol 5.0+: switch to the connection ID chosen by the host. */
	if (version >= VERSION_WIN10_V5)
		vmbus_connection.msg_conn_id =
			msginfo->response.version_response.msg_conn_id;

	return 0;
}
163  
164  /*
165   * vmbus_connect - Sends a connect request on the partition service connection
166   */
vmbus_connect(void)167  int vmbus_connect(void)
168  {
169  	struct vmbus_channel_msginfo *msginfo = NULL;
170  	int i, ret = 0;
171  	__u32 version;
172  
173  	/* Initialize the vmbus connection */
174  	vmbus_connection.conn_state = CONNECTING;
175  	vmbus_connection.work_queue = create_workqueue("hv_vmbus_con");
176  	if (!vmbus_connection.work_queue) {
177  		ret = -ENOMEM;
178  		goto cleanup;
179  	}
180  
181  	vmbus_connection.rescind_work_queue =
182  		create_workqueue("hv_vmbus_rescind");
183  	if (!vmbus_connection.rescind_work_queue) {
184  		ret = -ENOMEM;
185  		goto cleanup;
186  	}
187  	vmbus_connection.ignore_any_offer_msg = false;
188  
189  	vmbus_connection.handle_primary_chan_wq =
190  		create_workqueue("hv_pri_chan");
191  	if (!vmbus_connection.handle_primary_chan_wq) {
192  		ret = -ENOMEM;
193  		goto cleanup;
194  	}
195  
196  	vmbus_connection.handle_sub_chan_wq =
197  		create_workqueue("hv_sub_chan");
198  	if (!vmbus_connection.handle_sub_chan_wq) {
199  		ret = -ENOMEM;
200  		goto cleanup;
201  	}
202  
203  	INIT_LIST_HEAD(&vmbus_connection.chn_msg_list);
204  	spin_lock_init(&vmbus_connection.channelmsg_lock);
205  
206  	INIT_LIST_HEAD(&vmbus_connection.chn_list);
207  	mutex_init(&vmbus_connection.channel_mutex);
208  
209  	/*
210  	 * Setup the vmbus event connection for channel interrupt
211  	 * abstraction stuff
212  	 */
213  	vmbus_connection.int_page = hv_alloc_hyperv_zeroed_page();
214  	if (vmbus_connection.int_page == NULL) {
215  		ret = -ENOMEM;
216  		goto cleanup;
217  	}
218  
219  	vmbus_connection.recv_int_page = vmbus_connection.int_page;
220  	vmbus_connection.send_int_page =
221  		(void *)((unsigned long)vmbus_connection.int_page +
222  			(HV_HYP_PAGE_SIZE >> 1));
223  
224  	/*
225  	 * Setup the monitor notification facility. The 1st page for
226  	 * parent->child and the 2nd page for child->parent
227  	 */
228  	vmbus_connection.monitor_pages[0] = hv_alloc_hyperv_page();
229  	vmbus_connection.monitor_pages[1] = hv_alloc_hyperv_page();
230  	if ((vmbus_connection.monitor_pages[0] == NULL) ||
231  	    (vmbus_connection.monitor_pages[1] == NULL)) {
232  		ret = -ENOMEM;
233  		goto cleanup;
234  	}
235  
236  	ret = set_memory_decrypted((unsigned long)
237  				vmbus_connection.monitor_pages[0], 1);
238  	ret |= set_memory_decrypted((unsigned long)
239  				vmbus_connection.monitor_pages[1], 1);
240  	if (ret) {
241  		/*
242  		 * If set_memory_decrypted() fails, the encryption state
243  		 * of the memory is unknown. So leak the memory instead
244  		 * of risking returning decrypted memory to the free list.
245  		 * For simplicity, always handle both pages the same.
246  		 */
247  		vmbus_connection.monitor_pages[0] = NULL;
248  		vmbus_connection.monitor_pages[1] = NULL;
249  		goto cleanup;
250  	}
251  
252  	/*
253  	 * Set_memory_decrypted() will change the memory contents if
254  	 * decryption occurs, so zero monitor pages here.
255  	 */
256  	memset(vmbus_connection.monitor_pages[0], 0x00, HV_HYP_PAGE_SIZE);
257  	memset(vmbus_connection.monitor_pages[1], 0x00, HV_HYP_PAGE_SIZE);
258  
259  	msginfo = kzalloc(sizeof(*msginfo) +
260  			  sizeof(struct vmbus_channel_initiate_contact),
261  			  GFP_KERNEL);
262  	if (msginfo == NULL) {
263  		ret = -ENOMEM;
264  		goto cleanup;
265  	}
266  
267  	/*
268  	 * Negotiate a compatible VMBUS version number with the
269  	 * host. We start with the highest number we can support
270  	 * and work our way down until we negotiate a compatible
271  	 * version.
272  	 */
273  
274  	for (i = 0; ; i++) {
275  		if (i == ARRAY_SIZE(vmbus_versions)) {
276  			ret = -EDOM;
277  			goto cleanup;
278  		}
279  
280  		version = vmbus_versions[i];
281  		if (version > max_version)
282  			continue;
283  
284  		ret = vmbus_negotiate_version(msginfo, version);
285  		if (ret == -ETIMEDOUT)
286  			goto cleanup;
287  
288  		if (vmbus_connection.conn_state == CONNECTED)
289  			break;
290  	}
291  
292  	if (hv_is_isolation_supported() && version < VERSION_WIN10_V5_2) {
293  		pr_err("Invalid VMBus version %d.%d (expected >= %d.%d) from the host supporting isolation\n",
294  		       version >> 16, version & 0xFFFF, VERSION_WIN10_V5_2 >> 16, VERSION_WIN10_V5_2 & 0xFFFF);
295  		ret = -EINVAL;
296  		goto cleanup;
297  	}
298  
299  	vmbus_proto_version = version;
300  	pr_info("Vmbus version:%d.%d\n",
301  		version >> 16, version & 0xFFFF);
302  
303  	vmbus_connection.channels = kcalloc(MAX_CHANNEL_RELIDS,
304  					    sizeof(struct vmbus_channel *),
305  					    GFP_KERNEL);
306  	if (vmbus_connection.channels == NULL) {
307  		ret = -ENOMEM;
308  		goto cleanup;
309  	}
310  
311  	kfree(msginfo);
312  	return 0;
313  
314  cleanup:
315  	pr_err("Unable to connect to host\n");
316  
317  	vmbus_connection.conn_state = DISCONNECTED;
318  	vmbus_disconnect();
319  
320  	kfree(msginfo);
321  
322  	return ret;
323  }
324  
vmbus_disconnect(void)325  void vmbus_disconnect(void)
326  {
327  	/*
328  	 * First send the unload request to the host.
329  	 */
330  	vmbus_initiate_unload(false);
331  
332  	if (vmbus_connection.handle_sub_chan_wq)
333  		destroy_workqueue(vmbus_connection.handle_sub_chan_wq);
334  
335  	if (vmbus_connection.handle_primary_chan_wq)
336  		destroy_workqueue(vmbus_connection.handle_primary_chan_wq);
337  
338  	if (vmbus_connection.rescind_work_queue)
339  		destroy_workqueue(vmbus_connection.rescind_work_queue);
340  
341  	if (vmbus_connection.work_queue)
342  		destroy_workqueue(vmbus_connection.work_queue);
343  
344  	if (vmbus_connection.int_page) {
345  		hv_free_hyperv_page(vmbus_connection.int_page);
346  		vmbus_connection.int_page = NULL;
347  	}
348  
349  	if (vmbus_connection.monitor_pages[0]) {
350  		if (!set_memory_encrypted(
351  			(unsigned long)vmbus_connection.monitor_pages[0], 1))
352  			hv_free_hyperv_page(vmbus_connection.monitor_pages[0]);
353  		vmbus_connection.monitor_pages[0] = NULL;
354  	}
355  
356  	if (vmbus_connection.monitor_pages[1]) {
357  		if (!set_memory_encrypted(
358  			(unsigned long)vmbus_connection.monitor_pages[1], 1))
359  			hv_free_hyperv_page(vmbus_connection.monitor_pages[1]);
360  		vmbus_connection.monitor_pages[1] = NULL;
361  	}
362  }
363  
364  /*
365   * relid2channel - Get the channel object given its
366   * child relative id (ie channel id)
367   */
relid2channel(u32 relid)368  struct vmbus_channel *relid2channel(u32 relid)
369  {
370  	if (vmbus_connection.channels == NULL) {
371  		pr_warn_once("relid2channel: relid=%d: No channels mapped!\n", relid);
372  		return NULL;
373  	}
374  	if (WARN_ON(relid >= MAX_CHANNEL_RELIDS))
375  		return NULL;
376  	return READ_ONCE(vmbus_connection.channels[relid]);
377  }
378  
379  /*
380   * vmbus_on_event - Process a channel event notification
381   *
382   * For batched channels (default) optimize host to guest signaling
383   * by ensuring:
384   * 1. While reading the channel, we disable interrupts from host.
385   * 2. Ensure that we process all posted messages from the host
386   *    before returning from this callback.
387   * 3. Once we return, enable signaling from the host. Once this
388   *    state is set we check to see if additional packets are
389   *    available to read. In this case we repeat the process.
390   *    If this tasklet has been running for a long time
391   *    then reschedule ourselves.
392   */
vmbus_on_event(unsigned long data)393  void vmbus_on_event(unsigned long data)
394  {
395  	struct vmbus_channel *channel = (void *) data;
396  	void (*callback_fn)(void *context);
397  
398  	trace_vmbus_on_event(channel);
399  
400  	hv_debug_delay_test(channel, INTERRUPT_DELAY);
401  
402  	/* A channel once created is persistent even when
403  	 * there is no driver handling the device. An
404  	 * unloading driver sets the onchannel_callback to NULL.
405  	 */
406  	callback_fn = READ_ONCE(channel->onchannel_callback);
407  	if (unlikely(!callback_fn))
408  		return;
409  
410  	(*callback_fn)(channel->channel_callback_context);
411  
412  	if (channel->callback_mode != HV_CALL_BATCHED)
413  		return;
414  
415  	if (likely(hv_end_read(&channel->inbound) == 0))
416  		return;
417  
418  	hv_begin_read(&channel->inbound);
419  	tasklet_schedule(&channel->callback_event);
420  }
421  
422  /*
423   * vmbus_post_msg - Send a msg on the vmbus's message connection
424   */
vmbus_post_msg(void * buffer,size_t buflen,bool can_sleep)425  int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep)
426  {
427  	struct vmbus_channel_message_header *hdr;
428  	union hv_connection_id conn_id;
429  	int ret = 0;
430  	int retries = 0;
431  	u32 usec = 1;
432  
433  	conn_id.asu32 = 0;
434  	conn_id.u.id = vmbus_connection.msg_conn_id;
435  
436  	/*
437  	 * hv_post_message() can have transient failures because of
438  	 * insufficient resources. Retry the operation a couple of
439  	 * times before giving up.
440  	 */
441  	while (retries < 100) {
442  		ret = hv_post_message(conn_id, 1, buffer, buflen);
443  
444  		switch (ret) {
445  		case HV_STATUS_INVALID_CONNECTION_ID:
446  			/*
447  			 * See vmbus_negotiate_version(): VMBus protocol 5.0
448  			 * and higher require that we must use
449  			 * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate
450  			 * Contact message, but on old hosts that only
451  			 * support VMBus protocol 4.0 or lower, here we get
452  			 * HV_STATUS_INVALID_CONNECTION_ID and we should
453  			 * return an error immediately without retrying.
454  			 */
455  			hdr = buffer;
456  			if (hdr->msgtype == CHANNELMSG_INITIATE_CONTACT)
457  				return -EINVAL;
458  			/*
459  			 * We could get this if we send messages too
460  			 * frequently.
461  			 */
462  			ret = -EAGAIN;
463  			break;
464  		case HV_STATUS_INSUFFICIENT_MEMORY:
465  		case HV_STATUS_INSUFFICIENT_BUFFERS:
466  			ret = -ENOBUFS;
467  			break;
468  		case HV_STATUS_SUCCESS:
469  			return ret;
470  		default:
471  			pr_err("hv_post_msg() failed; error code:%d\n", ret);
472  			return -EINVAL;
473  		}
474  
475  		retries++;
476  		if (can_sleep && usec > 1000)
477  			msleep(usec / 1000);
478  		else if (usec < MAX_UDELAY_MS * 1000)
479  			udelay(usec);
480  		else
481  			mdelay(usec / 1000);
482  
483  		if (retries < 22)
484  			usec *= 2;
485  	}
486  	return ret;
487  }
488  
489  /*
490   * vmbus_set_event - Send an event notification to the parent
491   */
vmbus_set_event(struct vmbus_channel * channel)492  void vmbus_set_event(struct vmbus_channel *channel)
493  {
494  	u32 child_relid = channel->offermsg.child_relid;
495  
496  	if (!channel->is_dedicated_interrupt)
497  		vmbus_send_interrupt(child_relid);
498  
499  	++channel->sig_events;
500  
501  	if (ms_hyperv.paravisor_present) {
502  		if (hv_isolation_type_snp())
503  			hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event,
504  					  NULL, sizeof(channel->sig_event));
505  		else if (hv_isolation_type_tdx())
506  			hv_tdx_hypercall(HVCALL_SIGNAL_EVENT | HV_HYPERCALL_FAST_BIT,
507  					 channel->sig_event, 0);
508  		else
509  			WARN_ON_ONCE(1);
510  	} else {
511  		hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event);
512  	}
513  }
514  EXPORT_SYMBOL_GPL(vmbus_set_event);
515