1  // SPDX-License-Identifier: GPL-2.0-only
2  
3  /*
4   * Local APIC virtualization
5   *
6   * Copyright (C) 2006 Qumranet, Inc.
7   * Copyright (C) 2007 Novell
8   * Copyright (C) 2007 Intel
9   * Copyright 2009 Red Hat, Inc. and/or its affiliates.
10   *
11   * Authors:
12   *   Dor Laor <dor.laor@qumranet.com>
13   *   Gregory Haskins <ghaskins@novell.com>
14   *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
15   *
16   * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
17   */
18  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19  
20  #include <linux/kvm_host.h>
21  #include <linux/kvm.h>
22  #include <linux/mm.h>
23  #include <linux/highmem.h>
24  #include <linux/smp.h>
25  #include <linux/hrtimer.h>
26  #include <linux/io.h>
27  #include <linux/export.h>
28  #include <linux/math64.h>
29  #include <linux/slab.h>
30  #include <asm/processor.h>
31  #include <asm/mce.h>
32  #include <asm/msr.h>
33  #include <asm/page.h>
34  #include <asm/current.h>
35  #include <asm/apicdef.h>
36  #include <asm/delay.h>
37  #include <linux/atomic.h>
38  #include <linux/jump_label.h>
39  #include "kvm_cache_regs.h"
40  #include "irq.h"
41  #include "ioapic.h"
42  #include "trace.h"
43  #include "x86.h"
44  #include "xen.h"
45  #include "cpuid.h"
46  #include "hyperv.h"
47  #include "smm.h"
48  
49  #ifndef CONFIG_X86_64
50  #define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
51  #else
52  #define mod_64(x, y) ((x) % (y))
53  #endif
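/*
 * Note: on 32-bit kernels a '%' on u64 operands would need a libgcc-style
 * 64-bit modulo helper, so the remainder is derived from div64_u64() instead.
 * Either definition yields the same result, e.g. mod_64(10000000123ULL,
 * NSEC_PER_SEC) == 123.
 */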
54  
55  /* 14 is the version for Xeon and Pentium 8.4.8 */
56  #define APIC_VERSION			0x14UL
57  #define LAPIC_MMIO_LENGTH		(1 << 12)
58  /* The following defines are not in apicdef.h */
59  #define MAX_APIC_VECTOR			256
60  #define APIC_VECTORS_PER_REG		32
61  
62  /*
63   * Enable local APIC timer advancement (tscdeadline mode only) with adaptive
64   * tuning.  When enabled, KVM programs the host timer event to fire early, i.e.
65   * before the deadline expires, to account for the delay between taking the
66   * VM-Exit (to inject the guest event) and the subsequent VM-Enter to resume
67   * the guest, i.e. so that the interrupt arrives in the guest with minimal
68   * latency relative to the deadline programmed by the guest.
69   */
70  static bool lapic_timer_advance __read_mostly = true;
71  module_param(lapic_timer_advance, bool, 0444);
72  
73  #define LAPIC_TIMER_ADVANCE_ADJUST_MIN	100	/* clock cycles */
74  #define LAPIC_TIMER_ADVANCE_ADJUST_MAX	10000	/* clock cycles */
75  #define LAPIC_TIMER_ADVANCE_NS_INIT	1000
76  #define LAPIC_TIMER_ADVANCE_NS_MAX     5000
77  /* step-by-step approximation to mitigate fluctuation */
78  #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
79  static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data);
80  static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data);
81  
82  static inline void __kvm_lapic_set_reg(char *regs, int reg_off, u32 val)
83  {
84  	*((u32 *) (regs + reg_off)) = val;
85  }
86  
87  static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
88  {
89  	__kvm_lapic_set_reg(apic->regs, reg_off, val);
90  }
91  
92  static __always_inline u64 __kvm_lapic_get_reg64(char *regs, int reg)
93  {
94  	BUILD_BUG_ON(reg != APIC_ICR);
95  	return *((u64 *) (regs + reg));
96  }
97  
98  static __always_inline u64 kvm_lapic_get_reg64(struct kvm_lapic *apic, int reg)
99  {
100  	return __kvm_lapic_get_reg64(apic->regs, reg);
101  }
102  
103  static __always_inline void __kvm_lapic_set_reg64(char *regs, int reg, u64 val)
104  {
105  	BUILD_BUG_ON(reg != APIC_ICR);
106  	*((u64 *) (regs + reg)) = val;
107  }
108  
109  static __always_inline void kvm_lapic_set_reg64(struct kvm_lapic *apic,
110  						int reg, u64 val)
111  {
112  	__kvm_lapic_set_reg64(apic->regs, reg, val);
113  }
114  
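/*
 * The IRR, ISR and TMR are 256-bit bitmaps stored as eight 32-bit registers
 * spaced 0x10 bytes apart.  VEC_POS() and REG_POS() (from lapic.h) split a
 * vector accordingly: VEC_POS(vec) = vec & 31 selects the bit within a
 * register, REG_POS(vec) = (vec >> 5) << 4 selects the register's byte
 * offset, e.g. vector 0x61 maps to bit 1 of the register at offset 0x30.
 */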
115  static inline int apic_test_vector(int vec, void *bitmap)
116  {
117  	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
118  }
119  
120  bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
121  {
122  	struct kvm_lapic *apic = vcpu->arch.apic;
123  
124  	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
125  		apic_test_vector(vector, apic->regs + APIC_IRR);
126  }
127  
128  static inline int __apic_test_and_set_vector(int vec, void *bitmap)
129  {
130  	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
131  }
132  
133  static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
134  {
135  	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
136  }
137  
138  __read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
139  EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
140  
141  __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
142  __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);
143  
144  static inline int apic_enabled(struct kvm_lapic *apic)
145  {
146  	return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
147  }
148  
149  #define LVT_MASK	\
150  	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
151  
152  #define LINT_MASK	\
153  	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
154  	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
155  
156  static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
157  {
158  	return apic->vcpu->vcpu_id;
159  }
160  
161  static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
162  {
163  	return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
164  		(kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
165  }
166  
167  bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
168  {
169  	return kvm_x86_ops.set_hv_timer
170  	       && !(kvm_mwait_in_guest(vcpu->kvm) ||
171  		    kvm_can_post_timer_interrupt(vcpu));
172  }
173  
174  static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
175  {
176  	return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
177  }
178  
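/*
 * In x2APIC mode the LDR is derived from the x2APIC ID: bits 31:16 hold the
 * cluster ID (id >> 4) and bits 15:0 hold a one-hot encoding of the position
 * within the cluster (id & 0xf).  E.g. x2APIC ID 0x25 yields LDR 0x00020020,
 * i.e. logical bit 5 in cluster 2.
 */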
179  static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
180  {
181  	return ((id >> 4) << 16) | (1 << (id & 0xf));
182  }
183  
184  static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
185  		u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
186  	switch (map->logical_mode) {
187  	case KVM_APIC_MODE_SW_DISABLED:
188  		/* Arbitrarily use the flat map so that @cluster isn't NULL. */
189  		*cluster = map->xapic_flat_map;
190  		*mask = 0;
191  		return true;
192  	case KVM_APIC_MODE_X2APIC: {
193  		u32 offset = (dest_id >> 16) * 16;
194  		u32 max_apic_id = map->max_apic_id;
195  
196  		if (offset <= max_apic_id) {
197  			u8 cluster_size = min(max_apic_id - offset + 1, 16U);
198  
199  			offset = array_index_nospec(offset, map->max_apic_id + 1);
200  			*cluster = &map->phys_map[offset];
201  			*mask = dest_id & (0xffff >> (16 - cluster_size));
202  		} else {
203  			*mask = 0;
204  		}
205  
206  		return true;
207  		}
208  	case KVM_APIC_MODE_XAPIC_FLAT:
209  		*cluster = map->xapic_flat_map;
210  		*mask = dest_id & 0xff;
211  		return true;
212  	case KVM_APIC_MODE_XAPIC_CLUSTER:
213  		*cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
214  		*mask = dest_id & 0xf;
215  		return true;
216  	case KVM_APIC_MODE_MAP_DISABLED:
217  		return false;
218  	default:
219  		WARN_ON_ONCE(1);
220  		return false;
221  	}
222  }
223  
224  static void kvm_apic_map_free(struct rcu_head *rcu)
225  {
226  	struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
227  
228  	kvfree(map);
229  }
230  
231  static int kvm_recalculate_phys_map(struct kvm_apic_map *new,
232  				    struct kvm_vcpu *vcpu,
233  				    bool *xapic_id_mismatch)
234  {
235  	struct kvm_lapic *apic = vcpu->arch.apic;
236  	u32 x2apic_id = kvm_x2apic_id(apic);
237  	u32 xapic_id = kvm_xapic_id(apic);
238  	u32 physical_id;
239  
240  	/*
241  	 * For simplicity, KVM always allocates enough space for all possible
242  	 * xAPIC IDs.  Yell, but don't kill the VM, as KVM can continue on
243  	 * without the optimized map.
244  	 */
245  	if (WARN_ON_ONCE(xapic_id > new->max_apic_id))
246  		return -EINVAL;
247  
248  	/*
249  	 * Bail if a vCPU was added and/or enabled its APIC between allocating
250  	 * the map and doing the actual calculations for the map.  Note, KVM
251  	 * hardcodes the x2APIC ID to vcpu_id, i.e. there's no TOCTOU bug if
252  	 * the compiler decides to reload x2apic_id after this check.
253  	 */
254  	if (x2apic_id > new->max_apic_id)
255  		return -E2BIG;
256  
257  	/*
258  	 * Deliberately truncate the vCPU ID when detecting a mismatched APIC
259  	 * ID to avoid false positives if the vCPU ID, i.e. x2APIC ID, is a
260  	 * 32-bit value.  Any unwanted aliasing that results from the truncation
261  	 * will be detected below.
262  	 */
263  	if (!apic_x2apic_mode(apic) && xapic_id != (u8)vcpu->vcpu_id)
264  		*xapic_id_mismatch = true;
265  
266  	/*
267  	 * Apply KVM's hotplug hack if userspace has enabled 32-bit APIC IDs.
268  	 * Allow sending events to vCPUs by their x2APIC ID even if the target
269  	 * vCPU is in legacy xAPIC mode, and silently ignore aliased xAPIC IDs
270  	 * (the x2APIC ID is truncated to 8 bits, causing IDs > 0xff to wrap
271  	 * and collide).
272  	 *
273  	 * Honor the architectural (and KVM's non-optimized) behavior if
274  	 * userspace has not enabled 32-bit x2APIC IDs.  Each APIC is supposed
275  	 * to process messages independently.  If multiple vCPUs have the same
276  	 * effective APIC ID, e.g. due to the x2APIC wrap or because the guest
277  	 * manually modified its xAPIC IDs, events targeting that ID are
278  	 * supposed to be recognized by all vCPUs with said ID.
279  	 */
280  	if (vcpu->kvm->arch.x2apic_format) {
281  		/* See also kvm_apic_match_physical_addr(). */
282  		if (apic_x2apic_mode(apic) || x2apic_id > 0xff)
283  			new->phys_map[x2apic_id] = apic;
284  
285  		if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
286  			new->phys_map[xapic_id] = apic;
287  	} else {
288  		/*
289  		 * Disable the optimized map if the physical APIC ID is already
290  		 * mapped, i.e. is aliased to multiple vCPUs.  The optimized
291  		 * map requires a strict 1:1 mapping between IDs and vCPUs.
292  		 */
293  		if (apic_x2apic_mode(apic))
294  			physical_id = x2apic_id;
295  		else
296  			physical_id = xapic_id;
297  
298  		if (new->phys_map[physical_id])
299  			return -EINVAL;
300  
301  		new->phys_map[physical_id] = apic;
302  	}
303  
304  	return 0;
305  }
306  
307  static void kvm_recalculate_logical_map(struct kvm_apic_map *new,
308  					struct kvm_vcpu *vcpu)
309  {
310  	struct kvm_lapic *apic = vcpu->arch.apic;
311  	enum kvm_apic_logical_mode logical_mode;
312  	struct kvm_lapic **cluster;
313  	u16 mask;
314  	u32 ldr;
315  
316  	if (new->logical_mode == KVM_APIC_MODE_MAP_DISABLED)
317  		return;
318  
319  	if (!kvm_apic_sw_enabled(apic))
320  		return;
321  
322  	ldr = kvm_lapic_get_reg(apic, APIC_LDR);
323  	if (!ldr)
324  		return;
325  
326  	if (apic_x2apic_mode(apic)) {
327  		logical_mode = KVM_APIC_MODE_X2APIC;
328  	} else {
329  		ldr = GET_APIC_LOGICAL_ID(ldr);
330  		if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
331  			logical_mode = KVM_APIC_MODE_XAPIC_FLAT;
332  		else
333  			logical_mode = KVM_APIC_MODE_XAPIC_CLUSTER;
334  	}
335  
336  	/*
337  	 * To optimize logical mode delivery, all software-enabled APICs must
338  	 * be configured for the same mode.
339  	 */
340  	if (new->logical_mode == KVM_APIC_MODE_SW_DISABLED) {
341  		new->logical_mode = logical_mode;
342  	} else if (new->logical_mode != logical_mode) {
343  		new->logical_mode = KVM_APIC_MODE_MAP_DISABLED;
344  		return;
345  	}
346  
347  	/*
348  	 * In x2APIC mode, the LDR is read-only and derived directly from the
349  	 * x2APIC ID, thus is guaranteed to be addressable.  KVM reuses
350  	 * kvm_apic_map.phys_map to optimize logical mode x2APIC interrupts by
351  	 * reversing the LDR calculation to get the cluster of APICs, i.e. no
352  	 * additional work is required.
353  	 */
354  	if (apic_x2apic_mode(apic))
355  		return;
356  
357  	if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr,
358  							&cluster, &mask))) {
359  		new->logical_mode = KVM_APIC_MODE_MAP_DISABLED;
360  		return;
361  	}
362  
363  	if (!mask)
364  		return;
365  
366  	ldr = ffs(mask) - 1;
367  	if (!is_power_of_2(mask) || cluster[ldr])
368  		new->logical_mode = KVM_APIC_MODE_MAP_DISABLED;
369  	else
370  		cluster[ldr] = apic;
371  }
372  
373  /*
374   * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
375   *
376   * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with
377   * apic_map_lock_held.
378   */
379  enum {
380  	CLEAN,
381  	UPDATE_IN_PROGRESS,
382  	DIRTY
383  };
384  
385  void kvm_recalculate_apic_map(struct kvm *kvm)
386  {
387  	struct kvm_apic_map *new, *old = NULL;
388  	struct kvm_vcpu *vcpu;
389  	unsigned long i;
390  	u32 max_id = 255; /* enough space for any xAPIC ID */
391  	bool xapic_id_mismatch;
392  	int r;
393  
394  	/* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map.  */
395  	if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
396  		return;
397  
398  	WARN_ONCE(!irqchip_in_kernel(kvm),
399  		  "Dirty APIC map without an in-kernel local APIC");
400  
401  	mutex_lock(&kvm->arch.apic_map_lock);
402  
403  retry:
404  	/*
405  	 * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map (if clean)
406  	 * or the APIC registers (if dirty).  Note, on retry the map may have
407  	 * not yet been marked dirty by whatever task changed a vCPU's x2APIC
408  	 * ID, i.e. the map may still show up as in-progress.  In that case
409  	 * this task still needs to retry and complete its calculation.
410  	 */
411  	if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
412  				   DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
413  		/* Someone else has updated the map. */
414  		mutex_unlock(&kvm->arch.apic_map_lock);
415  		return;
416  	}
417  
418  	/*
419  	 * Reset the mismatch flag between attempts so that KVM does the right
420  	 * thing if a vCPU changes its xAPIC ID, but do NOT reset max_id, i.e.
421  	 * keep max_id strictly increasing.  Disallowing max_id from shrinking
422  	 * ensures KVM won't get stuck in an infinite loop, e.g. if the vCPU
423  	 * with the highest x2APIC ID is toggling its APIC on and off.
424  	 */
425  	xapic_id_mismatch = false;
426  
427  	kvm_for_each_vcpu(i, vcpu, kvm)
428  		if (kvm_apic_present(vcpu))
429  			max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
430  
431  	new = kvzalloc(sizeof(struct kvm_apic_map) +
432  	                   sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
433  			   GFP_KERNEL_ACCOUNT);
434  
435  	if (!new)
436  		goto out;
437  
438  	new->max_apic_id = max_id;
439  	new->logical_mode = KVM_APIC_MODE_SW_DISABLED;
440  
441  	kvm_for_each_vcpu(i, vcpu, kvm) {
442  		if (!kvm_apic_present(vcpu))
443  			continue;
444  
445  		r = kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch);
446  		if (r) {
447  			kvfree(new);
448  			new = NULL;
449  			if (r == -E2BIG) {
450  				cond_resched();
451  				goto retry;
452  			}
453  
454  			goto out;
455  		}
456  
457  		kvm_recalculate_logical_map(new, vcpu);
458  	}
459  out:
460  	/*
461  	 * The optimized map is effectively KVM's internal version of APICv,
462  	 * and all unwanted aliasing that results in disabling the optimized
463  	 * map also applies to APICv.
464  	 */
465  	if (!new)
466  		kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED);
467  	else
468  		kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED);
469  
470  	if (!new || new->logical_mode == KVM_APIC_MODE_MAP_DISABLED)
471  		kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED);
472  	else
473  		kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED);
474  
475  	if (xapic_id_mismatch)
476  		kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
477  	else
478  		kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
479  
480  	old = rcu_dereference_protected(kvm->arch.apic_map,
481  			lockdep_is_held(&kvm->arch.apic_map_lock));
482  	rcu_assign_pointer(kvm->arch.apic_map, new);
483  	/*
484  	 * Write kvm->arch.apic_map before clearing kvm->arch.apic_map_dirty.
485  	 * If another update has come in, leave it DIRTY.
486  	 */
487  	atomic_cmpxchg_release(&kvm->arch.apic_map_dirty,
488  			       UPDATE_IN_PROGRESS, CLEAN);
489  	mutex_unlock(&kvm->arch.apic_map_lock);
490  
491  	if (old)
492  		call_rcu(&old->rcu, kvm_apic_map_free);
493  
494  	kvm_make_scan_ioapic_request(kvm);
495  }
496  
497  static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
498  {
499  	bool enabled = val & APIC_SPIV_APIC_ENABLED;
500  
501  	kvm_lapic_set_reg(apic, APIC_SPIV, val);
502  
503  	if (enabled != apic->sw_enabled) {
504  		apic->sw_enabled = enabled;
505  		if (enabled)
506  			static_branch_slow_dec_deferred(&apic_sw_disabled);
507  		else
508  			static_branch_inc(&apic_sw_disabled.key);
509  
510  		atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
511  	}
512  
513  	/* Check if there are APF page ready requests pending */
514  	if (enabled) {
515  		kvm_make_request(KVM_REQ_APF_READY, apic->vcpu);
516  		kvm_xen_sw_enable_lapic(apic->vcpu);
517  	}
518  }
519  
520  static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
521  {
522  	kvm_lapic_set_reg(apic, APIC_ID, id << 24);
523  	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
524  }
525  
526  static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
527  {
528  	kvm_lapic_set_reg(apic, APIC_LDR, id);
529  	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
530  }
531  
532  static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val)
533  {
534  	kvm_lapic_set_reg(apic, APIC_DFR, val);
535  	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
536  }
537  
538  static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
539  {
540  	u32 ldr = kvm_apic_calc_x2apic_ldr(id);
541  
542  	WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
543  
544  	kvm_lapic_set_reg(apic, APIC_ID, id);
545  	kvm_lapic_set_reg(apic, APIC_LDR, ldr);
546  	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
547  }
548  
549  static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
550  {
551  	return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
552  }
553  
554  static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
555  {
556  	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
557  }
558  
559  static inline int apic_lvtt_period(struct kvm_lapic *apic)
560  {
561  	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
562  }
563  
564  static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
565  {
566  	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
567  }
568  
569  static inline int apic_lvt_nmi_mode(u32 lvt_val)
570  {
571  	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
572  }
573  
574  static inline bool kvm_lapic_lvt_supported(struct kvm_lapic *apic, int lvt_index)
575  {
576  	return apic->nr_lvt_entries > lvt_index;
577  }
578  
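/*
 * The CMCI LVT is the last architectural LVT entry and exists only if the
 * vCPU advertises CMCI support (MCG_CMCI_P in IA32_MCG_CAP), i.e. the number
 * of LVT entries shrinks by one when CMCI is unsupported.
 */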
579  static inline int kvm_apic_calc_nr_lvt_entries(struct kvm_vcpu *vcpu)
580  {
581  	return KVM_APIC_MAX_NR_LVT_ENTRIES - !(vcpu->arch.mcg_cap & MCG_CMCI_P);
582  }
583  
584  void kvm_apic_set_version(struct kvm_vcpu *vcpu)
585  {
586  	struct kvm_lapic *apic = vcpu->arch.apic;
587  	u32 v = 0;
588  
589  	if (!lapic_in_kernel(vcpu))
590  		return;
591  
592  	v = APIC_VERSION | ((apic->nr_lvt_entries - 1) << 16);
593  
594  	/*
595  	 * KVM's in-kernel IOAPIC emulates the 82093AA datasheet, which has no
596  	 * EOI register.  Some buggy OSes (e.g. Windows with the Hyper-V role)
597  	 * disable EOI broadcast in the LAPIC without first checking the IOAPIC
598  	 * version, in which case level-triggered interrupts would never get
599  	 * EOIed in the IOAPIC.
600  	 */
601  	if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) &&
602  	    !ioapic_in_kernel(vcpu->kvm))
603  		v |= APIC_LVR_DIRECTED_EOI;
604  	kvm_lapic_set_reg(apic, APIC_LVR, v);
605  }
606  
607  void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu)
608  {
609  	int nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu);
610  	struct kvm_lapic *apic = vcpu->arch.apic;
611  	int i;
612  
613  	if (!lapic_in_kernel(vcpu) || nr_lvt_entries == apic->nr_lvt_entries)
614  		return;
615  
616  	/* Initialize/mask any "new" LVT entries. */
617  	for (i = apic->nr_lvt_entries; i < nr_lvt_entries; i++)
618  		kvm_lapic_set_reg(apic, APIC_LVTx(i), APIC_LVT_MASKED);
619  
620  	apic->nr_lvt_entries = nr_lvt_entries;
621  
622  	/* The number of LVT entries is reflected in the version register. */
623  	kvm_apic_set_version(vcpu);
624  }
625  
626  static const unsigned int apic_lvt_mask[KVM_APIC_MAX_NR_LVT_ENTRIES] = {
627  	[LVT_TIMER] = LVT_MASK,      /* timer mode mask added at runtime */
628  	[LVT_THERMAL_MONITOR] = LVT_MASK | APIC_MODE_MASK,
629  	[LVT_PERFORMANCE_COUNTER] = LVT_MASK | APIC_MODE_MASK,
630  	[LVT_LINT0] = LINT_MASK,
631  	[LVT_LINT1] = LINT_MASK,
632  	[LVT_ERROR] = LVT_MASK,
633  	[LVT_CMCI] = LVT_MASK | APIC_MODE_MASK
634  };
635  
636  static int find_highest_vector(void *bitmap)
637  {
638  	int vec;
639  	u32 *reg;
640  
641  	for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
642  	     vec >= 0; vec -= APIC_VECTORS_PER_REG) {
643  		reg = bitmap + REG_POS(vec);
644  		if (*reg)
645  			return __fls(*reg) + vec;
646  	}
647  
648  	return -1;
649  }
650  
651  static u8 count_vectors(void *bitmap)
652  {
653  	int vec;
654  	u32 *reg;
655  	u8 count = 0;
656  
657  	for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
658  		reg = bitmap + REG_POS(vec);
659  		count += hweight32(*reg);
660  	}
661  
662  	return count;
663  }
664  
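/*
 * Transfer pending vectors from a posted-interrupt request (PIR) bitmap into
 * the vIRR.  Each of the eight 32-bit PIR chunks is atomically claimed via
 * xchg() and merged into the corresponding IRR register with a cmpxchg loop,
 * as hardware may set IRR bits concurrently.  Returns true if the highest
 * newly-set vector is also the highest vector pending in the IRR.
 */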
665  bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
666  {
667  	u32 i, vec;
668  	u32 pir_val, irr_val, prev_irr_val;
669  	int max_updated_irr;
670  
671  	max_updated_irr = -1;
672  	*max_irr = -1;
673  
674  	for (i = vec = 0; i <= 7; i++, vec += 32) {
675  		u32 *p_irr = (u32 *)(regs + APIC_IRR + i * 0x10);
676  
677  		irr_val = *p_irr;
678  		pir_val = READ_ONCE(pir[i]);
679  
680  		if (pir_val) {
681  			pir_val = xchg(&pir[i], 0);
682  
683  			prev_irr_val = irr_val;
684  			do {
685  				irr_val = prev_irr_val | pir_val;
686  			} while (prev_irr_val != irr_val &&
687  				 !try_cmpxchg(p_irr, &prev_irr_val, irr_val));
688  
689  			if (prev_irr_val != irr_val)
690  				max_updated_irr = __fls(irr_val ^ prev_irr_val) + vec;
691  		}
692  		if (irr_val)
693  			*max_irr = __fls(irr_val) + vec;
694  	}
695  
696  	return ((max_updated_irr != -1) &&
697  		(max_updated_irr == *max_irr));
698  }
699  EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
700  
701  bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
702  {
703  	struct kvm_lapic *apic = vcpu->arch.apic;
704  	bool irr_updated = __kvm_apic_update_irr(pir, apic->regs, max_irr);
705  
706  	if (unlikely(!apic->apicv_active && irr_updated))
707  		apic->irr_pending = true;
708  	return irr_updated;
709  }
710  EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
711  
712  static inline int apic_search_irr(struct kvm_lapic *apic)
713  {
714  	return find_highest_vector(apic->regs + APIC_IRR);
715  }
716  
717  static inline int apic_find_highest_irr(struct kvm_lapic *apic)
718  {
719  	int result;
720  
721  	/*
722  	 * Note that irr_pending is just a hint. It will always be
723  	 * true with virtual interrupt delivery enabled.
724  	 */
725  	if (!apic->irr_pending)
726  		return -1;
727  
728  	result = apic_search_irr(apic);
729  	ASSERT(result == -1 || result >= 16);
730  
731  	return result;
732  }
733  
734  static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
735  {
736  	if (unlikely(apic->apicv_active)) {
737  		/* need to update RVI */
738  		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
739  		kvm_x86_call(hwapic_irr_update)(apic->vcpu,
740  						apic_find_highest_irr(apic));
741  	} else {
742  		apic->irr_pending = false;
743  		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
744  		if (apic_search_irr(apic) != -1)
745  			apic->irr_pending = true;
746  	}
747  }
748  
749  void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec)
750  {
751  	apic_clear_irr(vec, vcpu->arch.apic);
752  }
753  EXPORT_SYMBOL_GPL(kvm_apic_clear_irr);
754  
755  static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
756  {
757  	if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
758  		return;
759  
760  	/*
761  	 * With APIC virtualization enabled, all caching is disabled
762  	 * because the processor can modify ISR under the hood.  Instead
763  	 * just set SVI.
764  	 */
765  	if (unlikely(apic->apicv_active))
766  		kvm_x86_call(hwapic_isr_update)(vec);
767  	else {
768  		++apic->isr_count;
769  		BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
770  		/*
771  		 * An ISR (In-Service Register) bit is set when an interrupt is
772  		 * injected, and the highest pending vector is always the one injected.
773  		 * Thus the most recently set bit matches the highest bit in the ISR.
774  		 */
775  		apic->highest_isr_cache = vec;
776  	}
777  }
778  
779  static inline int apic_find_highest_isr(struct kvm_lapic *apic)
780  {
781  	int result;
782  
783  	/*
784  	 * Note that isr_count is always 1, and highest_isr_cache
785  	 * is always -1, with APIC virtualization enabled.
786  	 */
787  	if (!apic->isr_count)
788  		return -1;
789  	if (likely(apic->highest_isr_cache != -1))
790  		return apic->highest_isr_cache;
791  
792  	result = find_highest_vector(apic->regs + APIC_ISR);
793  	ASSERT(result == -1 || result >= 16);
794  
795  	return result;
796  }
797  
798  static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
799  {
800  	if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
801  		return;
802  
803  	/*
804  	 * We do get here for APIC virtualization enabled if the guest
805  	 * uses the Hyper-V APIC enlightenment.  In this case we may need
806  	 * to trigger a new interrupt delivery by writing the SVI field;
807  	 * on the other hand isr_count and highest_isr_cache are unused
808  	 * and must be left alone.
809  	 */
810  	if (unlikely(apic->apicv_active))
811  		kvm_x86_call(hwapic_isr_update)(apic_find_highest_isr(apic));
812  	else {
813  		--apic->isr_count;
814  		BUG_ON(apic->isr_count < 0);
815  		apic->highest_isr_cache = -1;
816  	}
817  }
818  
819  int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
820  {
821  	/* This may race with setting of irr in __apic_accept_irq() and
822  	 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
823  	 * will cause vmexit immediately and the value will be recalculated
824  	 * on the next vmentry.
825  	 */
826  	return apic_find_highest_irr(vcpu->arch.apic);
827  }
828  EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
829  
830  static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
831  			     int vector, int level, int trig_mode,
832  			     struct dest_map *dest_map);
833  
834  int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
835  		     struct dest_map *dest_map)
836  {
837  	struct kvm_lapic *apic = vcpu->arch.apic;
838  
839  	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
840  			irq->level, irq->trig_mode, dest_map);
841  }
842  
843  static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
844  			 struct kvm_lapic_irq *irq, u32 min)
845  {
846  	int i, count = 0;
847  	struct kvm_vcpu *vcpu;
848  
849  	if (min > map->max_apic_id)
850  		return 0;
851  
852  	for_each_set_bit(i, ipi_bitmap,
853  		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
854  		if (map->phys_map[min + i]) {
855  			vcpu = map->phys_map[min + i]->vcpu;
856  			count += kvm_apic_set_irq(vcpu, irq, NULL);
857  		}
858  	}
859  
860  	return count;
861  }
862  
863  int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
864  		    unsigned long ipi_bitmap_high, u32 min,
865  		    unsigned long icr, int op_64_bit)
866  {
867  	struct kvm_apic_map *map;
868  	struct kvm_lapic_irq irq = {0};
869  	int cluster_size = op_64_bit ? 64 : 32;
870  	int count;
871  
872  	if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
873  		return -KVM_EINVAL;
874  
875  	irq.vector = icr & APIC_VECTOR_MASK;
876  	irq.delivery_mode = icr & APIC_MODE_MASK;
877  	irq.level = (icr & APIC_INT_ASSERT) != 0;
878  	irq.trig_mode = icr & APIC_INT_LEVELTRIG;
879  
880  	rcu_read_lock();
881  	map = rcu_dereference(kvm->arch.apic_map);
882  
883  	count = -EOPNOTSUPP;
884  	if (likely(map)) {
885  		count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
886  		min += cluster_size;
887  		count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
888  	}
889  
890  	rcu_read_unlock();
891  	return count;
892  }
893  
894  static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
895  {
896  
897  	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
898  				      sizeof(val));
899  }
900  
901  static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
902  {
903  
904  	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
905  				      sizeof(*val));
906  }
907  
908  static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
909  {
910  	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
911  }
912  
913  static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
914  {
915  	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0)
916  		return;
917  
918  	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
919  }
920  
921  static bool pv_eoi_test_and_clr_pending(struct kvm_vcpu *vcpu)
922  {
923  	u8 val;
924  
925  	if (pv_eoi_get_user(vcpu, &val) < 0)
926  		return false;
927  
928  	val &= KVM_PV_EOI_ENABLED;
929  
930  	if (val && pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0)
931  		return false;
932  
933  	/*
934  	 * Clear pending bit in any case: it will be set again on vmentry.
935  	 * While this might not be ideal from a performance point of view,
936  	 * it makes sure PV EOI is only enabled when we know it's safe.
937  	 */
938  	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
939  
940  	return val;
941  }
942  
943  static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
944  {
945  	int highest_irr;
946  	if (kvm_x86_ops.sync_pir_to_irr)
947  		highest_irr = kvm_x86_call(sync_pir_to_irr)(apic->vcpu);
948  	else
949  		highest_irr = apic_find_highest_irr(apic);
950  	if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
951  		return -1;
952  	return highest_irr;
953  }
954  
955  static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
956  {
957  	u32 tpr, isrv, ppr, old_ppr;
958  	int isr;
959  
960  	old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
961  	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
962  	isr = apic_find_highest_isr(apic);
963  	isrv = (isr != -1) ? isr : 0;
964  
965  	if ((tpr & 0xf0) >= (isrv & 0xf0))
966  		ppr = tpr & 0xff;
967  	else
968  		ppr = isrv & 0xf0;
969  
970  	*new_ppr = ppr;
971  	if (old_ppr != ppr)
972  		kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
973  
974  	return ppr < old_ppr;
975  }
976  
977  static void apic_update_ppr(struct kvm_lapic *apic)
978  {
979  	u32 ppr;
980  
981  	if (__apic_update_ppr(apic, &ppr) &&
982  	    apic_has_interrupt_for_ppr(apic, ppr) != -1)
983  		kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
984  }
985  
986  void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
987  {
988  	apic_update_ppr(vcpu->arch.apic);
989  }
990  EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
991  
992  static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
993  {
994  	kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
995  	apic_update_ppr(apic);
996  }
997  
998  static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
999  {
1000  	return mda == (apic_x2apic_mode(apic) ?
1001  			X2APIC_BROADCAST : APIC_BROADCAST);
1002  }
1003  
1004  static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
1005  {
1006  	if (kvm_apic_broadcast(apic, mda))
1007  		return true;
1008  
1009  	/*
1010  	 * Hotplug hack: Accept interrupts for vCPUs in xAPIC mode as if they
1011  	 * were in x2APIC mode if the target APIC ID can't be encoded as an
1012  	 * xAPIC ID.  This allows unique addressing of hotplugged vCPUs (which
1013  	 * start in xAPIC mode) with an APIC ID that is unaddressable in xAPIC
1014  	 * mode.  Match the x2APIC ID if and only if the target APIC ID can't
1015  	 * be encoded in xAPIC to avoid spurious matches against a vCPU that
1016  	 * changed its (addressable) xAPIC ID (which is writable).
1017  	 */
1018  	if (apic_x2apic_mode(apic) || mda > 0xff)
1019  		return mda == kvm_x2apic_id(apic);
1020  
1021  	return mda == kvm_xapic_id(apic);
1022  }
1023  
1024  static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
1025  {
1026  	u32 logical_id;
1027  
1028  	if (kvm_apic_broadcast(apic, mda))
1029  		return true;
1030  
1031  	logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
1032  
1033  	if (apic_x2apic_mode(apic))
1034  		return ((logical_id >> 16) == (mda >> 16))
1035  		       && (logical_id & mda & 0xffff) != 0;
1036  
1037  	logical_id = GET_APIC_LOGICAL_ID(logical_id);
1038  
1039  	switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
1040  	case APIC_DFR_FLAT:
1041  		return (logical_id & mda) != 0;
1042  	case APIC_DFR_CLUSTER:
1043  		return ((logical_id >> 4) == (mda >> 4))
1044  		       && (logical_id & mda & 0xf) != 0;
1045  	default:
1046  		return false;
1047  	}
1048  }
1049  
1050  /* The KVM local APIC implementation has two quirks:
1051   *
1052   *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
1053   *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
1054   *    KVM doesn't do that aliasing.
1055   *
1056   *  - in-kernel IOAPIC messages have to be delivered directly to
1057   *    x2APIC, because the kernel does not support interrupt remapping.
1058   *    In order to support broadcast without interrupt remapping, x2APIC
1059   *    rewrites the destination of non-IPI messages from APIC_BROADCAST
1060   *    to X2APIC_BROADCAST.
1061   *
1062   * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
1063   * important when userspace wants to use x2APIC-format MSIs, because
1064   * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
1065   */
1066  static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
1067  		struct kvm_lapic *source, struct kvm_lapic *target)
1068  {
1069  	bool ipi = source != NULL;
1070  
1071  	if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
1072  	    !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
1073  		return X2APIC_BROADCAST;
1074  
1075  	return dest_id;
1076  }
1077  
1078  bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
1079  			   int shorthand, unsigned int dest, int dest_mode)
1080  {
1081  	struct kvm_lapic *target = vcpu->arch.apic;
1082  	u32 mda = kvm_apic_mda(vcpu, dest, source, target);
1083  
1084  	ASSERT(target);
1085  	switch (shorthand) {
1086  	case APIC_DEST_NOSHORT:
1087  		if (dest_mode == APIC_DEST_PHYSICAL)
1088  			return kvm_apic_match_physical_addr(target, mda);
1089  		else
1090  			return kvm_apic_match_logical_addr(target, mda);
1091  	case APIC_DEST_SELF:
1092  		return target == source;
1093  	case APIC_DEST_ALLINC:
1094  		return true;
1095  	case APIC_DEST_ALLBUT:
1096  		return target != source;
1097  	default:
1098  		return false;
1099  	}
1100  }
1101  EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
1102  
1103  int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
1104  		       const unsigned long *bitmap, u32 bitmap_size)
1105  {
1106  	u32 mod;
1107  	int i, idx = -1;
1108  
1109  	mod = vector % dest_vcpus;
1110  
1111  	for (i = 0; i <= mod; i++) {
1112  		idx = find_next_bit(bitmap, bitmap_size, idx + 1);
1113  		BUG_ON(idx == bitmap_size);
1114  	}
1115  
1116  	return idx;
1117  }
1118  
1119  static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
1120  {
1121  	if (!kvm->arch.disabled_lapic_found) {
1122  		kvm->arch.disabled_lapic_found = true;
1123  		pr_info("Disabled LAPIC found during irq injection\n");
1124  	}
1125  }
1126  
1127  static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
1128  		struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
1129  {
1130  	if (kvm->arch.x2apic_broadcast_quirk_disabled) {
1131  		if ((irq->dest_id == APIC_BROADCAST &&
1132  		     map->logical_mode != KVM_APIC_MODE_X2APIC))
1133  			return true;
1134  		if (irq->dest_id == X2APIC_BROADCAST)
1135  			return true;
1136  	} else {
1137  		bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
1138  		if (irq->dest_id == (x2apic_ipi ?
1139  		                     X2APIC_BROADCAST : APIC_BROADCAST))
1140  			return true;
1141  	}
1142  
1143  	return false;
1144  }
1145  
1146  /* Return true if the interrupt can be handled by using *bitmap as index mask
1147   * for valid destinations in *dst array.
1148   * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
1149   * Note: we may have zero kvm_lapic destinations when we return true, which
1150   * means that the interrupt should be dropped.  In this case, *bitmap would be
1151   * zero and *dst undefined.
1152   */
1153  static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
1154  		struct kvm_lapic **src, struct kvm_lapic_irq *irq,
1155  		struct kvm_apic_map *map, struct kvm_lapic ***dst,
1156  		unsigned long *bitmap)
1157  {
1158  	int i, lowest;
1159  
1160  	if (irq->shorthand == APIC_DEST_SELF && src) {
1161  		*dst = src;
1162  		*bitmap = 1;
1163  		return true;
1164  	} else if (irq->shorthand)
1165  		return false;
1166  
1167  	if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
1168  		return false;
1169  
1170  	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
1171  		if (irq->dest_id > map->max_apic_id) {
1172  			*bitmap = 0;
1173  		} else {
1174  			u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
1175  			*dst = &map->phys_map[dest_id];
1176  			*bitmap = 1;
1177  		}
1178  		return true;
1179  	}
1180  
1181  	*bitmap = 0;
1182  	if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
1183  				(u16 *)bitmap))
1184  		return false;
1185  
1186  	if (!kvm_lowest_prio_delivery(irq))
1187  		return true;
1188  
1189  	if (!kvm_vector_hashing_enabled()) {
1190  		lowest = -1;
1191  		for_each_set_bit(i, bitmap, 16) {
1192  			if (!(*dst)[i])
1193  				continue;
1194  			if (lowest < 0)
1195  				lowest = i;
1196  			else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
1197  						(*dst)[lowest]->vcpu) < 0)
1198  				lowest = i;
1199  		}
1200  	} else {
1201  		if (!*bitmap)
1202  			return true;
1203  
1204  		lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
1205  				bitmap, 16);
1206  
1207  		if (!(*dst)[lowest]) {
1208  			kvm_apic_disabled_lapic_found(kvm);
1209  			*bitmap = 0;
1210  			return true;
1211  		}
1212  	}
1213  
1214  	*bitmap = (lowest >= 0) ? 1 << lowest : 0;
1215  
1216  	return true;
1217  }
1218  
1219  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
1220  		struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
1221  {
1222  	struct kvm_apic_map *map;
1223  	unsigned long bitmap;
1224  	struct kvm_lapic **dst = NULL;
1225  	int i;
1226  	bool ret;
1227  
1228  	*r = -1;
1229  
1230  	if (irq->shorthand == APIC_DEST_SELF) {
1231  		if (KVM_BUG_ON(!src, kvm)) {
1232  			*r = 0;
1233  			return true;
1234  		}
1235  		*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
1236  		return true;
1237  	}
1238  
1239  	rcu_read_lock();
1240  	map = rcu_dereference(kvm->arch.apic_map);
1241  
1242  	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
1243  	if (ret) {
1244  		*r = 0;
1245  		for_each_set_bit(i, &bitmap, 16) {
1246  			if (!dst[i])
1247  				continue;
1248  			*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
1249  		}
1250  	}
1251  
1252  	rcu_read_unlock();
1253  	return ret;
1254  }
1255  
1256  /*
1257   * This routine tries to handle interrupts in posted mode, here is how
1258   * it deals with different cases:
1259   * - For single-destination interrupts, handle it in posted mode
1260   * - Else if vector hashing is enabled and it is a lowest-priority
1261   *   interrupt, handle it in posted mode and use the following mechanism
1262   *   to find the destination vCPU.
1263   *	1. For lowest-priority interrupts, store all the possible
1264   *	   destination vCPUs in an array.
1265   *	2. Use "guest vector % max number of destination vCPUs" to find
1266   *	   the right destination vCPU in the array for the lowest-priority
1267   *	   interrupt.
1268   * - Otherwise, use remapped mode to inject the interrupt.
1269   */
1270  bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
1271  			struct kvm_vcpu **dest_vcpu)
1272  {
1273  	struct kvm_apic_map *map;
1274  	unsigned long bitmap;
1275  	struct kvm_lapic **dst = NULL;
1276  	bool ret = false;
1277  
1278  	if (irq->shorthand)
1279  		return false;
1280  
1281  	rcu_read_lock();
1282  	map = rcu_dereference(kvm->arch.apic_map);
1283  
1284  	if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
1285  			hweight16(bitmap) == 1) {
1286  		unsigned long i = find_first_bit(&bitmap, 16);
1287  
1288  		if (dst[i]) {
1289  			*dest_vcpu = dst[i]->vcpu;
1290  			ret = true;
1291  		}
1292  	}
1293  
1294  	rcu_read_unlock();
1295  	return ret;
1296  }
1297  
1298  /*
1299   * Add a pending IRQ into lapic.
1300   * Return 1 if successfully added and 0 if discarded.
1301   */
1302  static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
1303  			     int vector, int level, int trig_mode,
1304  			     struct dest_map *dest_map)
1305  {
1306  	int result = 0;
1307  	struct kvm_vcpu *vcpu = apic->vcpu;
1308  
1309  	trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
1310  				  trig_mode, vector);
1311  	switch (delivery_mode) {
1312  	case APIC_DM_LOWEST:
1313  		vcpu->arch.apic_arb_prio++;
1314  		fallthrough;
1315  	case APIC_DM_FIXED:
1316  		if (unlikely(trig_mode && !level))
1317  			break;
1318  
1319  		/* FIXME add logic for vcpu on reset */
1320  		if (unlikely(!apic_enabled(apic)))
1321  			break;
1322  
1323  		result = 1;
1324  
1325  		if (dest_map) {
1326  			__set_bit(vcpu->vcpu_id, dest_map->map);
1327  			dest_map->vectors[vcpu->vcpu_id] = vector;
1328  		}
1329  
1330  		if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
1331  			if (trig_mode)
1332  				kvm_lapic_set_vector(vector,
1333  						     apic->regs + APIC_TMR);
1334  			else
1335  				kvm_lapic_clear_vector(vector,
1336  						       apic->regs + APIC_TMR);
1337  		}
1338  
1339  		kvm_x86_call(deliver_interrupt)(apic, delivery_mode,
1340  						trig_mode, vector);
1341  		break;
1342  
1343  	case APIC_DM_REMRD:
1344  		result = 1;
1345  		vcpu->arch.pv.pv_unhalted = 1;
1346  		kvm_make_request(KVM_REQ_EVENT, vcpu);
1347  		kvm_vcpu_kick(vcpu);
1348  		break;
1349  
1350  	case APIC_DM_SMI:
1351  		if (!kvm_inject_smi(vcpu)) {
1352  			kvm_vcpu_kick(vcpu);
1353  			result = 1;
1354  		}
1355  		break;
1356  
1357  	case APIC_DM_NMI:
1358  		result = 1;
1359  		kvm_inject_nmi(vcpu);
1360  		kvm_vcpu_kick(vcpu);
1361  		break;
1362  
1363  	case APIC_DM_INIT:
1364  		if (!trig_mode || level) {
1365  			result = 1;
1366  			/* assumes that there are only KVM_APIC_INIT/SIPI */
1367  			apic->pending_events = (1UL << KVM_APIC_INIT);
1368  			kvm_make_request(KVM_REQ_EVENT, vcpu);
1369  			kvm_vcpu_kick(vcpu);
1370  		}
1371  		break;
1372  
1373  	case APIC_DM_STARTUP:
1374  		result = 1;
1375  		apic->sipi_vector = vector;
1376  		/* make sure sipi_vector is visible for the receiver */
1377  		smp_wmb();
1378  		set_bit(KVM_APIC_SIPI, &apic->pending_events);
1379  		kvm_make_request(KVM_REQ_EVENT, vcpu);
1380  		kvm_vcpu_kick(vcpu);
1381  		break;
1382  
1383  	case APIC_DM_EXTINT:
1384  		/*
1385  		 * Should only be called by kvm_apic_local_deliver() with LVT0,
1386  		 * before NMI watchdog was enabled. Already handled by
1387  		 * kvm_apic_accept_pic_intr().
1388  		 */
1389  		break;
1390  
1391  	default:
1392  		printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
1393  		       delivery_mode);
1394  		break;
1395  	}
1396  	return result;
1397  }
1398  
1399  /*
1400   * This routine identifies the destination vcpus mask meant to receive the
1401   * IOAPIC interrupts. It either uses kvm_apic_map_get_dest_lapic() to find
1402   * the destination vcpus array and set the bitmap, or it traverses each
1403   * available vcpu and checks whether it matches the destination.
1404   */
1405  void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
1406  			      unsigned long *vcpu_bitmap)
1407  {
1408  	struct kvm_lapic **dest_vcpu = NULL;
1409  	struct kvm_lapic *src = NULL;
1410  	struct kvm_apic_map *map;
1411  	struct kvm_vcpu *vcpu;
1412  	unsigned long bitmap, i;
1413  	int vcpu_idx;
1414  	bool ret;
1415  
1416  	rcu_read_lock();
1417  	map = rcu_dereference(kvm->arch.apic_map);
1418  
1419  	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
1420  					  &bitmap);
1421  	if (ret) {
1422  		for_each_set_bit(i, &bitmap, 16) {
1423  			if (!dest_vcpu[i])
1424  				continue;
1425  			vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
1426  			__set_bit(vcpu_idx, vcpu_bitmap);
1427  		}
1428  	} else {
1429  		kvm_for_each_vcpu(i, vcpu, kvm) {
1430  			if (!kvm_apic_present(vcpu))
1431  				continue;
1432  			if (!kvm_apic_match_dest(vcpu, NULL,
1433  						 irq->shorthand,
1434  						 irq->dest_id,
1435  						 irq->dest_mode))
1436  				continue;
1437  			__set_bit(i, vcpu_bitmap);
1438  		}
1439  	}
1440  	rcu_read_unlock();
1441  }
1442  
1443  int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
1444  {
1445  	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
1446  }
1447  
1448  static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
1449  {
1450  	return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
1451  }
1452  
1453  static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
1454  {
1455  	int trigger_mode;
1456  
1457  	/* Nothing to do if the IOAPIC does not handle the vector. */
1458  	if (!kvm_ioapic_handles_vector(apic, vector))
1459  		return;
1460  
1461  	/* Request a KVM exit to inform the userspace IOAPIC. */
1462  	if (irqchip_split(apic->vcpu->kvm)) {
1463  		apic->vcpu->arch.pending_ioapic_eoi = vector;
1464  		kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
1465  		return;
1466  	}
1467  
1468  	if (apic_test_vector(vector, apic->regs + APIC_TMR))
1469  		trigger_mode = IOAPIC_LEVEL_TRIG;
1470  	else
1471  		trigger_mode = IOAPIC_EDGE_TRIG;
1472  
1473  	kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
1474  }
1475  
1476  static int apic_set_eoi(struct kvm_lapic *apic)
1477  {
1478  	int vector = apic_find_highest_isr(apic);
1479  
1480  	trace_kvm_eoi(apic, vector);
1481  
1482  	/*
1483  	 * Not every EOI write has a corresponding ISR bit set; one example
1484  	 * is when the kernel checks the timer during setup_IO_APIC().
1485  	 */
1486  	if (vector == -1)
1487  		return vector;
1488  
1489  	apic_clear_isr(vector, apic);
1490  	apic_update_ppr(apic);
1491  
1492  	if (kvm_hv_synic_has_vector(apic->vcpu, vector))
1493  		kvm_hv_synic_send_eoi(apic->vcpu, vector);
1494  
1495  	kvm_ioapic_send_eoi(apic, vector);
1496  	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1497  	return vector;
1498  }
1499  
1500  /*
1501   * this interface assumes a trap-like exit, which has already finished
1502   * desired side effect including vISR and vPPR update.
1503   */
1504  void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
1505  {
1506  	struct kvm_lapic *apic = vcpu->arch.apic;
1507  
1508  	trace_kvm_eoi(apic, vector);
1509  
1510  	kvm_ioapic_send_eoi(apic, vector);
1511  	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1512  }
1513  EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
1514  
1515  void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
1516  {
1517  	struct kvm_lapic_irq irq;
1518  
1519  	/* KVM has no delay and should always clear the BUSY/PENDING flag. */
1520  	WARN_ON_ONCE(icr_low & APIC_ICR_BUSY);
1521  
1522  	irq.vector = icr_low & APIC_VECTOR_MASK;
1523  	irq.delivery_mode = icr_low & APIC_MODE_MASK;
1524  	irq.dest_mode = icr_low & APIC_DEST_MASK;
1525  	irq.level = (icr_low & APIC_INT_ASSERT) != 0;
1526  	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
1527  	irq.shorthand = icr_low & APIC_SHORT_MASK;
1528  	irq.msi_redir_hint = false;
1529  	if (apic_x2apic_mode(apic))
1530  		irq.dest_id = icr_high;
1531  	else
1532  		irq.dest_id = GET_XAPIC_DEST_FIELD(icr_high);
1533  
1534  	trace_kvm_apic_ipi(icr_low, irq.dest_id);
1535  
1536  	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
1537  }
1538  EXPORT_SYMBOL_GPL(kvm_apic_send_ipi);
1539  
1540  static u32 apic_get_tmcct(struct kvm_lapic *apic)
1541  {
1542  	ktime_t remaining, now;
1543  	s64 ns;
1544  
1545  	ASSERT(apic != NULL);
1546  
1547  	/* if initial count is 0, current count should also be 0 */
1548  	if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
1549  		apic->lapic_timer.period == 0)
1550  		return 0;
1551  
1552  	now = ktime_get();
1553  	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1554  	if (ktime_to_ns(remaining) < 0)
1555  		remaining = 0;
1556  
1557  	ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
1558  	return div64_u64(ns, (apic->vcpu->kvm->arch.apic_bus_cycle_ns *
1559  			      apic->divide_count));
1560  }
1561  
1562  static void __report_tpr_access(struct kvm_lapic *apic, bool write)
1563  {
1564  	struct kvm_vcpu *vcpu = apic->vcpu;
1565  	struct kvm_run *run = vcpu->run;
1566  
1567  	kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
1568  	run->tpr_access.rip = kvm_rip_read(vcpu);
1569  	run->tpr_access.is_write = write;
1570  }
1571  
1572  static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
1573  {
1574  	if (apic->vcpu->arch.tpr_access_reporting)
1575  		__report_tpr_access(apic, write);
1576  }
1577  
1578  static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
1579  {
1580  	u32 val = 0;
1581  
1582  	if (offset >= LAPIC_MMIO_LENGTH)
1583  		return 0;
1584  
1585  	switch (offset) {
1586  	case APIC_ARBPRI:
1587  		break;
1588  
1589  	case APIC_TMCCT:	/* Timer CCR */
1590  		if (apic_lvtt_tscdeadline(apic))
1591  			return 0;
1592  
1593  		val = apic_get_tmcct(apic);
1594  		break;
1595  	case APIC_PROCPRI:
1596  		apic_update_ppr(apic);
1597  		val = kvm_lapic_get_reg(apic, offset);
1598  		break;
1599  	case APIC_TASKPRI:
1600  		report_tpr_access(apic, false);
1601  		fallthrough;
1602  	default:
1603  		val = kvm_lapic_get_reg(apic, offset);
1604  		break;
1605  	}
1606  
1607  	return val;
1608  }
1609  
1610  static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
1611  {
1612  	return container_of(dev, struct kvm_lapic, dev);
1613  }
1614  
1615  #define APIC_REG_MASK(reg)	(1ull << ((reg) >> 4))
1616  #define APIC_REGS_MASK(first, count) \
1617  	(APIC_REG_MASK(first) * ((1ull << (count)) - 1))
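/*
 * APIC registers are 16 bytes apart, so (reg >> 4) yields a unique bit index
 * per register and the whole 0x0-0x3f0 register space fits in a u64 mask.
 * E.g. APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) covers the eight ISR registers
 * at offsets 0x100-0x170, i.e. bits 16-23 of the mask.
 */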
1618  
1619  u64 kvm_lapic_readable_reg_mask(struct kvm_lapic *apic)
1620  {
1621  	/* Leave bits '0' for reserved and write-only registers. */
1622  	u64 valid_reg_mask =
1623  		APIC_REG_MASK(APIC_ID) |
1624  		APIC_REG_MASK(APIC_LVR) |
1625  		APIC_REG_MASK(APIC_TASKPRI) |
1626  		APIC_REG_MASK(APIC_PROCPRI) |
1627  		APIC_REG_MASK(APIC_LDR) |
1628  		APIC_REG_MASK(APIC_SPIV) |
1629  		APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
1630  		APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
1631  		APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
1632  		APIC_REG_MASK(APIC_ESR) |
1633  		APIC_REG_MASK(APIC_ICR) |
1634  		APIC_REG_MASK(APIC_LVTT) |
1635  		APIC_REG_MASK(APIC_LVTTHMR) |
1636  		APIC_REG_MASK(APIC_LVTPC) |
1637  		APIC_REG_MASK(APIC_LVT0) |
1638  		APIC_REG_MASK(APIC_LVT1) |
1639  		APIC_REG_MASK(APIC_LVTERR) |
1640  		APIC_REG_MASK(APIC_TMICT) |
1641  		APIC_REG_MASK(APIC_TMCCT) |
1642  		APIC_REG_MASK(APIC_TDCR);
1643  
1644  	if (kvm_lapic_lvt_supported(apic, LVT_CMCI))
1645  		valid_reg_mask |= APIC_REG_MASK(APIC_LVTCMCI);
1646  
1647  	/* ARBPRI, DFR, and ICR2 are not valid in x2APIC mode. */
1648  	if (!apic_x2apic_mode(apic))
1649  		valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI) |
1650  				  APIC_REG_MASK(APIC_DFR) |
1651  				  APIC_REG_MASK(APIC_ICR2);
1652  
1653  	return valid_reg_mask;
1654  }
1655  EXPORT_SYMBOL_GPL(kvm_lapic_readable_reg_mask);
1656  
1657  static int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
1658  			      void *data)
1659  {
1660  	unsigned char alignment = offset & 0xf;
1661  	u32 result;
1662  
1663  	/*
1664  	 * WARN if KVM reads ICR in x2APIC mode, as it's an 8-byte register in
1665  	 * x2APIC and needs to be manually handled by the caller.
1666  	 */
1667  	WARN_ON_ONCE(apic_x2apic_mode(apic) && offset == APIC_ICR);
1668  
1669  	if (alignment + len > 4)
1670  		return 1;
1671  
1672  	if (offset > 0x3f0 ||
1673  	    !(kvm_lapic_readable_reg_mask(apic) & APIC_REG_MASK(offset)))
1674  		return 1;
1675  
1676  	result = __apic_read(apic, offset & ~0xf);
1677  
1678  	trace_kvm_apic_read(offset, result);
1679  
1680  	switch (len) {
1681  	case 1:
1682  	case 2:
1683  	case 4:
1684  		memcpy(data, (char *)&result + alignment, len);
1685  		break;
1686  	default:
1687  		printk(KERN_ERR "Local APIC read with len = %x, "
1688  		       "should be 1, 2, or 4 instead\n", len);
1689  		break;
1690  	}
1691  	return 0;
1692  }
1693  
1694  static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
1695  {
1696  	return addr >= apic->base_address &&
1697  		addr < apic->base_address + LAPIC_MMIO_LENGTH;
1698  }
1699  
1700  static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1701  			   gpa_t address, int len, void *data)
1702  {
1703  	struct kvm_lapic *apic = to_lapic(this);
1704  	u32 offset = address - apic->base_address;
1705  
1706  	if (!apic_mmio_in_range(apic, address))
1707  		return -EOPNOTSUPP;
1708  
1709  	if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
1710  		if (!kvm_check_has_quirk(vcpu->kvm,
1711  					 KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
1712  			return -EOPNOTSUPP;
1713  
1714  		memset(data, 0xff, len);
1715  		return 0;
1716  	}
1717  
1718  	kvm_lapic_reg_read(apic, offset, len, data);
1719  
1720  	return 0;
1721  }
1722  
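/*
 * TDCR encodes the divider in bits 0, 1 and 3 (bit 2 is ignored).  Folding
 * bit 3 down next to bits 1:0 gives a 3-bit value N, and the divider is
 * 2^(N+1) with wrap-around, e.g. TDCR=0x0 => divide by 2, TDCR=0x8 =>
 * divide by 32, and TDCR=0xb (N=7) wraps to divide by 1.
 */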
1723  static void update_divide_count(struct kvm_lapic *apic)
1724  {
1725  	u32 tmp1, tmp2, tdcr;
1726  
1727  	tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
1728  	tmp1 = tdcr & 0xf;
1729  	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
1730  	apic->divide_count = 0x1 << (tmp2 & 0x7);
1731  }
1732  
1733  static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
1734  {
1735  	/*
1736  	 * Do not allow the guest to program periodic timers with small
1737  	 * interval, since the hrtimers are not throttled by the host
1738  	 * scheduler.
1739  	 */
1740  	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1741  		s64 min_period = min_timer_period_us * 1000LL;
1742  
1743  		if (apic->lapic_timer.period < min_period) {
1744  			pr_info_once(
1745  			    "vcpu %i: requested %lld ns "
1746  			    "lapic timer period limited to %lld ns\n",
1747  			    apic->vcpu->vcpu_id,
1748  			    apic->lapic_timer.period, min_period);
1749  			apic->lapic_timer.period = min_period;
1750  		}
1751  	}
1752  }
1753  
1754  static void cancel_hv_timer(struct kvm_lapic *apic);
1755  
1756  static void cancel_apic_timer(struct kvm_lapic *apic)
1757  {
1758  	hrtimer_cancel(&apic->lapic_timer.timer);
1759  	preempt_disable();
1760  	if (apic->lapic_timer.hv_timer_in_use)
1761  		cancel_hv_timer(apic);
1762  	preempt_enable();
1763  	atomic_set(&apic->lapic_timer.pending, 0);
1764  }
1765  
1766  static void apic_update_lvtt(struct kvm_lapic *apic)
1767  {
1768  	u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
1769  			apic->lapic_timer.timer_mode_mask;
1770  
1771  	if (apic->lapic_timer.timer_mode != timer_mode) {
1772  		if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
1773  				APIC_LVT_TIMER_TSCDEADLINE)) {
1774  			cancel_apic_timer(apic);
1775  			kvm_lapic_set_reg(apic, APIC_TMICT, 0);
1776  			apic->lapic_timer.period = 0;
1777  			apic->lapic_timer.tscdeadline = 0;
1778  		}
1779  		apic->lapic_timer.timer_mode = timer_mode;
1780  		limit_periodic_timer_frequency(apic);
1781  	}
1782  }
1783  
1784  /*
1785   * On APICv, this test will cause a busy wait
1786   * during a higher-priority task.
1787   */
1788  
1789  static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1790  {
1791  	struct kvm_lapic *apic = vcpu->arch.apic;
1792  	u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);
1793  
1794  	if (kvm_apic_hw_enabled(apic)) {
1795  		int vec = reg & APIC_VECTOR_MASK;
1796  		void *bitmap = apic->regs + APIC_ISR;
1797  
1798  		if (apic->apicv_active)
1799  			bitmap = apic->regs + APIC_IRR;
1800  
1801  		if (apic_test_vector(vec, bitmap))
1802  			return true;
1803  	}
1804  	return false;
1805  }
1806  
1807  static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
1808  {
1809  	u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
1810  
1811  	/*
1812  	 * If the guest TSC is running at a different ratio than the host, then
1813  	 * convert the delay to nanoseconds to achieve an accurate delay.  Note
1814  	 * that __delay() uses delay_tsc whenever the hardware has TSC, thus
1815  	 * always for VMX enabled hardware.
1816  	 */
1817  	if (vcpu->arch.tsc_scaling_ratio == kvm_caps.default_tsc_scaling_ratio) {
1818  		__delay(min(guest_cycles,
1819  			nsec_to_cycles(vcpu, timer_advance_ns)));
1820  	} else {
1821  		u64 delay_ns = guest_cycles * 1000000ULL;
1822  		do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
1823  		ndelay(min_t(u32, delay_ns, timer_advance_ns));
1824  	}
1825  }
1826  
1827  static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
1828  					      s64 advance_expire_delta)
1829  {
1830  	struct kvm_lapic *apic = vcpu->arch.apic;
1831  	u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
1832  	u64 ns;
1833  
1834  	/* Do not adjust for tiny fluctuations or large random spikes. */
1835  	if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
1836  	    abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
1837  		return;
1838  
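	/*
	 * advance_expire_delta is in guest TSC cycles; cycles * 10^6 divided
	 * by the guest TSC frequency in kHz yields nanoseconds, of which only
	 * 1/LAPIC_TIMER_ADVANCE_ADJUST_STEP is folded in per sample to damp
	 * oscillation.
	 */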
1839  	/* too early */
1840  	if (advance_expire_delta < 0) {
1841  		ns = -advance_expire_delta * 1000000ULL;
1842  		do_div(ns, vcpu->arch.virtual_tsc_khz);
1843  		timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1844  	} else {
1845  	/* too late */
1846  		ns = advance_expire_delta * 1000000ULL;
1847  		do_div(ns, vcpu->arch.virtual_tsc_khz);
1848  		timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1849  	}
1850  
1851  	if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
1852  		timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
1853  	apic->lapic_timer.timer_advance_ns = timer_advance_ns;
1854  }
1855  
1856  static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1857  {
1858  	struct kvm_lapic *apic = vcpu->arch.apic;
1859  	u64 guest_tsc, tsc_deadline;
1860  
1861  	tsc_deadline = apic->lapic_timer.expired_tscdeadline;
1862  	apic->lapic_timer.expired_tscdeadline = 0;
1863  	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1864  	trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
1865  
1866  	adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
1867  
1868  	/*
1869  	 * If the timer fired early, reread the TSC to account for the overhead
1870  	 * of the above adjustment to avoid waiting longer than is necessary.
1871  	 */
1872  	if (guest_tsc < tsc_deadline)
1873  		guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1874  
1875  	if (guest_tsc < tsc_deadline)
1876  		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
1877  }
1878  
1879  void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1880  {
1881  	if (lapic_in_kernel(vcpu) &&
1882  	    vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1883  	    vcpu->arch.apic->lapic_timer.timer_advance_ns &&
1884  	    lapic_timer_int_injected(vcpu))
1885  		__kvm_wait_lapic_expire(vcpu);
1886  }
1887  EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
1888  
1889  static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
1890  {
1891  	struct kvm_timer *ktimer = &apic->lapic_timer;
1892  
1893  	kvm_apic_local_deliver(apic, APIC_LVTT);
1894  	if (apic_lvtt_tscdeadline(apic)) {
1895  		ktimer->tscdeadline = 0;
1896  	} else if (apic_lvtt_oneshot(apic)) {
1897  		ktimer->tscdeadline = 0;
1898  		ktimer->target_expiration = 0;
1899  	}
1900  }
1901  
1902  static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
1903  {
1904  	struct kvm_vcpu *vcpu = apic->vcpu;
1905  	struct kvm_timer *ktimer = &apic->lapic_timer;
1906  
1907  	if (atomic_read(&apic->lapic_timer.pending))
1908  		return;
1909  
1910  	if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
1911  		ktimer->expired_tscdeadline = ktimer->tscdeadline;
1912  
1913  	if (!from_timer_fn && apic->apicv_active) {
1914  		WARN_ON(kvm_get_running_vcpu() != vcpu);
1915  		kvm_apic_inject_pending_timer_irqs(apic);
1916  		return;
1917  	}
1918  
1919  	if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
1920  		/*
1921  		 * Ensure the guest's timer has truly expired before posting an
1922  		 * interrupt.  Open code the relevant checks to avoid querying
1923  		 * lapic_timer_int_injected(), which will be false since the
1924  		 * interrupt isn't yet injected.  Waiting until after injecting
1925  		 * is not an option since that won't help a posted interrupt.
1926  		 */
1927  		if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1928  		    vcpu->arch.apic->lapic_timer.timer_advance_ns)
1929  			__kvm_wait_lapic_expire(vcpu);
1930  		kvm_apic_inject_pending_timer_irqs(apic);
1931  		return;
1932  	}
1933  
1934  	atomic_inc(&apic->lapic_timer.pending);
1935  	kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
1936  	if (from_timer_fn)
1937  		kvm_vcpu_kick(vcpu);
1938  }
1939  
1940  static void start_sw_tscdeadline(struct kvm_lapic *apic)
1941  {
1942  	struct kvm_timer *ktimer = &apic->lapic_timer;
1943  	u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
1944  	u64 ns = 0;
1945  	ktime_t expire;
1946  	struct kvm_vcpu *vcpu = apic->vcpu;
1947  	u32 this_tsc_khz = vcpu->arch.virtual_tsc_khz;
1948  	unsigned long flags;
1949  	ktime_t now;
1950  
1951  	if (unlikely(!tscdeadline || !this_tsc_khz))
1952  		return;
1953  
1954  	local_irq_save(flags);
1955  
1956  	now = ktime_get();
1957  	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1958  
1959  	ns = (tscdeadline - guest_tsc) * 1000000ULL;
1960  	do_div(ns, this_tsc_khz);
1961  
1962  	if (likely(tscdeadline > guest_tsc) &&
1963  	    likely(ns > apic->lapic_timer.timer_advance_ns)) {
1964  		expire = ktime_add_ns(now, ns);
1965  		expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
1966  		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
1967  	} else
1968  		apic_timer_expired(apic, false);
1969  
1970  	local_irq_restore(flags);
1971  }
1972  
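/*
 * TMICT counts timer ticks; a tick lasts apic_bus_cycle_ns * divide_count
 * nanoseconds, so e.g. TMICT=1000 with divide-by-2 and a 1 ns bus cycle
 * programs a 2000 ns period.
 */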
1973  static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
1974  {
1975  	return (u64)tmict * apic->vcpu->kvm->arch.apic_bus_cycle_ns *
1976  		(u64)apic->divide_count;
1977  }
1978  
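/*
 * Called when the divide configuration changes while the timer is armed:
 * rescale the remaining time by new_divisor/old_divisor so the unchanged
 * current count keeps ticking at the new rate, and shift the TSC deadline
 * by the resulting difference.
 */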
1979  static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
1980  {
1981  	ktime_t now, remaining;
1982  	u64 ns_remaining_old, ns_remaining_new;
1983  
1984  	apic->lapic_timer.period =
1985  			tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
1986  	limit_periodic_timer_frequency(apic);
1987  
1988  	now = ktime_get();
1989  	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1990  	if (ktime_to_ns(remaining) < 0)
1991  		remaining = 0;
1992  
1993  	ns_remaining_old = ktime_to_ns(remaining);
1994  	ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
1995  	                                   apic->divide_count, old_divisor);
1996  
1997  	apic->lapic_timer.tscdeadline +=
1998  		nsec_to_cycles(apic->vcpu, ns_remaining_new) -
1999  		nsec_to_cycles(apic->vcpu, ns_remaining_old);
2000  	apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
2001  }
2002  
2003  static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
2004  {
2005  	ktime_t now;
2006  	u64 tscl = rdtsc();
2007  	s64 deadline;
2008  
2009  	now = ktime_get();
2010  	apic->lapic_timer.period =
2011  			tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
2012  
2013  	if (!apic->lapic_timer.period) {
2014  		apic->lapic_timer.tscdeadline = 0;
2015  		return false;
2016  	}
2017  
2018  	limit_periodic_timer_frequency(apic);
2019  	deadline = apic->lapic_timer.period;
2020  
2021  	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
2022  		if (unlikely(count_reg != APIC_TMICT)) {
2023  			deadline = tmict_to_ns(apic,
2024  				     kvm_lapic_get_reg(apic, count_reg));
2025  			if (unlikely(deadline <= 0)) {
2026  				if (apic_lvtt_period(apic))
2027  					deadline = apic->lapic_timer.period;
2028  				else
2029  					deadline = 0;
2030  			}
2031  			else if (unlikely(deadline > apic->lapic_timer.period)) {
2032  				pr_info_ratelimited(
2033  				    "vcpu %i: requested lapic timer restore with "
2034  				    "starting count register %#x=%u (%lld ns) > initial count (%lld ns). "
2035  				    "Using initial count to start timer.\n",
2036  				    apic->vcpu->vcpu_id,
2037  				    count_reg,
2038  				    kvm_lapic_get_reg(apic, count_reg),
2039  				    deadline, apic->lapic_timer.period);
2040  				kvm_lapic_set_reg(apic, count_reg, 0);
2041  				deadline = apic->lapic_timer.period;
2042  			}
2043  		}
2044  	}
2045  
2046  	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
2047  		nsec_to_cycles(apic->vcpu, deadline);
2048  	apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline);
2049  
2050  	return true;
2051  }
2052  
2053  static void advance_periodic_target_expiration(struct kvm_lapic *apic)
2054  {
2055  	ktime_t now = ktime_get();
2056  	u64 tscl = rdtsc();
2057  	ktime_t delta;
2058  
2059  	/*
2060  	 * Synchronize both deadlines to the same time source or
2061  	 * differences in the periods (caused by differences in the
2062  	 * underlying clocks or numerical approximation errors) will
2063  	 * cause the two to drift apart over time as the errors
2064  	 * accumulate.
2065  	 */
2066  	apic->lapic_timer.target_expiration =
2067  		ktime_add_ns(apic->lapic_timer.target_expiration,
2068  				apic->lapic_timer.period);
2069  	delta = ktime_sub(apic->lapic_timer.target_expiration, now);
2070  	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
2071  		nsec_to_cycles(apic->vcpu, delta);
2072  }
2073  
2074  static void start_sw_period(struct kvm_lapic *apic)
2075  {
2076  	if (!apic->lapic_timer.period)
2077  		return;
2078  
2079  	if (ktime_after(ktime_get(),
2080  			apic->lapic_timer.target_expiration)) {
2081  		apic_timer_expired(apic, false);
2082  
2083  		if (apic_lvtt_oneshot(apic))
2084  			return;
2085  
2086  		advance_periodic_target_expiration(apic);
2087  	}
2088  
2089  	hrtimer_start(&apic->lapic_timer.timer,
2090  		apic->lapic_timer.target_expiration,
2091  		HRTIMER_MODE_ABS_HARD);
2092  }
2093  
2094  bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
2095  {
2096  	if (!lapic_in_kernel(vcpu))
2097  		return false;
2098  
2099  	return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
2100  }
2101  
2102  static void cancel_hv_timer(struct kvm_lapic *apic)
2103  {
2104  	WARN_ON(preemptible());
2105  	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
2106  	kvm_x86_call(cancel_hv_timer)(apic->vcpu);
2107  	apic->lapic_timer.hv_timer_in_use = false;
2108  }
2109  
2110  static bool start_hv_timer(struct kvm_lapic *apic)
2111  {
2112  	struct kvm_timer *ktimer = &apic->lapic_timer;
2113  	struct kvm_vcpu *vcpu = apic->vcpu;
2114  	bool expired;
2115  
2116  	WARN_ON(preemptible());
2117  	if (!kvm_can_use_hv_timer(vcpu))
2118  		return false;
2119  
2120  	if (!ktimer->tscdeadline)
2121  		return false;
2122  
2123  	if (kvm_x86_call(set_hv_timer)(vcpu, ktimer->tscdeadline, &expired))
2124  		return false;
2125  
2126  	ktimer->hv_timer_in_use = true;
2127  	hrtimer_cancel(&ktimer->timer);
2128  
2129  	/*
2130  	 * To simplify handling the periodic timer, leave the hv timer running
2131  	 * even if the deadline timer has expired, i.e. rely on the resulting
2132  	 * VM-Exit to recompute the periodic timer's target expiration.
2133  	 */
2134  	if (!apic_lvtt_period(apic)) {
2135  		/*
2136  		 * Cancel the hv timer if the sw timer fired while the hv timer
2137  		 * was being programmed, or if the hv timer itself expired.
2138  		 */
2139  		if (atomic_read(&ktimer->pending)) {
2140  			cancel_hv_timer(apic);
2141  		} else if (expired) {
2142  			apic_timer_expired(apic, false);
2143  			cancel_hv_timer(apic);
2144  		}
2145  	}
2146  
2147  	trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
2148  
2149  	return true;
2150  }
2151  
2152  static void start_sw_timer(struct kvm_lapic *apic)
2153  {
2154  	struct kvm_timer *ktimer = &apic->lapic_timer;
2155  
2156  	WARN_ON(preemptible());
2157  	if (apic->lapic_timer.hv_timer_in_use)
2158  		cancel_hv_timer(apic);
2159  	if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
2160  		return;
2161  
2162  	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
2163  		start_sw_period(apic);
2164  	else if (apic_lvtt_tscdeadline(apic))
2165  		start_sw_tscdeadline(apic);
2166  	trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
2167  }
2168  
2169  static void restart_apic_timer(struct kvm_lapic *apic)
2170  {
2171  	preempt_disable();
2172  
2173  	if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
2174  		goto out;
2175  
2176  	if (!start_hv_timer(apic))
2177  		start_sw_timer(apic);
2178  out:
2179  	preempt_enable();
2180  }
2181  
2182  void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
2183  {
2184  	struct kvm_lapic *apic = vcpu->arch.apic;
2185  
2186  	preempt_disable();
2187  	/* If the preempt notifier has already run, it also called apic_timer_expired */
2188  	if (!apic->lapic_timer.hv_timer_in_use)
2189  		goto out;
2190  	WARN_ON(kvm_vcpu_is_blocking(vcpu));
2191  	apic_timer_expired(apic, false);
2192  	cancel_hv_timer(apic);
2193  
2194  	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
2195  		advance_periodic_target_expiration(apic);
2196  		restart_apic_timer(apic);
2197  	}
2198  out:
2199  	preempt_enable();
2200  }
2201  EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
2202  
2203  void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
2204  {
2205  	restart_apic_timer(vcpu->arch.apic);
2206  }
2207  
2208  void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
2209  {
2210  	struct kvm_lapic *apic = vcpu->arch.apic;
2211  
2212  	preempt_disable();
2213  	/* Possibly the TSC deadline timer is not enabled yet */
2214  	if (apic->lapic_timer.hv_timer_in_use)
2215  		start_sw_timer(apic);
2216  	preempt_enable();
2217  }
2218  
2219  void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
2220  {
2221  	struct kvm_lapic *apic = vcpu->arch.apic;
2222  
2223  	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
2224  	restart_apic_timer(apic);
2225  }
2226  
2227  static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg)
2228  {
2229  	atomic_set(&apic->lapic_timer.pending, 0);
2230  
2231  	if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
2232  	    && !set_target_expiration(apic, count_reg))
2233  		return;
2234  
2235  	restart_apic_timer(apic);
2236  }
2237  
2238  static void start_apic_timer(struct kvm_lapic *apic)
2239  {
2240  	__start_apic_timer(apic, APIC_TMICT);
2241  }
2242  
2243  static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
2244  {
2245  	bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
2246  
2247  	if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
2248  		apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
2249  		if (lvt0_in_nmi_mode) {
2250  			atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
2251  		} else
2252  			atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
2253  	}
2254  }
2255  
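/*
 * The LVT registers occupy consecutive 16-byte slots starting at APIC_LVTT,
 * so (reg - APIC_LVTT) >> 4 is the LVT index, e.g. APIC_LVTT => 0 and
 * APIC_LVTERR => 5.  LVTCMCI sits outside that range and is special-cased.
 */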
2256  static int get_lvt_index(u32 reg)
2257  {
2258  	if (reg == APIC_LVTCMCI)
2259  		return LVT_CMCI;
2260  	if (reg < APIC_LVTT || reg > APIC_LVTERR)
2261  		return -1;
2262  	return array_index_nospec(
2263  			(reg - APIC_LVTT) >> 4, KVM_APIC_MAX_NR_LVT_ENTRIES);
2264  }
2265  
2266  static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
2267  {
2268  	int ret = 0;
2269  
2270  	trace_kvm_apic_write(reg, val);
2271  
2272  	switch (reg) {
2273  	case APIC_ID:		/* Local APIC ID */
2274  		if (!apic_x2apic_mode(apic)) {
2275  			kvm_apic_set_xapic_id(apic, val >> 24);
2276  		} else {
2277  			ret = 1;
2278  		}
2279  		break;
2280  
2281  	case APIC_TASKPRI:
2282  		report_tpr_access(apic, true);
2283  		apic_set_tpr(apic, val & 0xff);
2284  		break;
2285  
2286  	case APIC_EOI:
2287  		apic_set_eoi(apic);
2288  		break;
2289  
2290  	case APIC_LDR:
2291  		if (!apic_x2apic_mode(apic))
2292  			kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
2293  		else
2294  			ret = 1;
2295  		break;
2296  
2297  	case APIC_DFR:
2298  		if (!apic_x2apic_mode(apic))
2299  			kvm_apic_set_dfr(apic, val | 0x0FFFFFFF);
2300  		else
2301  			ret = 1;
2302  		break;
2303  
2304  	case APIC_SPIV: {
2305  		u32 mask = 0x3ff;
2306  		if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
2307  			mask |= APIC_SPIV_DIRECTED_EOI;
2308  		apic_set_spiv(apic, val & mask);
2309  		if (!(val & APIC_SPIV_APIC_ENABLED)) {
2310  			int i;
2311  
2312  			for (i = 0; i < apic->nr_lvt_entries; i++) {
2313  				kvm_lapic_set_reg(apic, APIC_LVTx(i),
2314  					kvm_lapic_get_reg(apic, APIC_LVTx(i)) | APIC_LVT_MASKED);
2315  			}
2316  			apic_update_lvtt(apic);
2317  			atomic_set(&apic->lapic_timer.pending, 0);
2318  
2319  		}
2320  		break;
2321  	}
2322  	case APIC_ICR:
2323  		WARN_ON_ONCE(apic_x2apic_mode(apic));
2324  
2325  		/* No delay here, so we always clear the pending bit */
2326  		val &= ~APIC_ICR_BUSY;
2327  		kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
2328  		kvm_lapic_set_reg(apic, APIC_ICR, val);
2329  		break;
2330  	case APIC_ICR2:
2331  		if (apic_x2apic_mode(apic))
2332  			ret = 1;
2333  		else
2334  			kvm_lapic_set_reg(apic, APIC_ICR2, val & 0xff000000);
2335  		break;
2336  
2337  	case APIC_LVT0:
2338  		apic_manage_nmi_watchdog(apic, val);
2339  		fallthrough;
2340  	case APIC_LVTTHMR:
2341  	case APIC_LVTPC:
2342  	case APIC_LVT1:
2343  	case APIC_LVTERR:
2344  	case APIC_LVTCMCI: {
2345  		u32 index = get_lvt_index(reg);
2346  		if (!kvm_lapic_lvt_supported(apic, index)) {
2347  			ret = 1;
2348  			break;
2349  		}
2350  		if (!kvm_apic_sw_enabled(apic))
2351  			val |= APIC_LVT_MASKED;
2352  		val &= apic_lvt_mask[index];
2353  		kvm_lapic_set_reg(apic, reg, val);
2354  		break;
2355  	}
2356  
2357  	case APIC_LVTT:
2358  		if (!kvm_apic_sw_enabled(apic))
2359  			val |= APIC_LVT_MASKED;
2360  		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
2361  		kvm_lapic_set_reg(apic, APIC_LVTT, val);
2362  		apic_update_lvtt(apic);
2363  		break;
2364  
2365  	case APIC_TMICT:
2366  		if (apic_lvtt_tscdeadline(apic))
2367  			break;
2368  
2369  		cancel_apic_timer(apic);
2370  		kvm_lapic_set_reg(apic, APIC_TMICT, val);
2371  		start_apic_timer(apic);
2372  		break;
2373  
2374  	case APIC_TDCR: {
2375  		uint32_t old_divisor = apic->divide_count;
2376  
2377  		kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb);
2378  		update_divide_count(apic);
2379  		if (apic->divide_count != old_divisor &&
2380  				apic->lapic_timer.period) {
2381  			hrtimer_cancel(&apic->lapic_timer.timer);
2382  			update_target_expiration(apic, old_divisor);
2383  			restart_apic_timer(apic);
2384  		}
2385  		break;
2386  	}
2387  	case APIC_ESR:
2388  		if (apic_x2apic_mode(apic) && val != 0)
2389  			ret = 1;
2390  		break;
2391  
2392  	case APIC_SELF_IPI:
2393  		/*
2394  		 * Self-IPI exists only when x2APIC is enabled.  Bits 7:0 hold
2395  		 * the vector, everything else is reserved.
2396  		 */
2397  		if (!apic_x2apic_mode(apic) || (val & ~APIC_VECTOR_MASK))
2398  			ret = 1;
2399  		else
2400  			kvm_apic_send_ipi(apic, APIC_DEST_SELF | val, 0);
2401  		break;
2402  	default:
2403  		ret = 1;
2404  		break;
2405  	}
2406  
2407  	/*
2408  	 * Recalculate APIC maps if necessary, e.g. if the software enable bit
2409  	 * was toggled, the APIC ID changed, etc...   The maps are marked dirty
2410  	 * on relevant changes, i.e. this is a nop for most writes.
2411  	 */
2412  	kvm_recalculate_apic_map(apic->vcpu->kvm);
2413  
2414  	return ret;
2415  }
2416  
2417  static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
2418  			    gpa_t address, int len, const void *data)
2419  {
2420  	struct kvm_lapic *apic = to_lapic(this);
2421  	unsigned int offset = address - apic->base_address;
2422  	u32 val;
2423  
2424  	if (!apic_mmio_in_range(apic, address))
2425  		return -EOPNOTSUPP;
2426  
2427  	if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
2428  		if (!kvm_check_has_quirk(vcpu->kvm,
2429  					 KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
2430  			return -EOPNOTSUPP;
2431  
2432  		return 0;
2433  	}
2434  
2435  	/*
2436  	 * APIC registers must be aligned on a 128-bit boundary, and
2437  	 * 32/64/128-bit registers must be accessed through 32-bit reads and
2438  	 * writes.  Refer to SDM section 8.4.1.
2439  	 */
2440  	if (len != 4 || (offset & 0xf))
2441  		return 0;
2442  
2443  	val = *(u32*)data;
2444  
2445  	kvm_lapic_reg_write(apic, offset & 0xff0, val);
2446  
2447  	return 0;
2448  }
2449  
2450  void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
2451  {
2452  	kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
2453  }
2454  EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
2455  
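/*
 * Low ICR dword bits 31:20, 17:16 and 13 are reserved in x2APIC mode; the
 * BUSY bit (12) is handled separately below rather than being rejected.
 */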
2456  #define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13))
2457  
2458  int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
2459  {
2460  	if (data & X2APIC_ICR_RESERVED_BITS)
2461  		return 1;
2462  
2463  	/*
2464  	 * The BUSY bit is reserved on both Intel and AMD in x2APIC mode, but
2465  	 * only AMD requires it to be zero, Intel essentially just ignores the
2466  	 * bit.  And if IPI virtualization (Intel) or x2AVIC (AMD) is enabled,
2467  	 * the CPU performs the reserved bits checks, i.e. the underlying CPU
2468  	 * behavior will "win".  Arbitrarily clear the BUSY bit, as there is no
2469  	 * sane way to provide consistent behavior with respect to hardware.
2470  	 */
2471  	data &= ~APIC_ICR_BUSY;
2472  
2473  	kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
2474  	if (kvm_x86_ops.x2apic_icr_is_split) {
2475  		kvm_lapic_set_reg(apic, APIC_ICR, data);
2476  		kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32);
2477  	} else {
2478  		kvm_lapic_set_reg64(apic, APIC_ICR, data);
2479  	}
2480  	trace_kvm_apic_write(APIC_ICR, data);
2481  	return 0;
2482  }
2483  
2484  static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic)
2485  {
2486  	if (kvm_x86_ops.x2apic_icr_is_split)
2487  		return (u64)kvm_lapic_get_reg(apic, APIC_ICR) |
2488  		       (u64)kvm_lapic_get_reg(apic, APIC_ICR2) << 32;
2489  
2490  	return kvm_lapic_get_reg64(apic, APIC_ICR);
2491  }
2492  
2493  /* emulate APIC access in a trap manner */
2494  void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
2495  {
2496  	struct kvm_lapic *apic = vcpu->arch.apic;
2497  
2498  	/*
2499  	 * ICR is a single 64-bit register when x2APIC is enabled, all others
2500  	 * registers hold 32-bit values.  For legacy xAPIC, ICR writes need to
2501  	 * go down the common path to get the upper half from ICR2.
2502  	 *
2503  	 * Note, using the write helpers may incur an unnecessary write to the
2504  	 * virtual APIC state, but KVM needs to conditionally modify the value
2505  	 * in certain cases, e.g. to clear the ICR busy bit.  The cost of extra
2506  	 * conditional branches is likely a wash relative to the cost of the
2507  	 * maybe-unnecessary write, and both are in the noise anyway.
2508  	 */
2509  	if (apic_x2apic_mode(apic) && offset == APIC_ICR)
2510  		WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_x2apic_icr_read(apic)));
2511  	else
2512  		kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
2513  }
2514  EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
2515  
2516  void kvm_free_lapic(struct kvm_vcpu *vcpu)
2517  {
2518  	struct kvm_lapic *apic = vcpu->arch.apic;
2519  
2520  	if (!vcpu->arch.apic) {
2521  		static_branch_dec(&kvm_has_noapic_vcpu);
2522  		return;
2523  	}
2524  
2525  	hrtimer_cancel(&apic->lapic_timer.timer);
2526  
2527  	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
2528  		static_branch_slow_dec_deferred(&apic_hw_disabled);
2529  
2530  	if (!apic->sw_enabled)
2531  		static_branch_slow_dec_deferred(&apic_sw_disabled);
2532  
2533  	if (apic->regs)
2534  		free_page((unsigned long)apic->regs);
2535  
2536  	kfree(apic);
2537  }
2538  
2539  /*
2540   *----------------------------------------------------------------------
2541   * LAPIC interface
2542   *----------------------------------------------------------------------
2543   */
2544  u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
2545  {
2546  	struct kvm_lapic *apic = vcpu->arch.apic;
2547  
2548  	if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2549  		return 0;
2550  
2551  	return apic->lapic_timer.tscdeadline;
2552  }
2553  
2554  void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
2555  {
2556  	struct kvm_lapic *apic = vcpu->arch.apic;
2557  
2558  	if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2559  		return;
2560  
2561  	hrtimer_cancel(&apic->lapic_timer.timer);
2562  	apic->lapic_timer.tscdeadline = data;
2563  	start_apic_timer(apic);
2564  }
2565  
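/*
 * CR8 is the task-priority class, i.e. the high nibble of the TPR:
 * writes set TPR[7:4] = CR8[3:0] and reads return TPR[7:4].
 */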
2566  void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
2567  {
2568  	apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4);
2569  }
2570  
2571  u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
2572  {
2573  	u64 tpr;
2574  
2575  	tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
2576  
2577  	return (tpr & 0xf0) >> 4;
2578  }
2579  
2580  void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
2581  {
2582  	u64 old_value = vcpu->arch.apic_base;
2583  	struct kvm_lapic *apic = vcpu->arch.apic;
2584  
2585  	vcpu->arch.apic_base = value;
2586  
2587  	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
2588  		kvm_update_cpuid_runtime(vcpu);
2589  
2590  	if (!apic)
2591  		return;
2592  
2593  	/* update jump label if enable bit changes */
2594  	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
2595  		if (value & MSR_IA32_APICBASE_ENABLE) {
2596  			kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2597  			static_branch_slow_dec_deferred(&apic_hw_disabled);
2598  			/* Check if there are APF page ready requests pending */
2599  			kvm_make_request(KVM_REQ_APF_READY, vcpu);
2600  		} else {
2601  			static_branch_inc(&apic_hw_disabled.key);
2602  			atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
2603  		}
2604  	}
2605  
2606  	if ((old_value ^ value) & X2APIC_ENABLE) {
2607  		if (value & X2APIC_ENABLE)
2608  			kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
2609  		else if (value & MSR_IA32_APICBASE_ENABLE)
2610  			kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2611  	}
2612  
2613  	if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) {
2614  		kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
2615  		kvm_x86_call(set_virtual_apic_mode)(vcpu);
2616  	}
2617  
2618  	apic->base_address = apic->vcpu->arch.apic_base &
2619  			     MSR_IA32_APICBASE_BASE;
2620  
2621  	if ((value & MSR_IA32_APICBASE_ENABLE) &&
2622  	     apic->base_address != APIC_DEFAULT_PHYS_BASE) {
2623  		kvm_set_apicv_inhibit(apic->vcpu->kvm,
2624  				      APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
2625  	}
2626  }
2627  
2628  void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
2629  {
2630  	struct kvm_lapic *apic = vcpu->arch.apic;
2631  
2632  	/*
2633  	 * When APICv is enabled, KVM must always search the IRR for a pending
2634  	 * IRQ, as other vCPUs and devices can set IRR bits even if the vCPU
2635  	 * isn't running.  If APICv is disabled, KVM _should_ search the IRR
2636  	 * for a pending IRQ.  But KVM currently doesn't ensure *all* hardware,
2637  	 * e.g. CPUs and IOMMUs, has seen the change in state, i.e. searching
2638  	 * the IRR at this time could race with IRQ delivery from hardware that
2639  	 * still sees APICv as being enabled.
2640  	 *
2641  	 * FIXME: Ensure other vCPUs and devices observe the change in APICv
2642  	 *        state prior to updating KVM's metadata caches, so that KVM
2643  	 *        can safely search the IRR and set irr_pending accordingly.
2644  	 */
2645  	apic->irr_pending = true;
2646  
2647  	if (apic->apicv_active)
2648  		apic->isr_count = 1;
2649  	else
2650  		apic->isr_count = count_vectors(apic->regs + APIC_ISR);
2651  
2652  	apic->highest_isr_cache = -1;
2653  }
2654  
2655  int kvm_alloc_apic_access_page(struct kvm *kvm)
2656  {
2657  	struct page *page;
2658  	void __user *hva;
2659  	int ret = 0;
2660  
2661  	mutex_lock(&kvm->slots_lock);
2662  	if (kvm->arch.apic_access_memslot_enabled ||
2663  	    kvm->arch.apic_access_memslot_inhibited)
2664  		goto out;
2665  
2666  	hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
2667  				      APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
2668  	if (IS_ERR(hva)) {
2669  		ret = PTR_ERR(hva);
2670  		goto out;
2671  	}
2672  
2673  	page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
2674  	if (is_error_page(page)) {
2675  		ret = -EFAULT;
2676  		goto out;
2677  	}
2678  
2679  	/*
2680  	 * Do not pin the page in memory, so that memory hot-unplug
2681  	 * is able to migrate it.
2682  	 */
2683  	put_page(page);
2684  	kvm->arch.apic_access_memslot_enabled = true;
2685  out:
2686  	mutex_unlock(&kvm->slots_lock);
2687  	return ret;
2688  }
2689  EXPORT_SYMBOL_GPL(kvm_alloc_apic_access_page);
2690  
2691  void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu)
2692  {
2693  	struct kvm *kvm = vcpu->kvm;
2694  
2695  	if (!kvm->arch.apic_access_memslot_enabled)
2696  		return;
2697  
2698  	kvm_vcpu_srcu_read_unlock(vcpu);
2699  
2700  	mutex_lock(&kvm->slots_lock);
2701  
2702  	if (kvm->arch.apic_access_memslot_enabled) {
2703  		__x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
2704  		/*
2705  		 * Clear "enabled" after the memslot is deleted so that a
2706  		 * different vCPU doesn't get a false negative when checking
2707  		 * the flag out of slots_lock.  No additional memory barrier is
2708  		 * needed as modifying memslots requires waiting for other vCPUs to
2709  		 * drop SRCU (see above), and false positives are ok as the
2710  		 * flag is rechecked after acquiring slots_lock.
2711  		 */
2712  		kvm->arch.apic_access_memslot_enabled = false;
2713  
2714  		/*
2715  		 * Mark the memslot as inhibited to prevent reallocating the
2716  		 * memslot during vCPU creation, e.g. if a vCPU is hotplugged.
2717  		 */
2718  		kvm->arch.apic_access_memslot_inhibited = true;
2719  	}
2720  
2721  	mutex_unlock(&kvm->slots_lock);
2722  
2723  	kvm_vcpu_srcu_read_lock(vcpu);
2724  }
2725  
2726  void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
2727  {
2728  	struct kvm_lapic *apic = vcpu->arch.apic;
2729  	u64 msr_val;
2730  	int i;
2731  
2732  	kvm_x86_call(apicv_pre_state_restore)(vcpu);
2733  
2734  	if (!init_event) {
2735  		msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
2736  		if (kvm_vcpu_is_reset_bsp(vcpu))
2737  			msr_val |= MSR_IA32_APICBASE_BSP;
2738  		kvm_lapic_set_base(vcpu, msr_val);
2739  	}
2740  
2741  	if (!apic)
2742  		return;
2743  
2744  	/* Stop the timer in case it's a reset to an active apic */
2745  	hrtimer_cancel(&apic->lapic_timer.timer);
2746  
2747  	/* The xAPIC ID is set at RESET even if the APIC was already enabled. */
2748  	if (!init_event)
2749  		kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2750  	kvm_apic_set_version(apic->vcpu);
2751  
2752  	for (i = 0; i < apic->nr_lvt_entries; i++)
2753  		kvm_lapic_set_reg(apic, APIC_LVTx(i), APIC_LVT_MASKED);
2754  	apic_update_lvtt(apic);
2755  	if (kvm_vcpu_is_reset_bsp(vcpu) &&
2756  	    kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
2757  		kvm_lapic_set_reg(apic, APIC_LVT0,
2758  			     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
2759  	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2760  
2761  	kvm_apic_set_dfr(apic, 0xffffffffU);
2762  	apic_set_spiv(apic, 0xff);
2763  	kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
2764  	if (!apic_x2apic_mode(apic))
2765  		kvm_apic_set_ldr(apic, 0);
2766  	kvm_lapic_set_reg(apic, APIC_ESR, 0);
2767  	if (!apic_x2apic_mode(apic)) {
2768  		kvm_lapic_set_reg(apic, APIC_ICR, 0);
2769  		kvm_lapic_set_reg(apic, APIC_ICR2, 0);
2770  	} else {
2771  		kvm_lapic_set_reg64(apic, APIC_ICR, 0);
2772  	}
2773  	kvm_lapic_set_reg(apic, APIC_TDCR, 0);
2774  	kvm_lapic_set_reg(apic, APIC_TMICT, 0);
2775  	for (i = 0; i < 8; i++) {
2776  		kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
2777  		kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
2778  		kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
2779  	}
2780  	kvm_apic_update_apicv(vcpu);
2781  	update_divide_count(apic);
2782  	atomic_set(&apic->lapic_timer.pending, 0);
2783  
2784  	vcpu->arch.pv_eoi.msr_val = 0;
2785  	apic_update_ppr(apic);
2786  	if (apic->apicv_active) {
2787  		kvm_x86_call(apicv_post_state_restore)(vcpu);
2788  		kvm_x86_call(hwapic_irr_update)(vcpu, -1);
2789  		kvm_x86_call(hwapic_isr_update)(-1);
2790  	}
2791  
2792  	vcpu->arch.apic_arb_prio = 0;
2793  	vcpu->arch.apic_attention = 0;
2794  
2795  	kvm_recalculate_apic_map(vcpu->kvm);
2796  }
2797  
2798  /*
2799   *----------------------------------------------------------------------
2800   * timer interface
2801   *----------------------------------------------------------------------
2802   */
2803  
2804  static bool lapic_is_periodic(struct kvm_lapic *apic)
2805  {
2806  	return apic_lvtt_period(apic);
2807  }
2808  
2809  int apic_has_pending_timer(struct kvm_vcpu *vcpu)
2810  {
2811  	struct kvm_lapic *apic = vcpu->arch.apic;
2812  
2813  	if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
2814  		return atomic_read(&apic->lapic_timer.pending);
2815  
2816  	return 0;
2817  }
2818  
2819  int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
2820  {
2821  	u32 reg = kvm_lapic_get_reg(apic, lvt_type);
2822  	int vector, mode, trig_mode;
2823  	int r;
2824  
2825  	if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
2826  		vector = reg & APIC_VECTOR_MASK;
2827  		mode = reg & APIC_MODE_MASK;
2828  		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
2829  
2830  		r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
2831  		if (r && lvt_type == APIC_LVTPC &&
2832  		    guest_cpuid_is_intel_compatible(apic->vcpu))
2833  			kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
2834  		return r;
2835  	}
2836  	return 0;
2837  }
2838  
2839  void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
2840  {
2841  	struct kvm_lapic *apic = vcpu->arch.apic;
2842  
2843  	if (apic)
2844  		kvm_apic_local_deliver(apic, APIC_LVT0);
2845  }
2846  
2847  static const struct kvm_io_device_ops apic_mmio_ops = {
2848  	.read     = apic_mmio_read,
2849  	.write    = apic_mmio_write,
2850  };
2851  
2852  static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
2853  {
2854  	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
2855  	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
2856  
2857  	apic_timer_expired(apic, true);
2858  
2859  	if (lapic_is_periodic(apic)) {
2860  		advance_periodic_target_expiration(apic);
2861  		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
2862  		return HRTIMER_RESTART;
2863  	} else
2864  		return HRTIMER_NORESTART;
2865  }
2866  
2867  int kvm_create_lapic(struct kvm_vcpu *vcpu)
2868  {
2869  	struct kvm_lapic *apic;
2870  
2871  	ASSERT(vcpu != NULL);
2872  
2873  	if (!irqchip_in_kernel(vcpu->kvm)) {
2874  		static_branch_inc(&kvm_has_noapic_vcpu);
2875  		return 0;
2876  	}
2877  
2878  	apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
2879  	if (!apic)
2880  		goto nomem;
2881  
2882  	vcpu->arch.apic = apic;
2883  
2884  	if (kvm_x86_ops.alloc_apic_backing_page)
2885  		apic->regs = kvm_x86_call(alloc_apic_backing_page)(vcpu);
2886  	else
2887  		apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
2888  	if (!apic->regs) {
2889  		printk(KERN_ERR "Failed to allocate APIC register page for vcpu %x\n",
2890  		       vcpu->vcpu_id);
2891  		goto nomem_free_apic;
2892  	}
2893  	apic->vcpu = vcpu;
2894  
2895  	apic->nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu);
2896  
2897  	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
2898  		     HRTIMER_MODE_ABS_HARD);
2899  	apic->lapic_timer.timer.function = apic_timer_fn;
2900  	if (lapic_timer_advance)
2901  		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
2902  
2903  	/*
2904  	 * Stuff the APIC ENABLE bit in lieu of temporarily incrementing
2905  	 * apic_hw_disabled; the full RESET value is set by kvm_lapic_reset().
2906  	 */
2907  	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
2908  	static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
2909  	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
2910  
2911  	/*
2912  	 * Defer evaluating inhibits until the vCPU is first run, as this vCPU
2913  	 * will not get notified of any changes until this vCPU is visible to
2914  	 * other vCPUs (marked online and added to the set of vCPUs).
2915  	 *
2916  	 * Opportunistically mark APICv active, as VMX in particular is highly
2917  	 * unlikely to have inhibits.  Ignore the current per-VM APICv state so
2918  	 * that vCPU creation is guaranteed to run with a deterministic value;
2919  	 * the request will ensure the vCPU gets the correct state before VM-Entry.
2920  	 */
2921  	if (enable_apicv) {
2922  		apic->apicv_active = true;
2923  		kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
2924  	}
2925  
2926  	return 0;
2927  nomem_free_apic:
2928  	kfree(apic);
2929  	vcpu->arch.apic = NULL;
2930  nomem:
2931  	return -ENOMEM;
2932  }
2933  
2934  int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
2935  {
2936  	struct kvm_lapic *apic = vcpu->arch.apic;
2937  	u32 ppr;
2938  
2939  	if (!kvm_apic_present(vcpu))
2940  		return -1;
2941  
2942  	__apic_update_ppr(apic, &ppr);
2943  	return apic_has_interrupt_for_ppr(apic, ppr);
2944  }
2945  EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt);
2946  
2947  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
2948  {
2949  	u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
2950  
2951  	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
2952  		return 1;
2953  	if ((lvt0 & APIC_LVT_MASKED) == 0 &&
2954  	    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
2955  		return 1;
2956  	return 0;
2957  }
2958  
2959  void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
2960  {
2961  	struct kvm_lapic *apic = vcpu->arch.apic;
2962  
2963  	if (atomic_read(&apic->lapic_timer.pending) > 0) {
2964  		kvm_apic_inject_pending_timer_irqs(apic);
2965  		atomic_set(&apic->lapic_timer.pending, 0);
2966  	}
2967  }
2968  
2969  void kvm_apic_ack_interrupt(struct kvm_vcpu *vcpu, int vector)
2970  {
2971  	struct kvm_lapic *apic = vcpu->arch.apic;
2972  	u32 ppr;
2973  
2974  	if (WARN_ON_ONCE(vector < 0 || !apic))
2975  		return;
2976  
2977  	/*
2978  	 * We get here even with APIC virtualization enabled, if doing
2979  	 * nested virtualization and L1 runs with the "acknowledge interrupt
2980  	 * on exit" mode.  Then we cannot inject the interrupt via RVI,
2981  	 * because the process would deliver it through the IDT.
2982  	 */
2983  
2984  	apic_clear_irr(vector, apic);
2985  	if (kvm_hv_synic_auto_eoi_set(vcpu, vector)) {
2986  		/*
2987  		 * For auto-EOI interrupts, there might be another pending
2988  		 * interrupt above PPR, so check whether to raise another
2989  		 * KVM_REQ_EVENT.
2990  		 */
2991  		apic_update_ppr(apic);
2992  	} else {
2993  		/*
2994  		 * For normal interrupts, PPR has been raised and there cannot
2995  		 * be a higher-priority pending interrupt---except if there was
2996  		 * a concurrent interrupt injection, but that would have
2997  		 * triggered KVM_REQ_EVENT already.
2998  		 */
2999  		apic_set_isr(vector, apic);
3000  		__apic_update_ppr(apic, &ppr);
3001  	}
3002  
3003  }
3004  EXPORT_SYMBOL_GPL(kvm_apic_ack_interrupt);
3005  
3006  static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
3007  		struct kvm_lapic_state *s, bool set)
3008  {
3009  	if (apic_x2apic_mode(vcpu->arch.apic)) {
3010  		u32 x2apic_id = kvm_x2apic_id(vcpu->arch.apic);
3011  		u32 *id = (u32 *)(s->regs + APIC_ID);
3012  		u32 *ldr = (u32 *)(s->regs + APIC_LDR);
3013  		u64 icr;
3014  
3015  		if (vcpu->kvm->arch.x2apic_format) {
3016  			if (*id != x2apic_id)
3017  				return -EINVAL;
3018  		} else {
3019  			/*
3020  			 * Ignore the userspace value when setting APIC state.
3021  			 * KVM's model is that the x2APIC ID is readonly, e.g.
3022  			 * KVM only supports delivering interrupts to KVM's
3023  			 * version of the x2APIC ID.  However, for backwards
3024  			 * compatibility, don't reject attempts to set a
3025  			 * mismatched ID for userspace that hasn't opted into
3026  			 * x2apic_format.
3027  			 */
3028  			if (set)
3029  				*id = x2apic_id;
3030  			else
3031  				*id = x2apic_id << 24;
3032  		}
3033  
3034  		/*
3035  		 * In x2APIC mode, the LDR is fixed and based on the id.  And
3036  		 * if the ICR is _not_ split, ICR is internally a single 64-bit
3037  		 * register, but needs to be split to ICR+ICR2 in userspace for
3038  		 * backwards compatibility.
3039  		 */
3040  		if (set)
3041  			*ldr = kvm_apic_calc_x2apic_ldr(x2apic_id);
3042  
3043  		if (!kvm_x86_ops.x2apic_icr_is_split) {
3044  			if (set) {
3045  				icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
3046  				      (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
3047  				__kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
3048  			} else {
3049  				icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
3050  				__kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
3051  			}
3052  		}
3053  	}
3054  
3055  	return 0;
3056  }
3057  
3058  int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
3059  {
3060  	memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
3061  
3062  	/*
3063  	 * Get calculated timer current count for remaining timer period (if
3064  	 * any) and store it in the returned register set.
3065  	 */
3066  	__kvm_lapic_set_reg(s->regs, APIC_TMCCT,
3067  			    __apic_read(vcpu->arch.apic, APIC_TMCCT));
3068  
3069  	return kvm_apic_state_fixup(vcpu, s, false);
3070  }
3071  
3072  int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
3073  {
3074  	struct kvm_lapic *apic = vcpu->arch.apic;
3075  	int r;
3076  
3077  	kvm_x86_call(apicv_pre_state_restore)(vcpu);
3078  
3079  	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
3080  	/* set SPIV separately to get count of SW disabled APICs right */
3081  	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
3082  
3083  	r = kvm_apic_state_fixup(vcpu, s, true);
3084  	if (r) {
3085  		kvm_recalculate_apic_map(vcpu->kvm);
3086  		return r;
3087  	}
3088  	memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
3089  
3090  	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
3091  	kvm_recalculate_apic_map(vcpu->kvm);
3092  	kvm_apic_set_version(vcpu);
3093  
3094  	apic_update_ppr(apic);
3095  	cancel_apic_timer(apic);
3096  	apic->lapic_timer.expired_tscdeadline = 0;
3097  	apic_update_lvtt(apic);
3098  	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
3099  	update_divide_count(apic);
3100  	__start_apic_timer(apic, APIC_TMCCT);
3101  	kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
3102  	kvm_apic_update_apicv(vcpu);
3103  	if (apic->apicv_active) {
3104  		kvm_x86_call(apicv_post_state_restore)(vcpu);
3105  		kvm_x86_call(hwapic_irr_update)(vcpu,
3106  						apic_find_highest_irr(apic));
3107  		kvm_x86_call(hwapic_isr_update)(apic_find_highest_isr(apic));
3108  	}
3109  	kvm_make_request(KVM_REQ_EVENT, vcpu);
3110  	if (ioapic_in_kernel(vcpu->kvm))
3111  		kvm_rtc_eoi_tracking_restore_one(vcpu);
3112  
3113  	vcpu->arch.apic_arb_prio = 0;
3114  
3115  	return 0;
3116  }
3117  
3118  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
3119  {
3120  	struct hrtimer *timer;
3121  
3122  	if (!lapic_in_kernel(vcpu) ||
3123  		kvm_can_post_timer_interrupt(vcpu))
3124  		return;
3125  
3126  	timer = &vcpu->arch.apic->lapic_timer.timer;
3127  	if (hrtimer_cancel(timer))
3128  		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
3129  }
3130  
3131  /*
3132   * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
3133   *
3134   * Detect whether the guest triggered PV EOI since the
3135   * last entry. If yes, set EOI on the guest's behalf.
3136   * Clear PV EOI in guest memory in any case.
3137   */
3138  static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
3139  					struct kvm_lapic *apic)
3140  {
3141  	int vector;
3142  	/*
3143  	 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
3144  	 * and KVM_PV_EOI_ENABLED in guest memory as follows:
3145  	 *
3146  	 * KVM_APIC_PV_EOI_PENDING is unset:
3147  	 * 	-> host disabled PV EOI.
3148  	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
3149  	 * 	-> host enabled PV EOI, guest did not execute EOI yet.
3150  	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
3151  	 * 	-> host enabled PV EOI, guest executed EOI.
3152  	 */
3153  	BUG_ON(!pv_eoi_enabled(vcpu));
3154  
3155  	if (pv_eoi_test_and_clr_pending(vcpu))
3156  		return;
3157  	vector = apic_set_eoi(apic);
3158  	trace_kvm_pv_eoi(apic, vector);
3159  }
3160  
3161  void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
3162  {
3163  	u32 data;
3164  
3165  	if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
3166  		apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
3167  
3168  	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
3169  		return;
3170  
3171  	if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
3172  				  sizeof(u32)))
3173  		return;
3174  
3175  	apic_set_tpr(vcpu->arch.apic, data & 0xff);
3176  }
3177  
3178  /*
3179   * apic_sync_pv_eoi_to_guest - called before vmentry
3180   *
3181   * Detect whether it's safe to enable PV EOI and
3182   * if yes do so.
3183   */
3184  static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
3185  					struct kvm_lapic *apic)
3186  {
3187  	if (!pv_eoi_enabled(vcpu) ||
3188  	    /* IRR set or many bits in ISR: could be nested. */
3189  	    apic->irr_pending ||
3190  	    /* Cache not set: could be safe but we don't bother. */
3191  	    apic->highest_isr_cache == -1 ||
3192  	    /* Need EOI to update ioapic. */
3193  	    kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
3194  		/*
3195  		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
3196  		 * so we need not do anything here.
3197  		 */
3198  		return;
3199  	}
3200  
3201  	pv_eoi_set_pending(apic->vcpu);
3202  }
3203  
3204  void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
3205  {
3206  	u32 data, tpr;
3207  	int max_irr, max_isr;
3208  	struct kvm_lapic *apic = vcpu->arch.apic;
3209  
3210  	apic_sync_pv_eoi_to_guest(vcpu, apic);
3211  
3212  	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
3213  		return;
3214  
3215  	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
3216  	max_irr = apic_find_highest_irr(apic);
3217  	if (max_irr < 0)
3218  		max_irr = 0;
3219  	max_isr = apic_find_highest_isr(apic);
3220  	if (max_isr < 0)
3221  		max_isr = 0;
3222  	data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
3223  
3224  	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
3225  				sizeof(u32));
3226  }
3227  
3228  int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
3229  {
3230  	if (vapic_addr) {
3231  		if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
3232  					&vcpu->arch.apic->vapic_cache,
3233  					vapic_addr, sizeof(u32)))
3234  			return -EINVAL;
3235  		__set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
3236  	} else {
3237  		__clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
3238  	}
3239  
3240  	vcpu->arch.apic->vapic_addr = vapic_addr;
3241  	return 0;
3242  }
3243  
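/*
 * MSR-style (64-bit) accessors on top of the 32-bit register space.  Only
 * APIC_ICR is a true 64-bit register; every other read returns the 32-bit
 * register value zero-extended, and every other write must have bits 63:32
 * clear.  A non-zero return means the access should fail, e.g. reserved bits
 * set or an invalid register.
 */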
static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
{
	u32 low;

	if (reg == APIC_ICR) {
		*data = kvm_x2apic_icr_read(apic);
		return 0;
	}

	if (kvm_lapic_reg_read(apic, reg, 4, &low))
		return 1;

	*data = low;

	return 0;
}

static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data)
{
	/*
	 * ICR is a 64-bit register in x2APIC mode (and Hyper-V PV vAPIC) and
	 * can be written as such; all other registers remain accessible only
	 * through 32-bit reads/writes.
	 */
	if (reg == APIC_ICR)
		return kvm_x2apic_icr_write(apic, data);

	/* Bits 63:32 are reserved in all other registers. */
	if (data >> 32)
		return 1;

	return kvm_lapic_reg_write(apic, reg, (u32)data);
}

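/*
 * x2APIC MSR accesses: the MSR index is converted to the legacy MMIO register
 * offset by subtracting APIC_BASE_MSR (0x800) and shifting left by 4.  For
 * example, the x2APIC ICR MSR 0x830 maps to (0x830 - 0x800) << 4 = 0x300,
 * i.e. APIC_ICR.  Accesses are rejected (non-zero return) unless the
 * in-kernel APIC is in x2APIC mode.
 */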
int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = (msr - APIC_BASE_MSR) << 4;

	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
		return 1;

	return kvm_lapic_msr_write(apic, reg, data);
}

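/*
 * RDMSR counterpart of kvm_x2apic_msr_write(): same MSR-to-offset mapping,
 * same requirement that the in-kernel APIC be in x2APIC mode.
 */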
int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = (msr - APIC_BASE_MSR) << 4;

	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
		return 1;

	return kvm_lapic_msr_read(apic, reg, data);
}

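/*
 * Hyper-V vAPIC assist MSR handlers.  Unlike the x2APIC path, the caller
 * supplies the APIC register offset directly (for the Hyper-V EOI/ICR/TPR
 * MSRs this is presumably APIC_EOI, APIC_ICR and APIC_TASKPRI) and only an
 * in-kernel APIC is required; there is no x2APIC-mode check.
 */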
int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
{
	if (!lapic_in_kernel(vcpu))
		return 1;

	return kvm_lapic_msr_write(vcpu->arch.apic, reg, data);
}

int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
{
	if (!lapic_in_kernel(vcpu))
		return 1;

	return kvm_lapic_msr_read(vcpu->arch.apic, reg, data);
}

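/*
 * MSR_KVM_PV_EOI_EN handler: bit 0 (KVM_MSR_ENABLED) turns the feature on,
 * the remaining bits give the 4-byte-aligned GPA of the guest's EOI flag.
 * Illustrative guest-side usage (variable name hypothetical, not from this
 * file):
 *
 *	wrmsrl(MSR_KVM_PV_EOI_EN, __pa(&pv_eoi_flag) | KVM_MSR_ENABLED);
 */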
int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
{
	u64 addr = data & ~KVM_MSR_ENABLED;
	struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
	unsigned long new_len;
	int ret;

	if (!IS_ALIGNED(addr, 4))
		return 1;

	if (data & KVM_MSR_ENABLED) {
		if (addr == ghc->gpa && len <= ghc->len)
			new_len = ghc->len;
		else
			new_len = len;

		ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
		if (ret)
			return ret;
	}

	vcpu->arch.pv_eoi.msr_val = data;

	return 0;
}

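/*
 * Process pending INIT/SIPI events for this vCPU.  An accepted INIT resets
 * the vCPU and parks an AP in KVM_MP_STATE_INIT_RECEIVED (the BSP goes
 * straight to RUNNABLE); a subsequent SIPI delivers the startup vector and
 * makes the AP runnable.  A negative value is returned only if the nested
 * event check fails.
 */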
int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u8 sipi_vector;
	int r;

	if (!kvm_apic_has_pending_init_or_sipi(vcpu))
		return 0;

	if (is_guest_mode(vcpu)) {
		r = kvm_check_nested_events(vcpu);
		if (r < 0)
			return r == -EBUSY ? 0 : r;
		/*
		 * Continue processing INIT/SIPI even if a nested VM-Exit
		 * occurred, e.g. pending SIPIs should be dropped if INIT+SIPI
		 * are blocked as a result of transitioning to VMX root mode.
		 */
	}

	/*
	 * INITs are blocked while the CPU is in specific states (SMM, VMX root
	 * mode, SVM with GIF=0), while SIPIs are dropped if the CPU isn't in
	 * wait-for-SIPI (WFS).
	 */
	if (!kvm_apic_init_sipi_allowed(vcpu)) {
		WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
		clear_bit(KVM_APIC_SIPI, &apic->pending_events);
		return 0;
	}

	if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) {
		kvm_vcpu_reset(vcpu, true);
		if (kvm_vcpu_is_bsp(apic->vcpu))
			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
		else
			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
	}
	if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events)) {
		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
			/* evaluate pending_events before reading the vector */
			smp_rmb();
			sipi_vector = apic->sipi_vector;
			kvm_x86_call(vcpu_deliver_sipi_vector)(vcpu,
							       sipi_vector);
			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
		}
	}
	return 0;
}

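/*
 * Module teardown: flush the deferred static keys so no deferred work is
 * left pending, and warn if either key is still active, which would indicate
 * unbalanced enable/disable accounting at exit.
 */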
void kvm_lapic_exit(void)
{
	static_key_deferred_flush(&apic_hw_disabled);
	WARN_ON(static_branch_unlikely(&apic_hw_disabled.key));
	static_key_deferred_flush(&apic_sw_disabled);
	WARN_ON(static_branch_unlikely(&apic_sw_disabled.key));
}