// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 ARM Ltd.
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/kvm_host.h>
#include <linux/irqchip/arm-gic-v3.h>

#include "vgic.h"

/*
 * How KVM uses GICv4 (insert rude comments here):
 *
 * The vgic-v4 layer acts as a bridge between several entities:
 * - The GICv4 ITS representation offered by the ITS driver
 * - VFIO, which is in charge of the PCI endpoint
 * - The virtual ITS, which is the only thing the guest sees
 *
 * The configuration of VLPIs is triggered by a callback from VFIO,
 * instructing KVM that a PCI device has been configured to deliver
 * MSIs to a vITS.
 *
 * kvm_vgic_v4_set_forwarding() is thus called with the routing entry,
 * and this is used to find the corresponding vITS data structures
 * (ITS instance, device, event and irq) using a process that is
 * extremely similar to the injection of an MSI.
 *
 * At this stage, we can link the guest's view of an LPI (uniquely
 * identified by the routing entry) and the host irq, using the GICv4
 * driver mapping operation. Should the mapping succeed, we've then
 * successfully upgraded the guest's LPI to a VLPI. We can then start
 * with updating GICv4's view of the property table and generating an
 * INValidation in order to kickstart the delivery of this VLPI to the
 * guest directly, without software intervention. Well, almost.
 *
 * When the PCI endpoint is deconfigured, this operation is reversed
 * with VFIO calling kvm_vgic_v4_unset_forwarding().
 *
 * Once the VLPI has been mapped, it needs to follow any change the
 * guest performs on its LPI through the vITS. For that, a number of
 * command handlers have hooks to communicate these changes to the HW:
 * - Any invalidation triggers a call to its_prop_update_vlpi()
 * - The INT command results in an irq_set_irqchip_state(), which
 *   generates an INT on the corresponding VLPI.
 * - The CLEAR command results in an irq_set_irqchip_state(), which
 *   generates a CLEAR on the corresponding VLPI.
 * - DISCARD translates into an unmap, similar to a call to
 *   kvm_vgic_v4_unset_forwarding().
 * - MOVI is translated by an update of the existing mapping, changing
 *   the target vcpu, resulting in a VMOVI being generated.
 * - MOVALL is translated by a string of mapping updates (similar to
 *   the handling of MOVI). MOVALL is horrible.
 *
 * Note that a DISCARD/MAPTI sequence emitted from the guest without
 * reprogramming the PCI endpoint after MAPTI does not result in a
 * VLPI being mapped, as there is no callback from VFIO (the guest
 * will get the interrupt via the normal SW injection). Fixing this is
 * not trivial, and requires some horrible messing with the VFIO
 * internals. Not fun. Don't do that.
 *
 * Then there is the scheduling. Each time a vcpu is about to run on a
 * physical CPU, KVM must tell the corresponding redistributor about
 * it. And if we've migrated our vcpu from one CPU to another, we must
 * tell the ITS (so that the messages reach the right redistributor).
 * This is done in two steps: first issue an irq_set_affinity() on the
 * irq corresponding to the vcpu, then call its_make_vpe_resident().
 * You must be in a non-preemptible context. On exit, a call to
 * its_make_vpe_non_resident() tells the redistributor that we're done
 * with the vcpu.
 *
 * Finally, the doorbell handling: Each vcpu is allocated an interrupt
 * which will fire each time a VLPI is made pending whilst the vcpu is
 * not running. Each time the vcpu gets blocked, the doorbell
 * interrupt gets enabled. When the vcpu is unblocked (for whatever
 * reason), the doorbell interrupt is disabled.
 */

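/*
 * Status flags applied to each vPE doorbell interrupt; see the
 * comment in vgic_v4_init() for why auto-enabling and lazy disabling
 * are not welcome here.
 */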
#define DB_IRQ_FLAGS	(IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY | IRQ_NO_BALANCING)

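/*
 * Doorbell handler: a VLPI targeting this vcpu was made pending while
 * its vPE was not resident. Record the pending state and kick the
 * vcpu so that the interrupt gets delivered via the usual SW path.
 */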
static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
{
	struct kvm_vcpu *vcpu = info;

	/* We got the message, no need to fire again */
	if (!kvm_vgic_global_state.has_gicv4_1 &&
	    !irqd_irq_disabled(&irq_to_desc(irq)->irq_data))
		disable_irq_nosync(irq);

	/*
	 * The v4.1 doorbell can fire concurrently with the vPE being
	 * made non-resident. Ensure we only update pending_last
	 * *after* the non-residency sequence has completed.
	 */
	raw_spin_lock(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_lock);
	vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
	raw_spin_unlock(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_lock);

	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
	kvm_vcpu_kick(vcpu);

	return IRQ_HANDLED;
}

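/*
 * Mirror the vSGI configuration (enable, group, priority) held in the
 * vgic_irq into the vPE's sgi_config, from where the GICv4.1 layer
 * picks it up when configuring the HW vSGI.
 */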
static void vgic_v4_sync_sgi_config(struct its_vpe *vpe, struct vgic_irq *irq)
{
	vpe->sgi_config[irq->intid].enabled	= irq->enabled;
	vpe->sgi_config[irq->intid].group	= irq->group;
	vpe->sgi_config[irq->intid].priority	= irq->priority;
}

static void vgic_v4_enable_vsgis(struct kvm_vcpu *vcpu)
{
	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
	int i;

	/*
	 * With GICv4.1, every virtual SGI can be directly injected. So
	 * let's pretend that they are HW interrupts, tied to a host
	 * IRQ. The SGI code will do its magic.
	 */
	for (i = 0; i < VGIC_NR_SGIS; i++) {
		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i);
		struct irq_desc *desc;
		unsigned long flags;
		int ret;

		raw_spin_lock_irqsave(&irq->irq_lock, flags);

		if (irq->hw)
			goto unlock;

		irq->hw = true;
		irq->host_irq = irq_find_mapping(vpe->sgi_domain, i);

		/* Transfer the full irq state to the vPE */
		vgic_v4_sync_sgi_config(vpe, irq);
		desc = irq_to_desc(irq->host_irq);
		ret = irq_domain_activate_irq(irq_desc_get_irq_data(desc),
					      false);
		if (!WARN_ON(ret)) {
			/* Transfer pending state */
			ret = irq_set_irqchip_state(irq->host_irq,
						    IRQCHIP_STATE_PENDING,
						    irq->pending_latch);
			WARN_ON(ret);
			irq->pending_latch = false;
		}
	unlock:
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
		vgic_put_irq(vcpu->kvm, irq);
	}
}

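/*
 * Downgrade the vcpu's SGIs back to pure SW injection: latch any
 * pending state from the HW into pending_latch and deactivate the
 * host interrupt backing each SGI.
 */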
static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu)
{
	int i;

	for (i = 0; i < VGIC_NR_SGIS; i++) {
		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i);
		struct irq_desc *desc;
		unsigned long flags;
		int ret;

		raw_spin_lock_irqsave(&irq->irq_lock, flags);

		if (!irq->hw)
			goto unlock;

		irq->hw = false;
		ret = irq_get_irqchip_state(irq->host_irq,
					    IRQCHIP_STATE_PENDING,
					    &irq->pending_latch);
		WARN_ON(ret);

		desc = irq_to_desc(irq->host_irq);
		irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
	unlock:
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
		vgic_put_irq(vcpu->kvm, irq);
	}
}

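/*
 * Switch every vcpu's SGIs between direct (HW) and purely virtual
 * (SW) delivery, depending on the distributor's nASSGIreq setting.
 * The guest is briefly halted so the transition appears atomic from
 * its point of view.
 */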
void vgic_v4_configure_vsgis(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct kvm_vcpu *vcpu;
	unsigned long i;

	lockdep_assert_held(&kvm->arch.config_lock);

	kvm_arm_halt_guest(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (dist->nassgireq)
			vgic_v4_enable_vsgis(vcpu);
		else
			vgic_v4_disable_vsgis(vcpu);
	}

	kvm_arm_resume_guest(kvm);
}

/*
 * Must be called with GICv4.1 and the vPE unmapped, which
 * indicates the invalidation of any VPT caches associated
 * with the vPE, thus we can get the VLPI state by peeking
 * at the VPT.
 */
void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val)
{
	struct its_vpe *vpe = &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
	int mask = BIT(irq->intid % BITS_PER_BYTE);
	void *va;
	u8 *ptr;

	va = page_address(vpe->vpt_page);
	ptr = va + irq->intid / BITS_PER_BYTE;

	*val = !!(*ptr & mask);
}

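/*
 * Request the per-vcpu doorbell interrupt, with the vcpu itself as
 * the cookie handed back to vgic_v4_doorbell_handler().
 */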
int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq)
{
	return request_irq(irq, vgic_v4_doorbell_handler, 0, "vcpu", vcpu);
}

/**
 * vgic_v4_init - Initialize the GICv4 data structures
 * @kvm:	Pointer to the VM being initialized
 *
 * We may be called each time a vITS is created, or when the
 * vgic is initialized. In both cases, the number of vcpus
 * should now be fixed.
 */
int vgic_v4_init(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct kvm_vcpu *vcpu;
	int nr_vcpus, ret;
	unsigned long i;

	lockdep_assert_held(&kvm->arch.config_lock);

	if (!kvm_vgic_global_state.has_gicv4)
		return 0; /* Nothing to see here... move along. */

	if (dist->its_vm.vpes)
		return 0;

	nr_vcpus = atomic_read(&kvm->online_vcpus);

	dist->its_vm.vpes = kcalloc(nr_vcpus, sizeof(*dist->its_vm.vpes),
				    GFP_KERNEL_ACCOUNT);
	if (!dist->its_vm.vpes)
		return -ENOMEM;

	dist->its_vm.nr_vpes = nr_vcpus;

	kvm_for_each_vcpu(i, vcpu, kvm)
		dist->its_vm.vpes[i] = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;

	ret = its_alloc_vcpu_irqs(&dist->its_vm);
	if (ret < 0) {
		kvm_err("VPE IRQ allocation failure\n");
		kfree(dist->its_vm.vpes);
		dist->its_vm.nr_vpes = 0;
		dist->its_vm.vpes = NULL;
		return ret;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		int irq = dist->its_vm.vpes[i]->irq;
		unsigned long irq_flags = DB_IRQ_FLAGS;

		/*
		 * Don't automatically enable the doorbell, as we're
		 * flipping it back and forth when the vcpu gets
		 * blocked. Also disable the lazy disabling, as the
		 * doorbell could kick us out of the guest too
		 * early...
		 *
		 * On GICv4.1, the doorbell is managed in HW and must
		 * be left enabled.
		 */
		if (kvm_vgic_global_state.has_gicv4_1)
			irq_flags &= ~IRQ_NOAUTOEN;
		irq_set_status_flags(irq, irq_flags);

		ret = vgic_v4_request_vpe_irq(vcpu, irq);
		if (ret) {
			kvm_err("failed to allocate vcpu IRQ%d\n", irq);
			/*
			 * Trick: adjust the number of vpes so we know
			 * how many to nuke on teardown...
			 */
			dist->its_vm.nr_vpes = i;
			break;
		}
	}

	if (ret)
		vgic_v4_teardown(kvm);

	return ret;
}

/**
 * vgic_v4_teardown - Free the GICv4 data structures
 * @kvm:	Pointer to the VM being destroyed
 */
void vgic_v4_teardown(struct kvm *kvm)
{
	struct its_vm *its_vm = &kvm->arch.vgic.its_vm;
	int i;

	lockdep_assert_held(&kvm->arch.config_lock);

	if (!its_vm->vpes)
		return;

	for (i = 0; i < its_vm->nr_vpes; i++) {
		struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, i);
		int irq = its_vm->vpes[i]->irq;

		irq_clear_status_flags(irq, DB_IRQ_FLAGS);
		free_irq(irq, vcpu);
	}

	its_free_vcpu_irqs(its_vm);
	kfree(its_vm->vpes);
	its_vm->nr_vpes = 0;
	its_vm->vpes = NULL;
}

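/*
 * Make the vPE non-resident when the vcpu stops running. If the vcpu
 * is blocked in WFI, the doorbell is left armed so that a pending
 * VLPI can wake it up.
 */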
int vgic_v4_put(struct kvm_vcpu *vcpu)
{
	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;

	if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
		return 0;

	return its_make_vpe_non_resident(vpe, !!vcpu_get_flag(vcpu, IN_WFI));
}

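/*
 * Make the vPE resident on the current physical CPU: point the
 * doorbell irq affinity at this CPU (which turns into a VMOVP at the
 * ITS level), then tell the redistributor that the vPE is here. Must
 * be called in a non-preemptible context.
 */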
int vgic_v4_load(struct kvm_vcpu *vcpu)
{
	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
	int err;

	if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
		return 0;

	if (vcpu_get_flag(vcpu, IN_WFI))
		return 0;

	/*
	 * Before making the VPE resident, make sure the redistributor
	 * corresponding to our current CPU expects us here. See the
	 * doc in drivers/irqchip/irq-gic-v4.c to understand how this
	 * turns into a VMOVP command at the ITS level.
	 */
	err = irq_set_affinity(vpe->irq, cpumask_of(smp_processor_id()));
	if (err)
		return err;

	err = its_make_vpe_resident(vpe, false, vcpu->kvm->arch.vgic.enabled);
	if (err)
		return err;

	/*
	 * Now that the VPE is resident, let's get rid of a potential
	 * doorbell interrupt that would still be pending. This is a
	 * GICv4.0 only "feature"...
	 */
	if (!kvm_vgic_global_state.has_gicv4_1)
		err = irq_set_irqchip_state(vpe->irq, IRQCHIP_STATE_PENDING, false);

	return err;
}

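/*
 * Wait for a newly resident vPE to actually be ready before entering
 * the guest; a no-op if the vPE was already committed earlier.
 */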
void vgic_v4_commit(struct kvm_vcpu *vcpu)
{
	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;

	/*
	 * No need to wait for the vPE to be ready across a shallow guest
	 * exit, as only a vcpu_put will invalidate it.
	 */
	if (!vpe->ready)
		its_commit_vpe(vpe);
}

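/*
 * Rebuild a kvm_msi from the routing entry and translate its doorbell
 * address into the vITS that owns it.
 */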
static struct vgic_its *vgic_get_its(struct kvm *kvm,
				     struct kvm_kernel_irq_routing_entry *irq_entry)
{
	struct kvm_msi msi  = (struct kvm_msi) {
		.address_lo	= irq_entry->msi.address_lo,
		.address_hi	= irq_entry->msi.address_hi,
		.data		= irq_entry->msi.data,
		.flags		= irq_entry->msi.flags,
		.devid		= irq_entry->msi.devid,
	};

	return vgic_msi_to_its(kvm, &msi);
}

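/*
 * Called when VFIO signals that a PCI device has been configured to
 * deliver MSIs to a vITS: translate the routing entry into a vLPI,
 * map it onto the host irq through the GICv4 driver, and transfer
 * any pending state so that delivery now bypasses SW injection.
 */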
int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
			       struct kvm_kernel_irq_routing_entry *irq_entry)
{
	struct vgic_its *its;
	struct vgic_irq *irq;
	struct its_vlpi_map map;
	unsigned long flags;
	int ret;

	if (!vgic_supports_direct_msis(kvm))
		return 0;

	/*
	 * Get the ITS, and escape early on error (not a valid
	 * doorbell for any of our vITSs).
	 */
	its = vgic_get_its(kvm, irq_entry);
	if (IS_ERR(its))
		return 0;

	mutex_lock(&its->its_lock);

	/* Perform the actual DevID/EventID -> LPI translation. */
	ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
				   irq_entry->msi.data, &irq);
	if (ret)
		goto out;

	/* Silently exit if the vLPI is already mapped */
	if (irq->hw)
		goto out;

	/*
	 * Emit the mapping request. If it fails, the ITS probably
	 * isn't v4 compatible, so let's silently bail out. Holding
	 * the ITS lock should ensure that nothing can modify the
	 * target vcpu.
	 */
	map = (struct its_vlpi_map) {
		.vm		= &kvm->arch.vgic.its_vm,
		.vpe		= &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe,
		.vintid		= irq->intid,
		.properties	= ((irq->priority & 0xfc) |
				   (irq->enabled ? LPI_PROP_ENABLED : 0) |
				   LPI_PROP_GROUP1),
		.db_enabled	= true,
	};

	ret = its_map_vlpi(virq, &map);
	if (ret)
		goto out;

	irq->hw		= true;
	irq->host_irq	= virq;
	atomic_inc(&map.vpe->vlpi_count);

	/* Transfer pending state */
	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	if (irq->pending_latch) {
		ret = irq_set_irqchip_state(irq->host_irq,
					    IRQCHIP_STATE_PENDING,
					    irq->pending_latch);
		WARN_RATELIMIT(ret, "IRQ %d", irq->host_irq);

		/*
		 * Clear pending_latch and communicate this state
		 * change via vgic_queue_irq_unlock.
		 */
		irq->pending_latch = false;
		vgic_queue_irq_unlock(kvm, irq, flags);
	} else {
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
	}

out:
	mutex_unlock(&its->its_lock);
	return ret;
}

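/*
 * Reverse of kvm_vgic_v4_set_forwarding(): called when the PCI
 * endpoint is deconfigured, unmapping the vLPI and falling back to
 * normal SW injection.
 */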
int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
				 struct kvm_kernel_irq_routing_entry *irq_entry)
{
	struct vgic_its *its;
	struct vgic_irq *irq;
	int ret;

	if (!vgic_supports_direct_msis(kvm))
		return 0;

	/*
	 * Get the ITS, and escape early on error (not a valid
	 * doorbell for any of our vITSs).
	 */
	its = vgic_get_its(kvm, irq_entry);
	if (IS_ERR(its))
		return 0;

	mutex_lock(&its->its_lock);

	ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
				   irq_entry->msi.data, &irq);
	if (ret)
		goto out;

	WARN_ON(!(irq->hw && irq->host_irq == virq));
	if (irq->hw) {
		atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count);
		irq->hw = false;
		ret = its_unmap_vlpi(virq);
	}

out:
	mutex_unlock(&its->its_lock);
	return ret;
}