// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

#define PERF_ATTR_CFG1_COUNTER_64BIT	BIT(0)

DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);

static LIST_HEAD(arm_pmus);
static DEFINE_MUTEX(arm_pmus_lock);

static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc);
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc);

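/*
 * Each kvm_pmc is embedded in its vcpu's kvm_pmu at index pmc->idx, so the
 * owning vcpu can be recovered with container_of(), and a counter index can
 * be turned back into its kvm_pmc.
 */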
static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc)
{
	return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]);
}

static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx)
{
	return &vcpu->arch.pmu.pmc[cnt_idx];
}

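/*
 * Mask covering the event number field for a given PMU version: PMUv3
 * implements a 10-bit event space, PMUv3p1 and later widen it to 16 bits.
 */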
static u32 __kvm_pmu_event_mask(unsigned int pmuver)
{
	switch (pmuver) {
	case ID_AA64DFR0_EL1_PMUVer_IMP:
		return GENMASK(9, 0);
	case ID_AA64DFR0_EL1_PMUVer_V3P1:
	case ID_AA64DFR0_EL1_PMUVer_V3P4:
	case ID_AA64DFR0_EL1_PMUVer_V3P5:
	case ID_AA64DFR0_EL1_PMUVer_V3P7:
		return GENMASK(15, 0);
	default:	/* Shouldn't be here, just for sanity */
		WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
		return 0;
	}
}

static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	u64 dfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1);
	u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, dfr0);

	return __kvm_pmu_event_mask(pmuver);
}

u64 kvm_pmu_evtyper_mask(struct kvm *kvm)
{
	u64 mask = ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMU_EXCLUDE_EL0 |
		   kvm_pmu_event_mask(kvm);

	if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL2, IMP))
		mask |= ARMV8_PMU_INCLUDE_EL2;

	if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL3, IMP))
		mask |= ARMV8_PMU_EXCLUDE_NS_EL0 |
			ARMV8_PMU_EXCLUDE_NS_EL1 |
			ARMV8_PMU_EXCLUDE_EL3;

	return mask;
}

/**
 * kvm_pmc_is_64bit - determine if counter is 64bit
 * @pmc: counter context
 */
static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return (pmc->idx == ARMV8_PMU_CYCLE_IDX ||
		kvm_has_feat(vcpu->kvm, ID_AA64DFR0_EL1, PMUVer, V3P5));
}

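/*
 * A counter overflows at 64 bits when the relevant "long" control bit is set
 * in PMCR_EL0: LP for the event counters, LC for the cycle counter.
 * Otherwise overflow is detected at 32 bits.
 */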
static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc)
{
	u64 val = kvm_vcpu_read_pmcr(kvm_pmc_to_vcpu(pmc));

	return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) ||
	       (pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC));
}

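/*
 * Only an even-numbered event counter with a 32-bit overflow, and whose
 * odd-numbered successor is also an event counter (not the cycle counter),
 * can propagate a CHAIN event into that successor.
 */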
static bool kvm_pmu_counter_can_chain(struct kvm_pmc *pmc)
{
	return (!(pmc->idx & 1) && (pmc->idx + 1) < ARMV8_PMU_CYCLE_IDX &&
		!kvm_pmc_has_64bit_overflow(pmc));
}

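/*
 * Map a counter index to the vcpu sysregs backing it: the cycle counter uses
 * PMCCNTR_EL0/PMCCFILTR_EL0, the event counters use PMEVCNTR<n>_EL0 and
 * PMEVTYPER<n>_EL0.
 */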
static u32 counter_index_to_reg(u64 idx)
{
	return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + idx;
}

static u32 counter_index_to_evtreg(u64 idx)
{
	return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx;
}

static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 counter, reg, enabled, running;

	reg = counter_index_to_reg(pmc->idx);
	counter = __vcpu_sys_reg(vcpu, reg);

	/*
	 * The real counter value is equal to the value of counter register plus
	 * the value perf event counts.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

	if (!kvm_pmc_is_64bit(pmc))
		counter = lower_32_bits(counter);

	return counter;
}

/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	return kvm_pmu_get_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx));
}

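/*
 * Write @val to the counter. The backing perf event is torn down and
 * recreated so that its sample period matches the new value. @force bypasses
 * the AArch32 restriction on the top 32 bits and is only used when PMCR_EL0.P
 * resets the counters.
 */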
static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 reg;

	kvm_pmu_release_perf_event(pmc);

	reg = counter_index_to_reg(pmc->idx);

	if (vcpu_mode_is_32bit(vcpu) && pmc->idx != ARMV8_PMU_CYCLE_IDX &&
	    !force) {
		/*
		 * Even with PMUv3p5, AArch32 cannot write to the top
		 * 32bit of the counters. The only possible course of
		 * action is to use PMCR.P, which will reset them to
		 * 0 (the only use of the 'force' parameter).
		 */
		val = (__vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32)) |
		      lower_32_bits(val);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(pmc);
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx), val, false);
}

/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 */
static void kvm_pmu_stop_counter(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 reg, val;

	if (!pmc->perf_event)
		return;

	val = kvm_pmu_get_pmc_value(pmc);

	reg = counter_index_to_reg(pmc->idx);

	__vcpu_sys_reg(vcpu, reg) = val;

	kvm_pmu_release_perf_event(pmc);
}

/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}

/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	int i;

	for_each_set_bit(i, &mask, 32)
		kvm_pmu_stop_counter(kvm_vcpu_idx_to_pmc(vcpu, i));
}

/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;

	for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i));
	irq_work_sync(&vcpu->arch.pmu.overflow_work);
}

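/*
 * Bitmask of the counters implemented for this vCPU: the PMCR_EL0.N event
 * counters plus the cycle counter.
 */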
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
	u64 val = FIELD_GET(ARMV8_PMU_PMCR_N, kvm_vcpu_read_pmcr(vcpu));

	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E) || !val)
		return;

	for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) {
		struct kvm_pmc *pmc;

		if (!(val & BIT(i)))
			continue;

		pmc = kvm_vcpu_idx_to_pmc(vcpu, i);

		if (!pmc->perf_event) {
			kvm_pmu_create_perf_event(pmc);
		} else {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("fail to enable perf event\n");
		}
	}
}

/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu) || !val)
		return;

	for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) {
		struct kvm_pmc *pmc;

		if (!(val & BIT(i)))
			continue;

		pmc = kvm_vcpu_idx_to_pmc(vcpu, i);

		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}

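/*
 * Compute the set of counters asserting an overflow interrupt: those that
 * have overflowed (PMOVSSET), are enabled (PMCNTENSET) and have their
 * interrupt enabled (PMINTENSET), provided PMCR_EL0.E is set.
 */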
static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = 0;

	if ((kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E)) {
		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
	}

	return reg;
}

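/*
 * Recompute the PMU interrupt line from the overflow state and, when an
 * in-kernel irqchip is present, propagate any change to the vgic.
 */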
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool overflow;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	overflow = !!kvm_pmu_overflow_status(vcpu);
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu,
					      pmu->irq_num, overflow, pmu);
		WARN_ON(ret);
	}
}

bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return pmu->irq_level != run_level;
}

/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the PMU bit for user space */
	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
	if (vcpu->arch.pmu.irq_level)
		regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/*
 * When the perf interrupt is an NMI, we cannot safely notify the vcpu
 * corresponding to the event.
 * This is why we need a callback to do it once outside of the NMI context.
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
	struct kvm_vcpu *vcpu;

	vcpu = container_of(work, struct kvm_vcpu, arch.pmu.overflow_work);
	kvm_vcpu_kick(vcpu);
}

/*
 * Perform an increment on any of the counters described in @mask,
 * generating the overflow if required, and propagate it as a chained
 * event if possible.
 */
static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
				      unsigned long mask, u32 event)
{
	int i;

	if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E))
		return;

	/* Weed out disabled counters */
	mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

	for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) {
		struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
		u64 type, reg;

		/* Filter on event type */
		type = __vcpu_sys_reg(vcpu, counter_index_to_evtreg(i));
		type &= kvm_pmu_event_mask(vcpu->kvm);
		if (type != event)
			continue;

		/* Increment this counter */
		reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1;
		if (!kvm_pmc_is_64bit(pmc))
			reg = lower_32_bits(reg);
		__vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg;

		/* No overflow? move on */
		if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg))
			continue;

		/* Mark overflow */
		__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);

		if (kvm_pmu_counter_can_chain(pmc))
			kvm_pmu_counter_increment(vcpu, BIT(i + 1),
						  ARMV8_PMUV3_PERFCTR_CHAIN);
	}
}

/* Compute the sample period for a given counter value */
static u64 compute_period(struct kvm_pmc *pmc, u64 counter)
{
	u64 val;

	if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc))
		val = (-counter) & GENMASK(63, 0);
	else
		val = (-counter) & GENMASK(31, 0);

	return val;
}

/*
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
	u64 period;

	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = compute_period(pmc, local64_read(&perf_event->count));

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

	if (kvm_pmu_counter_can_chain(pmc))
		kvm_pmu_counter_increment(vcpu, BIT(idx + 1),
					  ARMV8_PMUV3_PERFCTR_CHAIN);

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

		if (!in_nmi())
			kvm_vcpu_kick(vcpu);
		else
			irq_work_queue(&vcpu->arch.pmu.overflow_work);
	}

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
	kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR);
}

/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	/* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */
	if (!kvm_has_feat(vcpu->kvm, ID_AA64DFR0_EL1, PMUVer, V3P5))
		val &= ~ARMV8_PMU_PMCR_LP;

	/* The reset bits don't indicate any state, and shouldn't be saved. */
	__vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P);

	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter_mask(vcpu,
					    __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	} else {
		kvm_pmu_disable_counter_mask(vcpu,
					     __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	}

	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

	if (val & ARMV8_PMU_PMCR_P) {
		unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);

		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
		for_each_set_bit(i, &mask, 32)
			kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true);
	}
	kvm_vcpu_pmu_restore_guest(vcpu);
}

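/*
 * A counter only counts when the PMU is globally enabled (PMCR_EL0.E) and its
 * own enable bit is set in PMCNTENSET_EL0.
 */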
static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return (kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E) &&
	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx));
}

/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @pmc: Counter context
 */
static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
	struct perf_event *event;
	struct perf_event_attr attr;
	u64 eventsel, reg, data;
	bool p, u, nsk, nsu;

	reg = counter_index_to_evtreg(pmc->idx);
	data = __vcpu_sys_reg(vcpu, reg);

	kvm_pmu_stop_counter(pmc);
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);

	/*
	 * Neither SW increment nor chained events need to be backed
	 * by a perf event.
	 */
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR ||
	    eventsel == ARMV8_PMUV3_PERFCTR_CHAIN)
		return;

	/*
	 * If we have a filter in place and the event isn't allowed, do
	 * not install a perf event either.
	 */
	if (vcpu->kvm->arch.pmu_filter &&
	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
		return;

	p = data & ARMV8_PMU_EXCLUDE_EL1;
	u = data & ARMV8_PMU_EXCLUDE_EL0;
	nsk = data & ARMV8_PMU_EXCLUDE_NS_EL1;
	nsu = data & ARMV8_PMU_EXCLUDE_NS_EL0;

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = arm_pmu->pmu.type;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = !kvm_pmu_counter_is_enabled(pmc);
	attr.exclude_user = (u != nsu);
	attr.exclude_kernel = (p != nsk);
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
	attr.config = eventsel;

	/*
	 * If counting with a 64bit counter, advertise it to the perf
	 * code, carefully dealing with the initial sample period
	 * which also depends on the overflow.
	 */
	if (kvm_pmc_is_64bit(pmc))
		attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT;

	attr.sample_period = compute_period(pmc, kvm_pmu_get_pmc_value(pmc));

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, pmc);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}

/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The number of selected counter
 *
 * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
 * event with given hardware event number. Here we call perf_event API to
 * emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
	struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx);
	u64 reg;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	reg = counter_index_to_evtreg(pmc->idx);
	__vcpu_sys_reg(vcpu, reg) = data & kvm_pmu_evtyper_mask(vcpu->kvm);

	kvm_pmu_create_perf_event(pmc);
}

void kvm_host_pmu_init(struct arm_pmu *pmu)
{
	struct arm_pmu_entry *entry;

	/*
	 * Check the sanitised PMU version for the system, as KVM does not
	 * support implementations where PMUv3 exists on a subset of CPUs.
	 */
	if (!pmuv3_implemented(kvm_arm_pmu_get_pmuver_limit()))
		return;

	mutex_lock(&arm_pmus_lock);

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		goto out_unlock;

	entry->arm_pmu = pmu;
	list_add_tail(&entry->entry, &arm_pmus);

	if (list_is_singular(&arm_pmus))
		static_branch_enable(&kvm_arm_pmu_available);

out_unlock:
	mutex_unlock(&arm_pmus_lock);
}

static struct arm_pmu *kvm_pmu_probe_armpmu(void)
{
	struct arm_pmu *tmp, *pmu = NULL;
	struct arm_pmu_entry *entry;
	int cpu;

	mutex_lock(&arm_pmus_lock);

	/*
	 * It is safe to use a stale cpu to iterate the list of PMUs so long as
	 * the same value is used for the entirety of the loop. Given this, and
	 * the fact that no percpu data is used for the lookup there is no need
	 * to disable preemption.
	 *
	 * It is still necessary to get a valid cpu, though, to probe for the
	 * default PMU instance as userspace is not required to specify a PMU
	 * type. In order to uphold the preexisting behavior KVM selects the
	 * PMU instance for the core during vcpu init. A dependent use
	 * case would be a user with disdain of all things big.LITTLE that
	 * affines the VMM to a particular cluster of cores.
	 *
	 * In any case, userspace should just do the sane thing and use the UAPI
	 * to select a PMU type directly. But, be wary of the baggage being
	 * carried here.
	 */
	cpu = raw_smp_processor_id();
	list_for_each_entry(entry, &arm_pmus, entry) {
		tmp = entry->arm_pmu;

		if (cpumask_test_cpu(cpu, &tmp->supported_cpus)) {
			pmu = tmp;
			break;
		}
	}

	mutex_unlock(&arm_pmus_lock);

	return pmu;
}

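/*
 * Build the guest view of PMCEID0_EL0/PMCEID1_EL0 from the host registers:
 * always advertise CHAIN, hide the STALL_SLOT* events, and mask out anything
 * removed by the userspace event filter.
 */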
u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
	u64 val, mask = 0;
	int base, i, nr_events;

	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!pmceid1) {
		val = read_sysreg(pmceid0_el0);
		/* always support CHAIN */
		val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN);
		base = 0;
	} else {
		val = read_sysreg(pmceid1_el0);
		/*
		 * Don't advertise STALL_SLOT*, as PMMIR_EL0 is handled
		 * as RAZ
		 */
		val &= ~(BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32) |
			 BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND - 32) |
			 BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND - 32));
		base = 32;
	}

	if (!bmap)
		return val;

	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

	for (i = 0; i < 32; i += 8) {
		u64 byte;

		byte = bitmap_get_value8(bmap, base + i);
		mask |= byte << i;
		if (nr_events >= (0x4000 + base + 32)) {
			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
			mask |= byte << (32 + i);
		}
	}

	return val & mask;
}

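/*
 * Resynchronise the PMU state: reapply PMCR_EL0 and clear the enable,
 * interrupt-enable and overflow bits of counters that are not implemented
 * for this VM.
 */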
void kvm_vcpu_reload_pmu(struct kvm_vcpu *vcpu)
{
	u64 mask = kvm_pmu_valid_counter_mask(vcpu);

	kvm_pmu_handle_pmcr(vcpu, kvm_vcpu_read_pmcr(vcpu));

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= mask;
	__vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= mask;
	__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= mask;
}

int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!vcpu->arch.pmu.created)
		return -EINVAL;

	/*
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and an in-kernel irqchip,
	 * or to have neither an in-kernel GIC nor a configured IRQ.
	 */
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;

		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		return -EINVAL;
	}

	/* One-off reload of the PMU on first run */
	kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);

	return 0;
}

static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm)) {
		int ret;

		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
	}

	init_irq_work(&vcpu->arch.pmu.overflow_work,
		      kvm_pmu_perf_overflow_notify_vcpu);

	vcpu->arch.pmu.created = true;
	return 0;
}

/*
 * For a given VM, the interrupt type must be the same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

		if (irq_is_ppi(irq)) {
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}

/**
 * kvm_arm_pmu_get_max_counters - Return the max number of PMU counters.
 * @kvm: The kvm pointer
 */
u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm)
{
	struct arm_pmu *arm_pmu = kvm->arch.arm_pmu;

	/*
	 * The arm_pmu->cntr_mask considers the fixed counter(s) as well.
	 * Ignore those and return only the general-purpose counters.
	 */
	return bitmap_weight(arm_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS);
}

static void kvm_arm_set_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
{
	lockdep_assert_held(&kvm->arch.config_lock);

	kvm->arch.arm_pmu = arm_pmu;
	kvm->arch.pmcr_n = kvm_arm_pmu_get_max_counters(kvm);
}

/**
 * kvm_arm_set_default_pmu - No PMU set, get the default one.
 * @kvm: The kvm pointer
 *
 * The observant among you will notice that the supported_cpus
 * mask does not get updated for the default PMU even though it
 * is quite possible the selected instance supports only a
 * subset of cores in the system. This is intentional, and
 * upholds the preexisting behavior on heterogeneous systems
 * where vCPUs can be scheduled on any core but the guest
 * counters could stop working.
 */
int kvm_arm_set_default_pmu(struct kvm *kvm)
{
	struct arm_pmu *arm_pmu = kvm_pmu_probe_armpmu();

	if (!arm_pmu)
		return -ENODEV;

	kvm_arm_set_pmu(kvm, arm_pmu);
	return 0;
}

static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
{
	struct kvm *kvm = vcpu->kvm;
	struct arm_pmu_entry *entry;
	struct arm_pmu *arm_pmu;
	int ret = -ENXIO;

	lockdep_assert_held(&kvm->arch.config_lock);
	mutex_lock(&arm_pmus_lock);

	list_for_each_entry(entry, &arm_pmus, entry) {
		arm_pmu = entry->arm_pmu;
		if (arm_pmu->pmu.type == pmu_id) {
			if (kvm_vm_has_ran_once(kvm) ||
			    (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
				ret = -EBUSY;
				break;
			}

			kvm_arm_set_pmu(kvm, arm_pmu);
			cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
			ret = 0;
			break;
		}
	}

	mutex_unlock(&arm_pmus_lock);
	return ret;
}

int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	struct kvm *kvm = vcpu->kvm;

	lockdep_assert_held(&kvm->arch.config_lock);

	if (!kvm_vcpu_has_pmu(vcpu))
		return -ENODEV;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(kvm))
			return -EINVAL;

		if (get_user(irq, uaddr))
			return -EFAULT;

		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
			return -EINVAL;

		if (!pmu_irq_is_valid(kvm, irq))
			return -EINVAL;

		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_FILTER: {
		u8 pmuver = kvm_arm_pmu_get_pmuver_limit();
		struct kvm_pmu_event_filter __user *uaddr;
		struct kvm_pmu_event_filter filter;
		int nr_events;

		/*
		 * Allow userspace to specify an event filter for the entire
		 * event range supported by the hardware's PMUVer, rather than
		 * the guest's PMUVer, for KVM backward compatibility.
		 */
		nr_events = __kvm_pmu_event_mask(pmuver) + 1;

		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

		if (copy_from_user(&filter, uaddr, sizeof(filter)))
			return -EFAULT;

		if (((u32)filter.base_event + filter.nevents) > nr_events ||
		    (filter.action != KVM_PMU_EVENT_ALLOW &&
		     filter.action != KVM_PMU_EVENT_DENY))
			return -EINVAL;

		if (kvm_vm_has_ran_once(kvm))
			return -EBUSY;

		if (!kvm->arch.pmu_filter) {
			kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
			if (!kvm->arch.pmu_filter)
				return -ENOMEM;

			/*
			 * The default depends on the first applied filter.
			 * If it allows events, the default is to deny.
			 * Conversely, if the first filter denies a set of
			 * events, the default is to allow.
			 */
			if (filter.action == KVM_PMU_EVENT_ALLOW)
				bitmap_zero(kvm->arch.pmu_filter, nr_events);
			else
				bitmap_fill(kvm->arch.pmu_filter, nr_events);
		}

		if (filter.action == KVM_PMU_EVENT_ALLOW)
			bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
		else
			bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);

		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int pmu_id;

		if (get_user(pmu_id, uaddr))
			return -EFAULT;

		return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
	}
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (!kvm_vcpu_has_pmu(vcpu))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ:
	case KVM_ARM_VCPU_PMU_V3_INIT:
	case KVM_ARM_VCPU_PMU_V3_FILTER:
	case KVM_ARM_VCPU_PMU_V3_SET_PMU:
		if (kvm_vcpu_has_pmu(vcpu))
			return 0;
	}

	return -ENXIO;
}

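/*
 * Return the sanitised host value of ID_AA64DFR0_EL1.PMUVer, capped at
 * PMUv3p5; this bounds the PMU version that can be exposed to a guest.
 */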
u8 kvm_arm_pmu_get_pmuver_limit(void)
{
	u64 tmp;

	tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
	tmp = cpuid_feature_cap_perfmon_field(tmp,
					      ID_AA64DFR0_EL1_PMUVer_SHIFT,
					      ID_AA64DFR0_EL1_PMUVer_V3P5);
	return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp);
}

/**
 * kvm_vcpu_read_pmcr - Read PMCR_EL0 register for the vCPU
 * @vcpu: The vcpu pointer
 */
u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu)
{
	u64 pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0);

	return u64_replace_bits(pmcr, vcpu->kvm->arch.pmcr_n, ARMV8_PMU_PMCR_N);
}
