1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * vmx_nested_tsc_scaling_test
4 *
5 * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6 *
7 * This test case verifies that nested TSC scaling behaves as expected when
8 * both L1 and L2 are scaled using different ratios. For this test we scale
9 * L1 down and scale L2 up.
10 */
11
12 #include <time.h>
13
14 #include "kvm_util.h"
15 #include "vmx.h"
16 #include "kselftest.h"
17
/*
 * Factor by which L2's TSC is scaled up, as seen from L1. It is kept as an
 * unsigned long long constant because it is shifted by 48 bits below and
 * printed with %llu by main().
 */
#define L2_SCALE_FACTOR 4ULL

/* Value L1 writes into the TSC_OFFSET field of L2's VMCS. */
#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)

/*
 * Value L1 writes into the TSC_MULTIPLIER field of L2's VMCS: the scale
 * factor shifted left by 48 bits.
 * NOTE(review): presumably the low 48 bits are the fractional part of the
 * multiplier -- confirm against the Intel SDM.
 */
#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)

/* Number of unsigned long slots in the stack L1 sets up for L2. */
#define L2_GUEST_STACK_SIZE 64

/* Command carried as the first argument of every UCALL_SYNC the guest issues. */
enum {
	USLEEP,		/* second argument: number of seconds the host should sleep */
	UCHECK_L1,	/* second argument: TSC frequency measured by L1 */
	UCHECK_L2,	/* second argument: TSC frequency measured by L2 */
};

/* Ask the host to sleep for @sec seconds. */
#define GUEST_SLEEP(sec) ucall(UCALL_SYNC, 2, USLEEP, (sec))

/* Report the frequency @freq measured at @level (UCHECK_L1 or UCHECK_L2). */
#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, (level), (freq))
29
30
/*
 * Assert that a measured TSC frequency sits inside a window of +/- 1% around
 * the frequency that is expected.
 *
 * The window absorbs the delay incurred while taking the two TSC samples.
 * Both bounds are exclusive: a value equal to either threshold fails.
 *
 * @actual:   frequency that was measured
 * @expected: frequency the measurement is compared against
 */
static void compare_tsc_freq(uint64_t actual, uint64_t expected)
{
	const uint64_t tolerance = expected / 100;
	const uint64_t thresh_low = expected - tolerance;
	const uint64_t thresh_high = expected + tolerance;

	/* too slow? */
	TEST_ASSERT(thresh_low < actual,
		    "TSC freq is expected to be between %"PRIu64" and %"PRIu64
		    " but it actually is %"PRIu64,
		    thresh_low, thresh_high, actual);

	/* too fast? */
	TEST_ASSERT(thresh_high > actual,
		    "TSC freq is expected to be between %"PRIu64" and %"PRIu64
		    " but it actually is %"PRIu64,
		    thresh_low, thresh_high, actual);
}
54
check_tsc_freq(int level)55 static void check_tsc_freq(int level)
56 {
57 uint64_t tsc_start, tsc_end, tsc_freq;
58
59 /*
60 * Reading the TSC twice with about a second's difference should give
61 * us an approximation of the TSC frequency from the guest's
62 * perspective. Now, this won't be completely accurate, but it should
63 * be good enough for the purposes of this test.
64 */
65 tsc_start = rdmsr(MSR_IA32_TSC);
66 GUEST_SLEEP(1);
67 tsc_end = rdmsr(MSR_IA32_TSC);
68
69 tsc_freq = tsc_end - tsc_start;
70
71 GUEST_CHECK(level, tsc_freq);
72 }
73
l2_guest_code(void)74 static void l2_guest_code(void)
75 {
76 check_tsc_freq(UCHECK_L2);
77
78 /* exit to L1 */
79 __asm__ __volatile__("vmcall");
80 }
81
l1_guest_code(struct vmx_pages * vmx_pages)82 static void l1_guest_code(struct vmx_pages *vmx_pages)
83 {
84 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
85 uint32_t control;
86
87 /* check that L1's frequency looks alright before launching L2 */
88 check_tsc_freq(UCHECK_L1);
89
90 GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
91 GUEST_ASSERT(load_vmcs(vmx_pages));
92
93 /* prepare the VMCS for L2 execution */
94 prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
95
96 /* enable TSC offsetting and TSC scaling for L2 */
97 control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
98 control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
99 vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
100
101 control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
102 control |= SECONDARY_EXEC_TSC_SCALING;
103 vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
104
105 vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
106 vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
107 vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);
108
109 /* launch L2 */
110 GUEST_ASSERT(!vmlaunch());
111 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
112
113 /* check that L1's frequency still looks good */
114 check_tsc_freq(UCHECK_L1);
115
116 GUEST_DONE();
117 }
118
main(int argc,char * argv[])119 int main(int argc, char *argv[])
120 {
121 struct kvm_vcpu *vcpu;
122 struct kvm_vm *vm;
123 vm_vaddr_t vmx_pages_gva;
124
125 uint64_t tsc_start, tsc_end;
126 uint64_t tsc_khz;
127 uint64_t l1_scale_factor;
128 uint64_t l0_tsc_freq = 0;
129 uint64_t l1_tsc_freq = 0;
130 uint64_t l2_tsc_freq = 0;
131
132 TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
133 TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
134 TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
135
136 /*
137 * We set L1's scale factor to be a random number from 2 to 10.
138 * Ideally we would do the same for L2's factor but that one is
139 * referenced by both main() and l1_guest_code() and using a global
140 * variable does not work.
141 */
142 srand(time(NULL));
143 l1_scale_factor = (rand() % 9) + 2;
144 printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor);
145 printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR);
146
147 tsc_start = rdtsc();
148 sleep(1);
149 tsc_end = rdtsc();
150
151 l0_tsc_freq = tsc_end - tsc_start;
152 printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
153
154 vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
155 vcpu_alloc_vmx(vm, &vmx_pages_gva);
156 vcpu_args_set(vcpu, 1, vmx_pages_gva);
157
158 tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
159 TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
160
161 /* scale down L1's TSC frequency */
162 vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *) (tsc_khz / l1_scale_factor));
163
164 for (;;) {
165 struct ucall uc;
166
167 vcpu_run(vcpu);
168 TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
169
170 switch (get_ucall(vcpu, &uc)) {
171 case UCALL_ABORT:
172 REPORT_GUEST_ASSERT(uc);
173 case UCALL_SYNC:
174 switch (uc.args[0]) {
175 case USLEEP:
176 sleep(uc.args[1]);
177 break;
178 case UCHECK_L1:
179 l1_tsc_freq = uc.args[1];
180 printf("L1's TSC frequency is around: %"PRIu64
181 "\n", l1_tsc_freq);
182
183 compare_tsc_freq(l1_tsc_freq,
184 l0_tsc_freq / l1_scale_factor);
185 break;
186 case UCHECK_L2:
187 l2_tsc_freq = uc.args[1];
188 printf("L2's TSC frequency is around: %"PRIu64
189 "\n", l2_tsc_freq);
190
191 compare_tsc_freq(l2_tsc_freq,
192 l1_tsc_freq * L2_SCALE_FACTOR);
193 break;
194 }
195 break;
196 case UCALL_DONE:
197 goto done;
198 default:
199 TEST_FAIL("Unknown ucall %lu", uc.cmd);
200 }
201 }
202
203 done:
204 kvm_vm_free(vm);
205 return 0;
206 }
207