Lines Matching +full:sync +full:- +full:2

1 // SPDX-License-Identifier: GPL-2.0
3 * A memslot-related performance benchmark.
36 #define MEM_TEST_SIZE (MEM_SIZE - MEM_EXTRA_SIZE)
45 #define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - MEM_EXTRA_SIZE)
51 * 2 MiB chunk size like a typical huge page
68 * architecture slots memory-per-slot memory-on-last-slot
69 * --------------------------------------------------------------
70 * x86-4KB 32763 16KB 160KB
71 * arm64-4KB 32766 16KB 112KB
72 * arm64-16KB 32766 16KB 112KB
73 * arm64-64KB 8192 64KB 128KB
105 * Technically, we need also for the atomic bool to be address-free, which
109 * all KVM-supported platforms.
111 static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");
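The start_flag/exit_flag/sync_flag accesses throughout this listing target a sync_area shared between the host and the guest at MEM_SYNC_GPA. Below is a partial reconstruction of that structure, limited to the fields that actually appear in these fragments; it is a hedged sketch, not the file's real layout, which may contain more members.

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* Partial sketch only; memslot_perf_test.c may define additional fields. */
struct sync_area {
	uint32_t    guest_page_size;	/* published by the host, read by the guest */
	atomic_bool start_flag;		/* host -> guest: start the benchmark loop */
	atomic_bool exit_flag;		/* host -> guest: leave the loop and exit */
	atomic_bool sync_flag;		/* host <-> guest: per-iteration handshake */
	void       *move_area_ptr;	/* test area pointer used by the move tests */
};

/* Both sides assume plain atomic_bool operations never take a lock. */
static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");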
129 TEST_ASSERT(data->mmio_ok, "Unexpected mmio exit"); in check_mmio_access()
130 TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read"); in check_mmio_access()
131 TEST_ASSERT(run->mmio.len == 8, in check_mmio_access()
132 "Unexpected exit mmio size = %u", run->mmio.len); in check_mmio_access()
133 TEST_ASSERT(run->mmio.phys_addr >= data->mmio_gpa_min && in check_mmio_access()
134 run->mmio.phys_addr <= data->mmio_gpa_max, in check_mmio_access()
136 run->mmio.phys_addr); in check_mmio_access()
142 struct kvm_vcpu *vcpu = data->vcpu; in vcpu_worker()
143 struct kvm_run *run = vcpu->run; in vcpu_worker()
152 "Unexpected sync ucall, got %lx", in vcpu_worker()
157 if (run->exit_reason == KVM_EXIT_MMIO) in vcpu_worker()
183 ts.tv_sec += 2; in wait_for_vcpu()
193 uint32_t guest_page_size = data->vm->page_size; in vm_gpa2hva()
196 TEST_ASSERT(gpa < MEM_GPA + data->npages * guest_page_size, in vm_gpa2hva()
198 gpa -= MEM_GPA; in vm_gpa2hva()
202 slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1); in vm_gpa2hva()
203 slotoffs = gpage - (slot * data->pages_per_slot); in vm_gpa2hva()
208 if (slot == data->nslots - 1) in vm_gpa2hva()
209 slotpages = data->npages - slot * data->pages_per_slot; in vm_gpa2hva()
211 slotpages = data->pages_per_slot; in vm_gpa2hva()
215 *rempages = slotpages - slotoffs; in vm_gpa2hva()
218 base = data->hva_slots[slot]; in vm_gpa2hva()
224 uint32_t guest_page_size = data->vm->page_size; in vm_slot2gpa()
226 TEST_ASSERT(slot < data->nslots, "Too high slot number"); in vm_slot2gpa()
228 return MEM_GPA + slot * data->pages_per_slot * guest_page_size; in vm_slot2gpa()
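vm_gpa2hva() above splits the test memory evenly across the slots, with the final slot absorbing the remainder pages. The same arithmetic, pulled out into a standalone helper for illustration (the function name and the example layout are hypothetical, not taken from the test):

#include <stdint.h>
#include <stdio.h>

static void gpa_to_slot(uint64_t gpa, uint64_t mem_gpa, uint32_t page_size,
			uint64_t npages, uint32_t nslots,
			uint32_t *slot, uint64_t *slotoffs)
{
	uint64_t gpage = (gpa - mem_gpa) / page_size;
	uint64_t pages_per_slot = npages / nslots;

	*slot = gpage / pages_per_slot;
	if (*slot > nslots - 1)		/* remainder pages live in the last slot */
		*slot = nslots - 1;
	*slotoffs = gpage - (uint64_t)*slot * pages_per_slot;
}

int main(void)
{
	uint32_t slot;
	uint64_t offs;

	/* e.g. 10 pages over 3 slots: slots of 3, 3 and 4 pages */
	gpa_to_slot(0x10000000 + 9 * 4096, 0x10000000, 4096, 10, 3, &slot, &offs);
	printf("slot %u, page offset %llu\n", slot, (unsigned long long)offs);
	return 0;
}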
238 data->vm = NULL; in alloc_vm()
239 data->vcpu = NULL; in alloc_vm()
240 data->hva_slots = NULL; in alloc_vm()
263 uint32_t guest_page_size = data->vm->page_size; in get_max_slots()
267 mempages = data->npages; in get_max_slots()
268 slots = data->nslots; in get_max_slots()
269 while (--slots > 1) { in get_max_slots()
291 struct sync_area *sync; in prepare_vm() local
297 data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code); in prepare_vm()
298 TEST_ASSERT(data->vm->page_size == guest_page_size, "Invalid VM page size"); in prepare_vm()
300 data->npages = mempages; in prepare_vm()
301 TEST_ASSERT(data->npages > 1, "Can't test without any memory"); in prepare_vm()
302 data->nslots = nslots; in prepare_vm()
303 data->pages_per_slot = data->npages / data->nslots; in prepare_vm()
304 rempages = data->npages % data->nslots; in prepare_vm()
306 data->pages_per_slot, rempages)) { in prepare_vm()
311 data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots); in prepare_vm()
312 TEST_ASSERT(data->hva_slots, "malloc() fail"); in prepare_vm()
315 data->nslots, data->pages_per_slot, rempages); in prepare_vm()
318 for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) { in prepare_vm()
321 npages = data->pages_per_slot; in prepare_vm()
322 if (slot == data->nslots) in prepare_vm()
325 vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS, in prepare_vm()
332 for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) { in prepare_vm()
336 npages = data->pages_per_slot; in prepare_vm()
337 if (slot == data->nslots) in prepare_vm()
340 gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot); in prepare_vm()
344 data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr); in prepare_vm()
345 memset(data->hva_slots[slot - 1], 0, npages * guest_page_size); in prepare_vm()
350 virt_map(data->vm, MEM_GPA, MEM_GPA, data->npages); in prepare_vm()
352 sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL); in prepare_vm()
353 sync->guest_page_size = data->vm->page_size; in prepare_vm()
354 atomic_init(&sync->start_flag, false); in prepare_vm()
355 atomic_init(&sync->exit_flag, false); in prepare_vm()
356 atomic_init(&sync->sync_flag, false); in prepare_vm()
358 data->mmio_ok = false; in prepare_vm()
367 pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data); in launch_vm()
375 kvm_vm_free(data->vm); in free_vm()
376 free(data->hva_slots); in free_vm()
382 pthread_join(data->vcpu_thread, NULL); in wait_guest_exit()
385 static void let_guest_run(struct sync_area *sync) in let_guest_run() argument
387 atomic_store_explicit(&sync->start_flag, true, memory_order_release); in let_guest_run()
392 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; in guest_spin_until_start() local
394 while (!atomic_load_explicit(&sync->start_flag, memory_order_acquire)) in guest_spin_until_start()
398 static void make_guest_exit(struct sync_area *sync) in make_guest_exit() argument
400 atomic_store_explicit(&sync->exit_flag, true, memory_order_release); in make_guest_exit()
405 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; in _guest_should_exit() local
407 return atomic_load_explicit(&sync->exit_flag, memory_order_acquire); in _guest_should_exit()
418 static noinline void host_perform_sync(struct sync_area *sync) in host_perform_sync() argument
422 atomic_store_explicit(&sync->sync_flag, true, memory_order_release); in host_perform_sync()
423 while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire)) in host_perform_sync()
431 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; in guest_perform_sync() local
439 } while (!atomic_compare_exchange_weak_explicit(&sync->sync_flag, in guest_perform_sync()
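host_perform_sync() and guest_perform_sync() above implement a simple rendezvous on sync_flag: the host raises the flag and spins until it drops, while the guest atomically flips it from true back to false with a weak compare-exchange. A minimal two-thread sketch of that handshake follows; pthreads stand in for the host process and the guest vCPU, and the release/acquire ordering mirrors the fragments above.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool sync_flag;

static void *host_side(void *arg)
{
	/* "host_perform_sync": raise the flag, wait for the guest to consume it */
	atomic_store_explicit(&sync_flag, true, memory_order_release);
	while (atomic_load_explicit(&sync_flag, memory_order_acquire))
		;	/* spin */
	return NULL;
}

static void *guest_side(void *arg)
{
	bool expected;

	/* "guest_perform_sync": atomically flip the flag from true back to false */
	do {
		expected = true;
	} while (!atomic_compare_exchange_weak_explicit(&sync_flag, &expected, false,
							memory_order_acq_rel,
							memory_order_relaxed));
	return NULL;
}

int main(void)
{
	pthread_t h, g;

	pthread_create(&h, NULL, host_side, NULL);
	pthread_create(&g, NULL, guest_side, NULL);
	pthread_join(h, NULL);
	pthread_join(g, NULL);
	printf("one sync round-trip completed\n");
	return 0;
}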
449 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; in guest_code_test_memslot_move() local
450 uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); in guest_code_test_memslot_move()
451 uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr); in guest_code_test_memslot_move()
465 * No host sync here since the MMIO exits are so expensive in guest_code_test_memslot_move()
478 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; in guest_code_test_memslot_map() local
479 uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); in guest_code_test_memslot_map()
489 ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; in guest_code_test_memslot_map()
496 for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; in guest_code_test_memslot_map()
510 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; in guest_code_test_memslot_unmap() local
521 * per host sync as otherwise the host will spend in guest_code_test_memslot_unmap()
533 ptr += MEM_TEST_UNMAP_SIZE / 2; in guest_code_test_memslot_unmap()
545 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; in guest_code_test_memslot_rw() local
546 uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); in guest_code_test_memslot_rw()
562 for (ptr = MEM_TEST_GPA + page_size / 2; in guest_code_test_memslot_rw()
578 struct sync_area *sync, in test_memslot_move_prepare() argument
581 uint32_t guest_page_size = data->vm->page_size; in test_memslot_move_prepare()
586 vm_enable_cap(data->vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); in test_memslot_move_prepare()
589 movesrcgpa = vm_slot2gpa(data, data->nslots - 1); in test_memslot_move_prepare()
595 if (lastpages * guest_page_size < MEM_TEST_MOVE_SIZE / 2) { in test_memslot_move_prepare()
601 movetestgpa = movesrcgpa - (MEM_TEST_MOVE_SIZE / (isactive ? 2 : 1)); in test_memslot_move_prepare()
602 sync->move_area_ptr = (void *)movetestgpa; in test_memslot_move_prepare()
605 data->mmio_ok = true; in test_memslot_move_prepare()
606 data->mmio_gpa_min = movesrcgpa; in test_memslot_move_prepare()
607 data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1; in test_memslot_move_prepare()
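Read together, the test_memslot_move_prepare() fragments derive the guest's test area and the expected MMIO window from the GPA of the last memslot. A worked example of the active-move case with hypothetical numbers (the move size and slot GPA below are illustrative, not the test's constants):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t move_size = 64 << 10;		/* hypothetical MEM_TEST_MOVE_SIZE */
	uint64_t movesrcgpa = 0xff0000;		/* hypothetical GPA of the last slot */

	/*
	 * Active case: the test area straddles the start of the moved slot --
	 * its lower half lies below the slot, its upper half inside it, and
	 * accesses to that upper half are expected to exit as MMIO while the
	 * move is in flight.
	 */
	uint64_t movetestgpa = movesrcgpa - move_size / 2;
	uint64_t mmio_gpa_min = movesrcgpa;
	uint64_t mmio_gpa_max = movesrcgpa + move_size / 2 - 1;

	printf("guest test area: 0x%llx..0x%llx\n",
	       (unsigned long long)movetestgpa,
	       (unsigned long long)(movetestgpa + move_size - 1));
	printf("expected MMIO window: 0x%llx..0x%llx\n",
	       (unsigned long long)mmio_gpa_min,
	       (unsigned long long)mmio_gpa_max);
	return 0;
}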
614 struct sync_area *sync, in test_memslot_move_prepare_active() argument
617 return test_memslot_move_prepare(data, sync, maxslots, true); in test_memslot_move_prepare_active()
621 struct sync_area *sync, in test_memslot_move_prepare_inactive() argument
624 return test_memslot_move_prepare(data, sync, maxslots, false); in test_memslot_move_prepare_inactive()
627 static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync) in test_memslot_move_loop() argument
631 movesrcgpa = vm_slot2gpa(data, data->nslots - 1); in test_memslot_move_loop()
632 vm_mem_region_move(data->vm, data->nslots - 1 + 1, in test_memslot_move_loop()
634 vm_mem_region_move(data->vm, data->nslots - 1 + 1, movesrcgpa); in test_memslot_move_loop()
641 uint32_t guest_page_size = data->vm->page_size; in test_memslot_do_unmap()
650 npages = min(npages, count - ctr); in test_memslot_do_unmap()
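test_memslot_do_unmap() walks the region in chunks, clamping each step to the pages that remain. One plausible way for the host to drop the backing behind a range of guest pages is madvise(MADV_DONTNEED) on the corresponding HVA; whether the test uses exactly this call is not visible in the listing, so the sketch below is an assumption with illustrative names and sizes.

#include <sys/mman.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical helper: discard the host pages backing npages of guest memory. */
static void unmap_chunk(void *hva, uint64_t npages, uint32_t page_size)
{
	if (madvise(hva, npages * page_size, MADV_DONTNEED))
		perror("madvise");
}

int main(void)
{
	const uint32_t page_size = 4096;
	const uint64_t npages = 16;
	void *buf;

	/* stand-in for the host mapping that backs a memslot */
	buf = mmap(NULL, npages * page_size, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}

	unmap_chunk(buf, npages, page_size);
	printf("dropped %llu pages\n", (unsigned long long)npages);
	return 0;
}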
667 uint32_t guest_page_size = data->vm->page_size; in test_memslot_map_unmap_check()
680 static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync) in test_memslot_map_loop() argument
682 uint32_t guest_page_size = data->vm->page_size; in test_memslot_map_loop()
689 test_memslot_do_unmap(data, guest_pages / 2, guest_pages / 2); in test_memslot_map_loop()
698 host_perform_sync(sync); in test_memslot_map_loop()
700 test_memslot_map_unmap_check(data, guest_pages / 2 - 1, MEM_TEST_VAL_1); in test_memslot_map_loop()
701 test_memslot_do_unmap(data, 0, guest_pages / 2); in test_memslot_map_loop()
713 host_perform_sync(sync); in test_memslot_map_loop()
714 test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2); in test_memslot_map_loop()
715 test_memslot_map_unmap_check(data, guest_pages - 1, MEM_TEST_VAL_2); in test_memslot_map_loop()
719 struct sync_area *sync, in test_memslot_unmap_loop_common() argument
722 uint32_t guest_page_size = data->vm->page_size; in test_memslot_unmap_loop_common()
733 host_perform_sync(sync); in test_memslot_unmap_loop_common()
735 for (ctr = 0; ctr < guest_pages / 2; ctr += chunk) in test_memslot_unmap_loop_common()
739 host_perform_sync(sync); in test_memslot_unmap_loop_common()
740 test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2); in test_memslot_unmap_loop_common()
741 for (ctr = guest_pages / 2; ctr < guest_pages; ctr += chunk) in test_memslot_unmap_loop_common()
746 struct sync_area *sync) in test_memslot_unmap_loop() argument
749 uint32_t guest_page_size = data->vm->page_size; in test_memslot_unmap_loop()
753 test_memslot_unmap_loop_common(data, sync, guest_chunk_pages); in test_memslot_unmap_loop()
757 struct sync_area *sync) in test_memslot_unmap_loop_chunked() argument
759 uint32_t guest_page_size = data->vm->page_size; in test_memslot_unmap_loop_chunked()
762 test_memslot_unmap_loop_common(data, sync, guest_chunk_pages); in test_memslot_unmap_loop_chunked()
765 static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync) in test_memslot_rw_loop() argument
768 uint32_t guest_page_size = data->vm->page_size; in test_memslot_rw_loop()
770 for (gptr = MEM_TEST_GPA + guest_page_size / 2; in test_memslot_rw_loop()
774 host_perform_sync(sync); in test_memslot_rw_loop()
787 host_perform_sync(sync); in test_memslot_rw_loop()
794 bool (*prepare)(struct vm_data *data, struct sync_area *sync,
796 void (*loop)(struct vm_data *data, struct sync_area *sync);
806 uint64_t mem_size = tdata->mem_size ? : MEM_SIZE; in test_execute()
808 struct sync_area *sync; in test_execute() local
813 if (!prepare_vm(data, nslots, maxslots, tdata->guest_code, in test_execute()
819 sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL); in test_execute()
820 if (tdata->prepare && in test_execute()
821 !tdata->prepare(data, sync, maxslots)) { in test_execute()
829 let_guest_run(sync); in test_execute()
833 if (guest_runtime->tv_sec >= maxtime) in test_execute()
836 tdata->loop(data, sync); in test_execute()
841 make_guest_exit(sync); in test_execute()
902 …pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r r… in help()
904 pr_info(" -h: print this help screen.\n"); in help()
905 pr_info(" -v: enable verbose mode (not for benchmarking).\n"); in help()
906 pr_info(" -d: enable extra debug checks.\n"); in help()
907 pr_info(" -q: Disable memslot zap quirk during memslot move.\n"); in help()
908 pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n", in help()
909 targs->nslots); in help()
910 pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n", in help()
911 targs->tfirst, NTESTS - 1); in help()
912 pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n", in help()
913 targs->tlast, NTESTS - 1); in help()
914 pr_info(" -l: specify the test length in seconds (currently: %i)\n", in help()
915 targs->seconds); in help()
916 pr_info(" -r: specify the number of runs per test (currently: %i)\n", in help()
917 targs->runs); in help()
943 (MEM_TEST_MAP_SIZE / guest_page_size) <= 2 || in check_memory_sizes()
944 (MEM_TEST_MAP_SIZE / guest_page_size) % 2) { in check_memory_sizes()
952 (2 * MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size)) { in check_memory_sizes()
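The map-area check visible above requires MEM_TEST_MAP_SIZE to cover more than two guest pages and an even number of them, so the guest can split the area into equal halves. A throwaway check with hypothetical sizes, not the test's actual constants:

#include <assert.h>

int main(void)
{
	unsigned long map_size = 1UL << 20;		/* hypothetical map area: 1 MiB */
	unsigned long page_size = 4096;			/* hypothetical guest page size */
	unsigned long pages = map_size / page_size;	/* 256 pages */

	/* mirrors the fragment above: more than 2 pages and evenly splittable */
	assert(pages > 2 && pages % 2 == 0);
	return 0;
}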
966 while ((opt = getopt(argc, argv, "hvdqs:f:e:l:r:")) != -1) { in parse_args()
986 targs->nslots = atoi_paranoid(optarg); in parse_args()
987 if (targs->nslots <= 1 && targs->nslots != -1) { in parse_args()
988 pr_info("Slot count cap must be larger than 1 or -1 for no cap\n"); in parse_args()
993 targs->tfirst = atoi_non_negative("First test", optarg); in parse_args()
996 targs->tlast = atoi_non_negative("Last test", optarg); in parse_args()
997 if (targs->tlast >= NTESTS) { in parse_args()
998 pr_info("Last test to run has to be non-negative and less than %zu\n", in parse_args()
1004 targs->seconds = atoi_non_negative("Test length", optarg); in parse_args()
1007 targs->runs = atoi_positive("Runs per test", optarg); in parse_args()
1017 if (targs->tfirst > targs->tlast) { in parse_args()
1029 if (targs->nslots == -1) in parse_args()
1030 targs->nslots = max_mem_slots - 1; in parse_args()
1032 targs->nslots = min_t(int, targs->nslots, max_mem_slots) - 1; in parse_args()
1035 targs->nslots + 1); in parse_args()
1054 if (!test_execute(targs->nslots, &maxslots, targs->seconds, data, in test_loop()
1070 pr_info("No full loops done - too short test time or system too loaded?\n"); in test_loop()
1087 if (!data->mem_size && in test_loop()
1088 (!rbestslottime->slottimens || in test_loop()
1089 result.slottimens < rbestslottime->slottimens)) in test_loop()
1091 if (!rbestruntime->runtimens || in test_loop()
1092 result.runtimens < rbestruntime->runtimens) in test_loop()
1102 .tlast = NTESTS - 1, in main()
1103 .nslots = -1, in main()
1111 return -1; in main()
1114 return -1; in main()
1125 data->name, targs.runs, targs.seconds); in main()