Lines matching "fails", "without", "test", "cd" in virt/kvm/kvm_main.c
1 // SPDX-License-Identifier: GPL-2.0-only
3 * Kernel-based Virtual Machine (KVM) Hypervisor
74 MODULE_DESCRIPTION("Kernel-based Virtual Machine (KVM) Hypervisor");
82 /* Default doubles per-vcpu halt_poll_ns. */
92 /* Default halves per-vcpu halt_poll_ns. */
100 * kvm->lock --> kvm->slots_lock --> kvm->irq_lock
125 * - Prevent a compat task from opening /dev/kvm
126 * - If the open has been done by a 64bit task, and the KVM fd
130 unsigned long arg) { return -EINVAL; } in kvm_no_compat_ioctl()
134 return is_compat_task() ? -ENODEV : 0; in kvm_no_compat_open()
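The two stubs above make KVM unusable from 32-bit compat tasks when CONFIG_KVM_COMPAT is absent: open() fails with -ENODEV and any compat ioctl returns -EINVAL. For contrast, a minimal userspace sketch (not part of kvm_main.c) of the normal open path, using only the standard <linux/kvm.h> UAPI:

/* Minimal sketch: open /dev/kvm, sanity-check the API version, create a VM. */
#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
        if (kvm < 0) {                          /* a compat task may see ENODEV here */
                perror("open /dev/kvm");
                return 1;
        }
        if (ioctl(kvm, KVM_GET_API_VERSION, 0) != KVM_API_VERSION) {
                fprintf(stderr, "unexpected KVM API version\n");
                return 1;
        }
        int vm = ioctl(kvm, KVM_CREATE_VM, 0);  /* machine type 0 */
        if (vm < 0) {
                perror("KVM_CREATE_VM");
                return 1;
        }
        close(vm);
        close(kvm);
        return 0;
}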
210 preempt_notifier_register(&vcpu->preempt_notifier); in vcpu_load()
220 preempt_notifier_unregister(&vcpu->preempt_notifier); in vcpu_put()
279 cpu = READ_ONCE(vcpu->cpu); in kvm_make_vcpu_request()
280 if (cpu != -1 && cpu != current_cpu) in kvm_make_vcpu_request()
336 ++kvm->stat.generic.remote_tlb_flush_requests; in kvm_flush_remote_tlbs()
340 * mode. Pairs with a memory barrier in arch-specific code. in kvm_flush_remote_tlbs()
341 * - x86: smp_mb__after_srcu_read_unlock in vcpu_enter_guest in kvm_flush_remote_tlbs()
343 * - powerpc: smp_mb in kvmppc_prepare_to_enter. in kvm_flush_remote_tlbs()
346 * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that in kvm_flush_remote_tlbs()
351 ++kvm->stat.generic.remote_tlb_flush; in kvm_flush_remote_tlbs()
361 * Fall back to a flushing entire TLBs if the architecture range-based in kvm_flush_remote_tlbs_range()
378 lockdep_assert_held(&kvm->slots_lock); in kvm_flush_remote_tlbs_memslot()
379 kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages); in kvm_flush_remote_tlbs_memslot()
394 gfp_flags |= mc->gfp_zero; in mmu_memory_cache_alloc_obj()
396 if (mc->kmem_cache) in mmu_memory_cache_alloc_obj()
397 return kmem_cache_alloc(mc->kmem_cache, gfp_flags); in mmu_memory_cache_alloc_obj()
400 if (page && mc->init_value) in mmu_memory_cache_alloc_obj()
401 memset64(page, mc->init_value, PAGE_SIZE / sizeof(u64)); in mmu_memory_cache_alloc_obj()
407 gfp_t gfp = mc->gfp_custom ? mc->gfp_custom : GFP_KERNEL_ACCOUNT; in __kvm_mmu_topup_memory_cache()
410 if (mc->nobjs >= min) in __kvm_mmu_topup_memory_cache()
413 if (unlikely(!mc->objects)) { in __kvm_mmu_topup_memory_cache()
415 return -EIO; in __kvm_mmu_topup_memory_cache()
421 if (WARN_ON_ONCE(mc->init_value && (mc->kmem_cache || mc->gfp_zero))) in __kvm_mmu_topup_memory_cache()
422 return -EIO; in __kvm_mmu_topup_memory_cache()
424 mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp); in __kvm_mmu_topup_memory_cache()
425 if (!mc->objects) in __kvm_mmu_topup_memory_cache()
426 return -ENOMEM; in __kvm_mmu_topup_memory_cache()
428 mc->capacity = capacity; in __kvm_mmu_topup_memory_cache()
432 if (WARN_ON_ONCE(mc->capacity != capacity)) in __kvm_mmu_topup_memory_cache()
433 return -EIO; in __kvm_mmu_topup_memory_cache()
435 while (mc->nobjs < mc->capacity) { in __kvm_mmu_topup_memory_cache()
438 return mc->nobjs >= min ? 0 : -ENOMEM; in __kvm_mmu_topup_memory_cache()
439 mc->objects[mc->nobjs++] = obj; in __kvm_mmu_topup_memory_cache()
451 return mc->nobjs; in kvm_mmu_memory_cache_nr_free_objects()
456 while (mc->nobjs) { in kvm_mmu_free_memory_cache()
457 if (mc->kmem_cache) in kvm_mmu_free_memory_cache()
458 kmem_cache_free(mc->kmem_cache, mc->objects[--mc->nobjs]); in kvm_mmu_free_memory_cache()
460 free_page((unsigned long)mc->objects[--mc->nobjs]); in kvm_mmu_free_memory_cache()
463 kvfree(mc->objects); in kvm_mmu_free_memory_cache()
465 mc->objects = NULL; in kvm_mmu_free_memory_cache()
466 mc->capacity = 0; in kvm_mmu_free_memory_cache()
473 if (WARN_ON(!mc->nobjs)) in kvm_mmu_memory_cache_alloc()
476 p = mc->objects[--mc->nobjs]; in kvm_mmu_memory_cache_alloc()
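__kvm_mmu_topup_memory_cache() above reserves objects while allocation (and sleeping) is still allowed, so kvm_mmu_memory_cache_alloc() can later hand them out in contexts that must not fail, e.g. under mmu_lock. A rough userspace analogy of that topup-then-consume pattern; the names and sizes below are hypothetical, not KVM code:

/* Userspace analogy of the topup-then-consume pattern (hypothetical names). */
#include <stdio.h>
#include <stdlib.h>

struct obj_cache {
        void **objects;
        int capacity;
        int nobjs;
};

/* May allocate (in the kernel: may sleep) -- fill the cache up front. */
static int cache_topup(struct obj_cache *mc, int capacity, int min)
{
        if (!mc->objects) {
                mc->objects = calloc(capacity, sizeof(void *));
                if (!mc->objects)
                        return -1;
                mc->capacity = capacity;
        }
        while (mc->nobjs < mc->capacity) {
                void *obj = calloc(1, 64);
                if (!obj)
                        return mc->nobjs >= min ? 0 : -1;
                mc->objects[mc->nobjs++] = obj;
        }
        return 0;
}

/* Must not fail or allocate -- only hand out objects reserved above. */
static void *cache_alloc(struct obj_cache *mc)
{
        return mc->nobjs ? mc->objects[--mc->nobjs] : NULL;
}

int main(void)
{
        struct obj_cache mc = { 0 };
        if (cache_topup(&mc, 8, 4))
                return 1;
        void *obj = cache_alloc(&mc);
        printf("%d objects left after one alloc (%p)\n", mc.nobjs, obj);
        return 0;
}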
484 mutex_init(&vcpu->mutex); in kvm_vcpu_init()
485 vcpu->cpu = -1; in kvm_vcpu_init()
486 vcpu->kvm = kvm; in kvm_vcpu_init()
487 vcpu->vcpu_id = id; in kvm_vcpu_init()
488 vcpu->pid = NULL; in kvm_vcpu_init()
490 rcuwait_init(&vcpu->wait); in kvm_vcpu_init()
496 vcpu->preempted = false; in kvm_vcpu_init()
497 vcpu->ready = false; in kvm_vcpu_init()
498 preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); in kvm_vcpu_init()
499 vcpu->last_used_slot = NULL; in kvm_vcpu_init()
502 snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d", in kvm_vcpu_init()
509 kvm_dirty_ring_free(&vcpu->dirty_ring); in kvm_vcpu_destroy()
513 * the vcpu->pid pointer, and at destruction time all file descriptors in kvm_vcpu_destroy()
516 put_pid(rcu_dereference_protected(vcpu->pid, 1)); in kvm_vcpu_destroy()
518 free_page((unsigned long)vcpu->run); in kvm_vcpu_destroy()
529 xa_erase(&kvm->vcpu_array, i); in kvm_destroy_vcpus()
532 atomic_set(&kvm->online_vcpus, 0); in kvm_destroy_vcpus()
548 * 64-bit addresses, as KVM notifiers can operate on host virtual
549 * addresses (unsigned long) and guest physical addresses (64-bit).
561 * The inner-most helper returns a tuple containing the return value from the
562 * arch- and action-specific handler, plus a flag indicating whether or not at
566 * return from arch code as a bool, outer helpers will cast it to an int. :-(
576 * function will have a non-zero address, and so it will generate code to
588 for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \
604 if (WARN_ON_ONCE(range->end <= range->start)) in __kvm_handle_hva_range()
608 if (WARN_ON_ONCE(IS_KVM_NULL_FN(range->on_lock) && in __kvm_handle_hva_range()
609 IS_KVM_NULL_FN(range->handler))) in __kvm_handle_hva_range()
612 idx = srcu_read_lock(&kvm->srcu); in __kvm_handle_hva_range()
619 range->start, range->end - 1) { in __kvm_handle_hva_range()
622 slot = container_of(node, struct kvm_memory_slot, hva_node[slots->node_idx]); in __kvm_handle_hva_range()
623 hva_start = max_t(unsigned long, range->start, slot->userspace_addr); in __kvm_handle_hva_range()
624 hva_end = min_t(unsigned long, range->end, in __kvm_handle_hva_range()
625 slot->userspace_addr + (slot->npages << PAGE_SHIFT)); in __kvm_handle_hva_range()
633 gfn_range.arg = range->arg; in __kvm_handle_hva_range()
634 gfn_range.may_block = range->may_block; in __kvm_handle_hva_range()
638 * {gfn_start, gfn_start+1, ..., gfn_end-1}. in __kvm_handle_hva_range()
641 gfn_range.end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, slot); in __kvm_handle_hva_range()
647 if (!IS_KVM_NULL_FN(range->on_lock)) in __kvm_handle_hva_range()
648 range->on_lock(kvm); in __kvm_handle_hva_range()
650 if (IS_KVM_NULL_FN(range->handler)) in __kvm_handle_hva_range()
653 r.ret |= range->handler(kvm, &gfn_range); in __kvm_handle_hva_range()
657 if (range->flush_on_ret && r.ret) in __kvm_handle_hva_range()
664 srcu_read_unlock(&kvm->srcu, idx); in __kvm_handle_hva_range()
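Each memslot overlapping the HVA range is clipped to the slot and converted to a GFN range: base_gfn plus the page offset of the HVA within the slot, with the exclusive end rounded up so partial pages are covered. A small standalone illustration with made-up addresses (not kernel code):

/* Illustration of the hva -> gfn clipping arithmetic used above. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (UINT64_C(1) << PAGE_SHIFT)

struct slot { uint64_t base_gfn, npages, userspace_addr; };

static uint64_t hva_to_gfn(uint64_t hva, const struct slot *s)
{
        /* gfn = base_gfn + (offset of hva within the slot, in pages) */
        return s->base_gfn + ((hva - s->userspace_addr) >> PAGE_SHIFT);
}

int main(void)
{
        struct slot s = { .base_gfn = 0x100, .npages = 16,
                          .userspace_addr = UINT64_C(0x7f0000000000) };
        uint64_t hva_start = UINT64_C(0x7f0000001800);  /* inside page 1 of the slot */
        uint64_t hva_end   = UINT64_C(0x7f0000004000);  /* exclusive */

        /* Mirror the listing: the end rounds up so a partial last page is included. */
        uint64_t gfn_start = hva_to_gfn(hva_start, &s);
        uint64_t gfn_end   = hva_to_gfn(hva_end + PAGE_SIZE - 1, &s);

        printf("gfn range [%#" PRIx64 ", %#" PRIx64 ")\n", gfn_start, gfn_end); /* [0x101, 0x104) */
        return 0;
}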
707 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_mmu_invalidate_begin()
710 * spte can be established without taking the mmu_lock and in kvm_mmu_invalidate_begin()
713 kvm->mmu_invalidate_in_progress++; in kvm_mmu_invalidate_begin()
715 if (likely(kvm->mmu_invalidate_in_progress == 1)) { in kvm_mmu_invalidate_begin()
716 kvm->mmu_invalidate_range_start = INVALID_GPA; in kvm_mmu_invalidate_begin()
717 kvm->mmu_invalidate_range_end = INVALID_GPA; in kvm_mmu_invalidate_begin()
723 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_mmu_invalidate_range_add()
725 WARN_ON_ONCE(!kvm->mmu_invalidate_in_progress); in kvm_mmu_invalidate_range_add()
727 if (likely(kvm->mmu_invalidate_range_start == INVALID_GPA)) { in kvm_mmu_invalidate_range_add()
728 kvm->mmu_invalidate_range_start = start; in kvm_mmu_invalidate_range_add()
729 kvm->mmu_invalidate_range_end = end; in kvm_mmu_invalidate_range_add()
740 kvm->mmu_invalidate_range_start = in kvm_mmu_invalidate_range_add()
741 min(kvm->mmu_invalidate_range_start, start); in kvm_mmu_invalidate_range_add()
742 kvm->mmu_invalidate_range_end = in kvm_mmu_invalidate_range_add()
743 max(kvm->mmu_invalidate_range_end, end); in kvm_mmu_invalidate_range_add()
749 kvm_mmu_invalidate_range_add(kvm, range->start, range->end); in kvm_mmu_unmap_gfn_range()
758 .start = range->start, in kvm_mmu_notifier_invalidate_range_start()
759 .end = range->end, in kvm_mmu_notifier_invalidate_range_start()
766 trace_kvm_unmap_hva_range(range->start, range->end); in kvm_mmu_notifier_invalidate_range_start()
771 * functions. Without that guarantee, the mmu_invalidate_in_progress in kvm_mmu_notifier_invalidate_range_start()
776 spin_lock(&kvm->mn_invalidate_lock); in kvm_mmu_notifier_invalidate_range_start()
777 kvm->mn_active_invalidate_count++; in kvm_mmu_notifier_invalidate_range_start()
778 spin_unlock(&kvm->mn_invalidate_lock); in kvm_mmu_notifier_invalidate_range_start()
786 * Because this runs without holding mmu_lock, the pfn caches must use in kvm_mmu_notifier_invalidate_range_start()
790 gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end); in kvm_mmu_notifier_invalidate_range_start()
805 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_mmu_invalidate_end()
812 kvm->mmu_invalidate_seq++; in kvm_mmu_invalidate_end()
819 kvm->mmu_invalidate_in_progress--; in kvm_mmu_invalidate_end()
820 KVM_BUG_ON(kvm->mmu_invalidate_in_progress < 0, kvm); in kvm_mmu_invalidate_end()
826 WARN_ON_ONCE(kvm->mmu_invalidate_range_start == INVALID_GPA); in kvm_mmu_invalidate_end()
834 .start = range->start, in kvm_mmu_notifier_invalidate_range_end()
835 .end = range->end, in kvm_mmu_notifier_invalidate_range_end()
846 spin_lock(&kvm->mn_invalidate_lock); in kvm_mmu_notifier_invalidate_range_end()
847 if (!WARN_ON_ONCE(!kvm->mn_active_invalidate_count)) in kvm_mmu_notifier_invalidate_range_end()
848 --kvm->mn_active_invalidate_count; in kvm_mmu_notifier_invalidate_range_end()
849 wake = !kvm->mn_active_invalidate_count; in kvm_mmu_notifier_invalidate_range_end()
850 spin_unlock(&kvm->mn_invalidate_lock); in kvm_mmu_notifier_invalidate_range_end()
857 rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait); in kvm_mmu_notifier_invalidate_range_end()
879 * affect performance on pre-Haswell Intel EPT, where there is in kvm_mmu_notifier_clear_young()
909 idx = srcu_read_lock(&kvm->srcu); in kvm_mmu_notifier_release()
911 srcu_read_unlock(&kvm->srcu, idx); in kvm_mmu_notifier_release()
925 kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; in kvm_init_mmu_notifier()
926 return mmu_notifier_register(&kvm->mmu_notifier, current->mm); in kvm_init_mmu_notifier()
950 kvm->pm_notifier.notifier_call = kvm_pm_notifier_call; in kvm_init_pm_notifier()
952 kvm->pm_notifier.priority = INT_MAX; in kvm_init_pm_notifier()
953 register_pm_notifier(&kvm->pm_notifier); in kvm_init_pm_notifier()
958 unregister_pm_notifier(&kvm->pm_notifier); in kvm_destroy_pm_notifier()
972 if (!memslot->dirty_bitmap) in kvm_destroy_dirty_bitmap()
975 vfree(memslot->dirty_bitmap); in kvm_destroy_dirty_bitmap()
976 memslot->dirty_bitmap = NULL; in kvm_destroy_dirty_bitmap()
982 if (slot->flags & KVM_MEM_GUEST_MEMFD) in kvm_free_memslot()
1004 if (!slots->node_idx) in kvm_free_memslots()
1007 hash_for_each_safe(slots->id_hash, bkt, idnode, memslot, id_node[1]) in kvm_free_memslots()
1013 switch (pdesc->desc.flags & KVM_STATS_TYPE_MASK) { in kvm_stats_debugfs_mode()
1030 if (IS_ERR(kvm->debugfs_dentry)) in kvm_destroy_vm_debugfs()
1033 debugfs_remove_recursive(kvm->debugfs_dentry); in kvm_destroy_vm_debugfs()
1035 if (kvm->debugfs_stat_data) { in kvm_destroy_vm_debugfs()
1037 kfree(kvm->debugfs_stat_data[i]); in kvm_destroy_vm_debugfs()
1038 kfree(kvm->debugfs_stat_data); in kvm_destroy_vm_debugfs()
1049 int i, ret = -ENOMEM; in kvm_create_vm_debugfs()
1056 snprintf(dir_name, sizeof(dir_name), "%d-%s", task_pid_nr(current), fdname); in kvm_create_vm_debugfs()
1070 kvm->debugfs_dentry = dent; in kvm_create_vm_debugfs()
1071 kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, in kvm_create_vm_debugfs()
1072 sizeof(*kvm->debugfs_stat_data), in kvm_create_vm_debugfs()
1074 if (!kvm->debugfs_stat_data) in kvm_create_vm_debugfs()
1083 stat_data->kvm = kvm; in kvm_create_vm_debugfs()
1084 stat_data->desc = pdesc; in kvm_create_vm_debugfs()
1085 stat_data->kind = KVM_STAT_VM; in kvm_create_vm_debugfs()
1086 kvm->debugfs_stat_data[i] = stat_data; in kvm_create_vm_debugfs()
1087 debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), in kvm_create_vm_debugfs()
1088 kvm->debugfs_dentry, stat_data, in kvm_create_vm_debugfs()
1098 stat_data->kvm = kvm; in kvm_create_vm_debugfs()
1099 stat_data->desc = pdesc; in kvm_create_vm_debugfs()
1100 stat_data->kind = KVM_STAT_VCPU; in kvm_create_vm_debugfs()
1101 kvm->debugfs_stat_data[i + kvm_vm_stats_header.num_desc] = stat_data; in kvm_create_vm_debugfs()
1102 debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), in kvm_create_vm_debugfs()
1103 kvm->debugfs_dentry, stat_data, in kvm_create_vm_debugfs()
1132 * Called after per-vm debugfs created. When called kvm->debugfs_dentry should
1133 * be setup already, so we can create arch-specific debugfs entries under it.
1135 * a per-arch destroy interface is not needed.
1148 return ERR_PTR(-ENOMEM); in kvm_create_vm()
1151 mmgrab(current->mm); in kvm_create_vm()
1152 kvm->mm = current->mm; in kvm_create_vm()
1154 mutex_init(&kvm->lock); in kvm_create_vm()
1155 mutex_init(&kvm->irq_lock); in kvm_create_vm()
1156 mutex_init(&kvm->slots_lock); in kvm_create_vm()
1157 mutex_init(&kvm->slots_arch_lock); in kvm_create_vm()
1158 spin_lock_init(&kvm->mn_invalidate_lock); in kvm_create_vm()
1159 rcuwait_init(&kvm->mn_memslots_update_rcuwait); in kvm_create_vm()
1160 xa_init(&kvm->vcpu_array); in kvm_create_vm()
1162 xa_init(&kvm->mem_attr_array); in kvm_create_vm()
1165 INIT_LIST_HEAD(&kvm->gpc_list); in kvm_create_vm()
1166 spin_lock_init(&kvm->gpc_lock); in kvm_create_vm()
1168 INIT_LIST_HEAD(&kvm->devices); in kvm_create_vm()
1169 kvm->max_vcpus = KVM_MAX_VCPUS; in kvm_create_vm()
1177 kvm->debugfs_dentry = ERR_PTR(-ENOENT); in kvm_create_vm()
1179 snprintf(kvm->stats_id, sizeof(kvm->stats_id), "kvm-%d", in kvm_create_vm()
1182 r = -ENOMEM; in kvm_create_vm()
1183 if (init_srcu_struct(&kvm->srcu)) in kvm_create_vm()
1185 if (init_srcu_struct(&kvm->irq_srcu)) in kvm_create_vm()
1192 refcount_set(&kvm->users_count, 1); in kvm_create_vm()
1196 slots = &kvm->__memslots[i][j]; in kvm_create_vm()
1198 atomic_long_set(&slots->last_used_slot, (unsigned long)NULL); in kvm_create_vm()
1199 slots->hva_tree = RB_ROOT_CACHED; in kvm_create_vm()
1200 slots->gfn_tree = RB_ROOT; in kvm_create_vm()
1201 hash_init(slots->id_hash); in kvm_create_vm()
1202 slots->node_idx = j; in kvm_create_vm()
1205 slots->generation = i; in kvm_create_vm()
1208 rcu_assign_pointer(kvm->memslots[i], &kvm->__memslots[i][0]); in kvm_create_vm()
1211 r = -ENOMEM; in kvm_create_vm()
1213 rcu_assign_pointer(kvm->buses[i], in kvm_create_vm()
1215 if (!kvm->buses[i]) in kvm_create_vm()
1228 INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); in kvm_create_vm()
1248 list_add(&kvm->vm_list, &vm_list); in kvm_create_vm()
1262 if (kvm->mmu_notifier.ops) in kvm_create_vm()
1263 mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); in kvm_create_vm()
1270 WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count)); in kvm_create_vm()
1275 cleanup_srcu_struct(&kvm->irq_srcu); in kvm_create_vm()
1277 cleanup_srcu_struct(&kvm->srcu); in kvm_create_vm()
1280 mmdrop(current->mm); in kvm_create_vm()
1289 * We do not need to take the kvm->lock here, because nobody else in kvm_destroy_devices()
1297 * use-after-free, even though this cannot be guaranteed. in kvm_destroy_devices()
1299 list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) { in kvm_destroy_devices()
1300 list_del(&dev->vm_node); in kvm_destroy_devices()
1301 dev->ops->destroy(dev); in kvm_destroy_devices()
1308 struct mm_struct *mm = kvm->mm; in kvm_destroy_vm()
1315 list_del(&kvm->vm_list); in kvm_destroy_vm()
1325 kvm->buses[i] = NULL; in kvm_destroy_vm()
1329 mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); in kvm_destroy_vm()
1336 * memslots would deadlock without this manual intervention. in kvm_destroy_vm()
1340 * in-progress invalidations. in kvm_destroy_vm()
1342 WARN_ON(rcuwait_active(&kvm->mn_memslots_update_rcuwait)); in kvm_destroy_vm()
1343 if (kvm->mn_active_invalidate_count) in kvm_destroy_vm()
1344 kvm->mn_active_invalidate_count = 0; in kvm_destroy_vm()
1346 WARN_ON(kvm->mmu_invalidate_in_progress); in kvm_destroy_vm()
1353 kvm_free_memslots(kvm, &kvm->__memslots[i][0]); in kvm_destroy_vm()
1354 kvm_free_memslots(kvm, &kvm->__memslots[i][1]); in kvm_destroy_vm()
1356 cleanup_srcu_struct(&kvm->irq_srcu); in kvm_destroy_vm()
1357 cleanup_srcu_struct(&kvm->srcu); in kvm_destroy_vm()
1359 xa_destroy(&kvm->mem_attr_array); in kvm_destroy_vm()
1369 refcount_inc(&kvm->users_count); in kvm_get_kvm()
1379 return refcount_inc_not_zero(&kvm->users_count); in kvm_get_kvm_safe()
1385 if (refcount_dec_and_test(&kvm->users_count)) in kvm_put_kvm()
1392 * with a user-visible file descriptor, e.g. a vcpu or device, if installation
1393 * of the new file descriptor fails and the reference cannot be transferred to
1399 WARN_ON(refcount_dec_and_test(&kvm->users_count)); in kvm_put_kvm_no_destroy()
1405 struct kvm *kvm = filp->private_data; in kvm_vm_release()
1421 memslot->dirty_bitmap = __vcalloc(2, dirty_bytes, GFP_KERNEL_ACCOUNT); in kvm_alloc_dirty_bitmap()
1422 if (!memslot->dirty_bitmap) in kvm_alloc_dirty_bitmap()
1423 return -ENOMEM; in kvm_alloc_dirty_bitmap()
1431 int node_idx_inactive = active->node_idx ^ 1; in kvm_get_inactive_memslots()
1433 return &kvm->__memslots[as_id][node_idx_inactive]; in kvm_get_inactive_memslots()
1438  * This also serves as a sanity check that at least one of the pointers is non-NULL,
1448 return b->as_id; in kvm_memslots_get_as_id()
1450 return a->as_id; in kvm_memslots_get_as_id()
1452 WARN_ON_ONCE(a->as_id != b->as_id); in kvm_memslots_get_as_id()
1453 return a->as_id; in kvm_memslots_get_as_id()
1459 struct rb_root *gfn_tree = &slots->gfn_tree; in kvm_insert_gfn_node()
1461 int idx = slots->node_idx; in kvm_insert_gfn_node()
1464 for (node = &gfn_tree->rb_node; *node; ) { in kvm_insert_gfn_node()
1469 if (slot->base_gfn < tmp->base_gfn) in kvm_insert_gfn_node()
1470 node = &(*node)->rb_left; in kvm_insert_gfn_node()
1471 else if (slot->base_gfn > tmp->base_gfn) in kvm_insert_gfn_node()
1472 node = &(*node)->rb_right; in kvm_insert_gfn_node()
1477 rb_link_node(&slot->gfn_node[idx], parent, node); in kvm_insert_gfn_node()
1478 rb_insert_color(&slot->gfn_node[idx], gfn_tree); in kvm_insert_gfn_node()
1484 rb_erase(&slot->gfn_node[slots->node_idx], &slots->gfn_tree); in kvm_erase_gfn_node()
1491 int idx = slots->node_idx; in kvm_replace_gfn_node()
1493 WARN_ON_ONCE(old->base_gfn != new->base_gfn); in kvm_replace_gfn_node()
1495 rb_replace_node(&old->gfn_node[idx], &new->gfn_node[idx], in kvm_replace_gfn_node()
1496 &slots->gfn_tree); in kvm_replace_gfn_node()
1505 * If @new is non-NULL its hva_node[slots_idx] range has to be set
1514 int idx = slots->node_idx; in kvm_replace_memslot()
1517 hash_del(&old->id_node[idx]); in kvm_replace_memslot()
1518 interval_tree_remove(&old->hva_node[idx], &slots->hva_tree); in kvm_replace_memslot()
1520 if ((long)old == atomic_long_read(&slots->last_used_slot)) in kvm_replace_memslot()
1521 atomic_long_set(&slots->last_used_slot, (long)new); in kvm_replace_memslot()
1533 new->hva_node[idx].start = new->userspace_addr; in kvm_replace_memslot()
1534 new->hva_node[idx].last = new->userspace_addr + in kvm_replace_memslot()
1535 (new->npages << PAGE_SHIFT) - 1; in kvm_replace_memslot()
1542 hash_add(slots->id_hash, &new->id_node[idx], new->id); in kvm_replace_memslot()
1543 interval_tree_insert(&new->hva_node[idx], &slots->hva_tree); in kvm_replace_memslot()
1552 if (old && old->base_gfn == new->base_gfn) { in kvm_replace_memslot()
1578 if (mem->flags & KVM_MEM_GUEST_MEMFD) in check_memory_region_flags()
1582 * GUEST_MEMFD is incompatible with read-only memslots, as writes to in check_memory_region_flags()
1583 * read-only memslots have emulated MMIO, not page fault, semantics, in check_memory_region_flags()
1587 !(mem->flags & KVM_MEM_GUEST_MEMFD)) in check_memory_region_flags()
1590 if (mem->flags & ~valid_flags) in check_memory_region_flags()
1591 return -EINVAL; in check_memory_region_flags()
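As checked above, KVM_MEM_GUEST_MEMFD is only accepted when the VM supports it and cannot be combined with KVM_MEM_READONLY. A hedged userspace sketch of binding a guest_memfd-backed slot, assuming a kernel and VM type that advertise KVM_CAP_GUEST_MEMFD; the helper name and values are illustrative:

/* Sketch: bind a guest_memfd-backed slot (requires KVM_CAP_GUEST_MEMFD). */
#include <linux/kvm.h>
#include <stddef.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

int add_gmem_slot(int vm_fd, __u64 gpa, __u64 size)
{
        struct kvm_create_guest_memfd gmem = { .size = size };
        int gmem_fd = ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);
        if (gmem_fd < 0)
                return -1;

        /* Shared (non-private) pages still need a normal userspace mapping. */
        void *hva = mmap(NULL, size, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (hva == MAP_FAILED)
                return -1;

        struct kvm_userspace_memory_region2 region = {
                .slot = 0,
                .flags = KVM_MEM_GUEST_MEMFD,   /* cannot be combined with KVM_MEM_READONLY */
                .guest_phys_addr = gpa,
                .memory_size = size,
                .userspace_addr = (unsigned long)hva,
                .guest_memfd = gmem_fd,
                .guest_memfd_offset = 0,        /* must stay page aligned and in range */
        };
        return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION2, &region);
}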
1601 u64 gen = __kvm_memslots(kvm, as_id)->generation; in kvm_swap_active_memslots()
1604 slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; in kvm_swap_active_memslots()
1611 spin_lock(&kvm->mn_invalidate_lock); in kvm_swap_active_memslots()
1612 prepare_to_rcuwait(&kvm->mn_memslots_update_rcuwait); in kvm_swap_active_memslots()
1613 while (kvm->mn_active_invalidate_count) { in kvm_swap_active_memslots()
1615 spin_unlock(&kvm->mn_invalidate_lock); in kvm_swap_active_memslots()
1617 spin_lock(&kvm->mn_invalidate_lock); in kvm_swap_active_memslots()
1619 finish_rcuwait(&kvm->mn_memslots_update_rcuwait); in kvm_swap_active_memslots()
1620 rcu_assign_pointer(kvm->memslots[as_id], slots); in kvm_swap_active_memslots()
1621 spin_unlock(&kvm->mn_invalidate_lock); in kvm_swap_active_memslots()
1628 mutex_unlock(&kvm->slots_arch_lock); in kvm_swap_active_memslots()
1630 synchronize_srcu_expedited(&kvm->srcu); in kvm_swap_active_memslots()
1634 * update in-progress flag and incrementing the generation based on in kvm_swap_active_memslots()
1638 gen = slots->generation & ~KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; in kvm_swap_active_memslots()
1651 slots->generation = gen; in kvm_swap_active_memslots()
1669 if (!(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) in kvm_prepare_memory_region()
1670 new->dirty_bitmap = NULL; in kvm_prepare_memory_region()
1671 else if (old && old->dirty_bitmap) in kvm_prepare_memory_region()
1672 new->dirty_bitmap = old->dirty_bitmap; in kvm_prepare_memory_region()
1679 bitmap_set(new->dirty_bitmap, 0, new->npages); in kvm_prepare_memory_region()
1686 if (r && new && new->dirty_bitmap && (!old || !old->dirty_bitmap)) in kvm_prepare_memory_region()
1697 int old_flags = old ? old->flags : 0; in kvm_commit_memory_region()
1698 int new_flags = new ? new->flags : 0; in kvm_commit_memory_region()
1704 kvm->nr_memslot_pages -= old->npages; in kvm_commit_memory_region()
1706 kvm->nr_memslot_pages += new->npages; in kvm_commit_memory_region()
1709 int change = (new_flags & KVM_MEM_LOG_DIRTY_PAGES) ? 1 : -1; in kvm_commit_memory_region()
1710 atomic_set(&kvm->nr_memslots_dirty_logging, in kvm_commit_memory_region()
1711 atomic_read(&kvm->nr_memslots_dirty_logging) + change); in kvm_commit_memory_region()
1730 if (old->dirty_bitmap && !new->dirty_bitmap) in kvm_commit_memory_region()
1769 dest->base_gfn = src->base_gfn; in kvm_copy_memslot()
1770 dest->npages = src->npages; in kvm_copy_memslot()
1771 dest->dirty_bitmap = src->dirty_bitmap; in kvm_copy_memslot()
1772 dest->arch = src->arch; in kvm_copy_memslot()
1773 dest->userspace_addr = src->userspace_addr; in kvm_copy_memslot()
1774 dest->flags = src->flags; in kvm_copy_memslot()
1775 dest->id = src->id; in kvm_copy_memslot()
1776 dest->as_id = src->as_id; in kvm_copy_memslot()
1789 invalid_slot->flags |= KVM_MEMSLOT_INVALID; in kvm_invalidate_memslot()
1797 kvm_swap_active_memslots(kvm, old->as_id); in kvm_invalidate_memslot()
1801 * memslot will be created. Validation of sp->gfn happens in: in kvm_invalidate_memslot()
1802 * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) in kvm_invalidate_memslot()
1803 * - kvm_is_visible_gfn (mmu_check_root) in kvm_invalidate_memslot()
1809 mutex_lock(&kvm->slots_arch_lock); in kvm_invalidate_memslot()
1812 * Copy the arch-specific field of the newly-installed slot back to the in kvm_invalidate_memslot()
1814 * slots_arch_lock in kvm_swap_active_memslots() and re-acquiring the lock in kvm_invalidate_memslot()
1818 old->arch = invalid_slot->arch; in kvm_invalidate_memslot()
1889 mutex_lock(&kvm->slots_arch_lock); in kvm_set_memslot()
1895 * for the memslot when it is deleted/moved. Without pre-invalidation in kvm_set_memslot()
1896 * (and without a lock), a window would exist between effecting the in kvm_set_memslot()
1898 * guest could access a non-existent memslot. in kvm_set_memslot()
1901 * slot needs to be preserved in case a later step fails and the in kvm_set_memslot()
1907 mutex_unlock(&kvm->slots_arch_lock); in kvm_set_memslot()
1908 return -ENOMEM; in kvm_set_memslot()
1925 mutex_unlock(&kvm->slots_arch_lock); in kvm_set_memslot()
1953 * No need to refresh new->arch, changes after dropping slots_arch_lock in kvm_set_memslot()
1955 * responsible for knowing that new->arch may be stale. in kvm_set_memslot()
1968 if (iter.slot->id != id) in kvm_check_memslot_overlap()
1981 * Must be called holding kvm->slots_lock for write.
1998 as_id = mem->slot >> 16; in __kvm_set_memory_region()
1999 id = (u16)mem->slot; in __kvm_set_memory_region()
2002 if ((mem->memory_size & (PAGE_SIZE - 1)) || in __kvm_set_memory_region()
2003 (mem->memory_size != (unsigned long)mem->memory_size)) in __kvm_set_memory_region()
2004 return -EINVAL; in __kvm_set_memory_region()
2005 if (mem->guest_phys_addr & (PAGE_SIZE - 1)) in __kvm_set_memory_region()
2006 return -EINVAL; in __kvm_set_memory_region()
2008 if ((mem->userspace_addr & (PAGE_SIZE - 1)) || in __kvm_set_memory_region()
2009 (mem->userspace_addr != untagged_addr(mem->userspace_addr)) || in __kvm_set_memory_region()
2010 !access_ok((void __user *)(unsigned long)mem->userspace_addr, in __kvm_set_memory_region()
2011 mem->memory_size)) in __kvm_set_memory_region()
2012 return -EINVAL; in __kvm_set_memory_region()
2013 if (mem->flags & KVM_MEM_GUEST_MEMFD && in __kvm_set_memory_region()
2014 (mem->guest_memfd_offset & (PAGE_SIZE - 1) || in __kvm_set_memory_region()
2015 mem->guest_memfd_offset + mem->memory_size < mem->guest_memfd_offset)) in __kvm_set_memory_region()
2016 return -EINVAL; in __kvm_set_memory_region()
2018 return -EINVAL; in __kvm_set_memory_region()
2019 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) in __kvm_set_memory_region()
2020 return -EINVAL; in __kvm_set_memory_region()
2021 if ((mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES) in __kvm_set_memory_region()
2022 return -EINVAL; in __kvm_set_memory_region()
2032 if (!mem->memory_size) { in __kvm_set_memory_region()
2033 if (!old || !old->npages) in __kvm_set_memory_region()
2034 return -EINVAL; in __kvm_set_memory_region()
2036 if (WARN_ON_ONCE(kvm->nr_memslot_pages < old->npages)) in __kvm_set_memory_region()
2037 return -EIO; in __kvm_set_memory_region()
2042 base_gfn = (mem->guest_phys_addr >> PAGE_SHIFT); in __kvm_set_memory_region()
2043 npages = (mem->memory_size >> PAGE_SHIFT); in __kvm_set_memory_region()
2045 if (!old || !old->npages) { in __kvm_set_memory_region()
2052 if ((kvm->nr_memslot_pages + npages) < kvm->nr_memslot_pages) in __kvm_set_memory_region()
2053 return -EINVAL; in __kvm_set_memory_region()
2056 if (mem->flags & KVM_MEM_GUEST_MEMFD) in __kvm_set_memory_region()
2057 return -EINVAL; in __kvm_set_memory_region()
2058 if ((mem->userspace_addr != old->userspace_addr) || in __kvm_set_memory_region()
2059 (npages != old->npages) || in __kvm_set_memory_region()
2060 ((mem->flags ^ old->flags) & KVM_MEM_READONLY)) in __kvm_set_memory_region()
2061 return -EINVAL; in __kvm_set_memory_region()
2063 if (base_gfn != old->base_gfn) in __kvm_set_memory_region()
2065 else if (mem->flags != old->flags) in __kvm_set_memory_region()
2073 return -EEXIST; in __kvm_set_memory_region()
2078 return -ENOMEM; in __kvm_set_memory_region()
2080 new->as_id = as_id; in __kvm_set_memory_region()
2081 new->id = id; in __kvm_set_memory_region()
2082 new->base_gfn = base_gfn; in __kvm_set_memory_region()
2083 new->npages = npages; in __kvm_set_memory_region()
2084 new->flags = mem->flags; in __kvm_set_memory_region()
2085 new->userspace_addr = mem->userspace_addr; in __kvm_set_memory_region()
2086 if (mem->flags & KVM_MEM_GUEST_MEMFD) { in __kvm_set_memory_region()
2087 r = kvm_gmem_bind(kvm, new, mem->guest_memfd, mem->guest_memfd_offset); in __kvm_set_memory_region()
2099 if (mem->flags & KVM_MEM_GUEST_MEMFD) in __kvm_set_memory_region()
2112 mutex_lock(&kvm->slots_lock); in kvm_set_memory_region()
2114 mutex_unlock(&kvm->slots_lock); in kvm_set_memory_region()
2122 if ((u16)mem->slot >= KVM_USER_MEM_SLOTS) in kvm_vm_ioctl_set_memory_region()
2123 return -EINVAL; in kvm_vm_ioctl_set_memory_region()
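The validation above requires guest_phys_addr, userspace_addr and memory_size to be page aligned and packs the address-space ID into the top 16 bits of the slot number. A minimal userspace sketch of registering one slot under those rules (the helper name is illustrative):

/* Sketch: register one page-aligned memslot, mirroring the checks above. */
#include <linux/kvm.h>
#include <stddef.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

int add_slot(int vm_fd, __u16 as_id, __u16 id, __u64 gpa, __u64 size)
{
        /* guest_phys_addr, userspace_addr and memory_size must be page aligned. */
        void *hva = mmap(NULL, size, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (hva == MAP_FAILED)
                return -1;

        struct kvm_userspace_memory_region region = {
                /* The ioctl packs the address-space id into the top 16 bits of "slot". */
                .slot = ((__u32)as_id << 16) | id,
                .flags = KVM_MEM_LOG_DIRTY_PAGES,   /* optional: allocates a dirty bitmap */
                .guest_phys_addr = gpa,
                .memory_size = size,                /* 0 here would delete the slot instead */
                .userspace_addr = (unsigned long)hva,
        };
        return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}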
2130 * kvm_get_dirty_log - get a snapshot of dirty pages
2146 return -ENXIO; in kvm_get_dirty_log()
2151 as_id = log->slot >> 16; in kvm_get_dirty_log()
2152 id = (u16)log->slot; in kvm_get_dirty_log()
2154 return -EINVAL; in kvm_get_dirty_log()
2158 if (!(*memslot) || !(*memslot)->dirty_bitmap) in kvm_get_dirty_log()
2159 return -ENOENT; in kvm_get_dirty_log()
2166 any = (*memslot)->dirty_bitmap[i]; in kvm_get_dirty_log()
2168 if (copy_to_user(log->dirty_bitmap, (*memslot)->dirty_bitmap, n)) in kvm_get_dirty_log()
2169 return -EFAULT; in kvm_get_dirty_log()
2179 * kvm_get_dirty_log_protect - get a snapshot of dirty pages
2211 return -ENXIO; in kvm_get_dirty_log_protect()
2213 as_id = log->slot >> 16; in kvm_get_dirty_log_protect()
2214 id = (u16)log->slot; in kvm_get_dirty_log_protect()
2216 return -EINVAL; in kvm_get_dirty_log_protect()
2220 if (!memslot || !memslot->dirty_bitmap) in kvm_get_dirty_log_protect()
2221 return -ENOENT; in kvm_get_dirty_log_protect()
2223 dirty_bitmap = memslot->dirty_bitmap; in kvm_get_dirty_log_protect()
2229 if (kvm->manual_dirty_log_protect) { in kvm_get_dirty_log_protect()
2265 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) in kvm_get_dirty_log_protect()
2266 return -EFAULT; in kvm_get_dirty_log_protect()
2272 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
2276 * Steps 1-4 below provide general overview of dirty page logging. See
2279 * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
2295 mutex_lock(&kvm->slots_lock); in kvm_vm_ioctl_get_dirty_log()
2299 mutex_unlock(&kvm->slots_lock); in kvm_vm_ioctl_get_dirty_log()
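kvm_get_dirty_log() copies out a bitmap sized as the slot's page count rounded up to a multiple of BITS_PER_LONG, so userspace must supply at least that much room. A sketch of the caller side, assuming a 64-bit host and a slot created with KVM_MEM_LOG_DIRTY_PAGES (the helper name is illustrative):

/* Sketch: fetch the dirty bitmap of one slot and hand it back to the caller. */
#include <linux/kvm.h>
#include <stdlib.h>
#include <sys/ioctl.h>

int get_dirty_log(int vm_fd, __u32 slot, __u64 npages, unsigned long **bitmap_out)
{
        /* Same sizing as the kernel on a 64-bit host: pages rounded up to 64, in bytes. */
        size_t bytes = ((npages + 63) / 64) * 8;
        unsigned long *bitmap = calloc(1, bytes);
        if (!bitmap)
                return -1;

        struct kvm_dirty_log log = {
                .slot = slot,                 /* top 16 bits select the address space */
                .dirty_bitmap = bitmap,
        };
        if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log)) {
                free(bitmap);
                return -1;
        }
        *bitmap_out = bitmap;                 /* bit i set => gfn base_gfn + i was written */
        return 0;
}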
2304 * kvm_clear_dirty_log_protect - clear dirty bits in the bitmap
2323 return -ENXIO; in kvm_clear_dirty_log_protect()
2325 as_id = log->slot >> 16; in kvm_clear_dirty_log_protect()
2326 id = (u16)log->slot; in kvm_clear_dirty_log_protect()
2328 return -EINVAL; in kvm_clear_dirty_log_protect()
2330 if (log->first_page & 63) in kvm_clear_dirty_log_protect()
2331 return -EINVAL; in kvm_clear_dirty_log_protect()
2335 if (!memslot || !memslot->dirty_bitmap) in kvm_clear_dirty_log_protect()
2336 return -ENOENT; in kvm_clear_dirty_log_protect()
2338 dirty_bitmap = memslot->dirty_bitmap; in kvm_clear_dirty_log_protect()
2340 n = ALIGN(log->num_pages, BITS_PER_LONG) / 8; in kvm_clear_dirty_log_protect()
2342 if (log->first_page > memslot->npages || in kvm_clear_dirty_log_protect()
2343 log->num_pages > memslot->npages - log->first_page || in kvm_clear_dirty_log_protect()
2344 (log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63))) in kvm_clear_dirty_log_protect()
2345 return -EINVAL; in kvm_clear_dirty_log_protect()
2351 if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n)) in kvm_clear_dirty_log_protect()
2352 return -EFAULT; in kvm_clear_dirty_log_protect()
2355 for (offset = log->first_page, i = offset / BITS_PER_LONG, in kvm_clear_dirty_log_protect()
2356 n = DIV_ROUND_UP(log->num_pages, BITS_PER_LONG); n--; in kvm_clear_dirty_log_protect()
2369 * a problem if userspace sets them in log->dirty_bitmap. in kvm_clear_dirty_log_protect()
2390 mutex_lock(&kvm->slots_lock); in kvm_vm_ioctl_clear_dirty_log()
2394 mutex_unlock(&kvm->slots_lock); in kvm_vm_ioctl_clear_dirty_log()
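Clearing dirty bits requires the manual-protect capability, and the checks above force first_page to be 64-aligned and num_pages to be a multiple of 64 unless the range runs to the end of the slot. A sketch of the corresponding userspace calls (illustrative helper; the capability would normally be enabled once at VM setup):

/* Sketch: opt in to manual protection, then clear one 64-page block of slot 0. */
#include <linux/kvm.h>
#include <sys/ioctl.h>

int clear_block(int vm_fd, unsigned long *bitmap, __u64 first_page)
{
        struct kvm_enable_cap cap = {
                .cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
                .args = { KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE },
        };
        if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
                return -1;

        struct kvm_clear_dirty_log clear = {
                .slot = 0,
                .first_page = first_page,     /* must be 64-aligned, per the checks above */
                .num_pages = 64,              /* multiple of 64 unless it hits the slot end */
                .dirty_bitmap = bitmap,       /* bits to clear, as harvested by userspace */
        };
        return ioctl(vm_fd, KVM_CLEAR_DIRTY_LOG, &clear);
}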
2415 XA_STATE(xas, &kvm->mem_attr_array, start); in kvm_range_has_memory_attributes()
2428 return !xas_find(&xas, end - 1); in kvm_range_has_memory_attributes()
2454 gfn_range.arg = range->arg; in kvm_handle_gfn_range()
2455 gfn_range.may_block = range->may_block; in kvm_handle_gfn_range()
2460 kvm_for_each_memslot_in_gfn_range(&iter, slots, range->start, range->end) { in kvm_handle_gfn_range()
2464 gfn_range.start = max(range->start, slot->base_gfn); in kvm_handle_gfn_range()
2465 gfn_range.end = min(range->end, slot->base_gfn + slot->npages); in kvm_handle_gfn_range()
2472 if (!IS_KVM_NULL_FN(range->on_lock)) in kvm_handle_gfn_range()
2473 range->on_lock(kvm); in kvm_handle_gfn_range()
2476 ret |= range->handler(kvm, &gfn_range); in kvm_handle_gfn_range()
2480 if (range->flush_on_ret && ret) in kvm_handle_gfn_range()
2501 kvm_mmu_invalidate_range_add(kvm, range->start, range->end); in kvm_pre_set_memory_attributes()
2532 mutex_lock(&kvm->slots_lock); in kvm_vm_set_mem_attributes()
2543 r = xa_reserve(&kvm->mem_attr_array, i, GFP_KERNEL_ACCOUNT); in kvm_vm_set_mem_attributes()
2551 r = xa_err(xa_store(&kvm->mem_attr_array, i, entry, in kvm_vm_set_mem_attributes()
2559 mutex_unlock(&kvm->slots_lock); in kvm_vm_set_mem_attributes()
2569 if (attrs->flags) in kvm_vm_ioctl_set_mem_attributes()
2570 return -EINVAL; in kvm_vm_ioctl_set_mem_attributes()
2571 if (attrs->attributes & ~kvm_supported_mem_attributes(kvm)) in kvm_vm_ioctl_set_mem_attributes()
2572 return -EINVAL; in kvm_vm_ioctl_set_mem_attributes()
2573 if (attrs->size == 0 || attrs->address + attrs->size < attrs->address) in kvm_vm_ioctl_set_mem_attributes()
2574 return -EINVAL; in kvm_vm_ioctl_set_mem_attributes()
2575 if (!PAGE_ALIGNED(attrs->address) || !PAGE_ALIGNED(attrs->size)) in kvm_vm_ioctl_set_mem_attributes()
2576 return -EINVAL; in kvm_vm_ioctl_set_mem_attributes()
2578 start = attrs->address >> PAGE_SHIFT; in kvm_vm_ioctl_set_mem_attributes()
2579 end = (attrs->address + attrs->size) >> PAGE_SHIFT; in kvm_vm_ioctl_set_mem_attributes()
2583 * KVM. For simplicity, supports generic attributes only on 64-bit in kvm_vm_ioctl_set_mem_attributes()
2586 BUILD_BUG_ON(sizeof(attrs->attributes) != sizeof(unsigned long)); in kvm_vm_ioctl_set_mem_attributes()
2588 return kvm_vm_set_mem_attributes(kvm, start, end, attrs->attributes); in kvm_vm_ioctl_set_mem_attributes()
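The ioctl above only accepts page-aligned, non-wrapping ranges and rejects any flags. A hedged sketch of flipping a range to private from userspace, assuming a kernel and VM type that expose KVM_CAP_MEMORY_ATTRIBUTES (the helper name is illustrative):

/* Sketch: mark a page-aligned GPA range private. */
#include <linux/kvm.h>
#include <sys/ioctl.h>

int make_private(int vm_fd, __u64 gpa, __u64 size)
{
        struct kvm_memory_attributes attrs = {
                .address = gpa,       /* must be page aligned */
                .size = size,         /* must be page aligned and non-zero */
                .attributes = KVM_MEMORY_ATTRIBUTE_PRIVATE,
                .flags = 0,           /* must be zero, per the checks above */
        };
        return ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attrs);
}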
2601 u64 gen = slots->generation; in kvm_vcpu_gfn_to_memslot()
2608 if (unlikely(gen != vcpu->last_used_slot_gen)) { in kvm_vcpu_gfn_to_memslot()
2609 vcpu->last_used_slot = NULL; in kvm_vcpu_gfn_to_memslot()
2610 vcpu->last_used_slot_gen = gen; in kvm_vcpu_gfn_to_memslot()
2613 slot = try_get_memslot(vcpu->last_used_slot, gfn); in kvm_vcpu_gfn_to_memslot()
2620 * thrashing the VM-wide last_used_slot in kvm_memslots. in kvm_vcpu_gfn_to_memslot()
2624 vcpu->last_used_slot = slot; in kvm_vcpu_gfn_to_memslot()
2658 mmap_read_lock(current->mm); in kvm_host_page_size()
2659 vma = find_vma(current->mm, addr); in kvm_host_page_size()
2666 mmap_read_unlock(current->mm); in kvm_host_page_size()
2673 return slot->flags & KVM_MEM_READONLY; in memslot_is_readonly()
2679 if (!slot || slot->flags & KVM_MEMSLOT_INVALID) in __gfn_to_hva_many()
2686 *nr_pages = slot->npages - (gfn - slot->base_gfn); in __gfn_to_hva_many()
2754 return rc == -EHWPOISON; in check_user_page_hwpoison()
2788 * 1 indicates success, -errno is returned if error is detected.
2840 if (unlikely(!(vma->vm_flags & VM_READ))) in vma_is_valid()
2843 if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE)))) in vma_is_valid()
2870 * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does in hva_to_pfn_remapped()
2874 r = fixup_user_fault(current->mm, addr, in hva_to_pfn_remapped()
2878 return -EAGAIN; in hva_to_pfn_remapped()
2908 * struct pages, but be allocated without refcounting e.g., in hva_to_pfn_remapped()
2909 * tail pages of non-compound higher order allocations, which in hva_to_pfn_remapped()
2914 r = -EFAULT; in hva_to_pfn_remapped()
2926 * @interruptible: whether the process can be interrupted by non-fatal signals
2957 if (npages == -EINTR) in hva_to_pfn()
2960 mmap_read_lock(current->mm); in hva_to_pfn()
2961 if (npages == -EHWPOISON || in hva_to_pfn()
2968 vma = vma_lookup(current->mm, addr); in hva_to_pfn()
2972 else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) { in hva_to_pfn()
2974 if (r == -EAGAIN) in hva_to_pfn()
2984 mmap_read_unlock(current->mm); in hva_to_pfn()
3052 return -1; in gfn_to_page_many_atomic()
3100 return -EINVAL; in kvm_vcpu_map()
3102 pfn = gfn_to_pfn(vcpu->kvm, gfn); in kvm_vcpu_map()
3104 return -EINVAL; in kvm_vcpu_map()
3116 return -EFAULT; in kvm_vcpu_map()
3118 map->page = page; in kvm_vcpu_map()
3119 map->hva = hva; in kvm_vcpu_map()
3120 map->pfn = pfn; in kvm_vcpu_map()
3121 map->gfn = gfn; in kvm_vcpu_map()
3132 if (!map->hva) in kvm_vcpu_unmap()
3135 if (map->page != KVM_UNMAPPED_PAGE) in kvm_vcpu_unmap()
3136 kunmap(map->page); in kvm_vcpu_unmap()
3139 memunmap(map->hva); in kvm_vcpu_unmap()
3143 kvm_vcpu_mark_page_dirty(vcpu, map->gfn); in kvm_vcpu_unmap()
3145 kvm_release_pfn(map->pfn, dirty); in kvm_vcpu_unmap()
3147 map->hva = NULL; in kvm_vcpu_unmap()
3148 map->page = NULL; in kvm_vcpu_unmap()
3155 * Per page-flags.h, pages tagged PG_reserved "should in general not be in kvm_is_ad_tracked_page()
3248 if (len > PAGE_SIZE - offset) in next_segment()
3249 return PAGE_SIZE - offset; in next_segment()
3262 return -EFAULT; in __kvm_read_guest_page()
3266 return -EFAULT; in __kvm_read_guest_page()
3269 return -EFAULT; in __kvm_read_guest_page()
3303 len -= seg; in kvm_read_guest()
3323 len -= seg; in kvm_vcpu_read_guest()
3338 return -EFAULT; in __kvm_read_guest_atomic()
3342 return -EFAULT; in __kvm_read_guest_atomic()
3347 return -EFAULT; in __kvm_read_guest_atomic()
3371 return -EFAULT; in __kvm_write_guest_page()
3375 return -EFAULT; in __kvm_write_guest_page()
3378 return -EFAULT; in __kvm_write_guest_page()
3397 return __kvm_write_guest_page(vcpu->kvm, slot, gfn, data, offset, len); in kvm_vcpu_write_guest_page()
3414 len -= seg; in kvm_write_guest()
3435 len -= seg; in kvm_vcpu_write_guest()
3449 gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT; in __kvm_gfn_to_hva_cache_init()
3450 gfn_t nr_pages_needed = end_gfn - start_gfn + 1; in __kvm_gfn_to_hva_cache_init()
3453 /* Update ghc->generation before performing any error checks. */ in __kvm_gfn_to_hva_cache_init()
3454 ghc->generation = slots->generation; in __kvm_gfn_to_hva_cache_init()
3457 ghc->hva = KVM_HVA_ERR_BAD; in __kvm_gfn_to_hva_cache_init()
3458 return -EINVAL; in __kvm_gfn_to_hva_cache_init()
3466 ghc->memslot = __gfn_to_memslot(slots, start_gfn); in __kvm_gfn_to_hva_cache_init()
3467 ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, in __kvm_gfn_to_hva_cache_init()
3469 if (kvm_is_error_hva(ghc->hva)) in __kvm_gfn_to_hva_cache_init()
3470 return -EFAULT; in __kvm_gfn_to_hva_cache_init()
3475 ghc->hva += offset; in __kvm_gfn_to_hva_cache_init()
3477 ghc->memslot = NULL; in __kvm_gfn_to_hva_cache_init()
3479 ghc->gpa = gpa; in __kvm_gfn_to_hva_cache_init()
3480 ghc->len = len; in __kvm_gfn_to_hva_cache_init()
3498 gpa_t gpa = ghc->gpa + offset; in kvm_write_guest_offset_cached()
3500 if (WARN_ON_ONCE(len + offset > ghc->len)) in kvm_write_guest_offset_cached()
3501 return -EINVAL; in kvm_write_guest_offset_cached()
3503 if (slots->generation != ghc->generation) { in kvm_write_guest_offset_cached()
3504 if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len)) in kvm_write_guest_offset_cached()
3505 return -EFAULT; in kvm_write_guest_offset_cached()
3508 if (kvm_is_error_hva(ghc->hva)) in kvm_write_guest_offset_cached()
3509 return -EFAULT; in kvm_write_guest_offset_cached()
3511 if (unlikely(!ghc->memslot)) in kvm_write_guest_offset_cached()
3514 r = __copy_to_user((void __user *)ghc->hva + offset, data, len); in kvm_write_guest_offset_cached()
3516 return -EFAULT; in kvm_write_guest_offset_cached()
3517 mark_page_dirty_in_slot(kvm, ghc->memslot, gpa >> PAGE_SHIFT); in kvm_write_guest_offset_cached()
3536 gpa_t gpa = ghc->gpa + offset; in kvm_read_guest_offset_cached()
3538 if (WARN_ON_ONCE(len + offset > ghc->len)) in kvm_read_guest_offset_cached()
3539 return -EINVAL; in kvm_read_guest_offset_cached()
3541 if (slots->generation != ghc->generation) { in kvm_read_guest_offset_cached()
3542 if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len)) in kvm_read_guest_offset_cached()
3543 return -EFAULT; in kvm_read_guest_offset_cached()
3546 if (kvm_is_error_hva(ghc->hva)) in kvm_read_guest_offset_cached()
3547 return -EFAULT; in kvm_read_guest_offset_cached()
3549 if (unlikely(!ghc->memslot)) in kvm_read_guest_offset_cached()
3552 r = __copy_from_user(data, (void __user *)ghc->hva + offset, len); in kvm_read_guest_offset_cached()
3554 return -EFAULT; in kvm_read_guest_offset_cached()
3580 len -= seg; in kvm_clear_guest()
3594 if (WARN_ON_ONCE(vcpu && vcpu->kvm != kvm)) in mark_page_dirty_in_slot()
3601 unsigned long rel_gfn = gfn - memslot->base_gfn; in mark_page_dirty_in_slot()
3602 u32 slot = (memslot->as_id << 16) | memslot->id; in mark_page_dirty_in_slot()
3604 if (kvm->dirty_ring_size && vcpu) in mark_page_dirty_in_slot()
3606 else if (memslot->dirty_bitmap) in mark_page_dirty_in_slot()
3607 set_bit_le(rel_gfn, memslot->dirty_bitmap); in mark_page_dirty_in_slot()
3626 mark_page_dirty_in_slot(vcpu->kvm, memslot, gfn); in kvm_vcpu_mark_page_dirty()
3632 if (!vcpu->sigset_active) in kvm_sigset_activate()
3636 * This does a lockless modification of ->real_blocked, which is fine in kvm_sigset_activate()
3637 * because, only current can change ->real_blocked and all readers of in kvm_sigset_activate()
3638 * ->real_blocked don't care as long ->real_blocked is always a subset in kvm_sigset_activate()
3639 * of ->blocked. in kvm_sigset_activate()
3641  sigprocmask(SIG_SETMASK, &vcpu->sigset, &current->real_blocked); in kvm_sigset_activate()
3646 if (!vcpu->sigset_active) in kvm_sigset_deactivate()
3649  sigprocmask(SIG_SETMASK, &current->real_blocked, NULL); in kvm_sigset_deactivate()
3650  sigemptyset(&current->real_blocked); in kvm_sigset_deactivate()
3657 old = val = vcpu->halt_poll_ns; in grow_halt_poll_ns()
3667 vcpu->halt_poll_ns = val; in grow_halt_poll_ns()
3669 trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); in grow_halt_poll_ns()
3676 old = val = vcpu->halt_poll_ns; in shrink_halt_poll_ns()
3687 vcpu->halt_poll_ns = val; in shrink_halt_poll_ns()
3688 trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old); in shrink_halt_poll_ns()
3693 int ret = -EINTR; in kvm_vcpu_check_block()
3694 int idx = srcu_read_lock(&vcpu->kvm->srcu); in kvm_vcpu_check_block()
3707 srcu_read_unlock(&vcpu->kvm->srcu, idx); in kvm_vcpu_check_block()
3714 * directly for other vCPU non-runnable states, e.g. x86's Wait-For-SIPI.
3721 vcpu->stat.generic.blocking = 1; in kvm_vcpu_block()
3743 vcpu->stat.generic.blocking = 0; in kvm_vcpu_block()
3751 struct kvm_vcpu_stat_generic *stats = &vcpu->stat.generic; in update_halt_poll_stats()
3754 ++vcpu->stat.generic.halt_attempted_poll; in update_halt_poll_stats()
3757 ++vcpu->stat.generic.halt_successful_poll; in update_halt_poll_stats()
3760 ++vcpu->stat.generic.halt_poll_invalid; in update_halt_poll_stats()
3762 stats->halt_poll_success_ns += poll_ns; in update_halt_poll_stats()
3763 KVM_STATS_LOG_HIST_UPDATE(stats->halt_poll_success_hist, poll_ns); in update_halt_poll_stats()
3765 stats->halt_poll_fail_ns += poll_ns; in update_halt_poll_stats()
3766 KVM_STATS_LOG_HIST_UPDATE(stats->halt_poll_fail_hist, poll_ns); in update_halt_poll_stats()
3772 struct kvm *kvm = vcpu->kvm; in kvm_vcpu_max_halt_poll_ns()
3774 if (kvm->override_halt_poll_ns) { in kvm_vcpu_max_halt_poll_ns()
3776 * Ensure kvm->max_halt_poll_ns is not read before in kvm_vcpu_max_halt_poll_ns()
3777 * kvm->override_halt_poll_ns. in kvm_vcpu_max_halt_poll_ns()
3782 return READ_ONCE(kvm->max_halt_poll_ns); in kvm_vcpu_max_halt_poll_ns()
3803 if (vcpu->halt_poll_ns > max_halt_poll_ns) in kvm_vcpu_halt()
3804 vcpu->halt_poll_ns = max_halt_poll_ns; in kvm_vcpu_halt()
3806 do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns; in kvm_vcpu_halt()
3810 ktime_t stop = ktime_add_ns(start, vcpu->halt_poll_ns); in kvm_vcpu_halt()
3824 vcpu->stat.generic.halt_wait_ns += in kvm_vcpu_halt()
3825 ktime_to_ns(cur) - ktime_to_ns(poll_end); in kvm_vcpu_halt()
3826 KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.generic.halt_wait_hist, in kvm_vcpu_halt()
3827 ktime_to_ns(cur) - ktime_to_ns(poll_end)); in kvm_vcpu_halt()
3831 halt_ns = ktime_to_ns(cur) - ktime_to_ns(start); in kvm_vcpu_halt()
3834 * Note, halt-polling is considered successful so long as the vCPU was in kvm_vcpu_halt()
3836 * after of the halt-polling loop itself, but before the full wait. in kvm_vcpu_halt()
3848 if (halt_ns <= vcpu->halt_poll_ns) in kvm_vcpu_halt()
3851 else if (vcpu->halt_poll_ns && in kvm_vcpu_halt()
3855 else if (vcpu->halt_poll_ns < max_halt_poll_ns && in kvm_vcpu_halt()
3859 vcpu->halt_poll_ns = 0; in kvm_vcpu_halt()
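The per-VM limit consulted by kvm_vcpu_max_halt_poll_ns() above can be set from userspace with the KVM_CAP_HALT_POLL enable-cap, overriding the module-wide halt_poll_ns default. A minimal sketch (the helper name is illustrative):

/* Sketch: cap this VM's halt-poll window; 0 disables halt polling for the VM. */
#include <linux/kvm.h>
#include <sys/ioctl.h>

int set_max_halt_poll(int vm_fd, unsigned int max_ns)
{
        struct kvm_enable_cap cap = {
                .cap = KVM_CAP_HALT_POLL,
                .args = { max_ns },
        };
        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}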
3870 WRITE_ONCE(vcpu->ready, true); in kvm_vcpu_wake_up()
3871 ++vcpu->stat.generic.halt_wakeup; in kvm_vcpu_wake_up()
3898 if (vcpu->mode == IN_GUEST_MODE) in kvm_vcpu_kick()
3899 WRITE_ONCE(vcpu->mode, EXITING_GUEST_MODE); in kvm_vcpu_kick()
3911 cpu = READ_ONCE(vcpu->cpu); in kvm_vcpu_kick()
3928 pid = rcu_dereference(target->pid); in kvm_vcpu_yield_to()
3945 * (a) VCPU which has not done pl-exit or cpu relax intercepted recently
3949 * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
3954 * Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding
3955 * to preempted lock-holder could result in wrong VCPU selection and CPU
3956 * burning. Giving priority for a potential lock-holder increases lock
3959 * Since algorithm is based on heuristics, accessing another VCPU data without
3968 eligible = !vcpu->spin_loop.in_spin_loop || in kvm_vcpu_eligible_for_directed_yield()
3969 vcpu->spin_loop.dy_eligible; in kvm_vcpu_eligible_for_directed_yield()
3971 if (vcpu->spin_loop.in_spin_loop) in kvm_vcpu_eligible_for_directed_yield()
3972 kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); in kvm_vcpu_eligible_for_directed_yield()
3996 if (!list_empty_careful(&vcpu->async_pf.done)) in vcpu_dy_runnable()
4008 * directly for cross-vCPU checks is functionally correct and accurate.
4022 struct kvm *kvm = me->kvm; in kvm_vcpu_on_spin()
4030 last_boosted_vcpu = READ_ONCE(kvm->last_boosted_vcpu); in kvm_vcpu_on_spin()
4037 * We approximate round-robin by starting at the last boosted VCPU. in kvm_vcpu_on_spin()
4046 if (!READ_ONCE(vcpu->ready)) in kvm_vcpu_on_spin()
4054 * Treat the target vCPU as being in-kernel if it has a in kvm_vcpu_on_spin()
4057 * vCPU is in-kernel for the purposes of directed yield. in kvm_vcpu_on_spin()
4059 if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode && in kvm_vcpu_on_spin()
4068 WRITE_ONCE(kvm->last_boosted_vcpu, i); in kvm_vcpu_on_spin()
4071 try--; in kvm_vcpu_on_spin()
4089 kvm->dirty_ring_size / PAGE_SIZE); in kvm_page_in_dirty_ring()
4097 struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data; in kvm_vcpu_fault()
4100 if (vmf->pgoff == 0) in kvm_vcpu_fault()
4101 page = virt_to_page(vcpu->run); in kvm_vcpu_fault()
4103 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) in kvm_vcpu_fault()
4104 page = virt_to_page(vcpu->arch.pio_data); in kvm_vcpu_fault()
4107 else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) in kvm_vcpu_fault()
4108 page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); in kvm_vcpu_fault()
4110 else if (kvm_page_in_dirty_ring(vcpu->kvm, vmf->pgoff)) in kvm_vcpu_fault()
4112 &vcpu->dirty_ring, in kvm_vcpu_fault()
4113 vmf->pgoff - KVM_DIRTY_LOG_PAGE_OFFSET); in kvm_vcpu_fault()
4117 vmf->page = page; in kvm_vcpu_fault()
4127 struct kvm_vcpu *vcpu = file->private_data; in kvm_vcpu_mmap()
4130 if ((kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff) || in kvm_vcpu_mmap()
4131 kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff + pages - 1)) && in kvm_vcpu_mmap()
4132 ((vma->vm_flags & VM_EXEC) || !(vma->vm_flags & VM_SHARED))) in kvm_vcpu_mmap()
4133 return -EINVAL; in kvm_vcpu_mmap()
4135 vma->vm_ops = &kvm_vcpu_vm_ops; in kvm_vcpu_mmap()
4141 struct kvm_vcpu *vcpu = filp->private_data; in kvm_vcpu_release()
4143 kvm_put_kvm(vcpu->kvm); in kvm_vcpu_release()
4162 snprintf(name, sizeof(name), "kvm-vcpu:%d", vcpu->vcpu_id); in create_vcpu_fd()
4172 *val = pid_nr(rcu_dereference(vcpu->pid)); in vcpu_get_pid()
4187 snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id); in kvm_create_vcpu_debugfs()
4189 vcpu->kvm->debugfs_dentry); in kvm_create_vcpu_debugfs()
4208 * too-large values instead of silently truncating. in kvm_vm_ioctl_create_vcpu()
4210 * Ensure KVM_MAX_VCPU_IDS isn't pushed above INT_MAX without first in kvm_vm_ioctl_create_vcpu()
4216 return -EINVAL; in kvm_vm_ioctl_create_vcpu()
4218 mutex_lock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4219 if (kvm->created_vcpus >= kvm->max_vcpus) { in kvm_vm_ioctl_create_vcpu()
4220 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4221 return -EINVAL; in kvm_vm_ioctl_create_vcpu()
4226 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4230 kvm->created_vcpus++; in kvm_vm_ioctl_create_vcpu()
4231 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4235 r = -ENOMEM; in kvm_vm_ioctl_create_vcpu()
4242 r = -ENOMEM; in kvm_vm_ioctl_create_vcpu()
4245 vcpu->run = page_address(page); in kvm_vm_ioctl_create_vcpu()
4253 if (kvm->dirty_ring_size) { in kvm_vm_ioctl_create_vcpu()
4254 r = kvm_dirty_ring_alloc(&vcpu->dirty_ring, in kvm_vm_ioctl_create_vcpu()
4255 id, kvm->dirty_ring_size); in kvm_vm_ioctl_create_vcpu()
4260 mutex_lock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4263 /* Ensure that lockdep knows vcpu->mutex is taken *inside* kvm->lock */ in kvm_vm_ioctl_create_vcpu()
4264 mutex_lock(&vcpu->mutex); in kvm_vm_ioctl_create_vcpu()
4265 mutex_unlock(&vcpu->mutex); in kvm_vm_ioctl_create_vcpu()
4269 r = -EEXIST; in kvm_vm_ioctl_create_vcpu()
4273 vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus); in kvm_vm_ioctl_create_vcpu()
4274 r = xa_reserve(&kvm->vcpu_array, vcpu->vcpu_idx, GFP_KERNEL_ACCOUNT); in kvm_vm_ioctl_create_vcpu()
4284 if (KVM_BUG_ON(xa_store(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, 0), kvm)) { in kvm_vm_ioctl_create_vcpu()
4285 r = -EINVAL; in kvm_vm_ioctl_create_vcpu()
4291 * pointer before kvm->online_vcpu's incremented value. in kvm_vm_ioctl_create_vcpu()
4294 atomic_inc(&kvm->online_vcpus); in kvm_vm_ioctl_create_vcpu()
4296 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4303 xa_release(&kvm->vcpu_array, vcpu->vcpu_idx); in kvm_vm_ioctl_create_vcpu()
4305 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4306 kvm_dirty_ring_free(&vcpu->dirty_ring); in kvm_vm_ioctl_create_vcpu()
4310 free_page((unsigned long)vcpu->run); in kvm_vm_ioctl_create_vcpu()
4314 mutex_lock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4315 kvm->created_vcpus--; in kvm_vm_ioctl_create_vcpu()
4316 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
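Once the vCPU exists, the kvm_run page allocated above is exposed to userspace by mmap()ing the vCPU fd, sized per KVM_GET_VCPU_MMAP_SIZE. A sketch of creating and running vCPU 0; register and guest-memory setup are omitted and the helper name is illustrative:

/* Sketch: create vCPU 0, map its shared kvm_run area, run it once. */
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

int run_vcpu0(int kvm_fd, int vm_fd)
{
        int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);     /* vcpu id 0 */
        if (vcpu_fd < 0)
                return -1;

        long mmap_size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
        if (mmap_size < 0)
                return -1;

        /* Page 0 of the vCPU fd's mmap space is the kvm_run structure. */
        struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
                                   MAP_SHARED, vcpu_fd, 0);
        if (run == MAP_FAILED)
                return -1;

        if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)                 /* needs registers/memory set up */
                return -1;
        printf("exit_reason = %u\n", run->exit_reason);
        return vcpu_fd;
}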
4324 vcpu->sigset_active = 1; in kvm_vcpu_ioctl_set_sigmask()
4325 vcpu->sigset = *sigset; in kvm_vcpu_ioctl_set_sigmask()
4327 vcpu->sigset_active = 0; in kvm_vcpu_ioctl_set_sigmask()
4334 struct kvm_vcpu *vcpu = file->private_data; in kvm_vcpu_stats_read()
4336 return kvm_stats_read(vcpu->stats_id, &kvm_vcpu_stats_header, in kvm_vcpu_stats_read()
4337 &kvm_vcpu_stats_desc[0], &vcpu->stat, in kvm_vcpu_stats_read()
4338 sizeof(vcpu->stat), user_buffer, size, offset); in kvm_vcpu_stats_read()
4343 struct kvm_vcpu *vcpu = file->private_data; in kvm_vcpu_stats_release()
4345 kvm_put_kvm(vcpu->kvm); in kvm_vcpu_stats_release()
4362 snprintf(name, sizeof(name), "kvm-vcpu-stats:%d", vcpu->vcpu_id); in kvm_vcpu_ioctl_get_stats_fd()
4374 kvm_get_kvm(vcpu->kvm); in kvm_vcpu_ioctl_get_stats_fd()
4376 file->f_mode |= FMODE_PREAD; in kvm_vcpu_ioctl_get_stats_fd()
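The stats fd returned above is read-only and supports pread() (FMODE_PREAD), with a fixed binary header describing where the descriptors and data live. A sketch of reading that header from userspace (the helper name is illustrative):

/* Sketch: read the binary stats header from a vCPU's stats fd. */
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int dump_stats_header(int vcpu_fd)
{
        int stats_fd = ioctl(vcpu_fd, KVM_GET_STATS_FD, NULL);
        if (stats_fd < 0)
                return -1;

        struct kvm_stats_header hdr;
        if (pread(stats_fd, &hdr, sizeof(hdr), 0) != sizeof(hdr))
                return -1;

        /* Descriptors and data live at the offsets the header advertises. */
        printf("%u stat descriptors, data at offset %u\n", hdr.num_desc, hdr.data_offset);
        close(stats_fd);
        return 0;
}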
4390 if (range->flags) in kvm_vcpu_pre_fault_memory()
4391 return -EINVAL; in kvm_vcpu_pre_fault_memory()
4393 if (!PAGE_ALIGNED(range->gpa) || in kvm_vcpu_pre_fault_memory()
4394 !PAGE_ALIGNED(range->size) || in kvm_vcpu_pre_fault_memory()
4395 range->gpa + range->size <= range->gpa) in kvm_vcpu_pre_fault_memory()
4396 return -EINVAL; in kvm_vcpu_pre_fault_memory()
4399 idx = srcu_read_lock(&vcpu->kvm->srcu); in kvm_vcpu_pre_fault_memory()
4401 full_size = range->size; in kvm_vcpu_pre_fault_memory()
4404 r = -EINTR; in kvm_vcpu_pre_fault_memory()
4409 if (WARN_ON_ONCE(r == 0 || r == -EIO)) in kvm_vcpu_pre_fault_memory()
4415 range->size -= r; in kvm_vcpu_pre_fault_memory()
4416 range->gpa += r; in kvm_vcpu_pre_fault_memory()
4418 } while (range->size); in kvm_vcpu_pre_fault_memory()
4420 srcu_read_unlock(&vcpu->kvm->srcu, idx); in kvm_vcpu_pre_fault_memory()
4424 return full_size == range->size ? r : 0; in kvm_vcpu_pre_fault_memory()
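A hedged userspace sketch of the matching ioctl, assuming a kernel new enough to advertise KVM_CAP_PRE_FAULT_MEMORY; the range must be page aligned and, per the loop above, the kernel advances gpa and shrinks size as it makes progress (the helper name is illustrative):

/* Sketch: pre-fault a GPA range on one vCPU. */
#include <linux/kvm.h>
#include <sys/ioctl.h>

int pre_fault(int vcpu_fd, __u64 gpa, __u64 size)
{
        struct kvm_pre_fault_memory range = {
                .gpa = gpa,       /* must be page aligned */
                .size = size,     /* must be page aligned and non-zero */
                .flags = 0,       /* must be zero */
        };
        return ioctl(vcpu_fd, KVM_PRE_FAULT_MEMORY, &range);
}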
4431 struct kvm_vcpu *vcpu = filp->private_data; in kvm_vcpu_ioctl()
4437 if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead) in kvm_vcpu_ioctl()
4438 return -EIO; in kvm_vcpu_ioctl()
4441 return -EINVAL; in kvm_vcpu_ioctl()
4448 if (r != -ENOIOCTLCMD) in kvm_vcpu_ioctl()
4451 if (mutex_lock_killable(&vcpu->mutex)) in kvm_vcpu_ioctl()
4452 return -EINTR; in kvm_vcpu_ioctl()
4456 r = -EINVAL; in kvm_vcpu_ioctl()
4459 oldpid = rcu_access_pointer(vcpu->pid); in kvm_vcpu_ioctl()
4469 rcu_assign_pointer(vcpu->pid, newpid); in kvm_vcpu_ioctl()
4474 vcpu->wants_to_run = !READ_ONCE(vcpu->run->immediate_exit__unsafe); in kvm_vcpu_ioctl()
4476 vcpu->wants_to_run = false; in kvm_vcpu_ioctl()
4478 trace_kvm_userspace_exit(vcpu->run->exit_reason, r); in kvm_vcpu_ioctl()
4484 r = -ENOMEM; in kvm_vcpu_ioctl()
4491 r = -EFAULT; in kvm_vcpu_ioctl()
4513 r = -ENOMEM; in kvm_vcpu_ioctl()
4519 r = -EFAULT; in kvm_vcpu_ioctl()
4541 r = -EFAULT; in kvm_vcpu_ioctl()
4550 r = -EFAULT; in kvm_vcpu_ioctl()
4559 r = -EFAULT; in kvm_vcpu_ioctl()
4565 r = -EFAULT; in kvm_vcpu_ioctl()
4574 r = -EFAULT; in kvm_vcpu_ioctl()
4587 r = -EFAULT; in kvm_vcpu_ioctl()
4591 r = -EINVAL; in kvm_vcpu_ioctl()
4594 r = -EFAULT; in kvm_vcpu_ioctl()
4595 if (copy_from_user(&sigset, sigmask_arg->sigset, in kvm_vcpu_ioctl()
4605 r = -ENOMEM; in kvm_vcpu_ioctl()
4611 r = -EFAULT; in kvm_vcpu_ioctl()
4635 r = -EFAULT; in kvm_vcpu_ioctl()
4641 r = -EFAULT; in kvm_vcpu_ioctl()
4649 mutex_unlock(&vcpu->mutex); in kvm_vcpu_ioctl()
4659 struct kvm_vcpu *vcpu = filp->private_data; in kvm_vcpu_compat_ioctl()
4663 if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead) in kvm_vcpu_compat_ioctl()
4664 return -EIO; in kvm_vcpu_compat_ioctl()
4673 r = -EFAULT; in kvm_vcpu_compat_ioctl()
4677 r = -EINVAL; in kvm_vcpu_compat_ioctl()
4680 r = -EFAULT; in kvm_vcpu_compat_ioctl()
4682 (compat_sigset_t __user *)sigmask_arg->sigset)) in kvm_vcpu_compat_ioctl()
4700 struct kvm_device *dev = filp->private_data; in kvm_device_mmap()
4702 if (dev->ops->mmap) in kvm_device_mmap()
4703 return dev->ops->mmap(dev, vma); in kvm_device_mmap()
4705 return -ENODEV; in kvm_device_mmap()
4716 return -EPERM; in kvm_device_ioctl_attr()
4719 return -EFAULT; in kvm_device_ioctl_attr()
4727 struct kvm_device *dev = filp->private_data; in kvm_device_ioctl()
4729 if (dev->kvm->mm != current->mm || dev->kvm->vm_dead) in kvm_device_ioctl()
4730 return -EIO; in kvm_device_ioctl()
4734 return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); in kvm_device_ioctl()
4736 return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg); in kvm_device_ioctl()
4738 return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg); in kvm_device_ioctl()
4740 if (dev->ops->ioctl) in kvm_device_ioctl()
4741 return dev->ops->ioctl(dev, ioctl, arg); in kvm_device_ioctl()
4743 return -ENOTTY; in kvm_device_ioctl()
4749 struct kvm_device *dev = filp->private_data; in kvm_device_release()
4750 struct kvm *kvm = dev->kvm; in kvm_device_release()
4752 if (dev->ops->release) { in kvm_device_release()
4753 mutex_lock(&kvm->lock); in kvm_device_release()
4754 list_del_rcu(&dev->vm_node); in kvm_device_release()
4756 dev->ops->release(dev); in kvm_device_release()
4757 mutex_unlock(&kvm->lock); in kvm_device_release()
4773 if (filp->f_op != &kvm_device_fops) in kvm_device_from_filp()
4776 return filp->private_data; in kvm_device_from_filp()
4789 return -ENOSPC; in kvm_register_device_ops()
4792 return -EEXIST; in kvm_register_device_ops()
4805 struct kvm_create_device *cd) in kvm_ioctl_create_device() argument
4809 bool test = cd->flags & KVM_CREATE_DEVICE_TEST; in kvm_ioctl_create_device() local
4813 if (cd->type >= ARRAY_SIZE(kvm_device_ops_table)) in kvm_ioctl_create_device()
4814 return -ENODEV; in kvm_ioctl_create_device()
4816 type = array_index_nospec(cd->type, ARRAY_SIZE(kvm_device_ops_table)); in kvm_ioctl_create_device()
4819 return -ENODEV; in kvm_ioctl_create_device()
4821 if (test) in kvm_ioctl_create_device()
4826 return -ENOMEM; in kvm_ioctl_create_device()
4828 dev->ops = ops; in kvm_ioctl_create_device()
4829 dev->kvm = kvm; in kvm_ioctl_create_device()
4831 mutex_lock(&kvm->lock); in kvm_ioctl_create_device()
4832 ret = ops->create(dev, type); in kvm_ioctl_create_device()
4834 mutex_unlock(&kvm->lock); in kvm_ioctl_create_device()
4838 list_add_rcu(&dev->vm_node, &kvm->devices); in kvm_ioctl_create_device()
4839 mutex_unlock(&kvm->lock); in kvm_ioctl_create_device()
4841 if (ops->init) in kvm_ioctl_create_device()
4842 ops->init(dev); in kvm_ioctl_create_device()
4845 ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); in kvm_ioctl_create_device()
4848 mutex_lock(&kvm->lock); in kvm_ioctl_create_device()
4849 list_del_rcu(&dev->vm_node); in kvm_ioctl_create_device()
4851 if (ops->release) in kvm_ioctl_create_device()
4852 ops->release(dev); in kvm_ioctl_create_device()
4853 mutex_unlock(&kvm->lock); in kvm_ioctl_create_device()
4854 if (ops->destroy) in kvm_ioctl_create_device()
4855 ops->destroy(dev); in kvm_ioctl_create_device()
4859 cd->fd = ret; in kvm_ioctl_create_device()
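
The same flow seen from userspace, as a hedged sketch rather than anything in this file: KVM_CREATE_DEVICE_TEST asks whether the device type is supported without instantiating it, and a real create hands back the anon-inode device fd in cd.fd. vm_fd is assumed to be a VM descriptor and the VFIO device type is only an example.

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int create_vfio_device(int vm_fd)
{
	struct kvm_create_device cd = {
		.type  = KVM_DEV_TYPE_VFIO,
		.flags = KVM_CREATE_DEVICE_TEST,	/* probe only */
	};

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd))
		return -1;			/* device type not supported */

	cd.flags = 0;				/* now actually create it */
	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd))
		return -1;

	return cd.fd;				/* anon-inode device fd */
}
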
4942 return -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4945 if (!size || (size & (size - 1))) in kvm_vm_ioctl_enable_dirty_log_ring()
4946 return -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4951 return -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4955 return -E2BIG; in kvm_vm_ioctl_enable_dirty_log_ring()
4958 if (kvm->dirty_ring_size) in kvm_vm_ioctl_enable_dirty_log_ring()
4959 return -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4961 mutex_lock(&kvm->lock); in kvm_vm_ioctl_enable_dirty_log_ring()
4963 if (kvm->created_vcpus) { in kvm_vm_ioctl_enable_dirty_log_ring()
4965 r = -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4967 kvm->dirty_ring_size = size; in kvm_vm_ioctl_enable_dirty_log_ring()
4971 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_enable_dirty_log_ring()
4981 if (!kvm->dirty_ring_size) in kvm_vm_ioctl_reset_dirty_pages()
4982 return -EINVAL; in kvm_vm_ioctl_reset_dirty_pages()
4984 mutex_lock(&kvm->slots_lock); in kvm_vm_ioctl_reset_dirty_pages()
4987 cleared += kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring); in kvm_vm_ioctl_reset_dirty_pages()
4989 mutex_unlock(&kvm->slots_lock); in kvm_vm_ioctl_reset_dirty_pages()
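
A sketch, under stated assumptions, of how userspace drives the two functions above: the ring size is given in bytes, must be a power of two, and has to be enabled before the first vCPU is created; KVM_RESET_DIRTY_RINGS later reports how many harvested entries were reset. The 64 KiB size is illustrative, not a kernel default.

#include <linux/kvm.h>
#include <sys/ioctl.h>

#define RING_BYTES	(64 * 1024)	/* illustrative: 64 KiB of dirty-gfn entries */

static int enable_dirty_ring(int vm_fd)
{
	struct kvm_enable_cap cap = {
		/* Some architectures require KVM_CAP_DIRTY_LOG_RING_ACQ_REL instead. */
		.cap	 = KVM_CAP_DIRTY_LOG_RING,
		.args[0] = RING_BYTES,
	};

	/* Must run before the first KVM_CREATE_VCPU on this VM. */
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}

static int reap_dirty_rings(int vm_fd)
{
	/* Returns the number of dirty-ring entries that were reset. */
	return ioctl(vm_fd, KVM_RESET_DIRTY_RINGS);
}
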
5000 return -EINVAL; in kvm_vm_ioctl_enable_cap()
5007 lockdep_assert_held(&kvm->slots_lock); in kvm_are_all_memslots_empty()
5021 switch (cap->cap) { in kvm_vm_ioctl_enable_cap_generic()
5026 if (cap->args[0] & KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE) in kvm_vm_ioctl_enable_cap_generic()
5029 if (cap->flags || (cap->args[0] & ~allowed_options)) in kvm_vm_ioctl_enable_cap_generic()
5030 return -EINVAL; in kvm_vm_ioctl_enable_cap_generic()
5031 kvm->manual_dirty_log_protect = cap->args[0]; in kvm_vm_ioctl_enable_cap_generic()
5036 if (cap->flags || cap->args[0] != (unsigned int)cap->args[0]) in kvm_vm_ioctl_enable_cap_generic()
5037 return -EINVAL; in kvm_vm_ioctl_enable_cap_generic()
5039 kvm->max_halt_poll_ns = cap->args[0]; in kvm_vm_ioctl_enable_cap_generic()
5042 * Ensure kvm->override_halt_poll_ns does not become visible in kvm_vm_ioctl_enable_cap_generic()
5043 * before kvm->max_halt_poll_ns. in kvm_vm_ioctl_enable_cap_generic()
5048 kvm->override_halt_poll_ns = true; in kvm_vm_ioctl_enable_cap_generic()
5054 if (!kvm_vm_ioctl_check_extension_generic(kvm, cap->cap)) in kvm_vm_ioctl_enable_cap_generic()
5055 return -EINVAL; in kvm_vm_ioctl_enable_cap_generic()
5057 return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]); in kvm_vm_ioctl_enable_cap_generic()
5059 int r = -EINVAL; in kvm_vm_ioctl_enable_cap_generic()
5062 !kvm->dirty_ring_size || cap->flags) in kvm_vm_ioctl_enable_cap_generic()
5065 mutex_lock(&kvm->slots_lock); in kvm_vm_ioctl_enable_cap_generic()
5073 kvm->dirty_ring_with_bitmap = true; in kvm_vm_ioctl_enable_cap_generic()
5077 mutex_unlock(&kvm->slots_lock); in kvm_vm_ioctl_enable_cap_generic()
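
A hedged userspace sketch for two of the generic capabilities dispatched above: KVM_CAP_HALT_POLL overrides the VM's maximum halt-polling time, and KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 turns dirty-log clearing into an explicit userspace step. The 500 microsecond value is only an example.

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int tune_vm(int vm_fd)
{
	struct kvm_enable_cap halt_poll = {
		.cap	 = KVM_CAP_HALT_POLL,
		.args[0] = 500000,	/* max halt_poll_ns: 500 us (example value) */
	};
	struct kvm_enable_cap manual_protect = {
		.cap	 = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
		.args[0] = KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE,
	};

	if (ioctl(vm_fd, KVM_ENABLE_CAP, &halt_poll))
		return -1;
	return ioctl(vm_fd, KVM_ENABLE_CAP, &manual_protect);
}
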
5089 struct kvm *kvm = file->private_data; in kvm_vm_stats_read()
5091 return kvm_stats_read(kvm->stats_id, &kvm_vm_stats_header, in kvm_vm_stats_read()
5092 &kvm_vm_stats_desc[0], &kvm->stat, in kvm_vm_stats_read()
5093 sizeof(kvm->stat), user_buffer, size, offset); in kvm_vm_stats_read()
5098 struct kvm *kvm = file->private_data; in kvm_vm_stats_release()
5120 file = anon_inode_getfile("kvm-vm-stats", in kvm_vm_ioctl_get_stats_fd()
5129 file->f_mode |= FMODE_PREAD; in kvm_vm_ioctl_get_stats_fd()
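
A sketch of the consumer side of kvm_vm_stats_read(), assuming vm_fd is a VM descriptor: KVM_GET_STATS_FD returns a read-only, pread-capable fd whose contents begin with struct kvm_stats_header; descriptors and data live at the offsets the header reports, and only the header is parsed here.

#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int dump_vm_stats_header(int vm_fd)
{
	struct kvm_stats_header hdr;
	int stats_fd = ioctl(vm_fd, KVM_GET_STATS_FD, NULL);

	if (stats_fd < 0)
		return -1;

	/* FMODE_PREAD is set on the stats file, so pread() at offset 0 works. */
	if (pread(stats_fd, &hdr, sizeof(hdr), 0) != sizeof(hdr)) {
		close(stats_fd);
		return -1;
	}

	printf("%u stats, data at offset %u\n", hdr.num_desc, hdr.data_offset);
	close(stats_fd);
	return 0;
}
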
5146 struct kvm *kvm = filp->private_data; in kvm_vm_ioctl()
5150 if (kvm->mm != current->mm || kvm->vm_dead) in kvm_vm_ioctl()
5151 return -EIO; in kvm_vm_ioctl()
5159 r = -EFAULT; in kvm_vm_ioctl()
5188 r = -EFAULT; in kvm_vm_ioctl()
5192 r = -EINVAL; in kvm_vm_ioctl()
5203 r = -EFAULT; in kvm_vm_ioctl()
5213 r = -EFAULT; in kvm_vm_ioctl()
5224 r = -EFAULT; in kvm_vm_ioctl()
5233 r = -EFAULT; in kvm_vm_ioctl()
5243 r = -EFAULT; in kvm_vm_ioctl()
5252 r = -EFAULT; in kvm_vm_ioctl()
5262 r = -EFAULT; in kvm_vm_ioctl()
5274 r = -EFAULT; in kvm_vm_ioctl()
5283 r = -EFAULT; in kvm_vm_ioctl()
5299 r = -EFAULT; in kvm_vm_ioctl()
5302 r = -EINVAL; in kvm_vm_ioctl()
5311 entries = vmemdup_array_user(urouting->entries, in kvm_vm_ioctl()
5328 r = -EFAULT; in kvm_vm_ioctl()
5337 struct kvm_create_device cd; in kvm_vm_ioctl() local
5339 r = -EFAULT; in kvm_vm_ioctl()
5340 if (copy_from_user(&cd, argp, sizeof(cd))) in kvm_vm_ioctl()
5343 r = kvm_ioctl_create_device(kvm, &cd); in kvm_vm_ioctl()
5347 r = -EFAULT; in kvm_vm_ioctl()
5348 if (copy_to_user(argp, &cd, sizeof(cd))) in kvm_vm_ioctl()
5367 r = -EFAULT; in kvm_vm_ioctl()
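
A hedged sketch of the KVM_SET_GSI_ROUTING case above (the one that duplicates urouting->entries with vmemdup_array_user()): the table is a header plus a flexible array of entries, and each call replaces the whole routing table. It assumes an in-kernel irqchip already exists; the GSI, chip and pin numbers are placeholders.

#include <linux/kvm.h>
#include <stdlib.h>
#include <sys/ioctl.h>

static int route_gsi_to_irqchip(int vm_fd, unsigned int gsi,
				unsigned int irqchip, unsigned int pin)
{
	struct kvm_irq_routing *table;
	int ret;

	/* Header plus a single entry; a real table lists every route at once. */
	table = calloc(1, sizeof(*table) + sizeof(table->entries[0]));
	if (!table)
		return -1;

	table->nr = 1;
	table->entries[0].gsi = gsi;
	table->entries[0].type = KVM_IRQ_ROUTING_IRQCHIP;
	table->entries[0].u.irqchip.irqchip = irqchip;
	table->entries[0].u.irqchip.pin = pin;

	ret = ioctl(vm_fd, KVM_SET_GSI_ROUTING, table);
	free(table);
	return ret;
}
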
5405 return -ENOTTY; in kvm_arch_vm_compat_ioctl()
5411 struct kvm *kvm = filp->private_data; in kvm_vm_compat_ioctl()
5414 if (kvm->mm != current->mm || kvm->vm_dead) in kvm_vm_compat_ioctl()
5415 return -EIO; in kvm_vm_compat_ioctl()
5418 if (r != -ENOTTY) in kvm_vm_compat_ioctl()
5429 return -EFAULT; in kvm_vm_compat_ioctl()
5446 return -EFAULT; in kvm_vm_compat_ioctl()
5471 return file && file->f_op == &kvm_vm_fops; in file_is_kvm()
5494 file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); in kvm_dev_ioctl_create_vm()
5501 * Don't call kvm_put_kvm anymore at this point; file->f_op is in kvm_dev_ioctl_create_vm()
5502 * already set, with ->release() being kvm_vm_release(). In error in kvm_dev_ioctl_create_vm()
5521 int r = -EINVAL; in kvm_dev_ioctl()
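
The same create-VM path viewed from userspace, as a minimal sketch: check the API version through kvm_dev_ioctl(), then ask for the anonymous "kvm-vm" file created above. Error handling is trimmed to the essentials.

#include <fcntl.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int open_vm(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	int vm_fd;

	if (kvm_fd < 0)
		return -1;

	if (ioctl(kvm_fd, KVM_GET_API_VERSION, 0) != KVM_API_VERSION) {
		close(kvm_fd);
		return -1;
	}

	vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);	/* 0: default machine type */
	close(kvm_fd);					/* the VM fd stays valid */
	return vm_fd;
}
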
5594 return -EIO; in kvm_enable_virtualization_cpu()
5696 * If userspace initiated a forced reboot, e.g. reboot -f, then it's in kvm_enable_virtualization()
5697 * possible for an in-flight operation to enable virtualization after in kvm_enable_virtualization()
5698 * syscore_shutdown() is called, i.e. without kvm_shutdown() being in kvm_enable_virtualization()
5707 r = -EBUSY; in kvm_enable_virtualization()
5718 --kvm_usage_count; in kvm_enable_virtualization()
5726 if (--kvm_usage_count) in kvm_disable_virtualization()
5771 if (dev->ops->destructor) in kvm_iodevice_destructor()
5772 dev->ops->destructor(dev); in kvm_iodevice_destructor()
5779 for (i = 0; i < bus->dev_count; i++) { in kvm_io_bus_destroy()
5780 struct kvm_io_device *pos = bus->range[i].dev; in kvm_io_bus_destroy()
5790 gpa_t addr1 = r1->addr; in kvm_io_bus_cmp()
5791 gpa_t addr2 = r2->addr; in kvm_io_bus_cmp()
5794 return -1; in kvm_io_bus_cmp()
5796 /* If r2->len == 0, match the exact address. If r2->len != 0, in kvm_io_bus_cmp()
5801 if (r2->len) { in kvm_io_bus_cmp()
5802 addr1 += r1->len; in kvm_io_bus_cmp()
5803 addr2 += r2->len; in kvm_io_bus_cmp()
5828 range = bsearch(&key, bus->range, bus->dev_count, in kvm_io_bus_get_first_dev()
5831 return -ENOENT; in kvm_io_bus_get_first_dev()
5833 off = range - bus->range; in kvm_io_bus_get_first_dev()
5835 while (off > 0 && kvm_io_bus_cmp(&key, &bus->range[off-1]) == 0) in kvm_io_bus_get_first_dev()
5836 off--; in kvm_io_bus_get_first_dev()
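
A simplified, standalone illustration (not kernel code) of the lookup strategy in kvm_io_bus_get_first_dev() above: the range table is kept sorted, bsearch() lands on some matching range, and the index is then walked back to the first entry that still compares equal. The comparator here only models the containment case and ignores the zero-length special case.

#include <stdio.h>
#include <stdlib.h>

struct range { unsigned long addr, len; };

/* Key matches a range that fully contains [key->addr, key->addr + key->len). */
static int range_cmp(const void *k, const void *e)
{
	const struct range *key = k, *r = e;

	if (key->addr < r->addr)
		return -1;
	if (key->addr + key->len > r->addr + r->len)
		return 1;
	return 0;
}

int main(void)
{
	static const struct range bus[] = {
		{ 0x1000, 0x100 }, { 0x1000, 0x100 }, { 0x2000, 0x10 },
	};
	struct range key = { 0x1010, 4 };
	const struct range *hit = bsearch(&key, bus, sizeof(bus) / sizeof(bus[0]),
					  sizeof(bus[0]), range_cmp);

	if (hit) {
		size_t idx = hit - bus;

		/* Walk back to the first device registered for this range. */
		while (idx > 0 && !range_cmp(&key, &bus[idx - 1]))
			idx--;
		printf("first match at index %zu\n", idx);
	}
	return 0;
}
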
5846 idx = kvm_io_bus_get_first_dev(bus, range->addr, range->len); in __kvm_io_bus_write()
5848 return -EOPNOTSUPP; in __kvm_io_bus_write()
5850 while (idx < bus->dev_count && in __kvm_io_bus_write()
5851 kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { in __kvm_io_bus_write()
5852 if (!kvm_iodevice_write(vcpu, bus->range[idx].dev, range->addr, in __kvm_io_bus_write()
5853 range->len, val)) in __kvm_io_bus_write()
5858 return -EOPNOTSUPP; in __kvm_io_bus_write()
5861 /* kvm_io_bus_write - called under kvm->slots_lock */
5874 bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); in kvm_io_bus_write()
5876 return -ENOMEM; in kvm_io_bus_write()
5882 /* kvm_io_bus_write_cookie - called under kvm->slots_lock */
5894 bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); in kvm_io_bus_write_cookie()
5896 return -ENOMEM; in kvm_io_bus_write_cookie()
5899 if ((cookie >= 0) && (cookie < bus->dev_count) && in kvm_io_bus_write_cookie()
5900 (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0)) in kvm_io_bus_write_cookie()
5901 if (!kvm_iodevice_write(vcpu, bus->range[cookie].dev, addr, len, in kvm_io_bus_write_cookie()
5917 idx = kvm_io_bus_get_first_dev(bus, range->addr, range->len); in __kvm_io_bus_read()
5919 return -EOPNOTSUPP; in __kvm_io_bus_read()
5921 while (idx < bus->dev_count && in __kvm_io_bus_read()
5922 kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { in __kvm_io_bus_read()
5923 if (!kvm_iodevice_read(vcpu, bus->range[idx].dev, range->addr, in __kvm_io_bus_read()
5924 range->len, val)) in __kvm_io_bus_read()
5929 return -EOPNOTSUPP; in __kvm_io_bus_read()
5932 /* kvm_io_bus_read - called under kvm->slots_lock */
5945 bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); in kvm_io_bus_read()
5947 return -ENOMEM; in kvm_io_bus_read()
5959 lockdep_assert_held(&kvm->slots_lock); in kvm_io_bus_register_dev()
5963 return -ENOMEM; in kvm_io_bus_register_dev()
5966 if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) in kvm_io_bus_register_dev()
5967 return -ENOSPC; in kvm_io_bus_register_dev()
5969 new_bus = kmalloc(struct_size(bus, range, bus->dev_count + 1), in kvm_io_bus_register_dev()
5972 return -ENOMEM; in kvm_io_bus_register_dev()
5980 for (i = 0; i < bus->dev_count; i++) in kvm_io_bus_register_dev()
5981 if (kvm_io_bus_cmp(&bus->range[i], &range) > 0) in kvm_io_bus_register_dev()
5985 new_bus->dev_count++; in kvm_io_bus_register_dev()
5986 new_bus->range[i] = range; in kvm_io_bus_register_dev()
5987 memcpy(new_bus->range + i + 1, bus->range + i, in kvm_io_bus_register_dev()
5988 (bus->dev_count - i) * sizeof(struct kvm_io_range)); in kvm_io_bus_register_dev()
5989 rcu_assign_pointer(kvm->buses[bus_idx], new_bus); in kvm_io_bus_register_dev()
5990 synchronize_srcu_expedited(&kvm->srcu); in kvm_io_bus_register_dev()
6002 lockdep_assert_held(&kvm->slots_lock); in kvm_io_bus_unregister_dev()
6008 for (i = 0; i < bus->dev_count; i++) { in kvm_io_bus_unregister_dev()
6009 if (bus->range[i].dev == dev) { in kvm_io_bus_unregister_dev()
6014 if (i == bus->dev_count) in kvm_io_bus_unregister_dev()
6017 new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1), in kvm_io_bus_unregister_dev()
6021 new_bus->dev_count--; in kvm_io_bus_unregister_dev()
6022 memcpy(new_bus->range + i, bus->range + i + 1, in kvm_io_bus_unregister_dev()
6023 flex_array_size(new_bus, range, new_bus->dev_count - i)); in kvm_io_bus_unregister_dev()
6026 rcu_assign_pointer(kvm->buses[bus_idx], new_bus); in kvm_io_bus_unregister_dev()
6027 synchronize_srcu_expedited(&kvm->srcu); in kvm_io_bus_unregister_dev()
6036 return -ENOMEM; in kvm_io_bus_unregister_dev()
6051 srcu_idx = srcu_read_lock(&kvm->srcu); in kvm_io_bus_get_dev()
6053 bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); in kvm_io_bus_get_dev()
6061 iodev = bus->range[dev_idx].dev; in kvm_io_bus_get_dev()
6064 srcu_read_unlock(&kvm->srcu, srcu_idx); in kvm_io_bus_get_dev()
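
A hedged userspace sketch connecting the register/unregister paths above to their most common caller, KVM_IOEVENTFD: assigning one adds a device for a guest-physical MMIO range, and the DEASSIGN flag removes it again. The doorbell address and 4-byte length are placeholders.

#include <linux/kvm.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int watch_mmio_doorbell(int vm_fd, __u64 gpa)
{
	int efd = eventfd(0, EFD_CLOEXEC);
	struct kvm_ioeventfd ioev = {
		.addr = gpa,		/* guest-physical MMIO address */
		.len  = 4,		/* 4-byte guest writes signal the eventfd */
		.fd   = efd,
	};

	if (efd < 0)
		return -1;

	if (ioctl(vm_fd, KVM_IOEVENTFD, &ioev)) {	/* registers on the MMIO bus */
		close(efd);
		return -1;
	}
	return efd;
}

static void unwatch_mmio_doorbell(int vm_fd, int efd, __u64 gpa)
{
	struct kvm_ioeventfd ioev = {
		.addr  = gpa,
		.len   = 4,
		.fd    = efd,
		.flags = KVM_IOEVENTFD_FLAG_DEASSIGN,	/* unregister the device */
	};

	ioctl(vm_fd, KVM_IOEVENTFD, &ioev);
	close(efd);
}
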
6075 struct kvm_stat_data *stat_data = inode->i_private; in kvm_debugfs_open()
6082 if (!kvm_get_kvm_safe(stat_data->kvm)) in kvm_debugfs_open()
6083 return -ENOENT; in kvm_debugfs_open()
6086 kvm_stats_debugfs_mode(stat_data->desc) & 0222 in kvm_debugfs_open()
6089 kvm_put_kvm(stat_data->kvm); in kvm_debugfs_open()
6096 struct kvm_stat_data *stat_data = inode->i_private; in kvm_debugfs_release()
6099 kvm_put_kvm(stat_data->kvm); in kvm_debugfs_release()
6106 *val = *(u64 *)((void *)(&kvm->stat) + offset); in kvm_get_stat_per_vm()
6113 *(u64 *)((void *)(&kvm->stat) + offset) = 0; in kvm_clear_stat_per_vm()
6126 *val += *(u64 *)((void *)(&vcpu->stat) + offset); in kvm_get_stat_per_vcpu()
6137 *(u64 *)((void *)(&vcpu->stat) + offset) = 0; in kvm_clear_stat_per_vcpu()
6144 int r = -EFAULT; in kvm_stat_data_get()
6147 switch (stat_data->kind) { in kvm_stat_data_get()
6149 r = kvm_get_stat_per_vm(stat_data->kvm, in kvm_stat_data_get()
6150 stat_data->desc->desc.offset, val); in kvm_stat_data_get()
6153 r = kvm_get_stat_per_vcpu(stat_data->kvm, in kvm_stat_data_get()
6154 stat_data->desc->desc.offset, val); in kvm_stat_data_get()
6163 int r = -EFAULT; in kvm_stat_data_clear()
6167 return -EINVAL; in kvm_stat_data_clear()
6169 switch (stat_data->kind) { in kvm_stat_data_clear()
6171 r = kvm_clear_stat_per_vm(stat_data->kvm, in kvm_stat_data_clear()
6172 stat_data->desc->desc.offset); in kvm_stat_data_clear()
6175 r = kvm_clear_stat_per_vcpu(stat_data->kvm, in kvm_stat_data_clear()
6176 stat_data->desc->desc.offset); in kvm_stat_data_clear()
6220 return -EINVAL; in vm_stat_clear()
6256 return -EINVAL; in vcpu_stat_clear()
6284 kvm_active_vms--; in kvm_uevent_notify_change()
6299 kvm->userspace_pid = task_pid_nr(current); in kvm_uevent_notify_change()
6303 add_uevent_var(env, "PID=%d", kvm->userspace_pid); in kvm_uevent_notify_change()
6305 if (!IS_ERR(kvm->debugfs_dentry)) { in kvm_uevent_notify_change()
6309 tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); in kvm_uevent_notify_change()
6316 env->envp[env->envp_idx++] = NULL; in kvm_uevent_notify_change()
6317 kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp); in kvm_uevent_notify_change()
6335 debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), in kvm_init_debug()
6337 (void *)(long)pdesc->desc.offset, fops); in kvm_init_debug()
6346 debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), in kvm_init_debug()
6348 (void *)(long)pdesc->desc.offset, fops); in kvm_init_debug()
6362 WRITE_ONCE(vcpu->preempted, false); in kvm_sched_in()
6363 WRITE_ONCE(vcpu->ready, false); in kvm_sched_in()
6368 WRITE_ONCE(vcpu->scheduled_out, false); in kvm_sched_in()
6376 WRITE_ONCE(vcpu->scheduled_out, true); in kvm_sched_out()
6378 if (task_is_runnable(current) && vcpu->wants_to_run) { in kvm_sched_out()
6379 WRITE_ONCE(vcpu->preempted, true); in kvm_sched_out()
6380 WRITE_ONCE(vcpu->ready, true); in kvm_sched_out()
6387 * kvm_get_running_vcpu - get the vcpu running on the current CPU.
6389 * We can disable preemption locally around accessing the per-CPU variable,
6392 * the per-CPU value later will give us the same value as we update the
6393 * per-CPU variable in the preempt notifier handlers.
6408 * kvm_get_running_vcpus - get the per-CPU array of currently running vcpus.
6472 - offsetof(struct kvm_vcpu, arch), in kvm_init()
6475 return -ENOMEM; in kvm_init()
6480 r = -ENOMEM; in kvm_init()
6582 struct kvm *kvm = init_context->kvm; in kvm_vm_worker_thread()
6583 kvm_vm_thread_fn_t thread_fn = init_context->thread_fn; in kvm_vm_worker_thread()
6584 uintptr_t data = init_context->data; in kvm_vm_worker_thread()
6593 err = cgroup_attach_task_all(init_context->parent, current); in kvm_vm_worker_thread()
6600 set_user_nice(current, task_nice(init_context->parent)); in kvm_vm_worker_thread()
6603 init_context->err = err; in kvm_vm_worker_thread()
6604 complete(&init_context->init_done); in kvm_vm_worker_thread()
6629 parent = rcu_dereference(current->real_parent); in kvm_vm_worker_thread()
6653 "%s-%d", name, task_pid_nr(current)); in kvm_vm_create_worker_thread()