KVM
mmu.c File Reference
#include "irq.h"
#include "ioapic.h"
#include "mmu.h"
#include "mmu_internal.h"
#include "tdp_mmu.h"
#include "x86.h"
#include "kvm_cache_regs.h"
#include "smm.h"
#include "kvm_emulate.h"
#include "page_track.h"
#include "cpuid.h"
#include "spte.h"
#include <linux/kvm_host.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/moduleparam.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/hugetlb.h>
#include <linux/compiler.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <linux/kern_levels.h>
#include <linux/kstrtox.h>
#include <linux/kthread.h>
#include <asm/page.h>
#include <asm/memtype.h>
#include <asm/cmpxchg.h>
#include <asm/io.h>
#include <asm/set_memory.h>
#include <asm/vmx.h>
#include "trace.h"
#include <trace/events/kvm.h>
#include "mmutrace.h"
#include "paging_tmpl.h"

Classes

struct  pte_list_desc
 
struct  kvm_shadow_walk_iterator
 
struct  kvm_mmu_role_regs
 
union  split_spte
 
struct  rmap_iterator
 
struct  slot_rmap_walk_iterator
 
struct  kvm_mmu_pages
 
struct  kvm_mmu_pages::mmu_page_and_offset
 
struct  mmu_page_path
 
struct  shadow_page_caches
 

Macros

#define pr_fmt(fmt)   KBUILD_MODNAME ": " fmt
 
#define PTE_PREFETCH_NUM   8
 
#define PTE_LIST_EXT   14
 
#define for_each_shadow_entry_using_root(_vcpu, _root, _addr, _walker)
 
#define for_each_shadow_entry(_vcpu, _addr, _walker)
 
#define for_each_shadow_entry_lockless(_vcpu, _addr, _walker, spte)
 
#define CREATE_TRACE_POINTS
 
#define BUILD_MMU_ROLE_REGS_ACCESSOR(reg, name, flag)
 
#define BUILD_MMU_ROLE_ACCESSOR(base_or_ext, reg, name)
 
#define KVM_LPAGE_MIXED_FLAG   BIT(31)
 
#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_)
 
#define for_each_slot_rmap_range(_slot_, _start_level_, _end_level_, _start_gfn, _end_gfn, _iter_)
 
#define RMAP_RECYCLE_THRESHOLD   1000
 
#define KVM_PAGE_ARRAY_NR   16
 
#define INVALID_INDEX   (-1)
 
#define for_each_valid_sp(_kvm, _sp, _list)
 
#define for_each_gfn_valid_sp_with_gptes(_kvm, _sp, _gfn)
 
#define for_each_sp(pvec, sp, parents, i)
 
#define PTTYPE_EPT   18 /* arbitrary */
 
#define PTTYPE   PTTYPE_EPT
 
#define PTTYPE   64
 
#define PTTYPE   32
 
#define BYTE_MASK(access)
 
#define BATCH_ZAP_PAGES   10
 

Typedefs

typedef bool(* rmap_handler_t) (struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, pte_t pte)
 
typedef bool(* slot_rmaps_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot)
 

Functions

static int get_nx_huge_pages (char *buffer, const struct kernel_param *kp)
 
static int set_nx_huge_pages (const char *val, const struct kernel_param *kp)
 
static int set_nx_huge_pages_recovery_param (const char *val, const struct kernel_param *kp)
 
 module_param_cb (nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644)
 
 __MODULE_PARM_TYPE (nx_huge_pages, "bool")
 
 module_param_cb (nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_param_ops, &nx_huge_pages_recovery_ratio, 0644)
 
 __MODULE_PARM_TYPE (nx_huge_pages_recovery_ratio, "uint")
 
 module_param_cb (nx_huge_pages_recovery_period_ms, &nx_huge_pages_recovery_param_ops, &nx_huge_pages_recovery_period_ms, 0644)
 
 __MODULE_PARM_TYPE (nx_huge_pages_recovery_period_ms, "uint")
 
 module_param_named (flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644)
 
static void mmu_spte_set (u64 *sptep, u64 spte)
 
 BUILD_MMU_ROLE_REGS_ACCESSOR (cr0, pg, X86_CR0_PG)
 
 BUILD_MMU_ROLE_REGS_ACCESSOR (cr0, wp, X86_CR0_WP)
 
 BUILD_MMU_ROLE_REGS_ACCESSOR (cr4, pse, X86_CR4_PSE)
 
 BUILD_MMU_ROLE_REGS_ACCESSOR (cr4, pae, X86_CR4_PAE)
 
 BUILD_MMU_ROLE_REGS_ACCESSOR (cr4, smep, X86_CR4_SMEP)
 
 BUILD_MMU_ROLE_REGS_ACCESSOR (cr4, smap, X86_CR4_SMAP)
 
 BUILD_MMU_ROLE_REGS_ACCESSOR (cr4, pke, X86_CR4_PKE)
 
 BUILD_MMU_ROLE_REGS_ACCESSOR (cr4, la57, X86_CR4_LA57)
 
 BUILD_MMU_ROLE_REGS_ACCESSOR (efer, nx, EFER_NX)
 
 BUILD_MMU_ROLE_REGS_ACCESSOR (efer, lma, EFER_LMA)
 
 BUILD_MMU_ROLE_ACCESSOR (base, cr0, wp)
 
 BUILD_MMU_ROLE_ACCESSOR (ext, cr4, pse)
 
 BUILD_MMU_ROLE_ACCESSOR (ext, cr4, smep)
 
 BUILD_MMU_ROLE_ACCESSOR (ext, cr4, smap)
 
 BUILD_MMU_ROLE_ACCESSOR (ext, cr4, pke)
 
 BUILD_MMU_ROLE_ACCESSOR (ext, cr4, la57)
 
 BUILD_MMU_ROLE_ACCESSOR (base, efer, nx)
 
 BUILD_MMU_ROLE_ACCESSOR (ext, efer, lma)
 
static bool is_cr0_pg (struct kvm_mmu *mmu)
 
static bool is_cr4_pae (struct kvm_mmu *mmu)
 
static struct kvm_mmu_role_regs vcpu_to_role_regs (struct kvm_vcpu *vcpu)
 
static unsigned long get_guest_cr3 (struct kvm_vcpu *vcpu)
 
static unsigned long kvm_mmu_get_guest_pgd (struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
 
static bool kvm_available_flush_remote_tlbs_range (void)
 
static gfn_t kvm_mmu_page_get_gfn (struct kvm_mmu_page *sp, int index)
 
static void kvm_flush_remote_tlbs_sptep (struct kvm *kvm, u64 *sptep)
 
static void mark_mmio_spte (struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, unsigned int access)
 
static gfn_t get_mmio_spte_gfn (u64 spte)
 
static unsigned get_mmio_spte_access (u64 spte)
 
static bool check_mmio_spte (struct kvm_vcpu *vcpu, u64 spte)
 
static int is_cpuid_PSE36 (void)
 
static void count_spte_clear (u64 *sptep, u64 spte)
 
static void __set_spte (u64 *sptep, u64 spte)
 
static void __update_clear_spte_fast (u64 *sptep, u64 spte)
 
static u64 __update_clear_spte_slow (u64 *sptep, u64 spte)
 
static u64 __get_spte_lockless (u64 *sptep)
 
static u64 mmu_spte_update_no_track (u64 *sptep, u64 new_spte)
 
static bool mmu_spte_update (u64 *sptep, u64 new_spte)
 
static u64 mmu_spte_clear_track_bits (struct kvm *kvm, u64 *sptep)
 
static void mmu_spte_clear_no_track (u64 *sptep)
 
static u64 mmu_spte_get_lockless (u64 *sptep)
 
static bool mmu_spte_age (u64 *sptep)
 
static bool is_tdp_mmu_active (struct kvm_vcpu *vcpu)
 
static void walk_shadow_page_lockless_begin (struct kvm_vcpu *vcpu)
 
static void walk_shadow_page_lockless_end (struct kvm_vcpu *vcpu)
 
static int mmu_topup_memory_caches (struct kvm_vcpu *vcpu, bool maybe_indirect)
 
static void mmu_free_memory_caches (struct kvm_vcpu *vcpu)
 
static void mmu_free_pte_list_desc (struct pte_list_desc *pte_list_desc)
 
static bool sp_has_gptes (struct kvm_mmu_page *sp)
 
static u32 kvm_mmu_page_get_access (struct kvm_mmu_page *sp, int index)
 
static void kvm_mmu_page_set_translation (struct kvm_mmu_page *sp, int index, gfn_t gfn, unsigned int access)
 
static void kvm_mmu_page_set_access (struct kvm_mmu_page *sp, int index, unsigned int access)
 
static struct kvm_lpage_info * lpage_info_slot (gfn_t gfn, const struct kvm_memory_slot *slot, int level)
 
static void update_gfn_disallow_lpage_count (const struct kvm_memory_slot *slot, gfn_t gfn, int count)
 
void kvm_mmu_gfn_disallow_lpage (const struct kvm_memory_slot *slot, gfn_t gfn)
 
void kvm_mmu_gfn_allow_lpage (const struct kvm_memory_slot *slot, gfn_t gfn)
 
static void account_shadowed (struct kvm *kvm, struct kvm_mmu_page *sp)
 
void track_possible_nx_huge_page (struct kvm *kvm, struct kvm_mmu_page *sp)
 
static void account_nx_huge_page (struct kvm *kvm, struct kvm_mmu_page *sp, bool nx_huge_page_possible)
 
static void unaccount_shadowed (struct kvm *kvm, struct kvm_mmu_page *sp)
 
void untrack_possible_nx_huge_page (struct kvm *kvm, struct kvm_mmu_page *sp)
 
static void unaccount_nx_huge_page (struct kvm *kvm, struct kvm_mmu_page *sp)
 
static struct kvm_memory_slot * gfn_to_memslot_dirty_bitmap (struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log)
 
static int pte_list_add (struct kvm_mmu_memory_cache *cache, u64 *spte, struct kvm_rmap_head *rmap_head)
 
static void pte_list_desc_remove_entry (struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct pte_list_desc *desc, int i)
 
static void pte_list_remove (struct kvm *kvm, u64 *spte, struct kvm_rmap_head *rmap_head)
 
static void kvm_zap_one_rmap_spte (struct kvm *kvm, struct kvm_rmap_head *rmap_head, u64 *sptep)
 
static bool kvm_zap_all_rmap_sptes (struct kvm *kvm, struct kvm_rmap_head *rmap_head)
 
unsigned int pte_list_count (struct kvm_rmap_head *rmap_head)
 
static struct kvm_rmap_head * gfn_to_rmap (gfn_t gfn, int level, const struct kvm_memory_slot *slot)
 
static void rmap_remove (struct kvm *kvm, u64 *spte)
 
static u64 * rmap_get_first (struct kvm_rmap_head *rmap_head, struct rmap_iterator *iter)
 
static u64 * rmap_get_next (struct rmap_iterator *iter)
 
static void drop_spte (struct kvm *kvm, u64 *sptep)
 
static void drop_large_spte (struct kvm *kvm, u64 *sptep, bool flush)
 
static bool spte_write_protect (u64 *sptep, bool pt_protect)
 
static bool rmap_write_protect (struct kvm_rmap_head *rmap_head, bool pt_protect)
 
static bool spte_clear_dirty (u64 *sptep)
 
static bool spte_wrprot_for_clear_dirty (u64 *sptep)
 
static bool __rmap_clear_dirty (struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot)
 
static void kvm_mmu_write_protect_pt_masked (struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask)
 
static void kvm_mmu_clear_dirty_pt_masked (struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask)
 
void kvm_arch_mmu_enable_log_dirty_pt_masked (struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask)
 
int kvm_cpu_dirty_log_size (void)
 
bool kvm_mmu_slot_gfn_write_protect (struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn, int min_level)
 
static bool kvm_vcpu_write_protect_gfn (struct kvm_vcpu *vcpu, u64 gfn)
 
static bool __kvm_zap_rmap (struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot)
 
static bool kvm_zap_rmap (struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, pte_t unused)
 
static bool kvm_set_pte_rmap (struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, pte_t pte)
 
static void rmap_walk_init_level (struct slot_rmap_walk_iterator *iterator, int level)
 
static void slot_rmap_walk_init (struct slot_rmap_walk_iterator *iterator, const struct kvm_memory_slot *slot, int start_level, int end_level, gfn_t start_gfn, gfn_t end_gfn)
 
static bool slot_rmap_walk_okay (struct slot_rmap_walk_iterator *iterator)
 
static void slot_rmap_walk_next (struct slot_rmap_walk_iterator *iterator)
 
static __always_inline bool kvm_handle_gfn_range (struct kvm *kvm, struct kvm_gfn_range *range, rmap_handler_t handler)
 
bool kvm_unmap_gfn_range (struct kvm *kvm, struct kvm_gfn_range *range)
 
bool kvm_set_spte_gfn (struct kvm *kvm, struct kvm_gfn_range *range)
 
static bool kvm_age_rmap (struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, pte_t unused)
 
static bool kvm_test_age_rmap (struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, pte_t unused)
 
static void __rmap_add (struct kvm *kvm, struct kvm_mmu_memory_cache *cache, const struct kvm_memory_slot *slot, u64 *spte, gfn_t gfn, unsigned int access)
 
static void rmap_add (struct kvm_vcpu *vcpu, const struct kvm_memory_slot *slot, u64 *spte, gfn_t gfn, unsigned int access)
 
bool kvm_age_gfn (struct kvm *kvm, struct kvm_gfn_range *range)
 
bool kvm_test_age_gfn (struct kvm *kvm, struct kvm_gfn_range *range)
 
static void kvm_mmu_check_sptes_at_free (struct kvm_mmu_page *sp)
 
static void kvm_mod_used_mmu_pages (struct kvm *kvm, long nr)
 
static void kvm_account_mmu_page (struct kvm *kvm, struct kvm_mmu_page *sp)
 
static void kvm_unaccount_mmu_page (struct kvm *kvm, struct kvm_mmu_page *sp)
 
static void kvm_mmu_free_shadow_page (struct kvm_mmu_page *sp)
 
static unsigned kvm_page_table_hashfn (gfn_t gfn)
 
static void mmu_page_add_parent_pte (struct kvm_mmu_memory_cache *cache, struct kvm_mmu_page *sp, u64 *parent_pte)
 
static void mmu_page_remove_parent_pte (struct kvm *kvm, struct kvm_mmu_page *sp, u64 *parent_pte)
 
static void drop_parent_pte (struct kvm *kvm, struct kvm_mmu_page *sp, u64 *parent_pte)
 
static void mark_unsync (u64 *spte)
 
static void kvm_mmu_mark_parents_unsync (struct kvm_mmu_page *sp)
 
static int mmu_pages_add (struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, int idx)
 
static void clear_unsync_child_bit (struct kvm_mmu_page *sp, int idx)
 
static int __mmu_unsync_walk (struct kvm_mmu_page *sp, struct kvm_mmu_pages *pvec)
 
static int mmu_unsync_walk (struct kvm_mmu_page *sp, struct kvm_mmu_pages *pvec)
 
static void kvm_unlink_unsync_page (struct kvm *kvm, struct kvm_mmu_page *sp)
 
static bool kvm_mmu_prepare_zap_page (struct kvm *kvm, struct kvm_mmu_page *sp, struct list_head *invalid_list)
 
static void kvm_mmu_commit_zap_page (struct kvm *kvm, struct list_head *invalid_list)
 
static bool kvm_sync_page_check (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
static int kvm_sync_spte (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int i)
 
static int __kvm_sync_page (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
static int kvm_sync_page (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, struct list_head *invalid_list)
 
static bool kvm_mmu_remote_flush_or_zap (struct kvm *kvm, struct list_head *invalid_list, bool remote_flush)
 
static bool is_obsolete_sp (struct kvm *kvm, struct kvm_mmu_page *sp)
 
static int mmu_pages_next (struct kvm_mmu_pages *pvec, struct mmu_page_path *parents, int i)
 
static int mmu_pages_first (struct kvm_mmu_pages *pvec, struct mmu_page_path *parents)
 
static void mmu_pages_clear_parents (struct mmu_page_path *parents)
 
static int mmu_sync_children (struct kvm_vcpu *vcpu, struct kvm_mmu_page *parent, bool can_yield)
 
static void __clear_sp_write_flooding_count (struct kvm_mmu_page *sp)
 
static void clear_sp_write_flooding_count (u64 *spte)
 
static struct kvm_mmu_page * kvm_mmu_find_shadow_page (struct kvm *kvm, struct kvm_vcpu *vcpu, gfn_t gfn, struct hlist_head *sp_list, union kvm_mmu_page_role role)
 
static struct kvm_mmu_page * kvm_mmu_alloc_shadow_page (struct kvm *kvm, struct shadow_page_caches *caches, gfn_t gfn, struct hlist_head *sp_list, union kvm_mmu_page_role role)
 
static struct kvm_mmu_page * __kvm_mmu_get_shadow_page (struct kvm *kvm, struct kvm_vcpu *vcpu, struct shadow_page_caches *caches, gfn_t gfn, union kvm_mmu_page_role role)
 
static struct kvm_mmu_page * kvm_mmu_get_shadow_page (struct kvm_vcpu *vcpu, gfn_t gfn, union kvm_mmu_page_role role)
 
static union kvm_mmu_page_role kvm_mmu_child_role (u64 *sptep, bool direct, unsigned int access)
 
static struct kvm_mmu_page * kvm_mmu_get_child_sp (struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, bool direct, unsigned int access)
 
static void shadow_walk_init_using_root (struct kvm_shadow_walk_iterator *iterator, struct kvm_vcpu *vcpu, hpa_t root, u64 addr)
 
static void shadow_walk_init (struct kvm_shadow_walk_iterator *iterator, struct kvm_vcpu *vcpu, u64 addr)
 
static bool shadow_walk_okay (struct kvm_shadow_walk_iterator *iterator)
 
static void __shadow_walk_next (struct kvm_shadow_walk_iterator *iterator, u64 spte)
 
static void shadow_walk_next (struct kvm_shadow_walk_iterator *iterator)
 
static void __link_shadow_page (struct kvm *kvm, struct kvm_mmu_memory_cache *cache, u64 *sptep, struct kvm_mmu_page *sp, bool flush)
 
static void link_shadow_page (struct kvm_vcpu *vcpu, u64 *sptep, struct kvm_mmu_page *sp)
 
static void validate_direct_spte (struct kvm_vcpu *vcpu, u64 *sptep, unsigned direct_access)
 
static int mmu_page_zap_pte (struct kvm *kvm, struct kvm_mmu_page *sp, u64 *spte, struct list_head *invalid_list)
 
static int kvm_mmu_page_unlink_children (struct kvm *kvm, struct kvm_mmu_page *sp, struct list_head *invalid_list)
 
static void kvm_mmu_unlink_parents (struct kvm *kvm, struct kvm_mmu_page *sp)
 
static int mmu_zap_unsync_children (struct kvm *kvm, struct kvm_mmu_page *parent, struct list_head *invalid_list)
 
static bool __kvm_mmu_prepare_zap_page (struct kvm *kvm, struct kvm_mmu_page *sp, struct list_head *invalid_list, int *nr_zapped)
 
static unsigned long kvm_mmu_zap_oldest_mmu_pages (struct kvm *kvm, unsigned long nr_to_zap)
 
static unsigned long kvm_mmu_available_pages (struct kvm *kvm)
 
static int make_mmu_pages_available (struct kvm_vcpu *vcpu)
 
void kvm_mmu_change_mmu_pages (struct kvm *kvm, unsigned long goal_nr_mmu_pages)
 
int kvm_mmu_unprotect_page (struct kvm *kvm, gfn_t gfn)
 
static int kvm_mmu_unprotect_page_virt (struct kvm_vcpu *vcpu, gva_t gva)
 
static void kvm_unsync_page (struct kvm *kvm, struct kvm_mmu_page *sp)
 
int mmu_try_to_unsync_pages (struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t gfn, bool can_unsync, bool prefetch)
 
static int mmu_set_spte (struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, u64 *sptep, unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn, struct kvm_page_fault *fault)
 
static int direct_pte_prefetch_many (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *start, u64 *end)
 
static void __direct_pte_prefetch (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *sptep)
 
static void direct_pte_prefetch (struct kvm_vcpu *vcpu, u64 *sptep)
 
static int host_pfn_mapping_level (struct kvm *kvm, gfn_t gfn, const struct kvm_memory_slot *slot)
 
static int __kvm_mmu_max_mapping_level (struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t gfn, int max_level, bool is_private)
 
int kvm_mmu_max_mapping_level (struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t gfn, int max_level)
 
void kvm_mmu_hugepage_adjust (struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 
void disallowed_hugepage_adjust (struct kvm_page_fault *fault, u64 spte, int cur_level)
 
static int direct_map (struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 
static void kvm_send_hwpoison_signal (struct kvm_memory_slot *slot, gfn_t gfn)
 
static int kvm_handle_error_pfn (struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 
static int kvm_handle_noslot_fault (struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, unsigned int access)
 
static bool page_fault_can_be_fast (struct kvm_page_fault *fault)
 
static bool fast_pf_fix_direct_spte (struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, u64 *sptep, u64 old_spte, u64 new_spte)
 
static bool is_access_allowed (struct kvm_page_fault *fault, u64 spte)
 
static u64 * fast_pf_get_last_sptep (struct kvm_vcpu *vcpu, gpa_t gpa, u64 *spte)
 
static int fast_page_fault (struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 
static void mmu_free_root_page (struct kvm *kvm, hpa_t *root_hpa, struct list_head *invalid_list)
 
void kvm_mmu_free_roots (struct kvm *kvm, struct kvm_mmu *mmu, ulong roots_to_free)
 
 EXPORT_SYMBOL_GPL (kvm_mmu_free_roots)
 
void kvm_mmu_free_guest_mode_roots (struct kvm *kvm, struct kvm_mmu *mmu)
 
 EXPORT_SYMBOL_GPL (kvm_mmu_free_guest_mode_roots)
 
static hpa_t mmu_alloc_root (struct kvm_vcpu *vcpu, gfn_t gfn, int quadrant, u8 level)
 
static int mmu_alloc_direct_roots (struct kvm_vcpu *vcpu)
 
static int mmu_first_shadow_root_alloc (struct kvm *kvm)
 
static int mmu_alloc_shadow_roots (struct kvm_vcpu *vcpu)
 
static int mmu_alloc_special_roots (struct kvm_vcpu *vcpu)
 
static bool is_unsync_root (hpa_t root)
 
void kvm_mmu_sync_roots (struct kvm_vcpu *vcpu)
 
void kvm_mmu_sync_prev_roots (struct kvm_vcpu *vcpu)
 
static gpa_t nonpaging_gva_to_gpa (struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gpa_t vaddr, u64 access, struct x86_exception *exception)
 
static bool mmio_info_in_cache (struct kvm_vcpu *vcpu, u64 addr, bool direct)
 
static int get_walk (struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level)
 
static bool get_mmio_spte (struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 
static int handle_mmio_page_fault (struct kvm_vcpu *vcpu, u64 addr, bool direct)
 
static bool page_fault_handle_page_track (struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 
static void shadow_page_table_clear_flood (struct kvm_vcpu *vcpu, gva_t addr)
 
static u32 alloc_apf_token (struct kvm_vcpu *vcpu)
 
static bool kvm_arch_setup_async_pf (struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, gfn_t gfn)
 
void kvm_arch_async_page_ready (struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 
static u8 kvm_max_level_for_order (int order)
 
static void kvm_mmu_prepare_memory_fault_exit (struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 
static int kvm_faultin_pfn_private (struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 
static int __kvm_faultin_pfn (struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 
static int kvm_faultin_pfn (struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, unsigned int access)
 
static bool is_page_fault_stale (struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 
static int direct_page_fault (struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 
static int nonpaging_page_fault (struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 
int kvm_handle_page_fault (struct kvm_vcpu *vcpu, u64 error_code, u64 fault_address, char *insn, int insn_len)
 
 EXPORT_SYMBOL_GPL (kvm_handle_page_fault)
 
bool __kvm_mmu_honors_guest_mtrrs (bool vm_has_noncoherent_dma)
 
int kvm_tdp_page_fault (struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 
static void nonpaging_init_context (struct kvm_mmu *context)
 
static bool is_root_usable (struct kvm_mmu_root_info *root, gpa_t pgd, union kvm_mmu_page_role role)
 
static bool cached_root_find_and_keep_current (struct kvm *kvm, struct kvm_mmu *mmu, gpa_t new_pgd, union kvm_mmu_page_role new_role)
 
static bool cached_root_find_without_current (struct kvm *kvm, struct kvm_mmu *mmu, gpa_t new_pgd, union kvm_mmu_page_role new_role)
 
static bool fast_pgd_switch (struct kvm *kvm, struct kvm_mmu *mmu, gpa_t new_pgd, union kvm_mmu_page_role new_role)
 
void kvm_mmu_new_pgd (struct kvm_vcpu *vcpu, gpa_t new_pgd)
 
 EXPORT_SYMBOL_GPL (kvm_mmu_new_pgd)
 
static bool sync_mmio_spte (struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, unsigned int access)
 
static void __reset_rsvds_bits_mask (struct rsvd_bits_validate *rsvd_check, u64 pa_bits_rsvd, int level, bool nx, bool gbpages, bool pse, bool amd)
 
static void reset_guest_rsvds_bits_mask (struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 
static void __reset_rsvds_bits_mask_ept (struct rsvd_bits_validate *rsvd_check, u64 pa_bits_rsvd, bool execonly, int huge_page_level)
 
static void reset_rsvds_bits_mask_ept (struct kvm_vcpu *vcpu, struct kvm_mmu *context, bool execonly, int huge_page_level)
 
static u64 reserved_hpa_bits (void)
 
static void reset_shadow_zero_bits_mask (struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 
static bool boot_cpu_is_amd (void)
 
static void reset_tdp_shadow_zero_bits_mask (struct kvm_mmu *context)
 
static void reset_ept_shadow_zero_bits_mask (struct kvm_mmu *context, bool execonly)
 
static void update_permission_bitmask (struct kvm_mmu *mmu, bool ept)
 
static void update_pkru_bitmask (struct kvm_mmu *mmu)
 
static void reset_guest_paging_metadata (struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
 
static void paging64_init_context (struct kvm_mmu *context)
 
static void paging32_init_context (struct kvm_mmu *context)
 
static union kvm_cpu_role kvm_calc_cpu_role (struct kvm_vcpu *vcpu, const struct kvm_mmu_role_regs *regs)
 
void __kvm_mmu_refresh_passthrough_bits (struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
 
static int kvm_mmu_get_tdp_level (struct kvm_vcpu *vcpu)
 
static union kvm_mmu_page_role kvm_calc_tdp_mmu_root_page_role (struct kvm_vcpu *vcpu, union kvm_cpu_role cpu_role)
 
static void init_kvm_tdp_mmu (struct kvm_vcpu *vcpu, union kvm_cpu_role cpu_role)
 
static void shadow_mmu_init_context (struct kvm_vcpu *vcpu, struct kvm_mmu *context, union kvm_cpu_role cpu_role, union kvm_mmu_page_role root_role)
 
static void kvm_init_shadow_mmu (struct kvm_vcpu *vcpu, union kvm_cpu_role cpu_role)
 
void kvm_init_shadow_npt_mmu (struct kvm_vcpu *vcpu, unsigned long cr0, unsigned long cr4, u64 efer, gpa_t nested_cr3)
 
 EXPORT_SYMBOL_GPL (kvm_init_shadow_npt_mmu)
 
static union kvm_cpu_role kvm_calc_shadow_ept_root_page_role (struct kvm_vcpu *vcpu, bool accessed_dirty, bool execonly, u8 level)
 
void kvm_init_shadow_ept_mmu (struct kvm_vcpu *vcpu, bool execonly, int huge_page_level, bool accessed_dirty, gpa_t new_eptp)
 
 EXPORT_SYMBOL_GPL (kvm_init_shadow_ept_mmu)
 
static void init_kvm_softmmu (struct kvm_vcpu *vcpu, union kvm_cpu_role cpu_role)
 
static void init_kvm_nested_mmu (struct kvm_vcpu *vcpu, union kvm_cpu_role new_mode)
 
void kvm_init_mmu (struct kvm_vcpu *vcpu)
 
 EXPORT_SYMBOL_GPL (kvm_init_mmu)
 
void kvm_mmu_after_set_cpuid (struct kvm_vcpu *vcpu)
 
void kvm_mmu_reset_context (struct kvm_vcpu *vcpu)
 
 EXPORT_SYMBOL_GPL (kvm_mmu_reset_context)
 
int kvm_mmu_load (struct kvm_vcpu *vcpu)
 
void kvm_mmu_unload (struct kvm_vcpu *vcpu)
 
static bool is_obsolete_root (struct kvm *kvm, hpa_t root_hpa)
 
static void __kvm_mmu_free_obsolete_roots (struct kvm *kvm, struct kvm_mmu *mmu)
 
void kvm_mmu_free_obsolete_roots (struct kvm_vcpu *vcpu)
 
static u64 mmu_pte_write_fetch_gpte (struct kvm_vcpu *vcpu, gpa_t *gpa, int *bytes)
 
static bool detect_write_flooding (struct kvm_mmu_page *sp)
 
static bool detect_write_misaligned (struct kvm_mmu_page *sp, gpa_t gpa, int bytes)
 
static u64 * get_written_sptes (struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
 
void kvm_mmu_track_write (struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes)
 
int noinline kvm_mmu_page_fault (struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len)
 
 EXPORT_SYMBOL_GPL (kvm_mmu_page_fault)
 
static void __kvm_mmu_invalidate_addr (struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, u64 addr, hpa_t root_hpa)
 
void kvm_mmu_invalidate_addr (struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, u64 addr, unsigned long roots)
 
 EXPORT_SYMBOL_GPL (kvm_mmu_invalidate_addr)
 
void kvm_mmu_invlpg (struct kvm_vcpu *vcpu, gva_t gva)
 
 EXPORT_SYMBOL_GPL (kvm_mmu_invlpg)
 
void kvm_mmu_invpcid_gva (struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
 
void kvm_configure_mmu (bool enable_tdp, int tdp_forced_root_level, int tdp_max_root_level, int tdp_huge_page_level)
 
 EXPORT_SYMBOL_GPL (kvm_configure_mmu)
 
static __always_inline bool __walk_slot_rmaps (struct kvm *kvm, const struct kvm_memory_slot *slot, slot_rmaps_handler fn, int start_level, int end_level, gfn_t start_gfn, gfn_t end_gfn, bool flush_on_yield, bool flush)
 
static __always_inline bool walk_slot_rmaps (struct kvm *kvm, const struct kvm_memory_slot *slot, slot_rmaps_handler fn, int start_level, int end_level, bool flush_on_yield)
 
static __always_inline bool walk_slot_rmaps_4k (struct kvm *kvm, const struct kvm_memory_slot *slot, slot_rmaps_handler fn, bool flush_on_yield)
 
static void free_mmu_pages (struct kvm_mmu *mmu)
 
static int __kvm_mmu_create (struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
 
int kvm_mmu_create (struct kvm_vcpu *vcpu)
 
static void kvm_zap_obsolete_pages (struct kvm *kvm)
 
static void kvm_mmu_zap_all_fast (struct kvm *kvm)
 
static bool kvm_has_zapped_obsolete_pages (struct kvm *kvm)
 
void kvm_mmu_init_vm (struct kvm *kvm)
 
static void mmu_free_vm_memory_caches (struct kvm *kvm)
 
void kvm_mmu_uninit_vm (struct kvm *kvm)
 
static bool kvm_rmap_zap_gfn_range (struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 
void kvm_zap_gfn_range (struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 
static bool slot_rmap_write_protect (struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot)
 
void kvm_mmu_slot_remove_write_access (struct kvm *kvm, const struct kvm_memory_slot *memslot, int start_level)
 
static bool need_topup (struct kvm_mmu_memory_cache *cache, int min)
 
static bool need_topup_split_caches_or_resched (struct kvm *kvm)
 
static int topup_split_caches (struct kvm *kvm)
 
static struct kvm_mmu_page * shadow_mmu_get_sp_for_split (struct kvm *kvm, u64 *huge_sptep)
 
static void shadow_mmu_split_huge_page (struct kvm *kvm, const struct kvm_memory_slot *slot, u64 *huge_sptep)
 
static int shadow_mmu_try_split_huge_page (struct kvm *kvm, const struct kvm_memory_slot *slot, u64 *huge_sptep)
 
static bool shadow_mmu_try_split_huge_pages (struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot)
 
static void kvm_shadow_mmu_try_split_huge_pages (struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t start, gfn_t end, int target_level)
 
void kvm_mmu_try_split_huge_pages (struct kvm *kvm, const struct kvm_memory_slot *memslot, u64 start, u64 end, int target_level)
 
void kvm_mmu_slot_try_split_huge_pages (struct kvm *kvm, const struct kvm_memory_slot *memslot, int target_level)
 
static bool kvm_mmu_zap_collapsible_spte (struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot)
 
static void kvm_rmap_zap_collapsible_sptes (struct kvm *kvm, const struct kvm_memory_slot *slot)
 
void kvm_mmu_zap_collapsible_sptes (struct kvm *kvm, const struct kvm_memory_slot *slot)
 
void kvm_mmu_slot_leaf_clear_dirty (struct kvm *kvm, const struct kvm_memory_slot *memslot)
 
static void kvm_mmu_zap_all (struct kvm *kvm)
 
void kvm_arch_flush_shadow_all (struct kvm *kvm)
 
void kvm_arch_flush_shadow_memslot (struct kvm *kvm, struct kvm_memory_slot *slot)
 
void kvm_mmu_invalidate_mmio_sptes (struct kvm *kvm, u64 gen)
 
static unsigned long mmu_shrink_scan (struct shrinker *shrink, struct shrink_control *sc)
 
static unsigned long mmu_shrink_count (struct shrinker *shrink, struct shrink_control *sc)
 
static void mmu_destroy_caches (void)
 
static bool get_nx_auto_mode (void)
 
static void __set_nx_huge_pages (bool val)
 
void __init kvm_mmu_x86_module_init (void)
 
int kvm_mmu_vendor_module_init (void)
 
void kvm_mmu_destroy (struct kvm_vcpu *vcpu)
 
void kvm_mmu_vendor_module_exit (void)
 
static bool calc_nx_huge_pages_recovery_period (uint *period)
 
static void kvm_recover_nx_huge_pages (struct kvm *kvm)
 
static long get_nx_huge_page_recovery_timeout (u64 start_time)
 
static int kvm_nx_huge_page_recovery_worker (struct kvm *kvm, uintptr_t data)
 
int kvm_mmu_post_init_vm (struct kvm *kvm)
 
void kvm_mmu_pre_destroy_vm (struct kvm *kvm)
 

Variables

bool itlb_multihit_kvm_mitigation
 
static bool nx_hugepage_mitigation_hard_disabled
 
int __read_mostly nx_huge_pages = -1
 
static uint __read_mostly nx_huge_pages_recovery_period_ms
 
static uint __read_mostly nx_huge_pages_recovery_ratio = 60
 
static const struct kernel_param_ops nx_huge_pages_ops
 
static const struct kernel_param_ops nx_huge_pages_recovery_param_ops
 
static bool __read_mostly force_flush_and_sync_on_reuse
 
bool tdp_enabled = false
 
static bool __ro_after_init tdp_mmu_allowed
 
static int max_huge_page_level __read_mostly
 
static struct kmem_cache * pte_list_desc_cache
 
struct kmem_cache * mmu_page_header_cache
 
static struct percpu_counter kvm_total_used_mmu_pages
 
static struct shrinker * mmu_shrinker
 

Macro Definition Documentation

◆ BATCH_ZAP_PAGES

#define BATCH_ZAP_PAGES   10

Definition at line 6182 of file mmu.c.

◆ BUILD_MMU_ROLE_ACCESSOR

#define BUILD_MMU_ROLE_ACCESSOR (   base_or_ext,
  reg,
  name 
)
Value:
static inline bool __maybe_unused is_##reg##_##name(struct kvm_mmu *mmu) \
{ \
return !!(mmu->cpu_role. base_or_ext . reg##_##name); \
}

Definition at line 223 of file mmu.c.

◆ BUILD_MMU_ROLE_REGS_ACCESSOR

#define BUILD_MMU_ROLE_REGS_ACCESSOR (   reg,
  name,
  flag 
)
Value:
static inline bool __maybe_unused \
____is_##reg##_##name(const struct kvm_mmu_role_regs *regs) \
{ \
return !!(regs->reg & flag); \
}

Definition at line 200 of file mmu.c.
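
As an illustration (obtained by expanding the Value above; this is not additional code in mmu.c), the invocation BUILD_MMU_ROLE_REGS_ACCESSOR(cr0, pg, X86_CR0_PG) listed in the function summary generates:

static inline bool __maybe_unused
____is_cr0_pg(const struct kvm_mmu_role_regs *regs)
{
        return !!(regs->cr0 & X86_CR0_PG);
}

Each invocation therefore produces a small predicate that tests one control-register or EFER bit captured in struct kvm_mmu_role_regs.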

◆ BYTE_MASK

#define BYTE_MASK (   access)
Value:
((1 & (access) ? 2 : 0) | \
(2 & (access) ? 4 : 0) | \
(3 & (access) ? 8 : 0) | \
(4 & (access) ? 16 : 0) | \
(5 & (access) ? 32 : 0) | \
(6 & (access) ? 64 : 0) | \
(7 & (access) ? 128 : 0))

Definition at line 5069 of file mmu.c.
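
A worked expansion (illustrative only, not code from mmu.c): BYTE_MASK(1) keeps the terms whose selector has bit 0 set, i.e. selectors 1, 3, 5 and 7:

/*
 * BYTE_MASK(1) = (1 & 1 ? 2 : 0) | (2 & 1 ? 4 : 0) | (3 & 1 ? 8 : 0) |
 *                (4 & 1 ? 16 : 0) | (5 & 1 ? 32 : 0) | (6 & 1 ? 64 : 0) |
 *                (7 & 1 ? 128 : 0)
 *              = 2 | 8 | 32 | 128 = 0xaa
 *
 * In general, bit i of the result is set iff (i & access) != 0, with
 * bit 0 always clear.
 */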

◆ CREATE_TRACE_POINTS

#define CREATE_TRACE_POINTS

Definition at line 192 of file mmu.c.

◆ for_each_gfn_valid_sp_with_gptes

#define for_each_gfn_valid_sp_with_gptes (   _kvm,
  _sp,
  _gfn 
)
Value:
for_each_valid_sp(_kvm, _sp, \
&(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)]) \
if ((_sp)->gfn != (_gfn) || !sp_has_gptes(_sp)) {} else

Definition at line 1913 of file mmu.c.

◆ for_each_rmap_spte

#define for_each_rmap_spte (   _rmap_head_,
  _iter_,
  _spte_ 
)
Value:
for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \
_spte_; _spte_ = rmap_get_next(_iter_))

Definition at line 1191 of file mmu.c.
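
A minimal usage sketch (hypothetical helper, not part of mmu.c) that counts the SPTEs chained off one rmap head; callers of the rmap iterator are expected to hold mmu_lock:

static unsigned int count_rmap_sptes(struct kvm_rmap_head *rmap_head)
{
        struct rmap_iterator iter;
        unsigned int count = 0;
        u64 *sptep;

        /* Visits every SPTE reachable from this rmap head. */
        for_each_rmap_spte(rmap_head, &iter, sptep)
                count++;

        return count;
}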

◆ for_each_shadow_entry

#define for_each_shadow_entry (   _vcpu,
  _addr,
  _walker 
)
Value:
for (shadow_walk_init(&(_walker), _vcpu, _addr); \
shadow_walk_okay(&(_walker)); \
shadow_walk_next(&(_walker)))

Definition at line 169 of file mmu.c.
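
A minimal sketch (hypothetical debugging helper, not part of mmu.c), assuming the caller takes mmu_lock for write as the non-lockless shadow walkers require:

static void dump_shadow_walk(struct kvm_vcpu *vcpu, u64 addr)
{
        struct kvm_shadow_walk_iterator it;

        write_lock(&vcpu->kvm->mmu_lock);
        /* Descends from the current root toward the leaf SPTE for addr. */
        for_each_shadow_entry(vcpu, addr, it)
                pr_debug("level %d sptep %p spte 0x%llx\n",
                         it.level, it.sptep, *it.sptep);
        write_unlock(&vcpu->kvm->mmu_lock);
}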

◆ for_each_shadow_entry_lockless

#define for_each_shadow_entry_lockless (   _vcpu,
  _addr,
  _walker,
  spte 
)
Value:
for (shadow_walk_init(&(_walker), _vcpu, _addr); \
shadow_walk_okay(&(_walker)) && \
({ spte = mmu_spte_get_lockless(_walker.sptep); 1; }); \
__shadow_walk_next(&(_walker), spte))

Definition at line 174 of file mmu.c.

◆ for_each_shadow_entry_using_root

#define for_each_shadow_entry_using_root (   _vcpu,
  _root,
  _addr,
  _walker 
)
Value:
for (shadow_walk_init_using_root(&(_walker), (_vcpu), \
(_root), (_addr)); \
shadow_walk_okay(&(_walker)); \
shadow_walk_next(&(_walker)))

Definition at line 163 of file mmu.c.

◆ for_each_slot_rmap_range

#define for_each_slot_rmap_range (   _slot_,
  _start_level_,
  _end_level_,
  _start_gfn,
  _end_gfn,
  _iter_ 
)
Value:
for (slot_rmap_walk_init(_iter_, _slot_, _start_level_, \
_end_level_, _start_gfn, _end_gfn); \
slot_rmap_walk_okay(_iter_); \
slot_rmap_walk_next(_iter_))

Definition at line 1556 of file mmu.c.

◆ for_each_sp

#define for_each_sp (   pvec,
  sp,
  parents,
  i 
)
Value:
for (i = mmu_pages_first(&pvec, &parents); \
i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \
i = mmu_pages_next(&pvec, &parents, i))

Definition at line 2026 of file mmu.c.

◆ for_each_valid_sp

#define for_each_valid_sp (   _kvm,
  _sp,
  _list 
)
Value:
hlist_for_each_entry(_sp, _list, hash_link) \
if (is_obsolete_sp((_kvm), (_sp))) { \
} else

Definition at line 1908 of file mmu.c.

◆ INVALID_INDEX

#define INVALID_INDEX   (-1)

Definition at line 1871 of file mmu.c.

◆ KVM_LPAGE_MIXED_FLAG

#define KVM_LPAGE_MIXED_FLAG   BIT(31)

Definition at line 800 of file mmu.c.

◆ KVM_PAGE_ARRAY_NR

#define KVM_PAGE_ARRAY_NR   16

Definition at line 1799 of file mmu.c.

◆ pr_fmt

#define pr_fmt (   fmt)    KBUILD_MODNAME ": " fmt

Definition at line 17 of file mmu.c.

◆ PTE_LIST_EXT

#define PTE_LIST_EXT   14

Definition at line 124 of file mmu.c.

◆ PTE_PREFETCH_NUM

#define PTE_PREFETCH_NUM   8

Definition at line 119 of file mmu.c.

◆ PTTYPE [1/3]

#define PTTYPE   PTTYPE_EPT

Definition at line 4824 of file mmu.c.

◆ PTTYPE [2/3]

#define PTTYPE   64

Definition at line 4824 of file mmu.c.

◆ PTTYPE [3/3]

#define PTTYPE   32

Definition at line 4824 of file mmu.c.

◆ PTTYPE_EPT

#define PTTYPE_EPT   18 /* arbitrary */

Definition at line 4815 of file mmu.c.

◆ RMAP_RECYCLE_THRESHOLD

#define RMAP_RECYCLE_THRESHOLD   1000

Definition at line 1639 of file mmu.c.

Typedef Documentation

◆ rmap_handler_t

typedef bool(* rmap_handler_t) (struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, pte_t pte)

Definition at line 1563 of file mmu.c.
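
For illustration, a handler conforming to this typedef could look like the following (hypothetical; the handlers actually passed to kvm_handle_gfn_range() are kvm_zap_rmap(), kvm_set_pte_rmap(), kvm_age_rmap() and kvm_test_age_rmap()):

static bool example_rmap_handler(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
                                 struct kvm_memory_slot *slot, gfn_t gfn,
                                 int level, pte_t pte)
{
        /*
         * The result is OR'ed across the whole GFN range by
         * kvm_handle_gfn_range(); its meaning depends on the handler
         * (e.g. "TLB flush needed" for zapping, "page was young" for aging).
         */
        return false;
}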

◆ slot_rmaps_handler

typedef bool(* slot_rmaps_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot)

Definition at line 6039 of file mmu.c.

Function Documentation

◆ __clear_sp_write_flooding_count()

static void __clear_sp_write_flooding_count ( struct kvm_mmu_page *  sp)
static

Definition at line 2135 of file mmu.c.

2136 {
2137  atomic_set(&sp->write_flooding_count, 0);
2138 }

◆ __direct_pte_prefetch()

static void __direct_pte_prefetch ( struct kvm_vcpu *  vcpu,
struct kvm_mmu_page *  sp,
u64 *  sptep 
)
static

Definition at line 3005 of file mmu.c.

3007 {
3008  u64 *spte, *start = NULL;
3009  int i;
3010 
3011  WARN_ON_ONCE(!sp->role.direct);
3012 
3013  i = spte_index(sptep) & ~(PTE_PREFETCH_NUM - 1);
3014  spte = sp->spt + i;
3015 
3016  for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
3017  if (is_shadow_present_pte(*spte) || spte == sptep) {
3018  if (!start)
3019  continue;
3020  if (direct_pte_prefetch_many(vcpu, sp, start, spte) < 0)
3021  return;
3022  start = NULL;
3023  } else if (!start)
3024  start = spte;
3025  }
3026  if (start)
3027  direct_pte_prefetch_many(vcpu, sp, start, spte);
3028 }

◆ __get_spte_lockless()

static u64 __get_spte_lockless ( u64 *  sptep)
static

Definition at line 449 of file mmu.c.

450 {
451  struct kvm_mmu_page *sp = sptep_to_sp(sptep);
452  union split_spte spte, *orig = (union split_spte *)sptep;
453  int count;
454 
455 retry:
456  count = sp->clear_spte_count;
457  smp_rmb();
458 
459  spte.spte_low = orig->spte_low;
460  smp_rmb();
461 
462  spte.spte_high = orig->spte_high;
463  smp_rmb();
464 
465  if (unlikely(spte.spte_low != orig->spte_low ||
466  count != sp->clear_spte_count))
467  goto retry;
468 
469  return spte.spte;
470 }

◆ __kvm_faultin_pfn()

static int __kvm_faultin_pfn ( struct kvm_vcpu *  vcpu,
struct kvm_page_fault *  fault 
)
static

Definition at line 4331 of file mmu.c.

4332 {
4333  struct kvm_memory_slot *slot = fault->slot;
4334  bool async;
4335 
4336  /*
4337  * Retry the page fault if the gfn hit a memslot that is being deleted
4338  * or moved. This ensures any existing SPTEs for the old memslot will
4339  * be zapped before KVM inserts a new MMIO SPTE for the gfn.
4340  */
4341  if (slot && (slot->flags & KVM_MEMSLOT_INVALID))
4342  return RET_PF_RETRY;
4343 
4344  if (!kvm_is_visible_memslot(slot)) {
4345  /* Don't expose private memslots to L2. */
4346  if (is_guest_mode(vcpu)) {
4347  fault->slot = NULL;
4348  fault->pfn = KVM_PFN_NOSLOT;
4349  fault->map_writable = false;
4350  return RET_PF_CONTINUE;
4351  }
4352  /*
4353  * If the APIC access page exists but is disabled, go directly
4354  * to emulation without caching the MMIO access or creating a
4355  * MMIO SPTE. That way the cache doesn't need to be purged
4356  * when the AVIC is re-enabled.
4357  */
4358  if (slot && slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT &&
4359  !kvm_apicv_activated(vcpu->kvm))
4360  return RET_PF_EMULATE;
4361  }
4362 
4363  if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
4364  kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
4365  return -EFAULT;
4366  }
4367 
4368  if (fault->is_private)
4369  return kvm_faultin_pfn_private(vcpu, fault);
4370 
4371  async = false;
4372  fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async,
4373  fault->write, &fault->map_writable,
4374  &fault->hva);
4375  if (!async)
4376  return RET_PF_CONTINUE; /* *pfn has correct page already */
4377 
4378  if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) {
4379  trace_kvm_try_async_get_page(fault->addr, fault->gfn);
4380  if (kvm_find_async_pf_gfn(vcpu, fault->gfn)) {
4381  trace_kvm_async_pf_repeated_fault(fault->addr, fault->gfn);
4382  kvm_make_request(KVM_REQ_APF_HALT, vcpu);
4383  return RET_PF_RETRY;
4384  } else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) {
4385  return RET_PF_RETRY;
4386  }
4387  }
4388 
4389  /*
4390  * Allow gup to bail on pending non-fatal signals when it's also allowed
4391  * to wait for IO. Note, gup always bails if it is unable to quickly
4392  * get a page and a fatal signal, i.e. SIGKILL, is pending.
4393  */
4394  fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, true, NULL,
4395  fault->write, &fault->map_writable,
4396  &fault->hva);
4397  return RET_PF_CONTINUE;
4398 }

◆ __kvm_mmu_create()

static int __kvm_mmu_create ( struct kvm_vcpu *  vcpu,
struct kvm_mmu *  mmu 
)
static

Definition at line 6100 of file mmu.c.

6101 {
6102  struct page *page;
6103  int i;
6104 
6105  mmu->root.hpa = INVALID_PAGE;
6106  mmu->root.pgd = 0;
6107  for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
6108  mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
6109 
6110  /* vcpu->arch.guest_mmu isn't used when !tdp_enabled. */
6111  if (!tdp_enabled && mmu == &vcpu->arch.guest_mmu)
6112  return 0;
6113 
6114  /*
6115  * When using PAE paging, the four PDPTEs are treated as 'root' pages,
6116  * while the PDP table is a per-vCPU construct that's allocated at MMU
6117  * creation. When emulating 32-bit mode, cr3 is only 32 bits even on
6118  * x86_64. Therefore we need to allocate the PDP table in the first
6119  * 4GB of memory, which happens to fit the DMA32 zone. TDP paging
6120  * generally doesn't use PAE paging and can skip allocating the PDP
6121  * table. The main exception, handled here, is SVM's 32-bit NPT. The
6122  * other exception is for shadowing L1's 32-bit or PAE NPT on 64-bit
6123  * KVM; that horror is handled on-demand by mmu_alloc_special_roots().
6124  */
6125  if (tdp_enabled && kvm_mmu_get_tdp_level(vcpu) > PT32E_ROOT_LEVEL)
6126  return 0;
6127 
6128  page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32);
6129  if (!page)
6130  return -ENOMEM;
6131 
6132  mmu->pae_root = page_address(page);
6133 
6134  /*
6135  * CR3 is only 32 bits when PAE paging is used, thus it's impossible to
6136  * get the CPU to treat the PDPTEs as encrypted. Decrypt the page so
6137  * that KVM's writes and the CPU's reads get along. Note, this is
6138  * only necessary when using shadow paging, as 64-bit NPT can get at
6139  * the C-bit even when shadowing 32-bit NPT, and SME isn't supported
6140  * by 32-bit kernels (when KVM itself uses 32-bit NPT).
6141  */
6142  if (!tdp_enabled)
6143  set_memory_decrypted((unsigned long)mmu->pae_root, 1);
6144  else
6145  WARN_ON_ONCE(shadow_me_value);
6146 
6147  for (i = 0; i < 4; ++i)
6148  mmu->pae_root[i] = INVALID_PAE_ROOT;
6149 
6150  return 0;
6151 }

◆ __kvm_mmu_free_obsolete_roots()

static void __kvm_mmu_free_obsolete_roots ( struct kvm *  kvm,
struct kvm_mmu *  mmu 
)
static

Definition at line 5659 of file mmu.c.

5660 {
5661  unsigned long roots_to_free = 0;
5662  int i;
5663 
5664  if (is_obsolete_root(kvm, mmu->root.hpa))
5665  roots_to_free |= KVM_MMU_ROOT_CURRENT;
5666 
5667  for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
5668  if (is_obsolete_root(kvm, mmu->prev_roots[i].hpa))
5669  roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
5670  }
5671 
5672  if (roots_to_free)
5673  kvm_mmu_free_roots(kvm, mmu, roots_to_free);
5674 }

◆ __kvm_mmu_get_shadow_page()

static struct kvm_mmu_page* __kvm_mmu_get_shadow_page ( struct kvm *  kvm,
struct kvm_vcpu *  vcpu,
struct shadow_page_caches *  caches,
gfn_t  gfn,
union kvm_mmu_page_role  role 
)
static

Definition at line 2272 of file mmu.c.

2277 {
2278  struct hlist_head *sp_list;
2279  struct kvm_mmu_page *sp;
2280  bool created = false;
2281 
2282  sp_list = &kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)];
2283 
2284  sp = kvm_mmu_find_shadow_page(kvm, vcpu, gfn, sp_list, role);
2285  if (!sp) {
2286  created = true;
2287  sp = kvm_mmu_alloc_shadow_page(kvm, caches, gfn, sp_list, role);
2288  }
2289 
2290  trace_kvm_mmu_get_page(sp, created);
2291  return sp;
2292 }

◆ __kvm_mmu_honors_guest_mtrrs()

bool __kvm_mmu_honors_guest_mtrrs ( bool  vm_has_noncoherent_dma)

Definition at line 4609 of file mmu.c.

4610 {
4611  /*
4612  * If host MTRRs are ignored (shadow_memtype_mask is non-zero), and the
4613  * VM has non-coherent DMA (DMA doesn't snoop CPU caches), KVM's ABI is
4614  * to honor the memtype from the guest's MTRRs so that guest accesses
4615  * to memory that is DMA'd aren't cached against the guest's wishes.
4616  *
4617  * Note, KVM may still ultimately ignore guest MTRRs for certain PFNs,
4618  * e.g. KVM will force UC memtype for host MMIO.
4619  */
4620  return vm_has_noncoherent_dma && shadow_memtype_mask;
4621 }

◆ __kvm_mmu_invalidate_addr()

static void __kvm_mmu_invalidate_addr ( struct kvm_vcpu *  vcpu,
struct kvm_mmu *  mmu,
u64  addr,
hpa_t  root_hpa 
)
static

Definition at line 5902 of file mmu.c.

5904 {
5905  struct kvm_shadow_walk_iterator iterator;
5906 
5907  vcpu_clear_mmio_info(vcpu, addr);
5908 
5909  /*
5910  * Walking and synchronizing SPTEs both assume they are operating in
5911  * the context of the current MMU, and would need to be reworked if
5912  * this is ever used to sync the guest_mmu, e.g. to emulate INVEPT.
5913  */
5914  if (WARN_ON_ONCE(mmu != vcpu->arch.mmu))
5915  return;
5916 
5917  if (!VALID_PAGE(root_hpa))
5918  return;
5919 
5920  write_lock(&vcpu->kvm->mmu_lock);
5921  for_each_shadow_entry_using_root(vcpu, root_hpa, addr, iterator) {
5922  struct kvm_mmu_page *sp = sptep_to_sp(iterator.sptep);
5923 
5924  if (sp->unsync) {
5925  int ret = kvm_sync_spte(vcpu, sp, iterator.index);
5926 
5927  if (ret < 0)
5928  mmu_page_zap_pte(vcpu->kvm, sp, iterator.sptep, NULL);
5929  if (ret)
5930  kvm_flush_remote_tlbs_sptep(vcpu->kvm, iterator.sptep);
5931  }
5932 
5933  if (!sp->unsync_children)
5934  break;
5935  }
5936  write_unlock(&vcpu->kvm->mmu_lock);
5937 }

◆ __kvm_mmu_max_mapping_level()

static int __kvm_mmu_max_mapping_level ( struct kvm *  kvm,
const struct kvm_memory_slot *  slot,
gfn_t  gfn,
int  max_level,
bool  is_private 
)
static

Definition at line 3146 of file mmu.c.

3149 {
3150  struct kvm_lpage_info *linfo;
3151  int host_level;
3152 
3153  max_level = min(max_level, max_huge_page_level);
3154  for ( ; max_level > PG_LEVEL_4K; max_level--) {
3155  linfo = lpage_info_slot(gfn, slot, max_level);
3156  if (!linfo->disallow_lpage)
3157  break;
3158  }
3159 
3160  if (is_private)
3161  return max_level;
3162 
3163  if (max_level == PG_LEVEL_4K)
3164  return PG_LEVEL_4K;
3165 
3166  host_level = host_pfn_mapping_level(kvm, gfn, slot);
3167  return min(host_level, max_level);
3168 }

◆ __kvm_mmu_prepare_zap_page()

static bool __kvm_mmu_prepare_zap_page ( struct kvm *  kvm,
struct kvm_mmu_page *  sp,
struct list_head *  invalid_list,
int *  nr_zapped 
)
static

Definition at line 2569 of file mmu.c.

2573 {
2574  bool list_unstable, zapped_root = false;
2575 
2576  lockdep_assert_held_write(&kvm->mmu_lock);
2577  trace_kvm_mmu_prepare_zap_page(sp);
2578  ++kvm->stat.mmu_shadow_zapped;
2579  *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list);
2580  *nr_zapped += kvm_mmu_page_unlink_children(kvm, sp, invalid_list);
2581  kvm_mmu_unlink_parents(kvm, sp);
2582 
2583  /* Zapping children means active_mmu_pages has become unstable. */
2584  list_unstable = *nr_zapped;
2585 
2586  if (!sp->role.invalid && sp_has_gptes(sp))
2587  unaccount_shadowed(kvm, sp);
2588 
2589  if (sp->unsync)
2590  kvm_unlink_unsync_page(kvm, sp);
2591  if (!sp->root_count) {
2592  /* Count self */
2593  (*nr_zapped)++;
2594 
2595  /*
2596  * Already invalid pages (previously active roots) are not on
2597  * the active page list. See list_del() in the "else" case of
2598  * !sp->root_count.
2599  */
2600  if (sp->role.invalid)
2601  list_add(&sp->link, invalid_list);
2602  else
2603  list_move(&sp->link, invalid_list);
2604  kvm_unaccount_mmu_page(kvm, sp);
2605  } else {
2606  /*
2607  * Remove the active root from the active page list, the root
2608  * will be explicitly freed when the root_count hits zero.
2609  */
2610  list_del(&sp->link);
2611 
2612  /*
2613  * Obsolete pages cannot be used on any vCPUs, see the comment
2614  * in kvm_mmu_zap_all_fast(). Note, is_obsolete_sp() also
2615  * treats invalid shadow pages as being obsolete.
2616  */
2617  zapped_root = !is_obsolete_sp(kvm, sp);
2618  }
2619 
2620  if (sp->nx_huge_page_disallowed)
2621  unaccount_nx_huge_page(kvm, sp);
2622 
2623  sp->role.invalid = 1;
2624 
2625  /*
2626  * Make the request to free obsolete roots after marking the root
2627  * invalid, otherwise other vCPUs may not see it as invalid.
2628  */
2629  if (zapped_root)
2630  kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_FREE_OBSOLETE_ROOTS);
2631  return list_unstable;
2632 }

◆ __kvm_mmu_refresh_passthrough_bits()

void __kvm_mmu_refresh_passthrough_bits ( struct kvm_vcpu *  vcpu,
struct kvm_mmu *  mmu 
)

Definition at line 5284 of file mmu.c.

5286 {
5287  const bool cr0_wp = kvm_is_cr0_bit_set(vcpu, X86_CR0_WP);
5288 
5289  BUILD_BUG_ON((KVM_MMU_CR0_ROLE_BITS & KVM_POSSIBLE_CR0_GUEST_BITS) != X86_CR0_WP);
5290  BUILD_BUG_ON((KVM_MMU_CR4_ROLE_BITS & KVM_POSSIBLE_CR4_GUEST_BITS));
5291 
5292  if (is_cr0_wp(mmu) == cr0_wp)
5293  return;
5294 
5295  mmu->cpu_role.base.cr0_wp = cr0_wp;
5296  reset_guest_paging_metadata(vcpu, mmu);
5297 }

◆ __kvm_sync_page()

static int __kvm_sync_page ( struct kvm_vcpu *  vcpu,
struct kvm_mmu_page *  sp 
)
static

Definition at line 1959 of file mmu.c.

1960 {
1961  int flush = 0;
1962  int i;
1963 
1964  if (!kvm_sync_page_check(vcpu, sp))
1965  return -1;
1966 
1967  for (i = 0; i < SPTE_ENT_PER_PAGE; i++) {
1968  int ret = kvm_sync_spte(vcpu, sp, i);
1969 
1970  if (ret < -1)
1971  return -1;
1972  flush |= ret;
1973  }
1974 
1975  /*
1976  * Note, any flush is purely for KVM's correctness, e.g. when dropping
1977  * an existing SPTE or clearing W/A/D bits to ensure an mmu_notifier
1978  * unmap or dirty logging event doesn't fail to flush. The guest is
1979  * responsible for flushing the TLB to ensure any changes in protection
1980  * bits are recognized, i.e. until the guest flushes or page faults on
1981  * a relevant address, KVM is architecturally allowed to let vCPUs use
1982  * cached translations with the old protection bits.
1983  */
1984  return flush;
1985 }

◆ __kvm_zap_rmap()

static bool __kvm_zap_rmap ( struct kvm *  kvm,
struct kvm_rmap_head *  rmap_head,
const struct kvm_memory_slot *  slot 
)
static

Definition at line 1444 of file mmu.c.

1446 {
1447  return kvm_zap_all_rmap_sptes(kvm, rmap_head);
1448 }
static bool kvm_zap_all_rmap_sptes(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
Definition: mmu.c:1045
Here is the call graph for this function:
Here is the caller graph for this function:

◆ __link_shadow_page()

static void __link_shadow_page ( struct kvm *  kvm,
struct kvm_mmu_memory_cache *  cache,
u64 *  sptep,
struct kvm_mmu_page sp,
bool  flush 
)
static

Definition at line 2429 of file mmu.c.

2432 {
2433  u64 spte;
2434 
2435  BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
2436 
2437  /*
2438  * If an SPTE is present already, it must be a leaf and therefore
2439  * a large one. Drop it, and flush the TLB if needed, before
2440  * installing sp.
2441  */
2442  if (is_shadow_present_pte(*sptep))
2443  drop_large_spte(kvm, sptep, flush);
2444 
2445  spte = make_nonleaf_spte(sp->spt, sp_ad_disabled(sp));
2446 
2447  mmu_spte_set(sptep, spte);
2448 
2449  mmu_page_add_parent_pte(cache, sp, sptep);
2450 
2451  /*
2452  * The non-direct sub-pagetable must be updated before linking. For
2453  * L1 sp, the pagetable is updated via kvm_sync_page() in
2454  * kvm_mmu_find_shadow_page() without write-protecting the gfn,
2455  * so sp->unsync can be true or false. For higher level non-direct
2456  * sp, the pagetable is updated/synced via mmu_sync_children() in
2457  * FNAME(fetch)(), so sp->unsync_children can only be false.
2458  * WARN_ON_ONCE() if anything happens unexpectedly.
2459  */
2460  if (WARN_ON_ONCE(sp->unsync_children) || sp->unsync)
2461  mark_unsync(sptep);
2462 }
static void drop_large_spte(struct kvm *kvm, u64 *sptep, bool flush)
Definition: mmu.c:1203
static void mmu_spte_set(u64 *sptep, u64 spte)
Definition: mmu.c:479
static void mmu_page_add_parent_pte(struct kvm_mmu_memory_cache *cache, struct kvm_mmu_page *sp, u64 *parent_pte)
Definition: mmu.c:1754
static void mark_unsync(u64 *spte)
Definition: mmu.c:1787
#define PT_WRITABLE_MASK
Definition: mmu.h:15
u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
Definition: spte.c:310
static bool sp_ad_disabled(struct kvm_mmu_page *sp)
Definition: spte.h:274
Here is the call graph for this function:
Here is the caller graph for this function:

◆ __mmu_unsync_walk()

static int __mmu_unsync_walk ( struct kvm_mmu_page sp,
struct kvm_mmu_pages pvec 
)
static

Definition at line 1832 of file mmu.c.

1834 {
1835  int i, ret, nr_unsync_leaf = 0;
1836 
1837  for_each_set_bit(i, sp->unsync_child_bitmap, 512) {
1838  struct kvm_mmu_page *child;
1839  u64 ent = sp->spt[i];
1840 
1841  if (!is_shadow_present_pte(ent) || is_large_pte(ent)) {
1842  clear_unsync_child_bit(sp, i);
1843  continue;
1844  }
1845 
1846  child = spte_to_child_sp(ent);
1847 
1848  if (child->unsync_children) {
1849  if (mmu_pages_add(pvec, child, i))
1850  return -ENOSPC;
1851 
1852  ret = __mmu_unsync_walk(child, pvec);
1853  if (!ret) {
1854  clear_unsync_child_bit(sp, i);
1855  continue;
1856  } else if (ret > 0) {
1857  nr_unsync_leaf += ret;
1858  } else
1859  return ret;
1860  } else if (child->unsync) {
1861  nr_unsync_leaf++;
1862  if (mmu_pages_add(pvec, child, i))
1863  return -ENOSPC;
1864  } else
1865  clear_unsync_child_bit(sp, i);
1866  }
1867 
1868  return nr_unsync_leaf;
1869 }
static int __mmu_unsync_walk(struct kvm_mmu_page *sp, struct kvm_mmu_pages *pvec)
Definition: mmu.c:1832
static void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx)
Definition: mmu.c:1825
static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, int idx)
Definition: mmu.c:1809
static bool is_large_pte(u64 pte)
Definition: spte.h:313
static struct kvm_mmu_page * spte_to_child_sp(u64 spte)
Definition: spte.h:230
Here is the call graph for this function:
Here is the caller graph for this function:

◆ __MODULE_PARM_TYPE() [1/3]

__MODULE_PARM_TYPE ( nx_huge_pages  ,
"bool"   
)

◆ __MODULE_PARM_TYPE() [2/3]

__MODULE_PARM_TYPE ( nx_huge_pages_recovery_period_ms  ,
"uint"   
)

◆ __MODULE_PARM_TYPE() [3/3]

__MODULE_PARM_TYPE ( nx_huge_pages_recovery_ratio  ,
"uint"   
)

◆ __reset_rsvds_bits_mask()

static void __reset_rsvds_bits_mask ( struct rsvd_bits_validate *  rsvd_check,
u64  pa_bits_rsvd,
int  level,
bool  nx,
bool  gbpages,
bool  pse,
bool  amd 
)
static

Definition at line 4828 of file mmu.c.

4831 {
4832  u64 gbpages_bit_rsvd = 0;
4833  u64 nonleaf_bit8_rsvd = 0;
4834  u64 high_bits_rsvd;
4835 
4836  rsvd_check->bad_mt_xwr = 0;
4837 
4838  if (!gbpages)
4839  gbpages_bit_rsvd = rsvd_bits(7, 7);
4840 
4841  if (level == PT32E_ROOT_LEVEL)
4842  high_bits_rsvd = pa_bits_rsvd & rsvd_bits(0, 62);
4843  else
4844  high_bits_rsvd = pa_bits_rsvd & rsvd_bits(0, 51);
4845 
4846  /* Note, NX doesn't exist in PDPTEs, this is handled below. */
4847  if (!nx)
4848  high_bits_rsvd |= rsvd_bits(63, 63);
4849 
4850  /*
4851  * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for
4852  * leaf entries) on AMD CPUs only.
4853  */
4854  if (amd)
4855  nonleaf_bit8_rsvd = rsvd_bits(8, 8);
4856 
4857  switch (level) {
4858  case PT32_ROOT_LEVEL:
4859  /* no rsvd bits for 2 level 4K page table entries */
4860  rsvd_check->rsvd_bits_mask[0][1] = 0;
4861  rsvd_check->rsvd_bits_mask[0][0] = 0;
4862  rsvd_check->rsvd_bits_mask[1][0] =
4863  rsvd_check->rsvd_bits_mask[0][0];
4864 
4865  if (!pse) {
4866  rsvd_check->rsvd_bits_mask[1][1] = 0;
4867  break;
4868  }
4869 
4870  if (is_cpuid_PSE36())
4871  /* 36bits PSE 4MB page */
4872  rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
4873  else
4874  /* 32 bits PSE 4MB page */
4875  rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
4876  break;
4877  case PT32E_ROOT_LEVEL:
4878  rsvd_check->rsvd_bits_mask[0][2] = rsvd_bits(63, 63) |
4879  high_bits_rsvd |
4880  rsvd_bits(5, 8) |
4881  rsvd_bits(1, 2); /* PDPTE */
4882  rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd; /* PDE */
4883  rsvd_check->rsvd_bits_mask[0][0] = high_bits_rsvd; /* PTE */
4884  rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd |
4885  rsvd_bits(13, 20); /* large page */
4886  rsvd_check->rsvd_bits_mask[1][0] =
4887  rsvd_check->rsvd_bits_mask[0][0];
4888  break;
4889  case PT64_ROOT_5LEVEL:
4890  rsvd_check->rsvd_bits_mask[0][4] = high_bits_rsvd |
4891  nonleaf_bit8_rsvd |
4892  rsvd_bits(7, 7);
4893  rsvd_check->rsvd_bits_mask[1][4] =
4894  rsvd_check->rsvd_bits_mask[0][4];
4895  fallthrough;
4896  case PT64_ROOT_4LEVEL:
4897  rsvd_check->rsvd_bits_mask[0][3] = high_bits_rsvd |
4898  nonleaf_bit8_rsvd |
4899  rsvd_bits(7, 7);
4900  rsvd_check->rsvd_bits_mask[0][2] = high_bits_rsvd |
4901  gbpages_bit_rsvd;
4902  rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd;
4903  rsvd_check->rsvd_bits_mask[0][0] = high_bits_rsvd;
4904  rsvd_check->rsvd_bits_mask[1][3] =
4905  rsvd_check->rsvd_bits_mask[0][3];
4906  rsvd_check->rsvd_bits_mask[1][2] = high_bits_rsvd |
4907  gbpages_bit_rsvd |
4908  rsvd_bits(13, 29);
4909  rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd |
4910  rsvd_bits(13, 20); /* large page */
4911  rsvd_check->rsvd_bits_mask[1][0] =
4912  rsvd_check->rsvd_bits_mask[0][0];
4913  break;
4914  }
4915 }
static int is_cpuid_PSE36(void)
Definition: mmu.c:331
#define PT64_ROOT_5LEVEL
Definition: mmu.h:34
static __always_inline u64 rsvd_bits(int s, int e)
Definition: mmu.h:45
#define PT64_ROOT_4LEVEL
Definition: mmu.h:35
#define PT32_ROOT_LEVEL
Definition: mmu.h:36
Here is the call graph for this function:
Here is the caller graph for this function:
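
The reserved-bit masks above are all built from rsvd_bits(s, e), a mask of bits s..e inclusive, combined with pa_bits_rsvd (the bits above the CPU's physical-address width). A minimal userspace sketch of that construction, assuming a hypothetical CPU with 40 physical address bits (demo_rsvd_bits is an invented stand-in, not the mmu.h helper):

#include <stdint.h>
#include <stdio.h>

/* Same idea as rsvd_bits(s, e) in mmu.h: a mask of bits s..e inclusive. */
static uint64_t demo_rsvd_bits(int s, int e)
{
        return ((2ull << (e - s)) - 1) << s;
}

int main(void)
{
        /* Assume a CPU with 40 physical address bits: PTE bits 40..51 are reserved. */
        int phys_bits = 40;
        uint64_t pa_bits_rsvd = demo_rsvd_bits(phys_bits, 63);
        uint64_t high_bits_rsvd = pa_bits_rsvd & demo_rsvd_bits(0, 51);

        printf("rsvd_bits(7, 7) = %#llx\n", (unsigned long long)demo_rsvd_bits(7, 7));
        printf("high_bits_rsvd  = %#llx\n", (unsigned long long)high_bits_rsvd);
        /* Prints 0x80 and 0xfff0000000000: bit 7, and bits 40..51. */
        return 0;
}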

◆ __reset_rsvds_bits_mask_ept()

static void __reset_rsvds_bits_mask_ept ( struct rsvd_bits_validate *  rsvd_check,
u64  pa_bits_rsvd,
bool  execonly,
int  huge_page_level 
)
static

Definition at line 4928 of file mmu.c.

4931 {
4932  u64 high_bits_rsvd = pa_bits_rsvd & rsvd_bits(0, 51);
4933  u64 large_1g_rsvd = 0, large_2m_rsvd = 0;
4934  u64 bad_mt_xwr;
4935 
4936  if (huge_page_level < PG_LEVEL_1G)
4937  large_1g_rsvd = rsvd_bits(7, 7);
4938  if (huge_page_level < PG_LEVEL_2M)
4939  large_2m_rsvd = rsvd_bits(7, 7);
4940 
4941  rsvd_check->rsvd_bits_mask[0][4] = high_bits_rsvd | rsvd_bits(3, 7);
4942  rsvd_check->rsvd_bits_mask[0][3] = high_bits_rsvd | rsvd_bits(3, 7);
4943  rsvd_check->rsvd_bits_mask[0][2] = high_bits_rsvd | rsvd_bits(3, 6) | large_1g_rsvd;
4944  rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd | rsvd_bits(3, 6) | large_2m_rsvd;
4945  rsvd_check->rsvd_bits_mask[0][0] = high_bits_rsvd;
4946 
4947  /* large page */
4948  rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4];
4949  rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3];
4950  rsvd_check->rsvd_bits_mask[1][2] = high_bits_rsvd | rsvd_bits(12, 29) | large_1g_rsvd;
4951  rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd | rsvd_bits(12, 20) | large_2m_rsvd;
4952  rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0];
4953 
4954  bad_mt_xwr = 0xFFull << (2 * 8); /* bits 3..5 must not be 2 */
4955  bad_mt_xwr |= 0xFFull << (3 * 8); /* bits 3..5 must not be 3 */
4956  bad_mt_xwr |= 0xFFull << (7 * 8); /* bits 3..5 must not be 7 */
4957  bad_mt_xwr |= REPEAT_BYTE(1ull << 2); /* bits 0..2 must not be 010 */
4958  bad_mt_xwr |= REPEAT_BYTE(1ull << 6); /* bits 0..2 must not be 110 */
4959  if (!execonly) {
4960  /* bits 0..2 must not be 100 unless VMX capabilities allow it */
4961  bad_mt_xwr |= REPEAT_BYTE(1ull << 4);
4962  }
4963  rsvd_check->bad_mt_xwr = bad_mt_xwr;
4964 }
Here is the call graph for this function:
Here is the caller graph for this function:
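
bad_mt_xwr packs one byte per EPT memory-type value (bits 3..5 of a leaf PTE), with each bit inside that byte flagging one XWR permission combination (bits 0..2) as illegal; the reserved-bit check later indexes the bitmap with the PTE's low six bits. A hedged userspace sketch of that encoding and lookup (build_bad_mt_xwr and is_bad_mt_xwr are invented names; REPEAT_BYTE is expanded by hand for 64-bit):

#include <stdint.h>
#include <stdio.h>

#define REPEAT_BYTE(x)  (0x0101010101010101ull * (x))   /* 64-bit expansion of the kernel macro */
#define BIT_ULL(n)      (1ull << (n))

/* Rebuild bad_mt_xwr the way __reset_rsvds_bits_mask_ept() does, for execonly == false. */
static uint64_t build_bad_mt_xwr(void)
{
        uint64_t bad_mt_xwr;

        bad_mt_xwr  = 0xFFull << (2 * 8);        /* memtype 2 is reserved for every XWR combo */
        bad_mt_xwr |= 0xFFull << (3 * 8);        /* memtype 3 likewise */
        bad_mt_xwr |= 0xFFull << (7 * 8);        /* memtype 7 likewise */
        bad_mt_xwr |= REPEAT_BYTE(1ull << 2);    /* XWR == 010: write-only */
        bad_mt_xwr |= REPEAT_BYTE(1ull << 6);    /* XWR == 110: write+exec without read */
        bad_mt_xwr |= REPEAT_BYTE(1ull << 4);    /* XWR == 100: exec-only, illegal when !execonly */
        return bad_mt_xwr;
}

/* Mirrors the lookup: index the bitmap by the PTE's low six bits. */
static int is_bad_mt_xwr(uint64_t bad_mt_xwr, uint64_t pte)
{
        return !!(bad_mt_xwr & BIT_ULL(pte & 0x3f));
}

int main(void)
{
        uint64_t bad = build_bad_mt_xwr();

        printf("WB,  R-X: %d\n", is_bad_mt_xwr(bad, (6 << 3) | 5));  /* 0: legal */
        printf("WB,  -W-: %d\n", is_bad_mt_xwr(bad, (6 << 3) | 2));  /* 1: write-only is flagged */
        printf("MT2, R-X: %d\n", is_bad_mt_xwr(bad, (2 << 3) | 5));  /* 1: reserved memtype */
        return 0;
}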

◆ __rmap_add()

static void __rmap_add ( struct kvm *  kvm,
struct kvm_mmu_memory_cache *  cache,
const struct kvm_memory_slot *  slot,
u64 *  spte,
gfn_t  gfn,
unsigned int  access 
)
static

Definition at line 1641 of file mmu.c.

1645 {
1646  struct kvm_mmu_page *sp;
1647  struct kvm_rmap_head *rmap_head;
1648  int rmap_count;
1649 
1650  sp = sptep_to_sp(spte);
1651  kvm_mmu_page_set_translation(sp, spte_index(spte), gfn, access);
1652  kvm_update_page_stats(kvm, sp->role.level, 1);
1653 
1654  rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
1655  rmap_count = pte_list_add(cache, spte, rmap_head);
1656 
1657  if (rmap_count > kvm->stat.max_mmu_rmap_size)
1658  kvm->stat.max_mmu_rmap_size = rmap_count;
1659  if (rmap_count > RMAP_RECYCLE_THRESHOLD) {
1660  kvm_zap_all_rmap_sptes(kvm, rmap_head);
1661  kvm_flush_remote_tlbs_gfn(kvm, gfn, sp->role.level);
1662  }
1663 }
static struct kvm_rmap_head * gfn_to_rmap(gfn_t gfn, int level, const struct kvm_memory_slot *slot)
Definition: mmu.c:1086
static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte, struct kvm_rmap_head *rmap_head)
Definition: mmu.c:933
static void kvm_mmu_page_set_translation(struct kvm_mmu_page *sp, int index, gfn_t gfn, unsigned int access)
Definition: mmu.c:754
#define RMAP_RECYCLE_THRESHOLD
Definition: mmu.c:1639
static void kvm_update_page_stats(struct kvm *kvm, int level, int count)
Definition: mmu.h:305
static void kvm_flush_remote_tlbs_gfn(struct kvm *kvm, gfn_t gfn, int level)
Definition: mmu_internal.h:176
Here is the call graph for this function:
Here is the caller graph for this function:

◆ __rmap_clear_dirty()

static bool __rmap_clear_dirty ( struct kvm *  kvm,
struct kvm_rmap_head *  rmap_head,
const struct kvm_memory_slot *  slot 
)
static

Definition at line 1282 of file mmu.c.

1284 {
1285  u64 *sptep;
1286  struct rmap_iterator iter;
1287  bool flush = false;
1288 
1289  for_each_rmap_spte(rmap_head, &iter, sptep)
1290  if (spte_ad_need_write_protect(*sptep))
1291  flush |= spte_wrprot_for_clear_dirty(sptep);
1292  else
1293  flush |= spte_clear_dirty(sptep);
1294 
1295  return flush;
1296 }
static bool spte_wrprot_for_clear_dirty(u64 *sptep)
Definition: mmu.c:1266
#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_)
Definition: mmu.c:1191
static bool spte_clear_dirty(u64 *sptep)
Definition: mmu.c:1257
static bool spte_ad_need_write_protect(u64 spte)
Definition: spte.h:285
Here is the call graph for this function:
Here is the caller graph for this function:

◆ __set_nx_huge_pages()

static void __set_nx_huge_pages ( bool  val)
static

Definition at line 6946 of file mmu.c.

6947 {
6948  nx_huge_pages = itlb_multihit_kvm_mitigation = val;
6949 }
bool itlb_multihit_kvm_mitigation
int __read_mostly nx_huge_pages
Definition: mmu.c:64
Here is the caller graph for this function:

◆ __set_spte()

static void __set_spte ( u64 *  sptep,
u64  spte 
)
static

Definition at line 377 of file mmu.c.

378 {
379  union split_spte *ssptep, sspte;
380 
381  ssptep = (union split_spte *)sptep;
382  sspte = (union split_spte)spte;
383 
384  ssptep->spte_high = sspte.spte_high;
385 
386  /*
387  * If we map the spte from nonpresent to present, We should store
388  * the high bits firstly, then set present bit, so cpu can not
389  * fetch this spte while we are setting the spte.
390  */
391  smp_wmb();
392 
393  WRITE_ONCE(ssptep->spte_low, sspte.spte_low);
394 }
Here is the caller graph for this function:

◆ __shadow_walk_next()

static void __shadow_walk_next ( struct kvm_shadow_walk_iterator iterator,
u64  spte 
)
static

Definition at line 2412 of file mmu.c.

2414 {
2415  if (!is_shadow_present_pte(spte) || is_last_spte(spte, iterator->level)) {
2416  iterator->level = 0;
2417  return;
2418  }
2419 
2420  iterator->shadow_addr = spte & SPTE_BASE_ADDR_MASK;
2421  --iterator->level;
2422 }
static bool is_last_spte(u64 pte, int level)
Definition: spte.h:318
#define SPTE_BASE_ADDR_MASK
Definition: spte.h:40
Here is the call graph for this function:
Here is the caller graph for this function:

◆ __update_clear_spte_fast()

static void __update_clear_spte_fast ( u64 *  sptep,
u64  spte 
)
static

Definition at line 396 of file mmu.c.

397 {
398  union split_spte *ssptep, sspte;
399 
400  ssptep = (union split_spte *)sptep;
401  sspte = (union split_spte)spte;
402 
403  WRITE_ONCE(ssptep->spte_low, sspte.spte_low);
404 
405  /*
406  * If we map the spte from present to nonpresent, we should clear
407  * present bit firstly to avoid vcpu fetch the old high bits.
408  */
409  smp_wmb();
410 
411  ssptep->spte_high = sspte.spte_high;
412  count_spte_clear(sptep, spte);
413 }
static void count_spte_clear(u64 *sptep, u64 spte)
Definition: mmu.c:365
Here is the call graph for this function:
Here is the caller graph for this function:

◆ __update_clear_spte_slow()

static u64 __update_clear_spte_slow ( u64 *  sptep,
u64  spte 
)
static

Definition at line 415 of file mmu.c.

416 {
417  union split_spte *ssptep, sspte, orig;
418 
419  ssptep = (union split_spte *)sptep;
420  sspte = (union split_spte)spte;
421 
422  /* xchg acts as a barrier before the setting of the high bits */
423  orig.spte_low = xchg(&ssptep->spte_low, sspte.spte_low);
424  orig.spte_high = ssptep->spte_high;
425  ssptep->spte_high = sspte.spte_high;
426  count_spte_clear(sptep, spte);
427 
428  return orig.spte;
429 }
Here is the call graph for this function:
Here is the caller graph for this function:
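
On 32-bit hosts the three helpers above publish and tear down 64-bit SPTEs as two 32-bit halves, ordering the stores so a lockless walker never sees a present entry whose high half is still stale. A minimal userspace sketch of that ordering idea, with invented demo_* names and a plain compiler barrier standing in for smp_wmb()/WRITE_ONCE():

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the kernel's split_spte union. */
union demo_split_spte {
        struct {
                uint32_t spte_low;   /* holds the present bit */
                uint32_t spte_high;
        };
        uint64_t spte;
};

#define barrier() __asm__ __volatile__("" ::: "memory")  /* stand-in for smp_wmb() */

/* Publish a new SPTE: high half first, then the (present) low half. */
static void demo_set_spte(volatile union demo_split_spte *s, uint64_t val)
{
        union demo_split_spte n = { .spte = val };

        s->spte_high = n.spte_high;
        barrier();                    /* high bits visible before the present bit */
        s->spte_low = n.spte_low;
}

/* Tear down an SPTE: clear the (present) low half first, then the high half. */
static void demo_clear_spte(volatile union demo_split_spte *s, uint64_t val)
{
        union demo_split_spte n = { .spte = val };

        s->spte_low = n.spte_low;
        barrier();                    /* present bit cleared before the high bits change */
        s->spte_high = n.spte_high;
}

int main(void)
{
        union demo_split_spte s = { .spte = 0 };

        demo_set_spte(&s, 0x00000001aabbcc01ull);
        printf("after set:   %#llx\n", (unsigned long long)s.spte);
        demo_clear_spte(&s, 0);
        printf("after clear: %#llx\n", (unsigned long long)s.spte);
        return 0;
}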

◆ __walk_slot_rmaps()

static __always_inline bool __walk_slot_rmaps ( struct kvm *  kvm,
const struct kvm_memory_slot *  slot,
slot_rmaps_handler  fn,
int  start_level,
int  end_level,
gfn_t  start_gfn,
gfn_t  end_gfn,
bool  flush_on_yield,
bool  flush 
)
static

Definition at line 6043 of file mmu.c.

6049 {
6050  struct slot_rmap_walk_iterator iterator;
6051 
6052  lockdep_assert_held_write(&kvm->mmu_lock);
6053 
6054  for_each_slot_rmap_range(slot, start_level, end_level, start_gfn,
6055  end_gfn, &iterator) {
6056  if (iterator.rmap)
6057  flush |= fn(kvm, iterator.rmap, slot);
6058 
6059  if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
6060  if (flush && flush_on_yield) {
6061  kvm_flush_remote_tlbs_range(kvm, start_gfn,
6062  iterator.gfn - start_gfn + 1);
6063  flush = false;
6064  }
6065  cond_resched_rwlock_write(&kvm->mmu_lock);
6066  }
6067  }
6068 
6069  return flush;
6070 }
void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
Definition: kvm_main.c:367
#define for_each_slot_rmap_range(_slot_, _start_level_, _end_level_, _start_gfn, _end_gfn, _iter_)
Definition: mmu.c:1556
const struct kvm_memory_slot * slot
Definition: mmu.c:1496
Here is the call graph for this function:
Here is the caller graph for this function:

◆ account_nx_huge_page()

static void account_nx_huge_page ( struct kvm *  kvm,
struct kvm_mmu_page sp,
bool  nx_huge_page_possible 
)
static

Definition at line 866 of file mmu.c.

868 {
869  sp->nx_huge_page_disallowed = true;
870 
871  if (nx_huge_page_possible)
872  track_possible_nx_huge_page(kvm, sp);
873 }
void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
Definition: mmu.c:848
Here is the call graph for this function:
Here is the caller graph for this function:

◆ account_shadowed()

static void account_shadowed ( struct kvm *  kvm,
struct kvm_mmu_page sp 
)
static

Definition at line 827 of file mmu.c.

828 {
829  struct kvm_memslots *slots;
830  struct kvm_memory_slot *slot;
831  gfn_t gfn;
832 
833  kvm->arch.indirect_shadow_pages++;
834  gfn = sp->gfn;
835  slots = kvm_memslots_for_spte_role(kvm, sp->role);
836  slot = __gfn_to_memslot(slots, gfn);
837 
838  /* the non-leaf shadow pages are keeping readonly. */
839  if (sp->role.level > PG_LEVEL_4K)
840  return __kvm_write_track_add_gfn(kvm, slot, gfn);
841 
842  kvm_mmu_gfn_disallow_lpage(slot, gfn);
843 
844  if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
845  kvm_flush_remote_tlbs_gfn(kvm, gfn, PG_LEVEL_4K);
846 }
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn, int min_level)
Definition: mmu.c:1414
void kvm_mmu_gfn_disallow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn)
Definition: mmu.c:817
void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn)
Definition: page_track.c:77
Here is the call graph for this function:
Here is the caller graph for this function:

◆ alloc_apf_token()

static u32 alloc_apf_token ( struct kvm_vcpu *  vcpu)
static

Definition at line 4238 of file mmu.c.

4239 {
4240  /* make sure the token value is not 0 */
4241  u32 id = vcpu->arch.apf.id;
4242 
4243  if (id << 12 == 0)
4244  vcpu->arch.apf.id = 1;
4245 
4246  return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
4247 }
Here is the caller graph for this function:
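
The async-#PF token is simply the per-vCPU counter shifted into the upper bits with the vCPU id below it, and the counter is reset to 1 whenever the shifted value would become 0 so a token of 0 never escapes. A small userspace sketch of that packing (struct demo_vcpu and demo_alloc_apf_token are invented):

#include <stdint.h>
#include <stdio.h>

struct demo_vcpu {
        uint32_t apf_id;    /* rolling per-vCPU counter, like vcpu->arch.apf.id */
        uint32_t vcpu_id;
};

/* Counter in bits 12 and up, vCPU id in the low 12 bits. */
static uint32_t demo_alloc_apf_token(struct demo_vcpu *vcpu)
{
        /* make sure the token value is never 0 */
        if ((uint32_t)(vcpu->apf_id << 12) == 0)
                vcpu->apf_id = 1;

        return (vcpu->apf_id++ << 12) | vcpu->vcpu_id;
}

int main(void)
{
        struct demo_vcpu vcpu = { .apf_id = 0, .vcpu_id = 3 };

        for (int i = 0; i < 3; i++)
                printf("token %d: %#x\n", i, demo_alloc_apf_token(&vcpu));
        /* Prints 0x1003, 0x2003, 0x3003: counter above, vCPU id below. */
        return 0;
}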

◆ boot_cpu_is_amd()

static bool boot_cpu_is_amd ( void  )
inlinestatic

Definition at line 5021 of file mmu.c.

5022 {
5023  WARN_ON_ONCE(!tdp_enabled);
5024  return shadow_x_mask == 0;
5025 }
u64 __read_mostly shadow_x_mask
Definition: spte.c:30
Here is the caller graph for this function:

◆ BUILD_MMU_ROLE_ACCESSOR() [1/8]

BUILD_MMU_ROLE_ACCESSOR ( base  ,
cr0  ,
wp   
)

◆ BUILD_MMU_ROLE_ACCESSOR() [2/8]

BUILD_MMU_ROLE_ACCESSOR ( base  ,
efer  ,
nx   
)

◆ BUILD_MMU_ROLE_ACCESSOR() [3/8]

BUILD_MMU_ROLE_ACCESSOR ( ext  ,
cr4  ,
la57   
)

◆ BUILD_MMU_ROLE_ACCESSOR() [4/8]

BUILD_MMU_ROLE_ACCESSOR ( ext  ,
cr4  ,
pke   
)

◆ BUILD_MMU_ROLE_ACCESSOR() [5/8]

BUILD_MMU_ROLE_ACCESSOR ( ext  ,
cr4  ,
pse   
)

◆ BUILD_MMU_ROLE_ACCESSOR() [6/8]

BUILD_MMU_ROLE_ACCESSOR ( ext  ,
cr4  ,
smap   
)

◆ BUILD_MMU_ROLE_ACCESSOR() [7/8]

BUILD_MMU_ROLE_ACCESSOR ( ext  ,
cr4  ,
smep   
)

◆ BUILD_MMU_ROLE_ACCESSOR() [8/8]

BUILD_MMU_ROLE_ACCESSOR ( ext  ,
efer  ,
lma   
)

◆ BUILD_MMU_ROLE_REGS_ACCESSOR() [1/10]

BUILD_MMU_ROLE_REGS_ACCESSOR ( cr0  ,
pg  ,
X86_CR0_PG   
)

◆ BUILD_MMU_ROLE_REGS_ACCESSOR() [2/10]

BUILD_MMU_ROLE_REGS_ACCESSOR ( cr0  ,
wp  ,
X86_CR0_WP   
)

◆ BUILD_MMU_ROLE_REGS_ACCESSOR() [3/10]

BUILD_MMU_ROLE_REGS_ACCESSOR ( cr4  ,
la57  ,
X86_CR4_LA57   
)

◆ BUILD_MMU_ROLE_REGS_ACCESSOR() [4/10]

BUILD_MMU_ROLE_REGS_ACCESSOR ( cr4  ,
pae  ,
X86_CR4_PAE   
)

◆ BUILD_MMU_ROLE_REGS_ACCESSOR() [5/10]

BUILD_MMU_ROLE_REGS_ACCESSOR ( cr4  ,
pke  ,
X86_CR4_PKE   
)

◆ BUILD_MMU_ROLE_REGS_ACCESSOR() [6/10]

BUILD_MMU_ROLE_REGS_ACCESSOR ( cr4  ,
pse  ,
X86_CR4_PSE   
)

◆ BUILD_MMU_ROLE_REGS_ACCESSOR() [7/10]

BUILD_MMU_ROLE_REGS_ACCESSOR ( cr4  ,
smap  ,
X86_CR4_SMAP   
)

◆ BUILD_MMU_ROLE_REGS_ACCESSOR() [8/10]

BUILD_MMU_ROLE_REGS_ACCESSOR ( cr4  ,
smep  ,
X86_CR4_SMEP   
)

◆ BUILD_MMU_ROLE_REGS_ACCESSOR() [9/10]

BUILD_MMU_ROLE_REGS_ACCESSOR ( efer  ,
lma  ,
EFER_LMA   
)

◆ BUILD_MMU_ROLE_REGS_ACCESSOR() [10/10]

BUILD_MMU_ROLE_REGS_ACCESSOR ( efer  ,
nx  ,
EFER_NX   
)

◆ cached_root_find_and_keep_current()

static bool cached_root_find_and_keep_current ( struct kvm *  kvm,
struct kvm_mmu *  mmu,
gpa_t  new_pgd,
union kvm_mmu_page_role  new_role 
)
static

Definition at line 4682 of file mmu.c.

4685 {
4686  uint i;
4687 
4688  if (is_root_usable(&mmu->root, new_pgd, new_role))
4689  return true;
4690 
4691  for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
4692  /*
4693  * The swaps end up rotating the cache like this:
4694  * C 0 1 2 3 (on entry to the function)
4695  * 0 C 1 2 3
4696  * 1 C 0 2 3
4697  * 2 C 0 1 3
4698  * 3 C 0 1 2 (on exit from the loop)
4699  */
4700  swap(mmu->root, mmu->prev_roots[i]);
4701  if (is_root_usable(&mmu->root, new_pgd, new_role))
4702  return true;
4703  }
4704 
4705  kvm_mmu_free_roots(kvm, mmu, KVM_MMU_ROOT_CURRENT);
4706  return false;
4707 }
static bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd, union kvm_mmu_page_role role)
Definition: mmu.c:4656
Here is the call graph for this function:
Here is the caller graph for this function:
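
The swap() loop above walks the prev_roots[] cache by repeatedly exchanging the current root with each slot, which is why the comment shows the cache rotating until the oldest entry ends up in mmu->root (and is freed on a miss). A standalone sketch of the same rotation on plain integers (none of these names are from mmu.c):

#include <stdio.h>

#define NPREV 4   /* stands in for KVM_MMU_NUM_PREV_ROOTS */

static void swap_int(int *a, int *b) { int t = *a; *a = *b; *b = t; }

int main(void)
{
        int current = 100;                 /* 100 stands in for the current root "C" */
        int prev[NPREV] = { 0, 1, 2, 3 };  /* stands in for mmu->prev_roots[] */

        /* Mimic the loop in cached_root_find_and_keep_current() when nothing matches. */
        for (int i = 0; i < NPREV; i++) {
                swap_int(&current, &prev[i]);
                /* a real lookup would test is_root_usable(&current, ...) here */
        }

        printf("current = %d, prev = { %d %d %d %d }\n",
               current, prev[0], prev[1], prev[2], prev[3]);
        /* current now holds the oldest entry (3); prev = { 100 0 1 2 }, matching the comment. */
        return 0;
}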

◆ cached_root_find_without_current()

static bool cached_root_find_without_current ( struct kvm *  kvm,
struct kvm_mmu *  mmu,
gpa_t  new_pgd,
union kvm_mmu_page_role  new_role 
)
static

Definition at line 4716 of file mmu.c.

4719 {
4720  uint i;
4721 
4722  for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
4723  if (is_root_usable(&mmu->prev_roots[i], new_pgd, new_role))
4724  goto hit;
4725 
4726  return false;
4727 
4728 hit:
4729  swap(mmu->root, mmu->prev_roots[i]);
4730  /* Bubble up the remaining roots. */
4731  for (; i < KVM_MMU_NUM_PREV_ROOTS - 1; i++)
4732  mmu->prev_roots[i] = mmu->prev_roots[i + 1];
4733  mmu->prev_roots[i].hpa = INVALID_PAGE;
4734  return true;
4735 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ calc_nx_huge_pages_recovery_period()

static bool calc_nx_huge_pages_recovery_period ( uint *  period)
static

Definition at line 7095 of file mmu.c.

7096 {
7097  /*
7098  * Use READ_ONCE to get the params, this may be called outside of the
7099  * param setters, e.g. by the kthread to compute its next timeout.
7100  */
7101  bool enabled = READ_ONCE(nx_huge_pages);
7102  uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
7103 
7104  if (!enabled || !ratio)
7105  return false;
7106 
7107  *period = READ_ONCE(nx_huge_pages_recovery_period_ms);
7108  if (!*period) {
7109  /* Make sure the period is not less than one second. */
7110  ratio = min(ratio, 3600u);
7111  *period = 60 * 60 * 1000 / ratio;
7112  }
7113  return true;
7114 }
static uint __read_mostly nx_huge_pages_recovery_period_ms
Definition: mmu.c:65
static uint __read_mostly nx_huge_pages_recovery_ratio
Definition: mmu.c:70
Here is the caller graph for this function:
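
When nx_huge_pages_recovery_period_ms is 0, the period is derived from the ratio so that the recovery worker runs ratio times per hour, and the ratio is clamped to 3600 so the derived period never drops below one second. A worked sketch of that arithmetic (derive_period_ms is an invented helper, not kernel code):

#include <stdio.h>

/* Same derivation as calc_nx_huge_pages_recovery_period() when no period is set. */
static unsigned int derive_period_ms(unsigned int ratio)
{
        if (ratio > 3600)        /* clamp: keeps the derived period >= 1000 ms */
                ratio = 3600;
        return 60u * 60u * 1000u / ratio;
}

int main(void)
{
        printf("ratio 60   -> %u ms\n", derive_period_ms(60));    /* 60000 ms: once a minute */
        printf("ratio 3600 -> %u ms\n", derive_period_ms(3600));  /* 1000 ms floor */
        printf("ratio 9999 -> %u ms\n", derive_period_ms(9999));  /* clamped to 1000 ms */
        return 0;
}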

◆ check_mmio_spte()

static bool check_mmio_spte ( struct kvm_vcpu *  vcpu,
u64  spte 
)
static

Definition at line 316 of file mmu.c.

317 {
318  u64 kvm_gen, spte_gen, gen;
319 
320  gen = kvm_vcpu_memslots(vcpu)->generation;
321  if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS))
322  return false;
323 
324  kvm_gen = gen & MMIO_SPTE_GEN_MASK;
325  spte_gen = get_mmio_spte_generation(spte);
326 
327  trace_check_mmio_spte(spte, kvm_gen, spte_gen);
328  return likely(kvm_gen == spte_gen);
329 }
static u64 get_mmio_spte_generation(u64 spte)
Definition: spte.h:463
#define MMIO_SPTE_GEN_MASK
Definition: spte.h:150
Here is the call graph for this function:
Here is the caller graph for this function:
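
check_mmio_spte() extracts the memslot generation that was baked into the MMIO SPTE and compares it with the current generation, treating an in-progress memslot update as an automatic mismatch so the cached MMIO translation gets rebuilt. A rough sketch of that comparison with invented masks (the real bit layout lives in spte.h and kvm_host.h and differs):

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

/* Invented layout for illustration only: low 11 bits carry the generation. */
#define DEMO_GEN_MASK              0x7ffull
#define DEMO_UPDATE_IN_PROGRESS    (1ull << 63)

static bool demo_check_mmio_spte(uint64_t memslots_generation, uint64_t spte_gen)
{
        /* A memslot update is underway: force the cached translation stale. */
        if (memslots_generation & DEMO_UPDATE_IN_PROGRESS)
                return false;

        return (memslots_generation & DEMO_GEN_MASK) == spte_gen;
}

int main(void)
{
        printf("%d\n", demo_check_mmio_spte(5, 5));                           /* 1: still valid */
        printf("%d\n", demo_check_mmio_spte(6, 5));                           /* 0: generation bumped */
        printf("%d\n", demo_check_mmio_spte(5 | DEMO_UPDATE_IN_PROGRESS, 5)); /* 0: update in flight */
        return 0;
}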

◆ clear_sp_write_flooding_count()

static void clear_sp_write_flooding_count ( u64 *  spte)
static

Definition at line 2140 of file mmu.c.

2141 {
2142  __clear_sp_write_flooding_count(sptep_to_sp(spte));
2143 }
static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
Definition: mmu.c:2135
Here is the call graph for this function:
Here is the caller graph for this function:

◆ clear_unsync_child_bit()

static void clear_unsync_child_bit ( struct kvm_mmu_page sp,
int  idx 
)
inlinestatic

Definition at line 1825 of file mmu.c.

1826 {
1827  --sp->unsync_children;
1828  WARN_ON_ONCE((int)sp->unsync_children < 0);
1829  __clear_bit(idx, sp->unsync_child_bitmap);
1830 }
Here is the caller graph for this function:

◆ count_spte_clear()

static void count_spte_clear ( u64 *  sptep,
u64  spte 
)
static

Definition at line 365 of file mmu.c.

366 {
367  struct kvm_mmu_page *sp = sptep_to_sp(sptep);
368 
369  if (is_shadow_present_pte(spte))
370  return;
371 
372  /* Ensure the spte is completely set before we increase the count */
373  smp_wmb();
374  sp->clear_spte_count++;
375 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ detect_write_flooding()

static bool detect_write_flooding ( struct kvm_mmu_page sp)
static

Definition at line 5712 of file mmu.c.

5713 {
5714  /*
5715  * Skip write-flooding detected for the sp whose level is 1, because
5716  * it can become unsync, then the guest page is not write-protected.
5717  */
5718  if (sp->role.level == PG_LEVEL_4K)
5719  return false;
5720 
5721  atomic_inc(&sp->write_flooding_count);
5722  return atomic_read(&sp->write_flooding_count) >= 3;
5723 }
Here is the caller graph for this function:

◆ detect_write_misaligned()

static bool detect_write_misaligned ( struct kvm_mmu_page sp,
gpa_t  gpa,
int  bytes 
)
static

Definition at line 5729 of file mmu.c.

5731 {
5732  unsigned offset, pte_size, misaligned;
5733 
5734  offset = offset_in_page(gpa);
5735  pte_size = sp->role.has_4_byte_gpte ? 4 : 8;
5736 
5737  /*
5738  * Sometimes, the OS only writes the last one bytes to update status
5739  * bits, for example, in linux, andb instruction is used in clear_bit().
5740  */
5741  if (!(offset & (pte_size - 1)) && bytes == 1)
5742  return false;
5743 
5744  misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
5745  misaligned |= bytes < 4;
5746 
5747  return misaligned;
5748 }
Here is the caller graph for this function:
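
The predicate above rejects writes that are shorter than a guest PTE or that straddle a PTE boundary, while still allowing 1-byte status-bit updates at a PTE-aligned offset. A small standalone sketch of the same test (demo_write_misaligned is an invented wrapper that takes the page offset directly):

#include <stdio.h>
#include <stdbool.h>

/* Same logic as detect_write_misaligned(), with the page offset passed in directly. */
static bool demo_write_misaligned(unsigned int offset, int bytes, bool has_4_byte_gpte)
{
        unsigned int pte_size = has_4_byte_gpte ? 4 : 8;
        unsigned int misaligned;

        /* 1-byte write at a PTE-aligned offset: e.g. an andb updating status bits */
        if (!(offset & (pte_size - 1)) && bytes == 1)
                return false;

        misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
        misaligned |= bytes < 4;

        return misaligned;
}

int main(void)
{
        printf("%d\n", demo_write_misaligned(0x10, 8, false));  /* 0: whole 64-bit PTE */
        printf("%d\n", demo_write_misaligned(0x14, 8, false));  /* 1: straddles two PTEs */
        printf("%d\n", demo_write_misaligned(0x10, 1, false));  /* 0: aligned status-bit write */
        printf("%d\n", demo_write_misaligned(0x12, 2, false));  /* 1: too short, mid-PTE */
        return 0;
}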

◆ direct_map()

static int direct_map ( struct kvm_vcpu *  vcpu,
struct kvm_page_fault fault 
)
static

Definition at line 3237 of file mmu.c.

3238 {
3239  struct kvm_shadow_walk_iterator it;
3240  struct kvm_mmu_page *sp;
3241  int ret;
3242  gfn_t base_gfn = fault->gfn;
3243 
3244  kvm_mmu_hugepage_adjust(vcpu, fault);
3245 
3246  trace_kvm_mmu_spte_requested(fault);
3247  for_each_shadow_entry(vcpu, fault->addr, it) {
3248  /*
3249  * We cannot overwrite existing page tables with an NX
3250  * large page, as the leaf could be executable.
3251  */
3253  disallowed_hugepage_adjust(fault, *it.sptep, it.level);
3254 
3255  base_gfn = gfn_round_for_level(fault->gfn, it.level);
3256  if (it.level == fault->goal_level)
3257  break;
3258 
3259  sp = kvm_mmu_get_child_sp(vcpu, it.sptep, base_gfn, true, ACC_ALL);
3260  if (sp == ERR_PTR(-EEXIST))
3261  continue;
3262 
3263  link_shadow_page(vcpu, it.sptep, sp);
3264  if (fault->huge_page_disallowed)
3265  account_nx_huge_page(vcpu->kvm, sp,
3266  fault->req_level >= it.level);
3267  }
3268 
3269  if (WARN_ON_ONCE(it.level != fault->goal_level))
3270  return -EFAULT;
3271 
3272  ret = mmu_set_spte(vcpu, fault->slot, it.sptep, ACC_ALL,
3273  base_gfn, fault->pfn, fault);
3274  if (ret == RET_PF_SPURIOUS)
3275  return ret;
3276 
3277  direct_pte_prefetch(vcpu, it.sptep);
3278  return ret;
3279 }
static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
Definition: mmu.c:3030
#define for_each_shadow_entry(_vcpu, _addr, _walker)
Definition: mmu.c:169
static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, struct kvm_mmu_page *sp)
Definition: mmu.c:2464
static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, u64 *sptep, unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn, struct kvm_page_fault *fault)
Definition: mmu.c:2906
static struct kvm_mmu_page * kvm_mmu_get_child_sp(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, bool direct, unsigned int access)
Definition: mmu.c:2353
void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
Definition: mmu.c:3180
static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, bool nx_huge_page_possible)
Definition: mmu.c:866
void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level)
Definition: mmu.c:3216
static gfn_t gfn_round_for_level(gfn_t gfn, int level)
Definition: mmu_internal.h:161
@ RET_PF_SPURIOUS
Definition: mmu_internal.h:279
#define ACC_ALL
Definition: spte.h:49
bool huge_page_disallowed
Definition: mmu_internal.h:212
const bool nx_huge_page_workaround_enabled
Definition: mmu_internal.h:206
Here is the call graph for this function:
Here is the caller graph for this function:

◆ direct_page_fault()

static int direct_page_fault ( struct kvm_vcpu *  vcpu,
struct kvm_page_fault fault 
)
static

Definition at line 4491 of file mmu.c.

4492 {
4493  int r;
4494 
4495  /* Dummy roots are used only for shadowing bad guest roots. */
4496  if (WARN_ON_ONCE(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa)))
4497  return RET_PF_RETRY;
4498 
4499  if (page_fault_handle_page_track(vcpu, fault))
4500  return RET_PF_EMULATE;
4501 
4502  r = fast_page_fault(vcpu, fault);
4503  if (r != RET_PF_INVALID)
4504  return r;
4505 
4506  r = mmu_topup_memory_caches(vcpu, false);
4507  if (r)
4508  return r;
4509 
4510  r = kvm_faultin_pfn(vcpu, fault, ACC_ALL);
4511  if (r != RET_PF_CONTINUE)
4512  return r;
4513 
4514  r = RET_PF_RETRY;
4515  write_lock(&vcpu->kvm->mmu_lock);
4516 
4517  if (is_page_fault_stale(vcpu, fault))
4518  goto out_unlock;
4519 
4520  r = make_mmu_pages_available(vcpu);
4521  if (r)
4522  goto out_unlock;
4523 
4524  r = direct_map(vcpu, fault);
4525 
4526 out_unlock:
4527  write_unlock(&vcpu->kvm->mmu_lock);
4528  kvm_release_pfn_clean(fault->pfn);
4529  return r;
4530 }
void kvm_release_pfn_clean(kvm_pfn_t pfn)
Definition: kvm_main.c:3241
static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
Definition: mmu.c:3444
static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
Definition: mmu.c:679
static int direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
Definition: mmu.c:3237
static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, unsigned int access)
Definition: mmu.c:4400
static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
Definition: mmu.c:4208
static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
Definition: mmu.c:2714
static bool is_page_fault_stale(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
Definition: mmu.c:4462
static bool kvm_mmu_is_dummy_root(hpa_t shadow_page)
Definition: mmu_internal.h:45
@ RET_PF_INVALID
Definition: mmu_internal.h:277
Here is the call graph for this function:
Here is the caller graph for this function:

◆ direct_pte_prefetch()

static void direct_pte_prefetch ( struct kvm_vcpu *  vcpu,
u64 *  sptep 
)
static

Definition at line 3030 of file mmu.c.

3031 {
3032  struct kvm_mmu_page *sp;
3033 
3034  sp = sptep_to_sp(sptep);
3035 
3036  /*
3037  * Without accessed bits, there's no way to distinguish between
3038  * actually accessed translations and prefetched, so disable pte
3039  * prefetch if accessed bits aren't available.
3040  */
3041  if (sp_ad_disabled(sp))
3042  return;
3043 
3044  if (sp->role.level > PG_LEVEL_4K)
3045  return;
3046 
3047  /*
3048  * If addresses are being invalidated, skip prefetching to avoid
3049  * accidentally prefetching those addresses.
3050  */
3051  if (unlikely(vcpu->kvm->mmu_invalidate_in_progress))
3052  return;
3053 
3054  __direct_pte_prefetch(vcpu, sp, sptep);
3055 }
static void __direct_pte_prefetch(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *sptep)
Definition: mmu.c:3005
Here is the call graph for this function:
Here is the caller graph for this function:

◆ direct_pte_prefetch_many()

static int direct_pte_prefetch_many ( struct kvm_vcpu *  vcpu,
struct kvm_mmu_page sp,
u64 *  start,
u64 *  end 
)
static

Definition at line 2977 of file mmu.c.

2980 {
2981  struct page *pages[PTE_PREFETCH_NUM];
2982  struct kvm_memory_slot *slot;
2983  unsigned int access = sp->role.access;
2984  int i, ret;
2985  gfn_t gfn;
2986 
2987  gfn = kvm_mmu_page_get_gfn(sp, spte_index(start));
2988  slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, access & ACC_WRITE_MASK);
2989  if (!slot)
2990  return -1;
2991 
2992  ret = gfn_to_page_many_atomic(slot, gfn, pages, end - start);
2993  if (ret <= 0)
2994  return -1;
2995 
2996  for (i = 0; i < ret; i++, gfn++, start++) {
2997  mmu_set_spte(vcpu, slot, start, access, gfn,
2998  page_to_pfn(pages[i]), NULL);
2999  put_page(pages[i]);
3000  }
3001 
3002  return 0;
3003 }
int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn, struct page **pages, int nr_pages)
Definition: kvm_main.c:3103
static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
Definition: mmu.c:717
static struct kvm_memory_slot * gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log)
Definition: mmu.c:907
#define ACC_WRITE_MASK
Definition: spte.h:47
Here is the call graph for this function:
Here is the caller graph for this function:

◆ disallowed_hugepage_adjust()

void disallowed_hugepage_adjust ( struct kvm_page_fault fault,
u64  spte,
int  cur_level 
)

Definition at line 3216 of file mmu.c.

3217 {
3218  if (cur_level > PG_LEVEL_4K &&
3219  cur_level == fault->goal_level &&
3220  is_shadow_present_pte(spte) &&
3221  !is_large_pte(spte) &&
3222  spte_to_child_sp(spte)->nx_huge_page_disallowed) {
3223  /*
3224  * A small SPTE exists for this pfn, but FNAME(fetch),
3225  * direct_map(), or kvm_tdp_mmu_map() would like to create a
3226  * large PTE instead: just force them to go down another level,
3227  * patching back for them into pfn the next 9 bits of the
3228  * address.
3229  */
3230  u64 page_mask = KVM_PAGES_PER_HPAGE(cur_level) -
3231  KVM_PAGES_PER_HPAGE(cur_level - 1);
3232  fault->pfn |= fault->gfn & page_mask;
3233  fault->goal_level--;
3234  }
3235 }
Here is the call graph for this function:
Here is the caller graph for this function:
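
The page_mask expression above is the difference between the page counts of two adjacent levels, i.e. a mask of the next 9 GFN bits, which is how the faulting gfn's position inside the disallowed huge page is patched back into the pfn before retrying one level down. A worked example with x86's 512-entry tables (illustrative only; pages_per_hpage is an invented stand-in for KVM_PAGES_PER_HPAGE):

#include <stdint.h>
#include <stdio.h>

/* 4K/2M/1G page sizes expressed in 4K pages, as KVM_PAGES_PER_HPAGE() would give. */
static uint64_t pages_per_hpage(int level)
{
        return 1ull << ((level - 1) * 9);   /* level 1 = 4K, 2 = 2M, 3 = 1G */
}

int main(void)
{
        int cur_level = 2;                  /* faulting at the 2M level */
        uint64_t gfn = 0x12345;             /* guest frame inside that 2M region (made up) */
        uint64_t pfn = 0xabc00;             /* pfn of the 2M-aligned backing page (made up) */

        /* Same expression as disallowed_hugepage_adjust(): the next 9 bits of the address. */
        uint64_t page_mask = pages_per_hpage(cur_level) - pages_per_hpage(cur_level - 1);

        pfn |= gfn & page_mask;

        printf("page_mask = %#llx, adjusted pfn = %#llx\n",
               (unsigned long long)page_mask, (unsigned long long)pfn);
        /* page_mask = 0x1ff, adjusted pfn = 0xabd45: bits 8:0 of the gfn patched in. */
        return 0;
}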

◆ drop_large_spte()

static void drop_large_spte ( struct kvm *  kvm,
u64 *  sptep,
bool  flush 
)
static

Definition at line 1203 of file mmu.c.

1204 {
1205  struct kvm_mmu_page *sp;
1206 
1207  sp = sptep_to_sp(sptep);
1208  WARN_ON_ONCE(sp->role.level == PG_LEVEL_4K);
1209 
1210  drop_spte(kvm, sptep);
1211 
1212  if (flush)
1213  kvm_flush_remote_tlbs_sptep(kvm, sptep);
1214 }
static void drop_spte(struct kvm *kvm, u64 *sptep)
Definition: mmu.c:1195
Here is the call graph for this function:
Here is the caller graph for this function:

◆ drop_parent_pte()

static void drop_parent_pte ( struct kvm *  kvm,
struct kvm_mmu_page sp,
u64 *  parent_pte 
)
static

Definition at line 1769 of file mmu.c.

1771 {
1772  mmu_page_remove_parent_pte(kvm, sp, parent_pte);
1773  mmu_spte_clear_no_track(parent_pte);
1774 }
static void mmu_spte_clear_no_track(u64 *sptep)
Definition: mmu.c:604
static void mmu_page_remove_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *parent_pte)
Definition: mmu.c:1763
Here is the call graph for this function:
Here is the caller graph for this function:

◆ drop_spte()

static void drop_spte ( struct kvm *  kvm,
u64 *  sptep 
)
static

Definition at line 1195 of file mmu.c.

1196 {
1197  u64 old_spte = mmu_spte_clear_track_bits(kvm, sptep);
1198 
1199  if (is_shadow_present_pte(old_spte))
1200  rmap_remove(kvm, sptep);
1201 }
static void rmap_remove(struct kvm *kvm, u64 *spte)
Definition: mmu.c:1095
static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
Definition: mmu.c:561
Here is the call graph for this function:
Here is the caller graph for this function:

◆ EXPORT_SYMBOL_GPL() [1/12]

EXPORT_SYMBOL_GPL ( kvm_configure_mmu  )

◆ EXPORT_SYMBOL_GPL() [2/12]

EXPORT_SYMBOL_GPL ( kvm_handle_page_fault  )

◆ EXPORT_SYMBOL_GPL() [3/12]

EXPORT_SYMBOL_GPL ( kvm_init_mmu  )

◆ EXPORT_SYMBOL_GPL() [4/12]

EXPORT_SYMBOL_GPL ( kvm_init_shadow_ept_mmu  )

◆ EXPORT_SYMBOL_GPL() [5/12]

EXPORT_SYMBOL_GPL ( kvm_init_shadow_npt_mmu  )

◆ EXPORT_SYMBOL_GPL() [6/12]

EXPORT_SYMBOL_GPL ( kvm_mmu_free_guest_mode_roots  )

◆ EXPORT_SYMBOL_GPL() [7/12]

EXPORT_SYMBOL_GPL ( kvm_mmu_free_roots  )

◆ EXPORT_SYMBOL_GPL() [8/12]

EXPORT_SYMBOL_GPL ( kvm_mmu_invalidate_addr  )

◆ EXPORT_SYMBOL_GPL() [9/12]

EXPORT_SYMBOL_GPL ( kvm_mmu_invlpg  )

◆ EXPORT_SYMBOL_GPL() [10/12]

EXPORT_SYMBOL_GPL ( kvm_mmu_new_pgd  )

◆ EXPORT_SYMBOL_GPL() [11/12]

EXPORT_SYMBOL_GPL ( kvm_mmu_page_fault  )

◆ EXPORT_SYMBOL_GPL() [12/12]

EXPORT_SYMBOL_GPL ( kvm_mmu_reset_context  )

◆ fast_page_fault()

static int fast_page_fault ( struct kvm_vcpu *  vcpu,
struct kvm_page_fault fault 
)
static

Definition at line 3444 of file mmu.c.

3445 {
3446  struct kvm_mmu_page *sp;
3447  int ret = RET_PF_INVALID;
3448  u64 spte;
3449  u64 *sptep;
3450  uint retry_count = 0;
3451 
3452  if (!page_fault_can_be_fast(fault))
3453  return ret;
3454 
3455  walk_shadow_page_lockless_begin(vcpu);
3456 
3457  do {
3458  u64 new_spte;
3459 
3460  if (tdp_mmu_enabled)
3461  sptep = kvm_tdp_mmu_fast_pf_get_last_sptep(vcpu, fault->addr, &spte);
3462  else
3463  sptep = fast_pf_get_last_sptep(vcpu, fault->addr, &spte);
3464 
3465  /*
3466  * It's entirely possible for the mapping to have been zapped
3467  * by a different task, but the root page should always be
3468  * available as the vCPU holds a reference to its root(s).
3469  */
3470  if (WARN_ON_ONCE(!sptep))
3471  spte = REMOVED_SPTE;
3472 
3473  if (!is_shadow_present_pte(spte))
3474  break;
3475 
3476  sp = sptep_to_sp(sptep);
3477  if (!is_last_spte(spte, sp->role.level))
3478  break;
3479 
3480  /*
3481  * Check whether the memory access that caused the fault would
3482  * still cause it if it were to be performed right now. If not,
3483  * then this is a spurious fault caused by TLB lazily flushed,
3484  * or some other CPU has already fixed the PTE after the
3485  * current CPU took the fault.
3486  *
3487  * Need not check the access of upper level table entries since
3488  * they are always ACC_ALL.
3489  */
3490  if (is_access_allowed(fault, spte)) {
3491  ret = RET_PF_SPURIOUS;
3492  break;
3493  }
3494 
3495  new_spte = spte;
3496 
3497  /*
3498  * KVM only supports fixing page faults outside of MMU lock for
3499  * direct MMUs, nested MMUs are always indirect, and KVM always
3500  * uses A/D bits for non-nested MMUs. Thus, if A/D bits are
3501  * enabled, the SPTE can't be an access-tracked SPTE.
3502  */
3503  if (unlikely(!kvm_ad_enabled()) && is_access_track_spte(spte))
3504  new_spte = restore_acc_track_spte(new_spte);
3505 
3506  /*
3507  * To keep things simple, only SPTEs that are MMU-writable can
3508  * be made fully writable outside of mmu_lock, e.g. only SPTEs
3509  * that were write-protected for dirty-logging or access
3510  * tracking are handled here. Don't bother checking if the
3511  * SPTE is writable to prioritize running with A/D bits enabled.
3512  * The is_access_allowed() check above handles the common case
3513  * of the fault being spurious, and the SPTE is known to be
3514  * shadow-present, i.e. except for access tracking restoration
3515  * making the new SPTE writable, the check is wasteful.
3516  */
3517  if (fault->write && is_mmu_writable_spte(spte)) {
3518  new_spte |= PT_WRITABLE_MASK;
3519 
3520  /*
3521  * Do not fix write-permission on the large spte when
3522  * dirty logging is enabled. Since we only dirty the
3523  * first page into the dirty-bitmap in
3524  * fast_pf_fix_direct_spte(), other pages are missed
3525  * if its slot has dirty logging enabled.
3526  *
3527  * Instead, we let the slow page fault path create a
3528  * normal spte to fix the access.
3529  */
3530  if (sp->role.level > PG_LEVEL_4K &&
3531  kvm_slot_dirty_track_enabled(fault->slot))
3532  break;
3533  }
3534 
3535  /* Verify that the fault can be handled in the fast path */
3536  if (new_spte == spte ||
3537  !is_access_allowed(fault, new_spte))
3538  break;
3539 
3540  /*
3541  * Currently, fast page fault only works for direct mapping
3542  * since the gfn is not stable for indirect shadow page. See
3543  * Documentation/virt/kvm/locking.rst to get more detail.
3544  */
3545  if (fast_pf_fix_direct_spte(vcpu, fault, sptep, spte, new_spte)) {
3546  ret = RET_PF_FIXED;
3547  break;
3548  }
3549 
3550  if (++retry_count > 4) {
3551  pr_warn_once("Fast #PF retrying more than 4 times.\n");
3552  break;
3553  }
3554 
3555  } while (true);
3556 
3557  trace_fast_page_fault(vcpu, fault, sptep, spte, ret);
3558  walk_shadow_page_lockless_end(vcpu);
3559 
3560  if (ret != RET_PF_INVALID)
3561  vcpu->stat.pf_fast++;
3562 
3563  return ret;
3564 }
static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
Definition: mmu.c:664
static bool fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, u64 *sptep, u64 old_spte, u64 new_spte)
Definition: mmu.c:3381
static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
Definition: mmu.c:645
static bool page_fault_can_be_fast(struct kvm_page_fault *fault)
Definition: mmu.c:3341
static bool is_access_allowed(struct kvm_page_fault *fault, u64 spte)
Definition: mmu.c:3406
static u64 * fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, gpa_t gpa, u64 *spte)
Definition: mmu.c:3427
#define tdp_mmu_enabled
Definition: mmu.h:276
@ RET_PF_FIXED
Definition: mmu_internal.h:278
static bool is_access_track_spte(u64 spte)
Definition: spte.h:308
#define REMOVED_SPTE
Definition: spte.h:197
static u64 restore_acc_track_spte(u64 spte)
Definition: spte.h:486
static bool kvm_ad_enabled(void)
Definition: spte.h:269
static bool is_mmu_writable_spte(u64 spte)
Definition: spte.h:458
u64 * kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr, u64 *spte)
Definition: tdp_mmu.c:1795
Here is the call graph for this function:
Here is the caller graph for this function:

◆ fast_pf_fix_direct_spte()

static bool fast_pf_fix_direct_spte ( struct kvm_vcpu *  vcpu,
struct kvm_page_fault fault,
u64 *  sptep,
u64  old_spte,
u64  new_spte 
)
static

Definition at line 3381 of file mmu.c.

3384 {
3385  /*
3386  * Theoretically we could also set dirty bit (and flush TLB) here in
3387  * order to eliminate unnecessary PML logging. See comments in
3388  * set_spte. But fast_page_fault is very unlikely to happen with PML
3389  * enabled, so we do not do this. This might result in the same GPA
3390  * to be logged in PML buffer again when the write really happens, and
3391  * eventually to be called by mark_page_dirty twice. But it's also no
3392  * harm. This also avoids the TLB flush needed after setting dirty bit
3393  * so non-PML cases won't be impacted.
3394  *
3395  * Compare with set_spte where instead shadow_dirty_mask is set.
3396  */
3397  if (!try_cmpxchg64(sptep, &old_spte, new_spte))
3398  return false;
3399 
3400  if (is_writable_pte(new_spte) && !is_writable_pte(old_spte))
3401  mark_page_dirty_in_slot(vcpu->kvm, fault->slot, fault->gfn);
3402 
3403  return true;
3404 }
void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn)
Definition: kvm_main.c:3635
static bool is_writable_pte(unsigned long pte)
Definition: spte.h:441
Here is the call graph for this function:
Here is the caller graph for this function:
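
The fix only succeeds if the SPTE still holds the value the fast path read, so a concurrent zap or write-protection simply wins the race and the fault is retried. A minimal userspace sketch of that try_cmpxchg-style pattern using C11 atomics rather than the kernel's try_cmpxchg64() (demo_fix_spte is invented):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Install new_spte only if the SPTE still holds the value we based it on. */
static bool demo_fix_spte(_Atomic uint64_t *sptep, uint64_t old_spte, uint64_t new_spte)
{
        /* compare_exchange rewrites old_spte with the current value on failure,
         * mirroring how try_cmpxchg64() reports what it actually found. */
        return atomic_compare_exchange_strong(sptep, &old_spte, new_spte);
}

int main(void)
{
        _Atomic uint64_t spte = 0x1000;

        printf("first fix: %d\n", demo_fix_spte(&spte, 0x1000, 0x1003));  /* 1: we won the race */
        printf("stale fix: %d\n", demo_fix_spte(&spte, 0x1000, 0x1007));  /* 0: someone changed it */
        printf("spte = %#llx\n", (unsigned long long)atomic_load(&spte)); /* still 0x1003 */
        return 0;
}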

◆ fast_pf_get_last_sptep()

static u64* fast_pf_get_last_sptep ( struct kvm_vcpu *  vcpu,
gpa_t  gpa,
u64 *  spte 
)
static

Definition at line 3427 of file mmu.c.

3428 {
3429  struct kvm_shadow_walk_iterator iterator;
3430  u64 old_spte;
3431  u64 *sptep = NULL;
3432 
3433  for_each_shadow_entry_lockless(vcpu, gpa, iterator, old_spte) {
3434  sptep = iterator.sptep;
3435  *spte = old_spte;
3436  }
3437 
3438  return sptep;
3439 }
#define for_each_shadow_entry_lockless(_vcpu, _addr, _walker, spte)
Definition: mmu.c:174
Here is the caller graph for this function:

◆ fast_pgd_switch()

static bool fast_pgd_switch ( struct kvm *  kvm,
struct kvm_mmu *  mmu,
gpa_t  new_pgd,
union kvm_mmu_page_role  new_role 
)
static

Definition at line 4737 of file mmu.c.

4739 {
4740  /*
4741  * Limit reuse to 64-bit hosts+VMs without "special" roots in order to
4742  * avoid having to deal with PDPTEs and other complexities.
4743  */
4744  if (VALID_PAGE(mmu->root.hpa) && !root_to_sp(mmu->root.hpa))
4745  kvm_mmu_free_roots(kvm, mmu, KVM_MMU_ROOT_CURRENT);
4746 
4747  if (VALID_PAGE(mmu->root.hpa))
4748  return cached_root_find_and_keep_current(kvm, mmu, new_pgd, new_role);
4749  else
4750  return cached_root_find_without_current(kvm, mmu, new_pgd, new_role);
4751 }
static bool cached_root_find_without_current(struct kvm *kvm, struct kvm_mmu *mmu, gpa_t new_pgd, union kvm_mmu_page_role new_role)
Definition: mmu.c:4716
static bool cached_root_find_and_keep_current(struct kvm *kvm, struct kvm_mmu *mmu, gpa_t new_pgd, union kvm_mmu_page_role new_role)
Definition: mmu.c:4682
static struct kvm_mmu_page * root_to_sp(hpa_t root)
Definition: spte.h:240
Here is the call graph for this function:
Here is the caller graph for this function:

◆ free_mmu_pages()

static void free_mmu_pages ( struct kvm_mmu *  mmu)
static

Definition at line 6091 of file mmu.c.

6092 {
6093  if (!tdp_enabled && mmu->pae_root)
6094  set_memory_encrypted((unsigned long)mmu->pae_root, 1);
6095  free_page((unsigned long)mmu->pae_root);
6096  free_page((unsigned long)mmu->pml4_root);
6097  free_page((unsigned long)mmu->pml5_root);
6098 }
Here is the caller graph for this function:

◆ get_guest_cr3()

static unsigned long get_guest_cr3 ( struct kvm_vcpu *  vcpu)
static

Definition at line 258 of file mmu.c.

259 {
260  return kvm_read_cr3(vcpu);
261 }
static ulong kvm_read_cr3(struct kvm_vcpu *vcpu)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_mmio_spte()

static bool get_mmio_spte ( struct kvm_vcpu *  vcpu,
u64  addr,
u64 *  sptep 
)
static

Definition at line 4125 of file mmu.c.

4126 {
4127  u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
4128  struct rsvd_bits_validate *rsvd_check;
4129  int root, leaf, level;
4130  bool reserved = false;
4131 
4132  walk_shadow_page_lockless_begin(vcpu);
4133 
4134  if (is_tdp_mmu_active(vcpu))
4135  leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
4136  else
4137  leaf = get_walk(vcpu, addr, sptes, &root);
4138 
4139  walk_shadow_page_lockless_end(vcpu);
4140 
4141  if (unlikely(leaf < 0)) {
4142  *sptep = 0ull;
4143  return reserved;
4144  }
4145 
4146  *sptep = sptes[leaf];
4147 
4148  /*
4149  * Skip reserved bits checks on the terminal leaf if it's not a valid
4150  * SPTE. Note, this also (intentionally) skips MMIO SPTEs, which, by
4151  * design, always have reserved bits set. The purpose of the checks is
4152  * to detect reserved bits on non-MMIO SPTEs. i.e. buggy SPTEs.
4153  */
4154  if (!is_shadow_present_pte(sptes[leaf]))
4155  leaf++;
4156 
4157  rsvd_check = &vcpu->arch.mmu->shadow_zero_check;
4158 
4159  for (level = root; level >= leaf; level--)
4160  reserved |= is_rsvd_spte(rsvd_check, sptes[level], level);
4161 
4162  if (reserved) {
4163  pr_err("%s: reserved bits set on MMU-present spte, addr 0x%llx, hierarchy:\n",
4164  __func__, addr);
4165  for (level = root; level >= leaf; level--)
4166  pr_err("------ spte = 0x%llx level = %d, rsvd bits = 0x%llx",
4167  sptes[level], level,
4168  get_rsvd_bits(rsvd_check, sptes[level], level));
4169  }
4170 
4171  return reserved;
4172 }
static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level)
Definition: mmu.c:4105
static bool is_tdp_mmu_active(struct kvm_vcpu *vcpu)
Definition: mmu.c:640
static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check, u64 spte, int level)
Definition: spte.h:368
static u64 get_rsvd_bits(struct rsvd_bits_validate *rsvd_check, u64 pte, int level)
Definition: spte.h:348
int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level)
Definition: tdp_mmu.c:1766
Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_mmio_spte_access()

static unsigned get_mmio_spte_access ( u64  spte)
static

Definition at line 311 of file mmu.c.

312 {
313  return spte & shadow_mmio_access_mask;
314 }
u64 __read_mostly shadow_mmio_access_mask
Definition: spte.c:36
Here is the caller graph for this function:

◆ get_mmio_spte_gfn()

static gfn_t get_mmio_spte_gfn ( u64  spte)
static

Definition at line 301 of file mmu.c.

302 {
303  u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask;
304 
305  gpa |= (spte >> SHADOW_NONPRESENT_OR_RSVD_MASK_LEN)
306  & shadow_nonpresent_or_rsvd_mask;
307 
308  return gpa >> PAGE_SHIFT;
309 }
u64 __read_mostly shadow_nonpresent_or_rsvd_mask
Definition: spte.c:43
u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask
Definition: spte.c:44
#define SHADOW_NONPRESENT_OR_RSVD_MASK_LEN
Definition: spte.h:183
Here is the caller graph for this function:
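
Because part of the GPA in an MMIO SPTE is shifted above the block of "non-present or reserved" marker bits, get_mmio_spte_gfn() reassembles it from two pieces: the untouched low bits and the shifted-down high bits. A hedged sketch of that splice with an invented mask layout (the real masks come from spte.c/spte.h and depend on the CPU's physical address width):

#include <stdint.h>
#include <stdio.h>

/* Invented layout: pretend 5 marker bits sit at SPTE bit positions 52..56. */
#define DEMO_RSVD_LEN        5
#define DEMO_RSVD_SHIFT      52
#define DEMO_LOWER_GFN_MASK  ((1ull << DEMO_RSVD_SHIFT) - 1)   /* gpa bits below the markers */
#define DEMO_PAGE_SHIFT      12

/* Encode a gpa into an SPTE by shifting its upper bits above the marker block. */
static uint64_t demo_encode(uint64_t gpa)
{
        return (gpa & DEMO_LOWER_GFN_MASK) |
               ((gpa & ~DEMO_LOWER_GFN_MASK) << DEMO_RSVD_LEN);
}

/* Mirrors get_mmio_spte_gfn(): undo the shift and drop the page offset. */
static uint64_t demo_spte_to_gfn(uint64_t spte)
{
        uint64_t gpa = spte & DEMO_LOWER_GFN_MASK;

        gpa |= (spte >> DEMO_RSVD_LEN) & ~DEMO_LOWER_GFN_MASK;
        return gpa >> DEMO_PAGE_SHIFT;
}

int main(void)
{
        uint64_t gpa = 0x00fedcba98765000ull;
        uint64_t spte = demo_encode(gpa);

        printf("gfn in  = %#llx\n", (unsigned long long)(gpa >> DEMO_PAGE_SHIFT));
        printf("gfn out = %#llx\n", (unsigned long long)demo_spte_to_gfn(spte));
        return 0;
}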

◆ get_nx_auto_mode()

static bool get_nx_auto_mode ( void  )
static

Definition at line 6940 of file mmu.c.

6941 {
6942  /* Return true when CPU has the bug, and mitigations are ON */
6943  return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off();
6944 }
Here is the caller graph for this function:

◆ get_nx_huge_page_recovery_timeout()

static long get_nx_huge_page_recovery_timeout ( u64  start_time)
static

Definition at line 7243 of file mmu.c.

7244 {
7245  bool enabled;
7246  uint period;
7247 
7248  enabled = calc_nx_huge_pages_recovery_period(&period);
7249 
7250  return enabled ? start_time + msecs_to_jiffies(period) - get_jiffies_64()
7251  : MAX_SCHEDULE_TIMEOUT;
7252 }
static bool calc_nx_huge_pages_recovery_period(uint *period)
Definition: mmu.c:7095
Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_nx_huge_pages()

static int get_nx_huge_pages ( char *  buffer,
const struct kernel_param *  kp 
)
static

Definition at line 6932 of file mmu.c.

6933 {
6934  if (nx_hugepage_mitigation_hard_disabled)
6935  return sysfs_emit(buffer, "never\n");
6936 
6937  return param_get_bool(buffer, kp);
6938 }
static bool nx_hugepage_mitigation_hard_disabled
Definition: mmu.c:62

◆ get_walk()

static int get_walk ( struct kvm_vcpu *  vcpu,
u64  addr,
u64 *  sptes,
int *  root_level 
)
static

Definition at line 4105 of file mmu.c.

4106 {
4107  struct kvm_shadow_walk_iterator iterator;
4108  int leaf = -1;
4109  u64 spte;
4110 
4111  for (shadow_walk_init(&iterator, vcpu, addr),
4112  *root_level = iterator.level;
4113  shadow_walk_okay(&iterator);
4114  __shadow_walk_next(&iterator, spte)) {
4115  leaf = iterator.level;
4116  spte = mmu_spte_get_lockless(iterator.sptep);
4117 
4118  sptes[leaf] = spte;
4119  }
4120 
4121  return leaf;
4122 }
static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
Definition: mmu.c:2402
static void __shadow_walk_next(struct kvm_shadow_walk_iterator *iterator, u64 spte)
Definition: mmu.c:2412
Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_written_sptes()

static u64* get_written_sptes ( struct kvm_mmu_page sp,
gpa_t  gpa,
int *  nspte 
)
static

Definition at line 5750 of file mmu.c.

5751 {
5752  unsigned page_offset, quadrant;
5753  u64 *spte;
5754  int level;
5755 
5756  page_offset = offset_in_page(gpa);
5757  level = sp->role.level;
5758  *nspte = 1;
5759  if (sp->role.has_4_byte_gpte) {
5760  page_offset <<= 1; /* 32->64 */
5761  /*
5762  * A 32-bit pde maps 4MB while the shadow pdes map
5763  * only 2MB. So we need to double the offset again
5764  * and zap two pdes instead of one.
5765  */
5766  if (level == PT32_ROOT_LEVEL) {
5767  page_offset &= ~7; /* kill rounding error */
5768  page_offset <<= 1;
5769  *nspte = 2;
5770  }
5771  quadrant = page_offset >> PAGE_SHIFT;
5772  page_offset &= ~PAGE_MASK;
5773  if (quadrant != sp->role.quadrant)
5774  return NULL;
5775  }
5776 
5777  spte = &sp->spt[page_offset / sizeof(*spte)];
5778  return spte;
5779 }
Here is the caller graph for this function:
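
With 4-byte guest PTEs one guest page of PTEs needs two shadow pages, so the function doubles the byte offset and lets the overflow past the page size select the "quadrant", i.e. which shadow page the written gPTE lands in. A worked sketch of that index math, simplified to the leaf (non-PSE) case (illustrative only, not kernel code):

#include <stdio.h>

#define DEMO_PAGE_SHIFT 12
#define DEMO_PAGE_SIZE  (1u << DEMO_PAGE_SHIFT)
#define DEMO_PAGE_MASK  (~(DEMO_PAGE_SIZE - 1))

int main(void)
{
        /* Guest writes at these offsets within a page of 4-byte PTEs. */
        unsigned int offsets[] = { 0x004, 0x7fc, 0x800, 0xffc };

        for (unsigned int i = 0; i < 4; i++) {
                unsigned int page_offset = offsets[i];
                unsigned int quadrant;

                page_offset <<= 1;                          /* 32-bit gPTE -> 64-bit sPTE stride */
                quadrant = page_offset >> DEMO_PAGE_SHIFT;  /* which shadow page the write lands in */
                page_offset &= ~DEMO_PAGE_MASK;             /* offset within that shadow page */

                printf("gpa offset %#5x -> quadrant %u, spte offset %#5x (index %u)\n",
                       offsets[i], quadrant, page_offset, page_offset / 8);
        }
        return 0;
}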

◆ gfn_to_memslot_dirty_bitmap()

static struct kvm_memory_slot* gfn_to_memslot_dirty_bitmap ( struct kvm_vcpu *  vcpu,
gfn_t  gfn,
bool  no_dirty_log 
)
static

Definition at line 907 of file mmu.c.

910 {
911  struct kvm_memory_slot *slot;
912 
913  slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
914  if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
915  return NULL;
916  if (no_dirty_log && kvm_slot_dirty_track_enabled(slot))
917  return NULL;
918 
919  return slot;
920 }
struct kvm_memory_slot * kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn)
Definition: kvm_main.c:2636
Here is the call graph for this function:
Here is the caller graph for this function:

◆ gfn_to_rmap()

static struct kvm_rmap_head* gfn_to_rmap ( gfn_t  gfn,
int  level,
const struct kvm_memory_slot *  slot 
)
static

Definition at line 1086 of file mmu.c.

1088 {
1089  unsigned long idx;
1090 
1091  idx = gfn_to_index(gfn, slot->base_gfn, level);
1092  return &slot->arch.rmap[level - PG_LEVEL_4K][idx];
1093 }
static gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
Definition: mmu.h:284
Here is the call graph for this function:
Here is the caller graph for this function:
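
Each memslot keeps one rmap array per level, and the index is the gfn truncated to that level's granularity minus the similarly truncated slot base, i.e. gfn_to_index() from mmu.h. A small sketch of that indexing (demo_gfn_to_index mirrors the assumed formula; the slot base and gfn below are made up):

#include <stdint.h>
#include <stdio.h>

/* (level - 1) * 9: 4K pages at level 1, 2M at level 2, 1G at level 3. */
static unsigned int hpage_gfn_shift(int level)
{
        return (level - 1) * 9;
}

/* Same shape as gfn_to_index(): both gfns are truncated to the level first. */
static uint64_t demo_gfn_to_index(uint64_t gfn, uint64_t base_gfn, int level)
{
        return (gfn >> hpage_gfn_shift(level)) - (base_gfn >> hpage_gfn_shift(level));
}

int main(void)
{
        uint64_t base_gfn = 0x100000;   /* hypothetical memslot base */
        uint64_t gfn = 0x100a37;

        for (int level = 1; level <= 3; level++)
                printf("level %d -> rmap index %llu\n", level,
                       (unsigned long long)demo_gfn_to_index(gfn, base_gfn, level));
        /* level 1: 2615 (one rmap head per 4K page), level 2: 5, level 3: 0 */
        return 0;
}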

◆ handle_mmio_page_fault()

static int handle_mmio_page_fault ( struct kvm_vcpu *  vcpu,
u64  addr,
bool  direct 
)
static

Definition at line 4174 of file mmu.c.

4175 {
4176  u64 spte;
4177  bool reserved;
4178 
4179  if (mmio_info_in_cache(vcpu, addr, direct))
4180  return RET_PF_EMULATE;
4181 
4182  reserved = get_mmio_spte(vcpu, addr, &spte);
4183  if (WARN_ON_ONCE(reserved))
4184  return -EINVAL;
4185 
4186  if (is_mmio_spte(spte)) {
4187  gfn_t gfn = get_mmio_spte_gfn(spte);
4188  unsigned int access = get_mmio_spte_access(spte);
4189 
4190  if (!check_mmio_spte(vcpu, spte))
4191  return RET_PF_INVALID;
4192 
4193  if (direct)
4194  addr = 0;
4195 
4196  trace_handle_mmio_page_fault(addr, gfn, access);
4197  vcpu_cache_mmio_info(vcpu, addr, gfn, access);
4198  return RET_PF_EMULATE;
4199  }
4200 
4201  /*
4202  * If the page table is zapped by other cpus, let CPU fault again on
4203  * the address.
4204  */
4205  return RET_PF_RETRY;
4206 }
static gfn_t get_mmio_spte_gfn(u64 spte)
Definition: mmu.c:301
static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
Definition: mmu.c:4125
static unsigned get_mmio_spte_access(u64 spte)
Definition: mmu.c:311
static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
Definition: mmu.c:316
static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
Definition: mmu.c:4084
static bool is_mmio_spte(u64 spte)
Definition: spte.h:252
static void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, unsigned access)
Definition: x86.h:218
Here is the call graph for this function:
Here is the caller graph for this function:

◆ host_pfn_mapping_level()

static int host_pfn_mapping_level ( struct kvm *  kvm,
gfn_t  gfn,
const struct kvm_memory_slot *  slot 
)
static

Definition at line 3082 of file mmu.c.

3084 {
3085  int level = PG_LEVEL_4K;
3086  unsigned long hva;
3087  unsigned long flags;
3088  pgd_t pgd;
3089  p4d_t p4d;
3090  pud_t pud;
3091  pmd_t pmd;
3092 
3093  /*
3094  * Note, using the already-retrieved memslot and __gfn_to_hva_memslot()
3095  * is not solely for performance, it's also necessary to avoid the
3096  * "writable" check in __gfn_to_hva_many(), which will always fail on
3097  * read-only memslots due to gfn_to_hva() assuming writes. Earlier
3098  * page fault steps have already verified the guest isn't writing a
3099  * read-only memslot.
3100  */
3101  hva = __gfn_to_hva_memslot(slot, gfn);
3102 
3103  /*
3104  * Disable IRQs to prevent concurrent tear down of host page tables,
3105  * e.g. if the primary MMU promotes a P*D to a huge page and then frees
3106  * the original page table.
3107  */
3108  local_irq_save(flags);
3109 
3110  /*
3111  * Read each entry once. As above, a non-leaf entry can be promoted to
3112  * a huge page _during_ this walk. Re-reading the entry could send the
3113  * walk into the weeds, e.g. p*d_large() returns false (sees the old
3114  * value) and then p*d_offset() walks into the target huge page instead
3115  * of the old page table (sees the new value).
3116  */
3117  pgd = READ_ONCE(*pgd_offset(kvm->mm, hva));
3118  if (pgd_none(pgd))
3119  goto out;
3120 
3121  p4d = READ_ONCE(*p4d_offset(&pgd, hva));
3122  if (p4d_none(p4d) || !p4d_present(p4d))
3123  goto out;
3124 
3125  pud = READ_ONCE(*pud_offset(&p4d, hva));
3126  if (pud_none(pud) || !pud_present(pud))
3127  goto out;
3128 
3129  if (pud_leaf(pud)) {
3130  level = PG_LEVEL_1G;
3131  goto out;
3132  }
3133 
3134  pmd = READ_ONCE(*pmd_offset(&pud, hva));
3135  if (pmd_none(pmd) || !pmd_present(pmd))
3136  goto out;
3137 
3138  if (pmd_large(pmd))
3139  level = PG_LEVEL_2M;
3140 
3141 out:
3142  local_irq_restore(flags);
3143  return level;
3144 }
Here is the caller graph for this function:

◆ init_kvm_nested_mmu()

static void init_kvm_nested_mmu ( struct kvm_vcpu *  vcpu,
union kvm_cpu_role  new_mode 
)
static

Definition at line 5499 of file mmu.c.

5501 {
5502  struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
5503 
5504  if (new_mode.as_u64 == g_context->cpu_role.as_u64)
5505  return;
5506 
5507  g_context->cpu_role.as_u64 = new_mode.as_u64;
5508  g_context->get_guest_pgd = get_guest_cr3;
5509  g_context->get_pdptr = kvm_pdptr_read;
5510  g_context->inject_page_fault = kvm_inject_page_fault;
5511 
5512  /*
5513  * L2 page tables are never shadowed, so there is no need to sync
5514  * SPTEs.
5515  */
5516  g_context->sync_spte = NULL;
5517 
5518  /*
5519  * Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using
5520  * L1's nested page tables (e.g. EPT12). The nested translation
5521  * of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using
5522  * L2's page tables as the first level of translation and L1's
5523  * nested page tables as the second level of translation. Basically
5524  * the gva_to_gpa functions between mmu and nested_mmu are swapped.
5525  */
5526  if (!is_paging(vcpu))
5527  g_context->gva_to_gpa = nonpaging_gva_to_gpa;
5528  else if (is_long_mode(vcpu))
5529  g_context->gva_to_gpa = paging64_gva_to_gpa;
5530  else if (is_pae(vcpu))
5531  g_context->gva_to_gpa = paging64_gva_to_gpa;
5532  else
5533  g_context->gva_to_gpa = paging32_gva_to_gpa;
5534 
5535  reset_guest_paging_metadata(vcpu, g_context);
5536 }
static u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gpa_t vaddr, u64 access, struct x86_exception *exception)
Definition: mmu.c:4075
static unsigned long get_guest_cr3(struct kvm_vcpu *vcpu)
Definition: mmu.c:258
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
Definition: x86.c:779
static bool is_paging(struct kvm_vcpu *vcpu)
Definition: x86.h:198
static bool is_long_mode(struct kvm_vcpu *vcpu)
Definition: x86.h:143
static bool is_pae(struct kvm_vcpu *vcpu)
Definition: x86.h:188
Here is the call graph for this function:
Here is the caller graph for this function:
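
The swap described in the comment can be pictured as a composition of two walks: L2's own page tables first (l2_gva to l2_gpa), then L1's nested tables (l2_gpa to l1_gpa). A purely conceptual sketch under that reading; walk_l2_page_tables() and walk_l1_nested_tables() are hypothetical stubs, not KVM functions:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t gva_t;
typedef uint64_t gpa_t;

/* Hypothetical stand-ins for the two translations involved; both are stubs. */
static gpa_t walk_l2_page_tables(gva_t l2_gva)   { return l2_gva; }
static gpa_t walk_l1_nested_tables(gpa_t l2_gpa) { return l2_gpa; }

/*
 * nested_mmu.gva_to_gpa conceptually composes the two walks: L2's own page
 * tables first (l2_gva -> l2_gpa), then L1's nested tables (l2_gpa -> l1_gpa).
 */
static gpa_t nested_gva_to_gpa(gva_t l2_gva)
{
        return walk_l1_nested_tables(walk_l2_page_tables(l2_gva));
}

int main(void)
{
        printf("l2_gva 0x1000 -> l1_gpa 0x%llx\n",
               (unsigned long long)nested_gva_to_gpa(0x1000));
        return 0;
}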

◆ init_kvm_softmmu()

static void init_kvm_softmmu ( struct kvm_vcpu *  vcpu,
union kvm_cpu_role  cpu_role 
)
static

Definition at line 5487 of file mmu.c.

5489 {
5490  struct kvm_mmu *context = &vcpu->arch.root_mmu;
5491 
5492  kvm_init_shadow_mmu(vcpu, cpu_role);
5493 
5494  context->get_guest_pgd = get_guest_cr3;
5495  context->get_pdptr = kvm_pdptr_read;
5496  context->inject_page_fault = kvm_inject_page_fault;
5497 }
static void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, union kvm_cpu_role cpu_role)
Definition: mmu.c:5382
Here is the call graph for this function:
Here is the caller graph for this function:

◆ init_kvm_tdp_mmu()

static void init_kvm_tdp_mmu ( struct kvm_vcpu *  vcpu,
union kvm_cpu_role  cpu_role 
)
static

Definition at line 5331 of file mmu.c.

5333 {
5334  struct kvm_mmu *context = &vcpu->arch.root_mmu;
5335  union kvm_mmu_page_role root_role = kvm_calc_tdp_mmu_root_page_role(vcpu, cpu_role);
5336 
5337  if (cpu_role.as_u64 == context->cpu_role.as_u64 &&
5338  root_role.word == context->root_role.word)
5339  return;
5340 
5341  context->cpu_role.as_u64 = cpu_role.as_u64;
5342  context->root_role.word = root_role.word;
5343  context->page_fault = kvm_tdp_page_fault;
5344  context->sync_spte = NULL;
5345  context->get_guest_pgd = get_guest_cr3;
5346  context->get_pdptr = kvm_pdptr_read;
5347  context->inject_page_fault = kvm_inject_page_fault;
5348 
5349  if (!is_cr0_pg(context))
5350  context->gva_to_gpa = nonpaging_gva_to_gpa;
5351  else if (is_cr4_pae(context))
5352  context->gva_to_gpa = paging64_gva_to_gpa;
5353  else
5354  context->gva_to_gpa = paging32_gva_to_gpa;
5355 
5356  reset_guest_paging_metadata(vcpu, context);
5357  reset_tdp_shadow_zero_bits_mask(context);
5358 }
static union kvm_mmu_page_role kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, union kvm_cpu_role cpu_role)
Definition: mmu.c:5313
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
Definition: mmu.c:4623
static void reset_tdp_shadow_zero_bits_mask(struct kvm_mmu *context)
Definition: mmu.c:5031
static bool is_cr4_pae(struct kvm_mmu *mmu)
Definition: mmu.c:242
static bool is_cr0_pg(struct kvm_mmu *mmu)
Definition: mmu.c:237
Here is the call graph for this function:
Here is the caller graph for this function:

◆ is_access_allowed()

static bool is_access_allowed ( struct kvm_page_fault fault,
u64  spte 
)
static

Definition at line 3406 of file mmu.c.

3407 {
3408  if (fault->exec)
3409  return is_executable_pte(spte);
3410 
3411  if (fault->write)
3412  return is_writable_pte(spte);
3413 
3414  /* Fault was on Read access */
3415  return spte & PT_PRESENT_MASK;
3416 }
#define PT_PRESENT_MASK
Definition: mmu.h:14
static bool is_executable_pte(u64 spte)
Definition: spte.h:323
const bool exec
Definition: mmu_internal.h:197
Here is the call graph for this function:
Here is the caller graph for this function:
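
A minimal standalone version of this permission check, assuming illustrative bit positions for the present/writable/executable SPTE bits (the real layouts live in spte.h):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative SPTE permission bits; not the real spte.h layout. */
#define SPTE_PRESENT  (1ULL << 0)
#define SPTE_WRITABLE (1ULL << 1)
#define SPTE_EXEC     (1ULL << 2)

struct fault_info {
        bool exec;
        bool write;
};

/* Mirrors the shape of is_access_allowed(): exec, then write, then read. */
static bool access_allowed(const struct fault_info *f, uint64_t spte)
{
        if (f->exec)
                return spte & SPTE_EXEC;
        if (f->write)
                return spte & SPTE_WRITABLE;
        return spte & SPTE_PRESENT;   /* a read only needs presence */
}

int main(void)
{
        struct fault_info write_fault = { .exec = false, .write = true };
        uint64_t ro_spte = SPTE_PRESENT;        /* present, not writable */

        printf("write on read-only spte allowed? %d\n",
               access_allowed(&write_fault, ro_spte));
        return 0;
}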

◆ is_cpuid_PSE36()

static int is_cpuid_PSE36 ( void  )
static

Definition at line 331 of file mmu.c.

332 {
333  return 1;
334 }
Here is the caller graph for this function:

◆ is_cr0_pg()

static bool is_cr0_pg ( struct kvm_mmu *  mmu)
inlinestatic

Definition at line 237 of file mmu.c.

238 {
239  return mmu->cpu_role.base.level > 0;
240 }
Here is the caller graph for this function:

◆ is_cr4_pae()

static bool is_cr4_pae ( struct kvm_mmu *  mmu)
inlinestatic

Definition at line 242 of file mmu.c.

243 {
244  return !mmu->cpu_role.base.has_4_byte_gpte;
245 }
Here is the caller graph for this function:

◆ is_obsolete_root()

static bool is_obsolete_root ( struct kvm *  kvm,
hpa_t  root_hpa 
)
static

Definition at line 5632 of file mmu.c.

5633 {
5634  struct kvm_mmu_page *sp;
5635 
5636  if (!VALID_PAGE(root_hpa))
5637  return false;
5638 
5639  /*
5640  * When freeing obsolete roots, treat roots as obsolete if they don't
5641  * have an associated shadow page, as it's impossible to determine if
5642  * such roots are fresh or stale. This does mean KVM will get false
5643  * positives and free roots that don't strictly need to be freed, but
5644  * such false positives are relatively rare:
5645  *
5646  * (a) only PAE paging and nested NPT have roots without shadow pages
5647  * (or any shadow paging flavor with a dummy root, see note below)
5648  * (b) remote reloads due to a memslot update obsoletes _all_ roots
5649  * (c) KVM doesn't track previous roots for PAE paging, and the guest
5650  * is unlikely to zap an in-use PGD.
5651  *
5652  * Note! Dummy roots are unique in that they are obsoleted by memslot
5653  * _creation_! See also FNAME(fetch).
5654  */
5655  sp = root_to_sp(root_hpa);
5656  return !sp || is_obsolete_sp(kvm, sp);
5657 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ is_obsolete_sp()

static bool is_obsolete_sp ( struct kvm *  kvm,
struct kvm_mmu_page sp 
)
static

Definition at line 2011 of file mmu.c.

2012 {
2013  if (sp->role.invalid)
2014  return true;
2015 
2016  /* TDP MMU pages do not use the MMU generation. */
2017  return !is_tdp_mmu_page(sp) &&
2018  unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
2019 }
static bool is_tdp_mmu_page(struct kvm_mmu_page *sp)
Definition: tdp_mmu.h:74
Here is the call graph for this function:
Here is the caller graph for this function:

◆ is_page_fault_stale()

static bool is_page_fault_stale ( struct kvm_vcpu *  vcpu,
struct kvm_page_fault fault 
)
static

Definition at line 4462 of file mmu.c.

4464 {
4465  struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa);
4466 
4467  /* Special roots, e.g. pae_root, are not backed by shadow pages. */
4468  if (sp && is_obsolete_sp(vcpu->kvm, sp))
4469  return true;
4470 
4471  /*
4472  * Roots without an associated shadow page are considered invalid if
4473  * there is a pending request to free obsolete roots. The request is
4474  * only a hint that the current root _may_ be obsolete and needs to be
4475  * reloaded, e.g. if the guest frees a PGD that KVM is tracking as a
4476  * previous root, then __kvm_mmu_prepare_zap_page() signals all vCPUs
4477  * to reload even if no vCPU is actively using the root.
4478  */
4479  if (!sp && kvm_test_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
4480  return true;
4481 
4482  /*
4483  * Check for a relevant mmu_notifier invalidation event one last time
4484  * now that mmu_lock is held, as the "unsafe" checks performed without
4485  * holding mmu_lock can get false negatives.
4486  */
4487  return fault->slot &&
4488  mmu_invalidate_retry_gfn(vcpu->kvm, fault->mmu_seq, fault->gfn);
4489 }
unsigned long mmu_seq
Definition: mmu_internal.h:239
Here is the call graph for this function:
Here is the caller graph for this function:

◆ is_root_usable()

static bool is_root_usable ( struct kvm_mmu_root_info *  root,
gpa_t  pgd,
union kvm_mmu_page_role  role 
)
inlinestatic

Definition at line 4656 of file mmu.c.

4658 {
4659  struct kvm_mmu_page *sp;
4660 
4661  if (!VALID_PAGE(root->hpa))
4662  return false;
4663 
4664  if (!role.direct && pgd != root->pgd)
4665  return false;
4666 
4667  sp = root_to_sp(root->hpa);
4668  if (WARN_ON_ONCE(!sp))
4669  return false;
4670 
4671  return role.word == sp->role.word;
4672 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ is_tdp_mmu_active()

static bool is_tdp_mmu_active ( struct kvm_vcpu *  vcpu)
inlinestatic

Definition at line 640 of file mmu.c.

641 {
642  return tdp_mmu_enabled && vcpu->arch.mmu->root_role.direct;
643 }
Here is the caller graph for this function:

◆ is_unsync_root()

static bool is_unsync_root ( hpa_t  root)
static

Definition at line 3986 of file mmu.c.

3987 {
3988  struct kvm_mmu_page *sp;
3989 
3990  if (!VALID_PAGE(root) || kvm_mmu_is_dummy_root(root))
3991  return false;
3992 
3993  /*
3994  * The read barrier orders the CPU's read of SPTE.W during the page table
3995  * walk before the reads of sp->unsync/sp->unsync_children here.
3996  *
3997  * Even if another CPU was marking the SP as unsync-ed simultaneously,
3998  * any guest page table changes are not guaranteed to be visible anyway
3999  * until this VCPU issues a TLB flush strictly after those changes are
4000  * made. We only need to ensure that the other CPU sets these flags
4001  * before any actual changes to the page tables are made. The comments
4002  * in mmu_try_to_unsync_pages() describe what could go wrong if this
4003  * requirement isn't satisfied.
4004  */
4005  smp_rmb();
4006  sp = root_to_sp(root);
4007 
4008  /*
4009  * PAE roots (somewhat arbitrarily) aren't backed by shadow pages, the
4010  * PDPTEs for a given PAE root need to be synchronized individually.
4011  */
4012  if (WARN_ON_ONCE(!sp))
4013  return false;
4014 
4015  if (sp->unsync || sp->unsync_children)
4016  return true;
4017 
4018  return false;
4019 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_account_mmu_page()

static void kvm_account_mmu_page ( struct kvm *  kvm,
struct kvm_mmu_page sp 
)
static

Definition at line 1725 of file mmu.c.

1726 {
1727  kvm_mod_used_mmu_pages(kvm, +1);
1728  kvm_account_pgtable_pages((void *)sp->spt, +1);
1729 }
static void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr)
Definition: mmu.c:1719
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_age_gfn()

bool kvm_age_gfn ( struct kvm *  kvm,
struct kvm_gfn_range *  range 
)

Definition at line 1673 of file mmu.c.

1674 {
1675  bool young = false;
1676 
1677  if (kvm_memslots_have_rmaps(kvm))
1678  young = kvm_handle_gfn_range(kvm, range, kvm_age_rmap);
1679 
1680  if (tdp_mmu_enabled)
1681  young |= kvm_tdp_mmu_age_gfn_range(kvm, range);
1682 
1683  return young;
1684 }
static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, rmap_handler_t handler)
Definition: mmu.c:1567
static bool kvm_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, pte_t unused)
Definition: mmu.c:1612
static bool kvm_memslots_have_rmaps(struct kvm *kvm)
Definition: mmu.h:279
bool kvm_tdp_mmu_age_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
Definition: tdp_mmu.c:1195
Here is the call graph for this function:

◆ kvm_age_rmap()

static bool kvm_age_rmap ( struct kvm *  kvm,
struct kvm_rmap_head *  rmap_head,
struct kvm_memory_slot *  slot,
gfn_t  gfn,
int  level,
pte_t  unused 
)
static

Definition at line 1612 of file mmu.c.

1615 {
1616  u64 *sptep;
1617  struct rmap_iterator iter;
1618  int young = 0;
1619 
1620  for_each_rmap_spte(rmap_head, &iter, sptep)
1621  young |= mmu_spte_age(sptep);
1622 
1623  return young;
1624 }
static bool mmu_spte_age(u64 *sptep)
Definition: mmu.c:615
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_arch_async_page_ready()

void kvm_arch_async_page_ready ( struct kvm_vcpu *  vcpu,
struct kvm_async_pf *  work 
)

Definition at line 4263 of file mmu.c.

4264 {
4265  int r;
4266 
4267  if ((vcpu->arch.mmu->root_role.direct != work->arch.direct_map) ||
4268  work->wakeup_all)
4269  return;
4270 
4271  r = kvm_mmu_reload(vcpu);
4272  if (unlikely(r))
4273  return;
4274 
4275  if (!vcpu->arch.mmu->root_role.direct &&
4276  work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu))
4277  return;
4278 
4279  kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL);
4280 }
static unsigned long kvm_mmu_get_guest_pgd(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
Definition: mmu.c:263
static int kvm_mmu_reload(struct kvm_vcpu *vcpu)
Definition: mmu.h:127
static int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err, bool prefetch, int *emulation_type)
Definition: mmu_internal.h:282
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_arch_flush_shadow_all()

void kvm_arch_flush_shadow_all ( struct kvm *  kvm)

Definition at line 6823 of file mmu.c.

6824 {
6825  kvm_mmu_zap_all(kvm);
6826 }
static void kvm_mmu_zap_all(struct kvm *kvm)
Definition: mmu.c:6798
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_arch_flush_shadow_memslot()

void kvm_arch_flush_shadow_memslot ( struct kvm *  kvm,
struct kvm_memory_slot *  slot 
)

Definition at line 6828 of file mmu.c.

6830 {
6831  kvm_mmu_zap_all_fast(kvm);
6832 }
static void kvm_mmu_zap_all_fast(struct kvm *kvm)
Definition: mmu.c:6248
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_arch_mmu_enable_log_dirty_pt_masked()

void kvm_arch_mmu_enable_log_dirty_pt_masked ( struct kvm *  kvm,
struct kvm_memory_slot *  slot,
gfn_t  gfn_offset,
unsigned long  mask 
)

kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected PT level pages.

It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to enable dirty logging for them.

We need to care about huge page mappings: e.g. during dirty logging we may have such mappings.

Definition at line 1373 of file mmu.c.

1376 {
1377  /*
1378  * Huge pages are NOT write protected when we start dirty logging in
1379  * initially-all-set mode; must write protect them here so that they
1380  * are split to 4K on the first write.
1381  *
1382  * The gfn_offset is guaranteed to be aligned to 64, but the base_gfn
1383  * of memslot has no such restriction, so the range can cross two large
1384  * pages.
1385  */
1386  if (kvm_dirty_log_manual_protect_and_init_set(kvm)) {
1387  gfn_t start = slot->base_gfn + gfn_offset + __ffs(mask);
1388  gfn_t end = slot->base_gfn + gfn_offset + __fls(mask);
1389 
1390  if (READ_ONCE(eager_page_split))
1391  kvm_mmu_try_split_huge_pages(kvm, slot, start, end + 1, PG_LEVEL_4K);
1392 
1393  kvm_mmu_slot_gfn_write_protect(kvm, slot, start, PG_LEVEL_2M);
1394 
1395  /* Cross two large pages? */
1396  if (ALIGN(start << PAGE_SHIFT, PMD_SIZE) !=
1397  ALIGN(end << PAGE_SHIFT, PMD_SIZE))
1398  kvm_mmu_slot_gfn_write_protect(kvm, slot, end,
1399  PG_LEVEL_2M);
1400  }
1401 
1402  /* Now handle 4K PTEs. */
1403  if (kvm_x86_ops.cpu_dirty_log_size)
1404  kvm_mmu_clear_dirty_pt_masked(kvm, slot, gfn_offset, mask);
1405  else
1406  kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
1407 }
void kvm_mmu_try_split_huge_pages(struct kvm *kvm, const struct kvm_memory_slot *memslot, u64 start, u64 end, int target_level)
Definition: mmu.c:6654
static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask)
Definition: mmu.c:1307
static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask)
Definition: mmu.c:1340
bool __read_mostly eager_page_split
Definition: x86.c:196
Here is the call graph for this function:
Here is the caller graph for this function:
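
The mask handling above derives a start/end gfn pair from one 64-bit dirty-bitmap word and then checks whether that range crosses a 2 MiB boundary. A standalone sketch of the same arithmetic, with GCC builtins standing in for the kernel's __ffs()/__fls(), and PAGE_SHIFT/PMD_SIZE assumed to be the usual x86 values:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PMD_SIZE   (1ULL << 21)                  /* 2 MiB, x86 assumption */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        uint64_t base_gfn = 0x100000;            /* arbitrary example slot base */
        uint64_t gfn_offset = 64;                /* this word covers gfns [offset, offset+63] */
        uint64_t mask = 0x00f0000000000081ULL;   /* arbitrary dirty-bitmap word */

        /* Userspace stand-ins for the kernel's __ffs()/__fls(). */
        uint64_t start = base_gfn + gfn_offset + __builtin_ctzll(mask);
        uint64_t end   = base_gfn + gfn_offset + (63 - __builtin_clzll(mask));

        int crosses = ALIGN_UP(start << PAGE_SHIFT, PMD_SIZE) !=
                      ALIGN_UP(end << PAGE_SHIFT, PMD_SIZE);

        printf("dirty gfn range: %#llx..%#llx, crosses 2M boundary: %d\n",
               (unsigned long long)start, (unsigned long long)end, crosses);
        return 0;
}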

◆ kvm_arch_setup_async_pf()

static bool kvm_arch_setup_async_pf ( struct kvm_vcpu *  vcpu,
gpa_t  cr2_or_gpa,
gfn_t  gfn 
)
static

Definition at line 4249 of file mmu.c.

4251 {
4252  struct kvm_arch_async_pf arch;
4253 
4254  arch.token = alloc_apf_token(vcpu);
4255  arch.gfn = gfn;
4256  arch.direct_map = vcpu->arch.mmu->root_role.direct;
4257  arch.cr3 = kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu);
4258 
4259  return kvm_setup_async_pf(vcpu, cr2_or_gpa,
4260  kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
4261 }
bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, unsigned long hva, struct kvm_arch_async_pf *arch)
Definition: async_pf.c:184
unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn)
Definition: kvm_main.c:2748
static u32 alloc_apf_token(struct kvm_vcpu *vcpu)
Definition: mmu.c:4238
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_available_flush_remote_tlbs_range()

static bool kvm_available_flush_remote_tlbs_range ( void  )
inlinestatic

Definition at line 272 of file mmu.c.

273 {
274 #if IS_ENABLED(CONFIG_HYPERV)
275  return kvm_x86_ops.flush_remote_tlbs_range;
276 #else
277  return false;
278 #endif
279 }
Here is the caller graph for this function:

◆ kvm_calc_cpu_role()

static union kvm_cpu_role kvm_calc_cpu_role ( struct kvm_vcpu *  vcpu,
const struct kvm_mmu_role_regs regs 
)
static

Definition at line 5237 of file mmu.c.

5246 {
5247  union kvm_cpu_role role = {0};
5248 
5249  role.base.access = ACC_ALL;
5250  role.base.smm = is_smm(vcpu);
5251  role.base.guest_mode = is_guest_mode(vcpu);
5252  role.ext.valid = 1;
5253 
5254  if (!____is_cr0_pg(regs)) {
5255  role.base.direct = 1;
5256  return role;
5257  }
5258 
5259  role.base.efer_nx = ____is_efer_nx(regs);
5260  role.base.cr0_wp = ____is_cr0_wp(regs);
5261  role.base.smep_andnot_wp = ____is_cr4_smep(regs) && !____is_cr0_wp(regs);
5262  role.base.smap_andnot_wp = ____is_cr4_smap(regs) && !____is_cr0_wp(regs);
5263  role.base.has_4_byte_gpte = !____is_cr4_pae(regs);
5264 
5265  if (____is_efer_lma(regs))
5266  role.base.level = ____is_cr4_la57(regs) ? PT64_ROOT_5LEVEL
5267  : PT64_ROOT_4LEVEL;
5268  else if (____is_cr4_pae(regs))
5269  role.base.level = PT32E_ROOT_LEVEL;
5270  else
5271  role.base.level = PT32_ROOT_LEVEL;
5272 
5273  role.ext.cr4_smep = ____is_cr4_smep(regs);
5274  role.ext.cr4_smap = ____is_cr4_smap(regs);
5275  role.ext.cr4_pse = ____is_cr4_pse(regs);
5276 
5277  /* PKEY and LA57 are active iff long mode is active. */
5278  role.ext.cr4_pke = ____is_efer_lma(regs) && ____is_cr4_pke(regs);
5279  role.ext.cr4_la57 = ____is_efer_lma(regs) && ____is_cr4_la57(regs);
5280  role.ext.efer_lma = ____is_efer_lma(regs);
5281  return role;
5282 }
static bool is_smm(struct kvm_vcpu *vcpu)
Definition: smm.h:160
Here is the caller graph for this function:
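
A boiled-down sketch of the root-level selection encoded above, keeping only the EFER.LMA / CR4.PAE / CR4.LA57 decision tree; the level constants are illustrative stand-ins for the kernel's PT32/PT32E/PT64 root levels:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the kernel's root-level constants. */
enum { PT32_ROOT_LEVEL = 2, PT32E_ROOT_LEVEL = 3,
       PT64_ROOT_4LEVEL = 4, PT64_ROOT_5LEVEL = 5 };

/* Same decision tree as the EFER.LMA / CR4.PAE / CR4.LA57 checks above. */
static int guest_paging_level(bool cr0_pg, bool efer_lma, bool cr4_pae, bool cr4_la57)
{
        if (!cr0_pg)
                return 0;                        /* no guest paging: role.base.direct */
        if (efer_lma)
                return cr4_la57 ? PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
        if (cr4_pae)
                return PT32E_ROOT_LEVEL;
        return PT32_ROOT_LEVEL;
}

int main(void)
{
        printf("64-bit, LA57=0 -> %d levels\n", guest_paging_level(true, true, true, false));
        printf("32-bit PAE     -> %d levels\n", guest_paging_level(true, false, true, false));
        printf("32-bit non-PAE -> %d levels\n", guest_paging_level(true, false, false, false));
        return 0;
}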

◆ kvm_calc_shadow_ept_root_page_role()

static union kvm_cpu_role kvm_calc_shadow_ept_root_page_role ( struct kvm_vcpu *  vcpu,
bool  accessed_dirty,
bool  execonly,
u8  level 
)
static

Definition at line 5431 of file mmu.c.

5436 {
5437  union kvm_cpu_role role = {0};
5438 
5439  /*
5440  * KVM does not support SMM transfer monitors, and consequently does not
5441  * support the "entry to SMM" control either. role.base.smm is always 0.
5442  */
5443  WARN_ON_ONCE(is_smm(vcpu));
5444  role.base.level = level;
5445  role.base.has_4_byte_gpte = false;
5446  role.base.direct = false;
5447  role.base.ad_disabled = !accessed_dirty;
5448  role.base.guest_mode = true;
5449  role.base.access = ACC_ALL;
5450 
5451  role.ext.word = 0;
5452  role.ext.execonly = execonly;
5453  role.ext.valid = 1;
5454 
5455  return role;
5456 }
Here is the caller graph for this function:

◆ kvm_calc_tdp_mmu_root_page_role()

static union kvm_mmu_page_role kvm_calc_tdp_mmu_root_page_role ( struct kvm_vcpu *  vcpu,
union kvm_cpu_role  cpu_role 
)
static

Definition at line 5299 of file mmu.c.

5315 {
5316  union kvm_mmu_page_role role = {0};
5317 
5318  role.access = ACC_ALL;
5319  role.cr0_wp = true;
5320  role.efer_nx = true;
5321  role.smm = cpu_role.base.smm;
5322  role.guest_mode = cpu_role.base.guest_mode;
5323  role.ad_disabled = !kvm_ad_enabled();
5324  role.level = kvm_mmu_get_tdp_level(vcpu);
5325  role.direct = true;
5326  role.has_4_byte_gpte = false;
5327 
5328  return role;
5329 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_configure_mmu()

void kvm_configure_mmu ( bool  enable_tdp,
int  tdp_forced_root_level,
int  tdp_max_root_level,
int  tdp_huge_page_level 
)

Definition at line 6012 of file mmu.c.

6014 {
6015  tdp_enabled = enable_tdp;
6016  tdp_root_level = tdp_forced_root_level;
6017  max_tdp_level = tdp_max_root_level;
6018 
6019 #ifdef CONFIG_X86_64
6020  tdp_mmu_enabled = tdp_mmu_allowed && tdp_enabled;
6021 #endif
6022  /*
6023  * max_huge_page_level reflects KVM's MMU capabilities irrespective
6024  * of kernel support, e.g. KVM may be capable of using 1GB pages when
6025  * the kernel is not. But, KVM never creates a page size greater than
6026  * what is used by the kernel for any given HVA, i.e. the kernel's
6027  * capabilities are ultimately consulted by kvm_mmu_hugepage_adjust().
6028  */
6029  if (tdp_enabled)
6030  max_huge_page_level = tdp_huge_page_level;
6031  else if (boot_cpu_has(X86_FEATURE_GBPAGES))
6032  max_huge_page_level = PG_LEVEL_1G;
6033  else
6034  max_huge_page_level = PG_LEVEL_2M;
6035 }
static bool __ro_after_init tdp_mmu_allowed
Definition: mmu.c:108
Here is the caller graph for this function:

◆ kvm_cpu_dirty_log_size()

int kvm_cpu_dirty_log_size ( void  )

Definition at line 1409 of file mmu.c.

1410 {
1411  return kvm_x86_ops.cpu_dirty_log_size;
1412 }

◆ kvm_faultin_pfn()

static int kvm_faultin_pfn ( struct kvm_vcpu *  vcpu,
struct kvm_page_fault fault,
unsigned int  access 
)
static

Definition at line 4400 of file mmu.c.

4402 {
4403  int ret;
4404 
4405  fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
4406  smp_rmb();
4407 
4408  /*
4409  * Check for a relevant mmu_notifier invalidation event before getting
4410  * the pfn from the primary MMU, and before acquiring mmu_lock.
4411  *
4412  * For mmu_lock, if there is an in-progress invalidation and the kernel
4413  * allows preemption, the invalidation task may drop mmu_lock and yield
4414  * in response to mmu_lock being contended, which is *very* counter-
4415  * productive as this vCPU can't actually make forward progress until
4416  * the invalidation completes.
4417  *
4418  * Retrying now can also avoid unnecessary lock contention in the primary
4419  * MMU, as the primary MMU doesn't necessarily hold a single lock for
4420  * the duration of the invalidation, i.e. faulting in a conflicting pfn
4421  * can cause the invalidation to take longer by holding locks that are
4422  * needed to complete the invalidation.
4423  *
4424  * Do the pre-check even for non-preemptible kernels, i.e. even if KVM
4425  * will never yield mmu_lock in response to contention, as this vCPU is
4426  * *guaranteed* to need to retry, i.e. waiting until mmu_lock is held
4427  * to detect retry guarantees the worst case latency for the vCPU.
4428  */
4429  if (fault->slot &&
4430  mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn))
4431  return RET_PF_RETRY;
4432 
4433  ret = __kvm_faultin_pfn(vcpu, fault);
4434  if (ret != RET_PF_CONTINUE)
4435  return ret;
4436 
4437  if (unlikely(is_error_pfn(fault->pfn)))
4438  return kvm_handle_error_pfn(vcpu, fault);
4439 
4440  if (unlikely(!fault->slot))
4441  return kvm_handle_noslot_fault(vcpu, fault, access);
4442 
4443  /*
4444  * Check again for a relevant mmu_notifier invalidation event purely to
4445  * avoid contending mmu_lock. Most invalidations will be detected by
4446  * the previous check, but checking is extremely cheap relative to the
4447  * overall cost of failing to detect the invalidation until after
4448  * mmu_lock is acquired.
4449  */
4450  if (mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn)) {
4451  kvm_release_pfn_clean(fault->pfn);
4452  return RET_PF_RETRY;
4453  }
4454 
4455  return RET_PF_CONTINUE;
4456 }
static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
Definition: mmu.c:4331
static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
Definition: mmu.c:3288
static int kvm_handle_noslot_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, unsigned int access)
Definition: mmu.c:3311
Here is the call graph for this function:
Here is the caller graph for this function:
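
The comments above describe a snapshot-and-recheck pattern: record the invalidation generation, do the expensive pfn lookup, then recheck before committing. A generic userspace sketch of that pattern, assuming a single generation counter (the real code also consults in-progress invalidation ranges, which is omitted here):

#include <stdatomic.h>
#include <stdio.h>

/* Illustrative invalidation generation counter (stand-in for mmu_invalidate_seq). */
static atomic_ulong invalidate_seq;

/* A bumped generation means an invalidation ran (or is still running). */
static int recheck_needed(unsigned long snapshot)
{
        return atomic_load(&invalidate_seq) != snapshot;
}

/* Sketch of the fault path: snapshot, cheap pre-check, slow lookup, recheck. */
static int faultin_pfn_sketch(void)
{
        unsigned long snapshot = atomic_load(&invalidate_seq);

        /* Pre-check: bail before the expensive lookup if already stale. */
        if (recheck_needed(snapshot))
                return -1;                       /* RET_PF_RETRY analogue */

        /* ... the expensive pfn lookup from the primary MMU would go here ... */

        /* Recheck before committing, purely to avoid pointless lock contention. */
        if (recheck_needed(snapshot))
                return -1;

        return 0;                                /* RET_PF_CONTINUE analogue */
}

int main(void)
{
        printf("fault result: %d\n", faultin_pfn_sketch());
        return 0;
}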

◆ kvm_faultin_pfn_private()

static int kvm_faultin_pfn_private ( struct kvm_vcpu *  vcpu,
struct kvm_page_fault fault 
)
static

Definition at line 4307 of file mmu.c.

4309 {
4310  int max_order, r;
4311 
4312  if (!kvm_slot_can_be_private(fault->slot)) {
4313  kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
4314  return -EFAULT;
4315  }
4316 
4317  r = kvm_gmem_get_pfn(vcpu->kvm, fault->slot, fault->gfn, &fault->pfn,
4318  &max_order);
4319  if (r) {
4320  kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
4321  return r;
4322  }
4323 
4324  fault->max_level = min(kvm_max_level_for_order(max_order),
4325  fault->max_level);
4326  fault->map_writable = !(fault->slot->flags & KVM_MEM_READONLY);
4327 
4328  return RET_PF_CONTINUE;
4329 }
int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, kvm_pfn_t *pfn, int *max_order)
Definition: guest_memfd.c:485
static u8 kvm_max_level_for_order(int order)
Definition: mmu.c:4282
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_flush_remote_tlbs_sptep()

static void kvm_flush_remote_tlbs_sptep ( struct kvm *  kvm,
u64 *  sptep 
)
static

Definition at line 284 of file mmu.c.

285 {
286  struct kvm_mmu_page *sp = sptep_to_sp(sptep);
287  gfn_t gfn = kvm_mmu_page_get_gfn(sp, spte_index(sptep));
288 
289  kvm_flush_remote_tlbs_gfn(kvm, gfn, sp->role.level);
290 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_handle_error_pfn()

static int kvm_handle_error_pfn ( struct kvm_vcpu *  vcpu,
struct kvm_page_fault fault 
)
static

Definition at line 3288 of file mmu.c.

3289 {
3290  if (is_sigpending_pfn(fault->pfn)) {
3291  kvm_handle_signal_exit(vcpu);
3292  return -EINTR;
3293  }
3294 
3295  /*
3296  * Do not cache the mmio info caused by writing the readonly gfn
3297  * into the spte, otherwise a read access on the readonly gfn can
3298  * also cause an mmio page fault and be treated as mmio access.
3299  */
3300  if (fault->pfn == KVM_PFN_ERR_RO_FAULT)
3301  return RET_PF_EMULATE;
3302 
3303  if (fault->pfn == KVM_PFN_ERR_HWPOISON) {
3304  kvm_send_hwpoison_signal(fault->slot, fault->gfn);
3305  return RET_PF_RETRY;
3306  }
3307 
3308  return -EFAULT;
3309 }
static void kvm_send_hwpoison_signal(struct kvm_memory_slot *slot, gfn_t gfn)
Definition: mmu.c:3281
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_handle_gfn_range()

static __always_inline bool kvm_handle_gfn_range ( struct kvm *  kvm,
struct kvm_gfn_range *  range,
rmap_handler_t  handler 
)
static

Definition at line 1567 of file mmu.c.

1570 {
1571  struct slot_rmap_walk_iterator iterator;
1572  bool ret = false;
1573 
1574  for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
1575  range->start, range->end - 1, &iterator)
1576  ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn,
1577  iterator.level, range->arg.pte);
1578 
1579  return ret;
1580 }
Here is the caller graph for this function:
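
The rmap_handler_t dispatch used here simply walks every rmap bucket in the range and ORs the handler results together. A simplified sketch of that shape, with placeholder types and a trivial handler standing in for kvm_age_rmap():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t gfn_t;

/* Simplified analogue of rmap_handler_t: acts on one (gfn, level) bucket. */
typedef bool (*gfn_handler_t)(gfn_t gfn, int level);

/* Walk the range at one level and OR together the per-bucket results. */
static bool handle_gfn_range(gfn_t start, gfn_t end, int level, gfn_handler_t handler)
{
        bool ret = false;
        gfn_t gfn;

        for (gfn = start; gfn < end; gfn++)
                ret |= handler(gfn, level);

        return ret;
}

static bool age_one_bucket(gfn_t gfn, int level)
{
        (void)level;
        return (gfn & 1) != 0;                   /* arbitrary stand-in for mmu_spte_age() */
}

int main(void)
{
        printf("any young? %d\n", handle_gfn_range(0x1000, 0x1010, 1, age_one_bucket));
        return 0;
}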

◆ kvm_handle_noslot_fault()

static int kvm_handle_noslot_fault ( struct kvm_vcpu *  vcpu,
struct kvm_page_fault fault,
unsigned int  access 
)
static

Definition at line 3311 of file mmu.c.

3314 {
3315  gva_t gva = fault->is_tdp ? 0 : fault->addr;
3316 
3317  vcpu_cache_mmio_info(vcpu, gva, fault->gfn,
3318  access & shadow_mmio_access_mask);
3319 
3320  /*
3321  * If MMIO caching is disabled, emulate immediately without
3322  * touching the shadow page tables as attempting to install an
3323  * MMIO SPTE will just be an expensive nop.
3324  */
3325  if (unlikely(!enable_mmio_caching))
3326  return RET_PF_EMULATE;
3327 
3328  /*
3329  * Do not create an MMIO SPTE for a gfn greater than host.MAXPHYADDR,
3330  * any guest that generates such gfns is running nested and is being
3331  * tricked by L0 userspace (you can observe gfn > L1.MAXPHYADDR if and
3332  * only if L1's MAXPHYADDR is inaccurate with respect to the
3333  * hardware's).
3334  */
3335  if (unlikely(fault->gfn > kvm_mmu_max_gfn()))
3336  return RET_PF_EMULATE;
3337 
3338  return RET_PF_CONTINUE;
3339 }
static gfn_t kvm_mmu_max_gfn(void)
Definition: mmu.h:66
bool __read_mostly enable_mmio_caching
Definition: spte.c:22
const bool is_tdp
Definition: mmu_internal.h:204
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_handle_page_fault()

int kvm_handle_page_fault ( struct kvm_vcpu *  vcpu,
u64  error_code,
u64  fault_address,
char *  insn,
int  insn_len 
)

Definition at line 4540 of file mmu.c.

4542 {
4543  int r = 1;
4544  u32 flags = vcpu->arch.apf.host_apf_flags;
4545 
4546 #ifndef CONFIG_X86_64
4547  /* A 64-bit CR2 should be impossible on 32-bit KVM. */
4548  if (WARN_ON_ONCE(fault_address >> 32))
4549  return -EFAULT;
4550 #endif
4551 
4552  vcpu->arch.l1tf_flush_l1d = true;
4553  if (!flags) {
4554  trace_kvm_page_fault(vcpu, fault_address, error_code);
4555 
4556  if (kvm_event_needs_reinjection(vcpu))
4557  kvm_mmu_unprotect_page_virt(vcpu, fault_address);
4558  r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn,
4559  insn_len);
4560  } else if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) {
4561  vcpu->arch.apf.host_apf_flags = 0;
4562  local_irq_disable();
4563  kvm_async_pf_task_wait_schedule(fault_address);
4564  local_irq_enable();
4565  } else {
4566  WARN_ONCE(1, "Unexpected host async PF flags: %x\n", flags);
4567  }
4568 
4569  return r;
4570 }
int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len)
Definition: mmu.c:5830
static int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
Definition: mmu.c:2775
static bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
Definition: x86.h:127
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_has_zapped_obsolete_pages()

static bool kvm_has_zapped_obsolete_pages ( struct kvm *  kvm)
static

Definition at line 6299 of file mmu.c.

6300 {
6301  return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
6302 }
Here is the caller graph for this function:

◆ kvm_init_mmu()

void kvm_init_mmu ( struct kvm_vcpu *  vcpu)

Definition at line 5538 of file mmu.c.

5539 {
5540  struct kvm_mmu_role_regs regs = vcpu_to_role_regs(vcpu);
5541  union kvm_cpu_role cpu_role = kvm_calc_cpu_role(vcpu, &regs);
5542 
5543  if (mmu_is_nested(vcpu))
5544  init_kvm_nested_mmu(vcpu, cpu_role);
5545  else if (tdp_enabled)
5546  init_kvm_tdp_mmu(vcpu, cpu_role);
5547  else
5548  init_kvm_softmmu(vcpu, cpu_role);
5549 }
static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu, union kvm_cpu_role cpu_role)
Definition: mmu.c:5331
static void init_kvm_softmmu(struct kvm_vcpu *vcpu, union kvm_cpu_role cpu_role)
Definition: mmu.c:5487
static union kvm_cpu_role kvm_calc_cpu_role(struct kvm_vcpu *vcpu, const struct kvm_mmu_role_regs *regs)
Definition: mmu.c:5244
static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu, union kvm_cpu_role new_mode)
Definition: mmu.c:5499
static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu)
Definition: mmu.c:247
static bool mmu_is_nested(struct kvm_vcpu *vcpu)
Definition: x86.h:183
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_init_shadow_ept_mmu()

void kvm_init_shadow_ept_mmu ( struct kvm_vcpu *  vcpu,
bool  execonly,
int  huge_page_level,
bool  accessed_dirty,
gpa_t  new_eptp 
)

Definition at line 5458 of file mmu.c.

5461 {
5462  struct kvm_mmu *context = &vcpu->arch.guest_mmu;
5463  u8 level = vmx_eptp_page_walk_level(new_eptp);
5464  union kvm_cpu_role new_mode =
5465  kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty,
5466  execonly, level);
5467 
5468  if (new_mode.as_u64 != context->cpu_role.as_u64) {
5469  /* EPT, and thus nested EPT, does not consume CR0, CR4, nor EFER. */
5470  context->cpu_role.as_u64 = new_mode.as_u64;
5471  context->root_role.word = new_mode.base.word;
5472 
5473  context->page_fault = ept_page_fault;
5474  context->gva_to_gpa = ept_gva_to_gpa;
5475  context->sync_spte = ept_sync_spte;
5476 
5477  update_permission_bitmask(context, true);
5478  context->pkru_mask = 0;
5479  reset_rsvds_bits_mask_ept(vcpu, context, execonly, huge_page_level);
5480  reset_ept_shadow_zero_bits_mask(context, execonly);
5481  }
5482 
5483  kvm_mmu_new_pgd(vcpu, new_eptp);
5484 }
static void reset_ept_shadow_zero_bits_mask(struct kvm_mmu *context, bool execonly)
Definition: mmu.c:5062
static union kvm_cpu_role kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty, bool execonly, u8 level)
Definition: mmu.c:5434
static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
Definition: mmu.c:5079
void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
Definition: mmu.c:4753
static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, struct kvm_mmu *context, bool execonly, int huge_page_level)
Definition: mmu.c:4966
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_init_shadow_mmu()

static void kvm_init_shadow_mmu ( struct kvm_vcpu *  vcpu,
union kvm_cpu_role  cpu_role 
)
static

Definition at line 5382 of file mmu.c.

5384 {
5385  struct kvm_mmu *context = &vcpu->arch.root_mmu;
5386  union kvm_mmu_page_role root_role;
5387 
5388  root_role = cpu_role.base;
5389 
5390  /* KVM uses PAE paging whenever the guest isn't using 64-bit paging. */
5391  root_role.level = max_t(u32, root_role.level, PT32E_ROOT_LEVEL);
5392 
5393  /*
5394  * KVM forces EFER.NX=1 when TDP is disabled, reflect it in the MMU role.
5395  * KVM uses NX when TDP is disabled to handle a variety of scenarios,
5396  * notably for huge SPTEs if iTLB multi-hit mitigation is enabled and
5397  * to generate correct permissions for CR0.WP=0/CR4.SMEP=1/EFER.NX=0.
5398  * The iTLB multi-hit workaround can be toggled at any time, so assume
5399  * NX can be used by any non-nested shadow MMU to avoid having to reset
5400  * MMU contexts.
5401  */
5402  root_role.efer_nx = true;
5403 
5404  shadow_mmu_init_context(vcpu, context, cpu_role, root_role);
5405 }
static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *context, union kvm_cpu_role cpu_role, union kvm_mmu_page_role root_role)
Definition: mmu.c:5360
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_init_shadow_npt_mmu()

void kvm_init_shadow_npt_mmu ( struct kvm_vcpu *  vcpu,
unsigned long  cr0,
unsigned long  cr4,
u64  efer,
gpa_t  nested_cr3 
)

Definition at line 5407 of file mmu.c.

5409 {
5410  struct kvm_mmu *context = &vcpu->arch.guest_mmu;
5411  struct kvm_mmu_role_regs regs = {
5412  .cr0 = cr0,
5413  .cr4 = cr4 & ~X86_CR4_PKE,
5414  .efer = efer,
5415  };
5416  union kvm_cpu_role cpu_role = kvm_calc_cpu_role(vcpu, &regs);
5417  union kvm_mmu_page_role root_role;
5418 
5419  /* NPT requires CR0.PG=1. */
5420  WARN_ON_ONCE(cpu_role.base.direct);
5421 
5422  root_role = cpu_role.base;
5423  root_role.level = kvm_mmu_get_tdp_level(vcpu);
5424  if (root_role.level == PT64_ROOT_5LEVEL &&
5425  cpu_role.base.level == PT64_ROOT_4LEVEL)
5426  root_role.passthrough = 1;
5427 
5428  shadow_mmu_init_context(vcpu, context, cpu_role, root_role);
5429  kvm_mmu_new_pgd(vcpu, nested_cr3);
5430 }
const unsigned long cr4
Definition: mmu.c:188
const u64 efer
Definition: mmu.c:189
const unsigned long cr0
Definition: mmu.c:187
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_max_level_for_order()

static u8 kvm_max_level_for_order ( int  order)
inlinestatic

Definition at line 4282 of file mmu.c.

4283 {
4284  BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);
4285 
4286  KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) &&
4287  order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) &&
4288  order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K));
4289 
4290  if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
4291  return PG_LEVEL_1G;
4292 
4293  if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
4294  return PG_LEVEL_2M;
4295 
4296  return PG_LEVEL_4K;
4297 }
#define KVM_MMU_WARN_ON(x)
Definition: mmu_internal.h:12
Here is the caller graph for this function:
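
On x86 the order thresholds used above work out to 9 (2 MiB) and 18 (1 GiB) relative to 4 KiB pages. A small sketch of the same mapping, with KVM_HPAGE_GFN_SHIFT replaced by an illustrative macro that assumes 9 gfn bits per level:

#include <stdio.h>

enum { PG_LEVEL_4K = 1, PG_LEVEL_2M = 2, PG_LEVEL_1G = 3 };

/* x86 assumption: 9 gfn bits per extra level, i.e. orders 9 and 18. */
#define HPAGE_GFN_SHIFT(level) (((level) - PG_LEVEL_4K) * 9)

static int max_level_for_order(int order)
{
        if (order >= HPAGE_GFN_SHIFT(PG_LEVEL_1G))
                return PG_LEVEL_1G;
        if (order >= HPAGE_GFN_SHIFT(PG_LEVEL_2M))
                return PG_LEVEL_2M;
        return PG_LEVEL_4K;
}

int main(void)
{
        printf("order 0  -> level %d\n", max_level_for_order(0));
        printf("order 9  -> level %d\n", max_level_for_order(9));
        printf("order 18 -> level %d\n", max_level_for_order(18));
        return 0;
}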

◆ kvm_mmu_after_set_cpuid()

void kvm_mmu_after_set_cpuid ( struct kvm_vcpu *  vcpu)

Definition at line 5552 of file mmu.c.

5553 {
5554  /*
5555  * Invalidate all MMU roles to force them to reinitialize as CPUID
5556  * information is factored into reserved bit calculations.
5557  *
5558  * Correctly handling multiple vCPU models (with respect to paging and
5559  * physical address properties) in a single VM would require tracking
5560  * all relevant CPUID information in kvm_mmu_page_role. That is very
5561  * undesirable as it would increase the memory requirements for
5562  * gfn_write_track (see struct kvm_mmu_page_role comments). For now
5563  * that problem is swept under the rug; KVM's CPUID API is horrific and
5564  * it's all but impossible to solve it without introducing a new API.
5565  */
5566  vcpu->arch.root_mmu.root_role.word = 0;
5567  vcpu->arch.guest_mmu.root_role.word = 0;
5568  vcpu->arch.nested_mmu.root_role.word = 0;
5569  vcpu->arch.root_mmu.cpu_role.ext.valid = 0;
5570  vcpu->arch.guest_mmu.cpu_role.ext.valid = 0;
5571  vcpu->arch.nested_mmu.cpu_role.ext.valid = 0;
5572  kvm_mmu_reset_context(vcpu);
5573 
5574  /*
5575  * Changing guest CPUID after KVM_RUN is forbidden, see the comment in
5576  * kvm_arch_vcpu_ioctl().
5577  */
5578  KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm);
5579 }
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
Definition: mmu.c:5581
static bool kvm_vcpu_has_run(struct kvm_vcpu *vcpu)
Definition: x86.h:95
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_alloc_shadow_page()

static struct kvm_mmu_page* kvm_mmu_alloc_shadow_page ( struct kvm *  kvm,
struct shadow_page_caches caches,
gfn_t  gfn,
struct hlist_head *  sp_list,
union kvm_mmu_page_role  role 
)
static

Definition at line 2236 of file mmu.c.

2241 {
2242  struct kvm_mmu_page *sp;
2243 
2244  sp = kvm_mmu_memory_cache_alloc(caches->page_header_cache);
2245  sp->spt = kvm_mmu_memory_cache_alloc(caches->shadow_page_cache);
2246  if (!role.direct)
2247  sp->shadowed_translation = kvm_mmu_memory_cache_alloc(caches->shadowed_info_cache);
2248 
2249  set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
2250 
2251  INIT_LIST_HEAD(&sp->possible_nx_huge_page_link);
2252 
2253  /*
2254  * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages()
2255  * depends on valid pages being added to the head of the list. See
2256  * comments in kvm_zap_obsolete_pages().
2257  */
2258  sp->mmu_valid_gen = kvm->arch.mmu_valid_gen;
2259  list_add(&sp->link, &kvm->arch.active_mmu_pages);
2260  kvm_account_mmu_page(kvm, sp);
2261 
2262  sp->gfn = gfn;
2263  sp->role = role;
2264  hlist_add_head(&sp->hash_link, sp_list);
2265  if (sp_has_gptes(sp))
2266  account_shadowed(kvm, sp);
2267 
2268  return sp;
2269 }
static void kvm_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
Definition: mmu.c:1725
static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
Definition: mmu.c:827
struct hlist_node hash_link
Definition: mmu_internal.h:58
struct list_head possible_nx_huge_page_link
Definition: mmu_internal.h:118
u64 * shadowed_translation
Definition: mmu_internal.h:97
struct kvm_mmu_memory_cache * shadow_page_cache
Definition: mmu.c:2232
struct kvm_mmu_memory_cache * page_header_cache
Definition: mmu.c:2231
struct kvm_mmu_memory_cache * shadowed_info_cache
Definition: mmu.c:2233
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_available_pages()

static unsigned long kvm_mmu_available_pages ( struct kvm *  kvm)
inlinestatic

Definition at line 2705 of file mmu.c.

2706 {
2707  if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
2708  return kvm->arch.n_max_mmu_pages -
2709  kvm->arch.n_used_mmu_pages;
2710 
2711  return 0;
2712 }
Here is the caller graph for this function:

◆ kvm_mmu_change_mmu_pages()

void kvm_mmu_change_mmu_pages ( struct kvm *  kvm,
unsigned long  goal_nr_mmu_pages 
)

Definition at line 2741 of file mmu.c.

2742 {
2743  write_lock(&kvm->mmu_lock);
2744 
2745  if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
2746  kvm_mmu_zap_oldest_mmu_pages(kvm, kvm->arch.n_used_mmu_pages -
2747  goal_nr_mmu_pages);
2748 
2749  goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages;
2750  }
2751 
2752  kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
2753 
2754  write_unlock(&kvm->mmu_lock);
2755 }
static unsigned long kvm_mmu_zap_oldest_mmu_pages(struct kvm *kvm, unsigned long nr_to_zap)
Definition: mmu.c:2668
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_check_sptes_at_free()

static void kvm_mmu_check_sptes_at_free ( struct kvm_mmu_page sp)
static

Definition at line 1699 of file mmu.c.

1700 {
1701 #ifdef CONFIG_KVM_PROVE_MMU
1702  int i;
1703 
1704  for (i = 0; i < SPTE_ENT_PER_PAGE; i++) {
1705  if (is_shadow_present_pte(sp->spt[i]))
1706  pr_err_ratelimited("SPTE %llx (@ %p) for gfn %llx shadow-present at free",
1707  sp->spt[i], &sp->spt[i],
1708  kvm_mmu_page_get_gfn(sp, i));
1709  }
1710 #endif
1711 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_child_role()

static union kvm_mmu_page_role kvm_mmu_child_role ( u64 *  sptep,
bool  direct,
unsigned int  access 
)
static

Definition at line 2294 of file mmu.c.

2309 {
2310  struct kvm_mmu_page *parent_sp = sptep_to_sp(sptep);
2311  union kvm_mmu_page_role role;
2312 
2313  role = parent_sp->role;
2314  role.level--;
2315  role.access = access;
2316  role.direct = direct;
2317  role.passthrough = 0;
2318 
2319  /*
2320  * If the guest has 4-byte PTEs then that means it's using 32-bit,
2321  * 2-level, non-PAE paging. KVM shadows such guests with PAE paging
2322  * (i.e. 8-byte PTEs). The difference in PTE size means that KVM must
2323  * shadow each guest page table with multiple shadow page tables, which
2324  * requires extra bookkeeping in the role.
2325  *
2326  * Specifically, to shadow the guest's page directory (which covers a
2327  * 4GiB address space), KVM uses 4 PAE page directories, each mapping
2328  * 1GiB of the address space. @role.quadrant encodes which quarter of
2329  * the address space each maps.
2330  *
2331  * To shadow the guest's page tables (which each map a 4MiB region), KVM
2332  * uses 2 PAE page tables, each mapping a 2MiB region. For these,
2333  * @role.quadrant encodes which half of the region they map.
2334  *
2335  * Concretely, a 4-byte PDE consumes bits 31:22, while an 8-byte PDE
2336  * consumes bits 29:21. To consume bits 31:30, KVM's uses 4 shadow
2337  * PDPTEs; those 4 PAE page directories are pre-allocated and their
2338  * quadrant is assigned in mmu_alloc_root(). A 4-byte PTE consumes
2339  * bits 21:12, while an 8-byte PTE consumes bits 20:12. To consume
2340  * bit 21 in the PTE (the child here), KVM propagates that bit to the
2341  * quadrant, i.e. sets quadrant to '0' or '1'. The parent 8-byte PDE
2342  * covers bit 21 (see above), thus the quadrant is calculated from the
2343  * _least_ significant bit of the PDE index.
2344  */
2345  if (role.has_4_byte_gpte) {
2346  WARN_ON_ONCE(role.level != PG_LEVEL_4K);
2347  role.quadrant = spte_index(sptep) & 1;
2348  }
2349 
2350  return role;
2351 }
Here is the call graph for this function:
Here is the caller graph for this function:
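
A worked example of the quadrant comment above for a 32-bit non-PAE guest: guest-address bit 21 is covered by the 4-byte PTE index but not by the 8-byte shadow PTE index, so it reappears as the low bit of the shadow PDE index. The address arithmetic below only illustrates that comment; it is not kernel code:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t gva = 0x00300000;               /* example guest virtual address */

        /* 4-byte guest PTE table index: bits 21:12 (1024 entries, 4 MiB reach). */
        unsigned guest_pt_index = (gva >> 12) & 0x3ff;

        /* 8-byte shadow PTE table index: bits 20:12 (512 entries, 2 MiB reach). */
        unsigned shadow_pt_index = (gva >> 12) & 0x1ff;

        /* 8-byte shadow PDE index covers bits 29:21; its low bit is gva bit 21. */
        unsigned shadow_pde_index = (gva >> 21) & 0x1ff;
        unsigned quadrant = shadow_pde_index & 1;

        printf("guest PTE idx %u, shadow PTE idx %u, quadrant %u\n",
               guest_pt_index, shadow_pt_index, quadrant);
        return 0;
}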

◆ kvm_mmu_clear_dirty_pt_masked()

static void kvm_mmu_clear_dirty_pt_masked ( struct kvm *  kvm,
struct kvm_memory_slot *  slot,
gfn_t  gfn_offset,
unsigned long  mask 
)
static

kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages, or write protect the page if the D-bit isn't supported. @kvm: kvm instance @slot: slot to clear D-bit @gfn_offset: start of the BITS_PER_LONG pages we care about @mask: indicates which pages we should clear D-bit

Used for PML to re-log the dirty GPAs after userspace querying dirty_bitmap.

Definition at line 1340 of file mmu.c.

1343 {
1344  struct kvm_rmap_head *rmap_head;
1345 
1346  if (tdp_mmu_enabled)
1347  kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot,
1348  slot->base_gfn + gfn_offset, mask, false);
1349 
1350  if (!kvm_memslots_have_rmaps(kvm))
1351  return;
1352 
1353  while (mask) {
1354  rmap_head = gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
1355  PG_LEVEL_4K, slot);
1356  __rmap_clear_dirty(kvm, rmap_head, slot);
1357 
1358  /* clear the first set bit */
1359  mask &= mask - 1;
1360  }
1361 }
static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot)
Definition: mmu.c:1282
void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, unsigned long mask, bool wrprot)
Definition: tdp_mmu.c:1629
Here is the call graph for this function:
Here is the caller graph for this function:
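
The `mask &= mask - 1` idiom above clears the lowest set bit on each pass, so the loop visits exactly the dirty gfns in one bitmap word. A standalone sketch of that walk, with a GCC builtin standing in for the kernel's __ffs():

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t base_gfn = 0x100000, gfn_offset = 0;
        uint64_t mask = 0x8000000000000005ULL;   /* arbitrary dirty word: bits 0, 2, 63 */

        while (mask) {
                /* __builtin_ctzll stands in for the kernel's __ffs(). */
                uint64_t gfn = base_gfn + gfn_offset + __builtin_ctzll(mask);

                printf("clear D-bit (or write-protect) gfn %#llx\n",
                       (unsigned long long)gfn);

                mask &= mask - 1;                /* clear the first set bit */
        }
        return 0;
}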

◆ kvm_mmu_commit_zap_page()

static void kvm_mmu_commit_zap_page ( struct kvm *  kvm,
struct list_head *  invalid_list 
)
static

Definition at line 2643 of file mmu.c.

2645 {
2646  struct kvm_mmu_page *sp, *nsp;
2647 
2648  if (list_empty(invalid_list))
2649  return;
2650 
2651  /*
2652  * We need to make sure everyone sees our modifications to
2653  * the page tables and see changes to vcpu->mode here. The barrier
2654  * in the kvm_flush_remote_tlbs() achieves this. This pairs
2655  * with vcpu_enter_guest and walk_shadow_page_lockless_begin/end.
2656  *
2657  * In addition, kvm_flush_remote_tlbs waits for all vcpus to exit
2658  * guest mode and/or lockless shadow page table walks.
2659  */
2660  kvm_flush_remote_tlbs(kvm);
2661 
2662  list_for_each_entry_safe(sp, nsp, invalid_list, link) {
2663  WARN_ON_ONCE(!sp->role.invalid || sp->root_count);
2664  kvm_mmu_free_shadow_page(sp);
2665  }
2666 }
void kvm_flush_remote_tlbs(struct kvm *kvm)
Definition: kvm_main.c:346
static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp)
Definition: mmu.c:1737
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_create()

int kvm_mmu_create ( struct kvm_vcpu *  vcpu)

Definition at line 6153 of file mmu.c.

6154 {
6155  int ret;
6156 
6157  vcpu->arch.mmu_pte_list_desc_cache.kmem_cache = pte_list_desc_cache;
6158  vcpu->arch.mmu_pte_list_desc_cache.gfp_zero = __GFP_ZERO;
6159 
6160  vcpu->arch.mmu_page_header_cache.kmem_cache = mmu_page_header_cache;
6161  vcpu->arch.mmu_page_header_cache.gfp_zero = __GFP_ZERO;
6162 
6163  vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO;
6164 
6165  vcpu->arch.mmu = &vcpu->arch.root_mmu;
6166  vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
6167 
6168  ret = __kvm_mmu_create(vcpu, &vcpu->arch.guest_mmu);
6169  if (ret)
6170  return ret;
6171 
6172  ret = __kvm_mmu_create(vcpu, &vcpu->arch.root_mmu);
6173  if (ret)
6174  goto fail_allocate_root;
6175 
6176  return ret;
6177  fail_allocate_root:
6178  free_mmu_pages(&vcpu->arch.guest_mmu);
6179  return ret;
6180 }
struct kmem_cache * mmu_page_header_cache
Definition: mmu.c:181
static struct kmem_cache * pte_list_desc_cache
Definition: mmu.c:180
static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
Definition: mmu.c:6100
static void free_mmu_pages(struct kvm_mmu *mmu)
Definition: mmu.c:6091
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_destroy()

void kvm_mmu_destroy ( struct kvm_vcpu *  vcpu)

Definition at line 7076 of file mmu.c.

7077 {
7078  kvm_mmu_unload(vcpu);
7079  free_mmu_pages(&vcpu->arch.root_mmu);
7080  free_mmu_pages(&vcpu->arch.guest_mmu);
7081  mmu_free_memory_caches(vcpu);
7082 }
void kvm_mmu_unload(struct kvm_vcpu *vcpu)
Definition: mmu.c:5621
static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
Definition: mmu.c:702
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_find_shadow_page()

static struct kvm_mmu_page* kvm_mmu_find_shadow_page ( struct kvm *  kvm,
struct kvm_vcpu *  vcpu,
gfn_t  gfn,
struct hlist_head *  sp_list,
union kvm_mmu_page_role  role 
)
static

Definition at line 2151 of file mmu.c.

2156 {
2157  struct kvm_mmu_page *sp;
2158  int ret;
2159  int collisions = 0;
2160  LIST_HEAD(invalid_list);
2161 
2162  for_each_valid_sp(kvm, sp, sp_list) {
2163  if (sp->gfn != gfn) {
2164  collisions++;
2165  continue;
2166  }
2167 
2168  if (sp->role.word != role.word) {
2169  /*
2170  * If the guest is creating an upper-level page, zap
2171  * unsync pages for the same gfn. While it's possible
2172  * the guest is using recursive page tables, in all
2173  * likelihood the guest has stopped using the unsync
2174  * page and is installing a completely unrelated page.
2175  * Unsync pages must not be left as is, because the new
2176  * upper-level page will be write-protected.
2177  */
2178  if (role.level > PG_LEVEL_4K && sp->unsync)
2179  kvm_mmu_prepare_zap_page(kvm, sp,
2180  &invalid_list);
2181  continue;
2182  }
2183 
2184  /* unsync and write-flooding only apply to indirect SPs. */
2185  if (sp->role.direct)
2186  goto out;
2187 
2188  if (sp->unsync) {
2189  if (KVM_BUG_ON(!vcpu, kvm))
2190  break;
2191 
2192  /*
2193  * The page is good, but is stale. kvm_sync_page does
2194  * get the latest guest state, but (unlike mmu_unsync_children)
2195  * it doesn't write-protect the page or mark it synchronized!
2196  * This way the validity of the mapping is ensured, but the
2197  * overhead of write protection is not incurred until the
2198  * guest invalidates the TLB mapping. This allows multiple
2199  * SPs for a single gfn to be unsync.
2200  *
2201  * If the sync fails, the page is zapped. If so, break
2202  * in order to rebuild it.
2203  */
2204  ret = kvm_sync_page(vcpu, sp, &invalid_list);
2205  if (ret < 0)
2206  break;
2207 
2208  WARN_ON_ONCE(!list_empty(&invalid_list));
2209  if (ret > 0)
2210  kvm_flush_remote_tlbs(kvm);
2211  }
2212 
2213  __clear_sp_write_flooding_count(sp);
2214 
2215  goto out;
2216  }
2217 
2218  sp = NULL;
2219  ++kvm->stat.mmu_cache_miss;
2220 
2221 out:
2222  kvm_mmu_commit_zap_page(kvm, &invalid_list);
2223 
2224  if (collisions > kvm->stat.max_mmu_page_hash_collisions)
2225  kvm->stat.max_mmu_page_hash_collisions = collisions;
2226  return sp;
2227 }
LIST_HEAD(vm_list)
static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, struct list_head *invalid_list)
Definition: mmu.c:1987
static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, struct list_head *invalid_list)
Definition: mmu.c:2634
static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list)
Definition: mmu.c:2643
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_free_guest_mode_roots()

void kvm_mmu_free_guest_mode_roots ( struct kvm *  kvm,
struct kvm_mmu *  mmu 
)

Definition at line 3643 of file mmu.c.

3644 {
3645  unsigned long roots_to_free = 0;
3646  struct kvm_mmu_page *sp;
3647  hpa_t root_hpa;
3648  int i;
3649 
3650  /*
3651  * This should not be called while L2 is active, L2 can't invalidate
3652  * _only_ its own roots, e.g. INVVPID unconditionally exits.
3653  */
3654  WARN_ON_ONCE(mmu->root_role.guest_mode);
3655 
3656  for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
3657  root_hpa = mmu->prev_roots[i].hpa;
3658  if (!VALID_PAGE(root_hpa))
3659  continue;
3660 
3661  sp = root_to_sp(root_hpa);
3662  if (!sp || sp->role.guest_mode)
3663  roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
3664  }
3665 
3666  kvm_mmu_free_roots(kvm, mmu, roots_to_free);
3667 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_free_obsolete_roots()

void kvm_mmu_free_obsolete_roots ( struct kvm_vcpu *  vcpu)

Definition at line 5676 of file mmu.c.

5677 {
5678  __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.root_mmu);
5679  __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu);
5680 }
static void __kvm_mmu_free_obsolete_roots(struct kvm *kvm, struct kvm_mmu *mmu)
Definition: mmu.c:5659
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_free_roots()

void kvm_mmu_free_roots ( struct kvm *  kvm,
struct kvm_mmu *  mmu,
ulong  roots_to_free 
)

Definition at line 3587 of file mmu.c.

3589 {
3590  int i;
3591  LIST_HEAD(invalid_list);
3592  bool free_active_root;
3593 
3594  WARN_ON_ONCE(roots_to_free & ~KVM_MMU_ROOTS_ALL);
3595 
3596  BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG);
3597 
3598  /* Before acquiring the MMU lock, see if we need to do any real work. */
3599  free_active_root = (roots_to_free & KVM_MMU_ROOT_CURRENT)
3600  && VALID_PAGE(mmu->root.hpa);
3601 
3602  if (!free_active_root) {
3603  for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
3604  if ((roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) &&
3605  VALID_PAGE(mmu->prev_roots[i].hpa))
3606  break;
3607 
3608  if (i == KVM_MMU_NUM_PREV_ROOTS)
3609  return;
3610  }
3611 
3612  write_lock(&kvm->mmu_lock);
3613 
3614  for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
3615  if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i))
3616  mmu_free_root_page(kvm, &mmu->prev_roots[i].hpa,
3617  &invalid_list);
3618 
3619  if (free_active_root) {
3620  if (kvm_mmu_is_dummy_root(mmu->root.hpa)) {
3621  /* Nothing to cleanup for dummy roots. */
3622  } else if (root_to_sp(mmu->root.hpa)) {
3623  mmu_free_root_page(kvm, &mmu->root.hpa, &invalid_list);
3624  } else if (mmu->pae_root) {
3625  for (i = 0; i < 4; ++i) {
3626  if (!IS_VALID_PAE_ROOT(mmu->pae_root[i]))
3627  continue;
3628 
3629  mmu_free_root_page(kvm, &mmu->pae_root[i],
3630  &invalid_list);
3631  mmu->pae_root[i] = INVALID_PAE_ROOT;
3632  }
3633  }
3634  mmu->root.hpa = INVALID_PAGE;
3635  mmu->root.pgd = 0;
3636  }
3637 
3638  kvm_mmu_commit_zap_page(kvm, &invalid_list);
3639  write_unlock(&kvm->mmu_lock);
3640 }
static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, struct list_head *invalid_list)
Definition: mmu.c:3566
#define IS_VALID_PAE_ROOT(x)
Definition: mmu_internal.h:38
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_free_shadow_page()

static void kvm_mmu_free_shadow_page ( struct kvm_mmu_page sp)
static

Definition at line 1737 of file mmu.c.

1738 {
1739  kvm_mmu_check_sptes_at_free(sp);
1740 
1741  hlist_del(&sp->hash_link);
1742  list_del(&sp->link);
1743  free_page((unsigned long)sp->spt);
1744  if (!sp->role.direct)
1745  free_page((unsigned long)sp->shadowed_translation);
1746  kmem_cache_free(mmu_page_header_cache, sp);
1747 }
static void kvm_mmu_check_sptes_at_free(struct kvm_mmu_page *sp)
Definition: mmu.c:1699
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_get_child_sp()

static struct kvm_mmu_page* kvm_mmu_get_child_sp ( struct kvm_vcpu *  vcpu,
u64 *  sptep,
gfn_t  gfn,
bool  direct,
unsigned int  access 
)
static

Definition at line 2353 of file mmu.c.

2356 {
2357  union kvm_mmu_page_role role;
2358 
2359  if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
2360  return ERR_PTR(-EEXIST);
2361 
2362  role = kvm_mmu_child_role(sptep, direct, access);
2363  return kvm_mmu_get_shadow_page(vcpu, gfn, role);
2364 }
static struct kvm_mmu_page * kvm_mmu_get_shadow_page(struct kvm_vcpu *vcpu, gfn_t gfn, union kvm_mmu_page_role role)
Definition: mmu.c:2294
static union kvm_mmu_page_role kvm_mmu_child_role(u64 *sptep, bool direct, unsigned int access)
Definition: mmu.c:2307
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_get_guest_pgd()

static unsigned long kvm_mmu_get_guest_pgd ( struct kvm_vcpu *  vcpu,
struct kvm_mmu *  mmu 
)
inlinestatic

Definition at line 263 of file mmu.c.

265 {
266  if (IS_ENABLED(CONFIG_RETPOLINE) && mmu->get_guest_pgd == get_guest_cr3)
267  return kvm_read_cr3(vcpu);
268 
269  return mmu->get_guest_pgd(vcpu);
270 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_get_shadow_page()

static struct kvm_mmu_page* kvm_mmu_get_shadow_page ( struct kvm_vcpu *  vcpu,
gfn_t  gfn,
union kvm_mmu_page_role  role 
)
static

Definition at line 2294 of file mmu.c.

2297 {
2298  struct shadow_page_caches caches = {
2299  .page_header_cache = &vcpu->arch.mmu_page_header_cache,
2300  .shadow_page_cache = &vcpu->arch.mmu_shadow_page_cache,
2301  .shadowed_info_cache = &vcpu->arch.mmu_shadowed_info_cache,
2302  };
2303 
2304  return __kvm_mmu_get_shadow_page(vcpu->kvm, vcpu, &caches, gfn, role);
2305 }
static struct kvm_mmu_page * __kvm_mmu_get_shadow_page(struct kvm *kvm, struct kvm_vcpu *vcpu, struct shadow_page_caches *caches, gfn_t gfn, union kvm_mmu_page_role role)
Definition: mmu.c:2272
Here is the caller graph for this function:

◆ kvm_mmu_get_tdp_level()

static int kvm_mmu_get_tdp_level ( struct kvm_vcpu *  vcpu)
inlinestatic

Definition at line 5299 of file mmu.c.

5300 {
5301  /* tdp_root_level is architecture forced level, use it if nonzero */
5302  if (tdp_root_level)
5303  return tdp_root_level;
5304 
5305  /* Use 5-level TDP if and only if it's useful/necessary. */
5306  if (max_tdp_level == 5 && cpuid_maxphyaddr(vcpu) <= 48)
5307  return 4;
5308 
5309  return max_tdp_level;
5310 }
static int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
Definition: cpuid.h:40
Here is the caller graph for this function:
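
The helper only pays for 5-level TDP when the guest's physical address width exceeds 48 bits, since 4-level paging already covers smaller guests. A standalone sketch of the same decision; the forced level and MAXPHYADDR inputs are simplified toy parameters, not KVM's tdp_root_level/max_tdp_level plumbing.

#include <stdio.h>

/* forced_level of 0 means "no architectural override". */
static int pick_tdp_level(int forced_level, int max_level, int guest_maxphyaddr)
{
    if (forced_level)
        return forced_level;

    /* 5-level TDP is only worthwhile when the guest can address more than 48 bits. */
    if (max_level == 5 && guest_maxphyaddr <= 48)
        return 4;

    return max_level;
}

int main(void)
{
    printf("maxphyaddr 46 -> %d-level TDP\n", pick_tdp_level(0, 5, 46));
    printf("maxphyaddr 52 -> %d-level TDP\n", pick_tdp_level(0, 5, 52));
    return 0;
}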

◆ kvm_mmu_gfn_allow_lpage()

void kvm_mmu_gfn_allow_lpage ( const struct kvm_memory_slot *  slot,
gfn_t  gfn 
)

Definition at line 822 of file mmu.c.

823 {
824  update_gfn_disallow_lpage_count(slot, gfn, -1);
825 }
static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot, gfn_t gfn, int count)
Definition: mmu.c:802
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_gfn_disallow_lpage()

void kvm_mmu_gfn_disallow_lpage ( const struct kvm_memory_slot *  slot,
gfn_t  gfn 
)

Definition at line 817 of file mmu.c.

818 {
819  update_gfn_disallow_lpage_count(slot, gfn, 1);
820 }
Here is the call graph for this function:
Here is the caller graph for this function:
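
Allowing and disallowing huge pages for a gfn is reference counted: every independent reason to forbid a large mapping adds one to the per-gfn count, and huge pages become legal again only when all reasons are gone. A toy model of that counter; the struct and helper names are hypothetical, not KVM's lpage_info layout.

#include <assert.h>
#include <stdio.h>

struct toy_lpage_info {
    int disallow_lpage;   /* huge pages allowed only while this is zero */
};

static void update_disallow(struct toy_lpage_info *info, int count)
{
    info->disallow_lpage += count;
    assert(info->disallow_lpage >= 0);   /* going negative means unbalanced calls */
}

int main(void)
{
    struct toy_lpage_info info = { 0 };

    update_disallow(&info, 1);    /* one reason to disallow appears */
    update_disallow(&info, 1);    /* a second, independent reason */
    update_disallow(&info, -1);   /* the first reason goes away */
    printf("huge pages allowed: %s\n", info.disallow_lpage == 0 ? "yes" : "no");
    return 0;
}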

◆ kvm_mmu_hugepage_adjust()

void kvm_mmu_hugepage_adjust ( struct kvm_vcpu *  vcpu,
struct kvm_page_fault *  fault 
)

Definition at line 3180 of file mmu.c.

3181 {
3182  struct kvm_memory_slot *slot = fault->slot;
3183  kvm_pfn_t mask;
3184 
3185  fault->huge_page_disallowed = fault->exec && fault->nx_huge_page_workaround_enabled;
3186 
3187  if (unlikely(fault->max_level == PG_LEVEL_4K))
3188  return;
3189 
3190  if (is_error_noslot_pfn(fault->pfn))
3191  return;
3192 
3193  if (kvm_slot_dirty_track_enabled(slot))
3194  return;
3195 
3196  /*
3197  * Enforce the iTLB multihit workaround after capturing the requested
3198  * level, which will be used to do precise, accurate accounting.
3199  */
3200  fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot,
3201  fault->gfn, fault->max_level,
3202  fault->is_private);
3203  if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
3204  return;
3205 
3206  /*
3207  * mmu_invalidate_retry() was successful and mmu_lock is held, so
3208  * the pmd can't be split from under us.
3209  */
3210  fault->goal_level = fault->req_level;
3211  mask = KVM_PAGES_PER_HPAGE(fault->goal_level) - 1;
3212  VM_BUG_ON((fault->gfn & mask) != (fault->pfn & mask));
3213  fault->pfn &= ~mask;
3214 }
static int __kvm_mmu_max_mapping_level(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t gfn, int max_level, bool is_private)
Definition: mmu.c:3146
Here is the call graph for this function:
Here is the caller graph for this function:
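
Once a goal level is chosen, the faulting pfn is aligned down to the start of the candidate huge page, and the gfn and pfn must share the same offset within that page or a contiguous huge mapping would be impossible. A small sketch of the mask arithmetic, assuming x86's 512-entries-per-table geometry; the values are made up for the example.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define LEVEL_BITS 9   /* 512 entries per page table, as on x86 */

/* Number of 4K pages covered by one entry at the given level (1 = 4K). */
static uint64_t pages_per_hpage(int level)
{
    return 1ULL << ((level - 1) * LEVEL_BITS);
}

int main(void)
{
    uint64_t gfn = 0x12345, pfn = 0xabd45;   /* same low bits, different frames */
    int level = 2;                           /* a 2M mapping */
    uint64_t mask = pages_per_hpage(level) - 1;

    /* Guest and host offsets inside the huge page must agree. */
    assert((gfn & mask) == (pfn & mask));

    /* Align the pfn down to the start of the huge page before mapping it. */
    pfn &= ~mask;
    printf("huge-page base pfn: %#llx\n", (unsigned long long)pfn);
    return 0;
}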

◆ kvm_mmu_init_vm()

void kvm_mmu_init_vm ( struct kvm *  kvm)

Definition at line 6304 of file mmu.c.

6305 {
6306  INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
6307  INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
6308  INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
6309  spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
6310 
6311  if (tdp_mmu_enabled)
6312  kvm_mmu_init_tdp_mmu(kvm);
6313 
6314  kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache;
6315  kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO;
6316 
6317  kvm->arch.split_shadow_page_cache.gfp_zero = __GFP_ZERO;
6318 
6319  kvm->arch.split_desc_cache.kmem_cache = pte_list_desc_cache;
6320  kvm->arch.split_desc_cache.gfp_zero = __GFP_ZERO;
6321 }
void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
Definition: tdp_mmu.c:15
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_invalidate_addr()

void kvm_mmu_invalidate_addr ( struct kvm_vcpu *  vcpu,
struct kvm_mmu *  mmu,
u64  addr,
unsigned long  roots 
)

Definition at line 5939 of file mmu.c.

5941 {
5942  int i;
5943 
5944  WARN_ON_ONCE(roots & ~KVM_MMU_ROOTS_ALL);
5945 
5946  /* It's actually a GPA for vcpu->arch.guest_mmu. */
5947  if (mmu != &vcpu->arch.guest_mmu) {
5948  /* INVLPG on a non-canonical address is a NOP according to the SDM. */
5949  if (is_noncanonical_address(addr, vcpu))
5950  return;
5951 
5952  static_call(kvm_x86_flush_tlb_gva)(vcpu, addr);
5953  }
5954 
5955  if (!mmu->sync_spte)
5956  return;
5957 
5958  if (roots & KVM_MMU_ROOT_CURRENT)
5959  __kvm_mmu_invalidate_addr(vcpu, mmu, addr, mmu->root.hpa);
5960 
5961  for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
5962  if (roots & KVM_MMU_ROOT_PREVIOUS(i))
5963  __kvm_mmu_invalidate_addr(vcpu, mmu, addr, mmu->prev_roots[i].hpa);
5964  }
5965 }
static void __kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, u64 addr, hpa_t root_hpa)
Definition: mmu.c:5902
static bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
Definition: x86.h:213
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_invalidate_mmio_sptes()

void kvm_mmu_invalidate_mmio_sptes ( struct kvm *  kvm,
u64  gen 
)

Definition at line 6834 of file mmu.c.

6835 {
6836  WARN_ON_ONCE(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
6837 
6838  gen &= MMIO_SPTE_GEN_MASK;
6839 
6840  /*
6841  * Generation numbers are incremented in multiples of the number of
6842  * address spaces in order to provide unique generations across all
6843  * address spaces. Strip what is effectively the address space
6844  * modifier prior to checking for a wrap of the MMIO generation so
6845  * that a wrap in any address space is detected.
6846  */
6847  gen &= ~((u64)kvm_arch_nr_memslot_as_ids(kvm) - 1);
6848 
6849  /*
6850  * The very rare case: if the MMIO generation number has wrapped,
6851  * zap all shadow pages.
6852  */
6853  if (unlikely(gen == 0)) {
6854  kvm_debug_ratelimited("zapping shadow pages for mmio generation wraparound\n");
6855  kvm_mmu_zap_all_fast(kvm);
6856  }
6857 }
Here is the call graph for this function:
Here is the caller graph for this function:
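
The memslot generation carries an update-in-progress flag and low bits that act as an address-space modifier; both are stripped before comparing against the bits that actually fit in an MMIO SPTE, and a stripped value of zero signals a wrap. A hedged sketch of that masking with made-up field widths, not KVM's real MMIO_SPTE_GEN_MASK layout.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define GEN_IN_PROGRESS (1ULL << 63)          /* update-in-progress flag (toy layout) */
#define MMIO_GEN_MASK   ((1ULL << 19) - 1)    /* bits that fit in an MMIO SPTE (toy) */

/* Return true if the effective MMIO generation wrapped back to zero. */
static bool mmio_gen_wrapped(uint64_t gen, unsigned int nr_address_spaces)
{
    gen &= ~GEN_IN_PROGRESS;
    gen &= MMIO_GEN_MASK;

    /* Strip the address-space modifier so a wrap in any space is detected. */
    gen &= ~((uint64_t)nr_address_spaces - 1);

    return gen == 0;
}

int main(void)
{
    printf("gen 2 wrapped?       %d\n", mmio_gen_wrapped(2, 2));
    printf("gen 1<<19 wrapped?   %d\n", mmio_gen_wrapped(1ULL << 19, 2));
    return 0;
}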

◆ kvm_mmu_invlpg()

void kvm_mmu_invlpg ( struct kvm_vcpu *  vcpu,
gva_t  gva 
)

Definition at line 5968 of file mmu.c.

5969 {
5970  /*
5971  * INVLPG is required to invalidate any global mappings for the VA,
5972  * irrespective of PCID. Blindly sync all roots as it would take
5973  * roughly the same amount of work/time to determine whether any of the
5974  * previous roots have a global mapping.
5975  *
5976  * Mappings not reachable via the current or previous cached roots will
5977  * be synced when switching to that new cr3, so nothing needs to be
5978  * done here for them.
5979  */
5980  kvm_mmu_invalidate_addr(vcpu, vcpu->arch.walk_mmu, gva, KVM_MMU_ROOTS_ALL);
5981  ++vcpu->stat.invlpg;
5982 }
void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, u64 addr, unsigned long roots)
Definition: mmu.c:5939
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_invpcid_gva()

void kvm_mmu_invpcid_gva ( struct kvm_vcpu *  vcpu,
gva_t  gva,
unsigned long  pcid 
)

Definition at line 5986 of file mmu.c.

5987 {
5988  struct kvm_mmu *mmu = vcpu->arch.mmu;
5989  unsigned long roots = 0;
5990  uint i;
5991 
5992  if (pcid == kvm_get_active_pcid(vcpu))
5993  roots |= KVM_MMU_ROOT_CURRENT;
5994 
5995  for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
5996  if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
5997  pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd))
5998  roots |= KVM_MMU_ROOT_PREVIOUS(i);
5999  }
6000 
6001  if (roots)
6002  kvm_mmu_invalidate_addr(vcpu, mmu, gva, roots);
6003  ++vcpu->stat.invlpg;
6004 
6005  /*
6006  * Mappings not reachable via the current cr3 or the prev_roots will be
6007  * synced when switching to that cr3, so nothing needs to be done here
6008  * for them.
6009  */
6010 }
static unsigned long kvm_get_pcid(struct kvm_vcpu *vcpu, gpa_t cr3)
Definition: mmu.h:135
static unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
Definition: mmu.h:144
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_load()

int kvm_mmu_load ( struct kvm_vcpu *  vcpu)

Definition at line 5588 of file mmu.c.

5589 {
5590  int r;
5591 
5592  r = mmu_topup_memory_caches(vcpu, !vcpu->arch.mmu->root_role.direct);
5593  if (r)
5594  goto out;
5595  r = mmu_alloc_special_roots(vcpu);
5596  if (r)
5597  goto out;
5598  if (vcpu->arch.mmu->root_role.direct)
5599  r = mmu_alloc_direct_roots(vcpu);
5600  else
5601  r = mmu_alloc_shadow_roots(vcpu);
5602  if (r)
5603  goto out;
5604 
5605  kvm_mmu_sync_roots(vcpu);
5606 
5607  kvm_mmu_load_pgd(vcpu);
5608 
5609  /*
5610  * Flush any TLB entries for the new root, the provenance of the root
5611  * is unknown. Even if KVM ensures there are no stale TLB entries
5612  * for a freed root, in theory another hypervisor could have left
5613  * stale entries. Flushing on alloc also allows KVM to skip the TLB
5614  * flush when freeing a root (see kvm_tdp_mmu_put_root()).
5615  */
5616  static_call(kvm_x86_flush_tlb_current)(vcpu);
5617 out:
5618  return r;
5619 }
static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
Definition: mmu.c:3688
static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
Definition: mmu.c:3914
static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
Definition: mmu.c:3796
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
Definition: mmu.c:4021
static void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
Definition: mmu.h:157
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_mark_parents_unsync()

static void kvm_mmu_mark_parents_unsync ( struct kvm_mmu_page *  sp)
static

Definition at line 1777 of file mmu.c.

1778 {
1779  u64 *sptep;
1780  struct rmap_iterator iter;
1781 
1782  for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) {
1783  mark_unsync(sptep);
1784  }
1785 }
struct kvm_rmap_head parent_ptes
Definition: mmu_internal.h:106
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_max_mapping_level()

int kvm_mmu_max_mapping_level ( struct kvm *  kvm,
const struct kvm_memory_slot *  slot,
gfn_t  gfn,
int  max_level 
)

Definition at line 3170 of file mmu.c.

3173 {
3174  bool is_private = kvm_slot_can_be_private(slot) &&
3175  kvm_mem_is_private(kvm, gfn);
3176 
3177  return __kvm_mmu_max_mapping_level(kvm, slot, gfn, max_level, is_private);
3178 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_new_pgd()

void kvm_mmu_new_pgd ( struct kvm_vcpu *  vcpu,
gpa_t  new_pgd 
)

Definition at line 4753 of file mmu.c.

4754 {
4755  struct kvm_mmu *mmu = vcpu->arch.mmu;
4756  union kvm_mmu_page_role new_role = mmu->root_role;
4757 
4758  /*
4759  * Return immediately if no usable root was found, kvm_mmu_reload()
4760  * will establish a valid root prior to the next VM-Enter.
4761  */
4762  if (!fast_pgd_switch(vcpu->kvm, mmu, new_pgd, new_role))
4763  return;
4764 
4765  /*
4766  * It's possible that the cached previous root page is obsolete because
4767  * of a change in the MMU generation number. However, changing the
4768  * generation number is accompanied by KVM_REQ_MMU_FREE_OBSOLETE_ROOTS,
4769  * which will free the root set here and allocate a new one.
4770  */
4771  kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu);
4772 
4773  if (force_flush_and_sync_on_reuse) {
4774  kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
4775  kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
4776  }
4777 
4778  /*
4779  * The last MMIO access's GVA and GPA are cached in the VCPU. When
4780  * switching to a new CR3, that GVA->GPA mapping may no longer be
4781  * valid. So clear any cached MMIO info even when we don't need to sync
4782  * the shadow page tables.
4783  */
4784  vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
4785 
4786  /*
4787  * If this is a direct root page, it doesn't have a write flooding
4788  * count. Otherwise, clear the write flooding count.
4789  */
4790  if (!new_role.direct) {
4791  struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa);
4792 
4793  if (!WARN_ON_ONCE(!sp))
4794  __clear_sp_write_flooding_count(sp);
4795  }
4796 }
static bool fast_pgd_switch(struct kvm *kvm, struct kvm_mmu *mmu, gpa_t new_pgd, union kvm_mmu_page_role new_role)
Definition: mmu.c:4737
static bool __read_mostly force_flush_and_sync_on_reuse
Definition: mmu.c:96
#define MMIO_GVA_ANY
Definition: x86.h:245
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_page_fault()

int noinline kvm_mmu_page_fault ( struct kvm_vcpu *  vcpu,
gpa_t  cr2_or_gpa,
u64  error_code,
void *  insn,
int  insn_len 
)

Definition at line 5830 of file mmu.c.

5832 {
5833  int r, emulation_type = EMULTYPE_PF;
5834  bool direct = vcpu->arch.mmu->root_role.direct;
5835 
5836  /*
5837  * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP
5838  * checks when emulating instructions that trigger implicit access.
5839  * WARN if hardware generates a fault with an error code that collides
5840  * with the KVM-defined value. Clear the flag and continue on, i.e.
5841  * don't terminate the VM, as KVM can't possibly be relying on a flag
5842  * that KVM doesn't know about.
5843  */
5844  if (WARN_ON_ONCE(error_code & PFERR_IMPLICIT_ACCESS))
5845  error_code &= ~PFERR_IMPLICIT_ACCESS;
5846 
5847  if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
5848  return RET_PF_RETRY;
5849 
5850  r = RET_PF_INVALID;
5851  if (unlikely(error_code & PFERR_RSVD_MASK)) {
5852  r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct);
5853  if (r == RET_PF_EMULATE)
5854  goto emulate;
5855  }
5856 
5857  if (r == RET_PF_INVALID) {
5858  r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa,
5859  lower_32_bits(error_code), false,
5860  &emulation_type);
5861  if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm))
5862  return -EIO;
5863  }
5864 
5865  if (r < 0)
5866  return r;
5867  if (r != RET_PF_EMULATE)
5868  return 1;
5869 
5870  /*
5871  * Before emulating the instruction, check if the error code
5872  * was due to a RO violation while translating the guest page.
5873  * This can occur when using nested virtualization with nested
5874  * paging in both guests. If true, we simply unprotect the page
5875  * and resume the guest.
5876  */
5877  if (vcpu->arch.mmu->root_role.direct &&
5878  (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
5879  kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa));
5880  return 1;
5881  }
5882 
5883  /*
5884  * vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still
5885  * optimistically try to just unprotect the page and let the processor
5886  * re-execute the instruction that caused the page fault. Do not allow
5887  * retrying MMIO emulation, as it's not only pointless but could also
5888  * cause us to enter an infinite loop because the processor will keep
5889  * faulting on the non-existent MMIO address. Retrying an instruction
5890  * from a nested guest is also pointless and dangerous as we are only
5891  * explicitly shadowing L1's page tables, i.e. unprotecting something
5892  * for L1 isn't going to magically fix whatever issue caused L2 to fail.
5893  */
5894  if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu))
5895  emulation_type |= EMULTYPE_ALLOW_RETRY_PF;
5896 emulate:
5897  return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn,
5898  insn_len);
5899 }
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
Definition: mmu.c:2757
static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
Definition: mmu.c:4174
int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type, void *insn, int insn_len)
Definition: x86.c:9074
Here is the call graph for this function:
Here is the caller graph for this function:
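
Note that the nested-guest check tests a composite flag: (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE only matches when every bit of the composite is set, whereas a plain non-zero test would match any subset. A tiny sketch of the difference using made-up bit values, not the real PFERR_* encoding.

#include <stdbool.h>
#include <stdio.h>

/* Toy error-code bits for illustration only. */
#define ERR_PRESENT    (1u << 0)
#define ERR_WRITE      (1u << 1)
#define ERR_GUEST      (1u << 4)
#define ERR_COMPOSITE  (ERR_PRESENT | ERR_WRITE | ERR_GUEST)

static bool all_bits_set(unsigned int code) { return (code & ERR_COMPOSITE) == ERR_COMPOSITE; }
static bool any_bit_set(unsigned int code)  { return (code & ERR_COMPOSITE) != 0; }

int main(void)
{
    unsigned int partial = ERR_PRESENT | ERR_WRITE;
    unsigned int full = ERR_COMPOSITE;

    printf("partial: any=%d all=%d\n", any_bit_set(partial), all_bits_set(partial));
    printf("full:    any=%d all=%d\n", any_bit_set(full), all_bits_set(full));
    return 0;
}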

◆ kvm_mmu_page_get_access()

static u32 kvm_mmu_page_get_access ( struct kvm_mmu_page *  sp,
int  index 
)
static

Definition at line 734 of file mmu.c.

735 {
736  if (sp_has_gptes(sp))
737  return sp->shadowed_translation[index] & ACC_ALL;
738 
739  /*
740  * For direct MMUs (e.g. TDP or non-paging guests) or passthrough SPs,
741  * KVM is not shadowing any guest page tables, so the "guest access
742  * permissions" are just ACC_ALL.
743  *
744  * For direct SPs in indirect MMUs (shadow paging), i.e. when KVM
745  * is shadowing a guest huge page with small pages, the guest access
746  * permissions being shadowed are the access permissions of the huge
747  * page.
748  *
749  * In both cases, sp->role.access contains the correct access bits.
750  */
751  return sp->role.access;
752 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_page_get_gfn()

static gfn_t kvm_mmu_page_get_gfn ( struct kvm_mmu_page *  sp,
int  index 
)
static

Definition at line 717 of file mmu.c.

718 {
719  if (sp->role.passthrough)
720  return sp->gfn;
721 
722  if (!sp->role.direct)
723  return sp->shadowed_translation[index] >> PAGE_SHIFT;
724 
725  return sp->gfn + (index << ((sp->role.level - 1) * SPTE_LEVEL_BITS));
726 }
#define SPTE_LEVEL_BITS
Definition: spte.h:55
Here is the caller graph for this function:
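
For direct shadow pages the gfn of entry 'index' is the page's base gfn plus the index scaled by the coverage of one entry at the level below, which is what the shift by (level - 1) * SPTE_LEVEL_BITS computes. A worked sketch of that stride calculation, assuming x86's 512 entries per table.

#include <stdint.h>
#include <stdio.h>

#define LEVEL_BITS 9   /* 512 SPTEs per shadow page, as on x86 */

/* gfn covered by entry 'index' of a direct shadow page at 'level'. */
static uint64_t child_gfn(uint64_t base_gfn, int level, int index)
{
    return base_gfn + ((uint64_t)index << ((level - 1) * LEVEL_BITS));
}

int main(void)
{
    /* A level-2 page: each of its 512 entries covers 512 4K pages (2M). */
    printf("entry 0 -> gfn %#llx\n", (unsigned long long)child_gfn(0x40000, 2, 0));
    printf("entry 3 -> gfn %#llx\n", (unsigned long long)child_gfn(0x40000, 2, 3));
    return 0;
}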

◆ kvm_mmu_page_set_access()

static void kvm_mmu_page_set_access ( struct kvm_mmu_page *  sp,
int  index,
unsigned int  access 
)
static

Definition at line 773 of file mmu.c.

775 {
776  gfn_t gfn = kvm_mmu_page_get_gfn(sp, index);
777 
778  kvm_mmu_page_set_translation(sp, index, gfn, access);
779 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_page_set_translation()

static void kvm_mmu_page_set_translation ( struct kvm_mmu_page *  sp,
int  index,
gfn_t  gfn,
unsigned int  access 
)
static

Definition at line 754 of file mmu.c.

756 {
757  if (sp_has_gptes(sp)) {
758  sp->shadowed_translation[index] = (gfn << PAGE_SHIFT) | access;
759  return;
760  }
761 
762  WARN_ONCE(access != kvm_mmu_page_get_access(sp, index),
763  "access mismatch under %s page %llx (expected %u, got %u)\n",
764  sp->role.passthrough ? "passthrough" : "direct",
765  sp->gfn, kvm_mmu_page_get_access(sp, index), access);
766 
767  WARN_ONCE(gfn != kvm_mmu_page_get_gfn(sp, index),
768  "gfn mismatch under %s page %llx (expected %llx, got %llx)\n",
769  sp->role.passthrough ? "passthrough" : "direct",
770  sp->gfn, kvm_mmu_page_get_gfn(sp, index), gfn);
771 }
static u32 kvm_mmu_page_get_access(struct kvm_mmu_page *sp, int index)
Definition: mmu.c:734
Here is the call graph for this function:
Here is the caller graph for this function:
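
shadowed_translation packs the guest gfn and the access bits into one 64-bit word, with the gfn above PAGE_SHIFT and the access bits in the low bits, which is why the getters simply shift or mask. A self-contained sketch of that encoding; the shift and access mask are stand-ins for PAGE_SHIFT and ACC_ALL.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_PAGE_SHIFT 12
#define TOY_ACC_ALL    0x7   /* read | write | exec, standing in for ACC_ALL */

static uint64_t pack_translation(uint64_t gfn, unsigned int access)
{
    return (gfn << TOY_PAGE_SHIFT) | (access & TOY_ACC_ALL);
}

static uint64_t unpack_gfn(uint64_t word)        { return word >> TOY_PAGE_SHIFT; }
static unsigned int unpack_access(uint64_t word) { return word & TOY_ACC_ALL; }

int main(void)
{
    uint64_t word = pack_translation(0x1b3f5, 0x3);

    assert(unpack_gfn(word) == 0x1b3f5);
    assert(unpack_access(word) == 0x3);
    printf("packed word: %#llx\n", (unsigned long long)word);
    return 0;
}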

◆ kvm_mmu_page_unlink_children()

static int kvm_mmu_page_unlink_children ( struct kvm *  kvm,
struct kvm_mmu_page *  sp,
struct list_head *  invalid_list 
)
static

Definition at line 2523 of file mmu.c.

2526 {
2527  int zapped = 0;
2528  unsigned i;
2529 
2530  for (i = 0; i < SPTE_ENT_PER_PAGE; ++i)
2531  zapped += mmu_page_zap_pte(kvm, sp, sp->spt + i, invalid_list);
2532 
2533  return zapped;
2534 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_post_init_vm()

int kvm_mmu_post_init_vm ( struct kvm *  kvm)

Definition at line 7279 of file mmu.c.

7280 {
7281  int err;
7282 
7283  if (nx_hugepage_mitigation_hard_disabled)
7284  return 0;
7285 
7286  err = kvm_vm_create_worker_thread(kvm, kvm_nx_huge_page_recovery_worker, 0,
7287  "kvm-nx-lpage-recovery",
7288  &kvm->arch.nx_huge_page_recovery_thread);
7289  if (!err)
7290  kthread_unpark(kvm->arch.nx_huge_page_recovery_thread);
7291 
7292  return err;
7293 }
int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, uintptr_t data, const char *name, struct task_struct **thread_ptr)
Definition: kvm_main.c:6593
static int kvm_nx_huge_page_recovery_worker(struct kvm *kvm, uintptr_t data)
Definition: mmu.c:7254
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_pre_destroy_vm()

void kvm_mmu_pre_destroy_vm ( struct kvm *  kvm)

Definition at line 7295 of file mmu.c.

7296 {
7297  if (kvm->arch.nx_huge_page_recovery_thread)
7298  kthread_stop(kvm->arch.nx_huge_page_recovery_thread);
7299 }
Here is the caller graph for this function:

◆ kvm_mmu_prepare_memory_fault_exit()

static void kvm_mmu_prepare_memory_fault_exit ( struct kvm_vcpu *  vcpu,
struct kvm_page_fault *  fault 
)
static

Definition at line 4299 of file mmu.c.

4301 {
4302  kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT,
4303  PAGE_SIZE, fault->write, fault->exec,
4304  fault->is_private);
4305 }
Here is the caller graph for this function:

◆ kvm_mmu_prepare_zap_page()

static bool kvm_mmu_prepare_zap_page ( struct kvm *  kvm,
struct kvm_mmu_page *  sp,
struct list_head *  invalid_list 
)
static

Definition at line 2634 of file mmu.c.

2636 {
2637  int nr_zapped;
2638 
2639  __kvm_mmu_prepare_zap_page(kvm, sp, invalid_list, &nr_zapped);
2640  return nr_zapped;
2641 }
static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, struct list_head *invalid_list, int *nr_zapped)
Definition: mmu.c:2569
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_remote_flush_or_zap()

static bool kvm_mmu_remote_flush_or_zap ( struct kvm *  kvm,
struct list_head *  invalid_list,
bool  remote_flush 
)
static

Definition at line 1997 of file mmu.c.

2000 {
2001  if (!remote_flush && list_empty(invalid_list))
2002  return false;
2003 
2004  if (!list_empty(invalid_list))
2005  kvm_mmu_commit_zap_page(kvm, invalid_list);
2006  else
2007  kvm_flush_remote_tlbs(kvm);
2008  return true;
2009 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_reset_context()

void kvm_mmu_reset_context ( struct kvm_vcpu *  vcpu)

Definition at line 5581 of file mmu.c.

5582 {
5583  kvm_mmu_unload(vcpu);
5584  kvm_init_mmu(vcpu);
5585 }
void kvm_init_mmu(struct kvm_vcpu *vcpu)
Definition: mmu.c:5538
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_slot_gfn_write_protect()

bool kvm_mmu_slot_gfn_write_protect ( struct kvm *  kvm,
struct kvm_memory_slot *  slot,
u64  gfn,
int  min_level 
)

Definition at line 1414 of file mmu.c.

1417 {
1418  struct kvm_rmap_head *rmap_head;
1419  int i;
1420  bool write_protected = false;
1421 
1422  if (kvm_memslots_have_rmaps(kvm)) {
1423  for (i = min_level; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
1424  rmap_head = gfn_to_rmap(gfn, i, slot);
1425  write_protected |= rmap_write_protect(rmap_head, true);
1426  }
1427  }
1428 
1429  if (tdp_mmu_enabled)
1430  write_protected |=
1431  kvm_tdp_mmu_write_protect_gfn(kvm, slot, gfn, min_level);
1432 
1433  return write_protected;
1434 }
static bool rmap_write_protect(struct kvm_rmap_head *rmap_head, bool pt_protect)
Definition: mmu.c:1244
bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, int min_level)
Definition: tdp_mmu.c:1746
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_slot_leaf_clear_dirty()

void kvm_mmu_slot_leaf_clear_dirty ( struct kvm *  kvm,
const struct kvm_memory_slot *  memslot 
)

Definition at line 6769 of file mmu.c.

6771 {
6772  if (kvm_memslots_have_rmaps(kvm)) {
6773  write_lock(&kvm->mmu_lock);
6774  /*
6775  * Clear dirty bits only on 4k SPTEs since the legacy MMU only
6776  * supports dirty logging at a 4k granularity.
6777  */
6778  walk_slot_rmaps_4k(kvm, memslot, __rmap_clear_dirty, false);
6779  write_unlock(&kvm->mmu_lock);
6780  }
6781 
6782  if (tdp_mmu_enabled) {
6783  read_lock(&kvm->mmu_lock);
6784  kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
6785  read_unlock(&kvm->mmu_lock);
6786  }
6787 
6788  /*
6789  * The caller will flush the TLBs after this function returns.
6790  *
6791  * It's also safe to flush TLBs out of mmu lock here as currently this
6792  * function is only used for dirty logging, in which case flushing TLB
6793  * out of mmu lock also guarantees no dirty pages will be lost in
6794  * dirty_bitmap.
6795  */
6796 }
static __always_inline bool walk_slot_rmaps_4k(struct kvm *kvm, const struct kvm_memory_slot *slot, slot_rmaps_handler fn, bool flush_on_yield)
Definition: mmu.c:6083
bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, const struct kvm_memory_slot *slot)
Definition: tdp_mmu.c:1560
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_slot_remove_write_access()

void kvm_mmu_slot_remove_write_access ( struct kvm *  kvm,
const struct kvm_memory_slot *  memslot,
int  start_level 
)

Definition at line 6406 of file mmu.c.

6409 {
6410  if (kvm_memslots_have_rmaps(kvm)) {
6411  write_lock(&kvm->mmu_lock);
6412  walk_slot_rmaps(kvm, memslot, slot_rmap_write_protect,
6413  start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
6414  write_unlock(&kvm->mmu_lock);
6415  }
6416 
6417  if (tdp_mmu_enabled) {
6418  read_lock(&kvm->mmu_lock);
6419  kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level);
6420  read_unlock(&kvm->mmu_lock);
6421  }
6422 }
static bool slot_rmap_write_protect(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot)
Definition: mmu.c:6399
static __always_inline bool walk_slot_rmaps(struct kvm *kvm, const struct kvm_memory_slot *slot, slot_rmaps_handler fn, int start_level, int end_level, bool flush_on_yield)
Definition: mmu.c:6072
bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, const struct kvm_memory_slot *slot, int min_level)
Definition: tdp_mmu.c:1300
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_slot_try_split_huge_pages()

void kvm_mmu_slot_try_split_huge_pages ( struct kvm *  kvm,
const struct kvm_memory_slot *  memslot,
int  target_level 
)

Definition at line 6673 of file mmu.c.

6676 {
6677  u64 start = memslot->base_gfn;
6678  u64 end = start + memslot->npages;
6679 
6680  if (!tdp_mmu_enabled)
6681  return;
6682 
6683  if (kvm_memslots_have_rmaps(kvm)) {
6684  write_lock(&kvm->mmu_lock);
6685  kvm_shadow_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level);
6686  write_unlock(&kvm->mmu_lock);
6687  }
6688 
6689  read_lock(&kvm->mmu_lock);
6690  kvm_tdp_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level, true);
6691  read_unlock(&kvm->mmu_lock);
6692 
6693  /*
6694  * No TLB flush is necessary here. KVM will flush TLBs after
6695  * write-protecting and/or clearing dirty on the newly split SPTEs to
6696  * ensure that guest writes are reflected in the dirty log before the
6697  * ioctl to enable dirty logging on this memslot completes. Since the
6698  * split SPTEs retain the write and dirty bits of the huge SPTE, it is
6699  * safe for KVM to decide if a TLB flush is necessary based on the split
6700  * SPTEs.
6701  */
6702 }
static void kvm_shadow_mmu_try_split_huge_pages(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t start, gfn_t end, int target_level)
Definition: mmu.c:6635
void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t start, gfn_t end, int target_level, bool shared)
Definition: tdp_mmu.c:1483
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_sync_prev_roots()

void kvm_mmu_sync_prev_roots ( struct kvm_vcpu *  vcpu)

Definition at line 4062 of file mmu.c.

4063 {
4064  unsigned long roots_to_free = 0;
4065  int i;
4066 
4067  for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
4068  if (is_unsync_root(vcpu->arch.mmu->prev_roots[i].hpa))
4069  roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
4070 
4071  /* sync prev_roots by simply freeing them */
4072  kvm_mmu_free_roots(vcpu->kvm, vcpu->arch.mmu, roots_to_free);
4073 }
static bool is_unsync_root(hpa_t root)
Definition: mmu.c:3986
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_sync_roots()

void kvm_mmu_sync_roots ( struct kvm_vcpu *  vcpu)

Definition at line 4021 of file mmu.c.

4022 {
4023  int i;
4024  struct kvm_mmu_page *sp;
4025 
4026  if (vcpu->arch.mmu->root_role.direct)
4027  return;
4028 
4029  if (!VALID_PAGE(vcpu->arch.mmu->root.hpa))
4030  return;
4031 
4032  vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
4033 
4034  if (vcpu->arch.mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL) {
4035  hpa_t root = vcpu->arch.mmu->root.hpa;
4036 
4037  if (!is_unsync_root(root))
4038  return;
4039 
4040  sp = root_to_sp(root);
4041 
4042  write_lock(&vcpu->kvm->mmu_lock);
4043  mmu_sync_children(vcpu, sp, true);
4044  write_unlock(&vcpu->kvm->mmu_lock);
4045  return;
4046  }
4047 
4048  write_lock(&vcpu->kvm->mmu_lock);
4049 
4050  for (i = 0; i < 4; ++i) {
4051  hpa_t root = vcpu->arch.mmu->pae_root[i];
4052 
4053  if (IS_VALID_PAE_ROOT(root)) {
4054  sp = spte_to_child_sp(root);
4055  mmu_sync_children(vcpu, sp, true);
4056  }
4057  }
4058 
4059  write_unlock(&vcpu->kvm->mmu_lock);
4060 }
static int mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *parent, bool can_yield)
Definition: mmu.c:2093
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_track_write()

void kvm_mmu_track_write ( struct kvm_vcpu *  vcpu,
gpa_t  gpa,
const u8 *  new,
int  bytes 
)

Definition at line 5781 of file mmu.c.

5783 {
5784  gfn_t gfn = gpa >> PAGE_SHIFT;
5785  struct kvm_mmu_page *sp;
5786  LIST_HEAD(invalid_list);
5787  u64 entry, gentry, *spte;
5788  int npte;
5789  bool flush = false;
5790 
5791  /*
5792  * If we don't have indirect shadow pages, it means no page is
5793  * write-protected, so we can exit simply.
5794  */
5795  if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
5796  return;
5797 
5798  write_lock(&vcpu->kvm->mmu_lock);
5799 
5800  gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
5801 
5802  ++vcpu->kvm->stat.mmu_pte_write;
5803 
5804  for_each_gfn_valid_sp_with_gptes(vcpu->kvm, sp, gfn) {
5805  if (detect_write_misaligned(sp, gpa, bytes) ||
5806  detect_write_flooding(sp)) {
5807  kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
5808  ++vcpu->kvm->stat.mmu_flooded;
5809  continue;
5810  }
5811 
5812  spte = get_written_sptes(sp, gpa, &npte);
5813  if (!spte)
5814  continue;
5815 
5816  while (npte--) {
5817  entry = *spte;
5818  mmu_page_zap_pte(vcpu->kvm, sp, spte, NULL);
5819  if (gentry && sp->role.level != PG_LEVEL_4K)
5820  ++vcpu->kvm->stat.mmu_pde_zapped;
5821  if (is_shadow_present_pte(entry))
5822  flush = true;
5823  ++spte;
5824  }
5825  }
5826  kvm_mmu_remote_flush_or_zap(vcpu->kvm, &invalid_list, flush);
5827  write_unlock(&vcpu->kvm->mmu_lock);
5828 }
static bool detect_write_flooding(struct kvm_mmu_page *sp)
Definition: mmu.c:5712
static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm, struct list_head *invalid_list, bool remote_flush)
Definition: mmu.c:1997
#define for_each_gfn_valid_sp_with_gptes(_kvm, _sp, _gfn)
Definition: mmu.c:1913
static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa, int bytes)
Definition: mmu.c:5729
static u64 * get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
Definition: mmu.c:5750
static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, int *bytes)
Definition: mmu.c:5682
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_try_split_huge_pages()

void kvm_mmu_try_split_huge_pages ( struct kvm *  kvm,
const struct kvm_memory_slot *  memslot,
u64  start,
u64  end,
int  target_level 
)

Definition at line 6654 of file mmu.c.

6658 {
6659  if (!tdp_mmu_enabled)
6660  return;
6661 
6662  if (kvm_memslots_have_rmaps(kvm))
6663  kvm_shadow_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level);
6664 
6665  kvm_tdp_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level, false);
6666 
6667  /*
6668  * A TLB flush is unnecessary at this point for the same reasons as in
6669  * kvm_mmu_slot_try_split_huge_pages().
6670  */
6671 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_uninit_vm()

void kvm_mmu_uninit_vm ( struct kvm *  kvm)

Definition at line 6330 of file mmu.c.

6331 {
6332  if (tdp_mmu_enabled)
6333  kvm_mmu_uninit_tdp_mmu(kvm);
6334 
6335  mmu_free_vm_memory_caches(kvm);
6336 }
static void mmu_free_vm_memory_caches(struct kvm *kvm)
Definition: mmu.c:6323
void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
Definition: tdp_mmu.c:33
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_unlink_parents()

static void kvm_mmu_unlink_parents ( struct kvm *  kvm,
struct kvm_mmu_page *  sp 
)
static

Definition at line 2536 of file mmu.c.

2537 {
2538  u64 *sptep;
2539  struct rmap_iterator iter;
2540 
2541  while ((sptep = rmap_get_first(&sp->parent_ptes, &iter)))
2542  drop_parent_pte(kvm, sp, sptep);
2543 }
static void drop_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *parent_pte)
Definition: mmu.c:1769
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_unload()

void kvm_mmu_unload ( struct kvm_vcpu *  vcpu)

Definition at line 5621 of file mmu.c.

5622 {
5623  struct kvm *kvm = vcpu->kvm;
5624 
5625  kvm_mmu_free_roots(kvm, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL);
5626  WARN_ON_ONCE(VALID_PAGE(vcpu->arch.root_mmu.root.hpa));
5627  kvm_mmu_free_roots(kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
5628  WARN_ON_ONCE(VALID_PAGE(vcpu->arch.guest_mmu.root.hpa));
5629  vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
5630 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_unprotect_page()

int kvm_mmu_unprotect_page ( struct kvm *  kvm,
gfn_t  gfn 
)

Definition at line 2757 of file mmu.c.

2758 {
2759  struct kvm_mmu_page *sp;
2760  LIST_HEAD(invalid_list);
2761  int r;
2762 
2763  r = 0;
2764  write_lock(&kvm->mmu_lock);
2765  for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn) {
2766  r = 1;
2767  kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
2768  }
2769  kvm_mmu_commit_zap_page(kvm, &invalid_list);
2770  write_unlock(&kvm->mmu_lock);
2771 
2772  return r;
2773 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_unprotect_page_virt()

static int kvm_mmu_unprotect_page_virt ( struct kvm_vcpu *  vcpu,
gva_t  gva 
)
static

Definition at line 2775 of file mmu.c.

2776 {
2777  gpa_t gpa;
2778  int r;
2779 
2780  if (vcpu->arch.mmu->root_role.direct)
2781  return 0;
2782 
2783  gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
2784 
2785  r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
2786 
2787  return r;
2788 }
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception)
Definition: x86.c:7483
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_vendor_module_exit()

void kvm_mmu_vendor_module_exit ( void  )

Definition at line 7084 of file mmu.c.

7085 {
7086  mmu_destroy_caches();
7087  percpu_counter_destroy(&kvm_total_used_mmu_pages);
7088  shrinker_free(mmu_shrinker);
7089 }
static void mmu_destroy_caches(void)
Definition: mmu.c:6926
static struct shrinker * mmu_shrinker
Definition: mmu.c:6924
static struct percpu_counter kvm_total_used_mmu_pages
Definition: mmu.c:182
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_vendor_module_init()

int kvm_mmu_vendor_module_init ( void  )

Definition at line 7026 of file mmu.c.

7027 {
7028  int ret = -ENOMEM;
7029 
7030  /*
7031  * MMU roles use union aliasing which is, generally speaking, an
7032  * undefined behavior. However, we supposedly know how compilers behave
7033  * and the current status quo is unlikely to change. Guardians below are
7034  * supposed to let us know if the assumption becomes false.
7035  */
7036  BUILD_BUG_ON(sizeof(union kvm_mmu_page_role) != sizeof(u32));
7037  BUILD_BUG_ON(sizeof(union kvm_mmu_extended_role) != sizeof(u32));
7038  BUILD_BUG_ON(sizeof(union kvm_cpu_role) != sizeof(u64));
7039 
7040  kvm_mmu_reset_all_pte_masks();
7041 
7042  pte_list_desc_cache = kmem_cache_create("pte_list_desc",
7043  sizeof(struct pte_list_desc),
7044  0, SLAB_ACCOUNT, NULL);
7045  if (!pte_list_desc_cache)
7046  goto out;
7047 
7048  mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
7049  sizeof(struct kvm_mmu_page),
7050  0, SLAB_ACCOUNT, NULL);
7051  if (!mmu_page_header_cache)
7052  goto out;
7053 
7054  if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL))
7055  goto out;
7056 
7057  mmu_shrinker = shrinker_alloc(0, "x86-mmu");
7058  if (!mmu_shrinker)
7059  goto out_shrinker;
7060 
7061  mmu_shrinker->count_objects = mmu_shrink_count;
7062  mmu_shrinker->scan_objects = mmu_shrink_scan;
7063  mmu_shrinker->seeks = DEFAULT_SEEKS * 10;
7064 
7065  shrinker_register(mmu_shrinker);
7066 
7067  return 0;
7068 
7069 out_shrinker:
7070  percpu_counter_destroy(&kvm_total_used_mmu_pages);
7071 out:
7072  mmu_destroy_caches();
7073  return ret;
7074 }
static unsigned long mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
Definition: mmu.c:6859
static unsigned long mmu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
Definition: mmu.c:6918
void kvm_mmu_reset_all_pte_masks(void)
Definition: spte.c:453
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_write_protect_pt_masked()

static void kvm_mmu_write_protect_pt_masked ( struct kvm *  kvm,
struct kvm_memory_slot *  slot,
gfn_t  gfn_offset,
unsigned long  mask 
)
static

kvm_mmu_write_protect_pt_masked - write protect selected PT level pages

@kvm: kvm instance
@slot: slot to protect
@gfn_offset: start of the BITS_PER_LONG pages we care about
@mask: indicates which pages we should protect

Used when we do not need to care about huge page mappings.

Definition at line 1307 of file mmu.c.

1310 {
1311  struct kvm_rmap_head *rmap_head;
1312 
1313  if (tdp_mmu_enabled)
1314  kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot,
1315  slot->base_gfn + gfn_offset, mask, true);
1316 
1317  if (!kvm_memslots_have_rmaps(kvm))
1318  return;
1319 
1320  while (mask) {
1321  rmap_head = gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
1322  PG_LEVEL_4K, slot);
1323  rmap_write_protect(rmap_head, false);
1324 
1325  /* clear the first set bit */
1326  mask &= mask - 1;
1327  }
1328 }
Here is the call graph for this function:
Here is the caller graph for this function:
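
The loop visits exactly the pages whose bits are set in the mask: __ffs() yields the lowest set bit and 'mask &= mask - 1' clears it, so the cost is proportional to the number of dirty pages rather than to BITS_PER_LONG. A userspace sketch of the iteration pattern, using a compiler builtin in place of the kernel's __ffs().

#include <stdio.h>

int main(void)
{
    unsigned long base_gfn = 0x4000, gfn_offset = 64;
    unsigned long mask = 0x8049;   /* bits 0, 3, 6 and 15 are "dirty" */

    while (mask) {
        /* Lowest set bit; __builtin_ctzl stands in for the kernel's __ffs(). */
        unsigned long bit = (unsigned long)__builtin_ctzl(mask);

        printf("write-protect gfn %#lx\n", base_gfn + gfn_offset + bit);

        /* Clear the bit that was just handled. */
        mask &= mask - 1;
    }
    return 0;
}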

◆ kvm_mmu_x86_module_init()

void __init kvm_mmu_x86_module_init ( void  )

Definition at line 7006 of file mmu.c.

7007 {
7008  if (nx_huge_pages == -1)
7009  __set_nx_huge_pages(get_nx_auto_mode());
7010 
7011  /*
7012  * Snapshot userspace's desire to enable the TDP MMU. Whether or not the
7013  * TDP MMU is actually enabled is determined in kvm_configure_mmu()
7014  * when the vendor module is loaded.
7015  */
7016  tdp_mmu_allowed = tdp_mmu_enabled;
7017 
7018  kvm_mmu_spte_module_init();
7019 }
static void __set_nx_huge_pages(bool val)
Definition: mmu.c:6946
static bool get_nx_auto_mode(void)
Definition: mmu.c:6940
void __init kvm_mmu_spte_module_init(void)
Definition: spte.c:48
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_zap_all()

static void kvm_mmu_zap_all ( struct kvm *  kvm)
static

Definition at line 6798 of file mmu.c.

6799 {
6800  struct kvm_mmu_page *sp, *node;
6801  LIST_HEAD(invalid_list);
6802  int ign;
6803 
6804  write_lock(&kvm->mmu_lock);
6805 restart:
6806  list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
6807  if (WARN_ON_ONCE(sp->role.invalid))
6808  continue;
6809  if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
6810  goto restart;
6811  if (cond_resched_rwlock_write(&kvm->mmu_lock))
6812  goto restart;
6813  }
6814 
6815  kvm_mmu_commit_zap_page(kvm, &invalid_list);
6816 
6817  if (tdp_mmu_enabled)
6818  kvm_tdp_mmu_zap_all(kvm);
6819 
6820  write_unlock(&kvm->mmu_lock);
6821 }
void kvm_tdp_mmu_zap_all(struct kvm *kvm)
Definition: tdp_mmu.c:831
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_zap_all_fast()

static void kvm_mmu_zap_all_fast ( struct kvm *  kvm)
static

Definition at line 6248 of file mmu.c.

6249 {
6250  lockdep_assert_held(&kvm->slots_lock);
6251 
6252  write_lock(&kvm->mmu_lock);
6253  trace_kvm_mmu_zap_all_fast(kvm);
6254 
6255  /*
6256  * Toggle mmu_valid_gen between '0' and '1'. Because slots_lock is
6257  * held for the entire duration of zapping obsolete pages, it's
6258  * impossible for there to be multiple invalid generations associated
6259  * with *valid* shadow pages at any given time, i.e. there is exactly
6260  * one valid generation and (at most) one invalid generation.
6261  */
6262  kvm->arch.mmu_valid_gen = kvm->arch.mmu_valid_gen ? 0 : 1;
6263 
6264  /*
6265  * In order to ensure all vCPUs drop their soon-to-be invalid roots,
6266  * invalidating TDP MMU roots must be done while holding mmu_lock for
6267  * write and in the same critical section as making the reload request,
6268  * e.g. before kvm_zap_obsolete_pages() could drop mmu_lock and yield.
6269  */
6270  if (tdp_mmu_enabled)
6271  kvm_tdp_mmu_invalidate_all_roots(kvm);
6272 
6273  /*
6274  * Notify all vcpus to reload their shadow page tables and flush TLBs.
6275  * Then all vcpus will switch to the new shadow page tables with the new
6276  * mmu_valid_gen.
6277  *
6278  * Note: we need to do this under the protection of mmu_lock,
6279  * otherwise, vcpu would purge shadow page but miss tlb flush.
6280  */
6281  kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_FREE_OBSOLETE_ROOTS);
6282 
6283  kvm_zap_obsolete_pages(kvm);
6284 
6285  write_unlock(&kvm->mmu_lock);
6286 
6287  /*
6288  * Zap the invalidated TDP MMU roots, all SPTEs must be dropped before
6289  * returning to the caller, e.g. if the zap is in response to a memslot
6290  * deletion, mmu_notifier callbacks will be unable to reach the SPTEs
6291  * associated with the deleted memslot once the update completes, and
6292  * Deferring the zap until the final reference to the root is put would
6293  * lead to use-after-free.
6294  */
6295  if (tdp_mmu_enabled)
6296  kvm_tdp_mmu_zap_invalidated_roots(kvm);
6297 }
static void kvm_zap_obsolete_pages(struct kvm *kvm)
Definition: mmu.c:6183
void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
Definition: tdp_mmu.c:856
void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm)
Definition: tdp_mmu.c:901
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_zap_collapsible_spte()

static bool kvm_mmu_zap_collapsible_spte ( struct kvm *  kvm,
struct kvm_rmap_head *  rmap_head,
const struct kvm_memory_slot *  slot 
)
static

Definition at line 6704 of file mmu.c.

6707 {
6708  u64 *sptep;
6709  struct rmap_iterator iter;
6710  int need_tlb_flush = 0;
6711  struct kvm_mmu_page *sp;
6712 
6713 restart:
6714  for_each_rmap_spte(rmap_head, &iter, sptep) {
6715  sp = sptep_to_sp(sptep);
6716 
6717  /*
6718  * We cannot do huge page mapping for indirect shadow pages,
6719  * which are found on the last rmap (level = 1) when not using
6720  * tdp; such shadow pages are synced with the page table in
6721  * the guest, and the guest page table is using 4K page size
6722  * mapping if the indirect sp has level = 1.
6723  */
6724  if (sp->role.direct &&
6725  sp->role.level < kvm_mmu_max_mapping_level(kvm, slot, sp->gfn,
6726  PG_LEVEL_NUM)) {
6727  kvm_zap_one_rmap_spte(kvm, rmap_head, sptep);
6728 
6729  if (kvm_available_flush_remote_tlbs_range())
6730  kvm_flush_remote_tlbs_sptep(kvm, sptep);
6731  else
6732  need_tlb_flush = 1;
6733 
6734  goto restart;
6735  }
6736  }
6737 
6738  return need_tlb_flush;
6739 }
int kvm_mmu_max_mapping_level(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t gfn, int max_level)
Definition: mmu.c:3170
static bool kvm_available_flush_remote_tlbs_range(void)
Definition: mmu.c:272
static void kvm_zap_one_rmap_spte(struct kvm *kvm, struct kvm_rmap_head *rmap_head, u64 *sptep)
Definition: mmu.c:1037
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_zap_collapsible_sptes()

void kvm_mmu_zap_collapsible_sptes ( struct kvm *  kvm,
const struct kvm_memory_slot *  slot 
)

Definition at line 6753 of file mmu.c.

6755 {
6756  if (kvm_memslots_have_rmaps(kvm)) {
6757  write_lock(&kvm->mmu_lock);
6758  kvm_rmap_zap_collapsible_sptes(kvm, slot);
6759  write_unlock(&kvm->mmu_lock);
6760  }
6761 
6762  if (tdp_mmu_enabled) {
6763  read_lock(&kvm->mmu_lock);
6764  kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot);
6765  read_unlock(&kvm->mmu_lock);
6766  }
6767 }
static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm, const struct kvm_memory_slot *slot)
Definition: mmu.c:6741
void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, const struct kvm_memory_slot *slot)
Definition: tdp_mmu.c:1695
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mmu_zap_oldest_mmu_pages()

static unsigned long kvm_mmu_zap_oldest_mmu_pages ( struct kvm *  kvm,
unsigned long  nr_to_zap 
)
static

Definition at line 2668 of file mmu.c.

2670 {
2671  unsigned long total_zapped = 0;
2672  struct kvm_mmu_page *sp, *tmp;
2673  LIST_HEAD(invalid_list);
2674  bool unstable;
2675  int nr_zapped;
2676 
2677  if (list_empty(&kvm->arch.active_mmu_pages))
2678  return 0;
2679 
2680 restart:
2681  list_for_each_entry_safe_reverse(sp, tmp, &kvm->arch.active_mmu_pages, link) {
2682  /*
2683  * Don't zap active root pages, the page itself can't be freed
2684  * and zapping it will just force vCPUs to realloc and reload.
2685  */
2686  if (sp->root_count)
2687  continue;
2688 
2689  unstable = __kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list,
2690  &nr_zapped);
2691  total_zapped += nr_zapped;
2692  if (total_zapped >= nr_to_zap)
2693  break;
2694 
2695  if (unstable)
2696  goto restart;
2697  }
2698 
2699  kvm_mmu_commit_zap_page(kvm, &invalid_list);
2700 
2701  kvm->stat.mmu_recycled += total_zapped;
2702  return total_zapped;
2703 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_mod_used_mmu_pages()

static void kvm_mod_used_mmu_pages ( struct kvm *  kvm,
long  nr 
)
inlinestatic

Definition at line 1719 of file mmu.c.

1720 {
1721  kvm->arch.n_used_mmu_pages += nr;
1722  percpu_counter_add(&kvm_total_used_mmu_pages, nr);
1723 }
Here is the caller graph for this function:

◆ kvm_nx_huge_page_recovery_worker()

static int kvm_nx_huge_page_recovery_worker ( struct kvm *  kvm,
uintptr_t  data 
)
static

Definition at line 7254 of file mmu.c.

7255 {
7256  u64 start_time;
7257  long remaining_time;
7258 
7259  while (true) {
7260  start_time = get_jiffies_64();
7261  remaining_time = get_nx_huge_page_recovery_timeout(start_time);
7262 
7263  set_current_state(TASK_INTERRUPTIBLE);
7264  while (!kthread_should_stop() && remaining_time > 0) {
7265  schedule_timeout(remaining_time);
7266  remaining_time = get_nx_huge_page_recovery_timeout(start_time);
7267  set_current_state(TASK_INTERRUPTIBLE);
7268  }
7269 
7270  set_current_state(TASK_RUNNING);
7271 
7272  if (kthread_should_stop())
7273  return 0;
7274 
7275  kvm_recover_nx_huge_pages(kvm);
7276  }
7277 }
static void kvm_recover_nx_huge_pages(struct kvm *kvm)
Definition: mmu.c:7148
static long get_nx_huge_page_recovery_timeout(u64 start_time)
Definition: mmu.c:7243
Here is the call graph for this function:
Here is the caller graph for this function:
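
The worker re-derives the remaining sleep time from the recorded start time after every wake-up, so a spurious wake-up cannot shorten the recovery period. A userspace sketch of that pattern, with POSIX time() and sleep() standing in for jiffies and schedule_timeout() and a made-up period.

#include <stdio.h>
#include <time.h>
#include <unistd.h>

#define PERIOD_SEC 2   /* stand-in for the recovery period module parameter */

static long remaining_seconds(time_t start)
{
    return (long)(start + PERIOD_SEC - time(NULL));
}

int main(void)
{
    time_t start = time(NULL);
    long remaining = remaining_seconds(start);

    /* Keep sleeping until the full period has really elapsed. */
    while (remaining > 0) {
        sleep((unsigned int)remaining);
        remaining = remaining_seconds(start);
    }

    printf("period elapsed, run one recovery pass\n");
    return 0;
}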

◆ kvm_page_table_hashfn()

static unsigned kvm_page_table_hashfn ( gfn_t  gfn)
static

Definition at line 1749 of file mmu.c.

1750 {
1751  return hash_64(gfn, KVM_MMU_HASH_SHIFT);
1752 }
Here is the caller graph for this function:
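
Shadow pages are looked up by gfn through a small hash table, and hash_64() folds the 64-bit gfn down to KVM_MMU_HASH_SHIFT bits. A hedged sketch of the same idea with a Fibonacci-style multiplicative hash, similar in spirit to (but not a copy of) the kernel's hash_64(); the bucket count is a stand-in.

#include <stdint.h>
#include <stdio.h>

#define HASH_SHIFT 12   /* stand-in for KVM_MMU_HASH_SHIFT: 4096 buckets */

/* Multiplicative (Fibonacci) hash of a gfn into HASH_SHIFT bits. */
static unsigned int gfn_hash(uint64_t gfn)
{
    return (unsigned int)((gfn * 0x9E3779B97F4A7C15ULL) >> (64 - HASH_SHIFT));
}

int main(void)
{
    uint64_t gfns[] = { 0x0, 0x1, 0x1000, 0xfffff };

    for (unsigned int i = 0; i < sizeof(gfns) / sizeof(gfns[0]); i++)
        printf("gfn %#llx -> bucket %u\n",
               (unsigned long long)gfns[i], gfn_hash(gfns[i]));
    return 0;
}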

◆ kvm_recover_nx_huge_pages()

static void kvm_recover_nx_huge_pages ( struct kvm *  kvm)
static

Definition at line 7148 of file mmu.c.

7149 {
7150  unsigned long nx_lpage_splits = kvm->stat.nx_lpage_splits;
7151  struct kvm_memory_slot *slot;
7152  int rcu_idx;
7153  struct kvm_mmu_page *sp;
7154  unsigned int ratio;
7155  LIST_HEAD(invalid_list);
7156  bool flush = false;
7157  ulong to_zap;
7158 
7159  rcu_idx = srcu_read_lock(&kvm->srcu);
7160  write_lock(&kvm->mmu_lock);
7161 
7162  /*
7163  * Zapping TDP MMU shadow pages, including the remote TLB flush, must
7164  * be done under RCU protection, because the pages are freed via RCU
7165  * callback.
7166  */
7167  rcu_read_lock();
7168 
7169  ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
7170  to_zap = ratio ? DIV_ROUND_UP(nx_lpage_splits, ratio) : 0;
7171  for ( ; to_zap; --to_zap) {
7172  if (list_empty(&kvm->arch.possible_nx_huge_pages))
7173  break;
7174 
7175  /*
7176  * We use a separate list instead of just using active_mmu_pages
7177  * because the number of shadow pages that can be replaced with an
7178  * NX huge page is expected to be relatively small compared to
7179  * the total number of shadow pages. And because the TDP MMU
7180  * doesn't use active_mmu_pages.
7181  */
7182  sp = list_first_entry(&kvm->arch.possible_nx_huge_pages,
7183  struct kvm_mmu_page,
7184  possible_nx_huge_page_link);
7185  WARN_ON_ONCE(!sp->nx_huge_page_disallowed);
7186  WARN_ON_ONCE(!sp->role.direct);
7187 
7188  /*
7189  * Unaccount and do not attempt to recover any NX Huge Pages
7190  * that are being dirty tracked, as they would just be faulted
7191  * back in as 4KiB pages. The NX Huge Pages in this slot will be
7192  * recovered, along with all the other huge pages in the slot,
7193  * when dirty logging is disabled.
7194  *
7195  * Since gfn_to_memslot() is relatively expensive, it helps to
7196  * skip it if the test cannot possibly return true. On the
7197  * other hand, if any memslot has logging enabled, chances are
7198  * good that all of them do, in which case unaccount_nx_huge_page()
7199  * is much cheaper than zapping the page.
7200  *
7201  * If a memslot update is in progress, reading an incorrect value
7202  * of kvm->nr_memslots_dirty_logging is not a problem: if it is
7203  * becoming zero, gfn_to_memslot() will be done unnecessarily; if
7204  * it is becoming nonzero, the page will be zapped unnecessarily.
7205  * Either way, this only affects efficiency in racy situations,
7206  * and not correctness.
7207  */
7208  slot = NULL;
7209  if (atomic_read(&kvm->nr_memslots_dirty_logging)) {
7210  struct kvm_memslots *slots;
7211 
7212  slots = kvm_memslots_for_spte_role(kvm, sp->role);
7213  slot = __gfn_to_memslot(slots, sp->gfn);
7214  WARN_ON_ONCE(!slot);
7215  }
7216 
7217  if (slot && kvm_slot_dirty_track_enabled(slot))
7218  unaccount_nx_huge_page(kvm, sp);
7219  else if (is_tdp_mmu_page(sp))
7220  flush |= kvm_tdp_mmu_zap_sp(kvm, sp);
7221  else
7222  kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
7223  WARN_ON_ONCE(sp->nx_huge_page_disallowed);
7224 
7225  if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
7226  kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
7227  rcu_read_unlock();
7228 
7229  cond_resched_rwlock_write(&kvm->mmu_lock);
7230  flush = false;
7231 
7232  rcu_read_lock();
7233  }
7234  }
7235  kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
7236 
7237  rcu_read_unlock();
7238 
7239  write_unlock(&kvm->mmu_lock);
7240  srcu_read_unlock(&kvm->srcu, rcu_idx);
7241 }
bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
Definition: tdp_mmu.c:752
Here is the call graph for this function:
Here is the caller graph for this function:
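
The budget for one recovery pass is the number of currently split NX huge pages divided by the recovery ratio, rounded up, and a ratio of zero disables zapping entirely. A short standalone sketch of that calculation, with DIV_ROUND_UP defined locally for the example.

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static unsigned long recovery_budget(unsigned long nx_lpage_splits, unsigned int ratio)
{
    return ratio ? DIV_ROUND_UP(nx_lpage_splits, ratio) : 0;
}

int main(void)
{
    printf("1000 splits, ratio 60 -> zap %lu per pass\n", recovery_budget(1000, 60));
    printf("1000 splits, ratio 0  -> zap %lu per pass\n", recovery_budget(1000, 0));
    return 0;
}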

◆ kvm_rmap_zap_collapsible_sptes()

static void kvm_rmap_zap_collapsible_sptes ( struct kvm *  kvm,
const struct kvm_memory_slot *  slot 
)
static

Definition at line 6741 of file mmu.c.

6743 {
6744  /*
6745  * Note, use KVM_MAX_HUGEPAGE_LEVEL - 1 since there's no need to zap
6746  * pages that are already mapped at the maximum hugepage level.
6747  */
6748  if (walk_slot_rmaps(kvm, slot, kvm_mmu_zap_collapsible_spte,
6749  PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL - 1, true))
6750  kvm_flush_remote_tlbs_memslot(kvm, slot);
6751 }
void kvm_flush_remote_tlbs_memslot(struct kvm *kvm, const struct kvm_memory_slot *memslot)
Definition: kvm_main.c:380
static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot)
Definition: mmu.c:6704
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_rmap_zap_gfn_range()

static bool kvm_rmap_zap_gfn_range ( struct kvm *  kvm,
gfn_t  gfn_start,
gfn_t  gfn_end 
)
static

Definition at line 6338 of file mmu.c.

6339 {
6340  const struct kvm_memory_slot *memslot;
6341  struct kvm_memslots *slots;
6342  struct kvm_memslot_iter iter;
6343  bool flush = false;
6344  gfn_t start, end;
6345  int i;
6346 
6347  if (!kvm_memslots_have_rmaps(kvm))
6348  return flush;
6349 
6350  for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
6351  slots = __kvm_memslots(kvm, i);
6352 
6353  kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) {
6354  memslot = iter.slot;
6355  start = max(gfn_start, memslot->base_gfn);
6356  end = min(gfn_end, memslot->base_gfn + memslot->npages);
6357  if (WARN_ON_ONCE(start >= end))
6358  continue;
6359 
6360  flush = __walk_slot_rmaps(kvm, memslot, __kvm_zap_rmap,
6361  PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
6362  start, end - 1, true, flush);
6363  }
6364  }
6365 
6366  return flush;
6367 }
static __always_inline bool __walk_slot_rmaps(struct kvm *kvm, const struct kvm_memory_slot *slot, slot_rmaps_handler fn, int start_level, int end_level, gfn_t start_gfn, gfn_t end_gfn, bool flush_on_yield, bool flush)
Definition: mmu.c:6043
static bool __kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot)
Definition: mmu.c:1444
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_send_hwpoison_signal()

static void kvm_send_hwpoison_signal ( struct kvm_memory_slot *  slot,
gfn_t  gfn 
)
static

Definition at line 3281 of file mmu.c.

3282 {
3283  unsigned long hva = gfn_to_hva_memslot(slot, gfn);
3284 
3285  send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva, PAGE_SHIFT, current);
3286 }
unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
Definition: kvm_main.c:2735
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_set_pte_rmap()

static bool kvm_set_pte_rmap ( struct kvm *  kvm,
struct kvm_rmap_head *  rmap_head,
struct kvm_memory_slot *  slot,
gfn_t  gfn,
int  level,
pte_t  pte 
)
static

Definition at line 1457 of file mmu.c.

1460 {
1461  u64 *sptep;
1462  struct rmap_iterator iter;
1463  bool need_flush = false;
1464  u64 new_spte;
1465  kvm_pfn_t new_pfn;
1466 
1467  WARN_ON_ONCE(pte_huge(pte));
1468  new_pfn = pte_pfn(pte);
1469 
1470 restart:
1471  for_each_rmap_spte(rmap_head, &iter, sptep) {
1472  need_flush = true;
1473 
1474  if (pte_write(pte)) {
1475  kvm_zap_one_rmap_spte(kvm, rmap_head, sptep);
1476  goto restart;
1477  } else {
1478  new_spte = kvm_mmu_changed_pte_notifier_make_spte(
1479  *sptep, new_pfn);
1480 
1481  mmu_spte_clear_track_bits(kvm, sptep);
1482  mmu_spte_set(sptep, new_spte);
1483  }
1484  }
1485 
1486  if (need_flush && kvm_available_flush_remote_tlbs_range()) {
1487  kvm_flush_remote_tlbs_gfn(kvm, gfn, level);
1488  return false;
1489  }
1490 
1491  return need_flush;
1492 }
u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn)
Definition: spte.c:325
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_set_spte_gfn()

bool kvm_set_spte_gfn ( struct kvm *  kvm,
struct kvm_gfn_range *  range 
)

Definition at line 1599 of file mmu.c.

1600 {
1601  bool flush = false;
1602 
1603  if (kvm_memslots_have_rmaps(kvm))
1604  flush = kvm_handle_gfn_range(kvm, range, kvm_set_pte_rmap);
1605 
1606  if (tdp_mmu_enabled)
1607  flush |= kvm_tdp_mmu_set_spte_gfn(kvm, range);
1608 
1609  return flush;
1610 }
static bool kvm_set_pte_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, pte_t pte)
Definition: mmu.c:1457
bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
Definition: tdp_mmu.c:1247
Here is the call graph for this function:

◆ kvm_shadow_mmu_try_split_huge_pages()

static void kvm_shadow_mmu_try_split_huge_pages ( struct kvm *  kvm,
const struct kvm_memory_slot *  slot,
gfn_t  start,
gfn_t  end,
int  target_level 
)
static

Definition at line 6635 of file mmu.c.

6639 {
6640  int level;
6641 
6642  /*
6643  * Split huge pages starting with KVM_MAX_HUGEPAGE_LEVEL and working
6644  * down to the target level. This ensures pages are recursively split
6645  * all the way to the target level. There's no need to split pages
6646  * already at the target level.
6647  */
6648  for (level = KVM_MAX_HUGEPAGE_LEVEL; level > target_level; level--)
6649  __walk_slot_rmaps(kvm, slot, shadow_mmu_try_split_huge_pages,
6650  level, level, start, end - 1, true, false);
6651 }
static bool shadow_mmu_try_split_huge_pages(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot)
Definition: mmu.c:6589
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_sync_page()

static int kvm_sync_page ( struct kvm_vcpu *  vcpu,
struct kvm_mmu_page sp,
struct list_head *  invalid_list 
)
static

Definition at line 1987 of file mmu.c.

1989 {
1990  int ret = __kvm_sync_page(vcpu, sp);
1991 
1992  if (ret < 0)
1993  kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
1994  return ret;
1995 }
static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
Definition: mmu.c:1959
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_sync_page_check()

static bool kvm_sync_page_check ( struct kvm_vcpu *  vcpu,
struct kvm_mmu_page sp 
)
static

Definition at line 1918 of file mmu.c.

1919 {
1920  union kvm_mmu_page_role root_role = vcpu->arch.mmu->root_role;
1921 
1922  /*
1923  * Ignore various flags when verifying that it's safe to sync a shadow
1924  * page using the current MMU context.
1925  *
1926  * - level: not part of the overall MMU role and will never match as the MMU's
1927  * level tracks the root level
1928  * - access: updated based on the new guest PTE
1929  * - quadrant: not part of the overall MMU role (similar to level)
1930  */
1931  const union kvm_mmu_page_role sync_role_ign = {
1932  .level = 0xf,
1933  .access = 0x7,
1934  .quadrant = 0x3,
1935  .passthrough = 0x1,
1936  };
1937 
1938  /*
1939  * Direct pages can never be unsync, and KVM should never attempt to
1940  * sync a shadow page for a different MMU context, e.g. if the role
1941  * differs then the memslot lookup (SMM vs. non-SMM) will be bogus, the
1942  * reserved bits checks will be wrong, etc...
1943  */
1944  if (WARN_ON_ONCE(sp->role.direct || !vcpu->arch.mmu->sync_spte ||
1945  (sp->role.word ^ root_role.word) & ~sync_role_ign.word))
1946  return false;
1947 
1948  return true;
1949 }
Here is the caller graph for this function:
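
As a rough standalone illustration (not KVM code), the sketch below models the role-word comparison that kvm_sync_page_check() performs: build a union value whose only set fields are the ones that are allowed to differ, then mask those bits out of the XOR of the two role words. The union toy_role, roles_compatible() and the smm field are made-up, heavily trimmed stand-ins for the real union kvm_mmu_page_role.

#include <stdio.h>
#include <stdint.h>

union toy_role {
    struct {
        unsigned int level:4;
        unsigned int access:3;
        unsigned int quadrant:2;
        unsigned int passthrough:1;
        unsigned int smm:1;      /* example of a field that must match */
    };
    uint32_t word;
};

static int roles_compatible(union toy_role a, union toy_role b)
{
    /* Fields that may legitimately differ between the SP and the MMU. */
    const union toy_role ign = {
        .level = 0xf, .access = 0x7, .quadrant = 0x3, .passthrough = 0x1,
    };

    /* XOR exposes differing bits; the mask hides the ignored fields. */
    return ((a.word ^ b.word) & ~ign.word) == 0;
}

int main(void)
{
    union toy_role sp_role  = { .level = 1, .access = 7, .smm = 0 };
    union toy_role mmu_role = { .level = 4, .access = 0, .smm = 0 };

    printf("compatible: %d\n", roles_compatible(sp_role, mmu_role)); /* 1 */

    mmu_role.smm = 1;  /* a field outside the ignore mask now differs */
    printf("compatible: %d\n", roles_compatible(sp_role, mmu_role)); /* 0 */
    return 0;
}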

◆ kvm_sync_spte()

static int kvm_sync_spte ( struct kvm_vcpu *  vcpu,
struct kvm_mmu_page sp,
int  i 
)
static

Definition at line 1951 of file mmu.c.

1952 {
1953  if (!sp->spt[i])
1954  return 0;
1955 
1956  return vcpu->arch.mmu->sync_spte(vcpu, sp, i);
1957 }
Here is the caller graph for this function:

◆ kvm_tdp_page_fault()

int kvm_tdp_page_fault ( struct kvm_vcpu *  vcpu,
struct kvm_page_fault fault 
)

Definition at line 4623 of file mmu.c.

4624 {
4625  /*
4626  * If the guest's MTRRs may be used to compute the "real" memtype,
4627  * restrict the mapping level to ensure KVM uses a consistent memtype
4628  * across the entire mapping.
4629  */
4630  if (kvm_mmu_honors_guest_mtrrs(vcpu->kvm)) {
4631  for ( ; fault->max_level > PG_LEVEL_4K; --fault->max_level) {
4632  int page_num = KVM_PAGES_PER_HPAGE(fault->max_level);
4633  gfn_t base = gfn_round_for_level(fault->gfn,
4634  fault->max_level);
4635 
4636  if (kvm_mtrr_check_gfn_range_consistency(vcpu, base, page_num))
4637  break;
4638  }
4639  }
4640 
4641 #ifdef CONFIG_X86_64
4642  if (tdp_mmu_enabled)
4643  return kvm_tdp_mmu_page_fault(vcpu, fault);
4644 #endif
4645 
4646  return direct_page_fault(vcpu, fault);
4647 }
static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
Definition: mmu.c:4491
static bool kvm_mmu_honors_guest_mtrrs(struct kvm *kvm)
Definition: mmu.h:250
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int page_num)
Definition: mtrr.c:690
Here is the call graph for this function:
Here is the caller graph for this function:
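
The loop in kvm_tdp_page_fault() walks max_level down until the huge-page-sized range around the faulting gfn has a consistent memtype. The standalone sketch below (not KVM code) shows the arithmetic: pages_per_hpage() and round_gfn() mirror what KVM_PAGES_PER_HPAGE() and gfn_round_for_level() compute on x86 (512 entries per level), while range_is_consistent() is a made-up stand-in for kvm_mtrr_check_gfn_range_consistency() that only accepts 2M-sized ranges.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

typedef uint64_t gfn_t;

#define PG_LEVEL_4K 1
#define PG_LEVEL_2M 2
#define PG_LEVEL_1G 3

static uint64_t pages_per_hpage(int level)
{
    return 1ULL << (9 * (level - 1));   /* 1, 512, 512*512, ... */
}

static gfn_t round_gfn(gfn_t gfn, int level)
{
    return gfn & ~(pages_per_hpage(level) - 1);
}

/* Stand-in: pretend only ranges up to 2M have a uniform memtype. */
static bool range_is_consistent(gfn_t base, uint64_t nr_pages)
{
    (void)base;
    return nr_pages <= pages_per_hpage(PG_LEVEL_2M);
}

int main(void)
{
    gfn_t gfn = 0x12345;
    int max_level;

    for (max_level = PG_LEVEL_1G; max_level > PG_LEVEL_4K; max_level--) {
        gfn_t base = round_gfn(gfn, max_level);

        if (range_is_consistent(base, pages_per_hpage(max_level)))
            break;
    }
    printf("mapping level clamped to %d\n", max_level);  /* 2 (2M) */
    return 0;
}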

◆ kvm_test_age_gfn()

bool kvm_test_age_gfn ( struct kvm *  kvm,
struct kvm_gfn_range *  range 
)

Definition at line 1686 of file mmu.c.

1687 {
1688  bool young = false;
1689 
1690  if (kvm_memslots_have_rmaps(kvm))
1691  young = kvm_handle_gfn_range(kvm, range, kvm_test_age_rmap);
1692 
1693  if (tdp_mmu_enabled)
1694  young |= kvm_tdp_mmu_test_age_gfn(kvm, range);
1695 
1696  return young;
1697 }
static bool kvm_test_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, pte_t unused)
Definition: mmu.c:1626
bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
Definition: tdp_mmu.c:1206
Here is the call graph for this function:

◆ kvm_test_age_rmap()

static bool kvm_test_age_rmap ( struct kvm *  kvm,
struct kvm_rmap_head *  rmap_head,
struct kvm_memory_slot *  slot,
gfn_t  gfn,
int  level,
pte_t  unused 
)
static

Definition at line 1626 of file mmu.c.

1629 {
1630  u64 *sptep;
1631  struct rmap_iterator iter;
1632 
1633  for_each_rmap_spte(rmap_head, &iter, sptep)
1634  if (is_accessed_spte(*sptep))
1635  return true;
1636  return false;
1637 }
static bool is_accessed_spte(u64 spte)
Definition: spte.h:333
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_unaccount_mmu_page()

static void kvm_unaccount_mmu_page ( struct kvm *  kvm,
struct kvm_mmu_page sp 
)
static

Definition at line 1731 of file mmu.c.

1732 {
1733  kvm_mod_used_mmu_pages(kvm, -1);
1734  kvm_account_pgtable_pages((void *)sp->spt, -1);
1735 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_unlink_unsync_page()

static void kvm_unlink_unsync_page ( struct kvm *  kvm,
struct kvm_mmu_page sp 
)
static

Definition at line 1884 of file mmu.c.

1885 {
1886  WARN_ON_ONCE(!sp->unsync);
1887  trace_kvm_mmu_sync_page(sp);
1888  sp->unsync = 0;
1889  --kvm->stat.mmu_unsync;
1890 }
Here is the caller graph for this function:

◆ kvm_unmap_gfn_range()

bool kvm_unmap_gfn_range ( struct kvm *  kvm,
struct kvm_gfn_range *  range 
)

Definition at line 1582 of file mmu.c.

1583 {
1584  bool flush = false;
1585 
1586  if (kvm_memslots_have_rmaps(kvm))
1587  flush = kvm_handle_gfn_range(kvm, range, kvm_zap_rmap);
1588 
1589  if (tdp_mmu_enabled)
1590  flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
1591 
1592  if (kvm_x86_ops.set_apic_access_page_addr &&
1593  range->slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT)
1594  kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
1595 
1596  return flush;
1597 }
static bool kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, pte_t unused)
Definition: mmu.c:1450
bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, bool flush)
Definition: tdp_mmu.c:1114
Here is the call graph for this function:

◆ kvm_unsync_page()

static void kvm_unsync_page ( struct kvm *  kvm,
struct kvm_mmu_page sp 
)
static

Definition at line 2790 of file mmu.c.

2791 {
2792  trace_kvm_mmu_unsync_page(sp);
2793  ++kvm->stat.mmu_unsync;
2794  sp->unsync = 1;
2795 
 2796  kvm_mmu_mark_parents_unsync(sp);
 2797 }
static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
Definition: mmu.c:1777
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_vcpu_write_protect_gfn()

static bool kvm_vcpu_write_protect_gfn ( struct kvm_vcpu *  vcpu,
u64  gfn 
)
static

Definition at line 1436 of file mmu.c.

1437 {
1438  struct kvm_memory_slot *slot;
1439 
1440  slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
1441  return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn, PG_LEVEL_4K);
1442 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_zap_all_rmap_sptes()

static bool kvm_zap_all_rmap_sptes ( struct kvm *  kvm,
struct kvm_rmap_head *  rmap_head 
)
static

Definition at line 1045 of file mmu.c.

1047 {
1048  struct pte_list_desc *desc, *next;
1049  int i;
1050 
1051  if (!rmap_head->val)
1052  return false;
1053 
1054  if (!(rmap_head->val & 1)) {
1055  mmu_spte_clear_track_bits(kvm, (u64 *)rmap_head->val);
1056  goto out;
1057  }
1058 
1059  desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
1060 
1061  for (; desc; desc = next) {
1062  for (i = 0; i < desc->spte_count; i++)
1063  mmu_spte_clear_track_bits(kvm, desc->sptes[i]);
1064  next = desc->more;
1065  mmu_free_pte_list_desc(desc);
1066  }
1067 out:
1068  /* rmap_head is meaningless now, remember to reset it */
1069  rmap_head->val = 0;
1070  return true;
1071 }
static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc)
Definition: mmu.c:710
struct pte_list_desc * more
Definition: mmu.c:147
u32 spte_count
Definition: mmu.c:149
u64 * sptes[PTE_LIST_EXT]
Definition: mmu.c:152
Here is the call graph for this function:
Here is the caller graph for this function:
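
kvm_zap_all_rmap_sptes() decodes a tagged pointer: bit 0 of rmap_head->val distinguishes "val is a single spte pointer" from "val points to a pte_list_desc chain". The standalone sketch below (not KVM code) models that encoding; struct toy_desc and walk_rmap() are made-up, trimmed stand-ins for pte_list_desc (which really holds PTE_LIST_EXT entries plus a tail count).

#include <stdio.h>
#include <stdint.h>

struct toy_desc {
    uint64_t *sptes[3];
    int spte_count;
    struct toy_desc *more;
};

struct toy_rmap_head {
    unsigned long val;
};

static void walk_rmap(struct toy_rmap_head *head)
{
    if (!head->val)
        return;

    if (!(head->val & 1)) {             /* single-entry encoding */
        printf("spte %p\n", (void *)head->val);
        return;
    }

    /* Multi-entry encoding: strip the tag bit to recover the pointer. */
    for (struct toy_desc *d = (struct toy_desc *)(head->val & ~1ul);
         d; d = d->more)
        for (int i = 0; i < d->spte_count; i++)
            printf("spte %p\n", (void *)d->sptes[i]);
}

int main(void)
{
    uint64_t a, b;
    struct toy_desc desc = { .sptes = { &a, &b }, .spte_count = 2 };
    struct toy_rmap_head single = { .val = (unsigned long)&a };
    struct toy_rmap_head multi  = { .val = (unsigned long)&desc | 1 };

    walk_rmap(&single);
    walk_rmap(&multi);
    return 0;
}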

◆ kvm_zap_gfn_range()

void kvm_zap_gfn_range ( struct kvm *  kvm,
gfn_t  gfn_start,
gfn_t  gfn_end 
)

Definition at line 6373 of file mmu.c.

6374 {
6375  bool flush;
6376 
6377  if (WARN_ON_ONCE(gfn_end <= gfn_start))
6378  return;
6379 
6380  write_lock(&kvm->mmu_lock);
6381 
6382  kvm_mmu_invalidate_begin(kvm);
6383 
6384  kvm_mmu_invalidate_range_add(kvm, gfn_start, gfn_end);
6385 
6386  flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);
6387 
6388  if (tdp_mmu_enabled)
6389  flush = kvm_tdp_mmu_zap_leafs(kvm, gfn_start, gfn_end, flush);
6390 
6391  if (flush)
6392  kvm_flush_remote_tlbs_range(kvm, gfn_start, gfn_end - gfn_start);
6393 
6394  kvm_mmu_invalidate_end(kvm);
6395 
6396  write_unlock(&kvm->mmu_lock);
6397 }
static bool kvm_rmap_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
Definition: mmu.c:6338
bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush)
Definition: tdp_mmu.c:820
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_zap_obsolete_pages()

static void kvm_zap_obsolete_pages ( struct kvm *  kvm)
static

Definition at line 6183 of file mmu.c.

6184 {
6185  struct kvm_mmu_page *sp, *node;
6186  int nr_zapped, batch = 0;
6187  bool unstable;
6188 
6189 restart:
6190  list_for_each_entry_safe_reverse(sp, node,
6191  &kvm->arch.active_mmu_pages, link) {
6192  /*
6193  * No obsolete valid page exists before a newly created page
6194  * since active_mmu_pages is a FIFO list.
6195  */
6196  if (!is_obsolete_sp(kvm, sp))
6197  break;
6198 
6199  /*
6200  * Invalid pages should never land back on the list of active
6201  * pages. Skip the bogus page, otherwise we'll get stuck in an
6202  * infinite loop if the page gets put back on the list (again).
6203  */
6204  if (WARN_ON_ONCE(sp->role.invalid))
6205  continue;
6206 
6207  /*
6208  * No need to flush the TLB since we're only zapping shadow
6209  * pages with an obsolete generation number and all vCPUS have
6210  * loaded a new root, i.e. the shadow pages being zapped cannot
6211  * be in active use by the guest.
6212  */
6213  if (batch >= BATCH_ZAP_PAGES &&
6214  cond_resched_rwlock_write(&kvm->mmu_lock)) {
6215  batch = 0;
6216  goto restart;
6217  }
6218 
6219  unstable = __kvm_mmu_prepare_zap_page(kvm, sp,
6220  &kvm->arch.zapped_obsolete_pages, &nr_zapped);
6221  batch += nr_zapped;
6222 
6223  if (unstable)
6224  goto restart;
6225  }
6226 
6227  /*
6228  * Kick all vCPUs (via remote TLB flush) before freeing the page tables
6229  * to ensure KVM is not in the middle of a lockless shadow page table
6230  * walk, which may reference the pages. The remote TLB flush itself is
6231  * not required and is simply a convenient way to kick vCPUs as needed.
6232  * KVM performs a local TLB flush when allocating a new root (see
 6233  * kvm_mmu_load()), and the reload in the caller ensures no vCPUs are
6234  * running with an obsolete MMU.
6235  */
6236  kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
6237 }
#define BATCH_ZAP_PAGES
Definition: mmu.c:6182
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_zap_one_rmap_spte()

static void kvm_zap_one_rmap_spte ( struct kvm *  kvm,
struct kvm_rmap_head *  rmap_head,
u64 *  sptep 
)
static

Definition at line 1037 of file mmu.c.

1039 {
1040  mmu_spte_clear_track_bits(kvm, sptep);
1041  pte_list_remove(kvm, sptep, rmap_head);
1042 }
static void pte_list_remove(struct kvm *kvm, u64 *spte, struct kvm_rmap_head *rmap_head)
Definition: mmu.c:1006
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_zap_rmap()

static bool kvm_zap_rmap ( struct kvm *  kvm,
struct kvm_rmap_head *  rmap_head,
struct kvm_memory_slot *  slot,
gfn_t  gfn,
int  level,
pte_t  unused 
)
static

Definition at line 1450 of file mmu.c.

1453 {
1454  return __kvm_zap_rmap(kvm, rmap_head, slot);
1455 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ link_shadow_page()

static void link_shadow_page ( struct kvm_vcpu *  vcpu,
u64 *  sptep,
struct kvm_mmu_page sp 
)
static

Definition at line 2464 of file mmu.c.

2466 {
2467  __link_shadow_page(vcpu->kvm, &vcpu->arch.mmu_pte_list_desc_cache, sptep, sp, true);
2468 }
static void __link_shadow_page(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, u64 *sptep, struct kvm_mmu_page *sp, bool flush)
Definition: mmu.c:2429
Here is the call graph for this function:
Here is the caller graph for this function:

◆ lpage_info_slot()

static struct kvm_lpage_info* lpage_info_slot ( gfn_t  gfn,
const struct kvm_memory_slot *  slot,
int  level 
)
static

Definition at line 785 of file mmu.c.

787 {
788  unsigned long idx;
789 
790  idx = gfn_to_index(gfn, slot->base_gfn, level);
791  return &slot->arch.lpage_info[level - 2][idx];
792 }
Here is the call graph for this function:
Here is the caller graph for this function:
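
The interesting part of lpage_info_slot() is the index math: gfn_to_index() computes (gfn >> shift) - (base_gfn >> shift), where on x86 the shift is 9 * (level - 1), and the [level - 2] row exists because the lpage_info arrays start at the 2M level. The sketch below (not KVM code) reproduces that arithmetic with a made-up toy_gfn_to_index() under those assumptions.

#include <stdio.h>
#include <stdint.h>

typedef uint64_t gfn_t;

static unsigned long toy_gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
{
    int shift = 9 * (level - 1);    /* 4K -> 0, 2M -> 9, 1G -> 18 */

    return (unsigned long)((gfn >> shift) - (base_gfn >> shift));
}

int main(void)
{
    gfn_t base_gfn = 0x100000;      /* slot starts at 4 GiB */
    gfn_t gfn = base_gfn + 0x2345;

    /* Which huge-page-sized chunk of the slot does this gfn fall in? */
    printf("2M index: %lu\n", toy_gfn_to_index(gfn, base_gfn, 2)); /* 17 */
    printf("1G index: %lu\n", toy_gfn_to_index(gfn, base_gfn, 3)); /* 0  */
    return 0;
}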

◆ make_mmu_pages_available()

static int make_mmu_pages_available ( struct kvm_vcpu *  vcpu)
static

Definition at line 2714 of file mmu.c.

2715 {
2716  unsigned long avail = kvm_mmu_available_pages(vcpu->kvm);
2717 
2718  if (likely(avail >= KVM_MIN_FREE_MMU_PAGES))
2719  return 0;
2720 
2721  kvm_mmu_zap_oldest_mmu_pages(vcpu->kvm, KVM_REFILL_PAGES - avail);
2722 
2723  /*
2724  * Note, this check is intentionally soft, it only guarantees that one
2725  * page is available, while the caller may end up allocating as many as
2726  * four pages, e.g. for PAE roots or for 5-level paging. Temporarily
2727  * exceeding the (arbitrary by default) limit will not harm the host,
2728  * being too aggressive may unnecessarily kill the guest, and getting an
2729  * exact count is far more trouble than it's worth, especially in the
2730  * page fault paths.
2731  */
2732  if (!kvm_mmu_available_pages(vcpu->kvm))
2733  return -ENOSPC;
2734  return 0;
2735 }
static unsigned long kvm_mmu_available_pages(struct kvm *kvm)
Definition: mmu.c:2705
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mark_mmio_spte()

static void mark_mmio_spte ( struct kvm_vcpu *  vcpu,
u64 *  sptep,
u64  gfn,
unsigned int  access 
)
static

Definition at line 292 of file mmu.c.

294 {
295  u64 spte = make_mmio_spte(vcpu, gfn, access);
296 
297  trace_mark_mmio_spte(sptep, gfn, spte);
298  mmu_spte_set(sptep, spte);
299 }
u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access)
Definition: spte.c:71
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mark_unsync()

static void mark_unsync ( u64 *  spte)
static

Definition at line 1787 of file mmu.c.

1788 {
1789  struct kvm_mmu_page *sp;
1790 
1791  sp = sptep_to_sp(spte);
1792  if (__test_and_set_bit(spte_index(spte), sp->unsync_child_bitmap))
1793  return;
1794  if (sp->unsync_children++)
1795  return;
 1796  kvm_mmu_mark_parents_unsync(sp);
 1797 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmio_info_in_cache()

static bool mmio_info_in_cache ( struct kvm_vcpu *  vcpu,
u64  addr,
bool  direct 
)
static

Definition at line 4084 of file mmu.c.

4085 {
4086  /*
4087  * A nested guest cannot use the MMIO cache if it is using nested
4088  * page tables, because cr2 is a nGPA while the cache stores GPAs.
4089  */
4090  if (mmu_is_nested(vcpu))
4091  return false;
4092 
4093  if (direct)
4094  return vcpu_match_mmio_gpa(vcpu, addr);
4095 
4096  return vcpu_match_mmio_gva(vcpu, addr);
4097 }
static bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva)
Definition: x86.h:255
static bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
Definition: x86.h:264
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_alloc_direct_roots()

static int mmu_alloc_direct_roots ( struct kvm_vcpu *  vcpu)
static

Definition at line 3688 of file mmu.c.

3689 {
3690  struct kvm_mmu *mmu = vcpu->arch.mmu;
3691  u8 shadow_root_level = mmu->root_role.level;
3692  hpa_t root;
3693  unsigned i;
3694  int r;
3695 
3696  write_lock(&vcpu->kvm->mmu_lock);
3697  r = make_mmu_pages_available(vcpu);
3698  if (r < 0)
3699  goto out_unlock;
3700 
3701  if (tdp_mmu_enabled) {
3702  root = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu);
3703  mmu->root.hpa = root;
3704  } else if (shadow_root_level >= PT64_ROOT_4LEVEL) {
3705  root = mmu_alloc_root(vcpu, 0, 0, shadow_root_level);
3706  mmu->root.hpa = root;
3707  } else if (shadow_root_level == PT32E_ROOT_LEVEL) {
3708  if (WARN_ON_ONCE(!mmu->pae_root)) {
3709  r = -EIO;
3710  goto out_unlock;
3711  }
3712 
3713  for (i = 0; i < 4; ++i) {
3714  WARN_ON_ONCE(IS_VALID_PAE_ROOT(mmu->pae_root[i]));
3715 
3716  root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT), 0,
3717  PT32_ROOT_LEVEL);
3718  mmu->pae_root[i] = root | PT_PRESENT_MASK |
 3719  shadow_me_value;
 3720  }
3721  mmu->root.hpa = __pa(mmu->pae_root);
3722  } else {
3723  WARN_ONCE(1, "Bad TDP root level = %d\n", shadow_root_level);
3724  r = -EIO;
3725  goto out_unlock;
3726  }
3727 
3728  /* root.pgd is ignored for direct MMUs. */
3729  mmu->root.pgd = 0;
3730 out_unlock:
3731  write_unlock(&vcpu->kvm->mmu_lock);
3732  return r;
3733 }
static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, int quadrant, u8 level)
Definition: mmu.c:3670
hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
Definition: tdp_mmu.c:219
Here is the call graph for this function:
Here is the caller graph for this function:
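
For the PT32E case above, the gfn passed to mmu_alloc_root() is i << (30 - PAGE_SHIFT). The standalone sketch below (not KVM code) spells out what that evaluates to with 4 KiB pages: each of the four PAE page directories covers one 1 GiB slice of the guest physical address space.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

int main(void)
{
    for (int i = 0; i < 4; i++) {
        uint64_t gfn = (uint64_t)i << (30 - PAGE_SHIFT);
        uint64_t gpa = gfn << PAGE_SHIFT;

        printf("PAE root %d: gfn 0x%llx, covers from %llu GiB\n",
               i, (unsigned long long)gfn,
               (unsigned long long)(gpa >> 30));
    }
    return 0;
}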

◆ mmu_alloc_root()

static hpa_t mmu_alloc_root ( struct kvm_vcpu *  vcpu,
gfn_t  gfn,
int  quadrant,
u8  level 
)
static

Definition at line 3670 of file mmu.c.

3672 {
3673  union kvm_mmu_page_role role = vcpu->arch.mmu->root_role;
3674  struct kvm_mmu_page *sp;
3675 
3676  role.level = level;
3677  role.quadrant = quadrant;
3678 
3679  WARN_ON_ONCE(quadrant && !role.has_4_byte_gpte);
3680  WARN_ON_ONCE(role.direct && role.has_4_byte_gpte);
3681 
3682  sp = kvm_mmu_get_shadow_page(vcpu, gfn, role);
3683  ++sp->root_count;
3684 
3685  return __pa(sp->spt);
3686 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_alloc_shadow_roots()

static int mmu_alloc_shadow_roots ( struct kvm_vcpu *  vcpu)
static

Definition at line 3796 of file mmu.c.

3797 {
3798  struct kvm_mmu *mmu = vcpu->arch.mmu;
3799  u64 pdptrs[4], pm_mask;
3800  gfn_t root_gfn, root_pgd;
3801  int quadrant, i, r;
3802  hpa_t root;
3803 
3804  root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu);
3805  root_gfn = (root_pgd & __PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
3806 
3807  if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) {
3808  mmu->root.hpa = kvm_mmu_get_dummy_root();
3809  return 0;
3810  }
3811 
3812  /*
3813  * On SVM, reading PDPTRs might access guest memory, which might fault
3814  * and thus might sleep. Grab the PDPTRs before acquiring mmu_lock.
3815  */
3816  if (mmu->cpu_role.base.level == PT32E_ROOT_LEVEL) {
3817  for (i = 0; i < 4; ++i) {
3818  pdptrs[i] = mmu->get_pdptr(vcpu, i);
3819  if (!(pdptrs[i] & PT_PRESENT_MASK))
3820  continue;
3821 
3822  if (!kvm_vcpu_is_visible_gfn(vcpu, pdptrs[i] >> PAGE_SHIFT))
3823  pdptrs[i] = 0;
3824  }
3825  }
3826 
3827  r = mmu_first_shadow_root_alloc(vcpu->kvm);
3828  if (r)
3829  return r;
3830 
3831  write_lock(&vcpu->kvm->mmu_lock);
3832  r = make_mmu_pages_available(vcpu);
3833  if (r < 0)
3834  goto out_unlock;
3835 
3836  /*
3837  * Do we shadow a long mode page table? If so we need to
3838  * write-protect the guests page table root.
3839  */
3840  if (mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL) {
3841  root = mmu_alloc_root(vcpu, root_gfn, 0,
3842  mmu->root_role.level);
3843  mmu->root.hpa = root;
3844  goto set_root_pgd;
3845  }
3846 
3847  if (WARN_ON_ONCE(!mmu->pae_root)) {
3848  r = -EIO;
3849  goto out_unlock;
3850  }
3851 
3852  /*
3853  * We shadow a 32 bit page table. This may be a legacy 2-level
3854  * or a PAE 3-level page table. In either case we need to be aware that
3855  * the shadow page table may be a PAE or a long mode page table.
3856  */
3857  pm_mask = PT_PRESENT_MASK | shadow_me_value;
3858  if (mmu->root_role.level >= PT64_ROOT_4LEVEL) {
 3859  pm_mask |= PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_USER_MASK;
 3860 
3861  if (WARN_ON_ONCE(!mmu->pml4_root)) {
3862  r = -EIO;
3863  goto out_unlock;
3864  }
3865  mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask;
3866 
3867  if (mmu->root_role.level == PT64_ROOT_5LEVEL) {
3868  if (WARN_ON_ONCE(!mmu->pml5_root)) {
3869  r = -EIO;
3870  goto out_unlock;
3871  }
3872  mmu->pml5_root[0] = __pa(mmu->pml4_root) | pm_mask;
3873  }
3874  }
3875 
3876  for (i = 0; i < 4; ++i) {
3877  WARN_ON_ONCE(IS_VALID_PAE_ROOT(mmu->pae_root[i]));
3878 
3879  if (mmu->cpu_role.base.level == PT32E_ROOT_LEVEL) {
3880  if (!(pdptrs[i] & PT_PRESENT_MASK)) {
3881  mmu->pae_root[i] = INVALID_PAE_ROOT;
3882  continue;
3883  }
3884  root_gfn = pdptrs[i] >> PAGE_SHIFT;
3885  }
3886 
3887  /*
3888  * If shadowing 32-bit non-PAE page tables, each PAE page
3889  * directory maps one quarter of the guest's non-PAE page
 3890  * directory. Otherwise each PAE page directory shadows one guest
3891  * PAE page directory so that quadrant should be 0.
3892  */
3893  quadrant = (mmu->cpu_role.base.level == PT32_ROOT_LEVEL) ? i : 0;
3894 
3895  root = mmu_alloc_root(vcpu, root_gfn, quadrant, PT32_ROOT_LEVEL);
3896  mmu->pae_root[i] = root | pm_mask;
3897  }
3898 
3899  if (mmu->root_role.level == PT64_ROOT_5LEVEL)
3900  mmu->root.hpa = __pa(mmu->pml5_root);
3901  else if (mmu->root_role.level == PT64_ROOT_4LEVEL)
3902  mmu->root.hpa = __pa(mmu->pml4_root);
3903  else
3904  mmu->root.hpa = __pa(mmu->pae_root);
3905 
3906 set_root_pgd:
3907  mmu->root.pgd = root_pgd;
3908 out_unlock:
3909  write_unlock(&vcpu->kvm->mmu_lock);
3910 
3911  return r;
3912 }
bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
Definition: kvm_main.c:2677
static int mmu_first_shadow_root_alloc(struct kvm *kvm)
Definition: mmu.c:3735
#define PT_ACCESSED_MASK
Definition: mmu.h:20
#define PT_USER_MASK
Definition: mmu.h:16
static hpa_t kvm_mmu_get_dummy_root(void)
Definition: mmu_internal.h:40
#define __PT_BASE_ADDR_MASK
Definition: mmu_internal.h:16
Here is the call graph for this function:
Here is the caller graph for this function:
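
The quadrant computed above is only non-zero when shadowing 32-bit non-PAE guest tables: the guest's single page directory covers 4 GiB with 1024 entries of 4 MiB each, while each shadow PAE page directory covers only 1 GiB (512 entries of 2 MiB), so PAE root i shadows quarter i of the guest page directory. The standalone sketch below (not KVM code) shows the address arithmetic, assuming the standard x86 32-bit and PAE paging layouts.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t gva = 0xb0001000;  /* some 32-bit guest virtual address */

    unsigned int guest_pde  = gva >> 22;           /* non-PAE PDE index, 0..1023 */
    unsigned int quadrant   = gva >> 30;           /* which PAE root, 0..3       */
    unsigned int shadow_pde = (gva >> 21) & 0x1ff; /* PAE PDE index, 0..511      */

    printf("guest PDE %u -> PAE root (quadrant) %u, shadow PDE %u\n",
           guest_pde, quadrant, shadow_pde);
    return 0;
}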

◆ mmu_alloc_special_roots()

static int mmu_alloc_special_roots ( struct kvm_vcpu *  vcpu)
static

Definition at line 3914 of file mmu.c.

3915 {
3916  struct kvm_mmu *mmu = vcpu->arch.mmu;
3917  bool need_pml5 = mmu->root_role.level > PT64_ROOT_4LEVEL;
3918  u64 *pml5_root = NULL;
3919  u64 *pml4_root = NULL;
3920  u64 *pae_root;
3921 
3922  /*
3923  * When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP
3924  * tables are allocated and initialized at root creation as there is no
3925  * equivalent level in the guest's NPT to shadow. Allocate the tables
3926  * on demand, as running a 32-bit L1 VMM on 64-bit KVM is very rare.
3927  */
3928  if (mmu->root_role.direct ||
3929  mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL ||
3930  mmu->root_role.level < PT64_ROOT_4LEVEL)
3931  return 0;
3932 
3933  /*
3934  * NPT, the only paging mode that uses this horror, uses a fixed number
3935  * of levels for the shadow page tables, e.g. all MMUs are 4-level or
 3936  * all MMUs are 5-level. Thus, this can safely require that pml5_root
3937  * is allocated if the other roots are valid and pml5 is needed, as any
3938  * prior MMU would also have required pml5.
3939  */
3940  if (mmu->pae_root && mmu->pml4_root && (!need_pml5 || mmu->pml5_root))
3941  return 0;
3942 
3943  /*
3944  * The special roots should always be allocated in concert. Yell and
3945  * bail if KVM ends up in a state where only one of the roots is valid.
3946  */
3947  if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root ||
3948  (need_pml5 && mmu->pml5_root)))
3949  return -EIO;
3950 
3951  /*
3952  * Unlike 32-bit NPT, the PDP table doesn't need to be in low mem, and
3953  * doesn't need to be decrypted.
3954  */
3955  pae_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
3956  if (!pae_root)
3957  return -ENOMEM;
3958 
3959 #ifdef CONFIG_X86_64
3960  pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
3961  if (!pml4_root)
3962  goto err_pml4;
3963 
3964  if (need_pml5) {
3965  pml5_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
3966  if (!pml5_root)
3967  goto err_pml5;
3968  }
3969 #endif
3970 
3971  mmu->pae_root = pae_root;
3972  mmu->pml4_root = pml4_root;
3973  mmu->pml5_root = pml5_root;
3974 
3975  return 0;
3976 
3977 #ifdef CONFIG_X86_64
3978 err_pml5:
3979  free_page((unsigned long)pml4_root);
3980 err_pml4:
3981  free_page((unsigned long)pae_root);
3982  return -ENOMEM;
3983 #endif
3984 }
Here is the caller graph for this function:

◆ mmu_destroy_caches()

static void mmu_destroy_caches ( void  )
static

Definition at line 6926 of file mmu.c.

6927 {
6928  kmem_cache_destroy(pte_list_desc_cache);
6929  kmem_cache_destroy(mmu_page_header_cache);
6930 }
Here is the caller graph for this function:

◆ mmu_first_shadow_root_alloc()

static int mmu_first_shadow_root_alloc ( struct kvm *  kvm)
static

Definition at line 3735 of file mmu.c.

3736 {
3737  struct kvm_memslots *slots;
3738  struct kvm_memory_slot *slot;
3739  int r = 0, i, bkt;
3740 
3741  /*
3742  * Check if this is the first shadow root being allocated before
3743  * taking the lock.
3744  */
3745  if (kvm_shadow_root_allocated(kvm))
3746  return 0;
3747 
3748  mutex_lock(&kvm->slots_arch_lock);
3749 
3750  /* Recheck, under the lock, whether this is the first shadow root. */
3751  if (kvm_shadow_root_allocated(kvm))
3752  goto out_unlock;
3753 
3754  /*
3755  * Check if anything actually needs to be allocated, e.g. all metadata
3756  * will be allocated upfront if TDP is disabled.
3757  */
3758  if (kvm_memslots_have_rmaps(kvm) &&
 3759  kvm_page_track_write_tracking_enabled(kvm))
 3760  goto out_success;
3761 
3762  for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
3763  slots = __kvm_memslots(kvm, i);
3764  kvm_for_each_memslot(slot, bkt, slots) {
3765  /*
3766  * Both of these functions are no-ops if the target is
3767  * already allocated, so unconditionally calling both
3768  * is safe. Intentionally do NOT free allocations on
3769  * failure to avoid having to track which allocations
3770  * were made now versus when the memslot was created.
3771  * The metadata is guaranteed to be freed when the slot
3772  * is freed, and will be kept/used if userspace retries
3773  * KVM_RUN instead of killing the VM.
3774  */
3775  r = memslot_rmap_alloc(slot, slot->npages);
3776  if (r)
3777  goto out_unlock;
 3778  r = kvm_page_track_write_tracking_alloc(slot);
 3779  if (r)
3780  goto out_unlock;
3781  }
3782  }
3783 
3784  /*
3785  * Ensure that shadow_root_allocated becomes true strictly after
3786  * all the related pointers are set.
3787  */
3788 out_success:
3789  smp_store_release(&kvm->arch.shadow_root_allocated, true);
3790 
3791 out_unlock:
3792  mutex_unlock(&kvm->slots_arch_lock);
3793  return r;
3794 }
static bool kvm_shadow_root_allocated(struct kvm *kvm)
Definition: mmu.h:262
int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot)
Definition: page_track.c:57
bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
Definition: page_track.c:23
int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages)
Definition: x86.c:12749
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_free_memory_caches()

static void mmu_free_memory_caches ( struct kvm_vcpu *  vcpu)
static

Definition at line 702 of file mmu.c.

703 {
704  kvm_mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache);
705  kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
706  kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadowed_info_cache);
707  kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
708 }
Here is the caller graph for this function:

◆ mmu_free_pte_list_desc()

static void mmu_free_pte_list_desc ( struct pte_list_desc pte_list_desc)
static

Definition at line 710 of file mmu.c.

711 {
712  kmem_cache_free(pte_list_desc_cache, pte_list_desc);
713 }
Here is the caller graph for this function:

◆ mmu_free_root_page()

static void mmu_free_root_page ( struct kvm *  kvm,
hpa_t *  root_hpa,
struct list_head *  invalid_list 
)
static

Definition at line 3566 of file mmu.c.

3568 {
3569  struct kvm_mmu_page *sp;
3570 
3571  if (!VALID_PAGE(*root_hpa))
3572  return;
3573 
3574  sp = root_to_sp(*root_hpa);
3575  if (WARN_ON_ONCE(!sp))
3576  return;
3577 
3578  if (is_tdp_mmu_page(sp))
3579  kvm_tdp_mmu_put_root(kvm, sp);
3580  else if (!--sp->root_count && sp->role.invalid)
3581  kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
3582 
3583  *root_hpa = INVALID_PAGE;
3584 }
void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root)
Definition: tdp_mmu.c:76
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_free_vm_memory_caches()

static void mmu_free_vm_memory_caches ( struct kvm *  kvm)
static

Definition at line 6323 of file mmu.c.

6324 {
6325  kvm_mmu_free_memory_cache(&kvm->arch.split_desc_cache);
6326  kvm_mmu_free_memory_cache(&kvm->arch.split_page_header_cache);
6327  kvm_mmu_free_memory_cache(&kvm->arch.split_shadow_page_cache);
6328 }
Here is the caller graph for this function:

◆ mmu_page_add_parent_pte()

static void mmu_page_add_parent_pte ( struct kvm_mmu_memory_cache *  cache,
struct kvm_mmu_page sp,
u64 *  parent_pte 
)
static

Definition at line 1754 of file mmu.c.

1756 {
1757  if (!parent_pte)
1758  return;
1759 
1760  pte_list_add(cache, parent_pte, &sp->parent_ptes);
1761 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_page_remove_parent_pte()

static void mmu_page_remove_parent_pte ( struct kvm *  kvm,
struct kvm_mmu_page sp,
u64 *  parent_pte 
)
static

Definition at line 1763 of file mmu.c.

1765 {
1766  pte_list_remove(kvm, parent_pte, &sp->parent_ptes);
1767 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_page_zap_pte()

static int mmu_page_zap_pte ( struct kvm *  kvm,
struct kvm_mmu_page sp,
u64 *  spte,
struct list_head *  invalid_list 
)
static

Definition at line 2493 of file mmu.c.

2495 {
2496  u64 pte;
2497  struct kvm_mmu_page *child;
2498 
2499  pte = *spte;
2500  if (is_shadow_present_pte(pte)) {
2501  if (is_last_spte(pte, sp->role.level)) {
2502  drop_spte(kvm, spte);
2503  } else {
2504  child = spte_to_child_sp(pte);
2505  drop_parent_pte(kvm, child, spte);
2506 
2507  /*
2508  * Recursively zap nested TDP SPs, parentless SPs are
2509  * unlikely to be used again in the near future. This
2510  * avoids retaining a large number of stale nested SPs.
2511  */
2512  if (tdp_enabled && invalid_list &&
2513  child->role.guest_mode && !child->parent_ptes.val)
2514  return kvm_mmu_prepare_zap_page(kvm, child,
2515  invalid_list);
2516  }
2517  } else if (is_mmio_spte(pte)) {
 2518  mmu_spte_clear_no_track(spte);
 2519  }
2520  return 0;
2521 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_pages_add()

static int mmu_pages_add ( struct kvm_mmu_pages pvec,
struct kvm_mmu_page sp,
int  idx 
)
static

Definition at line 1809 of file mmu.c.

1811 {
1812  int i;
1813 
1814  if (sp->unsync)
1815  for (i=0; i < pvec->nr; i++)
1816  if (pvec->page[i].sp == sp)
1817  return 0;
1818 
1819  pvec->page[pvec->nr].sp = sp;
1820  pvec->page[pvec->nr].idx = idx;
1821  pvec->nr++;
1822  return (pvec->nr == KVM_PAGE_ARRAY_NR);
1823 }
#define KVM_PAGE_ARRAY_NR
Definition: mmu.c:1799
struct kvm_mmu_page * sp
Definition: mmu.c:1803
struct kvm_mmu_pages::mmu_page_and_offset page[KVM_PAGE_ARRAY_NR]
unsigned int nr
Definition: mmu.c:1806
Here is the caller graph for this function:

◆ mmu_pages_clear_parents()

static void mmu_pages_clear_parents ( struct mmu_page_path parents)
static

Definition at line 2076 of file mmu.c.

2077 {
2078  struct kvm_mmu_page *sp;
2079  unsigned int level = 0;
2080 
2081  do {
2082  unsigned int idx = parents->idx[level];
2083  sp = parents->parent[level];
2084  if (!sp)
2085  return;
2086 
2087  WARN_ON_ONCE(idx == INVALID_INDEX);
2088  clear_unsync_child_bit(sp, idx);
2089  level++;
2090  } while (!sp->unsync_children);
2091 }
#define INVALID_INDEX
Definition: mmu.c:1871
struct kvm_mmu_page * parent[PT64_ROOT_MAX_LEVEL]
Definition: mmu.c:2022
unsigned int idx[PT64_ROOT_MAX_LEVEL]
Definition: mmu.c:2023
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_pages_first()

static int mmu_pages_first ( struct kvm_mmu_pages pvec,
struct mmu_page_path parents 
)
static

Definition at line 2052 of file mmu.c.

2054 {
2055  struct kvm_mmu_page *sp;
2056  int level;
2057 
2058  if (pvec->nr == 0)
2059  return 0;
2060 
2061  WARN_ON_ONCE(pvec->page[0].idx != INVALID_INDEX);
2062 
2063  sp = pvec->page[0].sp;
2064  level = sp->role.level;
2065  WARN_ON_ONCE(level == PG_LEVEL_4K);
2066 
2067  parents->parent[level-2] = sp;
2068 
2069  /* Also set up a sentinel. Further entries in pvec are all
2070  * children of sp, so this element is never overwritten.
2071  */
2072  parents->parent[level-1] = NULL;
2073  return mmu_pages_next(pvec, parents, 0);
2074 }
Here is the call graph for this function:

◆ mmu_pages_next()

static int mmu_pages_next ( struct kvm_mmu_pages pvec,
struct mmu_page_path parents,
int  i 
)
static

Definition at line 2031 of file mmu.c.

2034 {
2035  int n;
2036 
2037  for (n = i+1; n < pvec->nr; n++) {
2038  struct kvm_mmu_page *sp = pvec->page[n].sp;
2039  unsigned idx = pvec->page[n].idx;
2040  int level = sp->role.level;
2041 
2042  parents->idx[level-1] = idx;
2043  if (level == PG_LEVEL_4K)
2044  break;
2045 
2046  parents->parent[level-2] = sp;
2047  }
2048 
2049  return n;
2050 }
Here is the caller graph for this function:

◆ mmu_pte_write_fetch_gpte()

static u64 mmu_pte_write_fetch_gpte ( struct kvm_vcpu *  vcpu,
gpa_t *  gpa,
int *  bytes 
)
static

Definition at line 5682 of file mmu.c.

5684 {
5685  u64 gentry = 0;
5686  int r;
5687 
5688  /*
 5689  * Assume that the pte write is on a page table of the same type
 5690  * as the current vcpu paging mode, since we update the sptes only
5691  * when they have the same mode.
5692  */
5693  if (is_pae(vcpu) && *bytes == 4) {
5694  /* Handle a 32-bit guest writing two halves of a 64-bit gpte */
5695  *gpa &= ~(gpa_t)7;
5696  *bytes = 8;
5697  }
5698 
5699  if (*bytes == 4 || *bytes == 8) {
5700  r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes);
5701  if (r)
5702  gentry = 0;
5703  }
5704 
5705  return gentry;
5706 }
int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned long len)
Definition: kvm_main.c:3403
Here is the call graph for this function:
Here is the caller graph for this function:
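
The gpa/bytes adjustment above handles a PAE guest writing one 32-bit half of a 64-bit gpte: clearing the low three address bits and widening the size to 8 makes the write handler fetch the full, naturally aligned gpte that contains the written half. A minimal standalone sketch of just that adjustment (not KVM code):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t gpa = 0x1004;  /* guest wrote the high half of a 64-bit gpte */
    int bytes = 4;

    if (bytes == 4) {
        gpa &= ~(uint64_t)7;    /* round down to the gpte boundary */
        bytes = 8;              /* ...and read the whole gpte      */
    }
    printf("fetch %d bytes at gpa 0x%llx\n", bytes, (unsigned long long)gpa);
    return 0;
}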

◆ mmu_set_spte()

static int mmu_set_spte ( struct kvm_vcpu *  vcpu,
struct kvm_memory_slot *  slot,
u64 *  sptep,
unsigned int  pte_access,
gfn_t  gfn,
kvm_pfn_t  pfn,
struct kvm_page_fault fault 
)
static

Definition at line 2906 of file mmu.c.

2909 {
2910  struct kvm_mmu_page *sp = sptep_to_sp(sptep);
2911  int level = sp->role.level;
2912  int was_rmapped = 0;
2913  int ret = RET_PF_FIXED;
2914  bool flush = false;
2915  bool wrprot;
2916  u64 spte;
2917 
2918  /* Prefetching always gets a writable pfn. */
2919  bool host_writable = !fault || fault->map_writable;
2920  bool prefetch = !fault || fault->prefetch;
2921  bool write_fault = fault && fault->write;
2922 
2923  if (unlikely(is_noslot_pfn(pfn))) {
2924  vcpu->stat.pf_mmio_spte_created++;
2925  mark_mmio_spte(vcpu, sptep, gfn, pte_access);
2926  return RET_PF_EMULATE;
2927  }
2928 
2929  if (is_shadow_present_pte(*sptep)) {
2930  /*
2931  * If we overwrite a PTE page pointer with a 2MB PMD, unlink
2932  * the parent of the now unreachable PTE.
2933  */
2934  if (level > PG_LEVEL_4K && !is_large_pte(*sptep)) {
2935  struct kvm_mmu_page *child;
2936  u64 pte = *sptep;
2937 
2938  child = spte_to_child_sp(pte);
2939  drop_parent_pte(vcpu->kvm, child, sptep);
2940  flush = true;
2941  } else if (pfn != spte_to_pfn(*sptep)) {
2942  drop_spte(vcpu->kvm, sptep);
2943  flush = true;
2944  } else
2945  was_rmapped = 1;
2946  }
2947 
2948  wrprot = make_spte(vcpu, sp, slot, pte_access, gfn, pfn, *sptep, prefetch,
2949  true, host_writable, &spte);
2950 
2951  if (*sptep == spte) {
2952  ret = RET_PF_SPURIOUS;
2953  } else {
2954  flush |= mmu_spte_update(sptep, spte);
2955  trace_kvm_mmu_set_spte(level, gfn, sptep);
2956  }
2957 
2958  if (wrprot) {
2959  if (write_fault)
2960  ret = RET_PF_EMULATE;
2961  }
2962 
2963  if (flush)
2964  kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level);
2965 
2966  if (!was_rmapped) {
2967  WARN_ON_ONCE(ret == RET_PF_SPURIOUS);
2968  rmap_add(vcpu, slot, sptep, gfn, pte_access);
2969  } else {
2970  /* Already rmapped but the pte_access bits may have changed. */
2971  kvm_mmu_page_set_access(sp, spte_index(sptep), pte_access);
2972  }
2973 
2974  return ret;
2975 }
static bool mmu_spte_update(u64 *sptep, u64 new_spte)
Definition: mmu.c:520
static void kvm_mmu_page_set_access(struct kvm_mmu_page *sp, int index, unsigned int access)
Definition: mmu.c:773
static void rmap_add(struct kvm_vcpu *vcpu, const struct kvm_memory_slot *slot, u64 *spte, gfn_t gfn, unsigned int access)
Definition: mmu.c:1665
static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, unsigned int access)
Definition: mmu.c:292
bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, const struct kvm_memory_slot *slot, unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn, u64 old_spte, bool prefetch, bool can_unsync, bool host_writable, u64 *new_spte)
Definition: spte.c:137
static kvm_pfn_t spte_to_pfn(u64 pte)
Definition: spte.h:328
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_shrink_count()

static unsigned long mmu_shrink_count ( struct shrinker *  shrink,
struct shrink_control *  sc 
)
static

Definition at line 6918 of file mmu.c.

6920 {
6921  return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
6922 }
Here is the caller graph for this function:

◆ mmu_shrink_scan()

static unsigned long mmu_shrink_scan ( struct shrinker *  shrink,
struct shrink_control *  sc 
)
static

Definition at line 6859 of file mmu.c.

6861 {
6862  struct kvm *kvm;
6863  int nr_to_scan = sc->nr_to_scan;
6864  unsigned long freed = 0;
6865 
6866  mutex_lock(&kvm_lock);
6867 
6868  list_for_each_entry(kvm, &vm_list, vm_list) {
6869  int idx;
6870  LIST_HEAD(invalid_list);
6871 
6872  /*
6873  * Never scan more than sc->nr_to_scan VM instances.
6874  * Will not hit this condition practically since we do not try
6875  * to shrink more than one VM and it is very unlikely to see
6876  * !n_used_mmu_pages so many times.
6877  */
6878  if (!nr_to_scan--)
6879  break;
6880  /*
6881  * n_used_mmu_pages is accessed without holding kvm->mmu_lock
 6882  * here. We may skip a VM instance erroneously, but we do not
6883  * want to shrink a VM that only started to populate its MMU
6884  * anyway.
6885  */
6886  if (!kvm->arch.n_used_mmu_pages &&
 6887  !kvm_has_zapped_obsolete_pages(kvm))
 6888  continue;
6889 
6890  idx = srcu_read_lock(&kvm->srcu);
6891  write_lock(&kvm->mmu_lock);
6892 
6893  if (kvm_has_zapped_obsolete_pages(kvm)) {
 6894  kvm_mmu_commit_zap_page(kvm,
 6895  &kvm->arch.zapped_obsolete_pages);
6896  goto unlock;
6897  }
6898 
6899  freed = kvm_mmu_zap_oldest_mmu_pages(kvm, sc->nr_to_scan);
6900 
6901 unlock:
6902  write_unlock(&kvm->mmu_lock);
6903  srcu_read_unlock(&kvm->srcu, idx);
6904 
6905  /*
6906  * unfair on small ones
6907  * per-vm shrinkers cry out
6908  * sadness comes quickly
6909  */
6910  list_move_tail(&kvm->vm_list, &vm_list);
6911  break;
6912  }
6913 
6914  mutex_unlock(&kvm_lock);
6915  return freed;
6916 }
static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
Definition: mmu.c:6299
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_spte_age()

static bool mmu_spte_age ( u64 *  sptep)
static

Definition at line 615 of file mmu.c.

616 {
617  u64 spte = mmu_spte_get_lockless(sptep);
618 
619  if (!is_accessed_spte(spte))
620  return false;
621 
622  if (spte_ad_enabled(spte)) {
623  clear_bit((ffs(shadow_accessed_mask) - 1),
624  (unsigned long *)sptep);
625  } else {
626  /*
627  * Capture the dirty status of the page, so that it doesn't get
628  * lost when the SPTE is marked for access tracking.
629  */
630  if (is_writable_pte(spte))
 631  kvm_set_pfn_dirty(spte_to_pfn(spte));
 632 
633  spte = mark_spte_for_access_track(spte);
634  mmu_spte_update_no_track(sptep, spte);
635  }
636 
637  return true;
638 }
void kvm_set_pfn_dirty(kvm_pfn_t pfn)
Definition: kvm_main.c:3285
static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte)
Definition: mmu.c:489
u64 __read_mostly shadow_accessed_mask
Definition: spte.c:32
u64 mark_spte_for_access_track(u64 spte)
Definition: spte.c:341
static bool spte_ad_enabled(u64 spte)
Definition: spte.h:279
Here is the call graph for this function:
Here is the caller graph for this function:
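
When A/D bits are in use, mmu_spte_age() clears the Accessed bit by passing ffs(shadow_accessed_mask) - 1 to clear_bit(): ffs() returns the 1-based position of the lowest set bit, so the subtraction yields the bit number. The standalone sketch below (not KVM code) shows that arithmetic; the mask value (bit 8, as used by EPT's Accessed flag) is only an example.

#include <stdio.h>
#include <stdint.h>
#include <strings.h>    /* ffs() */

int main(void)
{
    uint64_t accessed_mask = 1ULL << 8;     /* e.g. EPT's Accessed bit   */
    uint64_t spte = 0x8000000000000900ULL;  /* some SPTE with that bit set */

    int bit = ffs((int)accessed_mask) - 1;  /* -> 8 */
    spte &= ~(1ULL << bit);                 /* what clear_bit() does */

    printf("cleared bit %d, spte now 0x%llx\n",
           bit, (unsigned long long)spte);
    return 0;
}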

◆ mmu_spte_clear_no_track()

static void mmu_spte_clear_no_track ( u64 *  sptep)
static

Definition at line 604 of file mmu.c.

605 {
606  __update_clear_spte_fast(sptep, 0ull);
607 }
static void __update_clear_spte_fast(u64 *sptep, u64 spte)
Definition: mmu.c:396
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_spte_clear_track_bits()

static u64 mmu_spte_clear_track_bits ( struct kvm *  kvm,
u64 *  sptep 
)
static

Definition at line 561 of file mmu.c.

562 {
563  kvm_pfn_t pfn;
564  u64 old_spte = *sptep;
565  int level = sptep_to_sp(sptep)->role.level;
566  struct page *page;
567 
568  if (!is_shadow_present_pte(old_spte) ||
569  !spte_has_volatile_bits(old_spte))
570  __update_clear_spte_fast(sptep, 0ull);
571  else
572  old_spte = __update_clear_spte_slow(sptep, 0ull);
573 
574  if (!is_shadow_present_pte(old_spte))
575  return old_spte;
576 
577  kvm_update_page_stats(kvm, level, -1);
578 
579  pfn = spte_to_pfn(old_spte);
580 
581  /*
582  * KVM doesn't hold a reference to any pages mapped into the guest, and
583  * instead uses the mmu_notifier to ensure that KVM unmaps any pages
584  * before they are reclaimed. Sanity check that, if the pfn is backed
585  * by a refcounted page, the refcount is elevated.
586  */
587  page = kvm_pfn_to_refcounted_page(pfn);
588  WARN_ON_ONCE(page && !page_count(page));
589 
590  if (is_accessed_spte(old_spte))
 591  kvm_set_pfn_accessed(pfn);
 592 
593  if (is_dirty_spte(old_spte))
594  kvm_set_pfn_dirty(pfn);
595 
596  return old_spte;
597 }
struct page * kvm_pfn_to_refcounted_page(kvm_pfn_t pfn)
Definition: kvm_main.c:179
void kvm_set_pfn_accessed(kvm_pfn_t pfn)
Definition: kvm_main.c:3295
static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
Definition: mmu.c:415
bool spte_has_volatile_bits(u64 spte)
Definition: spte.c:114
static bool is_dirty_spte(u64 spte)
Definition: spte.h:341
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_spte_get_lockless()

static u64 mmu_spte_get_lockless ( u64 *  sptep)
static

Definition at line 609 of file mmu.c.

610 {
611  return __get_spte_lockless(sptep);
612 }
static u64 __get_spte_lockless(u64 *sptep)
Definition: mmu.c:449
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_spte_set()

static void mmu_spte_set ( u64 *  sptep,
u64  new_spte 
)
static

Definition at line 479 of file mmu.c.

480 {
481  WARN_ON_ONCE(is_shadow_present_pte(*sptep));
482  __set_spte(sptep, new_spte);
483 }
static void __set_spte(u64 *sptep, u64 spte)
Definition: mmu.c:377
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_spte_update()

static bool mmu_spte_update ( u64 *  sptep,
u64  new_spte 
)
static

Definition at line 520 of file mmu.c.

521 {
522  bool flush = false;
523  u64 old_spte = mmu_spte_update_no_track(sptep, new_spte);
524 
525  if (!is_shadow_present_pte(old_spte))
526  return false;
527 
528  /*
 529  * Updating the spte out of mmu-lock is safe, since
 530  * we always atomically update it; see the comments in
531  * spte_has_volatile_bits().
532  */
533  if (is_mmu_writable_spte(old_spte) &&
534  !is_writable_pte(new_spte))
535  flush = true;
536 
537  /*
538  * Flush TLB when accessed/dirty states are changed in the page tables,
539  * to guarantee consistency between TLB and page tables.
540  */
541 
542  if (is_accessed_spte(old_spte) && !is_accessed_spte(new_spte)) {
543  flush = true;
 544  kvm_set_pfn_accessed(spte_to_pfn(old_spte));
 545  }
546 
547  if (is_dirty_spte(old_spte) && !is_dirty_spte(new_spte)) {
548  flush = true;
549  kvm_set_pfn_dirty(spte_to_pfn(old_spte));
550  }
551 
552  return flush;
553 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_spte_update_no_track()

static u64 mmu_spte_update_no_track ( u64 *  sptep,
u64  new_spte 
)
static

Definition at line 489 of file mmu.c.

490 {
491  u64 old_spte = *sptep;
492 
493  WARN_ON_ONCE(!is_shadow_present_pte(new_spte));
 494  check_spte_writable_invariants(new_spte);
 495 
496  if (!is_shadow_present_pte(old_spte)) {
497  mmu_spte_set(sptep, new_spte);
498  return old_spte;
499  }
500 
501  if (!spte_has_volatile_bits(old_spte))
502  __update_clear_spte_fast(sptep, new_spte);
503  else
504  old_spte = __update_clear_spte_slow(sptep, new_spte);
505 
506  WARN_ON_ONCE(spte_to_pfn(old_spte) != spte_to_pfn(new_spte));
507 
508  return old_spte;
509 }
static void check_spte_writable_invariants(u64 spte)
Definition: spte.h:447
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_sync_children()

static int mmu_sync_children ( struct kvm_vcpu *  vcpu,
struct kvm_mmu_page parent,
bool  can_yield 
)
static

Definition at line 2093 of file mmu.c.

2095 {
2096  int i;
2097  struct kvm_mmu_page *sp;
2098  struct mmu_page_path parents;
2099  struct kvm_mmu_pages pages;
2100  LIST_HEAD(invalid_list);
2101  bool flush = false;
2102 
2103  while (mmu_unsync_walk(parent, &pages)) {
2104  bool protected = false;
2105 
2106  for_each_sp(pages, sp, parents, i)
2107  protected |= kvm_vcpu_write_protect_gfn(vcpu, sp->gfn);
2108 
2109  if (protected) {
2110  kvm_mmu_remote_flush_or_zap(vcpu->kvm, &invalid_list, true);
2111  flush = false;
2112  }
2113 
2114  for_each_sp(pages, sp, parents, i) {
2115  kvm_unlink_unsync_page(vcpu->kvm, sp);
2116  flush |= kvm_sync_page(vcpu, sp, &invalid_list) > 0;
2117  mmu_pages_clear_parents(&parents);
2118  }
2119  if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) {
2120  kvm_mmu_remote_flush_or_zap(vcpu->kvm, &invalid_list, flush);
2121  if (!can_yield) {
2122  kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
2123  return -EINTR;
2124  }
2125 
2126  cond_resched_rwlock_write(&vcpu->kvm->mmu_lock);
2127  flush = false;
2128  }
2129  }
2130 
2131  kvm_mmu_remote_flush_or_zap(vcpu->kvm, &invalid_list, flush);
2132  return 0;
2133 }
static bool kvm_vcpu_write_protect_gfn(struct kvm_vcpu *vcpu, u64 gfn)
Definition: mmu.c:1436
static int mmu_unsync_walk(struct kvm_mmu_page *sp, struct kvm_mmu_pages *pvec)
Definition: mmu.c:1873
static void mmu_pages_clear_parents(struct mmu_page_path *parents)
Definition: mmu.c:2076
#define for_each_sp(pvec, sp, parents, i)
Definition: mmu.c:2026
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_topup_memory_caches()

static int mmu_topup_memory_caches ( struct kvm_vcpu *  vcpu,
bool  maybe_indirect 
)
static

Definition at line 679 of file mmu.c.

680 {
681  int r;
682 
683  /* 1 rmap, 1 parent PTE per level, and the prefetched rmaps. */
684  r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache,
685  1 + PT64_ROOT_MAX_LEVEL + PTE_PREFETCH_NUM);
686  if (r)
687  return r;
688  r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_shadow_page_cache,
689  PT64_ROOT_MAX_LEVEL);
690  if (r)
691  return r;
692  if (maybe_indirect) {
693  r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_shadowed_info_cache,
694  PT64_ROOT_MAX_LEVEL);
695  if (r)
696  return r;
697  }
698  return kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
699  PT64_ROOT_MAX_LEVEL);
700 }
Here is the caller graph for this function:

◆ mmu_try_to_unsync_pages()

int mmu_try_to_unsync_pages ( struct kvm *  kvm,
const struct kvm_memory_slot *  slot,
gfn_t  gfn,
bool  can_unsync,
bool  prefetch 
)

Definition at line 2805 of file mmu.c.

2807 {
2808  struct kvm_mmu_page *sp;
2809  bool locked = false;
2810 
2811  /*
2812  * Force write-protection if the page is being tracked. Note, the page
2813  * track machinery is used to write-protect upper-level shadow pages,
2814  * i.e. this guards the role.level == 4K assertion below!
2815  */
2816  if (kvm_gfn_is_write_tracked(kvm, slot, gfn))
2817  return -EPERM;
2818 
2819  /*
2820  * The page is not write-tracked, mark existing shadow pages unsync
2821  * unless KVM is synchronizing an unsync SP (can_unsync = false). In
2822  * that case, KVM must complete emulation of the guest TLB flush before
2823  * allowing shadow pages to become unsync (writable by the guest).
2824  */
 2825  for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn) {
 2826  if (!can_unsync)
2827  return -EPERM;
2828 
2829  if (sp->unsync)
2830  continue;
2831 
2832  if (prefetch)
2833  return -EEXIST;
2834 
2835  /*
2836  * TDP MMU page faults require an additional spinlock as they
2837  * run with mmu_lock held for read, not write, and the unsync
 2838  * logic is not thread safe. Take the spinlock regardless of
2839  * the MMU type to avoid extra conditionals/parameters, there's
2840  * no meaningful penalty if mmu_lock is held for write.
2841  */
2842  if (!locked) {
2843  locked = true;
2844  spin_lock(&kvm->arch.mmu_unsync_pages_lock);
2845 
2846  /*
2847  * Recheck after taking the spinlock, a different vCPU
2848  * may have since marked the page unsync. A false
2849  * negative on the unprotected check above is not
2850  * possible as clearing sp->unsync _must_ hold mmu_lock
2851  * for write, i.e. unsync cannot transition from 1->0
2852  * while this CPU holds mmu_lock for read (or write).
2853  */
2854  if (READ_ONCE(sp->unsync))
2855  continue;
2856  }
2857 
2858  WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K);
2859  kvm_unsync_page(kvm, sp);
2860  }
2861  if (locked)
2862  spin_unlock(&kvm->arch.mmu_unsync_pages_lock);
2863 
2864  /*
2865  * We need to ensure that the marking of unsync pages is visible
2866  * before the SPTE is updated to allow writes because
2867  * kvm_mmu_sync_roots() checks the unsync flags without holding
2868  * the MMU lock and so can race with this. If the SPTE was updated
2869  * before the page had been marked as unsync-ed, something like the
2870  * following could happen:
2871  *
2872  * CPU 1 CPU 2
2873  * ---------------------------------------------------------------------
2874  * 1.2 Host updates SPTE
2875  * to be writable
2876  * 2.1 Guest writes a GPTE for GVA X.
2877  * (GPTE being in the guest page table shadowed
2878  * by the SP from CPU 1.)
2879  * This reads SPTE during the page table walk.
2880  * Since SPTE.W is read as 1, there is no
2881  * fault.
2882  *
2883  * 2.2 Guest issues TLB flush.
2884  * That causes a VM Exit.
2885  *
2886  * 2.3 Walking of unsync pages sees sp->unsync is
2887  * false and skips the page.
2888  *
2889  * 2.4 Guest accesses GVA X.
2890  * Since the mapping in the SP was not updated,
2891  * so the old mapping for GVA X incorrectly
2892  * gets used.
2893  * 1.1 Host marks SP
2894  * as unsync
2895  * (sp->unsync = true)
2896  *
2897  * The write barrier below ensures that 1.1 happens before 1.2 and thus
2898  * the situation in 2.4 does not arise. It pairs with the read barrier
2899  * in is_unsync_root(), placed between 2.1's load of SPTE.W and 2.3.
2900  */
2901  smp_wmb();
2902 
2903  return 0;
2904 }
static void kvm_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
Definition: mmu.c:2790
bool kvm_gfn_is_write_tracked(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t gfn)
Definition: page_track.c:123
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_unsync_walk()

static int mmu_unsync_walk ( struct kvm_mmu_page sp,
struct kvm_mmu_pages pvec 
)
static

Definition at line 1873 of file mmu.c.

1875 {
1876  pvec->nr = 0;
1877  if (!sp->unsync_children)
1878  return 0;
1879 
1880  mmu_pages_add(pvec, sp, INVALID_INDEX);
1881  return __mmu_unsync_walk(sp, pvec);
1882 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mmu_zap_unsync_children()

static int mmu_zap_unsync_children ( struct kvm *  kvm,
struct kvm_mmu_page parent,
struct list_head *  invalid_list 
)
static

Definition at line 2545 of file mmu.c.

2548 {
2549  int i, zapped = 0;
2550  struct mmu_page_path parents;
2551  struct kvm_mmu_pages pages;
2552 
2553  if (parent->role.level == PG_LEVEL_4K)
2554  return 0;
2555 
2556  while (mmu_unsync_walk(parent, &pages)) {
2557  struct kvm_mmu_page *sp;
2558 
2559  for_each_sp(pages, sp, parents, i) {
2560  kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
2561  mmu_pages_clear_parents(&parents);
2562  zapped++;
2563  }
2564  }
2565 
2566  return zapped;
2567 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ module_param_cb() [1/3]

module_param_cb ( nx_huge_pages  ,
nx_huge_pages_ops,
nx_huge_pages,
0644   
)

◆ module_param_cb() [2/3]

module_param_cb ( nx_huge_pages_recovery_period_ms  ,
nx_huge_pages_recovery_param_ops,
nx_huge_pages_recovery_period_ms,
0644   
)

◆ module_param_cb() [3/3]

module_param_cb ( nx_huge_pages_recovery_ratio  ,
nx_huge_pages_recovery_param_ops,
nx_huge_pages_recovery_ratio,
0644   
)

◆ module_param_named()

module_param_named ( flush_on_reuse  ,
force_flush_and_sync_on_reuse  ,
bool  ,
0644   
)

◆ need_topup()

static bool need_topup ( struct kvm_mmu_memory_cache *  cache,
int  min 
)
inlinestatic

Definition at line 6424 of file mmu.c.

6425 {
6426  return kvm_mmu_memory_cache_nr_free_objects(cache) < min;
6427 }
Here is the caller graph for this function:

◆ need_topup_split_caches_or_resched()

static bool need_topup_split_caches_or_resched ( struct kvm *  kvm)
static

Definition at line 6429 of file mmu.c.

6430 {
6431  if (need_resched() || rwlock_needbreak(&kvm->mmu_lock))
6432  return true;
6433 
6434  /*
6435  * In the worst case, SPLIT_DESC_CACHE_MIN_NR_OBJECTS descriptors are needed
6436  * to split a single huge page. Calculating how many are actually needed
6437  * is possible but not worth the complexity.
6438  */
6439  return need_topup(&kvm->arch.split_desc_cache, SPLIT_DESC_CACHE_MIN_NR_OBJECTS) ||
6440  need_topup(&kvm->arch.split_page_header_cache, 1) ||
6441  need_topup(&kvm->arch.split_shadow_page_cache, 1);
6442 }
static bool need_topup(struct kvm_mmu_memory_cache *cache, int min)
Definition: mmu.c:6424
Here is the call graph for this function:
Here is the caller graph for this function:

◆ nonpaging_gva_to_gpa()

static gpa_t nonpaging_gva_to_gpa ( struct kvm_vcpu *  vcpu,
struct kvm_mmu *  mmu,
gpa_t  vaddr,
u64  access,
struct x86_exception exception 
)
static

Definition at line 4075 of file mmu.c.

4078 {
4079  if (exception)
4080  exception->error_code = 0;
4081  return kvm_translate_gpa(vcpu, mmu, vaddr, access, exception);
4082 }
static gpa_t kvm_translate_gpa(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gpa_t gpa, u64 access, struct x86_exception *exception)
Definition: mmu.h:313
Here is the call graph for this function:
Here is the caller graph for this function:

◆ nonpaging_init_context()

static void nonpaging_init_context ( struct kvm_mmu *  context)
static

Definition at line 4649 of file mmu.c.

4650 {
4651  context->page_fault = nonpaging_page_fault;
4652  context->gva_to_gpa = nonpaging_gva_to_gpa;
4653  context->sync_spte = NULL;
4654 }
static int nonpaging_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
Definition: mmu.c:4532
Here is the call graph for this function:
Here is the caller graph for this function:

◆ nonpaging_page_fault()

static int nonpaging_page_fault ( struct kvm_vcpu *  vcpu,
struct kvm_page_fault fault 
)
static

Definition at line 4532 of file mmu.c.

4534 {
4535  /* This path builds a PAE pagetable, we can map 2mb pages at maximum. */
4536  fault->max_level = PG_LEVEL_2M;
4537  return direct_page_fault(vcpu, fault);
4538 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ page_fault_can_be_fast()

static bool page_fault_can_be_fast ( struct kvm_page_fault fault)
static

Definition at line 3341 of file mmu.c.

3342 {
3343  /*
3344  * Page faults with reserved bits set, i.e. faults on MMIO SPTEs, only
3345  * reach the common page fault handler if the SPTE has an invalid MMIO
3346  * generation number. Refreshing the MMIO generation needs to go down
3347  * the slow path. Note, EPT Misconfigs do NOT set the PRESENT flag!
3348  */
3349  if (fault->rsvd)
3350  return false;
3351 
3352  /*
3353  * #PF can be fast if:
3354  *
3355  * 1. The shadow page table entry is not present and A/D bits are
3356  * disabled _by KVM_, which could mean that the fault is potentially
3357  * caused by access tracking (if enabled). If A/D bits are enabled
3358  * by KVM, but disabled by L1 for L2, KVM is forced to disable A/D
3359  * bits for L2 and employ access tracking, but the fast page fault
3360  * mechanism only supports direct MMUs.
3361  * 2. The shadow page table entry is present, the access is a write,
3362  * and no reserved bits are set (MMIO SPTEs cannot be "fixed"), i.e.
3363  * the fault was caused by a write-protection violation. If the
3364  * SPTE is MMU-writable (determined later), the fault can be fixed
3365  * by setting the Writable bit, which can be done out of mmu_lock.
3366  */
3367  if (!fault->present)
3368  return !kvm_ad_enabled();
3369 
3370  /*
3371  * Note, instruction fetches and writes are mutually exclusive, ignore
3372  * the "exec" flag.
3373  */
3374  return fault->write;
3375 }
const bool present
Definition: mmu_internal.h:199
const bool rsvd
Definition: mmu_internal.h:200
Here is the call graph for this function:
Here is the caller graph for this function:

◆ page_fault_handle_page_track()

static bool page_fault_handle_page_track ( struct kvm_vcpu *  vcpu,
struct kvm_page_fault fault 
)
static

Definition at line 4208 of file mmu.c.

4210 {
4211  if (unlikely(fault->rsvd))
4212  return false;
4213 
4214  if (!fault->present || !fault->write)
4215  return false;
4216 
4217  /*
4218  * The guest is writing a page that is write-tracked, which cannot
4219  * be fixed by the page fault handler.
4220  */
4221  if (kvm_gfn_is_write_tracked(vcpu->kvm, fault->slot, fault->gfn))
4222  return true;
4223 
4224  return false;
4225 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ paging32_init_context()

static void paging32_init_context ( struct kvm_mmu *  context)
static

Definition at line 5237 of file mmu.c.

5238 {
5239  context->page_fault = paging32_page_fault;
5240  context->gva_to_gpa = paging32_gva_to_gpa;
5241  context->sync_spte = paging32_sync_spte;
5242 }
Here is the caller graph for this function:

◆ paging64_init_context()

static void paging64_init_context ( struct kvm_mmu *  context)
static

Definition at line 5230 of file mmu.c.

5231 {
5232  context->page_fault = paging64_page_fault;
5233  context->gva_to_gpa = paging64_gva_to_gpa;
5234  context->sync_spte = paging64_sync_spte;
5235 }
Here is the caller graph for this function:

◆ pte_list_add()

static int pte_list_add ( struct kvm_mmu_memory_cache *  cache,
u64 *  spte,
struct kvm_rmap_head *  rmap_head 
)
static

Definition at line 933 of file mmu.c.

935 {
936  struct pte_list_desc *desc;
937  int count = 0;
938 
939  if (!rmap_head->val) {
940  rmap_head->val = (unsigned long)spte;
941  } else if (!(rmap_head->val & 1)) {
942  desc = kvm_mmu_memory_cache_alloc(cache);
943  desc->sptes[0] = (u64 *)rmap_head->val;
944  desc->sptes[1] = spte;
945  desc->spte_count = 2;
946  desc->tail_count = 0;
947  rmap_head->val = (unsigned long)desc | 1;
948  ++count;
949  } else {
950  desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
951  count = desc->tail_count + desc->spte_count;
952 
953  /*
954  * If the previous head is full, allocate a new head descriptor
955  * as tail descriptors are always kept full.
956  */
957  if (desc->spte_count == PTE_LIST_EXT) {
958  desc = kvm_mmu_memory_cache_alloc(cache);
959  desc->more = (struct pte_list_desc *)(rmap_head->val & ~1ul);
960  desc->spte_count = 0;
961  desc->tail_count = count;
962  rmap_head->val = (unsigned long)desc | 1;
963  }
964  desc->sptes[desc->spte_count++] = spte;
965  }
966  return count;
967 }
#define PTE_LIST_EXT
Definition: mmu.c:124
u32 tail_count
Definition: mmu.c:151
Here is the caller graph for this function:
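
The encoding that pte_list_add() maintains is compact but easy to misread: bit 0 of rmap_head->val distinguishes a lone SPTE pointer stored inline from a tagged pointer to the head of a pte_list_desc chain, where only the head may be partially full and desc->more links to always-full tail descriptors. The following user-space sketch models just that decode step, using simplified stand-in types (not the kernel structures), to make the tagging explicit:

#include <stdint.h>
#include <stdio.h>

#define PTE_LIST_EXT 14                   /* mirrors the kernel constant */

/* Illustrative stand-ins; the real field layout is richer. */
struct pte_list_desc {
    struct pte_list_desc *more;           /* older, always-full descriptors */
    uint32_t spte_count;                  /* valid entries in this descriptor */
    uint32_t tail_count;                  /* entries in all tail descriptors */
    uint64_t *sptes[PTE_LIST_EXT];
};

struct rmap_head { unsigned long val; };

/* Decode the head: 0 = empty, bit 0 clear = single SPTE, bit 0 set = list. */
static unsigned int count_sptes(const struct rmap_head *head)
{
    const struct pte_list_desc *desc;

    if (!head->val)
        return 0;
    if (!(head->val & 1))
        return 1;                         /* val is the u64 *spte itself */

    desc = (const struct pte_list_desc *)(head->val & ~1ul);
    return desc->tail_count + desc->spte_count;
}

int main(void)
{
    uint64_t spte = 0;
    struct rmap_head head = { .val = (unsigned long)&spte };

    printf("%u\n", count_sptes(&head));   /* prints 1: single-SPTE encoding */
    return 0;
}

This is the same arithmetic pte_list_count() performs below; the kernel version simply operates on the real kvm_rmap_head and pte_list_desc definitions.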

◆ pte_list_count()

unsigned int pte_list_count ( struct kvm_rmap_head *  rmap_head)

Definition at line 1073 of file mmu.c.

1074 {
1075  struct pte_list_desc *desc;
1076 
1077  if (!rmap_head->val)
1078  return 0;
1079  else if (!(rmap_head->val & 1))
1080  return 1;
1081 
1082  desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
1083  return desc->tail_count + desc->spte_count;
1084 }
Here is the caller graph for this function:

◆ pte_list_desc_remove_entry()

static void pte_list_desc_remove_entry ( struct kvm *  kvm,
struct kvm_rmap_head *  rmap_head,
struct pte_list_desc desc,
int  i 
)
static

Definition at line 969 of file mmu.c.

972 {
973  struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
974  int j = head_desc->spte_count - 1;
975 
976  /*
977  * The head descriptor should never be empty. A new head is added only
978  * when adding an entry and the previous head is full, and heads are
979  * removed (this flow) when they become empty.
980  */
981  KVM_BUG_ON_DATA_CORRUPTION(j < 0, kvm);
982 
983  /*
984  * Replace the to-be-freed SPTE with the last valid entry from the head
985  * descriptor to ensure that tail descriptors are full at all times.
986  * Note, this also means that tail_count is stable for each descriptor.
987  */
988  desc->sptes[i] = head_desc->sptes[j];
989  head_desc->sptes[j] = NULL;
990  head_desc->spte_count--;
991  if (head_desc->spte_count)
992  return;
993 
994  /*
995  * The head descriptor is empty. If there are no tail descriptors,
996  * nullify the rmap head to mark the list as empty, else point the rmap
997  * head at the next descriptor, i.e. the new head.
998  */
999  if (!head_desc->more)
1000  rmap_head->val = 0;
1001  else
1002  rmap_head->val = (unsigned long)head_desc->more | 1;
1003  mmu_free_pte_list_desc(head_desc);
1004 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ pte_list_remove()

static void pte_list_remove ( struct kvm *  kvm,
u64 *  spte,
struct kvm_rmap_head *  rmap_head 
)
static

Definition at line 1006 of file mmu.c.

1008 {
1009  struct pte_list_desc *desc;
1010  int i;
1011 
1012  if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_head->val, kvm))
1013  return;
1014 
1015  if (!(rmap_head->val & 1)) {
1016  if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_head->val != spte, kvm))
1017  return;
1018 
1019  rmap_head->val = 0;
1020  } else {
1021  desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
1022  while (desc) {
1023  for (i = 0; i < desc->spte_count; ++i) {
1024  if (desc->sptes[i] == spte) {
1025  pte_list_desc_remove_entry(kvm, rmap_head,
1026  desc, i);
1027  return;
1028  }
1029  }
1030  desc = desc->more;
1031  }
1032 
1033  KVM_BUG_ON_DATA_CORRUPTION(true, kvm);
1034  }
1035 }
static void pte_list_desc_remove_entry(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct pte_list_desc *desc, int i)
Definition: mmu.c:969
Here is the call graph for this function:
Here is the caller graph for this function:

◆ reserved_hpa_bits()

static u64 reserved_hpa_bits ( void  )
inline static

Definition at line 4974 of file mmu.c.

4975 {
4976  return rsvd_bits(shadow_phys_bits, 63);
4977 }
u8 __read_mostly shadow_phys_bits
Definition: spte.c:46
Here is the call graph for this function:
Here is the caller graph for this function:

◆ reset_ept_shadow_zero_bits_mask()

static void reset_ept_shadow_zero_bits_mask ( struct kvm_mmu *  context,
bool  execonly 
)
static

Definition at line 5062 of file mmu.c.

5063 {
5064  __reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
5065  reserved_hpa_bits(), execonly,
5066  max_huge_page_level);
5067 }
static u64 reserved_hpa_bits(void)
Definition: mmu.c:4974
static void __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, u64 pa_bits_rsvd, bool execonly, int huge_page_level)
Definition: mmu.c:4928
Here is the call graph for this function:
Here is the caller graph for this function:

◆ reset_guest_paging_metadata()

static void reset_guest_paging_metadata ( struct kvm_vcpu *  vcpu,
struct kvm_mmu *  mmu 
)
static

Definition at line 5219 of file mmu.c.

5221 {
5222  if (!is_cr0_pg(mmu))
5223  return;
5224 
5225  reset_guest_rsvds_bits_mask(vcpu, mmu);
5226  update_permission_bitmask(mmu, false);
5227  update_pkru_bitmask(mmu);
5228 }
static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
Definition: mmu.c:4917
static void update_pkru_bitmask(struct kvm_mmu *mmu)
Definition: mmu.c:5175
Here is the call graph for this function:
Here is the caller graph for this function:

◆ reset_guest_rsvds_bits_mask()

static void reset_guest_rsvds_bits_mask ( struct kvm_vcpu *  vcpu,
struct kvm_mmu *  context 
)
static

Definition at line 4917 of file mmu.c.

4919 {
4920  __reset_rsvds_bits_mask(&context->guest_rsvd_check,
4921  vcpu->arch.reserved_gpa_bits,
4922  context->cpu_role.base.level, is_efer_nx(context),
4923  guest_can_use(vcpu, X86_FEATURE_GBPAGES),
4924  is_cr4_pse(context),
4925  guest_cpuid_is_amd_compatible(vcpu));
4926 }
static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu, unsigned int x86_feature)
Definition: cpuid.h:278
static bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu)
Definition: cpuid.h:123
static void __reset_rsvds_bits_mask(struct rsvd_bits_validate *rsvd_check, u64 pa_bits_rsvd, int level, bool nx, bool gbpages, bool pse, bool amd)
Definition: mmu.c:4828
Here is the call graph for this function:
Here is the caller graph for this function:

◆ reset_rsvds_bits_mask_ept()

static void reset_rsvds_bits_mask_ept ( struct kvm_vcpu *  vcpu,
struct kvm_mmu *  context,
bool  execonly,
int  huge_page_level 
)
static

Definition at line 4966 of file mmu.c.

4968 {
4969  __reset_rsvds_bits_mask_ept(&context->guest_rsvd_check,
4970  vcpu->arch.reserved_gpa_bits, execonly,
4971  huge_page_level);
4972 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ reset_shadow_zero_bits_mask()

static void reset_shadow_zero_bits_mask ( struct kvm_vcpu *  vcpu,
struct kvm_mmu *  context 
)
static

Definition at line 4984 of file mmu.c.

4986 {
4987  /* @amd adds a check on a bit of the SPTEs, which KVM shouldn't use anyway. */
4988  bool is_amd = true;
4989  /* KVM doesn't use 2-level page tables for the shadow MMU. */
4990  bool is_pse = false;
4991  struct rsvd_bits_validate *shadow_zero_check;
4992  int i;
4993 
4994  WARN_ON_ONCE(context->root_role.level < PT32E_ROOT_LEVEL);
4995 
4996  shadow_zero_check = &context->shadow_zero_check;
4997  __reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(),
4998  context->root_role.level,
4999  context->root_role.efer_nx,
5000  guest_can_use(vcpu, X86_FEATURE_GBPAGES),
5001  is_pse, is_amd);
5002 
5003  if (!shadow_me_mask)
5004  return;
5005 
5006  for (i = context->root_role.level; --i >= 0;) {
5007  /*
5008  * So far shadow_me_value is a constant during KVM's life
5009  * time. Bits in shadow_me_value are allowed to be set.
5010  * Bits in shadow_me_mask but not in shadow_me_value are
5011  * not allowed to be set.
5012  */
5013  shadow_zero_check->rsvd_bits_mask[0][i] |= shadow_me_mask;
5014  shadow_zero_check->rsvd_bits_mask[1][i] |= shadow_me_mask;
5015  shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_value;
5016  shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_value;
5017  }
5018 
5019 }
u64 __read_mostly shadow_me_mask
Definition: spte.c:40
static bool is_pse(struct kvm_vcpu *vcpu)
Definition: x86.h:193
Here is the call graph for this function:
Here is the caller graph for this function:

◆ reset_tdp_shadow_zero_bits_mask()

static void reset_tdp_shadow_zero_bits_mask ( struct kvm_mmu *  context)
static

Definition at line 5031 of file mmu.c.

5032 {
5033  struct rsvd_bits_validate *shadow_zero_check;
5034  int i;
5035 
5036  shadow_zero_check = &context->shadow_zero_check;
5037 
5038  if (boot_cpu_is_amd())
5039  __reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(),
5040  context->root_role.level, true,
5041  boot_cpu_has(X86_FEATURE_GBPAGES),
5042  false, true);
5043  else
5044  __reset_rsvds_bits_mask_ept(shadow_zero_check,
5045  reserved_hpa_bits(), false,
5046  max_huge_page_level);
5047 
5048  if (!shadow_me_mask)
5049  return;
5050 
5051  for (i = context->root_role.level; --i >= 0;) {
5052  shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask;
5053  shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask;
5054  }
5055 }
static bool boot_cpu_is_amd(void)
Definition: mmu.c:5021
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rmap_add()

static void rmap_add ( struct kvm_vcpu *  vcpu,
const struct kvm_memory_slot *  slot,
u64 *  spte,
gfn_t  gfn,
unsigned int  access 
)
static

Definition at line 1665 of file mmu.c.

1667 {
1668  struct kvm_mmu_memory_cache *cache = &vcpu->arch.mmu_pte_list_desc_cache;
1669 
1670  __rmap_add(vcpu->kvm, cache, slot, spte, gfn, access);
1671 }
static void __rmap_add(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, const struct kvm_memory_slot *slot, u64 *spte, gfn_t gfn, unsigned int access)
Definition: mmu.c:1641
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rmap_get_first()

static u64* rmap_get_first ( struct kvm_rmap_head *  rmap_head,
struct rmap_iterator iter 
)
static

Definition at line 1136 of file mmu.c.

1138 {
1139  u64 *sptep;
1140 
1141  if (!rmap_head->val)
1142  return NULL;
1143 
1144  if (!(rmap_head->val & 1)) {
1145  iter->desc = NULL;
1146  sptep = (u64 *)rmap_head->val;
1147  goto out;
1148  }
1149 
1150  iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
1151  iter->pos = 0;
1152  sptep = iter->desc->sptes[iter->pos];
1153 out:
1154  BUG_ON(!is_shadow_present_pte(*sptep));
1155  return sptep;
1156 }
int pos
Definition: mmu.c:1126
struct pte_list_desc * desc
Definition: mmu.c:1125
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rmap_get_next()

static u64* rmap_get_next ( struct rmap_iterator iter)
static

Definition at line 1163 of file mmu.c.

1164 {
1165  u64 *sptep;
1166 
1167  if (iter->desc) {
1168  if (iter->pos < PTE_LIST_EXT - 1) {
1169  ++iter->pos;
1170  sptep = iter->desc->sptes[iter->pos];
1171  if (sptep)
1172  goto out;
1173  }
1174 
1175  iter->desc = iter->desc->more;
1176 
1177  if (iter->desc) {
1178  iter->pos = 0;
1179  /* desc->sptes[0] cannot be NULL */
1180  sptep = iter->desc->sptes[iter->pos];
1181  goto out;
1182  }
1183  }
1184 
1185  return NULL;
1186 out:
1187  BUG_ON(!is_shadow_present_pte(*sptep));
1188  return sptep;
1189 }
Here is the call graph for this function:
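
Together with rmap_get_first(), this helper backs the for_each_rmap_spte() macro: the walk starts at the inlined SPTE (or the head descriptor), steps through desc->sptes[] until it reaches a NULL slot or PTE_LIST_EXT entries, then follows desc->more to the next, always-full tail descriptor. rmap_write_protect() further down is a representative caller.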

◆ rmap_remove()

static void rmap_remove ( struct kvm *  kvm,
u64 *  spte 
)
static

Definition at line 1095 of file mmu.c.

1096 {
1097  struct kvm_memslots *slots;
1098  struct kvm_memory_slot *slot;
1099  struct kvm_mmu_page *sp;
1100  gfn_t gfn;
1101  struct kvm_rmap_head *rmap_head;
1102 
1103  sp = sptep_to_sp(spte);
1104  gfn = kvm_mmu_page_get_gfn(sp, spte_index(spte));
1105 
1106  /*
1107  * Unlike rmap_add, rmap_remove does not run in the context of a vCPU
1108  * so we have to determine which memslots to use based on context
1109  * information in sp->role.
1110  */
1111  slots = kvm_memslots_for_spte_role(kvm, sp->role);
1112 
1113  slot = __gfn_to_memslot(slots, gfn);
1114  rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
1115 
1116  pte_list_remove(kvm, spte, rmap_head);
1117 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rmap_walk_init_level()

static void rmap_walk_init_level ( struct slot_rmap_walk_iterator iterator,
int  level 
)
static

Definition at line 1511 of file mmu.c.

1513 {
1514  iterator->level = level;
1515  iterator->gfn = iterator->start_gfn;
1516  iterator->rmap = gfn_to_rmap(iterator->gfn, level, iterator->slot);
1517  iterator->end_rmap = gfn_to_rmap(iterator->end_gfn, level, iterator->slot);
1518 }
struct kvm_rmap_head * end_rmap
Definition: mmu.c:1508
struct kvm_rmap_head * rmap
Definition: mmu.c:1504
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rmap_write_protect()

static bool rmap_write_protect ( struct kvm_rmap_head *  rmap_head,
bool  pt_protect 
)
static

Definition at line 1244 of file mmu.c.

1246 {
1247  u64 *sptep;
1248  struct rmap_iterator iter;
1249  bool flush = false;
1250 
1251  for_each_rmap_spte(rmap_head, &iter, sptep)
1252  flush |= spte_write_protect(sptep, pt_protect);
1253 
1254  return flush;
1255 }
static bool spte_write_protect(u64 *sptep, bool pt_protect)
Definition: mmu.c:1229
Here is the call graph for this function:
Here is the caller graph for this function:

◆ set_nx_huge_pages()

static int set_nx_huge_pages ( const char *  val,
const struct kernel_param *  kp 
)
static

Definition at line 6951 of file mmu.c.

6952 {
6953  bool old_val = nx_huge_pages;
6954  bool new_val;
6955 
6956  if (nx_hugepage_mitigation_hard_disabled)
6957  return -EPERM;
6958 
6959  /* In "auto" mode deploy workaround only if CPU has the bug. */
6960  if (sysfs_streq(val, "off")) {
6961  new_val = 0;
6962  } else if (sysfs_streq(val, "force")) {
6963  new_val = 1;
6964  } else if (sysfs_streq(val, "auto")) {
6965  new_val = get_nx_auto_mode();
6966  } else if (sysfs_streq(val, "never")) {
6967  new_val = 0;
6968 
6969  mutex_lock(&kvm_lock);
6970  if (!list_empty(&vm_list)) {
6971  mutex_unlock(&kvm_lock);
6972  return -EBUSY;
6973  }
6974  nx_hugepage_mitigation_hard_disabled = true;
6975  mutex_unlock(&kvm_lock);
6976  } else if (kstrtobool(val, &new_val) < 0) {
6977  return -EINVAL;
6978  }
6979 
6980  __set_nx_huge_pages(new_val);
6981 
6982  if (new_val != old_val) {
6983  struct kvm *kvm;
6984 
6985  mutex_lock(&kvm_lock);
6986 
6987  list_for_each_entry(kvm, &vm_list, vm_list) {
6988  mutex_lock(&kvm->slots_lock);
6989  kvm_mmu_zap_all_fast(kvm);
6990  mutex_unlock(&kvm->slots_lock);
6991 
6992  wake_up_process(kvm->arch.nx_huge_page_recovery_thread);
6993  }
6994  mutex_unlock(&kvm_lock);
6995  }
6996 
6997  return 0;
6998 }
Here is the call graph for this function:
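
In summary, the handler accepts "off", "force", "auto", "never", or any standard boolean string; "never" additionally hard-disables the mitigation and is rejected with -EBUSY once any VM exists, and any write that changes the effective value zaps all shadow pages in every VM and wakes each VM's NX recovery thread. Assuming the usual module parameter layout, the knob is typically reachable at /sys/module/kvm/parameters/nx_huge_pages.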

◆ set_nx_huge_pages_recovery_param()

static int set_nx_huge_pages_recovery_param ( const char *  val,
const struct kernel_param *  kp 
)
static

Definition at line 7116 of file mmu.c.

7117 {
7118  bool was_recovery_enabled, is_recovery_enabled;
7119  uint old_period, new_period;
7120  int err;
7121 
7122  if (nx_hugepage_mitigation_hard_disabled)
7123  return -EPERM;
7124 
7125  was_recovery_enabled = calc_nx_huge_pages_recovery_period(&old_period);
7126 
7127  err = param_set_uint(val, kp);
7128  if (err)
7129  return err;
7130 
7131  is_recovery_enabled = calc_nx_huge_pages_recovery_period(&new_period);
7132 
7133  if (is_recovery_enabled &&
7134  (!was_recovery_enabled || old_period > new_period)) {
7135  struct kvm *kvm;
7136 
7137  mutex_lock(&kvm_lock);
7138 
7139  list_for_each_entry(kvm, &vm_list, vm_list)
7140  wake_up_process(kvm->arch.nx_huge_page_recovery_thread);
7141 
7142  mutex_unlock(&kvm_lock);
7143  }
7144 
7145  return err;
7146 }
Here is the call graph for this function:

◆ shadow_mmu_get_sp_for_split()

static struct kvm_mmu_page* shadow_mmu_get_sp_for_split ( struct kvm *  kvm,
u64 *  huge_sptep 
)
static

Definition at line 6477 of file mmu.c.

6478 {
6479  struct kvm_mmu_page *huge_sp = sptep_to_sp(huge_sptep);
6480  struct shadow_page_caches caches = {};
6481  union kvm_mmu_page_role role;
6482  unsigned int access;
6483  gfn_t gfn;
6484 
6485  gfn = kvm_mmu_page_get_gfn(huge_sp, spte_index(huge_sptep));
6486  access = kvm_mmu_page_get_access(huge_sp, spte_index(huge_sptep));
6487 
6488  /*
6489  * Note, huge page splitting always uses direct shadow pages, regardless
6490  * of whether the huge page itself is mapped by a direct or indirect
6491  * shadow page, since the huge page region itself is being directly
6492  * mapped with smaller pages.
6493  */
6494  role = kvm_mmu_child_role(huge_sptep, /*direct=*/true, access);
6495 
6496  /* Direct SPs do not require a shadowed_info_cache. */
6497  caches.page_header_cache = &kvm->arch.split_page_header_cache;
6498  caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache;
6499 
6500  /* Safe to pass NULL for vCPU since requesting a direct SP. */
6501  return __kvm_mmu_get_shadow_page(kvm, NULL, &caches, gfn, role);
6502 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ shadow_mmu_init_context()

static void shadow_mmu_init_context ( struct kvm_vcpu *  vcpu,
struct kvm_mmu *  context,
union kvm_cpu_role  cpu_role,
union kvm_mmu_page_role  root_role 
)
static

Definition at line 5360 of file mmu.c.

5363 {
5364  if (cpu_role.as_u64 == context->cpu_role.as_u64 &&
5365  root_role.word == context->root_role.word)
5366  return;
5367 
5368  context->cpu_role.as_u64 = cpu_role.as_u64;
5369  context->root_role.word = root_role.word;
5370 
5371  if (!is_cr0_pg(context))
5372  nonpaging_init_context(context);
5373  else if (is_cr4_pae(context))
5374  paging64_init_context(context);
5375  else
5376  paging32_init_context(context);
5377 
5378  reset_guest_paging_metadata(vcpu, context);
5379  reset_shadow_zero_bits_mask(vcpu, context);
5380 }
static void paging32_init_context(struct kvm_mmu *context)
Definition: mmu.c:5237
static void paging64_init_context(struct kvm_mmu *context)
Definition: mmu.c:5230
static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
Definition: mmu.c:4984
static void nonpaging_init_context(struct kvm_mmu *context)
Definition: mmu.c:4649
Here is the call graph for this function:
Here is the caller graph for this function:

◆ shadow_mmu_split_huge_page()

static void shadow_mmu_split_huge_page ( struct kvm *  kvm,
const struct kvm_memory_slot *  slot,
u64 *  huge_sptep 
)
static

Definition at line 6504 of file mmu.c.

6508 {
6509  struct kvm_mmu_memory_cache *cache = &kvm->arch.split_desc_cache;
6510  u64 huge_spte = READ_ONCE(*huge_sptep);
6511  struct kvm_mmu_page *sp;
6512  bool flush = false;
6513  u64 *sptep, spte;
6514  gfn_t gfn;
6515  int index;
6516 
6517  sp = shadow_mmu_get_sp_for_split(kvm, huge_sptep);
6518 
6519  for (index = 0; index < SPTE_ENT_PER_PAGE; index++) {
6520  sptep = &sp->spt[index];
6521  gfn = kvm_mmu_page_get_gfn(sp, index);
6522 
6523  /*
6524  * The SP may already have populated SPTEs, e.g. if this huge
6525  * page is aliased by multiple sptes with the same access
6526  * permissions. These entries are guaranteed to map the same
6527  * gfn-to-pfn translation since the SP is direct, so no need to
6528  * modify them.
6529  *
6530  * However, if a given SPTE points to a lower level page table,
6531  * that lower level page table may only be partially populated.
6532  * Installing such SPTEs would effectively unmap a portion of the
6533  * huge page. Unmapping guest memory always requires a TLB flush
6534  * since a subsequent operation on the unmapped regions would
6535  * fail to detect the need to flush.
6536  */
6537  if (is_shadow_present_pte(*sptep)) {
6538  flush |= !is_last_spte(*sptep, sp->role.level);
6539  continue;
6540  }
6541 
6542  spte = make_huge_page_split_spte(kvm, huge_spte, sp->role, index);
6543  mmu_spte_set(sptep, spte);
6544  __rmap_add(kvm, cache, slot, sptep, gfn, sp->role.access);
6545  }
6546 
6547  __link_shadow_page(kvm, cache, huge_sptep, sp, flush);
6548 }
static struct kvm_mmu_page * shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *huge_sptep)
Definition: mmu.c:6477
u64 make_huge_page_split_spte(struct kvm *kvm, u64 huge_spte, union kvm_mmu_page_role role, int index)
Definition: spte.c:274
Here is the call graph for this function:
Here is the caller graph for this function:

◆ shadow_mmu_try_split_huge_page()

static int shadow_mmu_try_split_huge_page ( struct kvm *  kvm,
const struct kvm_memory_slot *  slot,
u64 *  huge_sptep 
)
static

Definition at line 6550 of file mmu.c.

6553 {
6554  struct kvm_mmu_page *huge_sp = sptep_to_sp(huge_sptep);
6555  int level, r = 0;
6556  gfn_t gfn;
6557  u64 spte;
6558 
6559  /* Grab information for the tracepoint before dropping the MMU lock. */
6560  gfn = kvm_mmu_page_get_gfn(huge_sp, spte_index(huge_sptep));
6561  level = huge_sp->role.level;
6562  spte = *huge_sptep;
6563 
6564  if (kvm_mmu_available_pages(kvm) <= KVM_MIN_FREE_MMU_PAGES) {
6565  r = -ENOSPC;
6566  goto out;
6567  }
6568 
6569  if (need_topup_split_caches_or_resched(kvm)) {
6570  write_unlock(&kvm->mmu_lock);
6571  cond_resched();
6572  /*
6573  * If the topup succeeds, return -EAGAIN to indicate that the
6574  * rmap iterator should be restarted because the MMU lock was
6575  * dropped.
6576  */
6577  r = topup_split_caches(kvm) ?: -EAGAIN;
6578  write_lock(&kvm->mmu_lock);
6579  goto out;
6580  }
6581 
6582  shadow_mmu_split_huge_page(kvm, slot, huge_sptep);
6583 
6584 out:
6585  trace_kvm_mmu_split_huge_page(gfn, spte, level, r);
6586  return r;
6587 }
static void shadow_mmu_split_huge_page(struct kvm *kvm, const struct kvm_memory_slot *slot, u64 *huge_sptep)
Definition: mmu.c:6504
static int topup_split_caches(struct kvm *kvm)
Definition: mmu.c:6444
static bool need_topup_split_caches_or_resched(struct kvm *kvm)
Definition: mmu.c:6429
Here is the call graph for this function:
Here is the caller graph for this function:

◆ shadow_mmu_try_split_huge_pages()

static bool shadow_mmu_try_split_huge_pages ( struct kvm *  kvm,
struct kvm_rmap_head *  rmap_head,
const struct kvm_memory_slot *  slot 
)
static

Definition at line 6589 of file mmu.c.

6592 {
6593  struct rmap_iterator iter;
6594  struct kvm_mmu_page *sp;
6595  u64 *huge_sptep;
6596  int r;
6597 
6598 restart:
6599  for_each_rmap_spte(rmap_head, &iter, huge_sptep) {
6600  sp = sptep_to_sp(huge_sptep);
6601 
6602  /* TDP MMU is enabled, so rmap only contains nested MMU SPs. */
6603  if (WARN_ON_ONCE(!sp->role.guest_mode))
6604  continue;
6605 
6606  /* The rmaps should never contain non-leaf SPTEs. */
6607  if (WARN_ON_ONCE(!is_large_pte(*huge_sptep)))
6608  continue;
6609 
6610  /* SPs with level >PG_LEVEL_4K should never be unsync. */
6611  if (WARN_ON_ONCE(sp->unsync))
6612  continue;
6613 
6614  /* Don't bother splitting huge pages on invalid SPs. */
6615  if (sp->role.invalid)
6616  continue;
6617 
6618  r = shadow_mmu_try_split_huge_page(kvm, slot, huge_sptep);
6619 
6620  /*
6621  * The split succeeded or needs to be retried because the MMU
6622  * lock was dropped. Either way, restart the iterator to get it
6623  * back into a consistent state.
6624  */
6625  if (!r || r == -EAGAIN)
6626  goto restart;
6627 
6628  /* The split failed and shouldn't be retried (e.g. -ENOMEM). */
6629  break;
6630  }
6631 
6632  return false;
6633 }
static int shadow_mmu_try_split_huge_page(struct kvm *kvm, const struct kvm_memory_slot *slot, u64 *huge_sptep)
Definition: mmu.c:6550
Here is the call graph for this function:
Here is the caller graph for this function:

◆ shadow_page_table_clear_flood()

static void shadow_page_table_clear_flood ( struct kvm_vcpu *  vcpu,
gva_t  addr 
)
static

Definition at line 4227 of file mmu.c.

4228 {
4229  struct kvm_shadow_walk_iterator iterator;
4230  u64 spte;
4231 
4232  walk_shadow_page_lockless_begin(vcpu);
4233  for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
4234  clear_sp_write_flooding_count(iterator.sptep);
4235  walk_shadow_page_lockless_end(vcpu);
4236 }
static void clear_sp_write_flooding_count(u64 *spte)
Definition: mmu.c:2140
Here is the call graph for this function:
Here is the caller graph for this function:

◆ shadow_walk_init()

static void shadow_walk_init ( struct kvm_shadow_walk_iterator iterator,
struct kvm_vcpu *  vcpu,
u64  addr 
)
static

Definition at line 2395 of file mmu.c.

2397 {
2398  shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu->root.hpa,
2399  addr);
2400 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ shadow_walk_init_using_root()

static void shadow_walk_init_using_root ( struct kvm_shadow_walk_iterator iterator,
struct kvm_vcpu *  vcpu,
hpa_t  root,
u64  addr 
)
static

Definition at line 2366 of file mmu.c.

2369 {
2370  iterator->addr = addr;
2371  iterator->shadow_addr = root;
2372  iterator->level = vcpu->arch.mmu->root_role.level;
2373 
2374  if (iterator->level >= PT64_ROOT_4LEVEL &&
2375  vcpu->arch.mmu->cpu_role.base.level < PT64_ROOT_4LEVEL &&
2376  !vcpu->arch.mmu->root_role.direct)
2377  iterator->level = PT32E_ROOT_LEVEL;
2378 
2379  if (iterator->level == PT32E_ROOT_LEVEL) {
2380  /*
2381  * prev_root is currently only used for 64-bit hosts. So only
2382  * the active root_hpa is valid here.
2383  */
2384  BUG_ON(root != vcpu->arch.mmu->root.hpa);
2385 
2386  iterator->shadow_addr
2387  = vcpu->arch.mmu->pae_root[(addr >> 30) & 3];
2388  iterator->shadow_addr &= SPTE_BASE_ADDR_MASK;
2389  --iterator->level;
2390  if (!iterator->shadow_addr)
2391  iterator->level = 0;
2392  }
2393 }
Here is the caller graph for this function:

◆ shadow_walk_next()

static void shadow_walk_next ( struct kvm_shadow_walk_iterator iterator)
static

Definition at line 2424 of file mmu.c.

2425 {
2426  __shadow_walk_next(iterator, *iterator->sptep);
2427 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ shadow_walk_okay()

static bool shadow_walk_okay ( struct kvm_shadow_walk_iterator iterator)
static

Definition at line 2402 of file mmu.c.

2403 {
2404  if (iterator->level < PG_LEVEL_4K)
2405  return false;
2406 
2407  iterator->index = SPTE_INDEX(iterator->addr, iterator->level);
2408  iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index;
2409  return true;
2410 }
#define SPTE_INDEX(address, level)
Definition: spte.h:57
Here is the caller graph for this function:
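
These three helpers are what the for_each_shadow_entry*() macros expand to. A hedged sketch of the canonical loop shape (kernel context assumed, not a verbatim excerpt):

	struct kvm_shadow_walk_iterator it;

	for (shadow_walk_init(&it, vcpu, addr);
	     shadow_walk_okay(&it);
	     shadow_walk_next(&it)) {
		u64 spte = *it.sptep;	/* the SPTE for addr at it.level */

		/* Callers typically stop once a non-present entry is hit. */
		if (!is_shadow_present_pte(spte))
			break;
	}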

◆ slot_rmap_walk_init()

static void slot_rmap_walk_init ( struct slot_rmap_walk_iterator iterator,
const struct kvm_memory_slot *  slot,
int  start_level,
int  end_level,
gfn_t  start_gfn,
gfn_t  end_gfn 
)
static

Definition at line 1520 of file mmu.c.

1524 {
1525  iterator->slot = slot;
1526  iterator->start_level = start_level;
1527  iterator->end_level = end_level;
1528  iterator->start_gfn = start_gfn;
1529  iterator->end_gfn = end_gfn;
1530 
1531  rmap_walk_init_level(iterator, iterator->start_level);
1532 }
static void rmap_walk_init_level(struct slot_rmap_walk_iterator *iterator, int level)
Definition: mmu.c:1511
Here is the call graph for this function:

◆ slot_rmap_walk_next()

static void slot_rmap_walk_next ( struct slot_rmap_walk_iterator iterator)
static

Definition at line 1539 of file mmu.c.

1540 {
1541  while (++iterator->rmap <= iterator->end_rmap) {
1542  iterator->gfn += (1UL << KVM_HPAGE_GFN_SHIFT(iterator->level));
1543 
1544  if (iterator->rmap->val)
1545  return;
1546  }
1547 
1548  if (++iterator->level > iterator->end_level) {
1549  iterator->rmap = NULL;
1550  return;
1551  }
1552 
1553  rmap_walk_init_level(iterator, iterator->level);
1554 }
Here is the call graph for this function:

◆ slot_rmap_walk_okay()

static bool slot_rmap_walk_okay ( struct slot_rmap_walk_iterator iterator)
static

Definition at line 1534 of file mmu.c.

1535 {
1536  return !!iterator->rmap;
1537 }
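
These iterator helpers back the for_each_slot_rmap_range() macro; __walk_slot_rmaps() drives them roughly as sketched below (simplified, with the cond-resched and flush handling of the real walker omitted):

	struct slot_rmap_walk_iterator iter;
	bool flush = false;

	for (slot_rmap_walk_init(&iter, slot, start_level, end_level,
				 start_gfn, end_gfn);
	     slot_rmap_walk_okay(&iter);
	     slot_rmap_walk_next(&iter))
		flush |= fn(kvm, iter.rmap, slot);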

◆ slot_rmap_write_protect()

static bool slot_rmap_write_protect ( struct kvm *  kvm,
struct kvm_rmap_head *  rmap_head,
const struct kvm_memory_slot *  slot 
)
static

Definition at line 6399 of file mmu.c.

6402 {
6403  return rmap_write_protect(rmap_head, false);
6404 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ sp_has_gptes()

static bool sp_has_gptes ( struct kvm_mmu_page sp)
static

Definition at line 1897 of file mmu.c.

1898 {
1899  if (sp->role.direct)
1900  return false;
1901 
1902  if (sp->role.passthrough)
1903  return false;
1904 
1905  return true;
1906 }
Here is the caller graph for this function:

◆ spte_clear_dirty()

static bool spte_clear_dirty ( u64 *  sptep)
static

Definition at line 1257 of file mmu.c.

1258 {
1259  u64 spte = *sptep;
1260 
1261  KVM_MMU_WARN_ON(!spte_ad_enabled(spte));
1262  spte &= ~shadow_dirty_mask;
1263  return mmu_spte_update(sptep, spte);
1264 }
u64 __read_mostly shadow_dirty_mask
Definition: spte.c:33
Here is the call graph for this function:
Here is the caller graph for this function:

◆ spte_write_protect()

static bool spte_write_protect ( u64 *  sptep,
bool  pt_protect 
)
static

Definition at line 1229 of file mmu.c.

1230 {
1231  u64 spte = *sptep;
1232 
1233  if (!is_writable_pte(spte) &&
1234  !(pt_protect && is_mmu_writable_spte(spte)))
1235  return false;
1236 
1237  if (pt_protect)
1238  spte &= ~shadow_mmu_writable_mask;
1239  spte = spte & ~PT_WRITABLE_MASK;
1240 
1241  return mmu_spte_update(sptep, spte);
1242 }
u64 __read_mostly shadow_mmu_writable_mask
Definition: spte.c:28
Here is the call graph for this function:
Here is the caller graph for this function:

◆ spte_wrprot_for_clear_dirty()

static bool spte_wrprot_for_clear_dirty ( u64 *  sptep)
static

Definition at line 1266 of file mmu.c.

1267 {
1268  bool was_writable = test_and_clear_bit(PT_WRITABLE_SHIFT,
1269  (unsigned long *)sptep);
1270  if (was_writable && !spte_ad_enabled(*sptep))
1271  kvm_set_pfn_dirty(spte_to_pfn(*sptep));
1272 
1273  return was_writable;
1274 }
#define PT_WRITABLE_SHIFT
Definition: mmu.h:11
Here is the call graph for this function:
Here is the caller graph for this function:

◆ sync_mmio_spte()

static bool sync_mmio_spte ( struct kvm_vcpu *  vcpu,
u64 *  sptep,
gfn_t  gfn,
unsigned int  access 
)
static

Definition at line 4799 of file mmu.c.

4801 {
4802  if (unlikely(is_mmio_spte(*sptep))) {
4803  if (gfn != get_mmio_spte_gfn(*sptep)) {
4804  mmu_spte_clear_no_track(sptep);
4805  return true;
4806  }
4807 
4808  mark_mmio_spte(vcpu, sptep, gfn, access);
4809  return true;
4810  }
4811 
4812  return false;
4813 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ topup_split_caches()

static int topup_split_caches ( struct kvm *  kvm)
static

Definition at line 6444 of file mmu.c.

6445 {
6446  /*
6447  * Allocating rmap list entries when splitting huge pages for nested
6448  * MMUs is uncommon as KVM needs to use a list if and only if there is
6449  * more than one rmap entry for a gfn, i.e. requires an L1 gfn to be
6450  * aliased by multiple L2 gfns and/or from multiple nested roots with
6451  * different roles. Aliasing gfns when using TDP is atypical for VMMs;
6452  * a few gfns are often aliased during boot, e.g. when remapping BIOS,
6453  * but aliasing rarely occurs post-boot or for many gfns. If there is
6454  * only one rmap entry, rmap->val points directly at that one entry and
6455  * doesn't need to allocate a list. Buffer the cache by the default
6456  * capacity so that KVM doesn't have to drop mmu_lock to topup if KVM
6457  * encounters an aliased gfn or two.
6458  */
6459  const int capacity = SPLIT_DESC_CACHE_MIN_NR_OBJECTS +
6460  KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE;
6461  int r;
6462 
6463  lockdep_assert_held(&kvm->slots_lock);
6464 
6465  r = __kvm_mmu_topup_memory_cache(&kvm->arch.split_desc_cache, capacity,
6466  SPLIT_DESC_CACHE_MIN_NR_OBJECTS);
6467  if (r)
6468  return r;
6469 
6470  r = kvm_mmu_topup_memory_cache(&kvm->arch.split_page_header_cache, 1);
6471  if (r)
6472  return r;
6473 
6474  return kvm_mmu_topup_memory_cache(&kvm->arch.split_shadow_page_cache, 1);
6475 }
Here is the caller graph for this function:

◆ track_possible_nx_huge_page()

void track_possible_nx_huge_page ( struct kvm *  kvm,
struct kvm_mmu_page sp 
)

Definition at line 848 of file mmu.c.

849 {
850  /*
851  * If it's possible to replace the shadow page with an NX huge page,
852  * i.e. if the shadow page is the only thing currently preventing KVM
853  * from using a huge page, add the shadow page to the list of "to be
854  * zapped for NX recovery" pages. Note, the shadow page can already be
855  * on the list if KVM is reusing an existing shadow page, i.e. if KVM
856  * links a shadow page at multiple points.
857  */
858  if (!list_empty(&sp->possible_nx_huge_page_link))
859  return;
860 
861  ++kvm->stat.nx_lpage_splits;
862  list_add_tail(&sp->possible_nx_huge_page_link,
863  &kvm->arch.possible_nx_huge_pages);
864 }
Here is the caller graph for this function:

◆ unaccount_nx_huge_page()

static void unaccount_nx_huge_page ( struct kvm *  kvm,
struct kvm_mmu_page sp 
)
static

Definition at line 900 of file mmu.c.

901 {
902  sp->nx_huge_page_disallowed = false;
903 
904  untrack_possible_nx_huge_page(kvm, sp);
905 }
void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
Definition: mmu.c:891
Here is the call graph for this function:
Here is the caller graph for this function:

◆ unaccount_shadowed()

static void unaccount_shadowed ( struct kvm *  kvm,
struct kvm_mmu_page sp 
)
static

Definition at line 875 of file mmu.c.

876 {
877  struct kvm_memslots *slots;
878  struct kvm_memory_slot *slot;
879  gfn_t gfn;
880 
881  kvm->arch.indirect_shadow_pages--;
882  gfn = sp->gfn;
883  slots = kvm_memslots_for_spte_role(kvm, sp->role);
884  slot = __gfn_to_memslot(slots, gfn);
885  if (sp->role.level > PG_LEVEL_4K)
886  return __kvm_write_track_remove_gfn(kvm, slot, gfn);
887 
888  kvm_mmu_gfn_allow_lpage(slot, gfn);
889 }
void kvm_mmu_gfn_allow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn)
Definition: mmu.c:822
void __kvm_write_track_remove_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn)
Definition: page_track.c:100
Here is the call graph for this function:
Here is the caller graph for this function:

◆ untrack_possible_nx_huge_page()

void untrack_possible_nx_huge_page ( struct kvm *  kvm,
struct kvm_mmu_page sp 
)

Definition at line 891 of file mmu.c.

892 {
893  if (list_empty(&sp->possible_nx_huge_page_link))
894  return;
895 
896  --kvm->stat.nx_lpage_splits;
897  list_del_init(&sp->possible_nx_huge_page_link);
898 }
Here is the caller graph for this function:

◆ update_gfn_disallow_lpage_count()

static void update_gfn_disallow_lpage_count ( const struct kvm_memory_slot *  slot,
gfn_t  gfn,
int  count 
)
static

Definition at line 802 of file mmu.c.

804 {
805  struct kvm_lpage_info *linfo;
806  int old, i;
807 
808  for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
809  linfo = lpage_info_slot(gfn, slot, i);
810 
811  old = linfo->disallow_lpage;
812  linfo->disallow_lpage += count;
813  WARN_ON_ONCE((old ^ linfo->disallow_lpage) & KVM_LPAGE_MIXED_FLAG);
814  }
815 }
#define KVM_LPAGE_MIXED_FLAG
Definition: mmu.c:800
Here is the call graph for this function:
Here is the caller graph for this function:

◆ update_permission_bitmask()

static void update_permission_bitmask ( struct kvm_mmu *  mmu,
bool  ept 
)
static

Definition at line 5079 of file mmu.c.

5080 {
5081  unsigned byte;
5082 
5083  const u8 x = BYTE_MASK(ACC_EXEC_MASK);
5084  const u8 w = BYTE_MASK(ACC_WRITE_MASK);
5085  const u8 u = BYTE_MASK(ACC_USER_MASK);
5086 
5087  bool cr4_smep = is_cr4_smep(mmu);
5088  bool cr4_smap = is_cr4_smap(mmu);
5089  bool cr0_wp = is_cr0_wp(mmu);
5090  bool efer_nx = is_efer_nx(mmu);
5091 
5092  for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
5093  unsigned pfec = byte << 1;
5094 
5095  /*
5096  * Each "*f" variable has a 1 bit for each UWX value
5097  * that causes a fault with the given PFEC.
5098  */
5099 
5100  /* Faults from writes to non-writable pages */
5101  u8 wf = (pfec & PFERR_WRITE_MASK) ? (u8)~w : 0;
5102  /* Faults from user mode accesses to supervisor pages */
5103  u8 uf = (pfec & PFERR_USER_MASK) ? (u8)~u : 0;
5104  /* Faults from fetches of non-executable pages*/
5105  u8 ff = (pfec & PFERR_FETCH_MASK) ? (u8)~x : 0;
5106  /* Faults from kernel mode fetches of user pages */
5107  u8 smepf = 0;
5108  /* Faults from kernel mode accesses of user pages */
5109  u8 smapf = 0;
5110 
5111  if (!ept) {
5112  /* Faults from kernel mode accesses to user pages */
5113  u8 kf = (pfec & PFERR_USER_MASK) ? 0 : u;
5114 
5115  /* Not really needed: !nx will cause pte.nx to fault */
5116  if (!efer_nx)
5117  ff = 0;
5118 
5119  /* Allow supervisor writes if !cr0.wp */
5120  if (!cr0_wp)
5121  wf = (pfec & PFERR_USER_MASK) ? wf : 0;
5122 
5123  /* Disallow supervisor fetches of user code if cr4.smep */
5124  if (cr4_smep)
5125  smepf = (pfec & PFERR_FETCH_MASK) ? kf : 0;
5126 
5127  /*
5128  * SMAP:kernel-mode data accesses from user-mode
5129  * mappings should fault. A fault is considered
5130  * as a SMAP violation if all of the following
5131  * conditions are true:
5132  * - X86_CR4_SMAP is set in CR4
5133  * - A user page is accessed
5134  * - The access is not a fetch
5135  * - The access is supervisor mode
5136  * - If implicit supervisor access or X86_EFLAGS_AC is clear
5137  *
5138  * Here, we cover the first four conditions.
5139  * The fifth is computed dynamically in permission_fault();
5140  * PFERR_RSVD_MASK bit will be set in PFEC if the access is
5141  * *not* subject to SMAP restrictions.
5142  */
5143  if (cr4_smap)
5144  smapf = (pfec & (PFERR_RSVD_MASK|PFERR_FETCH_MASK)) ? 0 : kf;
5145  }
5146 
5147  mmu->permissions[byte] = ff | uf | wf | smepf | smapf;
5148  }
5149 }
#define BYTE_MASK(access)
Definition: mmu.c:5069
#define ACC_USER_MASK
Definition: spte.h:48
#define ACC_EXEC_MASK
Definition: spte.h:46
Here is the caller graph for this function:
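
The consumer side lives in permission_fault() in mmu.h; in simplified form (the real code also folds SMAP/EFLAGS.AC handling into the index), the PFEC selects the byte and the page's UWX access bits select the bit, so a set bit means "fault":

	/* Simplified sketch of the lookup built by update_permission_bitmask(). */
	bool fault = (mmu->permissions[pfec >> 1] >> pte_access) & 1;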

◆ update_pkru_bitmask()

static void update_pkru_bitmask ( struct kvm_mmu *  mmu)
static

Definition at line 5175 of file mmu.c.

5176 {
5177  unsigned bit;
5178  bool wp;
5179 
5180  mmu->pkru_mask = 0;
5181 
5182  if (!is_cr4_pke(mmu))
5183  return;
5184 
5185  wp = is_cr0_wp(mmu);
5186 
5187  for (bit = 0; bit < ARRAY_SIZE(mmu->permissions); ++bit) {
5188  unsigned pfec, pkey_bits;
5189  bool check_pkey, check_write, ff, uf, wf, pte_user;
5190 
5191  pfec = bit << 1;
5192  ff = pfec & PFERR_FETCH_MASK;
5193  uf = pfec & PFERR_USER_MASK;
5194  wf = pfec & PFERR_WRITE_MASK;
5195 
5196  /* PFEC.RSVD is replaced by ACC_USER_MASK. */
5197  pte_user = pfec & PFERR_RSVD_MASK;
5198 
5199  /*
5200  * Only need to check the access which is not an
5201  * instruction fetch and is to a user page.
5202  */
5203  check_pkey = (!ff && pte_user);
5204  /*
5205  * write access is controlled by PKRU if it is a
5206  * user access or CR0.WP = 1.
5207  */
5208  check_write = check_pkey && wf && (uf || wp);
5209 
5210  /* PKRU.AD stops both read and write access. */
5211  pkey_bits = !!check_pkey;
5212  /* PKRU.WD stops write access. */
5213  pkey_bits |= (!!check_write) << 1;
5214 
5215  mmu->pkru_mask |= (pkey_bits & 3) << pfec;
5216  }
5217 }
Here is the caller graph for this function:
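
As with the permission bitmap, the consumer is permission_fault(): the two PKRU bits (AD, WD) for the page's protection key are masked by the two bits stored here for the fault's PFEC, and any bit left set turns the access into a PK fault. A simplified sketch (pfec_offset stands in for the real index, which also substitutes ACC_USER_MASK for PFEC.RSVD):

	u32 pkru_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3;

	pkru_bits &= mmu->pkru_mask >> pfec_offset;	/* pfec_offset: illustrative */
	fault |= (pkru_bits != 0);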

◆ validate_direct_spte()

static void validate_direct_spte ( struct kvm_vcpu *  vcpu,
u64 *  sptep,
unsigned  direct_access 
)
static

Definition at line 2470 of file mmu.c.

2472 {
2473  if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) {
2474  struct kvm_mmu_page *child;
2475 
2476  /*
2477  * For the direct sp, if the guest pte's dirty bit
2478  * changed from clean to dirty, it would corrupt the
2479  * sp's access by allowing writes through a read-only sp,
2480  * so we should update the spte at this point to get
2481  * a new sp with the correct access.
2482  */
2483  child = spte_to_child_sp(*sptep);
2484  if (child->role.access == direct_access)
2485  return;
2486 
2487  drop_parent_pte(vcpu->kvm, child, sptep);
2488  kvm_flush_remote_tlbs_sptep(vcpu->kvm, sptep);
2489  }
2490 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ vcpu_to_role_regs()

static struct kvm_mmu_role_regs vcpu_to_role_regs ( struct kvm_vcpu *  vcpu)
static

Definition at line 242 of file mmu.c.

248 {
249  struct kvm_mmu_role_regs regs = {
250  .cr0 = kvm_read_cr0_bits(vcpu, KVM_MMU_CR0_ROLE_BITS),
251  .cr4 = kvm_read_cr4_bits(vcpu, KVM_MMU_CR4_ROLE_BITS),
252  .efer = vcpu->arch.efer,
253  };
254 
255  return regs;
256 }
static ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
static ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
Here is the caller graph for this function:

◆ walk_shadow_page_lockless_begin()

static void walk_shadow_page_lockless_begin ( struct kvm_vcpu *  vcpu)
static

Definition at line 645 of file mmu.c.

646 {
647  if (is_tdp_mmu_active(vcpu)) {
648  kvm_tdp_mmu_walk_lockless_begin();
649  } else {
650  /*
651  * Prevent page table teardown by making any free-er wait during
652  * kvm_flush_remote_tlbs() IPI to all active vcpus.
653  */
654  local_irq_disable();
655 
656  /*
657  * Make sure a following spte read is not reordered ahead of the write
658  * to vcpu->mode.
659  */
660  smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES);
661  }
662 }
static void kvm_tdp_mmu_walk_lockless_begin(void)
Definition: tdp_mmu.h:56
Here is the call graph for this function:
Here is the caller graph for this function:

◆ walk_shadow_page_lockless_end()

static void walk_shadow_page_lockless_end ( struct kvm_vcpu *  vcpu)
static

Definition at line 664 of file mmu.c.

665 {
666  if (is_tdp_mmu_active(vcpu)) {
667  kvm_tdp_mmu_walk_lockless_end();
668  } else {
669  /*
670  * Make sure the write to vcpu->mode is not reordered in front of
671  * reads to sptes. If it does, kvm_mmu_commit_zap_page() can see us
672  * OUTSIDE_GUEST_MODE and proceed to free the shadow page table.
673  */
674  smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE);
675  local_irq_enable();
676  }
677 }
static void kvm_tdp_mmu_walk_lockless_end(void)
Definition: tdp_mmu.h:61
Here is the call graph for this function:
Here is the caller graph for this function:
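
Any lockless SPTE walk must be bracketed by walk_shadow_page_lockless_begin()/walk_shadow_page_lockless_end(); shadow_page_table_clear_flood() above shows the pattern in miniature.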

◆ walk_slot_rmaps()

static __always_inline bool walk_slot_rmaps ( struct kvm *  kvm,
const struct kvm_memory_slot *  slot,
slot_rmaps_handler  fn,
int  start_level,
int  end_level,
bool  flush_on_yield 
)
static

Definition at line 6072 of file mmu.c.

6077 {
6078  return __walk_slot_rmaps(kvm, slot, fn, start_level, end_level,
6079  slot->base_gfn, slot->base_gfn + slot->npages - 1,
6080  flush_on_yield, false);
6081 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ walk_slot_rmaps_4k()

static __always_inline bool walk_slot_rmaps_4k ( struct kvm *  kvm,
const struct kvm_memory_slot *  slot,
slot_rmaps_handler  fn,
bool  flush_on_yield 
)
static

Definition at line 6083 of file mmu.c.

6087 {
6088  return walk_slot_rmaps(kvm, slot, fn, PG_LEVEL_4K, PG_LEVEL_4K, flush_on_yield);
6089 }
Here is the call graph for this function:
Here is the caller graph for this function:

Variable Documentation

◆ max_tdp_level

int max_tdp_level __read_mostly
static

Definition at line 115 of file mmu.c.

◆ force_flush_and_sync_on_reuse

bool __read_mostly force_flush_and_sync_on_reuse
static

Definition at line 96 of file mmu.c.

◆ itlb_multihit_kvm_mitigation

bool itlb_multihit_kvm_mitigation
extern

◆ kvm_total_used_mmu_pages

struct percpu_counter kvm_total_used_mmu_pages
static

Definition at line 181 of file mmu.c.

◆ mmu_page_header_cache

struct kmem_cache* mmu_page_header_cache

Definition at line 181 of file mmu.c.

◆ mmu_shrinker

struct shrinker* mmu_shrinker
static

Definition at line 6924 of file mmu.c.

◆ nx_huge_pages

int __read_mostly nx_huge_pages = -1

Definition at line 64 of file mmu.c.

◆ nx_huge_pages_ops

const struct kernel_param_ops nx_huge_pages_ops
static
Initial value:
= {
 .set = set_nx_huge_pages,
 .get = get_nx_huge_pages,
}
static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
Definition: mmu.c:6951
static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp)
Definition: mmu.c:6932

Definition at line 75 of file mmu.c.

◆ nx_huge_pages_recovery_param_ops

const struct kernel_param_ops nx_huge_pages_recovery_param_ops
static
Initial value:
= {
 .set = set_nx_huge_pages_recovery_param,
 .get = param_get_uint,
}
static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp)
Definition: mmu.c:7116

Definition at line 75 of file mmu.c.

◆ nx_huge_pages_recovery_period_ms

uint __read_mostly nx_huge_pages_recovery_period_ms
static

Definition at line 65 of file mmu.c.

◆ nx_huge_pages_recovery_ratio

uint __read_mostly nx_huge_pages_recovery_ratio = 60
static

Definition at line 70 of file mmu.c.

◆ nx_hugepage_mitigation_hard_disabled

bool nx_hugepage_mitigation_hard_disabled
static

Definition at line 62 of file mmu.c.

◆ pte_list_desc_cache

struct kmem_cache* pte_list_desc_cache
static

Definition at line 180 of file mmu.c.

◆ tdp_enabled

bool tdp_enabled = false

Definition at line 106 of file mmu.c.

◆ tdp_mmu_allowed

bool __ro_after_init tdp_mmu_allowed
static

Definition at line 108 of file mmu.c.