KVM
Macros | Typedefs | Functions
tdp_mmu.c File Reference
#include "mmu.h"
#include "mmu_internal.h"
#include "mmutrace.h"
#include "tdp_iter.h"
#include "tdp_mmu.h"
#include "spte.h"
#include <asm/cmpxchg.h>
#include <trace/events/kvm.h>
Include dependency graph for tdp_mmu.c:

Go to the source code of this file.

Macros

#define pr_fmt(fmt)   KBUILD_MODNAME ": " fmt
 
#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _only_valid)
 
#define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id)    __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, true)
 
#define for_each_tdp_mmu_root_yield_safe(_kvm, _root)
 
#define for_each_tdp_mmu_root(_kvm, _root, _as_id)
 
#define tdp_root_for_each_pte(_iter, _root, _start, _end)    for_each_tdp_pte(_iter, _root, _start, _end)
 
#define tdp_root_for_each_leaf_pte(_iter, _root, _start, _end)
 
#define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end)    for_each_tdp_pte(_iter, root_to_sp(_mmu->root.hpa), _start, _end)
 

Typedefs

typedef bool(* tdp_handler_t) (struct kvm *kvm, struct tdp_iter *iter, struct kvm_gfn_range *range)
 

Functions

void kvm_mmu_init_tdp_mmu (struct kvm *kvm)
 
static __always_inline bool kvm_lockdep_assert_mmu_lock_held (struct kvm *kvm, bool shared)
 
void kvm_mmu_uninit_tdp_mmu (struct kvm *kvm)
 
static void tdp_mmu_free_sp (struct kvm_mmu_page *sp)
 
static void tdp_mmu_free_sp_rcu_callback (struct rcu_head *head)
 
void kvm_tdp_mmu_put_root (struct kvm *kvm, struct kvm_mmu_page *root)
 
static struct kvm_mmu_page * tdp_mmu_next_root (struct kvm *kvm, struct kvm_mmu_page *prev_root, bool only_valid)
 
static struct kvm_mmu_page * tdp_mmu_alloc_sp (struct kvm_vcpu *vcpu)
 
static void tdp_mmu_init_sp (struct kvm_mmu_page *sp, tdp_ptep_t sptep, gfn_t gfn, union kvm_mmu_page_role role)
 
static void tdp_mmu_init_child_sp (struct kvm_mmu_page *child_sp, struct tdp_iter *iter)
 
hpa_t kvm_tdp_mmu_get_vcpu_root_hpa (struct kvm_vcpu *vcpu)
 
static void handle_changed_spte (struct kvm *kvm, int as_id, gfn_t gfn, u64 old_spte, u64 new_spte, int level, bool shared)
 
static void tdp_account_mmu_page (struct kvm *kvm, struct kvm_mmu_page *sp)
 
static void tdp_unaccount_mmu_page (struct kvm *kvm, struct kvm_mmu_page *sp)
 
static void tdp_mmu_unlink_sp (struct kvm *kvm, struct kvm_mmu_page *sp)
 
static void handle_removed_pt (struct kvm *kvm, tdp_ptep_t pt, bool shared)
 
static int tdp_mmu_set_spte_atomic (struct kvm *kvm, struct tdp_iter *iter, u64 new_spte)
 
static int tdp_mmu_zap_spte_atomic (struct kvm *kvm, struct tdp_iter *iter)
 
static u64 tdp_mmu_set_spte (struct kvm *kvm, int as_id, tdp_ptep_t sptep, u64 old_spte, u64 new_spte, gfn_t gfn, int level)
 
static void tdp_mmu_iter_set_spte (struct kvm *kvm, struct tdp_iter *iter, u64 new_spte)
 
static bool __must_check tdp_mmu_iter_cond_resched (struct kvm *kvm, struct tdp_iter *iter, bool flush, bool shared)
 
static gfn_t tdp_mmu_max_gfn_exclusive (void)
 
static void __tdp_mmu_zap_root (struct kvm *kvm, struct kvm_mmu_page *root, bool shared, int zap_level)
 
static void tdp_mmu_zap_root (struct kvm *kvm, struct kvm_mmu_page *root, bool shared)
 
bool kvm_tdp_mmu_zap_sp (struct kvm *kvm, struct kvm_mmu_page *sp)
 
static bool tdp_mmu_zap_leafs (struct kvm *kvm, struct kvm_mmu_page *root, gfn_t start, gfn_t end, bool can_yield, bool flush)
 
bool kvm_tdp_mmu_zap_leafs (struct kvm *kvm, gfn_t start, gfn_t end, bool flush)
 
void kvm_tdp_mmu_zap_all (struct kvm *kvm)
 
void kvm_tdp_mmu_zap_invalidated_roots (struct kvm *kvm)
 
void kvm_tdp_mmu_invalidate_all_roots (struct kvm *kvm)
 
static int tdp_mmu_map_handle_target_level (struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, struct tdp_iter *iter)
 
static int tdp_mmu_link_sp (struct kvm *kvm, struct tdp_iter *iter, struct kvm_mmu_page *sp, bool shared)
 
static int tdp_mmu_split_huge_page (struct kvm *kvm, struct tdp_iter *iter, struct kvm_mmu_page *sp, bool shared)
 
int kvm_tdp_mmu_map (struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 
bool kvm_tdp_mmu_unmap_gfn_range (struct kvm *kvm, struct kvm_gfn_range *range, bool flush)
 
static __always_inline bool kvm_tdp_mmu_handle_gfn (struct kvm *kvm, struct kvm_gfn_range *range, tdp_handler_t handler)
 
static bool age_gfn_range (struct kvm *kvm, struct tdp_iter *iter, struct kvm_gfn_range *range)
 
bool kvm_tdp_mmu_age_gfn_range (struct kvm *kvm, struct kvm_gfn_range *range)
 
static bool test_age_gfn (struct kvm *kvm, struct tdp_iter *iter, struct kvm_gfn_range *range)
 
bool kvm_tdp_mmu_test_age_gfn (struct kvm *kvm, struct kvm_gfn_range *range)
 
static bool set_spte_gfn (struct kvm *kvm, struct tdp_iter *iter, struct kvm_gfn_range *range)
 
bool kvm_tdp_mmu_set_spte_gfn (struct kvm *kvm, struct kvm_gfn_range *range)
 
static bool wrprot_gfn_range (struct kvm *kvm, struct kvm_mmu_page *root, gfn_t start, gfn_t end, int min_level)
 
bool kvm_tdp_mmu_wrprot_slot (struct kvm *kvm, const struct kvm_memory_slot *slot, int min_level)
 
static struct kvm_mmu_page * __tdp_mmu_alloc_sp_for_split (gfp_t gfp)
 
static struct kvm_mmu_page * tdp_mmu_alloc_sp_for_split (struct kvm *kvm, struct tdp_iter *iter, bool shared)
 
static int tdp_mmu_split_huge_pages_root (struct kvm *kvm, struct kvm_mmu_page *root, gfn_t start, gfn_t end, int target_level, bool shared)
 
void kvm_tdp_mmu_try_split_huge_pages (struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t start, gfn_t end, int target_level, bool shared)
 
static bool tdp_mmu_need_write_protect (struct kvm_mmu_page *sp)
 
static bool clear_dirty_gfn_range (struct kvm *kvm, struct kvm_mmu_page *root, gfn_t start, gfn_t end)
 
bool kvm_tdp_mmu_clear_dirty_slot (struct kvm *kvm, const struct kvm_memory_slot *slot)
 
static void clear_dirty_pt_masked (struct kvm *kvm, struct kvm_mmu_page *root, gfn_t gfn, unsigned long mask, bool wrprot)
 
void kvm_tdp_mmu_clear_dirty_pt_masked (struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, unsigned long mask, bool wrprot)
 
static void zap_collapsible_spte_range (struct kvm *kvm, struct kvm_mmu_page *root, const struct kvm_memory_slot *slot)
 
void kvm_tdp_mmu_zap_collapsible_sptes (struct kvm *kvm, const struct kvm_memory_slot *slot)
 
static bool write_protect_gfn (struct kvm *kvm, struct kvm_mmu_page *root, gfn_t gfn, int min_level)
 
bool kvm_tdp_mmu_write_protect_gfn (struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, int min_level)
 
int kvm_tdp_mmu_get_walk (struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level)
 
u64 * kvm_tdp_mmu_fast_pf_get_last_sptep (struct kvm_vcpu *vcpu, u64 addr, u64 *spte)
 

Macro Definition Documentation

◆ __for_each_tdp_mmu_root_yield_safe

#define __for_each_tdp_mmu_root_yield_safe (   _kvm,
  _root,
  _as_id,
  _only_valid 
)
Value:
for (_root = tdp_mmu_next_root(_kvm, NULL, _only_valid); \
({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root; \
_root = tdp_mmu_next_root(_kvm, _root, _only_valid)) \
if (kvm_mmu_page_as_id(_root) != _as_id) { \
} else
static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
Definition: mmu_internal.h:143
static struct kvm_mmu_page * tdp_mmu_next_root(struct kvm *kvm, struct kvm_mmu_page *prev_root, bool only_valid)
Definition: tdp_mmu.c:104

Definition at line 152 of file tdp_mmu.c.
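As a usage illustration only, the following sketch (hypothetical; example_count_valid_roots() is not part of tdp_mmu.c) mirrors the pattern used by callers such as kvm_tdp_mmu_clear_dirty_slot(): walk every valid root for a memslot's address space while holding mmu_lock, letting tdp_mmu_next_root() take and drop root references so the walk can safely yield.

static int example_count_valid_roots(struct kvm *kvm,
				     const struct kvm_memory_slot *slot)
{
	struct kvm_mmu_page *root;
	int nr_roots = 0;

	/* The iterator asserts that mmu_lock is held on every step. */
	lockdep_assert_held_read(&kvm->mmu_lock);

	for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id)
		nr_roots++;

	return nr_roots;
}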

◆ for_each_tdp_mmu_root

#define for_each_tdp_mmu_root (   _kvm,
  _root,
  _as_id 
)
Value:
list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link) \
if (kvm_lockdep_assert_mmu_lock_held(_kvm, false) && \
kvm_mmu_page_as_id(_root) != _as_id) { \
} else
static __always_inline bool kvm_lockdep_assert_mmu_lock_held(struct kvm *kvm, bool shared)
Definition: tdp_mmu.c:22

Definition at line 174 of file tdp_mmu.c.

◆ for_each_tdp_mmu_root_yield_safe

#define for_each_tdp_mmu_root_yield_safe (   _kvm,
  _root 
)
Value:
for (_root = tdp_mmu_next_root(_kvm, NULL, false); \
({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root; \
_root = tdp_mmu_next_root(_kvm, _root, false))

Definition at line 162 of file tdp_mmu.c.

◆ for_each_valid_tdp_mmu_root_yield_safe

#define for_each_valid_tdp_mmu_root_yield_safe (   _kvm,
  _root,
  _as_id 
)     __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, true)

Definition at line 159 of file tdp_mmu.c.

◆ pr_fmt

#define pr_fmt (   fmt)    KBUILD_MODNAME ": " fmt

Definition at line 2 of file tdp_mmu.c.

◆ tdp_mmu_for_each_pte

#define tdp_mmu_for_each_pte (   _iter,
  _mmu,
  _start,
  _end 
)     for_each_tdp_pte(_iter, root_to_sp(_mmu->root.hpa), _start, _end)

Definition at line 631 of file tdp_mmu.c.

◆ tdp_root_for_each_leaf_pte

#define tdp_root_for_each_leaf_pte (   _iter,
  _root,
  _start,
  _end 
)
Value:
tdp_root_for_each_pte(_iter, _root, _start, _end) \
if (!is_shadow_present_pte(_iter.old_spte) || \
!is_last_spte(_iter.old_spte, _iter.level)) \
continue; \
else
static bool is_last_spte(u64 pte, int level)
Definition: spte.h:318
static bool is_shadow_present_pte(u64 pte)
Definition: spte.h:258
#define tdp_root_for_each_pte(_iter, _root, _start, _end)
Definition: tdp_mmu.c:621

Definition at line 624 of file tdp_mmu.c.
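As an illustration of the leaf-only walk, here is a hypothetical helper (not part of tdp_mmu.c) that counts the present, last-level SPTEs in a GFN range; like the real callers (e.g. kvm_tdp_mmu_handle_gfn() and clear_dirty_gfn_range()), it must walk the SPTEs under RCU protection.

static int example_count_leaf_sptes(struct kvm_mmu_page *root,
				    gfn_t start, gfn_t end)
{
	struct tdp_iter iter;
	int nr_leafs = 0;

	rcu_read_lock();

	/* Non-present and non-leaf SPTEs are skipped by the macro itself. */
	tdp_root_for_each_leaf_pte(iter, root, start, end)
		nr_leafs++;

	rcu_read_unlock();

	return nr_leafs;
}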

◆ tdp_root_for_each_pte

#define tdp_root_for_each_pte (   _iter,
  _root,
  _start,
  _end 
)     for_each_tdp_pte(_iter, _root, _start, _end)

Definition at line 621 of file tdp_mmu.c.

Typedef Documentation

◆ tdp_handler_t

typedef bool(* tdp_handler_t) (struct kvm *kvm, struct tdp_iter *iter, struct kvm_gfn_range *range)

Definition at line 1126 of file tdp_mmu.c.
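A tdp_handler_t is invoked by kvm_tdp_mmu_handle_gfn() once per present leaf SPTE in the notifier range and returns true if it touched (or matched) the SPTE. The sketch below is hypothetical, not part of tdp_mmu.c, and only shows the plumbing; compare test_age_gfn()/kvm_tdp_mmu_test_age_gfn() for a real pair.

static bool example_is_writable_gfn(struct kvm *kvm, struct tdp_iter *iter,
				    struct kvm_gfn_range *range)
{
	/* Only present, last-level SPTEs reach a handler. */
	return is_writable_pte(iter->old_spte);
}

static bool example_any_writable(struct kvm *kvm, struct kvm_gfn_range *range)
{
	return kvm_tdp_mmu_handle_gfn(kvm, range, example_is_writable_gfn);
}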

Function Documentation

◆ __tdp_mmu_alloc_sp_for_split()

static struct kvm_mmu_page* __tdp_mmu_alloc_sp_for_split ( gfp_t  gfp)
static

Definition at line 1315 of file tdp_mmu.c.

1316 {
1317  struct kvm_mmu_page *sp;
1318 
1319  gfp |= __GFP_ZERO;
1320 
1321  sp = kmem_cache_alloc(mmu_page_header_cache, gfp);
1322  if (!sp)
1323  return NULL;
1324 
1325  sp->spt = (void *)__get_free_page(gfp);
1326  if (!sp->spt) {
1327  kmem_cache_free(mmu_page_header_cache, sp);
1328  return NULL;
1329  }
1330 
1331  return sp;
1332 }
struct kmem_cache * mmu_page_header_cache
Definition: mmu.c:181
Here is the caller graph for this function:

◆ __tdp_mmu_zap_root()

static void __tdp_mmu_zap_root ( struct kvm *  kvm,
struct kvm_mmu_page *  root,
bool  shared,
int  zap_level 
)
static

Definition at line 690 of file tdp_mmu.c.

692 {
693  struct tdp_iter iter;
694 
695  gfn_t end = tdp_mmu_max_gfn_exclusive();
696  gfn_t start = 0;
697 
698  for_each_tdp_pte_min_level(iter, root, zap_level, start, end) {
699 retry:
700  if (tdp_mmu_iter_cond_resched(kvm, &iter, false, shared))
701  continue;
702 
703  if (!is_shadow_present_pte(iter.old_spte))
704  continue;
705 
706  if (iter.level > zap_level)
707  continue;
708 
709  if (!shared)
710  tdp_mmu_iter_set_spte(kvm, &iter, 0);
711  else if (tdp_mmu_set_spte_atomic(kvm, &iter, 0))
712  goto retry;
713  }
714 }
#define for_each_tdp_pte_min_level(iter, root, min_level, start, end)
Definition: tdp_iter.h:123
static int tdp_mmu_set_spte_atomic(struct kvm *kvm, struct tdp_iter *iter, u64 new_spte)
Definition: tdp_mmu.c:517
static bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm, struct tdp_iter *iter, bool flush, bool shared)
Definition: tdp_mmu.c:648
static gfn_t tdp_mmu_max_gfn_exclusive(void)
Definition: tdp_mmu.c:679
static void tdp_mmu_iter_set_spte(struct kvm *kvm, struct tdp_iter *iter, u64 new_spte)
Definition: tdp_mmu.c:612
Here is the call graph for this function:
Here is the caller graph for this function:

◆ age_gfn_range()

static bool age_gfn_range ( struct kvm *  kvm,
struct tdp_iter *  iter,
struct kvm_gfn_range *  range 
)
static

Definition at line 1161 of file tdp_mmu.c.

1163 {
1164  u64 new_spte;
1165 
1166  /* If we have a non-accessed entry we don't need to change the pte. */
1167  if (!is_accessed_spte(iter->old_spte))
1168  return false;
1169 
1170  if (spte_ad_enabled(iter->old_spte)) {
1171  iter->old_spte = tdp_mmu_clear_spte_bits(iter->sptep,
1172  iter->old_spte,
 1173  shadow_accessed_mask,
 1174  iter->level);
1175  new_spte = iter->old_spte & ~shadow_accessed_mask;
1176  } else {
1177  /*
1178  * Capture the dirty status of the page, so that it doesn't get
1179  * lost when the SPTE is marked for access tracking.
1180  */
1181  if (is_writable_pte(iter->old_spte))
 1182  kvm_set_pfn_dirty(spte_to_pfn(iter->old_spte));
 1183 
1184  new_spte = mark_spte_for_access_track(iter->old_spte);
1185  iter->old_spte = kvm_tdp_mmu_write_spte(iter->sptep,
1186  iter->old_spte, new_spte,
1187  iter->level);
1188  }
1189 
1190  trace_kvm_tdp_mmu_spte_changed(iter->as_id, iter->gfn, iter->level,
1191  iter->old_spte, new_spte);
1192  return true;
1193 }
void kvm_set_pfn_dirty(kvm_pfn_t pfn)
Definition: kvm_main.c:3285
u64 __read_mostly shadow_accessed_mask
Definition: spte.c:32
u64 mark_spte_for_access_track(u64 spte)
Definition: spte.c:341
static bool is_accessed_spte(u64 spte)
Definition: spte.h:333
static bool is_writable_pte(unsigned long pte)
Definition: spte.h:441
static kvm_pfn_t spte_to_pfn(u64 pte)
Definition: spte.h:328
static bool spte_ad_enabled(u64 spte)
Definition: spte.h:279
int as_id
Definition: tdp_iter.h:103
gfn_t gfn
Definition: tdp_iter.h:95
tdp_ptep_t sptep
Definition: tdp_iter.h:93
u64 old_spte
Definition: tdp_iter.h:105
int level
Definition: tdp_iter.h:101
static u64 tdp_mmu_clear_spte_bits(tdp_ptep_t sptep, u64 old_spte, u64 mask, int level)
Definition: tdp_iter.h:61
static u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte, u64 new_spte, int level)
Definition: tdp_iter.h:51
Here is the call graph for this function:
Here is the caller graph for this function:

◆ clear_dirty_gfn_range()

static bool clear_dirty_gfn_range ( struct kvm *  kvm,
struct kvm_mmu_page *  root,
gfn_t  start,
gfn_t  end 
)
static

Definition at line 1518 of file tdp_mmu.c.

1520 {
1521  const u64 dbit = tdp_mmu_need_write_protect(root) ? PT_WRITABLE_MASK :
 1522  shadow_dirty_mask;
 1523  struct tdp_iter iter;
1524  bool spte_set = false;
1525 
1526  rcu_read_lock();
1527 
1528  tdp_root_for_each_pte(iter, root, start, end) {
1529 retry:
1530  if (!is_shadow_present_pte(iter.old_spte) ||
1531  !is_last_spte(iter.old_spte, iter.level))
1532  continue;
1533 
1534  if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
1535  continue;
1536 
 1537  KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
 1538  spte_ad_need_write_protect(iter.old_spte));
1539 
1540  if (!(iter.old_spte & dbit))
1541  continue;
1542 
1543  if (tdp_mmu_set_spte_atomic(kvm, &iter, iter.old_spte & ~dbit))
1544  goto retry;
1545 
1546  spte_set = true;
1547  }
1548 
1549  rcu_read_unlock();
1550  return spte_set;
1551 }
#define PT_WRITABLE_MASK
Definition: mmu.h:15
#define KVM_MMU_WARN_ON(x)
Definition: mmu_internal.h:12
u64 __read_mostly shadow_dirty_mask
Definition: spte.c:33
static bool spte_ad_need_write_protect(u64 spte)
Definition: spte.h:285
static bool tdp_mmu_need_write_protect(struct kvm_mmu_page *sp)
Definition: tdp_mmu.c:1501
Here is the call graph for this function:
Here is the caller graph for this function:

◆ clear_dirty_pt_masked()

static void clear_dirty_pt_masked ( struct kvm *  kvm,
struct kvm_mmu_page *  root,
gfn_t  gfn,
unsigned long  mask,
bool  wrprot 
)
static

Definition at line 1581 of file tdp_mmu.c.

1583 {
1584  const u64 dbit = (wrprot || tdp_mmu_need_write_protect(root)) ? PT_WRITABLE_MASK :
 1585  shadow_dirty_mask;
 1586  struct tdp_iter iter;
1587 
1588  lockdep_assert_held_write(&kvm->mmu_lock);
1589 
1590  rcu_read_lock();
1591 
1592  tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask),
1593  gfn + BITS_PER_LONG) {
1594  if (!mask)
1595  break;
1596 
 1597  KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
 1598  spte_ad_need_write_protect(iter.old_spte));
1599 
1600  if (iter.level > PG_LEVEL_4K ||
1601  !(mask & (1UL << (iter.gfn - gfn))))
1602  continue;
1603 
1604  mask &= ~(1UL << (iter.gfn - gfn));
1605 
1606  if (!(iter.old_spte & dbit))
1607  continue;
1608 
1609  iter.old_spte = tdp_mmu_clear_spte_bits(iter.sptep,
1610  iter.old_spte, dbit,
1611  iter.level);
1612 
1613  trace_kvm_tdp_mmu_spte_changed(iter.as_id, iter.gfn, iter.level,
1614  iter.old_spte,
1615  iter.old_spte & ~dbit);
1616  kvm_set_pfn_dirty(spte_to_pfn(iter.old_spte));
1617  }
1618 
1619  rcu_read_unlock();
1620 }
#define tdp_root_for_each_leaf_pte(_iter, _root, _start, _end)
Definition: tdp_mmu.c:624
Here is the call graph for this function:
Here is the caller graph for this function:

◆ handle_changed_spte()

static void handle_changed_spte ( struct kvm *  kvm,
int  as_id,
gfn_t  gfn,
u64  old_spte,
u64  new_spte,
int  level,
bool  shared 
)
static

handle_changed_spte - handle bookkeeping associated with an SPTE change

@kvm: kvm instance
@as_id: the address space of the paging structure the SPTE was a part of
@gfn: the base GFN that was mapped by the SPTE
@old_spte: The value of the SPTE before the change
@new_spte: The value of the SPTE after the change
@level: the level of the PT the SPTE is part of in the paging structure
@shared: This operation may not be running under the exclusive use of the MMU lock and the operation must synchronize with other threads that might be modifying SPTEs.

Handle bookkeeping that might result from the modification of a SPTE. Note, dirty logging updates are handled in common code, not here (see make_spte() and fast_pf_fix_direct_spte()).

Definition at line 408 of file tdp_mmu.c.

411 {
412  bool was_present = is_shadow_present_pte(old_spte);
413  bool is_present = is_shadow_present_pte(new_spte);
414  bool was_leaf = was_present && is_last_spte(old_spte, level);
415  bool is_leaf = is_present && is_last_spte(new_spte, level);
416  bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
417 
418  WARN_ON_ONCE(level > PT64_ROOT_MAX_LEVEL);
419  WARN_ON_ONCE(level < PG_LEVEL_4K);
420  WARN_ON_ONCE(gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
421 
422  /*
423  * If this warning were to trigger it would indicate that there was a
424  * missing MMU notifier or a race with some notifier handler.
425  * A present, leaf SPTE should never be directly replaced with another
426  * present leaf SPTE pointing to a different PFN. A notifier handler
427  * should be zapping the SPTE before the main MM's page table is
428  * changed, or the SPTE should be zeroed, and the TLBs flushed by the
429  * thread before replacement.
430  */
431  if (was_leaf && is_leaf && pfn_changed) {
432  pr_err("Invalid SPTE change: cannot replace a present leaf\n"
433  "SPTE with another present leaf SPTE mapping a\n"
434  "different PFN!\n"
435  "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
436  as_id, gfn, old_spte, new_spte, level);
437 
438  /*
439  * Crash the host to prevent error propagation and guest data
440  * corruption.
441  */
442  BUG();
443  }
444 
445  if (old_spte == new_spte)
446  return;
447 
448  trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte);
449 
450  if (is_leaf)
 451  check_spte_writable_invariants(new_spte);
 452 
453  /*
454  * The only times a SPTE should be changed from a non-present to
455  * non-present state is when an MMIO entry is installed/modified/
456  * removed. In that case, there is nothing to do here.
457  */
458  if (!was_present && !is_present) {
459  /*
460  * If this change does not involve a MMIO SPTE or removed SPTE,
461  * it is unexpected. Log the change, though it should not
462  * impact the guest since both the former and current SPTEs
463  * are nonpresent.
464  */
465  if (WARN_ON_ONCE(!is_mmio_spte(old_spte) &&
466  !is_mmio_spte(new_spte) &&
467  !is_removed_spte(new_spte)))
468  pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
469  "should not be replaced with another,\n"
470  "different nonpresent SPTE, unless one or both\n"
471  "are MMIO SPTEs, or the new SPTE is\n"
472  "a temporary removed SPTE.\n"
473  "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
474  as_id, gfn, old_spte, new_spte, level);
475  return;
476  }
477 
478  if (is_leaf != was_leaf)
479  kvm_update_page_stats(kvm, level, is_leaf ? 1 : -1);
480 
481  if (was_leaf && is_dirty_spte(old_spte) &&
482  (!is_present || !is_dirty_spte(new_spte) || pfn_changed))
483  kvm_set_pfn_dirty(spte_to_pfn(old_spte));
484 
485  /*
486  * Recursively handle child PTs if the change removed a subtree from
487  * the paging structure. Note the WARN on the PFN changing without the
488  * SPTE being converted to a hugepage (leaf) or being zapped. Shadow
489  * pages are kernel allocations and should never be migrated.
490  */
491  if (was_present && !was_leaf &&
492  (is_leaf || !is_present || WARN_ON_ONCE(pfn_changed)))
493  handle_removed_pt(kvm, spte_to_child_pt(old_spte, level), shared);
494 
495  if (was_leaf && is_accessed_spte(old_spte) &&
496  (!is_present || !is_accessed_spte(new_spte) || pfn_changed))
 497  kvm_set_pfn_accessed(spte_to_pfn(old_spte));
 498 }
void kvm_set_pfn_accessed(kvm_pfn_t pfn)
Definition: kvm_main.c:3295
static void kvm_update_page_stats(struct kvm *kvm, int level, int count)
Definition: mmu.h:305
static bool is_dirty_spte(u64 spte)
Definition: spte.h:341
static bool is_mmio_spte(u64 spte)
Definition: spte.h:252
static void check_spte_writable_invariants(u64 spte)
Definition: spte.h:447
static bool is_removed_spte(u64 spte)
Definition: spte.h:202
tdp_ptep_t spte_to_child_pt(u64 spte, int level)
Definition: tdp_iter.c:62
static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
Definition: tdp_mmu.c:309
Here is the call graph for this function:
Here is the caller graph for this function:

◆ handle_removed_pt()

static void handle_removed_pt ( struct kvm *  kvm,
tdp_ptep_t  pt,
bool  shared 
)
static

handle_removed_pt() - handle a page table removed from the TDP structure

@kvm: kvm instance
@pt: the page removed from the paging structure
@shared: This operation may not be running under the exclusive use of the MMU lock and the operation must synchronize with other threads that might be modifying SPTEs.

Given a page table that has been removed from the TDP paging structure, iterates through the page table to clear SPTEs and free child page tables.

Note that pt is passed in as a tdp_ptep_t, but it does not need RCU protection. Since this thread removed it from the paging structure, this thread will be responsible for ensuring the page is freed. Hence the early rcu_dereferences in the function.

Definition at line 309 of file tdp_mmu.c.

310 {
311  struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt));
312  int level = sp->role.level;
313  gfn_t base_gfn = sp->gfn;
314  int i;
315 
316  trace_kvm_mmu_prepare_zap_page(sp);
317 
318  tdp_mmu_unlink_sp(kvm, sp);
319 
320  for (i = 0; i < SPTE_ENT_PER_PAGE; i++) {
321  tdp_ptep_t sptep = pt + i;
322  gfn_t gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
323  u64 old_spte;
324 
325  if (shared) {
326  /*
327  * Set the SPTE to a nonpresent value that other
328  * threads will not overwrite. If the SPTE was
329  * already marked as removed then another thread
330  * handling a page fault could overwrite it, so
331  * set the SPTE until it is set from some other
332  * value to the removed SPTE value.
333  */
334  for (;;) {
335  old_spte = kvm_tdp_mmu_write_spte_atomic(sptep, REMOVED_SPTE);
336  if (!is_removed_spte(old_spte))
337  break;
338  cpu_relax();
339  }
340  } else {
341  /*
342  * If the SPTE is not MMU-present, there is no backing
343  * page associated with the SPTE and so no side effects
344  * that need to be recorded, and exclusive ownership of
345  * mmu_lock ensures the SPTE can't be made present.
346  * Note, zapping MMIO SPTEs is also unnecessary as they
347  * are guarded by the memslots generation, not by being
348  * unreachable.
349  */
350  old_spte = kvm_tdp_mmu_read_spte(sptep);
351  if (!is_shadow_present_pte(old_spte))
352  continue;
353 
354  /*
355  * Use the common helper instead of a raw WRITE_ONCE as
356  * the SPTE needs to be updated atomically if it can be
357  * modified by a different vCPU outside of mmu_lock.
358  * Even though the parent SPTE is !PRESENT, the TLB
359  * hasn't yet been flushed, and both Intel and AMD
360  * document that A/D assists can use upper-level PxE
361  * entries that are cached in the TLB, i.e. the CPU can
362  * still access the page and mark it dirty.
363  *
364  * No retry is needed in the atomic update path as the
365  * sole concern is dropping a Dirty bit, i.e. no other
366  * task can zap/remove the SPTE as mmu_lock is held for
367  * write. Marking the SPTE as a removed SPTE is not
368  * strictly necessary for the same reason, but using
369  * the remove SPTE value keeps the shared/exclusive
370  * paths consistent and allows the handle_changed_spte()
371  * call below to hardcode the new value to REMOVED_SPTE.
372  *
373  * Note, even though dropping a Dirty bit is the only
374  * scenario where a non-atomic update could result in a
375  * functional bug, simply checking the Dirty bit isn't
376  * sufficient as a fast page fault could read the upper
377  * level SPTE before it is zapped, and then make this
378  * target SPTE writable, resume the guest, and set the
379  * Dirty bit between reading the SPTE above and writing
380  * it here.
381  */
382  old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte,
383  REMOVED_SPTE, level);
384  }
 385  handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn,
 386  old_spte, REMOVED_SPTE, level, shared);
387  }
388 
389  call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
390 }
u64 __rcu * tdp_ptep_t
Definition: mmu_internal.h:50
static struct kvm_mmu_page * sptep_to_sp(u64 *sptep)
Definition: spte.h:235
#define SPTE_ENT_PER_PAGE
Definition: spte.h:58
#define REMOVED_SPTE
Definition: spte.h:197
union kvm_mmu_page_role role
Definition: mmu_internal.h:80
static u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte)
Definition: tdp_iter.h:22
static u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)
Definition: tdp_iter.h:17
static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
Definition: tdp_mmu.c:279
static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
Definition: tdp_mmu.c:68
static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, u64 old_spte, u64 new_spte, int level, bool shared)
Definition: tdp_mmu.c:408
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_lockdep_assert_mmu_lock_held()

static __always_inline bool kvm_lockdep_assert_mmu_lock_held ( struct kvm *  kvm,
bool  shared 
)
static

Definition at line 22 of file tdp_mmu.c.

24 {
25  if (shared)
26  lockdep_assert_held_read(&kvm->mmu_lock);
27  else
28  lockdep_assert_held_write(&kvm->mmu_lock);
29 
30  return true;
31 }
Here is the caller graph for this function:

◆ kvm_mmu_init_tdp_mmu()

void kvm_mmu_init_tdp_mmu ( struct kvm *  kvm)

Definition at line 15 of file tdp_mmu.c.

16 {
17  INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
18  spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
19 }
Here is the caller graph for this function:

◆ kvm_mmu_uninit_tdp_mmu()

void kvm_mmu_uninit_tdp_mmu ( struct kvm *  kvm)

Definition at line 33 of file tdp_mmu.c.

34 {
35  /*
36  * Invalidate all roots, which besides the obvious, schedules all roots
37  * for zapping and thus puts the TDP MMU's reference to each root, i.e.
38  * ultimately frees all roots.
39  */
 40  kvm_tdp_mmu_invalidate_all_roots(kvm);
 41  kvm_tdp_mmu_zap_invalidated_roots(kvm);
 42 
43  WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages));
44  WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
45 
46  /*
47  * Ensure that all the outstanding RCU callbacks to free shadow pages
48  * can run before the VM is torn down. Putting the last reference to
49  * zapped roots will create new callbacks.
50  */
51  rcu_barrier();
52 }
void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
Definition: tdp_mmu.c:856
void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm)
Definition: tdp_mmu.c:901
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_tdp_mmu_age_gfn_range()

bool kvm_tdp_mmu_age_gfn_range ( struct kvm *  kvm,
struct kvm_gfn_range *  range 
)

Definition at line 1195 of file tdp_mmu.c.

1196 {
1197  return kvm_tdp_mmu_handle_gfn(kvm, range, age_gfn_range);
1198 }
static bool age_gfn_range(struct kvm *kvm, struct tdp_iter *iter, struct kvm_gfn_range *range)
Definition: tdp_mmu.c:1161
static __always_inline bool kvm_tdp_mmu_handle_gfn(struct kvm *kvm, struct kvm_gfn_range *range, tdp_handler_t handler)
Definition: tdp_mmu.c:1129
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_tdp_mmu_clear_dirty_pt_masked()

void kvm_tdp_mmu_clear_dirty_pt_masked ( struct kvm *  kvm,
struct kvm_memory_slot *  slot,
gfn_t  gfn,
unsigned long  mask,
bool  wrprot 
)

Definition at line 1629 of file tdp_mmu.c.

1633 {
1634  struct kvm_mmu_page *root;
1635 
1636  for_each_tdp_mmu_root(kvm, root, slot->as_id)
1637  clear_dirty_pt_masked(kvm, root, gfn, mask, wrprot);
1638 }
static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root, gfn_t gfn, unsigned long mask, bool wrprot)
Definition: tdp_mmu.c:1581
#define for_each_tdp_mmu_root(_kvm, _root, _as_id)
Definition: tdp_mmu.c:174
Here is the call graph for this function:
Here is the caller graph for this function:
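For reference, bit N of @mask selects GFN (@gfn + N), matching the "mask & (1UL << (iter.gfn - gfn))" test in clear_dirty_pt_masked(); e.g. gfn = 0x1000 with mask = 0x5 clears dirty state for GFNs 0x1000 and 0x1002 only. The helper below is a hypothetical restatement of that convention, not code from tdp_mmu.c.

static bool example_gfn_in_mask(gfn_t base_gfn, unsigned long mask, gfn_t gfn)
{
	/* @mask can only cover BITS_PER_LONG pages starting at @base_gfn. */
	if (gfn < base_gfn || gfn - base_gfn >= BITS_PER_LONG)
		return false;

	return mask & (1UL << (gfn - base_gfn));
}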

◆ kvm_tdp_mmu_clear_dirty_slot()

bool kvm_tdp_mmu_clear_dirty_slot ( struct kvm *  kvm,
const struct kvm_memory_slot *  slot 
)

Definition at line 1560 of file tdp_mmu.c.

1562 {
1563  struct kvm_mmu_page *root;
1564  bool spte_set = false;
1565 
1566  lockdep_assert_held_read(&kvm->mmu_lock);
1567  for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id)
1568  spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn,
1569  slot->base_gfn + slot->npages);
1570 
1571  return spte_set;
1572 }
#define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id)
Definition: tdp_mmu.c:159
static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, gfn_t start, gfn_t end)
Definition: tdp_mmu.c:1518
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_tdp_mmu_fast_pf_get_last_sptep()

u64* kvm_tdp_mmu_fast_pf_get_last_sptep ( struct kvm_vcpu *  vcpu,
u64  addr,
u64 *  spte 
)

Definition at line 1795 of file tdp_mmu.c.

1797 {
1798  struct tdp_iter iter;
1799  struct kvm_mmu *mmu = vcpu->arch.mmu;
1800  gfn_t gfn = addr >> PAGE_SHIFT;
1801  tdp_ptep_t sptep = NULL;
1802 
1803  tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
1804  *spte = iter.old_spte;
1805  sptep = iter.sptep;
1806  }
1807 
1808  /*
1809  * Perform the rcu_dereference to get the raw spte pointer value since
1810  * we are passing it up to fast_page_fault, which is shared with the
1811  * legacy MMU and thus does not retain the TDP MMU-specific __rcu
1812  * annotation.
1813  *
1814  * This is safe since fast_page_fault obeys the contracts of this
1815  * function as well as all TDP MMU contracts around modifying SPTEs
1816  * outside of mmu_lock.
1817  */
1818  return rcu_dereference(sptep);
1819 }
#define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end)
Definition: tdp_mmu.c:631
Here is the caller graph for this function:

◆ kvm_tdp_mmu_get_vcpu_root_hpa()

hpa_t kvm_tdp_mmu_get_vcpu_root_hpa ( struct kvm_vcpu *  vcpu)

Definition at line 219 of file tdp_mmu.c.

220 {
221  union kvm_mmu_page_role role = vcpu->arch.mmu->root_role;
222  struct kvm *kvm = vcpu->kvm;
223  struct kvm_mmu_page *root;
224 
225  lockdep_assert_held_write(&kvm->mmu_lock);
226 
227  /*
228  * Check for an existing root before allocating a new one. Note, the
229  * role check prevents consuming an invalid root.
230  */
 231  for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) {
 232  if (root->role.word == role.word &&
233  kvm_tdp_mmu_get_root(root))
234  goto out;
235  }
236 
237  root = tdp_mmu_alloc_sp(vcpu);
238  tdp_mmu_init_sp(root, NULL, 0, role);
239 
240  /*
241  * TDP MMU roots are kept until they are explicitly invalidated, either
242  * by a memslot update or by the destruction of the VM. Initialize the
243  * refcount to two; one reference for the vCPU, and one reference for
244  * the TDP MMU itself, which is held until the root is invalidated and
245  * is ultimately put by kvm_tdp_mmu_zap_invalidated_roots().
246  */
247  refcount_set(&root->tdp_mmu_root_count, 2);
248 
249  spin_lock(&kvm->arch.tdp_mmu_pages_lock);
250  list_add_rcu(&root->link, &kvm->arch.tdp_mmu_roots);
251  spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
252 
253 out:
254  return __pa(root->spt);
255 }
static int kvm_mmu_role_as_id(union kvm_mmu_page_role role)
Definition: mmu_internal.h:138
refcount_t tdp_mmu_root_count
Definition: mmu_internal.h:102
struct list_head link
Definition: mmu_internal.h:57
static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep, gfn_t gfn, union kvm_mmu_page_role role)
Definition: tdp_mmu.c:190
static struct kvm_mmu_page * tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu)
Definition: tdp_mmu.c:180
static __must_check bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root)
Definition: tdp_mmu.h:15
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_tdp_mmu_get_walk()

int kvm_tdp_mmu_get_walk ( struct kvm_vcpu *  vcpu,
u64  addr,
u64 *  sptes,
int *  root_level 
)

Definition at line 1766 of file tdp_mmu.c.

1768 {
1769  struct tdp_iter iter;
1770  struct kvm_mmu *mmu = vcpu->arch.mmu;
1771  gfn_t gfn = addr >> PAGE_SHIFT;
1772  int leaf = -1;
1773 
1774  *root_level = vcpu->arch.mmu->root_role.level;
1775 
1776  tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
1777  leaf = iter.level;
1778  sptes[leaf] = iter.old_spte;
1779  }
1780 
1781  return leaf;
1782 }
Here is the caller graph for this function:
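A hypothetical caller sketch (not part of tdp_mmu.c): kvm_tdp_mmu_get_walk() fills sptes[], indexed by level, for every level it traverses and returns the lowest level reached, or -1 if nothing was walked. This mirrors how get_mmio_spte() in mmu.c consumes the result.

static void example_dump_walk(struct kvm_vcpu *vcpu, u64 gpa)
{
	u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
	int root_level, level, leaf;

	/* Real callers bracket this with walk_shadow_page_lockless_begin/end(). */
	leaf = kvm_tdp_mmu_get_walk(vcpu, gpa, sptes, &root_level);
	if (leaf < 0)
		return;

	for (level = root_level; level >= leaf; level--)
		pr_info("level %d: spte %llx\n", level, sptes[level]);
}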

◆ kvm_tdp_mmu_handle_gfn()

static __always_inline bool kvm_tdp_mmu_handle_gfn ( struct kvm *  kvm,
struct kvm_gfn_range *  range,
tdp_handler_t  handler 
)
static

Definition at line 1129 of file tdp_mmu.c.

1132 {
1133  struct kvm_mmu_page *root;
1134  struct tdp_iter iter;
1135  bool ret = false;
1136 
1137  /*
1138  * Don't support rescheduling, none of the MMU notifiers that funnel
1139  * into this helper allow blocking; it'd be dead, wasteful code.
1140  */
1141  for_each_tdp_mmu_root(kvm, root, range->slot->as_id) {
1142  rcu_read_lock();
1143 
1144  tdp_root_for_each_leaf_pte(iter, root, range->start, range->end)
1145  ret |= handler(kvm, &iter, range);
1146 
1147  rcu_read_unlock();
1148  }
1149 
1150  return ret;
1151 }
Here is the caller graph for this function:

◆ kvm_tdp_mmu_invalidate_all_roots()

void kvm_tdp_mmu_invalidate_all_roots ( struct kvm *  kvm)

Definition at line 901 of file tdp_mmu.c.

902 {
903  struct kvm_mmu_page *root;
904 
905  /*
906  * mmu_lock must be held for write to ensure that a root doesn't become
907  * invalid while there are active readers (invalidating a root while
908  * there are active readers may or may not be problematic in practice,
909  * but it's uncharted territory and not supported).
910  *
911  * Waive the assertion if there are no users of @kvm, i.e. the VM is
912  * being destroyed after all references have been put, or if no vCPUs
913  * have been created (which means there are no roots), i.e. the VM is
914  * being destroyed in an error path of KVM_CREATE_VM.
915  */
916  if (IS_ENABLED(CONFIG_PROVE_LOCKING) &&
917  refcount_read(&kvm->users_count) && kvm->created_vcpus)
918  lockdep_assert_held_write(&kvm->mmu_lock);
919 
920  /*
921  * As above, mmu_lock isn't held when destroying the VM! There can't
922  * be other references to @kvm, i.e. nothing else can invalidate roots
923  * or get/put references to roots.
924  */
925  list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) {
926  /*
927  * Note, invalid roots can outlive a memslot update! Invalid
928  * roots must be *zapped* before the memslot update completes,
929  * but a different task can acquire a reference and keep the
930  * root alive after its been zapped.
931  */
932  if (!root->role.invalid) {
933  root->tdp_mmu_scheduled_root_to_zap = true;
934  root->role.invalid = true;
935  }
936  }
937 }
bool tdp_mmu_scheduled_root_to_zap
Definition: mmu_internal.h:66
Here is the caller graph for this function:

◆ kvm_tdp_mmu_map()

int kvm_tdp_mmu_map ( struct kvm_vcpu *  vcpu,
struct kvm_page_fault *  fault 
)

Definition at line 1032 of file tdp_mmu.c.

1033 {
1034  struct kvm_mmu *mmu = vcpu->arch.mmu;
1035  struct kvm *kvm = vcpu->kvm;
1036  struct tdp_iter iter;
1037  struct kvm_mmu_page *sp;
1038  int ret = RET_PF_RETRY;
1039 
1040  kvm_mmu_hugepage_adjust(vcpu, fault);
1041 
1042  trace_kvm_mmu_spte_requested(fault);
1043 
1044  rcu_read_lock();
1045 
1046  tdp_mmu_for_each_pte(iter, mmu, fault->gfn, fault->gfn + 1) {
1047  int r;
1048 
 1049  if (fault->nx_huge_page_workaround_enabled)
 1050  disallowed_hugepage_adjust(fault, iter.old_spte, iter.level);
1051 
1052  /*
1053  * If SPTE has been frozen by another thread, just give up and
1054  * retry, avoiding unnecessary page table allocation and free.
1055  */
1056  if (is_removed_spte(iter.old_spte))
1057  goto retry;
1058 
1059  if (iter.level == fault->goal_level)
1060  goto map_target_level;
1061 
1062  /* Step down into the lower level page table if it exists. */
1063  if (is_shadow_present_pte(iter.old_spte) &&
1064  !is_large_pte(iter.old_spte))
1065  continue;
1066 
1067  /*
1068  * The SPTE is either non-present or points to a huge page that
1069  * needs to be split.
1070  */
1071  sp = tdp_mmu_alloc_sp(vcpu);
1072  tdp_mmu_init_child_sp(sp, &iter);
1073 
 1074  sp->nx_huge_page_disallowed = fault->huge_page_disallowed;
 1075 
1076  if (is_shadow_present_pte(iter.old_spte))
1077  r = tdp_mmu_split_huge_page(kvm, &iter, sp, true);
1078  else
1079  r = tdp_mmu_link_sp(kvm, &iter, sp, true);
1080 
1081  /*
1082  * Force the guest to retry if installing an upper level SPTE
1083  * failed, e.g. because a different task modified the SPTE.
1084  */
1085  if (r) {
1086  tdp_mmu_free_sp(sp);
1087  goto retry;
1088  }
1089 
1090  if (fault->huge_page_disallowed &&
1091  fault->req_level >= iter.level) {
1092  spin_lock(&kvm->arch.tdp_mmu_pages_lock);
1093  if (sp->nx_huge_page_disallowed)
1094  track_possible_nx_huge_page(kvm, sp);
1095  spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
1096  }
1097  }
1098 
1099  /*
1100  * The walk aborted before reaching the target level, e.g. because the
1101  * iterator detected an upper level SPTE was frozen during traversal.
1102  */
1103  WARN_ON_ONCE(iter.level == fault->goal_level);
1104  goto retry;
1105 
1106 map_target_level:
1107  ret = tdp_mmu_map_handle_target_level(vcpu, fault, &iter);
1108 
1109 retry:
1110  rcu_read_unlock();
1111  return ret;
1112 }
void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
Definition: mmu.c:3180
void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
Definition: mmu.c:848
void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level)
Definition: mmu.c:3216
@ RET_PF_RETRY
Definition: mmu_internal.h:275
static bool is_large_pte(u64 pte)
Definition: spte.h:313
bool nx_huge_page_disallowed
Definition: mmu_internal.h:74
bool huge_page_disallowed
Definition: mmu_internal.h:212
const bool nx_huge_page_workaround_enabled
Definition: mmu_internal.h:206
static void tdp_mmu_init_child_sp(struct kvm_mmu_page *child_sp, struct tdp_iter *iter)
Definition: tdp_mmu.c:205
static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, struct kvm_mmu_page *sp, bool shared)
Definition: tdp_mmu.c:1006
static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, struct tdp_iter *iter)
Definition: tdp_mmu.c:943
static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter, struct kvm_mmu_page *sp, bool shared)
Definition: tdp_mmu.c:1376
static void tdp_mmu_free_sp(struct kvm_mmu_page *sp)
Definition: tdp_mmu.c:54
Here is the call graph for this function:

◆ kvm_tdp_mmu_put_root()

void kvm_tdp_mmu_put_root ( struct kvm *  kvm,
struct kvm_mmu_page *  root 
)

Definition at line 76 of file tdp_mmu.c.

77 {
78  if (!refcount_dec_and_test(&root->tdp_mmu_root_count))
79  return;
80 
81  /*
82  * The TDP MMU itself holds a reference to each root until the root is
83  * explicitly invalidated, i.e. the final reference should be never be
84  * put for a valid root.
85  */
86  KVM_BUG_ON(!is_tdp_mmu_page(root) || !root->role.invalid, kvm);
87 
88  spin_lock(&kvm->arch.tdp_mmu_pages_lock);
89  list_del_rcu(&root->link);
90  spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
91  call_rcu(&root->rcu_head, tdp_mmu_free_sp_rcu_callback);
92 }
static bool is_tdp_mmu_page(struct kvm_mmu_page *sp)
Definition: tdp_mmu.h:74
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_tdp_mmu_set_spte_gfn()

bool kvm_tdp_mmu_set_spte_gfn ( struct kvm *  kvm,
struct kvm_gfn_range *  range 
)

Definition at line 1247 of file tdp_mmu.c.

1248 {
1249  /*
1250  * No need to handle the remote TLB flush under RCU protection, the
1251  * target SPTE _must_ be a leaf SPTE, i.e. cannot result in freeing a
1252  * shadow page. See the WARN on pfn_changed in handle_changed_spte().
1253  */
1254  return kvm_tdp_mmu_handle_gfn(kvm, range, set_spte_gfn);
1255 }
static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter, struct kvm_gfn_range *range)
Definition: tdp_mmu.c:1211
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_tdp_mmu_test_age_gfn()

bool kvm_tdp_mmu_test_age_gfn ( struct kvm *  kvm,
struct kvm_gfn_range *  range 
)

Definition at line 1206 of file tdp_mmu.c.

1207 {
1208  return kvm_tdp_mmu_handle_gfn(kvm, range, test_age_gfn);
1209 }
static bool test_age_gfn(struct kvm *kvm, struct tdp_iter *iter, struct kvm_gfn_range *range)
Definition: tdp_mmu.c:1200
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_tdp_mmu_try_split_huge_pages()

void kvm_tdp_mmu_try_split_huge_pages ( struct kvm *  kvm,
const struct kvm_memory_slot *  slot,
gfn_t  start,
gfn_t  end,
int  target_level,
bool  shared 
)

Definition at line 1483 of file tdp_mmu.c.

1487 {
1488  struct kvm_mmu_page *root;
1489  int r = 0;
1490 
1491  kvm_lockdep_assert_mmu_lock_held(kvm, shared);
1492  for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id) {
1493  r = tdp_mmu_split_huge_pages_root(kvm, root, start, end, target_level, shared);
1494  if (r) {
1495  kvm_tdp_mmu_put_root(kvm, root);
1496  break;
1497  }
1498  }
1499 }
void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root)
Definition: tdp_mmu.c:76
static int tdp_mmu_split_huge_pages_root(struct kvm *kvm, struct kvm_mmu_page *root, gfn_t start, gfn_t end, int target_level, bool shared)
Definition: tdp_mmu.c:1414
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_tdp_mmu_unmap_gfn_range()

bool kvm_tdp_mmu_unmap_gfn_range ( struct kvm *  kvm,
struct kvm_gfn_range *  range,
bool  flush 
)

Definition at line 1114 of file tdp_mmu.c.

1116 {
1117  struct kvm_mmu_page *root;
1118 
1119  __for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false)
1120  flush = tdp_mmu_zap_leafs(kvm, root, range->start, range->end,
1121  range->may_block, flush);
1122 
1123  return flush;
1124 }
static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root, gfn_t start, gfn_t end, bool can_yield, bool flush)
Definition: tdp_mmu.c:780
#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _only_valid)
Definition: tdp_mmu.c:152
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_tdp_mmu_write_protect_gfn()

bool kvm_tdp_mmu_write_protect_gfn ( struct kvm *  kvm,
struct kvm_memory_slot *  slot,
gfn_t  gfn,
int  min_level 
)

Definition at line 1746 of file tdp_mmu.c.

1749 {
1750  struct kvm_mmu_page *root;
1751  bool spte_set = false;
1752 
1753  lockdep_assert_held_write(&kvm->mmu_lock);
1754  for_each_tdp_mmu_root(kvm, root, slot->as_id)
1755  spte_set |= write_protect_gfn(kvm, root, gfn, min_level);
1756 
1757  return spte_set;
1758 }
static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root, gfn_t gfn, int min_level)
Definition: tdp_mmu.c:1710
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_tdp_mmu_wrprot_slot()

bool kvm_tdp_mmu_wrprot_slot ( struct kvm *  kvm,
const struct kvm_memory_slot *  slot,
int  min_level 
)

Definition at line 1300 of file tdp_mmu.c.

1302 {
1303  struct kvm_mmu_page *root;
1304  bool spte_set = false;
1305 
1306  lockdep_assert_held_read(&kvm->mmu_lock);
1307 
1308  for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id)
1309  spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn,
1310  slot->base_gfn + slot->npages, min_level);
1311 
1312  return spte_set;
1313 }
static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, gfn_t start, gfn_t end, int min_level)
Definition: tdp_mmu.c:1262
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_tdp_mmu_zap_all()

void kvm_tdp_mmu_zap_all ( struct kvm *  kvm)

Definition at line 831 of file tdp_mmu.c.

832 {
833  struct kvm_mmu_page *root;
834 
835  /*
836  * Zap all roots, including invalid roots, as all SPTEs must be dropped
837  * before returning to the caller. Zap directly even if the root is
838  * also being zapped by a worker. Walking zapped top-level SPTEs isn't
839  * all that expensive and mmu_lock is already held, which means the
840  * worker has yielded, i.e. flushing the work instead of zapping here
841  * isn't guaranteed to be any faster.
842  *
 843  * A TLB flush is unnecessary, KVM zaps everything if and only if the VM
844  * is being destroyed or the userspace VMM has exited. In both cases,
845  * KVM_RUN is unreachable, i.e. no vCPUs will ever service the request.
846  */
847  lockdep_assert_held_write(&kvm->mmu_lock);
 848  for_each_tdp_mmu_root_yield_safe(kvm, root)
 849  tdp_mmu_zap_root(kvm, root, false);
850 }
#define for_each_tdp_mmu_root_yield_safe(_kvm, _root)
Definition: tdp_mmu.c:162
static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root, bool shared)
Definition: tdp_mmu.c:716
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_tdp_mmu_zap_collapsible_sptes()

void kvm_tdp_mmu_zap_collapsible_sptes ( struct kvm *  kvm,
const struct kvm_memory_slot *  slot 
)

Definition at line 1695 of file tdp_mmu.c.

1697 {
1698  struct kvm_mmu_page *root;
1699 
1700  lockdep_assert_held_read(&kvm->mmu_lock);
1701  for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id)
1702  zap_collapsible_spte_range(kvm, root, slot);
1703 }
static void zap_collapsible_spte_range(struct kvm *kvm, struct kvm_mmu_page *root, const struct kvm_memory_slot *slot)
Definition: tdp_mmu.c:1640
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_tdp_mmu_zap_invalidated_roots()

void kvm_tdp_mmu_zap_invalidated_roots ( struct kvm *  kvm)

Definition at line 856 of file tdp_mmu.c.

857 {
858  struct kvm_mmu_page *root;
859 
860  read_lock(&kvm->mmu_lock);
861 
 862  for_each_tdp_mmu_root_yield_safe(kvm, root) {
 863  if (!root->tdp_mmu_scheduled_root_to_zap)
 864  continue;
865 
866  root->tdp_mmu_scheduled_root_to_zap = false;
867  KVM_BUG_ON(!root->role.invalid, kvm);
868 
869  /*
870  * A TLB flush is not necessary as KVM performs a local TLB
871  * flush when allocating a new root (see kvm_mmu_load()), and
872  * when migrating a vCPU to a different pCPU. Note, the local
873  * TLB flush on reuse also invalidates paging-structure-cache
874  * entries, i.e. TLB entries for intermediate paging structures,
875  * that may be zapped, as such entries are associated with the
876  * ASID on both VMX and SVM.
877  */
878  tdp_mmu_zap_root(kvm, root, true);
879 
880  /*
 881  * The reference needs to be put *after* zapping the root, as
 882  * the root must be reachable by mmu_notifiers while it's being
 883  * zapped.
884  */
885  kvm_tdp_mmu_put_root(kvm, root);
886  }
887 
888  read_unlock(&kvm->mmu_lock);
889 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_tdp_mmu_zap_leafs()

bool kvm_tdp_mmu_zap_leafs ( struct kvm *  kvm,
gfn_t  start,
gfn_t  end,
bool  flush 
)

Definition at line 820 of file tdp_mmu.c.

821 {
822  struct kvm_mmu_page *root;
823 
824  lockdep_assert_held_write(&kvm->mmu_lock);
 825  for_each_tdp_mmu_root_yield_safe(kvm, root)
 826  flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush);
827 
828  return flush;
829 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ kvm_tdp_mmu_zap_sp()

bool kvm_tdp_mmu_zap_sp ( struct kvm *  kvm,
struct kvm_mmu_page *  sp 
)

Definition at line 752 of file tdp_mmu.c.

753 {
754  u64 old_spte;
755 
756  /*
757  * This helper intentionally doesn't allow zapping a root shadow page,
758  * which doesn't have a parent page table and thus no associated entry.
759  */
760  if (WARN_ON_ONCE(!sp->ptep))
761  return false;
762 
763  old_spte = kvm_tdp_mmu_read_spte(sp->ptep);
764  if (WARN_ON_ONCE(!is_shadow_present_pte(old_spte)))
765  return false;
766 
767  tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte, 0,
768  sp->gfn, sp->role.level + 1);
769 
770  return true;
771 }
tdp_ptep_t ptep
Definition: mmu_internal.h:107
static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep, u64 old_spte, u64 new_spte, gfn_t gfn, int level)
Definition: tdp_mmu.c:592
Here is the call graph for this function:
Here is the caller graph for this function:

◆ set_spte_gfn()

static bool set_spte_gfn ( struct kvm *  kvm,
struct tdp_iter *  iter,
struct kvm_gfn_range *  range 
)
static

Definition at line 1211 of file tdp_mmu.c.

1213 {
1214  u64 new_spte;
1215 
1216  /* Huge pages aren't expected to be modified without first being zapped. */
1217  WARN_ON_ONCE(pte_huge(range->arg.pte) || range->start + 1 != range->end);
1218 
1219  if (iter->level != PG_LEVEL_4K ||
 1220  !is_shadow_present_pte(iter->old_spte))
 1221  return false;
1222 
1223  /*
1224  * Note, when changing a read-only SPTE, it's not strictly necessary to
1225  * zero the SPTE before setting the new PFN, but doing so preserves the
 1226  * invariant that the PFN of a present leaf SPTE can never change.
1227  * See handle_changed_spte().
1228  */
1229  tdp_mmu_iter_set_spte(kvm, iter, 0);
1230 
1231  if (!pte_write(range->arg.pte)) {
 1232  new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,
 1233  pte_pfn(range->arg.pte));
1234 
1235  tdp_mmu_iter_set_spte(kvm, iter, new_spte);
1236  }
1237 
1238  return true;
1239 }
u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn)
Definition: spte.c:325
Here is the call graph for this function:
Here is the caller graph for this function:

◆ tdp_account_mmu_page()

static void tdp_account_mmu_page ( struct kvm *  kvm,
struct kvm_mmu_page *  sp 
)
static

Definition at line 261 of file tdp_mmu.c.

262 {
263  kvm_account_pgtable_pages((void *)sp->spt, +1);
264  atomic64_inc(&kvm->arch.tdp_mmu_pages);
265 }
Here is the caller graph for this function:

◆ tdp_mmu_alloc_sp()

static struct kvm_mmu_page* tdp_mmu_alloc_sp ( struct kvm_vcpu *  vcpu)
static

Definition at line 180 of file tdp_mmu.c.

181 {
182  struct kvm_mmu_page *sp;
183 
184  sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
185  sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
186 
187  return sp;
188 }
Here is the caller graph for this function:

◆ tdp_mmu_alloc_sp_for_split()

static struct kvm_mmu_page* tdp_mmu_alloc_sp_for_split ( struct kvm *  kvm,
struct tdp_iter *  iter,
bool  shared 
)
static

Definition at line 1334 of file tdp_mmu.c.

1337 {
1338  struct kvm_mmu_page *sp;
1339 
1340  kvm_lockdep_assert_mmu_lock_held(kvm, shared);
1341 
1342  /*
1343  * Since we are allocating while under the MMU lock we have to be
1344  * careful about GFP flags. Use GFP_NOWAIT to avoid blocking on direct
1345  * reclaim and to avoid making any filesystem callbacks (which can end
1346  * up invoking KVM MMU notifiers, resulting in a deadlock).
1347  *
1348  * If this allocation fails we drop the lock and retry with reclaim
1349  * allowed.
1350  */
1351  sp = __tdp_mmu_alloc_sp_for_split(GFP_NOWAIT | __GFP_ACCOUNT);
1352  if (sp)
1353  return sp;
1354 
1355  rcu_read_unlock();
1356 
1357  if (shared)
1358  read_unlock(&kvm->mmu_lock);
1359  else
1360  write_unlock(&kvm->mmu_lock);
1361 
1362  iter->yielded = true;
1363  sp = __tdp_mmu_alloc_sp_for_split(GFP_KERNEL_ACCOUNT);
1364 
1365  if (shared)
1366  read_lock(&kvm->mmu_lock);
1367  else
1368  write_lock(&kvm->mmu_lock);
1369 
1370  rcu_read_lock();
1371 
1372  return sp;
1373 }
bool yielded
Definition: tdp_iter.h:116
static struct kvm_mmu_page * __tdp_mmu_alloc_sp_for_split(gfp_t gfp)
Definition: tdp_mmu.c:1315
Here is the call graph for this function:
Here is the caller graph for this function:

◆ tdp_mmu_free_sp()

static void tdp_mmu_free_sp ( struct kvm_mmu_page *  sp)
static

Definition at line 54 of file tdp_mmu.c.

55 {
56  free_page((unsigned long)sp->spt);
57  kmem_cache_free(mmu_page_header_cache, sp);
58 }
Here is the caller graph for this function:

◆ tdp_mmu_free_sp_rcu_callback()

static void tdp_mmu_free_sp_rcu_callback ( struct rcu_head *  head)
static

Definition at line 68 of file tdp_mmu.c.

69 {
70  struct kvm_mmu_page *sp = container_of(head, struct kvm_mmu_page,
71  rcu_head);
72 
73  tdp_mmu_free_sp(sp);
74 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ tdp_mmu_init_child_sp()

static void tdp_mmu_init_child_sp ( struct kvm_mmu_page *  child_sp,
struct tdp_iter *  iter 
)
static

Definition at line 205 of file tdp_mmu.c.

207 {
208  struct kvm_mmu_page *parent_sp;
209  union kvm_mmu_page_role role;
210 
211  parent_sp = sptep_to_sp(rcu_dereference(iter->sptep));
212 
213  role = parent_sp->role;
214  role.level--;
215 
216  tdp_mmu_init_sp(child_sp, iter->sptep, iter->gfn, role);
217 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ tdp_mmu_init_sp()

static void tdp_mmu_init_sp ( struct kvm_mmu_page *  sp,
tdp_ptep_t  sptep,
gfn_t  gfn,
union kvm_mmu_page_role  role 
)
static

Definition at line 190 of file tdp_mmu.c.

192 {
193  INIT_LIST_HEAD(&sp->possible_nx_huge_page_link);
194 
195  set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
196 
197  sp->role = role;
198  sp->gfn = gfn;
199  sp->ptep = sptep;
200  sp->tdp_mmu_page = true;
201 
202  trace_kvm_mmu_get_page(sp, true);
203 }
struct list_head possible_nx_huge_page_link
Definition: mmu_internal.h:118
bool tdp_mmu_page
Definition: mmu_internal.h:60
Here is the caller graph for this function:

◆ tdp_mmu_iter_cond_resched()

static bool __must_check tdp_mmu_iter_cond_resched ( struct kvm *  kvm,
struct tdp_iter *  iter,
bool  flush,
bool  shared 
)
inline static

Definition at line 648 of file tdp_mmu.c.

651 {
652  WARN_ON_ONCE(iter->yielded);
653 
654  /* Ensure forward progress has been made before yielding. */
655  if (iter->next_last_level_gfn == iter->yielded_gfn)
656  return false;
657 
658  if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
659  if (flush)
 660  kvm_flush_remote_tlbs(kvm);
 661 
662  rcu_read_unlock();
663 
664  if (shared)
665  cond_resched_rwlock_read(&kvm->mmu_lock);
666  else
667  cond_resched_rwlock_write(&kvm->mmu_lock);
668 
669  rcu_read_lock();
670 
671  WARN_ON_ONCE(iter->gfn > iter->next_last_level_gfn);
672 
673  iter->yielded = true;
674  }
675 
676  return iter->yielded;
677 }
void kvm_flush_remote_tlbs(struct kvm *kvm)
Definition: kvm_main.c:346
gfn_t yielded_gfn
Definition: tdp_iter.h:89
gfn_t next_last_level_gfn
Definition: tdp_iter.h:83
Here is the call graph for this function:
Here is the caller graph for this function:

◆ tdp_mmu_iter_set_spte()

static void tdp_mmu_iter_set_spte ( struct kvm *  kvm,
struct tdp_iter *  iter,
u64  new_spte 
)
inline static

Definition at line 612 of file tdp_mmu.c.

614 {
615  WARN_ON_ONCE(iter->yielded);
616  iter->old_spte = tdp_mmu_set_spte(kvm, iter->as_id, iter->sptep,
617  iter->old_spte, new_spte,
618  iter->gfn, iter->level);
619 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ tdp_mmu_link_sp()

static int tdp_mmu_link_sp ( struct kvm *  kvm,
struct tdp_iter *  iter,
struct kvm_mmu_page *  sp,
bool  shared 
)
static

Definition at line 1006 of file tdp_mmu.c.

1008 {
1009  u64 spte = make_nonleaf_spte(sp->spt, !kvm_ad_enabled());
1010  int ret = 0;
1011 
1012  if (shared) {
1013  ret = tdp_mmu_set_spte_atomic(kvm, iter, spte);
1014  if (ret)
1015  return ret;
1016  } else {
1017  tdp_mmu_iter_set_spte(kvm, iter, spte);
1018  }
1019 
1020  tdp_account_mmu_page(kvm, sp);
1021 
1022  return 0;
1023 }
u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
Definition: spte.c:310
static bool kvm_ad_enabled(void)
Definition: spte.h:269
static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
Definition: tdp_mmu.c:261
Here is the call graph for this function:
Here is the caller graph for this function:

◆ tdp_mmu_map_handle_target_level()

static int tdp_mmu_map_handle_target_level ( struct kvm_vcpu *  vcpu,
struct kvm_page_fault *  fault,
struct tdp_iter *  iter 
)
static

Definition at line 943 of file tdp_mmu.c.

946 {
947  struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(iter->sptep));
948  u64 new_spte;
949  int ret = RET_PF_FIXED;
950  bool wrprot = false;
951 
952  if (WARN_ON_ONCE(sp->role.level != fault->goal_level))
953  return RET_PF_RETRY;
954 
955  if (unlikely(!fault->slot))
956  new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
957  else
958  wrprot = make_spte(vcpu, sp, fault->slot, ACC_ALL, iter->gfn,
959  fault->pfn, iter->old_spte, fault->prefetch, true,
960  fault->map_writable, &new_spte);
961 
962  if (new_spte == iter->old_spte)
963  ret = RET_PF_SPURIOUS;
964  else if (tdp_mmu_set_spte_atomic(vcpu->kvm, iter, new_spte))
965  return RET_PF_RETRY;
966  else if (is_shadow_present_pte(iter->old_spte) &&
967  !is_last_spte(iter->old_spte, iter->level))
968  kvm_flush_remote_tlbs_gfn(vcpu->kvm, iter->gfn, iter->level);
969 
970  /*
971  * If the page fault was caused by a write but the page is write
972  * protected, emulation is needed. If the emulation was skipped,
973  * the vCPU would have the same fault again.
974  */
975  if (wrprot) {
976  if (fault->write)
977  ret = RET_PF_EMULATE;
978  }
979 
980  /* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
981  if (unlikely(is_mmio_spte(new_spte))) {
982  vcpu->stat.pf_mmio_spte_created++;
983  trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn,
984  new_spte);
985  ret = RET_PF_EMULATE;
986  } else {
987  trace_kvm_mmu_set_spte(iter->level, iter->gfn,
988  rcu_dereference(iter->sptep));
989  }
990 
991  return ret;
992 }
@ RET_PF_FIXED
Definition: mmu_internal.h:278
@ RET_PF_EMULATE
Definition: mmu_internal.h:276
@ RET_PF_SPURIOUS
Definition: mmu_internal.h:279
static void kvm_flush_remote_tlbs_gfn(struct kvm *kvm, gfn_t gfn, int level)
Definition: mmu_internal.h:176
bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, const struct kvm_memory_slot *slot, unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn, u64 old_spte, bool prefetch, bool can_unsync, bool host_writable, u64 *new_spte)
Definition: spte.c:137
u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access)
Definition: spte.c:71
#define ACC_ALL
Definition: spte.h:49
const bool prefetch
Definition: mmu_internal.h:194
const bool write
Definition: mmu_internal.h:198
kvm_pfn_t pfn
Definition: mmu_internal.h:240
struct kvm_memory_slot * slot
Definition: mmu_internal.h:236

◆ tdp_mmu_max_gfn_exclusive()

static gfn_t tdp_mmu_max_gfn_exclusive ( void  )
inline static

Definition at line 679 of file tdp_mmu.c.

680 {
681  /*
682  * Bound TDP MMU walks at host.MAXPHYADDR. KVM disallows memslots with
683  * a gpa range that would exceed the max gfn, and KVM does not create
684  * MMIO SPTEs for "impossible" gfns, instead sending such accesses down
685  * the slow emulation path every time.
686  */
687  return kvm_mmu_max_gfn() + 1;
688 }
static gfn_t kvm_mmu_max_gfn(void)
Definition: mmu.h:66

◆ tdp_mmu_need_write_protect()

static bool tdp_mmu_need_write_protect ( struct kvm_mmu_page sp)
static

Definition at line 1501 of file tdp_mmu.c.

1502 {
1503  /*
1504  * All TDP MMU shadow pages share the same role as their root, aside
1505  * from level, so it is valid to key off any shadow page to determine if
1506  * write protection is needed for an entire tree.
1507  */
1508  return kvm_mmu_page_ad_need_write_protect(sp) || !kvm_ad_enabled();
1509 }
static bool kvm_mmu_page_ad_need_write_protect(struct kvm_mmu_page *sp)
Definition: mmu_internal.h:148

◆ tdp_mmu_next_root()

static struct kvm_mmu_page* tdp_mmu_next_root ( struct kvm *  kvm,
struct kvm_mmu_page prev_root,
bool  only_valid 
)
static

Definition at line 104 of file tdp_mmu.c.

107 {
108  struct kvm_mmu_page *next_root;
109 
110  /*
111  * While the roots themselves are RCU-protected, fields such as
112  * role.invalid are protected by mmu_lock.
113  */
114  lockdep_assert_held(&kvm->mmu_lock);
115 
116  rcu_read_lock();
117 
118  if (prev_root)
119  next_root = list_next_or_null_rcu(&kvm->arch.tdp_mmu_roots,
120  &prev_root->link,
121  typeof(*prev_root), link);
122  else
123  next_root = list_first_or_null_rcu(&kvm->arch.tdp_mmu_roots,
124  typeof(*next_root), link);
125 
126  while (next_root) {
127  if ((!only_valid || !next_root->role.invalid) &&
128  kvm_tdp_mmu_get_root(next_root))
129  break;
130 
131  next_root = list_next_or_null_rcu(&kvm->arch.tdp_mmu_roots,
132  &next_root->link, typeof(*next_root), link);
133  }
134 
135  rcu_read_unlock();
136 
137  if (prev_root)
138  kvm_tdp_mmu_put_root(kvm, prev_root);
139 
140  return next_root;
141 }
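The walk above pins the next root before releasing the previous one, so a root can never disappear while the caller is still using it, even though roots may be invalidated concurrently. The sketch below models just that hand-over-hand refcounting on a plain singly linked list with C11 atomics; node, node_try_get() and next_node() are made-up names, and the RCU protection used by the real code is omitted entirely.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Toy hand-over-hand iteration: pin the next element before releasing the
 * previous one, skipping elements that are invalid or already dying. */
struct node {
	int id;
	bool invalid;
	atomic_int refcount;
	struct node *next;
};

/* Take a reference only if the node is still live (refcount > 0). */
static bool node_try_get(struct node *n)
{
	int r = atomic_load(&n->refcount);

	while (r > 0)
		if (atomic_compare_exchange_weak(&n->refcount, &r, r + 1))
			return true;
	return false;
}

static void node_put(struct node *n)
{
	atomic_fetch_sub(&n->refcount, 1);
}

static struct node *next_node(struct node *head, struct node *prev,
			      bool only_valid)
{
	struct node *n = prev ? prev->next : head;

	while (n) {
		if ((!only_valid || !n->invalid) && node_try_get(n))
			break;
		n = n->next;
	}

	/* Drop the previous reference only after the next one is pinned. */
	if (prev)
		node_put(prev);

	return n;
}

int main(void)
{
	struct node a = { .id = 1 }, b = { .id = 2, .invalid = true },
		    c = { .id = 3 };

	atomic_init(&a.refcount, 1);
	atomic_init(&b.refcount, 1);
	atomic_init(&c.refcount, 1);
	a.next = &b;
	b.next = &c;

	for (struct node *n = next_node(&a, NULL, true); n;
	     n = next_node(&a, n, true))
		printf("visiting node %d\n", n->id);

	return 0;
}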

◆ tdp_mmu_set_spte()

static u64 tdp_mmu_set_spte ( struct kvm *  kvm,
int  as_id,
tdp_ptep_t  sptep,
u64  old_spte,
u64  new_spte,
gfn_t  gfn,
int  level 
)
static

Definition at line 592 of file tdp_mmu.c.

594 {
595  lockdep_assert_held_write(&kvm->mmu_lock);
596 
597  /*
598  * No thread should be using this function to set SPTEs to or from the
599  * temporary removed SPTE value.
600  * If operating under the MMU lock in read mode, tdp_mmu_set_spte_atomic
601  * should be used. If operating under the MMU lock in write mode, the
602  * use of the removed SPTE should not be necessary.
603  */
604  WARN_ON_ONCE(is_removed_spte(old_spte) || is_removed_spte(new_spte));
605 
606  old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level);
607 
608  handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, false);
609  return old_spte;
610 }

◆ tdp_mmu_set_spte_atomic()

static int tdp_mmu_set_spte_atomic ( struct kvm *  kvm,
struct tdp_iter iter,
u64  new_spte 
)
inline static

Definition at line 517 of file tdp_mmu.c.

520 {
521  u64 *sptep = rcu_dereference(iter->sptep);
522 
523  /*
524  * The caller is responsible for ensuring the old SPTE is not a REMOVED
525  * SPTE. KVM should never attempt to zap or manipulate a REMOVED SPTE,
526  * and pre-checking before inserting a new SPTE is advantageous as it
527  * avoids unnecessary work.
528  */
529  WARN_ON_ONCE(iter->yielded || is_removed_spte(iter->old_spte));
530 
531  lockdep_assert_held_read(&kvm->mmu_lock);
532 
533  /*
534  * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
535  * does not hold the mmu_lock. On failure, i.e. if a different logical
536  * CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with
537  * the current value, so the caller operates on fresh data, e.g. if it
538  * retries tdp_mmu_set_spte_atomic().
539  */
540  if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
541  return -EBUSY;
542 
543  handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
544  new_spte, iter->level, true);
545 
546  return 0;
547 }
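The important detail for callers is that a failed try_cmpxchg64() refreshes iter->old_spte with the value another CPU installed, so a retry recomputes against current data instead of looping on a stale snapshot. Below is a minimal user-space C11 sketch of that contract; pte, cached_old and set_pte_atomic() are illustrative stand-ins, not KVM code.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Toy model of the lock-free update: compare-and-exchange against a cached
 * old value, and on failure refresh the cache so the caller can recompute
 * and retry. */
static int set_pte_atomic(_Atomic uint64_t *pte, uint64_t *cached_old,
			  uint64_t new_val)
{
	/* On failure, *cached_old is overwritten with the current value. */
	if (!atomic_compare_exchange_strong(pte, cached_old, new_val))
		return -1; /* lost the race; caller retries with fresh data */
	return 0;
}

int main(void)
{
	_Atomic uint64_t pte;
	uint64_t cached, new_val = 0x2000 | 0x7; /* new pfn + RWX bits */

	atomic_init(&pte, 0x1000 | 0x5);
	cached = 0x1000 | 0x5;

	/* Simulate another CPU changing the entry behind our back. */
	atomic_store(&pte, 0x1000 | 0x1);

	while (set_pte_atomic(&pte, &cached, new_val)) {
		printf("raced: cached value refreshed to %#llx, retrying\n",
		       (unsigned long long)cached);
		/* A real caller would re-derive new_val from cached here. */
	}
	printf("installed %#llx\n", (unsigned long long)atomic_load(&pte));
	return 0;
}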

◆ tdp_mmu_split_huge_page()

static int tdp_mmu_split_huge_page ( struct kvm *  kvm,
struct tdp_iter iter,
struct kvm_mmu_page sp,
bool  shared 
)
static

Definition at line 1376 of file tdp_mmu.c.

1378 {
1379  const u64 huge_spte = iter->old_spte;
1380  const int level = iter->level;
1381  int ret, i;
1382 
1383  /*
1384  * No need for atomics when writing to sp->spt since the page table has
1385  * not been linked in yet and thus is not reachable from any other CPU.
1386  */
1387  for (i = 0; i < SPTE_ENT_PER_PAGE; i++)
1388  sp->spt[i] = make_huge_page_split_spte(kvm, huge_spte, sp->role, i);
1389 
1390  /*
1391  * Replace the huge spte with a pointer to the populated lower level
1392  * page table. Since we are making this change without a TLB flush, vCPUs
1393  * will see a mix of the split mappings and the original huge mapping,
1394  * depending on what's currently in their TLB. This is fine from a
1395  * correctness standpoint since the translation will be the same either
1396  * way.
1397  */
1398  ret = tdp_mmu_link_sp(kvm, iter, sp, shared);
1399  if (ret)
1400  goto out;
1401 
1402  /*
1403  * tdp_mmu_link_sp() will handle subtracting the huge page we
1404  * are overwriting from the page stats. But we have to manually update
1405  * the page stats with the new present child pages.
1406  */
1407  kvm_update_page_stats(kvm, level - 1, SPTE_ENT_PER_PAGE);
1408 
1409 out:
1410  trace_kvm_mmu_split_huge_page(iter->gfn, huge_spte, level, ret);
1411  return ret;
1412 }
u64 make_huge_page_split_spte(struct kvm *kvm, u64 huge_spte, union kvm_mmu_page_role role, int index)
Definition: spte.c:274

◆ tdp_mmu_split_huge_pages_root()

static int tdp_mmu_split_huge_pages_root ( struct kvm *  kvm,
struct kvm_mmu_page root,
gfn_t  start,
gfn_t  end,
int  target_level,
bool  shared 
)
static

Definition at line 1414 of file tdp_mmu.c.

1418 {
1419  struct kvm_mmu_page *sp = NULL;
1420  struct tdp_iter iter;
1421  int ret = 0;
1422 
1423  rcu_read_lock();
1424 
1425  /*
1426  * Traverse the page table splitting all huge pages above the target
1427  * level into one lower level. For example, if we encounter a 1GB page
1428  * we split it into 512 2MB pages.
1429  *
1430  * Since the TDP iterator uses a pre-order traversal, we are guaranteed
1431  * to visit an SPTE before ever visiting its children, which means we
1432  * will correctly recursively split huge pages that are more than one
1433  * level above the target level (e.g. splitting a 1GB to 512 2MB pages,
1434  * and then splitting each of those to 512 4KB pages).
1435  */
1436  for_each_tdp_pte_min_level(iter, root, target_level + 1, start, end) {
1437 retry:
1438  if (tdp_mmu_iter_cond_resched(kvm, &iter, false, shared))
1439  continue;
1440 
1441  if (!is_shadow_present_pte(iter.old_spte) || !is_large_pte(iter.old_spte))
1442  continue;
1443 
1444  if (!sp) {
1445  sp = tdp_mmu_alloc_sp_for_split(kvm, &iter, shared);
1446  if (!sp) {
1447  ret = -ENOMEM;
1448  trace_kvm_mmu_split_huge_page(iter.gfn,
1449  iter.old_spte,
1450  iter.level, ret);
1451  break;
1452  }
1453 
1454  if (iter.yielded)
1455  continue;
1456  }
1457 
1458  tdp_mmu_init_child_sp(sp, &iter);
1459 
1460  if (tdp_mmu_split_huge_page(kvm, &iter, sp, shared))
1461  goto retry;
1462 
1463  sp = NULL;
1464  }
1465 
1466  rcu_read_unlock();
1467 
1468  /*
1469  * It's possible to exit the loop having never used the last sp if, for
1470  * example, a vCPU doing HugePage NX splitting wins the race and
1471  * installs its own sp in place of the last sp we tried to split.
1472  */
1473  if (sp)
1474  tdp_mmu_free_sp(sp);
1475 
1476  return ret;
1477 }
static struct kvm_mmu_page * tdp_mmu_alloc_sp_for_split(struct kvm *kvm, struct tdp_iter *iter, bool shared)
Definition: tdp_mmu.c:1334
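Because the walk is pre-order, a freshly installed child table is itself visited on the following steps, which is what lets a mapping two levels above the target level be split in stages. A toy, self-contained C sketch of that idea follows; struct entry, split_to_level() and the fan-out of 4 are invented for illustration (the real fan-out is 512), and allocations are deliberately not freed before exit.

#include <stdio.h>
#include <stdlib.h>

/* Toy model of splitting huge mappings one level at a time with a pre-order
 * walk: a parent is split before its new children are visited, so a "1G"
 * entry eventually ends up as "4K" entries. */
#define FANOUT 4

struct entry {
	int level;   /* 3 = 1G, 2 = 2M, 1 = 4K (toy numbering) */
	int is_leaf;
	struct entry *child[FANOUT];
};

static void split_to_level(struct entry *e, int target_level)
{
	if (e->is_leaf && e->level > target_level) {
		/* Split: replace the huge leaf with a table of smaller leaves. */
		for (int i = 0; i < FANOUT; i++) {
			struct entry *c = calloc(1, sizeof(*c));

			c->level = e->level - 1;
			c->is_leaf = 1;
			e->child[i] = c;
		}
		e->is_leaf = 0;
		printf("split level-%d leaf into %d level-%d leaves\n",
		       e->level, FANOUT, e->level - 1);
	}

	/* Pre-order: children (including ones just created) come next. */
	if (!e->is_leaf)
		for (int i = 0; i < FANOUT; i++)
			split_to_level(e->child[i], target_level);
}

int main(void)
{
	struct entry huge = { .level = 3, .is_leaf = 1 };

	split_to_level(&huge, 1); /* split the "1G" leaf all the way to "4K" */
	return 0;
}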

◆ tdp_mmu_unlink_sp()

static void tdp_mmu_unlink_sp ( struct kvm *  kvm,
struct kvm_mmu_page sp 
)
static

tdp_mmu_unlink_sp() - Remove a shadow page from the list of used pages

@kvm: kvm instance
@sp: the page to be removed

Definition at line 279 of file tdp_mmu.c.

280 {
281  tdp_unaccount_mmu_page(kvm, sp);
282 
283  if (!sp->nx_huge_page_disallowed)
284  return;
285 
286  spin_lock(&kvm->arch.tdp_mmu_pages_lock);
287  sp->nx_huge_page_disallowed = false;
288  untrack_possible_nx_huge_page(kvm, sp);
289  spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
290 }
void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
Definition: mmu.c:891
static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
Definition: tdp_mmu.c:267

◆ tdp_mmu_zap_leafs()

static bool tdp_mmu_zap_leafs ( struct kvm *  kvm,
struct kvm_mmu_page root,
gfn_t  start,
gfn_t  end,
bool  can_yield,
bool  flush 
)
static

Definition at line 780 of file tdp_mmu.c.

782 {
783  struct tdp_iter iter;
784 
785  end = min(end, tdp_mmu_max_gfn_exclusive());
786 
787  lockdep_assert_held_write(&kvm->mmu_lock);
788 
789  rcu_read_lock();
790 
791  for_each_tdp_pte_min_level(iter, root, PG_LEVEL_4K, start, end) {
792  if (can_yield &&
793  tdp_mmu_iter_cond_resched(kvm, &iter, flush, false)) {
794  flush = false;
795  continue;
796  }
797 
798  if (!is_shadow_present_pte(iter.old_spte) ||
799  !is_last_spte(iter.old_spte, iter.level))
800  continue;
801 
802  tdp_mmu_iter_set_spte(kvm, &iter, 0);
803  flush = true;
804  }
805 
806  rcu_read_unlock();
807 
808  /*
809  * Because this flow zaps _only_ leaf SPTEs, the caller doesn't need
810  * to provide RCU protection as no 'struct kvm_mmu_page' will be freed.
811  */
812  return flush;
813 }
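The flush flag here is batched bookkeeping: it is set whenever something was zapped, consumed if the walk yields, and otherwise handed back to the caller. The toy C program below models that "a flush is owed until someone performs it" pattern; maybe_yield(), flush_tlbs() and the step counter are illustrative only.

#include <stdbool.h>
#include <stdio.h>

static void flush_tlbs(void)
{
	printf("  -> remote TLB flush\n");
}

/* Yield every few steps; flush first if anything is pending. */
static bool maybe_yield(int step, bool *flush)
{
	if (step % 3 != 0)
		return false;

	if (*flush) {
		flush_tlbs();
		*flush = false; /* the pending work was just done */
	}
	printf("  -> yielded at step %d\n", step);
	return true;
}

int main(void)
{
	bool flush = false;

	for (int step = 1; step <= 8; step++) {
		if (maybe_yield(step, &flush))
			continue;

		printf("zap entry %d\n", step);
		flush = true; /* something was zapped, a flush is now owed */
	}

	if (flush)
		flush_tlbs(); /* final batch; the real code returns the flag */
	return 0;
}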

◆ tdp_mmu_zap_root()

static void tdp_mmu_zap_root ( struct kvm *  kvm,
struct kvm_mmu_page root,
bool  shared 
)
static

Definition at line 716 of file tdp_mmu.c.

718 {
719 
720  /*
721  * The root must have an elevated refcount so that it's reachable via
722  * mmu_notifier callbacks, which allows this path to yield and drop
723  * mmu_lock. When handling an unmap/release mmu_notifier command, KVM
724  * must drop all references to relevant pages prior to completing the
725  * callback. Dropping mmu_lock with an unreachable root would result
726  * in zapping SPTEs after a relevant mmu_notifier callback completes
727  * and lead to use-after-free as zapping a SPTE triggers "writeback" of
728  * dirty accessed bits to the SPTE's associated struct page.
729  */
730  WARN_ON_ONCE(!refcount_read(&root->tdp_mmu_root_count));
731 
731 
732  kvm_lockdep_assert_mmu_lock_held(kvm, shared);
733 
734  rcu_read_lock();
735 
736  /*
737  * To avoid RCU stalls due to recursively removing huge swaths of SPs,
738  * split the zap into two passes. On the first pass, zap at the 1gb
739  * level, and then zap top-level SPs on the second pass. "1gb" is not
740  * arbitrary, as KVM must be able to zap a 1gb shadow page without
741  * inducing a stall to allow in-place replacement with a 1gb hugepage.
742  *
743  * Because zapping a SP recurses on its children, stepping down to
744  * PG_LEVEL_4K in the iterator itself is unnecessary.
745  */
746  __tdp_mmu_zap_root(kvm, root, shared, PG_LEVEL_1G);
747  __tdp_mmu_zap_root(kvm, root, shared, root->role.level);
748 
749  rcu_read_unlock();
750 }
static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root, bool shared, int zap_level)
Definition: tdp_mmu.c:690
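The two-pass structure bounds how much work any single zap step can trigger: the first pass tears down every subtree rooted at an intermediate level in separately bounded chunks, and the second pass then removes the now-shallow upper levels. A self-contained toy model of that ordering follows; the fan-out of 4, struct node and zap_below() are invented for illustration.

#include <stdio.h>
#include <stdlib.h>

#define FANOUT 4

struct node {
	int level;
	struct node *child[FANOUT];
};

static struct node *build(int level)
{
	struct node *n = calloc(1, sizeof(*n));

	n->level = level;
	if (level > 1)
		for (int i = 0; i < FANOUT; i++)
			n->child[i] = build(level - 1);
	return n;
}

/* Free everything strictly below @n whose level is <= zap_level. */
static void zap_below(struct node *n, int zap_level)
{
	for (int i = 0; i < FANOUT; i++) {
		if (!n->child[i])
			continue;
		if (n->child[i]->level > zap_level) {
			zap_below(n->child[i], zap_level);
		} else {
			zap_below(n->child[i], n->child[i]->level - 1);
			free(n->child[i]);
			n->child[i] = NULL;
		}
	}
}

int main(void)
{
	struct node *root = build(4);

	/* Pass 1: bounded chunks -- zap subtrees at level 2 ("1G" stand-in). */
	zap_below(root, 2);
	/* Pass 2: the remaining top-level entries are now cheap to remove. */
	zap_below(root, root->level - 1);
	free(root);
	printf("root torn down in two passes\n");
	return 0;
}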

◆ tdp_mmu_zap_spte_atomic()

static int tdp_mmu_zap_spte_atomic ( struct kvm *  kvm,
struct tdp_iter iter 
)
inline static

Definition at line 549 of file tdp_mmu.c.

551 {
552  int ret;
553 
554  /*
555  * Freeze the SPTE by setting it to a special,
556  * non-present value. This will stop other threads from
557  * immediately installing a present entry in its place
558  * before the TLBs are flushed.
559  */
560  ret = tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE);
561  if (ret)
562  return ret;
563 
564  kvm_flush_remote_tlbs_gfn(kvm, iter->gfn, iter->level);
565 
566  /*
567  * No other thread can overwrite the removed SPTE as they must either
568  * wait on the MMU lock or use tdp_mmu_set_spte_atomic() which will not
569  * overwrite the special removed SPTE value. No bookkeeping is needed
570  * here since the SPTE is going from non-present to non-present. Use
571  * the raw write helper to avoid an unnecessary check on volatile bits.
572  */
573  __kvm_tdp_mmu_write_spte(iter->sptep, 0);
574 
575  return 0;
576 }
static void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
Definition: tdp_iter.h:27
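The sequence is freeze, flush, then clear: the sentinel keeps lock-free updaters away while the TLB flush is in flight, after which a plain store suffices because nothing else will touch a frozen entry. The sketch below models that ordering in user-space C11; REMOVED_SENTINEL, zap_atomic() and try_update() are invented names, and the encoding has nothing to do with KVM's actual REMOVED_SPTE value.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define REMOVED_SENTINEL ((uint64_t)0x5a5a5a5a5a5a5a5aULL)

static void flush_tlbs(void)
{
	printf("remote TLB flush while entry is frozen\n");
}

/* What a concurrent updater would do: refuse to touch a frozen entry. */
static int try_update(_Atomic uint64_t *pte, uint64_t expected, uint64_t new_val)
{
	if (expected == REMOVED_SENTINEL)
		return -1; /* frozen; retry later */
	return atomic_compare_exchange_strong(pte, &expected, new_val) ? 0 : -1;
}

static int zap_atomic(_Atomic uint64_t *pte, uint64_t old)
{
	/* Step 1: freeze.  Fails if someone else changed the entry first. */
	if (!atomic_compare_exchange_strong(pte, &old, REMOVED_SENTINEL))
		return -1;

	/* Step 2: flush before the entry can be reused. */
	flush_tlbs();

	/* Step 3: plain store; nobody else will write over the sentinel. */
	atomic_store_explicit(pte, 0, memory_order_relaxed);
	return 0;
}

int main(void)
{
	_Atomic uint64_t pte;

	atomic_init(&pte, 0x1000 | 0x7);
	if (zap_atomic(&pte, 0x1000 | 0x7) == 0)
		printf("zapped, final value %#llx\n",
		       (unsigned long long)atomic_load(&pte));

	/* After the zap completes, lock-free updaters can install a new entry. */
	if (try_update(&pte, 0, 0x2000 | 0x7) == 0)
		printf("new entry installed: %#llx\n",
		       (unsigned long long)atomic_load(&pte));
	return 0;
}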

◆ tdp_unaccount_mmu_page()

static void tdp_unaccount_mmu_page ( struct kvm *  kvm,
struct kvm_mmu_page sp 
)
static

Definition at line 267 of file tdp_mmu.c.

268 {
269  kvm_account_pgtable_pages((void *)sp->spt, -1);
270  atomic64_dec(&kvm->arch.tdp_mmu_pages);
271 }

◆ test_age_gfn()

static bool test_age_gfn ( struct kvm *  kvm,
struct tdp_iter iter,
struct kvm_gfn_range *  range 
)
static

Definition at line 1200 of file tdp_mmu.c.

1202 {
1203  return is_accessed_spte(iter->old_spte);
1204 }

◆ write_protect_gfn()

static bool write_protect_gfn ( struct kvm *  kvm,
struct kvm_mmu_page root,
gfn_t  gfn,
int  min_level 
)
static

Definition at line 1710 of file tdp_mmu.c.

1712 {
1713  struct tdp_iter iter;
1714  u64 new_spte;
1715  bool spte_set = false;
1716 
1717  BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);
1718 
1719  rcu_read_lock();
1720 
1721  for_each_tdp_pte_min_level(iter, root, min_level, gfn, gfn + 1) {
1722  if (!is_shadow_present_pte(iter.old_spte) ||
1723  !is_last_spte(iter.old_spte, iter.level))
1724  continue;
1725 
1726  new_spte = iter.old_spte &
1727  ~(PT_WRITABLE_MASK | shadow_mmu_writable_mask);
1728 
1729  if (new_spte == iter.old_spte)
1730  break;
1731 
1732  tdp_mmu_iter_set_spte(kvm, &iter, new_spte);
1733  spte_set = true;
1734  }
1735 
1736  rcu_read_unlock();
1737 
1738  return spte_set;
1739 }
u64 __read_mostly shadow_mmu_writable_mask
Definition: spte.c:28
int min_level
Definition: tdp_iter.h:99

◆ wrprot_gfn_range()

static bool wrprot_gfn_range ( struct kvm *  kvm,
struct kvm_mmu_page root,
gfn_t  start,
gfn_t  end,
int  min_level 
)
static

Definition at line 1262 of file tdp_mmu.c.

1264 {
1265  struct tdp_iter iter;
1266  u64 new_spte;
1267  bool spte_set = false;
1268 
1269  rcu_read_lock();
1270 
1271  BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);
1272 
1273  for_each_tdp_pte_min_level(iter, root, min_level, start, end) {
1274 retry:
1275  if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
1276  continue;
1277 
1278  if (!is_shadow_present_pte(iter.old_spte) ||
1279  !is_last_spte(iter.old_spte, iter.level) ||
1280  !(iter.old_spte & PT_WRITABLE_MASK))
1281  continue;
1282 
1283  new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
1284 
1285  if (tdp_mmu_set_spte_atomic(kvm, &iter, new_spte))
1286  goto retry;
1287 
1288  spte_set = true;
1289  }
1290 
1291  rcu_read_unlock();
1292  return spte_set;
1293 }

◆ zap_collapsible_spte_range()

static void zap_collapsible_spte_range ( struct kvm *  kvm,
struct kvm_mmu_page root,
const struct kvm_memory_slot *  slot 
)
static

Definition at line 1640 of file tdp_mmu.c.

1643 {
1644  gfn_t start = slot->base_gfn;
1645  gfn_t end = start + slot->npages;
1646  struct tdp_iter iter;
1647  int max_mapping_level;
1648 
1649  rcu_read_lock();
1650 
1651  for_each_tdp_pte_min_level(iter, root, PG_LEVEL_2M, start, end) {
1652 retry:
1653  if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
1654  continue;
1655 
1656  if (iter.level > KVM_MAX_HUGEPAGE_LEVEL ||
1657  !is_shadow_present_pte(iter.old_spte))
1658  continue;
1659 
1660  /*
1661  * Don't zap leaf SPTEs; if a leaf SPTE could be replaced with
1662  * a large page size, then its parent would have been zapped
1663  * instead of stepping down.
1664  */
1665  if (is_last_spte(iter.old_spte, iter.level))
1666  continue;
1667 
1668  /*
1669  * If iter.gfn resides outside of the slot, i.e. the page for
1670  * the current level overlaps but is not contained by the slot,
1671  * then the SPTE can't be made huge. More importantly, trying
1672  * to query that info from slot->arch.lpage_info will cause an
1673  * out-of-bounds access.
1674  */
1675  if (iter.gfn < start || iter.gfn >= end)
1676  continue;
1677 
1678  max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot,
1679  iter.gfn, PG_LEVEL_NUM);
1680  if (max_mapping_level < iter.level)
1681  continue;
1682 
1683  /* Note, a successful atomic zap also does a remote TLB flush. */
1684  if (tdp_mmu_zap_spte_atomic(kvm, &iter))
1685  goto retry;
1686  }
1687 
1688  rcu_read_unlock();
1689 }
int kvm_mmu_max_mapping_level(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t gfn, int max_level)
Definition: mmu.c:3170
static int tdp_mmu_zap_spte_atomic(struct kvm *kvm, struct tdp_iter *iter)
Definition: tdp_mmu.c:549
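The bounds check before kvm_mmu_max_mapping_level() matters because a higher-level entry can cover gfns outside the slot, and per-slot metadata is only sized for the slot itself. A small stand-alone C sketch of that guard follows; toy_slot, lpage_info and can_query_slot() are illustrative, not KVM structures.

#include <stdint.h>
#include <stdio.h>

/* Toy model of the bounds check before consulting per-slot metadata: a huge
 * mapping can start below the slot base or extend past its end, and indexing
 * a per-slot array with such a gfn would be out of bounds. */
struct toy_slot {
	uint64_t base_gfn;
	uint64_t npages;
	int lpage_info[16]; /* one entry per huge-page-sized chunk, say */
};

static int can_query_slot(const struct toy_slot *slot, uint64_t gfn)
{
	return gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages;
}

int main(void)
{
	struct toy_slot slot = { .base_gfn = 0x100, .npages = 0x80 };
	uint64_t huge_gfn = 0x0;    /* huge-page aligned, below base_gfn */
	uint64_t inner_gfn = 0x140; /* well inside the slot */

	printf("gfn %#llx: %s\n", (unsigned long long)huge_gfn,
	       can_query_slot(&slot, huge_gfn) ? "safe to index metadata"
					       : "skip, outside the slot");
	printf("gfn %#llx: %s\n", (unsigned long long)inner_gfn,
	       can_query_slot(&slot, inner_gfn) ? "safe to index metadata"
						: "skip, outside the slot");
	return 0;
}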