KVM
switch.c
Go to the documentation of this file.
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2015 - ARM Ltd
4  * Author: Marc Zyngier <marc.zyngier@arm.com>
5  */
6 
7 #include <hyp/switch.h>
8 #include <hyp/sysreg-sr.h>
9 
10 #include <linux/arm-smccc.h>
11 #include <linux/kvm_host.h>
12 #include <linux/types.h>
13 #include <linux/jump_label.h>
14 #include <uapi/linux/psci.h>
15 
16 #include <kvm/arm_psci.h>
17 
18 #include <asm/barrier.h>
19 #include <asm/cpufeature.h>
20 #include <asm/kprobes.h>
21 #include <asm/kvm_asm.h>
22 #include <asm/kvm_emulate.h>
23 #include <asm/kvm_hyp.h>
24 #include <asm/kvm_mmu.h>
25 #include <asm/fpsimd.h>
26 #include <asm/debug-monitors.h>
27 #include <asm/processor.h>
28 
29 #include <nvhe/fixed_config.h>
30 #include <nvhe/mem_protect.h>
31 
32 /* Non-VHE specific context */
33 DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
34 DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
35 DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
36 
37 extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
38 
39 static void __activate_traps(struct kvm_vcpu *vcpu)
40 {
41  u64 val;
42 
43  ___activate_traps(vcpu);
45 
46  val = vcpu->arch.cptr_el2;
47  val |= CPTR_EL2_TAM; /* Same bit irrespective of E2H */
48  val |= has_hvhe() ? CPACR_EL1_TTA : CPTR_EL2_TTA;
49  if (cpus_have_final_cap(ARM64_SME)) {
50  if (has_hvhe())
51  val &= ~(CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN);
52  else
53  val |= CPTR_EL2_TSM;
54  }
55 
56  if (!guest_owns_fp_regs(vcpu)) {
57  if (has_hvhe())
58  val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
59  CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN);
60  else
61  val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
62 
64  }
65 
66  kvm_write_cptr_el2(val);
67  write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
68 
69  if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
70  struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
71 
72  isb();
73  /*
74  * At this stage, and thanks to the above isb(), S2 is
75  * configured and enabled. We can now restore the guest's S1
76  * configuration: SCTLR, and only then TCR.
77  */
78  write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR);
79  isb();
80  write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR);
81  }
82 }
83 
84 static void __deactivate_traps(struct kvm_vcpu *vcpu)
85 {
86  extern char __kvm_hyp_host_vector[];
87 
88  ___deactivate_traps(vcpu);
89 
90  if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
91  u64 val;
92 
93  /*
94  * Set the TCR and SCTLR registers in the exact opposite
95  * sequence as __activate_traps (first prevent walks,
96  * then force the MMU on). A generous sprinkling of isb()
97  * ensure that things happen in this exact order.
98  */
99  val = read_sysreg_el1(SYS_TCR);
100  write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR);
101  isb();
102  val = read_sysreg_el1(SYS_SCTLR);
103  write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR);
104  isb();
105  }
106 
108 
109  write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
110 
111  kvm_reset_cptr_el2(vcpu);
112  write_sysreg(__kvm_hyp_host_vector, vbar_el2);
113 }
114 
115 /* Save VGICv3 state on non-VHE systems */
116 static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
117 {
118  if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
119  __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
120  __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
121  }
122 }
123 
124 /* Restore VGICv3 state on non-VHE systems */
125 static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
126 {
127  if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
128  __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
129  __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
130  }
131 }
132 
133 /*
134  * Disable host events, enable guest events
135  */
136 #ifdef CONFIG_HW_PERF_EVENTS
137 static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu)
138 {
139  struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
140 
141  if (pmu->events_host)
142  write_sysreg(pmu->events_host, pmcntenclr_el0);
143 
144  if (pmu->events_guest)
145  write_sysreg(pmu->events_guest, pmcntenset_el0);
146 
147  return (pmu->events_host || pmu->events_guest);
148 }
149 
150 /*
151  * Disable guest events, enable host events
152  */
153 static void __pmu_switch_to_host(struct kvm_vcpu *vcpu)
154 {
155  struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
156 
157  if (pmu->events_guest)
158  write_sysreg(pmu->events_guest, pmcntenclr_el0);
159 
160  if (pmu->events_host)
161  write_sysreg(pmu->events_host, pmcntenset_el0);
162 }
163 #else
164 #define __pmu_switch_to_guest(v) ({ false; })
165 #define __pmu_switch_to_host(v) do {} while (0)
166 #endif
167 
168 /*
169  * Handler for protected VM MSR, MRS or System instruction execution in AArch64.
170  *
171  * Returns true if the hypervisor has handled the exit, and control should go
172  * back to the guest, or false if it hasn't.
173  */
174 static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
175 {
176  /*
177  * Make sure we handle the exit for workarounds and ptrauth
178  * before the pKVM handling, as the latter could decide to
179  * UNDEF.
180  */
181  return (kvm_hyp_handle_sysreg(vcpu, exit_code) ||
182  kvm_handle_pvm_sysreg(vcpu, exit_code));
183 }
184 
186  [0 ... ESR_ELx_EC_MAX] = NULL,
187  [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
188  [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg,
189  [ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd,
190  [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
191  [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
192  [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
193  [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
194  [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
195  [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
196 };
197 
199  [0 ... ESR_ELx_EC_MAX] = NULL,
200  [ESR_ELx_EC_SYS64] = kvm_handle_pvm_sys64,
201  [ESR_ELx_EC_SVE] = kvm_handle_pvm_restricted,
202  [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
203  [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
204  [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
205  [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
206  [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
207  [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
208 };
209 
210 static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
211 {
212  if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm))))
213  return pvm_exit_handlers;
214 
215  return hyp_exit_handlers;
216 }
217 
218 /*
219  * Some guests (e.g., protected VMs) are not be allowed to run in AArch32.
220  * The ARMv8 architecture does not give the hypervisor a mechanism to prevent a
221  * guest from dropping to AArch32 EL0 if implemented by the CPU. If the
222  * hypervisor spots a guest in such a state ensure it is handled, and don't
223  * trust the host to spot or fix it. The check below is based on the one in
224  * kvm_arch_vcpu_ioctl_run().
225  *
226  * Returns false if the guest ran in AArch32 when it shouldn't have, and
227  * thus should exit to the host, or true if a the guest run loop can continue.
228  */
229 static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
230 {
231  struct kvm *kvm = kern_hyp_va(vcpu->kvm);
232 
233  if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) {
234  /*
235  * As we have caught the guest red-handed, decide that it isn't
236  * fit for purpose anymore by making the vcpu invalid. The VMM
237  * can try and fix it by re-initializing the vcpu with
238  * KVM_ARM_VCPU_INIT, however, this is likely not possible for
239  * protected VMs.
240  */
241  vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
242  *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
243  *exit_code |= ARM_EXCEPTION_IL;
244  }
245 }
246 
247 /* Switch to the guest for legacy non-VHE systems */
248 int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
249 {
250  struct kvm_cpu_context *host_ctxt;
251  struct kvm_cpu_context *guest_ctxt;
252  struct kvm_s2_mmu *mmu;
253  bool pmu_switch_needed;
254  u64 exit_code;
255 
256  /*
257  * Having IRQs masked via PMR when entering the guest means the GIC
258  * will not signal the CPU of interrupts of lower priority, and the
259  * only way to get out will be via guest exceptions.
260  * Naturally, we want to avoid this.
261  */
262  if (system_uses_irq_prio_masking()) {
263  gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
264  pmr_sync();
265  }
266 
267  host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
268  host_ctxt->__hyp_running_vcpu = vcpu;
269  guest_ctxt = &vcpu->arch.ctxt;
270 
271  pmu_switch_needed = __pmu_switch_to_guest(vcpu);
272 
273  __sysreg_save_state_nvhe(host_ctxt);
274  /*
275  * We must flush and disable the SPE buffer for nVHE, as
276  * the translation regime(EL1&0) is going to be loaded with
277  * that of the guest. And we must do this before we change the
278  * translation regime to EL2 (via MDCR_EL2_E2PB == 0) and
279  * before we load guest Stage1.
280  */
282 
283  /*
284  * We're about to restore some new MMU state. Make sure
285  * ongoing page-table walks that have started before we
286  * trapped to EL2 have completed. This also synchronises the
287  * above disabling of SPE and TRBE.
288  *
289  * See DDI0487I.a D8.1.5 "Out-of-context translation regimes",
290  * rule R_LFHQG and subsequent information statements.
291  */
292  dsb(nsh);
293 
294  __kvm_adjust_pc(vcpu);
295 
296  /*
297  * We must restore the 32-bit state before the sysregs, thanks
298  * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
299  *
300  * Also, and in order to be able to deal with erratum #1319537 (A57)
301  * and #1319367 (A72), we must ensure that all VM-related sysreg are
302  * restored before we enable S2 translation.
303  */
305  __sysreg_restore_state_nvhe(guest_ctxt);
306 
307  mmu = kern_hyp_va(vcpu->arch.hw_mmu);
308  __load_stage2(mmu, kern_hyp_va(mmu->arch));
309  __activate_traps(vcpu);
310 
312  __timer_enable_traps(vcpu);
313 
315 
316  do {
317  /* Jump in the fire! */
318  exit_code = __guest_enter(vcpu);
319 
320  /* And we're baaack! */
321  } while (fixup_guest_exit(vcpu, &exit_code));
322 
323  __sysreg_save_state_nvhe(guest_ctxt);
324  __sysreg32_save_state(vcpu);
325  __timer_disable_traps(vcpu);
326  __hyp_vgic_save_state(vcpu);
327 
328  /*
329  * Same thing as before the guest run: we're about to switch
330  * the MMU context, so let's make sure we don't have any
331  * ongoing EL1&0 translations.
332  */
333  dsb(nsh);
334 
335  __deactivate_traps(vcpu);
337 
338  __sysreg_restore_state_nvhe(host_ctxt);
339 
340  if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
341  __fpsimd_save_fpexc32(vcpu);
342 
344  /*
345  * This must come after restoring the host sysregs, since a non-VHE
346  * system may enable SPE here and make use of the TTBRs.
347  */
349 
350  if (pmu_switch_needed)
351  __pmu_switch_to_host(vcpu);
352 
353  /* Returning to host will clear PSR.I, remask PMR if needed */
354  if (system_uses_irq_prio_masking())
355  gic_write_pmr(GIC_PRIO_IRQOFF);
356 
357  host_ctxt->__hyp_running_vcpu = NULL;
358 
359  return exit_code;
360 }
361 
362 asmlinkage void __noreturn hyp_panic(void)
363 {
364  u64 spsr = read_sysreg_el2(SYS_SPSR);
365  u64 elr = read_sysreg_el2(SYS_ELR);
366  u64 par = read_sysreg_par();
367  struct kvm_cpu_context *host_ctxt;
368  struct kvm_vcpu *vcpu;
369 
370  host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
371  vcpu = host_ctxt->__hyp_running_vcpu;
372 
373  if (vcpu) {
374  __timer_disable_traps(vcpu);
375  __deactivate_traps(vcpu);
377  __sysreg_restore_state_nvhe(host_ctxt);
378  }
379 
380  /* Prepare to dump kvm nvhe hyp stacktrace */
381  kvm_nvhe_prepare_backtrace((unsigned long)__builtin_frame_address(0),
382  _THIS_IP_);
383 
384  __hyp_do_panic(host_ctxt, spsr, elr, par);
385  unreachable();
386 }
387 
388 asmlinkage void __noreturn hyp_panic_bad_stack(void)
389 {
390  hyp_panic();
391 }
392 
393 asmlinkage void kvm_unexpected_el2_exception(void)
394 {
396 }
struct vgic_global kvm_vgic_global_state
void __kvm_adjust_pc(struct kvm_vcpu *vcpu)
Definition: exception.c:365
bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
Definition: sys_regs.c:474
bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code)
Definition: sys_regs.c:512
static __always_inline void __load_host_stage2(void)
Definition: mem_protect.h:86
void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
Definition: debug-sr.c:92
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
Definition: debug-sr.c:97
void __debug_switch_to_host(struct kvm_vcpu *vcpu)
Definition: debug-sr.c:105
void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
Definition: debug-sr.c:82
static void __activate_traps(struct kvm_vcpu *vcpu)
Definition: switch.c:39
static const exit_handler_fn pvm_exit_handlers[]
Definition: switch.c:198
asmlinkage void kvm_unexpected_el2_exception(void)
Definition: switch.c:393
asmlinkage void __noreturn hyp_panic_bad_stack(void)
Definition: switch.c:388
static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
Definition: switch.c:125
static const exit_handler_fn hyp_exit_handlers[]
Definition: switch.c:185
static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
Definition: switch.c:116
void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc)
Definition: stacktrace.c:152
static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
Definition: switch.c:174
#define __pmu_switch_to_host(v)
Definition: switch.c:165
static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
Definition: switch.c:229
DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data)
asmlinkage void __noreturn hyp_panic(void)
Definition: switch.c:362
#define __pmu_switch_to_guest(v)
Definition: switch.c:164
static void __deactivate_traps(struct kvm_vcpu *vcpu)
Definition: switch.c:84
int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
Definition: switch.c:248
static const exit_handler_fn * kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
Definition: switch.c:210
void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
Definition: sysreg-sr.c:29
void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
Definition: sysreg-sr.c:21
void __timer_disable_traps(struct kvm_vcpu *vcpu)
Definition: timer-sr.c:23
void __timer_enable_traps(struct kvm_vcpu *vcpu)
Definition: timer-sr.c:40
struct static_key_false gicv3_cpuif
Definition: arm_vgic.h:81
static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) __alias(kvm_hyp_handle_memory_fault)
static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
Definition: switch.h:330
static bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code)
Definition: switch.h:300
static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code)
Definition: switch.h:476
bool(* exit_handler_fn)(struct kvm_vcpu *, u64 *)
Definition: switch.h:645
static void __deactivate_traps_common(struct kvm_vcpu *vcpu)
Definition: switch.h:249
static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
Definition: switch.h:617
static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
Definition: switch.h:596
static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
Definition: switch.h:573
static void __activate_traps_common(struct kvm_vcpu *vcpu)
Definition: switch.h:207
static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code) __alias(kvm_hyp_handle_memory_fault)
static void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
Definition: switch.h:57
static bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
Definition: switch.h:693
static void ___deactivate_traps(struct kvm_vcpu *vcpu)
Definition: switch.h:281
static void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
Definition: switch.h:49
static void ___activate_traps(struct kvm_vcpu *vcpu)
Definition: switch.h:268
static void __kvm_unexpected_el2_exception(void)
Definition: switch.h:748
static bool guest_owns_fp_regs(struct kvm_vcpu *vcpu)
Definition: switch.h:43
static void __sysreg32_restore_state(struct kvm_vcpu *vcpu)
Definition: sysreg-sr.h:229
static void __sysreg32_save_state(struct kvm_vcpu *vcpu)
Definition: sysreg-sr.h:212
void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if)
Definition: vgic-v3-sr.c:234
void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
Definition: vgic-v3-sr.c:199
void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
Definition: vgic-v3-sr.c:307
void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
Definition: vgic-v3-sr.c:260