Hi All,
This series adds the bare minimum required to support KVM virtual machines. The majority of the changes make sure that executing Morello instructions in the hypervisor/guest is safe and that the core capability registers are properly set up and maintained across context switches. Additionally, support for COMPAT64 allows plain aarch64 tools to still manage virtual machines. Support for purecap guests is yet to come.
The changes have been tested with the KVM kselftests (in plain aarch64) in their default setup (some of the tests expose a number of options and those have not been fully exercised).
The changes are available at: https://git.morello-project.org/Bea/linux/-/tree/morello/kvm_base_2?ref_type...
Beata Michalska (12):
  KVM: arm64: Provide explicit capability annotations
  KVM: arm64: morello: Add (V)TCR_EL2 HW field descriptors
  KVM: arm64: morello: Enable page-based hw attributes
  arm64/sysreg: morello: Support for capability registers
  arm64/sysreg: morello: Add capability-aware sysreg helpers
  KVM: arm64: morello: Add sysreg access capability variants
  KVM: arm64: morello: Extend the context by capability sysregs
  KVM: arm64: morello: Provide valid capability on return from exception
  KVM: arm64: morello: Disable trapping of Morello instructions
  KVM: arm64: morello: Preserve capabilities on context save / restore
  KVM: arm64: morello: Enable support for COMPAT64
  KVM: arm64: morello: Enable KVM
 Documentation/virt/kvm/api.rst             |  10 +-
 arch/arm64/include/asm/kvm_arm.h           |  12 ++
 arch/arm64/include/asm/kvm_asm.h           | 108 ++++++++++-
 arch/arm64/include/asm/kvm_emulate.h       |   5 +
 arch/arm64/include/asm/kvm_host.h          |  38 ++++
 arch/arm64/include/asm/kvm_hyp.h           |  74 ++++++--
 arch/arm64/include/asm/kvm_pgtable.h       |  20 ++
 arch/arm64/include/asm/sysreg.h            |  83 +++++++++
 arch/arm64/include/uapi/asm/kvm.h          |   6 +-
 arch/arm64/kernel/asm-offsets.c            |   3 +
 arch/arm64/kernel/hyp-stub.S               |   5 +
 arch/arm64/kvm/Kconfig                     |   1 -
 arch/arm64/kvm/arch_timer.c                |   4 +-
 arch/arm64/kvm/arm.c                       | 207 ++++++++++++++-------
 arch/arm64/kvm/guest.c                     |  10 +-
 arch/arm64/kvm/hyp/entry.S                 |  12 +-
 arch/arm64/kvm/hyp/include/hyp/switch.h    |   4 +-
 arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h |  58 +++++-
 arch/arm64/kvm/hyp/nvhe/host.S             |  66 ++++++-
 arch/arm64/kvm/hyp/nvhe/hyp-init.S         |  14 ++
 arch/arm64/kvm/hyp/nvhe/switch.c           |   9 +-
 arch/arm64/kvm/hyp/nvhe/sys_regs.c         |   4 +-
 arch/arm64/kvm/hyp/pgtable.c               |  10 +-
 arch/arm64/kvm/hyp/vhe/switch.c            |  11 +-
 arch/arm64/kvm/hypercalls.c                |   4 +-
 arch/arm64/kvm/pmu-emul.c                  |   8 +-
 arch/arm64/kvm/sys_regs.c                  |  21 ++-
 arch/arm64/kvm/vgic/vgic-its.c             |   8 +-
 arch/arm64/kvm/vgic/vgic-kvm-device.c      |  11 +-
 include/linux/kvm_host.h                   |   8 +-
 include/uapi/linux/kvm.h                   |  10 +-
 tools/arch/arm64/include/uapi/asm/kvm.h    |   6 +-
 tools/include/uapi/linux/kvm.h             |  12 +-
 virt/kvm/Kconfig                           |   2 +-
 virt/kvm/kvm_main.c                        |  99 +++++++--
 virt/kvm/vfio.c                            |   2 +-
 36 files changed, 776 insertions(+), 189 deletions(-)
In order to avoid declaration ambiguity with more than one level of capability pointer, provide explicit __capability annotations, thus avoiding compilation issues with implicit casting. NOTE: This is far from perfect and should be done in a more elegant manner, but for now it has to do.
Signed-off-by: Beata Michalska beata.michalska@arm.com --- arch/arm64/kvm/sys_regs.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 4735e1b37fb3..1c5d24fb7733 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -3653,7 +3653,12 @@ static u64 sys_reg_to_index(const struct sys_reg_desc *reg) (reg->Op2 << KVM_REG_ARM64_SYSREG_OP2_SHIFT)); }
-static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind) +static bool copy_reg_to_user(const struct sys_reg_desc *reg, +#ifdef CONFIG_CHERI_PURECAP_UABI + u64 * __capability * uind) +#else + u64 __user **uind) +#endif { if (!*uind) return true; @@ -3667,7 +3672,11 @@ static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind)
static int walk_one_sys_reg(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, +#ifdef CONFIG_CHERI_PURECAP_UABI + u64 * __capability * uind, +#else u64 __user **uind, +#endif unsigned int *total) { /*
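For illustration, a minimal sketch (not part of the patch) of the ambiguity being addressed: under the purecap user ABI the user-space pointer itself is a capability, so for a pointer-to-user-pointer the __capability qualifier has to be spelled out on the inner level, mirroring the #ifdef'ed prototypes above. The helper name below is hypothetical.

/*
 * Hypothetical example, assuming CONFIG_CHERI_PURECAP_UABI: uind is a plain
 * kernel pointer to a user-space pointer, and only the user-space pointer
 * (the inner level) is a capability.
 */
#ifdef CONFIG_CHERI_PURECAP_UABI
static bool example_put_index(u64 val, u64 * __capability * uind)
#else
static bool example_put_index(u64 val, u64 __user **uind)
#endif
{
	if (put_user(val, *uind))
		return false;
	(*uind)++;	/* advance through the user buffer one u64 at a time */
	return true;
}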
With the FEAT_HPDS2 being implemented, PBHA bits of the translation table last-level descriptors can be leveraged for implementation defined hardware use. Fields HWU61,HWU60 & HWU59 are being used by Morello to control capability loads & stores from memory (with correspondign bits enabled in TCR_EL2 for stage 1 and VTCR_EL2 for stage 2 for EL2 translation regime). Add their respective filed descriptors, with HWU62 being provided for completenes only.
Signed-off-by: Beata Michalska beata.michalska@arm.com --- arch/arm64/include/asm/kvm_arm.h | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index d1fbf885283f..e80e1b7a91e0 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -109,6 +109,11 @@
/* TCR_EL2 Registers bits */ #define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) +#define TCR_EL2_HWU62 (1 << 28) +#define TCR_EL2_HWU61 (1 << 27) +#define TCR_EL2_HWU60 (1 << 26) +#define TCR_EL2_HWU59 (1 << 25) +#define TCR_EL2_HPD (1 << 24) #define TCR_EL2_TBI (1 << 20) #define TCR_EL2_PS_SHIFT 16 #define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT) @@ -123,6 +128,10 @@
/* VTCR_EL2 Registers bits */ #define VTCR_EL2_RES1 (1U << 31) +#define VTCR_EL2_HWU62 TCR_EL2_HWU62 +#define VTCR_EL2_HWU61 TCR_EL2_HWU61 +#define VTCR_EL2_HWU60 TCR_EL2_HWU60 +#define VTCR_EL2_HWU59 TCR_EL2_HWU59 #define VTCR_EL2_HD (1 << 22) #define VTCR_EL2_HA (1 << 21) #define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT
On 26/02/2024 09:20, Beata Michalska wrote:
With the FEAT_HPDS2 being implemented, PBHA bits of the translation table last-level descriptors can be leveraged for implementation defined hardware use. Fields HWU61,HWU60 & HWU59 are being used by Morello to control capability loads & stores from memory (with correspondign bits
Nit: s/correspondign/corresponding/
enabled in TCR_EL2 for stage 1 and VTCR_EL2 for stage 2 for EL2 translation regime). Add their respective filed descriptors, with HWU62 being provided
s/filed/field/
for completenes only.
s/completenes/completeness/
Kevin
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/include/asm/kvm_arm.h | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index d1fbf885283f..e80e1b7a91e0 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -109,6 +109,11 @@ /* TCR_EL2 Registers bits */ #define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) +#define TCR_EL2_HWU62 (1 << 28) +#define TCR_EL2_HWU61 (1 << 27) +#define TCR_EL2_HWU60 (1 << 26) +#define TCR_EL2_HWU59 (1 << 25) +#define TCR_EL2_HPD (1 << 24) #define TCR_EL2_TBI (1 << 20) #define TCR_EL2_PS_SHIFT 16 #define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT) @@ -123,6 +128,10 @@ /* VTCR_EL2 Registers bits */ #define VTCR_EL2_RES1 (1U << 31) +#define VTCR_EL2_HWU62 TCR_EL2_HWU62 +#define VTCR_EL2_HWU61 TCR_EL2_HWU61 +#define VTCR_EL2_HWU60 TCR_EL2_HWU60 +#define VTCR_EL2_HWU59 TCR_EL2_HWU59 #define VTCR_EL2_HD (1 << 22) #define VTCR_EL2_HA (1 << 21) #define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT
Extend the definition of the kvm_pgtable_prot enum to incorporate PBHA bits for both stage-1 and stage-2 page tables. At this point, those are being enabled only for Morello, which implements FEAT_HPDS2 by default and as such does not require any additional checks for the above-mentioned feature status.
Signed-off-by: Beata Michalska beata.michalska@arm.com --- arch/arm64/include/asm/kvm_arm.h | 3 +++ arch/arm64/include/asm/kvm_pgtable.h | 20 ++++++++++++++++++++ arch/arm64/kvm/arm.c | 2 ++ arch/arm64/kvm/hyp/pgtable.c | 10 ++++++++-- 4 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index e80e1b7a91e0..010d837f8f1d 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -132,6 +132,9 @@ #define VTCR_EL2_HWU61 TCR_EL2_HWU61 #define VTCR_EL2_HWU60 TCR_EL2_HWU60 #define VTCR_EL2_HWU59 TCR_EL2_HWU59 +#define VTCR_EL2_PBHA_SHIFT 25 +#define VTCR_EL2_PBHA_MASK (0xf << VTCR_EL2_PBHA_SHIFT) +#define VTCR_EL2_PBHA VTCR_EL2_PBHA_MASK #define VTCR_EL2_HD (1 << 22) #define VTCR_EL2_HA (1 << 21) #define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index d3e354bb8351..97ff8a2634d5 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -173,6 +173,10 @@ enum kvm_pgtable_stage2_flags { * @KVM_PGTABLE_PROT_SW1: Software bit 1. * @KVM_PGTABLE_PROT_SW2: Software bit 2. * @KVM_PGTABLE_PROT_SW3: Software bit 3. + * @KVM_PGTABLE_PROT_HW0: PBHA[0] + * @KVM_PGTABLE_PROT_HW1: PBHA[1] + * @KVM_PGTABLE_PROT_HW2: PBHA[2] + * @KVM_PGTABLE_PROT_HW3: PBHA[3] */ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_X = BIT(0), @@ -185,17 +189,33 @@ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_SW1 = BIT(56), KVM_PGTABLE_PROT_SW2 = BIT(57), KVM_PGTABLE_PROT_SW3 = BIT(58), + + KVM_PGTABLE_PROT_HW0 = BIT(59), + KVM_PGTABLE_PROT_HW1 = BIT(60), + KVM_PGTABLE_PROT_HW2 = BIT(61), + KVM_PGTABLE_PROT_HW3 = BIT(62), };
+#ifdef CONFIG_ARM64_MORELLO +#define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W | \ + KVM_PGTABLE_PROT_HW1 | KVM_PGTABLE_PROT_HW2) +#else #define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W) +#endif #define KVM_PGTABLE_PROT_RWX (KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X)
#define PKVM_HOST_MEM_PROT KVM_PGTABLE_PROT_RWX #define PKVM_HOST_MMIO_PROT KVM_PGTABLE_PROT_RW
#define PAGE_HYP KVM_PGTABLE_PROT_RW +#ifdef CONFIG_ARM64_MORELLO +#define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X | \ + KVM_PGTABLE_PROT_HW2) +#define PAGE_HYP_RO (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_HW2) +#else #define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X) #define PAGE_HYP_RO (KVM_PGTABLE_PROT_R) +#endif #define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)
typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end, diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 53ad8de28f48..bc945eac681d 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1859,6 +1859,8 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) } tcr &= ~TCR_T0SZ_MASK; tcr |= TCR_T0SZ(hyp_va_bits); + if (IS_ENABLED(CONFIG_ARM64_MORELLO)) + tcr |= TCR_EL2_HWU61 | TCR_EL2_HWU60 | TCR_EL2_HPD; params->tcr_el2 = tcr;
params->pgd_pa = kvm_mmu_get_httbr(); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 1966fdee740e..88d002d7152f 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -38,6 +38,7 @@
#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50)
+#define KVM_PTE_LEAF_ATTR_HI_HW GENMASK(62, 59) #define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) @@ -411,6 +412,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF; attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; + attr |= prot & KVM_PTE_LEAF_ATTR_HI_HW; *ptep = attr;
return 0; @@ -418,7 +420,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) { - enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW; + enum kvm_pgtable_prot prot = pte & (KVM_PTE_LEAF_ATTR_HI_SW | KVM_PTE_LEAF_ATTR_HI_HW); u32 ap;
if (!kvm_pte_valid(pte)) @@ -654,6 +656,9 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) vtcr |= VTCR_EL2_HA; #endif /* CONFIG_ARM64_HW_AFDBM */
+ if (IS_ENABLED(CONFIG_ARM64_MORELLO)) + vtcr |= VTCR_EL2_PBHA; + /* Set the vmid bits */ vtcr |= (get_vmid_bits(mmfr1) == 16) ? VTCR_EL2_VS_16BIT : @@ -714,6 +719,7 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF; attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; + attr |= prot & KVM_PTE_LEAF_ATTR_HI_HW; *ptep = attr;
return 0; @@ -1299,7 +1305,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, u32 level; kvm_pte_t set = 0, clr = 0;
- if (prot & KVM_PTE_LEAF_ATTR_HI_SW) + if (prot & KVM_PTE_LEAF_ATTR_HI_SW || prot & KVM_PTE_LEAF_ATTR_HI_HW) return -EINVAL;
if (prot & KVM_PGTABLE_PROT_R)
On 26/02/2024 09:20, Beata Michalska wrote:
Extend the definition of the kvm_pgtable_prot enum to incorporate PBHA bits for both stage-1 and stage-2 page tables. At this point, those are being enabled only for Morello, which implements FEAT_HPDS2 by default and as such does not require any additional checks for the above-mentioned feature status.
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/include/asm/kvm_arm.h | 3 +++ arch/arm64/include/asm/kvm_pgtable.h | 20 ++++++++++++++++++++ arch/arm64/kvm/arm.c | 2 ++ arch/arm64/kvm/hyp/pgtable.c | 10 ++++++++-- 4 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index e80e1b7a91e0..010d837f8f1d 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -132,6 +132,9 @@ #define VTCR_EL2_HWU61 TCR_EL2_HWU61 #define VTCR_EL2_HWU60 TCR_EL2_HWU60 #define VTCR_EL2_HWU59 TCR_EL2_HWU59 +#define VTCR_EL2_PBHA_SHIFT 25
Maybe use VTCR_EL2_HWU59 instead of 25?
+#define VTCR_EL2_PBHA_MASK (0xf << VTCR_EL2_PBHA_SHIFT) +#define VTCR_EL2_PBHA VTCR_EL2_PBHA_MASK #define VTCR_EL2_HD (1 << 22) #define VTCR_EL2_HA (1 << 21) #define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index d3e354bb8351..97ff8a2634d5 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -173,6 +173,10 @@ enum kvm_pgtable_stage2_flags {
- @KVM_PGTABLE_PROT_SW1: Software bit 1.
- @KVM_PGTABLE_PROT_SW2: Software bit 2.
- @KVM_PGTABLE_PROT_SW3: Software bit 3.
- @KVM_PGTABLE_PROT_HW0: PBHA[0]
- @KVM_PGTABLE_PROT_HW1: PBHA[1]
- @KVM_PGTABLE_PROT_HW2: PBHA[2]
- @KVM_PGTABLE_PROT_HW3: PBHA[3]
*/
enum kvm_pgtable_prot { KVM_PGTABLE_PROT_X = BIT(0), @@ -185,17 +189,33 @@ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_SW1 = BIT(56), KVM_PGTABLE_PROT_SW2 = BIT(57), KVM_PGTABLE_PROT_SW3 = BIT(58),
- KVM_PGTABLE_PROT_HW0 = BIT(59),
- KVM_PGTABLE_PROT_HW1 = BIT(60),
- KVM_PGTABLE_PROT_HW2 = BIT(61),
- KVM_PGTABLE_PROT_HW3 = BIT(62),
}; +#ifdef CONFIG_ARM64_MORELLO
To reduce the #ifdef'ing, we could use a similar approach as in <asm/pgtable-prot.h>, that is define MAYBE macros that expand to 0 in !MORELLO. The other advantage would be to make it clearer what each bit does, e.g. KVM_PGTABLE_PROT_MAYBE_LS_CAPS. Getting pretty long though, maybe we could shorten the prefix...
+#define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W | \
KVM_PGTABLE_PROT_HW1 | KVM_PGTABLE_PROT_HW2)
+#else #define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W) +#endif #define KVM_PGTABLE_PROT_RWX (KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X) #define PKVM_HOST_MEM_PROT KVM_PGTABLE_PROT_RWX #define PKVM_HOST_MMIO_PROT KVM_PGTABLE_PROT_RW #define PAGE_HYP KVM_PGTABLE_PROT_RW +#ifdef CONFIG_ARM64_MORELLO +#define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X | \
KVM_PGTABLE_PROT_HW2)
+#define PAGE_HYP_RO (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_HW2) +#else #define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X) #define PAGE_HYP_RO (KVM_PGTABLE_PROT_R) +#endif #define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE) typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end, diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 53ad8de28f48..bc945eac681d 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1859,6 +1859,8 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) } tcr &= ~TCR_T0SZ_MASK; tcr |= TCR_T0SZ(hyp_va_bits);
- if (IS_ENABLED(CONFIG_ARM64_MORELLO))
tcr |= TCR_EL2_HWU61 | TCR_EL2_HWU60 | TCR_EL2_HPD;
params->tcr_el2 = tcr;
params->pgd_pa = kvm_mmu_get_httbr(); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 1966fdee740e..88d002d7152f 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -38,6 +38,7 @@ #define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50) +#define KVM_PTE_LEAF_ATTR_HI_HW GENMASK(62, 59) #define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55) #define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) @@ -411,6 +412,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF; attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
- attr |= prot & KVM_PTE_LEAF_ATTR_HI_HW; *ptep = attr;
return 0; @@ -418,7 +420,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) {
- enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
- enum kvm_pgtable_prot prot = pte & (KVM_PTE_LEAF_ATTR_HI_SW | KVM_PTE_LEAF_ATTR_HI_HW); u32 ap;
if (!kvm_pte_valid(pte)) @@ -654,6 +656,9 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) vtcr |= VTCR_EL2_HA; #endif /* CONFIG_ARM64_HW_AFDBM */
- if (IS_ENABLED(CONFIG_ARM64_MORELLO))
vtcr |= VTCR_EL2_PBHA;
I'm not sure I understand the rationale behind the choice of HWU bits to set. Right now it looks like we're doing this:
- Stage 1: 60, 61 (SC, LC[0])
- Stage 2: 59, 60, 61, 62 (CDBM, SC, LC, <unused>)
IIUC, the stage 1 configuration only matters for host / KVM mappings. We don't use LC[1] or CDBM so it's ok to set only HWU{60,61}, though it wouldn't hurt to set them all (like we do in the kernel). OTOH we're setting all bits at stage 2, even though HWU62 has no meaning for Morello (and we don't use CDBM either). It is a detail, as only HWU{60,61} matter in both cases for our usage, but it would be nice to be more consistent.
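For reference, a minimal sketch of the more consistent stage-1 setting implied above (illustrative only, reusing the TCR_EL2_* descriptors from the previous patch):

	if (IS_ENABLED(CONFIG_ARM64_MORELLO))
		/* enable all four HWU bits at stage 1, as the kernel does at EL1 */
		tcr |= TCR_EL2_HWU62 | TCR_EL2_HWU61 | TCR_EL2_HWU60 |
		       TCR_EL2_HWU59 | TCR_EL2_HPD;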
- /* Set the vmid bits */ vtcr |= (get_vmid_bits(mmfr1) == 16) ? VTCR_EL2_VS_16BIT :
@@ -714,6 +719,7 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF; attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
- attr |= prot & KVM_PTE_LEAF_ATTR_HI_HW; *ptep = attr;
return 0; @@ -1299,7 +1305,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, u32 level; kvm_pte_t set = 0, clr = 0;
- if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
- if (prot & KVM_PTE_LEAF_ATTR_HI_SW || prot & KVM_PTE_LEAF_ATTR_HI_HW)
Nit: could be prot & (SW | HW), like above.
Kevin
return -EINVAL;
if (prot & KVM_PGTABLE_PROT_R)
On Fri, Mar 01, 2024 at 11:15:26AM +0100, Kevin Brodsky wrote:
On 26/02/2024 09:20, Beata Michalska wrote:
Extend the definition of the kvm_pgtable_prot enum to incorporate PBHA bits for both stage-1 and stage-2 page tables. At this point, those are being enabled only for Morello, which implements FEAT_HPDS2 by default and as such does not require any additional checks for the above-mentioned feature status.
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/include/asm/kvm_arm.h | 3 +++ arch/arm64/include/asm/kvm_pgtable.h | 20 ++++++++++++++++++++ arch/arm64/kvm/arm.c | 2 ++ arch/arm64/kvm/hyp/pgtable.c | 10 ++++++++-- 4 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index e80e1b7a91e0..010d837f8f1d 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -132,6 +132,9 @@ #define VTCR_EL2_HWU61 TCR_EL2_HWU61 #define VTCR_EL2_HWU60 TCR_EL2_HWU60 #define VTCR_EL2_HWU59 TCR_EL2_HWU59 +#define VTCR_EL2_PBHA_SHIFT 25
Maybe use VTCR_EL2_HWU59 instead of 25?
Could do, though in the majority of cases the XXX_SHIFT(s) are defined explicitly across this file; no strong objections to the idea though. Will update that in v2, but will use VTCR_EL2_HWU59 instead.
+#define VTCR_EL2_PBHA_MASK (0xf << VTCR_EL2_PBHA_SHIFT) +#define VTCR_EL2_PBHA VTCR_EL2_PBHA_MASK #define VTCR_EL2_HD (1 << 22) #define VTCR_EL2_HA (1 << 21) #define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index d3e354bb8351..97ff8a2634d5 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -173,6 +173,10 @@ enum kvm_pgtable_stage2_flags {
- @KVM_PGTABLE_PROT_SW1: Software bit 1.
- @KVM_PGTABLE_PROT_SW2: Software bit 2.
- @KVM_PGTABLE_PROT_SW3: Software bit 3.
- @KVM_PGTABLE_PROT_HW0: PBHA[0]
- @KVM_PGTABLE_PROT_HW1: PBHA[1]
- @KVM_PGTABLE_PROT_HW2: PBHA[2]
- @KVM_PGTABLE_PROT_HW3: PBHA[3]
*/
enum kvm_pgtable_prot { KVM_PGTABLE_PROT_X = BIT(0), @@ -185,17 +189,33 @@ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_SW1 = BIT(56), KVM_PGTABLE_PROT_SW2 = BIT(57), KVM_PGTABLE_PROT_SW3 = BIT(58),
- KVM_PGTABLE_PROT_HW0 = BIT(59),
- KVM_PGTABLE_PROT_HW1 = BIT(60),
- KVM_PGTABLE_PROT_HW2 = BIT(61),
- KVM_PGTABLE_PROT_HW3 = BIT(62),
}; +#ifdef CONFIG_ARM64_MORELLO
To reduce the #ifdef'ing, we could use a similar approach as in <asm/pgtable-prot.h>, that is define MAYBE macros that expand to 0 in !MORELLO. The other advantage would be to make it clearer what each bit does, e.g. KVM_PGTABLE_PROT_MAYBE_LS_CAPS. Getting pretty long though, maybe we could shorten the prefix...
Right, not really a big fan of XXX_MAYBE_YYY: what is MAYBE actually referring to? Also, we would need separate defines for RO, and that still requires #ifdef'ing anyway, so I'm not sure what we would be gaining here (apart from trading off one #ifdef block). Having the HW bits somehow mapped through the naming to their actual meaning is probably a good idea. Let me ponder a bit to see if I can improve it somehow.
+#define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W | \
KVM_PGTABLE_PROT_HW1 | KVM_PGTABLE_PROT_HW2)
+#else #define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W) +#endif #define KVM_PGTABLE_PROT_RWX (KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X) #define PKVM_HOST_MEM_PROT KVM_PGTABLE_PROT_RWX #define PKVM_HOST_MMIO_PROT KVM_PGTABLE_PROT_RW #define PAGE_HYP KVM_PGTABLE_PROT_RW +#ifdef CONFIG_ARM64_MORELLO +#define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X | \
KVM_PGTABLE_PROT_HW2)
+#define PAGE_HYP_RO (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_HW2) +#else #define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X) #define PAGE_HYP_RO (KVM_PGTABLE_PROT_R) +#endif #define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE) typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end, diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 53ad8de28f48..bc945eac681d 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1859,6 +1859,8 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) } tcr &= ~TCR_T0SZ_MASK; tcr |= TCR_T0SZ(hyp_va_bits);
- if (IS_ENABLED(CONFIG_ARM64_MORELLO))
tcr |= TCR_EL2_HWU61 | TCR_EL2_HWU60 | TCR_EL2_HPD;
params->tcr_el2 = tcr;
params->pgd_pa = kvm_mmu_get_httbr(); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 1966fdee740e..88d002d7152f 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -38,6 +38,7 @@ #define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50) +#define KVM_PTE_LEAF_ATTR_HI_HW GENMASK(62, 59) #define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55) #define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) @@ -411,6 +412,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF; attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
- attr |= prot & KVM_PTE_LEAF_ATTR_HI_HW; *ptep = attr;
return 0; @@ -418,7 +420,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) {
- enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
- enum kvm_pgtable_prot prot = pte & (KVM_PTE_LEAF_ATTR_HI_SW | KVM_PTE_LEAF_ATTR_HI_HW); u32 ap;
if (!kvm_pte_valid(pte)) @@ -654,6 +656,9 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) vtcr |= VTCR_EL2_HA; #endif /* CONFIG_ARM64_HW_AFDBM */
- if (IS_ENABLED(CONFIG_ARM64_MORELLO))
vtcr |= VTCR_EL2_PBHA;
I'm not sure I understand the rationale behind the choice of HWU bits to set. Right now it looks like we're doing this:
- Stage 1: 60, 61 (SC, LC[0])
- Stage 2: 59, 60, 61, 62 (CDBM, SC, LC, <unused>)
IIUC, the stage 1 configuration only matters for host / KVM mappings. We don't use LC[1] or CDBM so it's ok to set only HWU{60,61}, though it wouldn't hurt to set them all (like we do in the kernel). OTOH we're setting all bits at stage 2, even though HWU62 has no meaning for Morello (and we don't use CDBM either). It is a detail, as only HWU{60,61} matter in both cases for our usage, but it would be nice to be more consistent.
Right, that's a fair point. Will update stage 1 setup accordingly (did end up mixing the approach taken for EL1 vs EL2)
- /* Set the vmid bits */ vtcr |= (get_vmid_bits(mmfr1) == 16) ? VTCR_EL2_VS_16BIT :
@@ -714,6 +719,7 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF; attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
- attr |= prot & KVM_PTE_LEAF_ATTR_HI_HW; *ptep = attr;
return 0; @@ -1299,7 +1305,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, u32 level; kvm_pte_t set = 0, clr = 0;
- if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
- if (prot & KVM_PTE_LEAF_ATTR_HI_SW || prot & KVM_PTE_LEAF_ATTR_HI_HW)
Nit: could be prot & (SW | HW), like above.
Done.
Thanks.
--- BR Beata
Kevin
return -EINVAL;
if (prot & KVM_PGTABLE_PROT_R)
On Wed, Apr 10, 2024 at 08:56:39PM +0200, Beata Michalska wrote:
On Fri, Mar 01, 2024 at 11:15:26AM +0100, Kevin Brodsky wrote:
On 26/02/2024 09:20, Beata Michalska wrote:
Extend the definition of the kvm_pgtable_prot enum to incorporate PBHA bits for both stage-1 and stage-2 page tables. At this point, those are being enabled only for Morello, which implements FEAT_HPDS2 by default and as such does not require any additional checks for the above-mentioned feature status.
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/include/asm/kvm_arm.h | 3 +++ arch/arm64/include/asm/kvm_pgtable.h | 20 ++++++++++++++++++++ arch/arm64/kvm/arm.c | 2 ++ arch/arm64/kvm/hyp/pgtable.c | 10 ++++++++-- 4 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index e80e1b7a91e0..010d837f8f1d 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -132,6 +132,9 @@ #define VTCR_EL2_HWU61 TCR_EL2_HWU61 #define VTCR_EL2_HWU60 TCR_EL2_HWU60 #define VTCR_EL2_HWU59 TCR_EL2_HWU59 +#define VTCR_EL2_PBHA_SHIFT 25
Maybe use VTCR_EL2_HWU59 instead of 25?
Could do, though in the majority of cases the XXX_SHIFT(s) are defined explicitly across this file; no strong objections to the idea though. Will update that in v2, but will use VTCR_EL2_HWU59 instead.
Replying too fast, I guess. First of all, you did suggest VTCR_EL2_HWU59; not sure why I read it as TCR_EL2_HWU59... Secondly, this needs to stay as is, as it is needed for VTCR_EL2_PBHA to represent all the HW bits: VTCR_EL2_HWU59 represents a single bit.
--- BR Beata
+#define VTCR_EL2_PBHA_MASK (0xf << VTCR_EL2_PBHA_SHIFT) +#define VTCR_EL2_PBHA VTCR_EL2_PBHA_MASK #define VTCR_EL2_HD (1 << 22) #define VTCR_EL2_HA (1 << 21) #define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index d3e354bb8351..97ff8a2634d5 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -173,6 +173,10 @@ enum kvm_pgtable_stage2_flags {
- @KVM_PGTABLE_PROT_SW1: Software bit 1.
- @KVM_PGTABLE_PROT_SW2: Software bit 2.
- @KVM_PGTABLE_PROT_SW3: Software bit 3.
- @KVM_PGTABLE_PROT_HW0: PBHA[0]
- @KVM_PGTABLE_PROT_HW1: PBHA[1]
- @KVM_PGTABLE_PROT_HW2: PBHA[2]
- @KVM_PGTABLE_PROT_HW3: PBHA[3]
*/
enum kvm_pgtable_prot { KVM_PGTABLE_PROT_X = BIT(0), @@ -185,17 +189,33 @@ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_SW1 = BIT(56), KVM_PGTABLE_PROT_SW2 = BIT(57), KVM_PGTABLE_PROT_SW3 = BIT(58),
- KVM_PGTABLE_PROT_HW0 = BIT(59),
- KVM_PGTABLE_PROT_HW1 = BIT(60),
- KVM_PGTABLE_PROT_HW2 = BIT(61),
- KVM_PGTABLE_PROT_HW3 = BIT(62),
}; +#ifdef CONFIG_ARM64_MORELLO
To reduce the #ifdef'ing, we could use a similar approach as in <asm/pgtable-prot.h>, that is define MAYBE macros that expand to 0 in !MORELLO. The other advantage would be to make it clearer what each bit does, e.g. KVM_PGTABLE_PROT_MAYBE_LS_CAPS. Getting pretty long though, maybe we could shorten the prefix...
Right, not really a big fan of XXX_MAYBE_YYY: what is MAYBE actually referring to? Also, we would need separate defines for RO, and that still requires #ifdef'ing anyway, so I'm not sure what we would be gaining here (apart from trading off one #ifdef block). Having the HW bits somehow mapped through the naming to their actual meaning is probably a good idea. Let me ponder a bit to see if I can improve it somehow.
+#define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W | \
KVM_PGTABLE_PROT_HW1 | KVM_PGTABLE_PROT_HW2)
+#else #define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W) +#endif #define KVM_PGTABLE_PROT_RWX (KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X) #define PKVM_HOST_MEM_PROT KVM_PGTABLE_PROT_RWX #define PKVM_HOST_MMIO_PROT KVM_PGTABLE_PROT_RW #define PAGE_HYP KVM_PGTABLE_PROT_RW +#ifdef CONFIG_ARM64_MORELLO +#define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X | \
KVM_PGTABLE_PROT_HW2)
+#define PAGE_HYP_RO (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_HW2) +#else #define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X) #define PAGE_HYP_RO (KVM_PGTABLE_PROT_R) +#endif #define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE) typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end, diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 53ad8de28f48..bc945eac681d 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1859,6 +1859,8 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) } tcr &= ~TCR_T0SZ_MASK; tcr |= TCR_T0SZ(hyp_va_bits);
- if (IS_ENABLED(CONFIG_ARM64_MORELLO))
tcr |= TCR_EL2_HWU61 | TCR_EL2_HWU60 | TCR_EL2_HPD;
params->tcr_el2 = tcr;
params->pgd_pa = kvm_mmu_get_httbr(); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 1966fdee740e..88d002d7152f 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -38,6 +38,7 @@ #define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50) +#define KVM_PTE_LEAF_ATTR_HI_HW GENMASK(62, 59) #define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55) #define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) @@ -411,6 +412,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF; attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
- attr |= prot & KVM_PTE_LEAF_ATTR_HI_HW; *ptep = attr;
return 0; @@ -418,7 +420,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) {
- enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
- enum kvm_pgtable_prot prot = pte & (KVM_PTE_LEAF_ATTR_HI_SW | KVM_PTE_LEAF_ATTR_HI_HW); u32 ap;
if (!kvm_pte_valid(pte)) @@ -654,6 +656,9 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) vtcr |= VTCR_EL2_HA; #endif /* CONFIG_ARM64_HW_AFDBM */
- if (IS_ENABLED(CONFIG_ARM64_MORELLO))
vtcr |= VTCR_EL2_PBHA;
I'm not sure I understand the rationale behind the choice of HWU bits to set. Right now it looks like we're doing this:
- Stage 1: 60, 61 (SC, LC[0])
- Stage 2: 59, 60, 61, 62 (CDBM, SC, LC, <unused>)
IIUC, the stage 1 configuration only matters for host / KVM mappings. We don't use LC[1] or CDBM so it's ok to set only HWU{60,61}, though it wouldn't hurt to set them all (like we do in the kernel). OTOH we're setting all bits at stage 2, even though HWU62 has no meaning for Morello (and we don't use CDBM either). It is a detail, as only HWU{60,61} matter in both cases for our usage, but it would be nice to be more consistent.
Right, that's a fair point. Will update stage 1 setup accordingly (did end up mixing the approach taken for EL1 vs EL2)
- /* Set the vmid bits */ vtcr |= (get_vmid_bits(mmfr1) == 16) ? VTCR_EL2_VS_16BIT :
@@ -714,6 +719,7 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF; attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
- attr |= prot & KVM_PTE_LEAF_ATTR_HI_HW; *ptep = attr;
return 0; @@ -1299,7 +1305,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, u32 level; kvm_pte_t set = 0, clr = 0;
- if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
- if (prot & KVM_PTE_LEAF_ATTR_HI_SW || prot & KVM_PTE_LEAF_ATTR_HI_HW)
Nit: could be prot & (SW | HW), like above.
Done.
Thanks.
BR Beata
Kevin
return -EINVAL;
if (prot & KVM_PGTABLE_PROT_R)
On 10/04/2024 22:27, Beata Michalska wrote:
On Wed, Apr 10, 2024 at 08:56:39PM +0200, Beata Michalska wrote:
On Fri, Mar 01, 2024 at 11:15:26AM +0100, Kevin Brodsky wrote:
On 26/02/2024 09:20, Beata Michalska wrote:
Extend the definition of the kvm_pgtable_prot enum to incorporate PBHA bits for both stage-1 and stage-2 page tables. At this point, those are being enabled only for Morello, which implements FEAT_HPDS2 by default and as such does not require any additional checks for the above-mentioned feature status.
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/include/asm/kvm_arm.h | 3 +++ arch/arm64/include/asm/kvm_pgtable.h | 20 ++++++++++++++++++++ arch/arm64/kvm/arm.c | 2 ++ arch/arm64/kvm/hyp/pgtable.c | 10 ++++++++-- 4 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index e80e1b7a91e0..010d837f8f1d 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -132,6 +132,9 @@ #define VTCR_EL2_HWU61 TCR_EL2_HWU61 #define VTCR_EL2_HWU60 TCR_EL2_HWU60 #define VTCR_EL2_HWU59 TCR_EL2_HWU59 +#define VTCR_EL2_PBHA_SHIFT 25
Maybe use VTCR_EL2_HWU59 instead of 25?
Could do, though in the majority of cases the XXX_SHIFT(s) are defined explicitly across this file; no strong objections to the idea though. Will update that in v2, but will use VTCR_EL2_HWU59 instead.
Replying too fast, I guess. First of all, you did suggest VTCR_EL2_HWU59; not sure why I read it as TCR_EL2_HWU59... Secondly, this needs to stay as is, as it is needed for VTCR_EL2_PBHA to represent all the HW bits: VTCR_EL2_HWU59 represents a single bit.
Yes, right, VTCR_EL2_HWU59 is 1 << 25, not 25. Sorry for the noise.
BR Beata
+#define VTCR_EL2_PBHA_MASK (0xf << VTCR_EL2_PBHA_SHIFT) +#define VTCR_EL2_PBHA VTCR_EL2_PBHA_MASK #define VTCR_EL2_HD (1 << 22) #define VTCR_EL2_HA (1 << 21) #define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index d3e354bb8351..97ff8a2634d5 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -173,6 +173,10 @@ enum kvm_pgtable_stage2_flags {
- @KVM_PGTABLE_PROT_SW1: Software bit 1.
- @KVM_PGTABLE_PROT_SW2: Software bit 2.
- @KVM_PGTABLE_PROT_SW3: Software bit 3.
- @KVM_PGTABLE_PROT_HW0: PBHA[0]
- @KVM_PGTABLE_PROT_HW1: PBHA[1]
- @KVM_PGTABLE_PROT_HW2: PBHA[2]
- @KVM_PGTABLE_PROT_HW3: PBHA[3]
*/
enum kvm_pgtable_prot { KVM_PGTABLE_PROT_X = BIT(0), @@ -185,17 +189,33 @@ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_SW1 = BIT(56), KVM_PGTABLE_PROT_SW2 = BIT(57), KVM_PGTABLE_PROT_SW3 = BIT(58),
- KVM_PGTABLE_PROT_HW0 = BIT(59),
- KVM_PGTABLE_PROT_HW1 = BIT(60),
- KVM_PGTABLE_PROT_HW2 = BIT(61),
- KVM_PGTABLE_PROT_HW3 = BIT(62),
}; +#ifdef CONFIG_ARM64_MORELLO
To reduce the #ifdef'ing, we could use a similar approach as in <asm/pgtable-prot.h>, that is define MAYBE macros that expand to 0 in !MORELLO. The other advantage would be to make it clearer what each bit does, e.g. KVM_PGTABLE_PROT_MAYBE_LS_CAPS. Getting pretty long though, maybe we could shorten the prefix...
Right, not really a big fan of XXX_MAYBE_YYY: what is MAYBE actually referring to? Also, we would need separate defines for RO, and that still requires #ifdef'ing anyway, so I'm not sure what we would be gaining here (apart from trading off one #ifdef block). Having the HW bits somehow mapped through the naming to their actual meaning is probably a good idea. Let me ponder a bit to see if I can improve it somehow.
MAYBE refers to the fact that it may expand to nothing; there is a precedent with PTE_MAYBE_NG. Either way I'm fine with keeping the #ifdef'ing. It would still be good to define macros that clarify what those bits actually do in the Morello case, like PTE_LOAD_CAPS and PTE_STORE_CAPS, instead of using KVM_PGTABLE_PROT_HW* directly.
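As a rough illustration of that naming (hypothetical macro names, assuming HW1/HW2 correspond to the SC and LC[0] hardware bits as discussed earlier in the thread), something along these lines:

#ifdef CONFIG_ARM64_MORELLO
/* PBHA-backed prot bits with their Morello meaning spelled out */
#define KVM_PGTABLE_PROT_STORE_CAPS	KVM_PGTABLE_PROT_HW1	/* SC */
#define KVM_PGTABLE_PROT_LOAD_CAPS	KVM_PGTABLE_PROT_HW2	/* LC[0] */
#else
#define KVM_PGTABLE_PROT_STORE_CAPS	((enum kvm_pgtable_prot)0)
#define KVM_PGTABLE_PROT_LOAD_CAPS	((enum kvm_pgtable_prot)0)
#endif

#define KVM_PGTABLE_PROT_RW	(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W | \
				 KVM_PGTABLE_PROT_STORE_CAPS | KVM_PGTABLE_PROT_LOAD_CAPS)

which would let definitions such as KVM_PGTABLE_PROT_RW or PAGE_HYP_RO keep a single form regardless of Morello support.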
Kevin
+#define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W | \
KVM_PGTABLE_PROT_HW1 | KVM_PGTABLE_PROT_HW2)
+#else #define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W) +#endif #define KVM_PGTABLE_PROT_RWX (KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X) #define PKVM_HOST_MEM_PROT KVM_PGTABLE_PROT_RWX #define PKVM_HOST_MMIO_PROT KVM_PGTABLE_PROT_RW #define PAGE_HYP KVM_PGTABLE_PROT_RW +#ifdef CONFIG_ARM64_MORELLO +#define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X | \
KVM_PGTABLE_PROT_HW2)
+#define PAGE_HYP_RO (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_HW2) +#else #define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X) #define PAGE_HYP_RO (KVM_PGTABLE_PROT_R) +#endif #define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE) typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
Add capability-aware assembly macors generating full-syntax MRS/MSR instructions for capability system registers.
Signed-off-by: Beata Michalska beata.michalska@arm.com --- arch/arm64/include/asm/sysreg.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9988e289ce20..e2109e2c0bef 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1071,28 +1071,56 @@ __emit_inst(0xd5200000|(\sreg)|(.L__gpr_num_\rt)) \ " .endm\n"
+#define DEFINE_MRS_S_C \ + __DEFINE_ASM_GPR_NUMS \ +" .macro mrs_s_c, rt, sreg\n" \ + __emit_inst(0xC2900000|(\sreg)|(.L__gpr_num_\rt)) \ +" .endm\n" + #define DEFINE_MSR_S \ __DEFINE_ASM_GPR_NUMS \ " .macro msr_s, sreg, rt\n" \ __emit_inst(0xd5000000|(\sreg)|(.L__gpr_num_\rt)) \ " .endm\n"
+#define DEFINE_MSR_S_C \ + __DEFINE_ASM_GPR_NUMS \ +" .macro msr_s_c, sreg, rt\n" \ + __emit_inst(0xC2800000|(\sreg)&~0x100000|(.L__gpr_num_\rt)) \ +" .endm\n" + #define UNDEFINE_MRS_S \ " .purgem mrs_s\n"
+#define UNDEFINE_MRS_S_C \ +" .purgem mrs_s_c\n" + #define UNDEFINE_MSR_S \ " .purgem msr_s\n"
+#define UNDEFINE_MSR_S_C \ +" .purgem msr_s_c\n" + #define __mrs_s(v, r) \ DEFINE_MRS_S \ " mrs_s " v ", " __stringify(r) "\n" \ UNDEFINE_MRS_S
+#define __mrs_s_c(v, r) \ + DEFINE_MRS_S_C \ +" mrs_s_c " v ", " __stringify(r) "\n" \ + UNDEFINE_MRS_S_C + #define __msr_s(r, v) \ DEFINE_MSR_S \ " msr_s " __stringify(r) ", " v "\n" \ UNDEFINE_MSR_S
+#define __msr_s_c(r, v) \ + DEFINE_MSR_S_C \ +" msr_s_c " __stringify(r) ", " v "\n" \ + UNDEFINE_MSR_S_C + /* * Unlike read_cpuid, calls to read_sysreg are never expected to be * optimized away or replaced with synthetic values.
On 26/02/2024 09:20, Beata Michalska wrote:
Add capability-aware assembly macors generating full-syntax MRS/MSR
s/macors/macros/
instructions for capability system registers.
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/include/asm/sysreg.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9988e289ce20..e2109e2c0bef 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1071,28 +1071,56 @@ __emit_inst(0xd5200000|(\sreg)|(.L__gpr_num_\rt)) \ " .endm\n" +#define DEFINE_MRS_S_C \
- __DEFINE_ASM_GPR_NUMS \
+" .macro mrs_s_c, rt, sreg\n" \
- __emit_inst(0xC2900000|(\sreg)|(.L__gpr_num_\rt)) \
Nit: hex is normally lowercase (same below).
+" .endm\n"
#define DEFINE_MSR_S \ __DEFINE_ASM_GPR_NUMS \ " .macro msr_s, sreg, rt\n" \ __emit_inst(0xd5000000|(\sreg)|(.L__gpr_num_\rt)) \ " .endm\n" +#define DEFINE_MSR_S_C \
- __DEFINE_ASM_GPR_NUMS \
+" .macro msr_s_c, sreg, rt\n" \
- __emit_inst(0xC2800000|(\sreg)&~0x100000|(.L__gpr_num_\rt)) \
I think I understand the need for masking bit 20 in sreg (unlike similar instructions, bit 20 is 0 in capability MSR), but a comment wouldn't hurt.
Kevin
+" .endm\n"
#define UNDEFINE_MRS_S \ " .purgem mrs_s\n" +#define UNDEFINE_MRS_S_C \ +" .purgem mrs_s_c\n"
#define UNDEFINE_MSR_S \ " .purgem msr_s\n" +#define UNDEFINE_MSR_S_C \ +" .purgem msr_s_c\n"
#define __mrs_s(v, r) \ DEFINE_MRS_S \ " mrs_s " v ", " __stringify(r) "\n" \ UNDEFINE_MRS_S +#define __mrs_s_c(v, r) \
- DEFINE_MRS_S_C \
+" mrs_s_c " v ", " __stringify(r) "\n" \
- UNDEFINE_MRS_S_C
#define __msr_s(r, v) \ DEFINE_MSR_S \ " msr_s " __stringify(r) ", " v "\n" \ UNDEFINE_MSR_S +#define __msr_s_c(r, v) \
- DEFINE_MSR_S_C \
+" msr_s_c " __stringify(r) ", " v "\n" \
- UNDEFINE_MSR_S_C
/*
- Unlike read_cpuid, calls to read_sysreg are never expected to be
- optimized away or replaced with synthetic values.
On Fri, Mar 01, 2024 at 11:15:57AM +0100, Kevin Brodsky wrote:
On 26/02/2024 09:20, Beata Michalska wrote:
Add capability-aware assembly macors generating full-syntax MRS/MSR
s/macors/macros/
instructions for capability system registers.
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/include/asm/sysreg.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9988e289ce20..e2109e2c0bef 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1071,28 +1071,56 @@ __emit_inst(0xd5200000|(\sreg)|(.L__gpr_num_\rt)) \ " .endm\n" +#define DEFINE_MRS_S_C \
- __DEFINE_ASM_GPR_NUMS \
+" .macro mrs_s_c, rt, sreg\n" \
- __emit_inst(0xC2900000|(\sreg)|(.L__gpr_num_\rt)) \
Nit: hex is normally lowercase (same below).
+" .endm\n"
#define DEFINE_MSR_S \ __DEFINE_ASM_GPR_NUMS \ " .macro msr_s, sreg, rt\n" \ __emit_inst(0xd5000000|(\sreg)|(.L__gpr_num_\rt)) \ " .endm\n" +#define DEFINE_MSR_S_C \
- __DEFINE_ASM_GPR_NUMS \
+" .macro msr_s_c, sreg, rt\n" \
- __emit_inst(0xC2800000|(\sreg)&~0x100000|(.L__gpr_num_\rt)) \
I think I understand the need for masking bit 20 in sreg (unlike similar instructions, bit 20 is 0 in capability MSR), but a comment wouldn't hurt.
Will do.
--- BR Beata
Kevin
+" .endm\n"
#define UNDEFINE_MRS_S \ " .purgem mrs_s\n" +#define UNDEFINE_MRS_S_C \ +" .purgem mrs_s_c\n"
#define UNDEFINE_MSR_S \ " .purgem msr_s\n" +#define UNDEFINE_MSR_S_C \ +" .purgem msr_s_c\n"
#define __mrs_s(v, r) \ DEFINE_MRS_S \ " mrs_s " v ", " __stringify(r) "\n" \ UNDEFINE_MRS_S +#define __mrs_s_c(v, r) \
- DEFINE_MRS_S_C \
+" mrs_s_c " v ", " __stringify(r) "\n" \
- UNDEFINE_MRS_S_C
#define __msr_s(r, v) \ DEFINE_MSR_S \ " msr_s " __stringify(r) ", " v "\n" \ UNDEFINE_MSR_S +#define __msr_s_c(r, v) \
- DEFINE_MSR_S_C \
+" msr_s_c " __stringify(r) ", " v "\n" \
- UNDEFINE_MSR_S_C
/*
- Unlike read_cpuid, calls to read_sysreg are never expected to be
- optimized away or replaced with synthetic values.
Provide a capability-aware set of helper macros for system register reads/writes. For the time being, if the transfer register is not a capability one, a prep step is carried out to provide one through the use of the CVTP instruction. In the long run this should be replaced by a proper setup where a source capability is derived from an appropriate authorizing capability.
Signed-off-by: Beata Michalska beata.michalska@arm.com --- arch/arm64/include/asm/sysreg.h | 55 +++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e2109e2c0bef..c55c4c4fa945 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1141,6 +1141,42 @@ : : "rZ" (__val)); \ } while (0)
+#ifdef CONFIG_ARM64_MORELLO +#define __convert_to_cap(v) \ +({ \ + uintcap_t __cval; \ + asm volatile("cvtp %0, %1" : "=r" (__cval) : "r" (v)); \ + __cval; \ +}) + +#define write_cap_sysreg(v, r) \ + do { \ + uintcap_t __cval; \ + __cval = __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(v), \ + uintcap_t), \ + (uintcap_t)(v), \ + __convert_to_cap(v) \ + ); \ + asm volatile("msr " __stringify(r) ", %x0" : : "rZ" (__cval)); \ + } while (0) + +/* + * Select appropriate variant of write_sysreg based on whether + * CONFIG_ARM64_MORELLO is enabled or not. + * For the capability system registers, unless capability is being provided, + * one is created through CVTP instruction. This immplies that this macro + * is not suitable for all use-cases. Additionally, this macro is based on + * the assumption that the capability system register name can be derived + * from corresponding aarch64 one. + */ +#define write_sysreg_variant(v, r) \ + IS_BUILTIN(CONFIG_ARM64_MORELLO) \ + ? ({ write_cap_sysreg(v, c##r); }) \ + : ({ write_sysreg(v, r); }) +#else +#define write_sysreg_variant write_sysreg +#endif /* * For registers without architectural names, or simply unsupported by * GAS. @@ -1162,6 +1198,25 @@ asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ } while (0)
+#define read_cap_sysreg_s(r) ({ \ + uintcap_t __val; \ + u32 __maybe_unused __check_r = (u32)(r); \ + asm volatile(__mrs_s_c("%0", r) : "=r" (__val)); \ + __val; \ +}) + +#define write_cap_sysreg_s(v, r) do { \ + uintcap_t __cval; \ + u32 __maybe_unused __check_r = (u32)(r); \ + __cval = __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(v), \ + uintcap_t), \ + (uintcap_t)(v), \ + __convert_to_cap(v) \ + ); \ + asm volatile(__msr_s_c(r, "%x0") : : "rZ" (__cval)); \ +} while (0) + /* * Modify bits in a sysreg. Bits in the clear mask are zeroed, then bits in the * set mask are set. Other bits are left as-is.
On 26/02/2024 09:20, Beata Michalska wrote:
Provide a capability-aware set of helper macros for system register reads/writes. For the time being, if the transfer register is not a capability one, a prep step is carried out to provide one through the use of the CVTP instruction. In the long run this should be replaced by a proper setup where a source capability is derived from an appropriate authorizing capability.
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/include/asm/sysreg.h | 55 +++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e2109e2c0bef..c55c4c4fa945 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1141,6 +1141,42 @@ : : "rZ" (__val)); \ } while (0) +#ifdef CONFIG_ARM64_MORELLO +#define __convert_to_cap(v) \ +({ \
uintcap_t __cval; \
asm volatile("cvtp %0, %1" : "=r" (__cval) : "r" (v)); \
__cval; \
+})
+#define write_cap_sysreg(v, r) \
- do { \
uintcap_t __cval; \
__cval = __builtin_choose_expr( \
__builtin_types_compatible_p(typeof(v), \
uintcap_t), \
(uintcap_t)(v), \
__convert_to_cap(v) \
); \
asm volatile("msr " __stringify(r) ", %x0" : : "rZ" (__cval)); \
- } while (0)
+/*
- Select appropriate variant of write_sysreg based on whether
- CONFIG_ARM64_MORELLO is enabled or not.
- For the capability system registers, unless capability is being provided,
- one is created through CVTP instruction. This immplies that this macro
- is not suitable for all use-cases. Additionally, this macro is based on
- the assumption that the capability system register name can be derived
- from corresponding aarch64 one.
- */
+#define write_sysreg_variant(v, r) \
- IS_BUILTIN(CONFIG_ARM64_MORELLO) \
? ({ write_cap_sysreg(v, c##r); }) \
: ({ write_sysreg(v, r); })
At first I wasn't convinced about having such a generic macro, but I do see the point in subsequent patches - it avoids quite a bit of #ifdef'ing. That said, I would prefer things to be more explicit. I don't see a need for write_cap_sysreg() accepting both 64-bit and capability values. I think we have two situations:
1. We've got a 64-bit code address and we want to create a valid capability to write into a capability register on Morello (and just the 64-bit address otherwise). That seems to be relevant to VBAR/ELR.
2. We're explicitly manipulating a capability and writing it into a capability register.
For the first situation, something like this macro is useful, but I would always have it use CVTP to create a valid code capability (on Morello). Additionally, I would name it accordingly: write_sysreg_code_addr() or similar. This is because using CVTP is not appropriate for creating data capability, and we shouldn't suggest that this macro is more generic than it really is. Overall my thinking is: if a capability is going to be created, that needs to be reflected in the helper's name, and we don't want to have that happen by accident (e.g. by passing the wrong type to the helper).
If in the future we move to saving/restoring the full capabilities for VBAR/ELR, then we can introduce a new macro, maybe write_sysreg_ptr(), that would always take a capability on Morello and a 64-bit address otherwise.
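A rough sketch of that split (hypothetical names, building on the helpers introduced in this patch):

#ifdef CONFIG_ARM64_MORELLO
/* always derive a code capability from a 64-bit address via CVTP */
#define write_sysreg_code_addr(v, r)	write_cap_sysreg(__convert_to_cap(v), c##r)
/* always expect a ready-made capability */
#define write_sysreg_ptr(v, r)		write_cap_sysreg((uintcap_t)(v), c##r)
#else
#define write_sysreg_code_addr(v, r)	write_sysreg(v, r)
#define write_sysreg_ptr(v, r)		write_sysreg(v, r)
#endif

so that creating a capability is always explicit at the call site.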
+#else +#define write_sysreg_variant write_sysreg +#endif /*
- For registers without architectural names, or simply unsupported by
- GAS.
@@ -1162,6 +1198,25 @@ asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ } while (0) +#define read_cap_sysreg_s(r) ({ \
- uintcap_t __val; \
- u32 __maybe_unused __check_r = (u32)(r); \
- asm volatile(__mrs_s_c("%0", r) : "=r" (__val)); \
- __val; \
+})
+#define write_cap_sysreg_s(v, r) do { \
- uintcap_t __cval; \
- u32 __maybe_unused __check_r = (u32)(r); \
- __cval = __builtin_choose_expr( \
__builtin_types_compatible_p(typeof(v), \
uintcap_t), \
(uintcap_t)(v), \
__convert_to_cap(v) \
); \
- asm volatile(__msr_s_c(r, "%x0") : : "rZ" (__cval)); \
+} while (0)
Same idea here: write_cap_sysreg_s() should do exactly what it says (write a capability). We can have a separate helper that (always) creates a capability with CVTP.
Kevin
/*
- Modify bits in a sysreg. Bits in the clear mask are zeroed, then bits in the
- set mask are set. Other bits are left as-is.
On Fri, Mar 01, 2024 at 11:22:51AM +0100, Kevin Brodsky wrote:
On 26/02/2024 09:20, Beata Michalska wrote:
Provide a capability-aware set of helper macros for system register reads/writes. For the time being, if the transfer register is not a capability one, a prep step is carried out to provide one through the use of the CVTP instruction. In the long run this should be replaced by a proper setup where a source capability is derived from an appropriate authorizing capability.
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/include/asm/sysreg.h | 55 +++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e2109e2c0bef..c55c4c4fa945 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1141,6 +1141,42 @@ : : "rZ" (__val)); \ } while (0) +#ifdef CONFIG_ARM64_MORELLO +#define __convert_to_cap(v) \ +({ \
uintcap_t __cval; \
asm volatile("cvtp %0, %1" : "=r" (__cval) : "r" (v)); \
__cval; \
+})
+#define write_cap_sysreg(v, r) \
- do { \
uintcap_t __cval; \
__cval = __builtin_choose_expr( \
__builtin_types_compatible_p(typeof(v), \
uintcap_t), \
(uintcap_t)(v), \
__convert_to_cap(v) \
); \
asm volatile("msr " __stringify(r) ", %x0" : : "rZ" (__cval)); \
- } while (0)
+/*
- Select appropriate variant of write_sysreg based on whether
- CONFIG_ARM64_MORELLO is enabled or not.
- For the capability system registers, unless capability is being provided,
- one is created through CVTP instruction. This immplies that this macro
- is not suitable for all use-cases. Additionally, this macro is based on
- the assumption that the capability system register name can be derived
- from corresponding aarch64 one.
- */
+#define write_sysreg_variant(v, r) \
- IS_BUILTIN(CONFIG_ARM64_MORELLO) \
? ({ write_cap_sysreg(v, c##r); }) \
: ({ write_sysreg(v, r); })
At first I wasn't convinced about having such a generic macro, but I do see the point in subsequent patches - it avoids quite a bit of #ifdef'ing. That said, I would prefer things to be more explicit. I don't see a need for write_cap_sysreg() accepting both 64-bit and capability values. I think we have two situations:
1. We've got a 64-bit code address and we want to create a valid capability to write into a capability register on Morello (and just the 64-bit address otherwise). That seems to be relevant to VBAR/ELR.
2. We're explicitly manipulating a capability and writing it into a capability register.
For the first situation, something like this macro is useful, but I would always have it use CVTP to create a valid code capability (on Morello). Additionally, I would name it accordingly:
Did I get it right that you would prefer to drop checking the type of the argument and make it always u64 being converted to cap even if a valid one has been provided?
write_sysreg_code_addr() or similar. This is because using CVTP is not
Fair point.
appropriate for creating data capability, and we shouldn't suggest that this macro is more generic than it really is. Overall my thinking is: if a capability is going to be created, that needs to be reflected in the helper's name, and we don't want to have that happen by accident (e.g. by passing the wrong type to the helper).
If in the future we move to saving/restoring the full capabilities for VBAR/ELR, then we can introduce a new macro, maybe write_sysreg_ptr(), that would always take a capability on Morello and a 64-bit address otherwise.
+#else +#define write_sysreg_variant write_sysreg +#endif /*
- For registers without architectural names, or simply unsupported by
- GAS.
@@ -1162,6 +1198,25 @@ asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ } while (0) +#define read_cap_sysreg_s(r) ({ \
- uintcap_t __val; \
- u32 __maybe_unused __check_r = (u32)(r); \
- asm volatile(__mrs_s_c("%0", r) : "=r" (__val)); \
- __val; \
+})
+#define write_cap_sysreg_s(v, r) do { \
- uintcap_t __cval; \
- u32 __maybe_unused __check_r = (u32)(r); \
- __cval = __builtin_choose_expr( \
__builtin_types_compatible_p(typeof(v), \
uintcap_t), \
(uintcap_t)(v), \
__convert_to_cap(v) \
); \
- asm volatile(__msr_s_c(r, "%x0") : : "rZ" (__cval)); \
+} while (0)
Same idea here: write_cap_sysreg_s() should do exactly what it says (write a capability). We can have a separate helper that (always) creates a capability with CVTP.
Noted.
--- BR Beata
Kevin
/*
- Modify bits in a sysreg. Bits in the clear mask are zeroed, then bits in the
- set mask are set. Other bits are left as-is.
On 10/04/2024 21:16, Beata Michalska wrote:
On Fri, Mar 01, 2024 at 11:22:51AM +0100, Kevin Brodsky wrote:
On 26/02/2024 09:20, Beata Michalska wrote:
Provide a capability-aware set of helper macros for system register reads/writes. For the time being, if the transfer register is not a capability one, a prep step is carried out to provide one through the use of the CVTP instruction. In the long run this should be replaced by a proper setup where a source capability is derived from an appropriate authorizing capability.
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/include/asm/sysreg.h | 55 +++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e2109e2c0bef..c55c4c4fa945 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1141,6 +1141,42 @@ : : "rZ" (__val)); \ } while (0) +#ifdef CONFIG_ARM64_MORELLO +#define __convert_to_cap(v) \ +({ \
uintcap_t __cval; \
asm volatile("cvtp %0, %1" : "=r" (__cval) : "r" (v)); \
__cval; \
+})
+#define write_cap_sysreg(v, r) \
- do { \
uintcap_t __cval; \
__cval = __builtin_choose_expr( \
__builtin_types_compatible_p(typeof(v), \
uintcap_t), \
(uintcap_t)(v), \
__convert_to_cap(v) \
); \
asm volatile("msr " __stringify(r) ", %x0" : : "rZ" (__cval)); \
- } while (0)
+/*
- Select appropriate variant of write_sysreg based on whether
- CONFIG_ARM64_MORELLO is enabled or not.
- For the capability system registers, unless capability is being provided,
- one is created through the CVTP instruction. This implies that this macro
- is not suitable for all use-cases. Additionally, this macro is based on
- the assumption that the capability system register name can be derived
- from corresponding aarch64 one.
- */
+#define write_sysreg_variant(v, r) \
- IS_BUILTIN(CONFIG_ARM64_MORELLO) \
? ({ write_cap_sysreg(v, c##r); }) \
: ({ write_sysreg(v, r); })
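As a reader's aid, here is a rough usage sketch of the two paths write_cap_sysreg() takes (illustrative only: hyp_vectors is a made-up placeholder symbol and cvbar_el1 is just an example target register, neither comes from the patch):

	extern char hyp_vectors[];	/* placeholder, not from the patch */

	static inline void cap_sysreg_write_examples(void)
	{
		uintcap_t vec_cap;

		/* u64 argument: __builtin_types_compatible_p() is false, so
		 * __convert_to_cap() runs first and CVTP derives a capability */
		write_cap_sysreg((u64)hyp_vectors, cvbar_el1);

		/* uintcap_t argument: written out as-is, no CVTP, tag preserved */
		vec_cap = __convert_to_cap((u64)hyp_vectors);
		write_cap_sysreg(vec_cap, cvbar_el1);
	}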
At first I wasn't convinced about having such a generic macro, but I do see the point in subsequent patches - it avoids quite a bit of #ifdef'ing. That said, I would prefer things to be more explicit. I don't see a need for write_cap_sysreg() accepting both 64-bit and capability values. I think we have two situations:
1. We've got a 64-bit code address and we want to create a valid capability to write into a capability register on Morello (and just the 64-bit address otherwise). That seems to be relevant to VBAR/ELR.
2. We're explicitly manipulating a capability and writing it into a capability register.
For the first situation, something like this macro is useful, but I would always have it use CVTP to create a valid code capability (on Morello). Additionally, I would name it accordingly:
Did I get it right that you would prefer to drop checking the type of the argument and have it always take a u64 that gets converted to a capability, even if a valid one has been provided?
Yes. AFAICT we never provide a capability to it at the moment. If we also have such situation, we could have a separate helper (see paragraph below).
Kevin
write_sysreg_code_addr() or similar. This is because using CVTP is not appropriate for creating a data capability, and we shouldn't suggest that this macro is more generic than it really is. Overall my thinking is: if a capability is going to be created, that needs to be reflected in the helper's name, and we don't want to have that happen by accident (e.g. by passing the wrong type to the helper).
Fair point.
If in the future we move to saving/restoring the full capabilities for VBAR/ELR, then we can introduce a new macro, maybe write_sysreg_ptr(), that would always take a capability on Morello and a 64-bit address otherwise.
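To make that concrete, one possible shape for such a helper (a sketch only, not part of the series; the name write_sysreg_code_addr comes from the comment above and the body simply reuses the patch's CVTP idiom):

	#ifdef CONFIG_ARM64_MORELLO
	/* Always derive a valid code capability from the 64-bit address via CVTP
	 * and write it to the capability view of the register */
	#define write_sysreg_code_addr(v, r)					\
	do {									\
		u64 __addr = (u64)(v);						\
		uintcap_t __cval;						\
		asm volatile("cvtp %0, %1" : "=r" (__cval) : "r" (__addr));	\
		asm volatile("msr " __stringify(c##r) ", %x0"			\
			     : : "rZ" (__cval));				\
	} while (0)
	#else
	/* Without Morello just write the 64-bit address as usual */
	#define write_sysreg_code_addr(v, r)	write_sysreg(v, r)
	#endif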
+#else
+#define write_sysreg_variant write_sysreg
+#endif
/*
- For registers without architectural names, or simply unsupported by
- GAS.
@@ -1162,6 +1198,25 @@ asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ } while (0) +#define read_cap_sysreg_s(r) ({ \
- uintcap_t __val; \
- u32 __maybe_unused __check_r = (u32)(r); \
- asm volatile(__mrs_s_c("%0", r) : "=r" (__val)); \
- __val; \
+})
+#define write_cap_sysreg_s(v, r) do { \
- uintcap_t __cval; \
- u32 __maybe_unused __check_r = (u32)(r); \
- __cval = __builtin_choose_expr( \
__builtin_types_compatible_p(typeof(v), \
uintcap_t), \
(uintcap_t)(v), \
__convert_to_cap(v) \
); \
- asm volatile(__msr_s_c(r, "%x0") : : "rZ" (__cval)); \
+} while (0)
Same idea here: write_cap_sysreg_s() should do exactly what it says (write a capability). We can have a separate helper that (always) creates a capability with CVTP.
Noted.
BR Beata
Kevin
/*
- Modify bits in a sysreg. Bits in the clear mask are zeroed, then bits in the
- set mask are set. Other bits are left as-is.
Add read/write sysreg variants for capability registers. For the time being, unless a capability is provided, a valid one is created through the CVTP instruction; this should be replaced by a proper setup in the long run.
Signed-off-by: Beata Michalska beata.michalska@arm.com --- arch/arm64/include/asm/kvm_hyp.h | 74 +++++++++++++++++++++++++------- 1 file changed, 58 insertions(+), 16 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 145ce73fc16c..7631921f7574 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -24,12 +24,16 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
#if defined(__KVM_VHE_HYPERVISOR__)
-#define read_sysreg_el0(r) read_sysreg_s(r##_EL02) -#define write_sysreg_el0(v,r) write_sysreg_s(v, r##_EL02) -#define read_sysreg_el1(r) read_sysreg_s(r##_EL12) -#define write_sysreg_el1(v,r) write_sysreg_s(v, r##_EL12) -#define read_sysreg_el2(r) read_sysreg_s(r##_EL1) -#define write_sysreg_el2(v,r) write_sysreg_s(v, r##_EL1) +#define read_sysreg_el0(r) read_sysreg_s(r##_EL02) +#define write_sysreg_el0(v,r) write_sysreg_s(v, r##_EL02) +#define read_sysreg_el1(r) read_sysreg_s(r##_EL12) +#define read_cap_sysreg_el1(r) read_cap_sysreg_s(r##_EL12) +#define write_sysreg_el1(v,r) write_sysreg_s(v, r##_EL12) +#define write_cap_sysreg_el1(v,r) write_cap_sysreg_s(v, r##_EL12) +#define read_sysreg_el2(r) read_sysreg_s(r##_EL1) +#define read_cap_sysreg_el2(r) read_cap_sysreg_s(r##_EL1) +#define write_sysreg_el2(v,r) write_sysreg_s(v, r##_EL1) +#define write_cap_sysreg_el2(v,r) write_cap_sysreg_s(v, r##_EL1)
#else // !__KVM_VHE_HYPERVISOR__
@@ -49,21 +53,59 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); reg; \ })
+#ifdef CONFIG_ARM64_MORELLO +#define read_cap_sysreg_elx(r,nvh,vh) \ + ({ \ + uintcap_t reg; \ + asm volatile(ALTERNATIVE(__mrs_s_c("%0", r##nvh), \ + __mrs_s_c("%0", r##vh), \ + VHE_ALT_KEY) \ + : "=r" (reg)); \ + reg; \ + }) +#else +#define read_cap_sysreg_elx read_sysreg_elx +#endif + +#define __write_sysreg_elx(v,r,inst,nvh,vh) \ +({ \ + asm volatile(ALTERNATIVE(inst(r##nvh, "%x0"), \ + inst(r##vh, "%x0"), \ + VHE_ALT_KEY) \ + : : "rZ" (v)); \ +}) + #define write_sysreg_elx(v,r,nvh,vh) \ do { \ u64 __val = (u64)(v); \ - asm volatile(ALTERNATIVE(__msr_s(r##nvh, "%x0"), \ - __msr_s(r##vh, "%x0"), \ - VHE_ALT_KEY) \ - : : "rZ" (__val)); \ + __write_sysreg_elx(__val, r, __msr_s, nvh, vh); \ } while (0)
-#define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02) -#define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02) -#define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12) -#define write_sysreg_el1(v,r) write_sysreg_elx(v, r, _EL1, _EL12) -#define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1) -#define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1) +#ifdef CONFIG_ARM64_MORELLO +#define write_cap_sysreg_elx(v,r,nvh,vh) \ + do { \ + uintcap_t __cval; \ + __cval = __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(v), \ + uintcap_t),\ + (uintcap_t)(v), \ + __convert_to_cap(v)); \ + __write_sysreg_elx(__cval, r, __msr_s_c, nvh, vh); \ + } while (0) +#else +#define write_cap_sysreg_elx write_sysreg_elx +#endif + +#define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02) +#define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02) +#define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12) +#define read_cap_sysreg_el1(r) read_cap_sysreg_elx(r, _EL1, _EL12) +#define write_sysreg_el1(v,r) write_sysreg_elx(v, r, _EL1, _EL12) +#define write_cap_sysreg_el1(v,r) write_cap_sysreg_elx(v, r, _EL1, _EL12) +#define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1) +#define read_cap_sysreg_el2(r) read_cap_sysreg_elx(r, _EL2, _EL1) +#define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1) +#define write_cap_sysreg_el2(v,r) write_cap_sysreg_elx(v, r, _EL2, _EL1)
#endif // __KVM_VHE_HYPERVISOR__
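For orientation, a minimal sketch of how the new EL1 accessors end up being used further down the series (mirroring the later sysreg-sr.h changes; the wrapper function is made up for illustration):

	#ifdef CONFIG_ARM64_MORELLO
	/* Round-trip the guest's CVBAR_EL1: the macros resolve to the right
	 * encoding for the VHE and nVHE cases (directly or via ALTERNATIVE()) */
	static inline void cvbar_round_trip_example(void)
	{
		uintcap_t cvbar = read_cap_sysreg_el1(SYS_VBAR);

		write_cap_sysreg_el1(cvbar, SYS_VBAR);
	}
	#endif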
Add the bare minimum to enable saving & restoring the relevant capability registers. Note that, at this point, only a subset of capability registers is to be handled, with the remaining ones being provided for completeness only.
Signed-off-by: Beata Michalska beata.michalska@arm.com --- arch/arm64/include/asm/kvm_asm.h | 6 +++++ arch/arm64/include/asm/kvm_host.h | 38 +++++++++++++++++++++++++++++++ arch/arm64/kernel/asm-offsets.c | 3 +++ 3 files changed, 47 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 24b5e6b23417..4ca7ece15385 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -334,6 +334,12 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, #define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*x) #define CPU_LR_OFFSET CPU_XREG_OFFSET(30) #define CPU_SP_EL0_OFFSET (CPU_LR_OFFSET + 8) +#ifdef CONFIG_ARM64_MORELLO +#define CPU_CREG_OFFSET(x) (CPU_USER_CREGS + 16*x) +#define CPU_CLR_OFFSET CPU_CREG_OFFSET(30) +#define CPU_CSP_EL0_OFFSET (CPU_CLR_OFFSET + 16) +#define CPU_RCSP_EL0_OFFSET (CPU_CLR_OFFSET + 32) +#endif
/* * We treat x18 as callee-saved as the host may use it as a platform diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 824f29f04916..808645b7108d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -451,9 +451,40 @@ enum vcpu_sysreg { NR_SYS_REGS /* Nothing after this line! */ };
+#ifdef CONFIG_ARM64_MORELLO +enum vcpu_cap_sysregs { + __INVALID_CAP_SYSREG__, + CTPIDR_EL0, + RTPIDR_EL0, + CTPIDRRO_EL0, + + CTPIDR_EL1, + CELR_EL1, + CVBAR_EL1, + CSP_EL1, + DDC_EL1, + + CELR_EL2, + CVBAR_EL2, + CTPIDR_EL2, + CSP_EL2, + DDC_EL2, + + NR_CAP_SYS_REGS +}; +#endif + struct kvm_cpu_context { struct user_pt_regs regs; /* sp = sp_el0 */
+ /* This should probably end up in user_pt_regs at one point */ +#ifdef CONFIG_ARM64_MORELLO + uintcap_t cregs[31]; + uintcap_t csp; + uintcap_t rcsp; + uintcap_t pcc; +#endif + u64 spsr_abt; u64 spsr_und; u64 spsr_irq; @@ -463,6 +494,9 @@ struct kvm_cpu_context {
u64 sys_regs[NR_SYS_REGS];
+#ifdef CONFIG_ARM64_MORELLO + uintcap_t cap_sys_regs[NR_CAP_SYS_REGS]; +#endif struct kvm_vcpu *__hyp_running_vcpu; };
@@ -833,6 +867,10 @@ struct kvm_vcpu_arch {
#define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r)))
+#ifdef CONFIG_ARM64_MORELLO +#define __ctxt_cap_sys_reg(c, r) (&(c)->cap_sys_regs[(r)]) +#define ctxt_cap_sys_reg(c, r) (*__ctxt_cap_sys_reg(c,r)) +#endif u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 90ca25039583..249bf3ca2fbc 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -143,6 +143,9 @@ int main(void) DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs)); +#ifdef CONFIG_ARM64_MORELLO + DEFINE(CPU_USER_CREGS, offsetof(struct kvm_cpu_context, cregs)); +#endif DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1])); DEFINE(CPU_GCR_EL1, offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1])); DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
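Not part of the patch, but a sanity check one could drop next to the struct to spell out the layout the new asm-offsets implicitly rely on (csp and rcsp sitting immediately after cregs[], one 16-byte slot each):

	#ifdef CONFIG_ARM64_MORELLO
	static_assert(offsetof(struct kvm_cpu_context, csp) ==
		      offsetof(struct kvm_cpu_context, cregs) + 31 * sizeof(uintcap_t));
	static_assert(offsetof(struct kvm_cpu_context, rcsp) ==
		      offsetof(struct kvm_cpu_context, cregs) + 32 * sizeof(uintcap_t));
	#endif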
On 26/02/2024 10:20, Beata Michalska wrote:
Add the bare minimum to enable saving & restoring the relevant capability registers. Note that, at this point, only a subset of capability registers is to be handled, with the remaining ones being provided for completeness only.
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/include/asm/kvm_asm.h | 6 +++++ arch/arm64/include/asm/kvm_host.h | 38 +++++++++++++++++++++++++++++++ arch/arm64/kernel/asm-offsets.c | 3 +++ 3 files changed, 47 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 24b5e6b23417..4ca7ece15385 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -334,6 +334,12 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, #define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*x) #define CPU_LR_OFFSET CPU_XREG_OFFSET(30) #define CPU_SP_EL0_OFFSET (CPU_LR_OFFSET + 8) +#ifdef CONFIG_ARM64_MORELLO +#define CPU_CREG_OFFSET(x) (CPU_USER_CREGS + 16*x) +#define CPU_CLR_OFFSET CPU_CREG_OFFSET(30) +#define CPU_CSP_EL0_OFFSET (CPU_CLR_OFFSET + 16) +#define CPU_RCSP_EL0_OFFSET (CPU_CLR_OFFSET + 32) +#endif /*
- We treat x18 as callee-saved as the host may use it as a platform
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 824f29f04916..808645b7108d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -451,9 +451,40 @@ enum vcpu_sysreg { NR_SYS_REGS /* Nothing after this line! */ }; +#ifdef CONFIG_ARM64_MORELLO +enum vcpu_cap_sysregs {
- __INVALID_CAP_SYSREG__,
- CTPIDR_EL0,
- RTPIDR_EL0,
RCTPIDR_EL0 to be consistent.
Are all the capability registers expected to be here? If so, DDC_EL0 and RDDC_EL0 are missing.
- CTPIDRRO_EL0,
- CTPIDR_EL1,
- CELR_EL1,
- CVBAR_EL1,
- CSP_EL1,
- DDC_EL1,
- CELR_EL2,
- CVBAR_EL2,
- CTPIDR_EL2,
- CSP_EL2,
- DDC_EL2,
I can't see most of these registers being saved or restored in the current series, is that expected?
- NR_CAP_SYS_REGS
+}; +#endif
struct kvm_cpu_context { struct user_pt_regs regs; /* sp = sp_el0 */
- /* This should probably end up in user_pt_regs at one point */
user_pt_regs is a uapi struct so it cannot be changed, at least not in the standard arm64 ABI (and we can't assume the userspace ABI here).
+#ifdef CONFIG_ARM64_MORELLO
- uintcap_t cregs[31];
- uintcap_t csp;
- uintcap_t rcsp;
- uintcap_t pcc;
+#endif
- u64 spsr_abt; u64 spsr_und; u64 spsr_irq;
@@ -463,6 +494,9 @@ struct kvm_cpu_context { u64 sys_regs[NR_SYS_REGS]; +#ifdef CONFIG_ARM64_MORELLO
- uintcap_t cap_sys_regs[NR_CAP_SYS_REGS];
+#endif struct kvm_vcpu *__hyp_running_vcpu; }; @@ -833,6 +867,10 @@ struct kvm_vcpu_arch { #define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r))) +#ifdef CONFIG_ARM64_MORELLO +#define __ctxt_cap_sys_reg(c, r) (&(c)->cap_sys_regs[(r)]) +#define ctxt_cap_sys_reg(c, r) (*__ctxt_cap_sys_reg(c,r)) +#endif
Nit: empty line here.
Kevin
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 90ca25039583..249bf3ca2fbc 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -143,6 +143,9 @@ int main(void) DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs)); +#ifdef CONFIG_ARM64_MORELLO
- DEFINE(CPU_USER_CREGS, offsetof(struct kvm_cpu_context, cregs));
+#endif DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1])); DEFINE(CPU_GCR_EL1, offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1])); DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
On Fri, Mar 01, 2024 at 11:26:28AM +0100, Kevin Brodsky wrote:
On 26/02/2024 10:20, Beata Michalska wrote:
Add the bare minimum to enable saving & restoring the relevant capability registers. Note that, at this point, only a subset of capability registers is to be handled, with the remaining ones being provided for completeness only.
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/include/asm/kvm_asm.h | 6 +++++ arch/arm64/include/asm/kvm_host.h | 38 +++++++++++++++++++++++++++++++ arch/arm64/kernel/asm-offsets.c | 3 +++ 3 files changed, 47 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 24b5e6b23417..4ca7ece15385 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -334,6 +334,12 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, #define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*x) #define CPU_LR_OFFSET CPU_XREG_OFFSET(30) #define CPU_SP_EL0_OFFSET (CPU_LR_OFFSET + 8) +#ifdef CONFIG_ARM64_MORELLO +#define CPU_CREG_OFFSET(x) (CPU_USER_CREGS + 16*x) +#define CPU_CLR_OFFSET CPU_CREG_OFFSET(30) +#define CPU_CSP_EL0_OFFSET (CPU_CLR_OFFSET + 16) +#define CPU_RCSP_EL0_OFFSET (CPU_CLR_OFFSET + 32) +#endif /*
- We treat x18 as callee-saved as the host may use it as a platform
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 824f29f04916..808645b7108d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -451,9 +451,40 @@ enum vcpu_sysreg { NR_SYS_REGS /* Nothing after this line! */ }; +#ifdef CONFIG_ARM64_MORELLO +enum vcpu_cap_sysregs {
- __INVALID_CAP_SYSREG__,
- CTPIDR_EL0,
- RTPIDR_EL0,
RCTPIDR_EL0 to be consistent.
Are all the capability registers expected to be here? If so, DDC_EL0 and RDDC_EL0 are missing.
- CTPIDRRO_EL0,
- CTPIDR_EL1,
- CELR_EL1,
- CVBAR_EL1,
- CSP_EL1,
- DDC_EL1,
- CELR_EL2,
- CVBAR_EL2,
- CTPIDR_EL2,
- CSP_EL2,
- DDC_EL2,
I can't see most of these registers being saved or restored in the current series, is that expected?
Yes, currently the bare minimum is being handled, and the rest is for completeness only, as stated in the commit message (not complete enough as you have pointed out though ...)
- NR_CAP_SYS_REGS
+}; +#endif
struct kvm_cpu_context { struct user_pt_regs regs; /* sp = sp_el0 */
- /* This should probably end up in user_pt_regs at one point */
user_pt_regs is a uapi struct so it cannot be changed, at least not in the standard arm64 ABI (and we can't assume the userspace ABI here).
Fair point.
+#ifdef CONFIG_ARM64_MORELLO
- uintcap_t cregs[31];
- uintcap_t csp;
- uintcap_t rcsp;
- uintcap_t pcc;
+#endif
- u64 spsr_abt; u64 spsr_und; u64 spsr_irq;
@@ -463,6 +494,9 @@ struct kvm_cpu_context { u64 sys_regs[NR_SYS_REGS]; +#ifdef CONFIG_ARM64_MORELLO
- uintcap_t cap_sys_regs[NR_CAP_SYS_REGS];
+#endif struct kvm_vcpu *__hyp_running_vcpu; }; @@ -833,6 +867,10 @@ struct kvm_vcpu_arch { #define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r))) +#ifdef CONFIG_ARM64_MORELLO +#define __ctxt_cap_sys_reg(c, r) (&(c)->cap_sys_regs[(r)]) +#define ctxt_cap_sys_reg(c, r) (*__ctxt_cap_sys_reg(c,r)) +#endif
Nit: empty line here.
Noted.
--- BR Beata
Kevin
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 90ca25039583..249bf3ca2fbc 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -143,6 +143,9 @@ int main(void) DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs)); +#ifdef CONFIG_ARM64_MORELLO
- DEFINE(CPU_USER_CREGS, offsetof(struct kvm_cpu_context, cregs));
+#endif DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1])); DEFINE(CPU_GCR_EL1, offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1])); DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
On 10/04/2024 21:20, Beata Michalska wrote:
+enum vcpu_cap_sysregs {
- __INVALID_CAP_SYSREG__,
- CTPIDR_EL0,
- RTPIDR_EL0,
RCTPIDR_EL0 to be consistent.
Are all the capability registers expected to be here? If so, DDC_EL0 and RDDC_EL0 are missing.
- CTPIDRRO_EL0,
- CTPIDR_EL1,
- CELR_EL1,
- CVBAR_EL1,
- CSP_EL1,
- DDC_EL1,
- CELR_EL2,
- CVBAR_EL2,
- CTPIDR_EL2,
- CSP_EL2,
- DDC_EL2,
I can't see most of these registers being saved or restored in the current series, is that expected?
Yes, currently the bare minimum is being handled, and the rest is for completeness only, as stated in the commit message (not complete enough as you have pointed out though ...)
That's understood, no expectation of completeness here. I wonder if it makes sense to add those registers to this enum without handling them, though; it seems more misleading than helpful.
Kevin
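For illustration, an enum trimmed down to what the series actually saves and restores today (CVBAR_EL1 and CELR_EL1, per the sysreg-sr.h changes later in the thread) might look like this; purely a sketch of the alternative being discussed, not a proposal from the patch:

	#ifdef CONFIG_ARM64_MORELLO
	enum vcpu_cap_sysregs {
		__INVALID_CAP_SYSREG__,

		CVBAR_EL1,	/* handled in __sysreg_{save,restore}_el1_state */
		CELR_EL1,	/* likewise */

		NR_CAP_SYS_REGS
	};
	#endif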
Make sure that the capability vector is being updated accordingly during the context switch.
Signed-off-by: Beata Michalska beata.michalska@arm.com --- arch/arm64/kvm/hyp/nvhe/switch.c | 4 ++-- arch/arm64/kvm/hyp/vhe/switch.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index c50f8459e4fc..e19243367408 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -64,7 +64,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) }
kvm_write_cptr_el2(val); - write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2); + write_sysreg_variant(__this_cpu_read(kvm_hyp_vector), vbar_el2);
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; @@ -109,7 +109,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
kvm_reset_cptr_el2(vcpu); - write_sysreg(__kvm_hyp_host_vector, vbar_el2); + write_sysreg_variant(__kvm_hyp_host_vector, vbar_el2); }
/* Save VGICv3 state on non-VHE systems */ diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 1581df6aec87..e4fc314a83a6 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -85,7 +85,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
write_sysreg(val, cpacr_el1);
- write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el1); + write_sysreg_variant(__this_cpu_read(kvm_hyp_vector), vbar_el1); } NOKPROBE_SYMBOL(__activate_traps);
@@ -132,7 +132,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
if (!arm64_kernel_unmapped_at_el0()) host_vectors = __this_cpu_read(this_cpu_vector); - write_sysreg(host_vectors, vbar_el1); + write_sysreg_variant(host_vectors, vbar_el1); } NOKPROBE_SYMBOL(__deactivate_traps);
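Concretely, on a CONFIG_ARM64_MORELLO=y build the changed lines above resolve to write_cap_sysreg() on the capability view of the register; roughly (a sketch of the expansion, not the generated code, with host_vectors standing for whichever 64-bit vector address is being written):

	/* write_sysreg_variant(host_vectors, vbar_el1) becomes
	 * write_cap_sysreg(host_vectors, cvbar_el1), i.e. approximately: */
	uintcap_t __cval;
	asm volatile("cvtp %0, %1" : "=r" (__cval) : "r" (host_vectors));
	asm volatile("msr cvbar_el1, %x0" : : "rZ" (__cval));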
In title: VBAR is for exception entry, not exception return (in that case ELR is used).
Kevin
On 26/02/2024 10:20, Beata Michalska wrote:
Make sure that the capability vector is being updated accordingly during the context switch.
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/kvm/hyp/nvhe/switch.c | 4 ++-- arch/arm64/kvm/hyp/vhe/switch.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index c50f8459e4fc..e19243367408 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -64,7 +64,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) } kvm_write_cptr_el2(val);
- write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
- write_sysreg_variant(__this_cpu_read(kvm_hyp_vector), vbar_el2);
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; @@ -109,7 +109,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2); kvm_reset_cptr_el2(vcpu);
- write_sysreg(__kvm_hyp_host_vector, vbar_el2);
- write_sysreg_variant(__kvm_hyp_host_vector, vbar_el2);
} /* Save VGICv3 state on non-VHE systems */ diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 1581df6aec87..e4fc314a83a6 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -85,7 +85,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) write_sysreg(val, cpacr_el1);
- write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el1);
- write_sysreg_variant(__this_cpu_read(kvm_hyp_vector), vbar_el1);
} NOKPROBE_SYMBOL(__activate_traps); @@ -132,7 +132,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) if (!arm64_kernel_unmapped_at_el0()) host_vectors = __this_cpu_read(this_cpu_vector);
- write_sysreg(host_vectors, vbar_el1);
- write_sysreg_variant(host_vectors, vbar_el1);
} NOKPROBE_SYMBOL(__deactivate_traps);
Disable trapping of Morello instructions on context switch.
Signed-off-by: Beata Michalska beata.michalska@arm.com --- arch/arm64/include/asm/kvm_emulate.h | 5 +++++ arch/arm64/kvm/hyp/nvhe/switch.c | 5 +++++ arch/arm64/kvm/hyp/vhe/switch.c | 7 +++++++ 3 files changed, 17 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 78a550537b67..6ce5d7c96cad 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -587,6 +587,9 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) CPACR_EL1_ZEN_EL1EN); if (cpus_have_final_cap(ARM64_SME)) val |= CPACR_EL1_SMEN_EL1EN; + if (IS_ENABLED(CONFIG_ARM64_MORELLO)) + val |= CPACR_EL1_CEN; + } else if (has_hvhe()) { val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
@@ -603,6 +606,8 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) val |= CPTR_EL2_TZ; if (cpus_have_final_cap(ARM64_SME)) val &= ~CPTR_EL2_TSM; + if (IS_ENABLED(CONFIG_ARM64_MORELLO)) + val &= ~CPTR_EL2_TC; }
return val; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index e19243367408..7c4363b0691b 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -63,7 +63,12 @@ static void __activate_traps(struct kvm_vcpu *vcpu) __activate_traps_fpsimd32(vcpu); }
+ /* Disable trapping of Morello instructions */ + if (IS_ENABLED(CONFIG_ARM64_MORELLO)) + val &= ~CPTR_EL2_TC; + kvm_write_cptr_el2(val); + write_sysreg_variant(__this_cpu_read(kvm_hyp_vector), vbar_el2);
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index e4fc314a83a6..6a2540ca30aa 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -75,6 +75,13 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val |= CPTR_EL2_TAM;
+ /* + * Disable trapping Morello instructions + * For VHE (HCR.E2H) CPCR_EL1.CEN bits have the same position as + * CPTR_EL2.CEN + */ + val |= CPACR_EL1_CEN; + if (guest_owns_fp_regs(vcpu)) { if (vcpu_has_sve(vcpu)) val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
On 26/02/2024 10:20, Beata Michalska wrote:
Disable trapping of Morello instructions on context switch.
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/include/asm/kvm_emulate.h | 5 +++++ arch/arm64/kvm/hyp/nvhe/switch.c | 5 +++++ arch/arm64/kvm/hyp/vhe/switch.c | 7 +++++++ 3 files changed, 17 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 78a550537b67..6ce5d7c96cad 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -587,6 +587,9 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) CPACR_EL1_ZEN_EL1EN); if (cpus_have_final_cap(ARM64_SME)) val |= CPACR_EL1_SMEN_EL1EN;
if (IS_ENABLED(CONFIG_ARM64_MORELLO))
val |= CPACR_EL1_CEN;
- } else if (has_hvhe()) { val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
@@ -603,6 +606,8 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) val |= CPTR_EL2_TZ; if (cpus_have_final_cap(ARM64_SME)) val &= ~CPTR_EL2_TSM;
if (IS_ENABLED(CONFIG_ARM64_MORELLO))
val &= ~CPTR_EL2_TC;
}
return val; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index e19243367408..7c4363b0691b 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -63,7 +63,12 @@ static void __activate_traps(struct kvm_vcpu *vcpu) __activate_traps_fpsimd32(vcpu); }
- /* Disable trapping of Morello instructions */
- if (IS_ENABLED(CONFIG_ARM64_MORELLO))
val &= ~CPTR_EL2_TC;
- kvm_write_cptr_el2(val);
It doesn't look like this automatically does an ISB. Considering that it is immediately followed by Morello instructions, that could be an issue. Unless CPTR_EL2.TC already happens to be cleared, considering its initialisation in init_kernel_el?
Kevin
- write_sysreg_variant(__this_cpu_read(kvm_hyp_vector), vbar_el2);
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index e4fc314a83a6..6a2540ca30aa 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -75,6 +75,13 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val |= CPTR_EL2_TAM;
- /*
* Disable trapping Morello instructions
* For VHE (HCR.E2H) CPCR_EL1.CEN bits have the same position as
* CPTR_EL2.CEN
*/
- val |= CPACR_EL1_CEN;
- if (guest_owns_fp_regs(vcpu)) { if (vcpu_has_sve(vcpu)) val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
On Fri, Mar 01, 2024 at 11:27:26AM +0100, Kevin Brodsky wrote:
On 26/02/2024 10:20, Beata Michalska wrote:
Disable trapping of Morello instructions on context switch.
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/include/asm/kvm_emulate.h | 5 +++++ arch/arm64/kvm/hyp/nvhe/switch.c | 5 +++++ arch/arm64/kvm/hyp/vhe/switch.c | 7 +++++++ 3 files changed, 17 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 78a550537b67..6ce5d7c96cad 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -587,6 +587,9 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) CPACR_EL1_ZEN_EL1EN); if (cpus_have_final_cap(ARM64_SME)) val |= CPACR_EL1_SMEN_EL1EN;
if (IS_ENABLED(CONFIG_ARM64_MORELLO))
val |= CPACR_EL1_CEN;
- } else if (has_hvhe()) { val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
@@ -603,6 +606,8 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) val |= CPTR_EL2_TZ; if (cpus_have_final_cap(ARM64_SME)) val &= ~CPTR_EL2_TSM;
if (IS_ENABLED(CONFIG_ARM64_MORELLO))
val &= ~CPTR_EL2_TC;
}
return val; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index e19243367408..7c4363b0691b 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -63,7 +63,12 @@ static void __activate_traps(struct kvm_vcpu *vcpu) __activate_traps_fpsimd32(vcpu); }
- /* Disable trapping of Morello instructions */
- if (IS_ENABLED(CONFIG_ARM64_MORELLO))
val &= ~CPTR_EL2_TC;
- kvm_write_cptr_el2(val);
It doesn't look like this automatically does an ISB. Considering that it is immediately followed by Morello instructions, that could be an issue. Unless CPTR_EL2.TC already happens to be cleared, considering its initialisation in init_kernel_el?
It is indeed already cleared, so what this does is make sure that we do not lose that setting between the switches. I should probably add a comment on why a barrier is not needed in this case.
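For what it's worth, one possible wording for that comment, dropped into the nVHE __activate_traps() hunk (illustrative only; note Kevin questions further below whether keeping the clearing is needed at all):

	/*
	 * Disable trapping of Morello instructions.
	 * CPTR_EL2.TC is already clear (set up in init_kernel_el() and never
	 * set again), so no ISB is needed before the Morello instructions that
	 * follow; clearing it here only keeps the value stable across world
	 * switches.
	 */
	if (IS_ENABLED(CONFIG_ARM64_MORELLO))
		val &= ~CPTR_EL2_TC;

	kvm_write_cptr_el2(val);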
--- BR Beata
Kevin
- write_sysreg_variant(__this_cpu_read(kvm_hyp_vector), vbar_el2);
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index e4fc314a83a6..6a2540ca30aa 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -75,6 +75,13 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val |= CPTR_EL2_TAM;
- /*
* Disable trapping Morello instructions
* For VHE (HCR.E2H) CPCR_EL1.CEN bits have the same position as
* CPTR_EL2.CEN
*/
- val |= CPACR_EL1_CEN;
- if (guest_owns_fp_regs(vcpu)) { if (vcpu_has_sve(vcpu)) val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
On 10/04/2024 21:32, Beata Michalska wrote:
On Fri, Mar 01, 2024 at 11:27:26AM +0100, Kevin Brodsky wrote:
On 26/02/2024 10:20, Beata Michalska wrote:
Disable trapping of Morello instructions on context switch.
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/include/asm/kvm_emulate.h | 5 +++++ arch/arm64/kvm/hyp/nvhe/switch.c | 5 +++++ arch/arm64/kvm/hyp/vhe/switch.c | 7 +++++++ 3 files changed, 17 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 78a550537b67..6ce5d7c96cad 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -587,6 +587,9 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) CPACR_EL1_ZEN_EL1EN); if (cpus_have_final_cap(ARM64_SME)) val |= CPACR_EL1_SMEN_EL1EN;
if (IS_ENABLED(CONFIG_ARM64_MORELLO))
val |= CPACR_EL1_CEN;
- } else if (has_hvhe()) { val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
@@ -603,6 +606,8 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) val |= CPTR_EL2_TZ; if (cpus_have_final_cap(ARM64_SME)) val &= ~CPTR_EL2_TSM;
if (IS_ENABLED(CONFIG_ARM64_MORELLO))
val &= ~CPTR_EL2_TC;
}
return val; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index e19243367408..7c4363b0691b 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -63,7 +63,12 @@ static void __activate_traps(struct kvm_vcpu *vcpu) __activate_traps_fpsimd32(vcpu); }
- /* Disable trapping of Morello instructions */
- if (IS_ENABLED(CONFIG_ARM64_MORELLO))
val &= ~CPTR_EL2_TC;
- kvm_write_cptr_el2(val);
It doesn't look like this automatically does an ISB. Considering that it is immediately followed by Morello instructions, that could be an issue. Unless CPTR_EL2.TC already happens to be cleared, considering its initialisation in init_kernel_el?
It is indeed already cleared, so what this does is make sure that we do not lose that setting between the switches. I should probably add a comment on why a barrier is not needed in this case.
I'm not sure I understand the rationale in that case. If we assume it's already cleared (thus skipping the ISB), because we initialise it appropriately and never change the value in CPTR_EL2, then this addition is unnecessary.
Kevin
Signed-off-by: Beata Michalska beata.michalska@arm.com --- arch/arm64/include/asm/kvm_asm.h | 102 +++++++++++++++++++-- arch/arm64/kernel/hyp-stub.S | 5 + arch/arm64/kvm/hyp/entry.S | 12 ++- arch/arm64/kvm/hyp/include/hyp/switch.h | 4 +- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 58 +++++++++++- arch/arm64/kvm/hyp/nvhe/host.S | 66 +++++++++++-- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 14 +++ arch/arm64/kvm/hyp/nvhe/sys_regs.c | 4 +- 8 files changed, 243 insertions(+), 22 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 4ca7ece15385..b76f29470389 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -353,10 +353,74 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] + +#ifdef CONFIG_ARM64_MORELLO + /* + * In theory this could be if/else but to play + * on the safe side for the time being ... + * to avoid clearing tag when storing non-capability + * to capability-tagged location + */ + str c18, [\ctxt, #CPU_CREG_OFFSET(18)] + stp c19, c20, [\ctxt, #CPU_CREG_OFFSET(19)] + stp c21, c22, [\ctxt, #CPU_CREG_OFFSET(21)] + stp c23, c24, [\ctxt, #CPU_CREG_OFFSET(23)] + stp c25, c26, [\ctxt, #CPU_CREG_OFFSET(25)] + stp c27, c28, [\ctxt, #CPU_CREG_OFFSET(27)] + stp c29, clr, [\ctxt, #CPU_CREG_OFFSET(29)] +#endif .endm
+#ifdef CONFIG_ARM64_MORELLO +.macro sync_regs_c_x nr:req, x:req + cmp x\nr, \x + b.eq .Lskip_sync@ + scvalue c\nr, c\nr, \x +.Lskip_sync@: +.endm + +.macro sync_savged_regs_c_x ctxt, nr_1, nr_2, tmp_1, tmp_2 + ldp \tmp_1, \tmp_2, [\ctxt, #CPU_XREG_OFFSET(\nr_1)] + ldp c\nr_1, c\nr_2, [\ctxt, #CPU_CREG_OFFSET(\nr_1)] + sync_regs_c_x \nr_1, \tmp_1 + sync_regs_c_x \nr_2, \tmp_2 + stp c\nr_1, c\nr_2, [\ctxt, #CPU_CREG_OFFSET(\nr_1)] +.endm +#endif + .macro restore_callee_saved_regs ctxt // We require \ctxt is not x18-x28 +#ifdef CONFIG_ARM64_MORELLO + ldr x19, [\ctxt, #CPU_XREG_OFFSET(18)] + ldr c18, [\ctxt, #CPU_CREG_OFFSET(18)] + sync_regs_c_x 18, x19 + str c18, [\ctxt, #CPU_CREG_OFFSET(18)] + + sync_savged_regs_c_x \ctxt, 19, 20, x21, x22 + sync_savged_regs_c_x \ctxt, 21, 22, x19, x20 + sync_savged_regs_c_x \ctxt, 23, 24, x19, x20 + sync_savged_regs_c_x \ctxt, 25, 26, x19, x20 + sync_savged_regs_c_x \ctxt, 27, 28, x19, x20 + // Things get tricky here as we cannot use c30 for the sync here + // Note: the context is in x29 + ldr x18, [\ctxt, #CPU_XREG_OFFSET(29)] + ldr c19, [\ctxt, #CPU_CREG_OFFSET(29)] + sync_regs_c_x 19, x18 + str c19, [\ctxt, #CPU_CREG_OFFSET(29)] + + ldr x18, [\ctxt, #CPU_XREG_OFFSET(30)] + ldr c19, [\ctxt, #CPU_CREG_OFFSET(30)] + sync_regs_c_x 19, x18 + str c19, [\ctxt, #CPU_CREG_OFFSET(30)] + + ldr c18, [\ctxt, #CPU_CREG_OFFSET(18)] + ldp c19, c20, [\ctxt, #CPU_CREG_OFFSET(19)] + ldp c21, c22, [\ctxt, #CPU_CREG_OFFSET(21)] + ldp c23, c24, [\ctxt, #CPU_CREG_OFFSET(23)] + ldp c25, c26, [\ctxt, #CPU_CREG_OFFSET(25)] + ldp c27, c28, [\ctxt, #CPU_CREG_OFFSET(27)] + ldp c29, c30, [\ctxt, #CPU_CREG_OFFSET(29)] +#else ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)] ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] @@ -364,16 +428,42 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] +#endif .endm
-.macro save_sp_el0 ctxt, tmp - mrs \tmp, sp_el0 - str \tmp, [\ctxt, #CPU_SP_EL0_OFFSET] +.macro save_sp_el0 ctxt, nr +#ifdef CONFIG_ARM64_MORELLO + mrs c\nr, csp_el0 + str c\nr, [\ctxt, #CPU_CSP_EL0_OFFSET] + mrs c\nr, rcsp_el0 + str c\nr, [\ctxt, #CPU_RCSP_EL0_OFFSET] +#endif + mrs x\nr, sp_el0 + str x\nr, [\ctxt, #CPU_SP_EL0_OFFSET] + .endm
-.macro restore_sp_el0 ctxt, tmp - ldr \tmp, [\ctxt, #CPU_SP_EL0_OFFSET] - msr sp_el0, \tmp +.macro restore_sp_el0 ctxt, nr_1, nr_2 +#ifdef CONFIG_ARM64_MORELLO + morello_tst_cap_has_executive c\nr_1, x\nr_2 + b.eq .L_merge_rcsp@ + ldr c\nr_1, [\ctxt, #CPU_CSP_EL0_OFFSET] + ldr x\nr_2, [\ctxt, #CPU_SP_EL0_OFFSET] + sync_regs_c_x \nr_1, x\nr_2 + ldr c\nr_2, [\ctxt, #CPU_RCSP_EL0_OFFSET] + b .L_restore@ +.L_merge_rcsp@: + ldr c\nr_1, [\ctxt, #CPU_RCSP_EL0_OFFSET] + ldr x\nr_2, [\ctxt, #CPU_SP_EL0_OFFSET] + sync_regs_c_x \nr_2, x\nr_1 + ldr c\nr_1, [\ctxt, #CPU_CSP_EL0_OFFSET] +.L_restore@: + msr csp_el0, c\nr_1 + msr rcsp_el0, c\nr_2 +#else + ldr x\nr_1, [\ctxt, #CPU_SP_EL0_OFFSET] + msr sp_el0, x\nr_1 +#endif .endm
#endif diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 49f0b7eb8abe..a6c649ac2e00 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -48,7 +48,12 @@ SYM_CODE_END(__hyp_stub_vectors) SYM_CODE_START_LOCAL(elx_sync) cmp x0, #HVC_SET_VECTORS b.ne 1f +#ifdef CONFIG_ARM64_MORELLO + cvtp c1, x1 + msr cvbar_el2, c1 +#else msr vbar_el2, x1 +#endif b 9f
1: cmp x0, #HVC_FINALISE_EL2 diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index f3aa7738b477..77ee6759a8e6 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -32,7 +32,7 @@ SYM_FUNC_START(__guest_enter) save_callee_saved_regs x1
// Save hyp's sp_el0 - save_sp_el0 x1, x2 + save_sp_el0 x1, 2
// Now the hyp state is stored if we have a pending RAS SError it must // affect the host or hyp. If any asynchronous exception is pending we @@ -63,7 +63,7 @@ alternative_else_nop_endif ptrauth_switch_to_guest x29, x0, x1, x2
// Restore the guest's sp_el0 - restore_sp_el0 x29, x0 + restore_sp_el0 x29, 0, 1
// Restore guest regs x0-x17 ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)] @@ -135,7 +135,7 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) save_callee_saved_regs x1
// Store the guest's sp_el0 - save_sp_el0 x1, x2 + save_sp_el0 x1, 2
adr_this_cpu x2, kvm_hyp_ctxt, x3
@@ -150,7 +150,7 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) mte_switch_to_hyp x1, x2, x3
// Restore hyp's sp_el0 - restore_sp_el0 x2, x3 + restore_sp_el0 x2, 3, 4
// Now restore the hyp regs restore_callee_saved_regs x2 @@ -208,6 +208,10 @@ abort_guest_exit_end: // restore the EL1 exception context so that we can report some // information. Merge the exception code with the SError pending bit. msr elr_el2, x2 +#ifdef CONFIG_ARM64_MORELLO + cvtp c2, x2 + msr celr_el2, c2 +#endif msr esr_el2, x3 msr spsr_el2, x4 orr x0, x0, x5 diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index f99d8af0b9af..fe4c35144cee 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -733,12 +733,12 @@ static inline void __kvm_unexpected_el2_exception(void) continue; }
- write_sysreg(fixup, elr_el2); + write_sysreg_variant(fixup, elr_el2); return; }
/* Trigger a panic after restoring the hyp context. */ - write_sysreg(__guest_exit_panic, elr_el2); + write_sysreg_variant(__guest_exit_panic, elr_el2); }
#endif /* __ARM64_KVM_HYP_SWITCH_H__ */ diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index bb6b571ec627..23215827aef6 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -9,6 +9,7 @@
#include <linux/compiler.h> #include <linux/kvm_host.h> +#include <linux/cheri.h>
#include <asm/kprobes.h> #include <asm/kvm_asm.h> @@ -37,6 +38,15 @@ static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt) return kvm_has_mte(kern_hyp_va(vcpu->kvm)); }
+static __always_inline void +__sysreg_save_el1_state_ext(struct kvm_cpu_context *ctxt) +{ +#ifdef CONFIG_ARM64_MORELLO + ctxt_cap_sys_reg(ctxt, CVBAR_EL1) = read_cap_sysreg_el1(SYS_VBAR); + ctxt_cap_sys_reg(ctxt, CELR_EL1) = read_cap_sysreg_el1(SYS_ELR); +#endif +} + static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) { ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR); @@ -70,6 +80,17 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR); ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR); + + if (IS_ENABLED(CONFIG_ARM64_MORELLO)) + __sysreg_save_el1_state_ext(ctxt); +} + +static __always_inline void +__sysreg_el2_return_state_ext(struct kvm_cpu_context *ctxt) +{ +#ifdef CONFIG_ARM64_MORELLO + ctxt->pcc = read_cap_sysreg_el2(SYS_ELR); +#endif }
static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) @@ -97,6 +118,18 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0); }
+static __always_inline void +__sysreg_restore_el1_state_ext(struct kvm_cpu_context *ctxt) +{ +#ifdef CONFIG_ARM64_MORELLO + if (cheri_is_valid(ctxt_cap_sys_reg(ctxt, CVBAR_EL1))) + write_cap_sysreg_el1(ctxt_cap_sys_reg(ctxt, CVBAR_EL1), SYS_VBAR); + else + write_cap_sysreg_el1(ctxt_sys_reg(ctxt, VBAR_EL1), SYS_VBAR); + write_cap_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR); +#endif +} + static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) { write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2); @@ -164,6 +197,9 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1); write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR); write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR); + + if (IS_ENABLED(CONFIG_ARM64_MORELLO)) + __sysreg_restore_el1_state_ext(ctxt); }
/* Read the VCPU state's PSTATE, but translate (v)EL2 to EL1. */ @@ -183,6 +219,22 @@ static inline u64 to_hw_pstate(const struct kvm_cpu_context *ctxt) return (ctxt->regs.pstate & ~(PSR_MODE_MASK | PSR_MODE32_BIT)) | mode; }
+static __always_inline void +__sysreg_restore_el2_return_state_ext(struct kvm_cpu_context *ctxt) +{ +#ifdef CONFIG_ARM64_MORELLO + /* + * Cutting corners a bit here to avoid massive changes to + * KVM_GET_REGS/KVM_SET_REGS: for the time being vcpu remains + * unaware of capabilities + */ + if (cheri_is_valid(ctxt->pcc)) + write_cap_sysreg_el2(ctxt->pcc, SYS_ELR); + else + write_cap_sysreg_el2(ctxt->regs.pc, SYS_ELR); +#endif +} + static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) { u64 pstate = to_hw_pstate(ctxt); @@ -202,11 +254,13 @@ static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctx if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) pstate = PSR_MODE_EL2h | PSR_IL_BIT;
- write_sysreg_el2(ctxt->regs.pc, SYS_ELR); - write_sysreg_el2(pstate, SYS_SPSR); + write_sysreg_el2(ctxt->regs.pc, SYS_ELR); + write_sysreg_el2(pstate, SYS_SPSR);
if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2); + if (IS_ENABLED(CONFIG_ARM64_MORELLO)) + __sysreg_restore_el2_return_state_ext(ctxt); }
static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 7693a6757cd7..40bd4cd897a0 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -19,10 +19,16 @@ SYM_FUNC_START(__host_exit)
/* Store the host regs x2 and x3 */ stp x2, x3, [x0, #CPU_XREG_OFFSET(2)] +#ifdef CONFIG_ARM64_MORELLO + stp c2, c3, [x0, #CPU_CREG_OFFSET(2)] +#endif
/* Retrieve the host regs x0-x1 from the stack */ - ldp x2, x3, [sp], #16 // x0, x1 - +#ifdef CONFIG_ARM64_MORELLO + ldp c2, c3, [sp], #32 +#else + ldp x2, x3, [sp], #16 // x0, x1 +#endif /* Store the host regs x0-x1 and x4-x17 */ stp x2, x3, [x0, #CPU_XREG_OFFSET(0)] stp x4, x5, [x0, #CPU_XREG_OFFSET(4)] @@ -33,6 +39,16 @@ SYM_FUNC_START(__host_exit) stp x14, x15, [x0, #CPU_XREG_OFFSET(14)] stp x16, x17, [x0, #CPU_XREG_OFFSET(16)]
+#ifdef CONFIG_ARM64_MORELLO + stp c2, c3, [x0, #CPU_CREG_OFFSET(0)] + stp c4, c5, [x0, #CPU_CREG_OFFSET(4)] + stp c6, c7, [x0, #CPU_CREG_OFFSET(6)] + stp c8, c9, [x0, #CPU_CREG_OFFSET(8)] + stp c10, c11, [x0, #CPU_CREG_OFFSET(10)] + stp c12, c13, [x0, #CPU_CREG_OFFSET(12)] + stp c14, c15, [x0, #CPU_CREG_OFFSET(14)] + stp c16, c17, [x0, #CPU_CREG_OFFSET(16)] +#endif /* Store the host regs x18-x29, lr */ save_callee_saved_regs x0
@@ -75,19 +91,43 @@ __skip_pauth_restore: #endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */
/* Restore host regs x0-x17 */ +#ifdef CONFIG_ARM64_MORELLO + sync_savged_regs_c_x x29, 0, 1, x2, x3 + sync_savged_regs_c_x x29, 2, 3, x0, x1 + sync_savged_regs_c_x x29, 4, 5, x0, x1 + sync_savged_regs_c_x x29, 6, 7, x0, x1 + + ldp c0, c1, [x29, #CPU_CREG_OFFSET(0)] + ldp c2, c3, [x29, #CPU_CREG_OFFSET(2)] + ldp c4, c5, [x29, #CPU_CREG_OFFSET(4)] + ldp c6, c7, [x29, #CPU_CREG_OFFSET(6)] +#else ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)] ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)] ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)] ldp x6, x7, [x29, #CPU_XREG_OFFSET(6)] - +#endif /* x0-7 are use for panic arguments */ __host_enter_for_panic: +#ifdef CONFIG_ARM64_MORELLO + sync_savged_regs_c_x x29, 8, 9, x10, x11 + sync_savged_regs_c_x x29, 10, 11, x8, x9 + sync_savged_regs_c_x x29, 12, 13, x8, x9 + sync_savged_regs_c_x x29, 14, 15, x8, x9 + sync_savged_regs_c_x x29, 16, 17, x8, x9 + + ldp c8, c9, [x29, #CPU_CREG_OFFSET(8)] + ldp c10, c11, [x29, #CPU_CREG_OFFSET(10)] + ldp c12, c13, [x29, #CPU_CREG_OFFSET(12)] + ldp c14, c15, [x29, #CPU_CREG_OFFSET(14)] + ldp c16, c17, [x29, #CPU_CREG_OFFSET(16)] +#else ldp x8, x9, [x29, #CPU_XREG_OFFSET(8)] ldp x10, x11, [x29, #CPU_XREG_OFFSET(10)] ldp x12, x13, [x29, #CPU_XREG_OFFSET(12)] ldp x14, x15, [x29, #CPU_XREG_OFFSET(14)] ldp x16, x17, [x29, #CPU_XREG_OFFSET(16)] - +#endif /* Restore host regs x18-x29, lr */ restore_callee_saved_regs x29
@@ -117,7 +157,10 @@ SYM_FUNC_START(__hyp_do_panic) adr_l lr, nvhe_hyp_panic_handler hyp_kimg_va lr, x6 msr elr_el2, lr - +#ifdef CONFIG_ARM64_MORELLO + cvtd clr, lr + msr celr_el2, clr +#endif mov x29, x0
#ifdef CONFIG_NVHE_EL2_DEBUG @@ -145,8 +188,11 @@ SYM_FUNC_START(__hyp_do_panic) SYM_FUNC_END(__hyp_do_panic)
SYM_FUNC_START(__host_hvc) +#ifndef CONFIG_ARM64_MORELLO ldp x0, x1, [sp] // Don't fixup the stack yet - +#else + ldp c0, c1, [sp] +#endif /* No stub for you, sonny Jim */ alternative_if ARM64_KVM_PROTECTED_MODE b __host_exit @@ -156,7 +202,11 @@ alternative_else_nop_endif cmp x0, #HVC_STUB_HCALL_NR b.hs __host_exit
+#ifndef CONFIG_ARM64_MORELLO add sp, sp, #16 +#else + add sp, sp, #32 +#endif /* * Compute the idmap address of __kvm_handle_stub_hvc and * jump there. @@ -171,7 +221,11 @@ SYM_FUNC_END(__host_hvc) .macro host_el1_sync_vect .align 7 .L__vect_start@: +#ifndef CONFIG_ARM64_MORELLO stp x0, x1, [sp, #-16]! +#else + stp c0, c1, [sp, #-32]! +#endif mrs x0, esr_el2 ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH cmp x0, #ESR_ELx_EC_HVC64 diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 1cc06e6797bd..9b621692af2c 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -154,7 +154,12 @@ alternative_else_nop_endif
/* Set the host vector */ ldr x0, =__kvm_hyp_host_vector +#ifndef CONFIG_ARM64_MORELLO msr vbar_el2, x0 +#else + cvtp c0, x0 + msr cvbar_el2, c0 +#endif
ret SYM_CODE_END(___kvm_hyp_init) @@ -228,6 +233,10 @@ SYM_CODE_START(__kvm_handle_stub_hvc)
/* This is where we're about to jump, staying at EL2 */ msr elr_el2, x1 +#ifdef CONFIG_ARM64_MORELLO + cvtp c1, x1 + msr celr_el2, c1 +#endif mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT | PSR_MODE_EL2h) msr spsr_el2, x0
@@ -260,7 +269,12 @@ alternative_else_nop_endif
/* Install stub vectors */ adr_l x5, __hyp_stub_vectors +#ifdef CONFIG_ARM64_MORELLO + cvtp c5, x5 + msr cvbar_el1, c5 +#else msr vbar_el2, x5 +#endif eret
1: /* Bad stub call */ diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index edd969a1f36b..15e14961a0bb 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -44,8 +44,8 @@ static void inject_undef64(struct kvm_vcpu *vcpu) __kvm_adjust_pc(vcpu);
write_sysreg_el1(esr, SYS_ESR); - write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR); - write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); + write_cap_sysreg_el1(read_cap_sysreg_el2(SYS_ELR), SYS_ELR); + write_cap_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR); }
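Before the review comments below, it may help to note that the merging done by sync_regs_c_x above is, conceptually, the same trick the Morello kernel entry code plays. In C-like terms (a sketch using cheriintrin.h-style accessors rather than the actual kernel helpers):

	#include <stdint.h>
	#include <cheriintrin.h>

	/*
	 * Conceptual model of sync_regs_c_x: if the saved 64-bit Xn value has
	 * been modified (e.g. by KVM itself), fold the new address into the
	 * saved capability. Broadly, the tag survives as long as the capability
	 * is unsealed and the new address stays representable, which is what
	 * SCVALUE gives us.
	 */
	static inline uintcap_t merge_c_x(uintcap_t creg, uint64_t xreg)
	{
		if (cheri_address_get(creg) != xreg)
			creg = cheri_address_set(creg, xreg);

		return creg;
	}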
On 26/02/2024 10:20, Beata Michalska wrote:
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/include/asm/kvm_asm.h | 102 +++++++++++++++++++-- arch/arm64/kernel/hyp-stub.S | 5 + arch/arm64/kvm/hyp/entry.S | 12 ++- arch/arm64/kvm/hyp/include/hyp/switch.h | 4 +- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 58 +++++++++++- arch/arm64/kvm/hyp/nvhe/host.S | 66 +++++++++++-- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 14 +++ arch/arm64/kvm/hyp/nvhe/sys_regs.c | 4 +- 8 files changed, 243 insertions(+), 22 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 4ca7ece15385..b76f29470389 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -353,10 +353,74 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)]
+#ifdef CONFIG_ARM64_MORELLO
- /*
* In theory this could be if/else but to play
* on the safe side for the time being ...
* to avoid clearing tag when storing non-capability
* to capability-tagged location
Not sure this is very clear. It looks like the main reason we need this whole dance (saving separately + merging when restoring) is that KVM happens to modify the saved X regs in places, just like the kernel does, and we use the merging approach for the same reason. Would probably be better to have this comment in the commit message as it applies to a lot of code in this patch.
*/
- str c18, [\ctxt, #CPU_CREG_OFFSET(18)]
- stp c19, c20, [\ctxt, #CPU_CREG_OFFSET(19)]
- stp c21, c22, [\ctxt, #CPU_CREG_OFFSET(21)]
- stp c23, c24, [\ctxt, #CPU_CREG_OFFSET(23)]
- stp c25, c26, [\ctxt, #CPU_CREG_OFFSET(25)]
- stp c27, c28, [\ctxt, #CPU_CREG_OFFSET(27)]
- stp c29, clr, [\ctxt, #CPU_CREG_OFFSET(29)]
+#endif .endm +#ifdef CONFIG_ARM64_MORELLO +.macro sync_regs_c_x nr:req, x:req
- cmp x\nr, \x
- b.eq .Lskip_sync@
- scvalue c\nr, c\nr, \x
+.Lskip_sync@: +.endm
Unless we can't include <asm/morello.h> for some reason, we could use the existing morello_merge_c_x instead.
+.macro sync_savged_regs_c_x ctxt, nr_1, nr_2, tmp_1, tmp_2
- ldp \tmp_1, \tmp_2, [\ctxt, #CPU_XREG_OFFSET(\nr_1)]
- ldp c\nr_1, c\nr_2, [\ctxt, #CPU_CREG_OFFSET(\nr_1)]
- sync_regs_c_x \nr_1, \tmp_1
- sync_regs_c_x \nr_2, \tmp_2
- stp c\nr_1, c\nr_2, [\ctxt, #CPU_CREG_OFFSET(\nr_1)]
+.endm +#endif
.macro restore_callee_saved_regs ctxt // We require \ctxt is not x18-x28 +#ifdef CONFIG_ARM64_MORELLO
- ldr x19, [\ctxt, #CPU_XREG_OFFSET(18)]
- ldr c18, [\ctxt, #CPU_CREG_OFFSET(18)]
- sync_regs_c_x 18, x19
- str c18, [\ctxt, #CPU_CREG_OFFSET(18)]
- sync_savged_regs_c_x \ctxt, 19, 20, x21, x22
- sync_savged_regs_c_x \ctxt, 21, 22, x19, x20
- sync_savged_regs_c_x \ctxt, 23, 24, x19, x20
- sync_savged_regs_c_x \ctxt, 25, 26, x19, x20
- sync_savged_regs_c_x \ctxt, 27, 28, x19, x20
- // Things get tricky here as we cannot use c30 for the sync here
- // Note: the context is in x29
- ldr x18, [\ctxt, #CPU_XREG_OFFSET(29)]
- ldr c19, [\ctxt, #CPU_CREG_OFFSET(29)]
- sync_regs_c_x 19, x18
- str c19, [\ctxt, #CPU_CREG_OFFSET(29)]
- ldr x18, [\ctxt, #CPU_XREG_OFFSET(30)]
- ldr c19, [\ctxt, #CPU_CREG_OFFSET(30)]
- sync_regs_c_x 19, x18
- str c19, [\ctxt, #CPU_CREG_OFFSET(30)]
- ldr c18, [\ctxt, #CPU_CREG_OFFSET(18)]
- ldp c19, c20, [\ctxt, #CPU_CREG_OFFSET(19)]
- ldp c21, c22, [\ctxt, #CPU_CREG_OFFSET(21)]
- ldp c23, c24, [\ctxt, #CPU_CREG_OFFSET(23)]
- ldp c25, c26, [\ctxt, #CPU_CREG_OFFSET(25)]
- ldp c27, c28, [\ctxt, #CPU_CREG_OFFSET(27)]
- ldp c29, c30, [\ctxt, #CPU_CREG_OFFSET(29)]
This feels more complicated than it needs to be; couldn't it be very similar to the restore code in kernel_exit (entry.S)? Two registers do need to be stashed and reloaded at the end, but apart from that it should be possible to handle everything with the merge macro (without systematically writing back). You may need to introduce one for working on a single register though (like morello_ldr_merge_c_x in entry.S), that would be useful for restore_sp_el0 as well.
Also nit: better to use clr everywhere for consistency (not c30).
+#else ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)] ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] @@ -364,16 +428,42 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] +#endif .endm -.macro save_sp_el0 ctxt, tmp
- mrs \tmp, sp_el0
- str \tmp, [\ctxt, #CPU_SP_EL0_OFFSET]
+.macro save_sp_el0 ctxt, nr +#ifdef CONFIG_ARM64_MORELLO
- mrs c\nr, csp_el0
- str c\nr, [\ctxt, #CPU_CSP_EL0_OFFSET]
- mrs c\nr, rcsp_el0
- str c\nr, [\ctxt, #CPU_RCSP_EL0_OFFSET]
+#endif
- mrs x\nr, sp_el0
- str x\nr, [\ctxt, #CPU_SP_EL0_OFFSET]
If the saved SP is ever read by KVM, then the Executive/Restricted selection needs to be done too, like in entry.S.
.endm -.macro restore_sp_el0 ctxt, tmp
- ldr \tmp, [\ctxt, #CPU_SP_EL0_OFFSET]
- msr sp_el0, \tmp
+.macro restore_sp_el0 ctxt, nr_1, nr_2
It doesn't look like we're short on registers at the point where this is called, so might as well take a third temporary to simplify the macro (one for CSP, one for RCSP, one for SP).
+#ifdef CONFIG_ARM64_MORELLO
- morello_tst_cap_has_executive c\nr_1, x\nr_2
- b.eq .L_merge_rcsp@
- ldr c\nr_1, [\ctxt, #CPU_CSP_EL0_OFFSET]
- ldr x\nr_2, [\ctxt, #CPU_SP_EL0_OFFSET]
- sync_regs_c_x \nr_1, x\nr_2
- ldr c\nr_2, [\ctxt, #CPU_RCSP_EL0_OFFSET]
- b .L_restore@
+.L_merge_rcsp@:
- ldr c\nr_1, [\ctxt, #CPU_RCSP_EL0_OFFSET]
- ldr x\nr_2, [\ctxt, #CPU_SP_EL0_OFFSET]
- sync_regs_c_x \nr_2, x\nr_1
- ldr c\nr_1, [\ctxt, #CPU_CSP_EL0_OFFSET]
+.L_restore@:
- msr csp_el0, c\nr_1
- msr rcsp_el0, c\nr_2
+#else
- ldr x\nr_1, [\ctxt, #CPU_SP_EL0_OFFSET]
- msr sp_el0, x\nr_1
+#endif .endm #endif diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 49f0b7eb8abe..a6c649ac2e00 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -48,7 +48,12 @@ SYM_CODE_END(__hyp_stub_vectors) SYM_CODE_START_LOCAL(elx_sync) cmp x0, #HVC_SET_VECTORS b.ne 1f +#ifdef CONFIG_ARM64_MORELLO
- cvtp c1, x1
- msr cvbar_el2, c1
+#else msr vbar_el2, x1 +#endif b 9f 1: cmp x0, #HVC_FINALISE_EL2 diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index f3aa7738b477..77ee6759a8e6 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -32,7 +32,7 @@ SYM_FUNC_START(__guest_enter) save_callee_saved_regs x1 // Save hyp's sp_el0
- save_sp_el0 x1, x2
- save_sp_el0 x1, 2
// Now the hyp state is stored if we have a pending RAS SError it must // affect the host or hyp. If any asynchronous exception is pending we @@ -63,7 +63,7 @@ alternative_else_nop_endif ptrauth_switch_to_guest x29, x0, x1, x2 // Restore the guest's sp_el0
- restore_sp_el0 x29, x0
- restore_sp_el0 x29, 0, 1
// Restore guest regs x0-x17 ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)] @@ -135,7 +135,7 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) save_callee_saved_regs x1 // Store the guest's sp_el0
- save_sp_el0 x1, x2
- save_sp_el0 x1, 2
adr_this_cpu x2, kvm_hyp_ctxt, x3 @@ -150,7 +150,7 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) mte_switch_to_hyp x1, x2, x3 // Restore hyp's sp_el0
- restore_sp_el0 x2, x3
- restore_sp_el0 x2, 3, 4
// Now restore the hyp regs restore_callee_saved_regs x2 @@ -208,6 +208,10 @@ abort_guest_exit_end: // restore the EL1 exception context so that we can report some // information. Merge the exception code with the SError pending bit. msr elr_el2, x2
Should be in #else after the #ifdef.
+#ifdef CONFIG_ARM64_MORELLO
- cvtp c2, x2
- msr celr_el2, c2
+#endif msr esr_el2, x3 msr spsr_el2, x4 orr x0, x0, x5 diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index f99d8af0b9af..fe4c35144cee 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -733,12 +733,12 @@ static inline void __kvm_unexpected_el2_exception(void) continue; }
-		write_sysreg(fixup, elr_el2);
+		write_sysreg_variant(fixup, elr_el2);
 		return;
 	}
/* Trigger a panic after restoring the hyp context. */
-	write_sysreg(__guest_exit_panic, elr_el2);
+	write_sysreg_variant(__guest_exit_panic, elr_el2);
} #endif /* __ARM64_KVM_HYP_SWITCH_H__ */ diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index bb6b571ec627..23215827aef6 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -9,6 +9,7 @@ #include <linux/compiler.h> #include <linux/kvm_host.h> +#include <linux/cheri.h> #include <asm/kprobes.h> #include <asm/kvm_asm.h> @@ -37,6 +38,15 @@ static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt) return kvm_has_mte(kern_hyp_va(vcpu->kvm)); } +static __always_inline void +__sysreg_save_el1_state_ext(struct kvm_cpu_context *ctxt) +{ +#ifdef CONFIG_ARM64_MORELLO
- ctxt_cap_sys_reg(ctxt, CVBAR_EL1) = read_cap_sysreg_el1(SYS_VBAR);
- ctxt_cap_sys_reg(ctxt, CELR_EL1) = read_cap_sysreg_el1(SYS_ELR);
+#endif +}
static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) { ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR); @@ -70,6 +80,17 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR);
Does this still need to be done if we save CELR_EL1? In other words, does it have a purpose beyond being restored (which is redundant on Morello)? Same question for VBAR_EL1. I'd just #ifdef those otherwise, without adding new helpers.
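For illustration, the "#ifdef those" approach could look roughly like this for ELR_EL1 (untested sketch, and only valid if the 64-bit copy really has no purpose beyond being restored on Morello):

#ifdef CONFIG_ARM64_MORELLO
	/* CELR_EL1 carries the full capability, including the 64-bit value */
	ctxt_cap_sys_reg(ctxt, CELR_EL1) = read_cap_sysreg_el1(SYS_ELR);
#else
	ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR);
#endif

with the equivalent for VBAR_EL1, rather than routing it through a separate _ext() helper.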
ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR);
- if (IS_ENABLED(CONFIG_ARM64_MORELLO))
__sysreg_save_el1_state_ext(ctxt);
+}
+static __always_inline void +__sysreg_el2_return_state_ext(struct kvm_cpu_context *ctxt) +{ +#ifdef CONFIG_ARM64_MORELLO
- ctxt->pcc = read_cap_sysreg_el2(SYS_ELR);
+#endif } static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) @@ -97,6 +118,18 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0); } +static __always_inline void +__sysreg_restore_el1_state_ext(struct kvm_cpu_context *ctxt) +{ +#ifdef CONFIG_ARM64_MORELLO
- if (cheri_is_valid(ctxt_cap_sys_reg(ctxt, CVBAR_EL1)))
I don't really understand this. This seems to be meant for guests that are Morello-unaware, but such guests do not enable Morello in the first place, so they use non-capability exception entry/return and only the 64-bit part of VBAR_EL1 is relevant (see section 2.13 of the Morello spec for more details).
write_cap_sysreg_el1(ctxt_cap_sys_reg(ctxt, CVBAR_EL1), SYS_VBAR);
- else
write_cap_sysreg_el1(ctxt_sys_reg(ctxt, VBAR_EL1), SYS_VBAR);
- write_cap_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR);
+#endif +}
static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) { write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2); @@ -164,6 +197,9 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1); write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR); write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR);
- if (IS_ENABLED(CONFIG_ARM64_MORELLO))
__sysreg_restore_el1_state_ext(ctxt);
} /* Read the VCPU state's PSTATE, but translate (v)EL2 to EL1. */ @@ -183,6 +219,22 @@ static inline u64 to_hw_pstate(const struct kvm_cpu_context *ctxt) return (ctxt->regs.pstate & ~(PSR_MODE_MASK | PSR_MODE32_BIT)) | mode; } +static __always_inline void +__sysreg_restore_el2_return_state_ext(struct kvm_cpu_context *ctxt) +{ +#ifdef CONFIG_ARM64_MORELLO
- /*
* Cutting corners a bit here to avoid massive changes to
* KVM_GET_REGS/KVM_SET_REGS: for the time being vcpu remains
* unaware of capabilities
*/
- if (cheri_is_valid(ctxt->pcc))
write_cap_sysreg_el2(ctxt->pcc, SYS_ELR);
- else
write_cap_sysreg_el2(ctxt->regs.pc, SYS_ELR);
+#endif +}
static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) { u64 pstate = to_hw_pstate(ctxt); @@ -202,11 +254,13 @@ static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctx if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) pstate = PSR_MODE_EL2h | PSR_IL_BIT;
- write_sysreg_el2(ctxt->regs.pc, SYS_ELR);
- write_sysreg_el2(pstate, SYS_SPSR);
- write_sysreg_el2(ctxt->regs.pc, SYS_ELR);
- write_sysreg_el2(pstate, SYS_SPSR);
That seems to be purely a whitespace change, so I would leave it untouched.
if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2);
- if (IS_ENABLED(CONFIG_ARM64_MORELLO))
__sysreg_restore_el2_return_state_ext(ctxt);
} static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 7693a6757cd7..40bd4cd897a0 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -19,10 +19,16 @@ SYM_FUNC_START(__host_exit) /* Store the host regs x2 and x3 */ stp x2, x3, [x0, #CPU_XREG_OFFSET(2)] +#ifdef CONFIG_ARM64_MORELLO
- stp c2, c3, [x0, #CPU_CREG_OFFSET(2)]
+#endif /* Retrieve the host regs x0-x1 from the stack */
- ldp x2, x3, [sp], #16 // x0, x1
+#ifdef CONFIG_ARM64_MORELLO
- ldp c2, c3, [sp], #32
+#else
- ldp x2, x3, [sp], #16 // x0, x1
+#endif /* Store the host regs x0-x1 and x4-x17 */ stp x2, x3, [x0, #CPU_XREG_OFFSET(0)] stp x4, x5, [x0, #CPU_XREG_OFFSET(4)] @@ -33,6 +39,16 @@ SYM_FUNC_START(__host_exit) stp x14, x15, [x0, #CPU_XREG_OFFSET(14)] stp x16, x17, [x0, #CPU_XREG_OFFSET(16)] +#ifdef CONFIG_ARM64_MORELLO
- stp c2, c3, [x0, #CPU_CREG_OFFSET(0)]
- stp c4, c5, [x0, #CPU_CREG_OFFSET(4)]
- stp c6, c7, [x0, #CPU_CREG_OFFSET(6)]
- stp c8, c9, [x0, #CPU_CREG_OFFSET(8)]
- stp c10, c11, [x0, #CPU_CREG_OFFSET(10)]
- stp c12, c13, [x0, #CPU_CREG_OFFSET(12)]
- stp c14, c15, [x0, #CPU_CREG_OFFSET(14)]
- stp c16, c17, [x0, #CPU_CREG_OFFSET(16)]
+#endif /* Store the host regs x18-x29, lr */ save_callee_saved_regs x0 @@ -75,19 +91,43 @@ __skip_pauth_restore: #endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */ /* Restore host regs x0-x17 */ +#ifdef CONFIG_ARM64_MORELLO
- sync_savged_regs_c_x x29, 0, 1, x2, x3
- sync_savged_regs_c_x x29, 2, 3, x0, x1
- sync_savged_regs_c_x x29, 4, 5, x0, x1
- sync_savged_regs_c_x x29, 6, 7, x0, x1
- ldp c0, c1, [x29, #CPU_CREG_OFFSET(0)]
- ldp c2, c3, [x29, #CPU_CREG_OFFSET(2)]
- ldp c4, c5, [x29, #CPU_CREG_OFFSET(4)]
- ldp c6, c7, [x29, #CPU_CREG_OFFSET(6)]
+#else ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)] ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)] ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)] ldp x6, x7, [x29, #CPU_XREG_OFFSET(6)]
+#endif /* x0-7 are use for panic arguments */ __host_enter_for_panic: +#ifdef CONFIG_ARM64_MORELLO
- sync_savged_regs_c_x x29, 8, 9, x10, x11
- sync_savged_regs_c_x x29, 10, 11, x8, x9
- sync_savged_regs_c_x x29, 12, 13, x8, x9
- sync_savged_regs_c_x x29, 14, 15, x8, x9
- sync_savged_regs_c_x x29, 16, 17, x8, x9
- ldp c8, c9, [x29, #CPU_CREG_OFFSET(8)]
- ldp c10, c11, [x29, #CPU_CREG_OFFSET(10)]
- ldp c12, c13, [x29, #CPU_CREG_OFFSET(12)]
- ldp c14, c15, [x29, #CPU_CREG_OFFSET(14)]
- ldp c16, c17, [x29, #CPU_CREG_OFFSET(16)]
+#else ldp x8, x9, [x29, #CPU_XREG_OFFSET(8)] ldp x10, x11, [x29, #CPU_XREG_OFFSET(10)] ldp x12, x13, [x29, #CPU_XREG_OFFSET(12)] ldp x14, x15, [x29, #CPU_XREG_OFFSET(14)] ldp x16, x17, [x29, #CPU_XREG_OFFSET(16)]
+#endif /* Restore host regs x18-x29, lr */ restore_callee_saved_regs x29 @@ -117,7 +157,10 @@ SYM_FUNC_START(__hyp_do_panic) adr_l lr, nvhe_hyp_panic_handler hyp_kimg_va lr, x6 msr elr_el2, lr
+#ifdef CONFIG_ARM64_MORELLO
- cvtd clr, lr
s/cvtd/cvtp/, and the msr elr_el2 should be in #else.
- msr celr_el2, clr
+#endif mov x29, x0 #ifdef CONFIG_NVHE_EL2_DEBUG @@ -145,8 +188,11 @@ SYM_FUNC_START(__hyp_do_panic) SYM_FUNC_END(__hyp_do_panic) SYM_FUNC_START(__host_hvc) +#ifndef CONFIG_ARM64_MORELLO
Nit: better to use #ifdef to avoid the double negation in #else (there are a few instances of this).
ldp x0, x1, [sp] // Don't fixup the stack yet
+#else
- ldp c0, c1, [sp]
+#endif /* No stub for you, sonny Jim */ alternative_if ARM64_KVM_PROTECTED_MODE b __host_exit @@ -156,7 +202,11 @@ alternative_else_nop_endif cmp x0, #HVC_STUB_HCALL_NR b.hs __host_exit +#ifndef CONFIG_ARM64_MORELLO add sp, sp, #16 +#else
- add sp, sp, #32
+#endif /* * Compute the idmap address of __kvm_handle_stub_hvc and * jump there. @@ -171,7 +221,11 @@ SYM_FUNC_END(__host_hvc) .macro host_el1_sync_vect .align 7 .L__vect_start@: +#ifndef CONFIG_ARM64_MORELLO stp x0, x1, [sp, #-16]! +#else
- stp c0, c1, [sp, #-32]!
+#endif mrs x0, esr_el2 ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH cmp x0, #ESR_ELx_EC_HVC64 diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 1cc06e6797bd..9b621692af2c 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -154,7 +154,12 @@ alternative_else_nop_endif /* Set the host vector */ ldr x0, =__kvm_hyp_host_vector +#ifndef CONFIG_ARM64_MORELLO msr vbar_el2, x0 +#else
- cvtp c0, x0
- msr cvbar_el2, c0
+#endif ret SYM_CODE_END(___kvm_hyp_init) @@ -228,6 +233,10 @@ SYM_CODE_START(__kvm_handle_stub_hvc) /* This is where we're about to jump, staying at EL2 */ msr elr_el2, x1 +#ifdef CONFIG_ARM64_MORELLO
- cvtp c1, x1
- msr celr_el2, c1
+#endif mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT | PSR_MODE_EL2h) msr spsr_el2, x0 @@ -260,7 +269,12 @@ alternative_else_nop_endif /* Install stub vectors */ adr_l x5, __hyp_stub_vectors +#ifdef CONFIG_ARM64_MORELLO
- cvtp c5, x5
- msr cvbar_el1, c5
cvbar_el2 surely?
Kevin
+#else msr vbar_el2, x5 +#endif eret 1: /* Bad stub call */ diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index edd969a1f36b..15e14961a0bb 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -44,8 +44,8 @@ static void inject_undef64(struct kvm_vcpu *vcpu) __kvm_adjust_pc(vcpu); write_sysreg_el1(esr, SYS_ESR);
-	write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR);
-	write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
+	write_cap_sysreg_el1(read_cap_sysreg_el2(SYS_ELR), SYS_ELR);
+	write_cap_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
 	write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
}
Add support for managing kvm virtual machines through plain aarch64 tools. No support for purecap guests just yet.
Signed-off-by: Beata Michalska beata.michalska@arm.com --- Documentation/virt/kvm/api.rst | 10 +- arch/arm64/include/uapi/asm/kvm.h | 6 +- arch/arm64/kvm/arch_timer.c | 4 +- arch/arm64/kvm/arm.c | 205 ++++++++++++++++-------- arch/arm64/kvm/guest.c | 10 +- arch/arm64/kvm/hypercalls.c | 4 +- arch/arm64/kvm/pmu-emul.c | 8 +- arch/arm64/kvm/sys_regs.c | 10 +- arch/arm64/kvm/vgic/vgic-its.c | 8 +- arch/arm64/kvm/vgic/vgic-kvm-device.c | 11 +- include/linux/kvm_host.h | 8 +- include/uapi/linux/kvm.h | 10 +- tools/arch/arm64/include/uapi/asm/kvm.h | 6 +- tools/include/uapi/linux/kvm.h | 12 +- virt/kvm/Kconfig | 2 +- virt/kvm/kvm_main.c | 99 +++++++++--- virt/kvm/vfio.c | 2 +- 17 files changed, 272 insertions(+), 143 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 7025b3751027..7a8370fbb357 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -2308,8 +2308,8 @@ code being returned in a specific situation.) ::
struct kvm_one_reg { - __u64 id; - __u64 addr; + __u64 id; + __kernel_uintptr_t addr; };
Using this ioctl, a single vcpu register can be set to a specific value @@ -6160,9 +6160,9 @@ applied. #define KVM_ARM_FEATURE_ID_RANGE_SIZE (3 * 8 * 8)
struct reg_mask_range { - __u64 addr; /* Pointer to mask array */ - __u32 range; /* Requested range */ - __u32 reserved[13]; + __kernel_uintptr_t addr; /* Pointer to mask array */ + __u32 range; /* Requested range */ + __u32 reserved[13]; };
This ioctl copies the writable masks for a selected range of registers to diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 89d2fc872d9f..4f854ef1f76d 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -532,9 +532,9 @@ struct kvm_smccc_filter { #define KVM_ARM_FEATURE_ID_RANGE_SIZE (3 * 8 * 8)
struct reg_mask_range { - __u64 addr; /* Pointer to mask array */ - __u32 range; /* Requested range */ - __u32 reserved[13]; + __kernel_uintptr_t addr; /* Pointer to mask array */ + __u32 range; /* Requested range */ + __u32 reserved[13]; };
#endif diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 13ba691b848f..8b70eeeebf33 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1559,7 +1559,7 @@ void kvm_timer_init_vhe(void)
int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - int __user *uaddr = (int __user *)(long)attr->addr; + int __user *uaddr = (int __user *)attr->addr; int irq, idx, ret = 0;
if (!irqchip_in_kernel(vcpu->kvm)) @@ -1611,7 +1611,7 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - int __user *uaddr = (int __user *)(long)attr->addr; + int __user *uaddr = (int __user *)attr->addr; struct arch_timer_context *timer; int irq;
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index bc945eac681d..a49f7603f03e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1512,33 +1512,38 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, user_uintptr_t arg) { struct kvm_vcpu *vcpu = filp->private_data; - void __user *argp = (void __user *)arg; + void __user *argp; struct kvm_device_attr attr; long r;
- switch (ioctl) { - case KVM_ARM_VCPU_INIT: { - struct kvm_vcpu_init init; - - r = -EFAULT; - if (copy_from_user(&init, argp, sizeof(init))) - break; + argp = in_compat64_syscall() ? compat_ptr(arg) : (void __user *)arg;
- r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init); - break; - } - case KVM_SET_ONE_REG: - case KVM_GET_ONE_REG: { + switch (_IOC_NR(ioctl)) { + case _IOC_NR(KVM_SET_ONE_REG): + case _IOC_NR(KVM_GET_ONE_REG): { struct kvm_one_reg reg;
r = -ENOEXEC; if (unlikely(!kvm_vcpu_initialized(vcpu))) - break; + goto done;
r = -EFAULT; - if (copy_from_user(®, argp, sizeof(reg))) - break;
+ if (!in_compat64_syscall()) { + if (copy_from_user_with_ptr(®, argp, sizeof(reg))) + goto done; + } else { + struct { + __u64 id; + compat_uptr_t addr; + } __reg; + + if (copy_from_user(&__reg, argp, sizeof(__reg))) + goto done; + + reg.id = __reg.id; + reg.addr = (__kernel_uintptr_t)compat_ptr(__reg.addr); + } /* * We could owe a reset due to PSCI. Handle the pending reset * here to ensure userspace register accesses are ordered after @@ -1547,10 +1552,47 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) kvm_reset_vcpu(vcpu);
- if (ioctl == KVM_SET_ONE_REG) + if (_IOC_NR(ioctl) == _IOC_NR(KVM_SET_ONE_REG)) r = kvm_arm_set_reg(vcpu, ®); else r = kvm_arm_get_reg(vcpu, ®); + + goto done; + } + case _IOC_NR(KVM_SET_DEVICE_ATTR): + case _IOC_NR(KVM_GET_DEVICE_ATTR): + case _IOC_NR(KVM_HAS_DEVICE_ATTR): { + r = -EFAULT; + + if (!in_compat64_syscall()) { + if (copy_from_user_with_ptr(&attr, argp, sizeof(attr))) + goto done; + } else { + r = kvm_get_device_attr_compat(ioctl, arg, &attr); + if (r) goto done; + } + + if (_IOC_NR(ioctl) == _IOC_NR(KVM_SET_DEVICE_ATTR)) + r = kvm_arm_vcpu_set_attr(vcpu, &attr); + else if (_IOC_NR(ioctl) == _IOC_NR(KVM_GET_DEVICE_ATTR)) + r = kvm_arm_vcpu_get_attr(vcpu, &attr); + else + r = kvm_arm_vcpu_has_attr(vcpu, &attr); + goto done; + } + default: + break; + } + + switch (ioctl) { + case KVM_ARM_VCPU_INIT: { + struct kvm_vcpu_init init; + + r = -EFAULT; + if (copy_from_user(&init, argp, sizeof(init))) + break; + + r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init); break; } case KVM_GET_REG_LIST: { @@ -1579,27 +1621,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_arm_copy_reg_indices(vcpu, user_list->reg); break; } - case KVM_SET_DEVICE_ATTR: { - r = -EFAULT; - if (copy_from_user(&attr, argp, sizeof(attr))) - break; - r = kvm_arm_vcpu_set_attr(vcpu, &attr); - break; - } - case KVM_GET_DEVICE_ATTR: { - r = -EFAULT; - if (copy_from_user(&attr, argp, sizeof(attr))) - break; - r = kvm_arm_vcpu_get_attr(vcpu, &attr); - break; - } - case KVM_HAS_DEVICE_ATTR: { - r = -EFAULT; - if (copy_from_user(&attr, argp, sizeof(attr))) - break; - r = kvm_arm_vcpu_has_attr(vcpu, &attr); - break; - } case KVM_GET_VCPU_EVENTS: { struct kvm_vcpu_events events;
@@ -1633,7 +1654,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, default: r = -EINVAL; } - +done: return r; }
@@ -1678,9 +1699,88 @@ static int kvm_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, user_uintptr_t arg) { struct kvm *kvm = filp->private_data; - void __user *argp = (void __user *)arg; + void __user *argp; struct kvm_device_attr attr;
+ argp = in_compat64_syscall() ? compat_ptr(arg) : (void __user *)arg; + + switch (_IOC_NR(ioctl)) { + case _IOC_NR(KVM_ARM_MTE_COPY_TAGS): { + struct kvm_arm_copy_mte_tags copy_tags; + + if (!in_compat64_syscall()) { + if (copy_from_user_with_ptr(©_tags, argp, + sizeof(copy_tags))) + return -EFAULT; + } else { + struct { + __u64 guest_ipa; + __u64 length; + compat_uptr_t addr; + __u64 flags; + __u64 reserved[2]; + } __copy_tags; + + if (copy_from_user(&__copy_tags, argp, + sizeof(__copy_tags))) + return -EFAULT; + + copy_tags.guest_ipa = __copy_tags.guest_ipa; + copy_tags.length = __copy_tags.length; + copy_tags.addr = compat_ptr(__copy_tags.addr); + copy_tags.flags = __copy_tags.flags; + + memcpy(copy_tags.reserved, __copy_tags.reserved, + sizeof(__copy_tags.reserved)); + } + + return kvm_vm_ioctl_mte_copy_tags(kvm, ©_tags); + } + case _IOC_NR(KVM_HAS_DEVICE_ATTR): + case _IOC_NR(KVM_SET_DEVICE_ATTR): { + if (!in_compat64_syscall()) { + if (copy_from_user_with_ptr(&attr, argp, sizeof(attr))) + return -EFAULT; + } else { + int ret; + + ret = kvm_get_device_attr_compat(ioctl, arg, &attr); + if (ret) + return ret; + } + + return _IOC_NR(ioctl) == _IOC_NR(KVM_HAS_DEVICE_ATTR) + ? kvm_vm_has_attr(kvm, &attr) + : kvm_vm_set_attr(kvm, &attr); + } + case _IOC_NR(KVM_ARM_GET_REG_WRITABLE_MASKS): { + struct reg_mask_range range; + + if (!in_compat64_syscall()) { + if (copy_from_user_with_ptr(&range, argp, sizeof(range))) + return -EFAULT; + } else { + struct reg_mask_range { + compat_uptr_t addr; + __u32 range; + __u32 reserved[13]; + } __range; + + if (copy_from_user(&__range, argp, sizeof(__range))) + return -EFAULT; + + range.addr = (__kernel_uintptr_t)compat_ptr(__range.addr); + range.range = __range.range; + + memcpy(range.reserved, __range.reserved, + sizeof(__range.reserved)); + } + return kvm_vm_ioctl_get_reg_writable_masks(kvm, &range); + } + default: + break; + } + switch (ioctl) { case KVM_CREATE_IRQCHIP: { int ret; @@ -1708,13 +1808,6 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, user_uintptr_t arg)
return 0; } - case KVM_ARM_MTE_COPY_TAGS: { - struct kvm_arm_copy_mte_tags copy_tags; - - if (copy_from_user(©_tags, argp, sizeof(copy_tags))) - return -EFAULT; - return kvm_vm_ioctl_mte_copy_tags(kvm, ©_tags); - } case KVM_ARM_SET_COUNTER_OFFSET: { struct kvm_arm_counter_offset offset;
@@ -1722,25 +1815,7 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, user_uintptr_t arg) return -EFAULT; return kvm_vm_ioctl_set_counter_offset(kvm, &offset); } - case KVM_HAS_DEVICE_ATTR: { - if (copy_from_user(&attr, argp, sizeof(attr))) - return -EFAULT; - - return kvm_vm_has_attr(kvm, &attr); - } - case KVM_SET_DEVICE_ATTR: { - if (copy_from_user(&attr, argp, sizeof(attr))) - return -EFAULT;
- return kvm_vm_set_attr(kvm, &attr); - } - case KVM_ARM_GET_REG_WRITABLE_MASKS: { - struct reg_mask_range range; - - if (copy_from_user(&range, argp, sizeof(range))) - return -EFAULT; - return kvm_vm_ioctl_get_reg_writable_masks(kvm, &range); - } default: return -EINVAL; } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index aaf1d4939739..13c7813ed9ca 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -191,7 +191,7 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) * array. Hence below, nr_regs is the number of entries, and * off the index in the "array". */ - __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; + __u32 __user *uaddr = (__u32 __user *)reg->addr; int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32); void *addr; u32 off; @@ -214,7 +214,7 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { - __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; + __u32 __user *uaddr = (__u32 __user *)reg->addr; int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32); __uint128_t tmp; void *valp = &tmp, *addr; @@ -628,7 +628,7 @@ static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { - void __user *uaddr = (void __user *)(long)reg->addr; + void __user *uaddr = (void __user *)reg->addr; u64 val; int ret;
@@ -641,7 +641,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { - void __user *uaddr = (void __user *)(long)reg->addr; + void __user *uaddr = (void __user *)reg->addr; u64 val;
val = kvm_arm_timer_get_reg(vcpu, reg->id); @@ -786,7 +786,7 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) return -EINVAL;
switch (reg->id & KVM_REG_ARM_COPROC_MASK) { - case KVM_REG_ARM_CORE: return set_core_reg(vcpu, reg); + case KVM_REG_ARM_CORE: return set_core_reg(vcpu, reg); case KVM_REG_ARM_FW: case KVM_REG_ARM_FW_FEAT_BMAP: return kvm_arm_set_fw_reg(vcpu, reg); diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 5763d979d8ca..8d3c96be4a4f 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -476,7 +476,7 @@ static int get_kernel_wa_level(u64 regid) int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; - void __user *uaddr = (void __user *)(long)reg->addr; + void __user *uaddr = (void __user *)reg->addr; u64 val;
switch (reg->id) { @@ -550,7 +550,7 @@ static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val)
int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { - void __user *uaddr = (void __user *)(long)reg->addr; + void __user *uaddr = (void __user *)reg->addr; u64 val; int wa_level;
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index fe99b3dab6ce..5ff14b6dedc1 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -992,7 +992,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
switch (attr->attr) { case KVM_ARM_VCPU_PMU_V3_IRQ: { - int __user *uaddr = (int __user *)(long)attr->addr; + int __user *uaddr = (int __user *)attr->addr; int irq;
if (!irqchip_in_kernel(kvm)) @@ -1028,7 +1028,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) */ nr_events = __kvm_pmu_event_mask(pmuver) + 1;
- uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr; + uaddr = (struct kvm_pmu_event_filter __user *)attr->addr;
if (copy_from_user(&filter, uaddr, sizeof(filter))) return -EFAULT; @@ -1066,7 +1066,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return 0; } case KVM_ARM_VCPU_PMU_V3_SET_PMU: { - int __user *uaddr = (int __user *)(long)attr->addr; + int __user *uaddr = (int __user *)attr->addr; int pmu_id;
if (get_user(pmu_id, uaddr)) @@ -1085,7 +1085,7 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { switch (attr->attr) { case KVM_ARM_VCPU_PMU_V3_IRQ: { - int __user *uaddr = (int __user *)(long)attr->addr; + int __user *uaddr = (int __user *)attr->addr; int irq;
if (!irqchip_in_kernel(vcpu->kvm)) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1c5d24fb7733..2146e2e8ace4 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -3541,7 +3541,7 @@ static int demux_c15_set(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr) int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, const struct sys_reg_desc table[], unsigned int num) { - u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr; + u64 __user *uaddr = (u64 __user *)reg->addr; const struct sys_reg_desc *r; u64 val; int ret; @@ -3559,13 +3559,12 @@ int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
if (!ret) ret = put_user(val, uaddr); - return ret; }
int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { - void __user *uaddr = (void __user *)(unsigned long)reg->addr; + void __user *uaddr = (void __user *)reg->addr; int err;
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) @@ -3582,7 +3581,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, const struct sys_reg_desc table[], unsigned int num) { - u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr; + u64 __user *uaddr = (u64 __user *)reg->addr; const struct sys_reg_desc *r; u64 val; int ret; @@ -3609,7 +3608,7 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { - void __user *uaddr = (void __user *)(unsigned long)reg->addr; + void __user *uaddr = (void __user *)reg->addr; int err;
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) @@ -3618,7 +3617,6 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg err = set_invariant_sys_reg(reg->id, uaddr); if (err != -ENOENT) return err; - return kvm_sys_reg_set_user(vcpu, reg, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); } diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 2dad2d095160..adea62ba0fe6 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2826,7 +2826,7 @@ static int vgic_its_set_attr(struct kvm_device *dev,
switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_ADDR: { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 __user *uaddr = (u64 __user *)attr->addr; unsigned long type = (unsigned long)attr->attr; u64 addr;
@@ -2846,7 +2846,7 @@ static int vgic_its_set_attr(struct kvm_device *dev, case KVM_DEV_ARM_VGIC_GRP_CTRL: return vgic_its_ctrl(dev->kvm, its, attr->attr); case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 __user *uaddr = (u64 __user *)attr->addr; u64 reg;
if (get_user(reg, uaddr)) @@ -2865,7 +2865,7 @@ static int vgic_its_get_attr(struct kvm_device *dev, case KVM_DEV_ARM_VGIC_GRP_ADDR: { struct vgic_its *its = dev->private; u64 addr = its->vgic_its_base; - u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 __user *uaddr = (u64 __user *)attr->addr; unsigned long type = (unsigned long)attr->attr;
if (type != KVM_VGIC_ITS_ADDR_TYPE) @@ -2876,7 +2876,7 @@ static int vgic_its_get_attr(struct kvm_device *dev, break; } case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 __user *uaddr = (u64 __user *)attr->addr; u64 reg; int ret;
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index f48b8dab8b3d..6608a25c95bc 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -216,7 +216,7 @@ static int vgic_set_common_attr(struct kvm_device *dev, r = kvm_vgic_addr(dev->kvm, attr, true); return (r == -ENODEV) ? -ENXIO : r; case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 __user *uaddr = (u32 __user *)attr->addr; u32 val; int ret = 0;
@@ -292,7 +292,7 @@ static int vgic_get_common_attr(struct kvm_device *dev, r = kvm_vgic_addr(dev->kvm, attr, false); return (r == -ENODEV) ? -ENXIO : r; case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 __user *uaddr = (u32 __user *)attr->addr;
r = put_user(dev->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS, uaddr); @@ -359,7 +359,7 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, bool is_write) { - u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr; + u32 __user *uaddr = (u32 __user *)attr->addr; struct vgic_reg_attr reg_attr; gpa_t addr; struct kvm_vcpu *vcpu; @@ -533,7 +533,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, }
if (uaccess && is_write) { - u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr; + u32 __user *uaddr = (u32 __user *)attr->addr; if (get_user(val, uaddr)) return -EFAULT; } @@ -552,6 +552,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, goto out; }
+ switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: ret = vgic_v3_dist_uaccess(vcpu, is_write, addr, &val); @@ -588,7 +589,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, mutex_unlock(&dev->kvm->lock);
if (!ret && uaccess && !is_write) { - u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr; + u32 __user *uaddr = (u32 __user *)attr->addr; ret = put_user(val, uaddr); }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b011b4ec2ddf..c2f743462148 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1215,9 +1215,10 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, #define __kvm_get_guest(kvm, gfn, offset, v) \ ({ \ unsigned long __addr = gfn_to_hva(kvm, gfn); \ - typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \ + typeof(v) __user *__uaddr; \ int __ret = -EFAULT; \ \ + __uaddr = uaddr_to_user_ptr(__addr + offset); \ if (!kvm_is_error_hva(__addr)) \ __ret = get_user(v, __uaddr); \ __ret; \ @@ -1235,9 +1236,10 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, #define __kvm_put_guest(kvm, gfn, offset, v) \ ({ \ unsigned long __addr = gfn_to_hva(kvm, gfn); \ - typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \ + typeof(v) __user *__uaddr; \ int __ret = -EFAULT; \ \ + __uaddr = uaddr_to_user_ptr(__addr + offset); \ if (!kvm_is_error_hva(__addr)) \ __ret = put_user(v, __uaddr); \ if (!__ret) \ @@ -1405,6 +1407,8 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf);
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext);
+int kvm_get_device_attr_compat(unsigned int ioctl, unsigned long arg, + struct kvm_device_attr *attr); void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 211b86de35ac..1e80277bfef5 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1392,7 +1392,7 @@ struct kvm_reg_list {
struct kvm_one_reg { __u64 id; - __u64 addr; + __kernel_uintptr_t addr; };
#define KVM_MSI_VALID_DEVID (1U << 0) @@ -1422,10 +1422,10 @@ struct kvm_create_device { };
struct kvm_device_attr { - __u32 flags; /* no flags currently defined */ - __u32 group; /* device-defined */ - __u64 attr; /* group-defined */ - __u64 addr; /* userspace address of attr data */ + __u32 flags; /* no flags currently defined */ + __u32 group; /* device-defined */ + __u64 attr; /* group-defined */ + __kernel_uintptr_t addr; /* userspace address of attr data */ };
#define KVM_DEV_VFIO_FILE 1 diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 89d2fc872d9f..4f854ef1f76d 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -532,9 +532,9 @@ struct kvm_smccc_filter { #define KVM_ARM_FEATURE_ID_RANGE_SIZE (3 * 8 * 8)
struct reg_mask_range { - __u64 addr; /* Pointer to mask array */ - __u32 range; /* Requested range */ - __u32 reserved[13]; + __kernel_uintptr_t addr; /* Pointer to mask array */ + __u32 range; /* Requested range */ + __u32 reserved[13]; };
#endif diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 211b86de35ac..fd70c2bdf989 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1391,8 +1391,8 @@ struct kvm_reg_list { };
struct kvm_one_reg { - __u64 id; - __u64 addr; + __u64 id; + __kernel_uintptr_t addr; };
#define KVM_MSI_VALID_DEVID (1U << 0) @@ -1422,10 +1422,10 @@ struct kvm_create_device { };
struct kvm_device_attr { - __u32 flags; /* no flags currently defined */ - __u32 group; /* device-defined */ - __u64 attr; /* group-defined */ - __u64 addr; /* userspace address of attr data */ + __u32 flags; /* no flags currently defined */ + __u32 group; /* device-defined */ + __u64 attr; /* group-defined */ + __kernel_uintptr_t addr; /* userspace address of attr data */ };
#define KVM_DEV_VFIO_FILE 1 diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 484d0873061c..b6ed8ed3cc4d 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -70,7 +70,7 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT
config KVM_COMPAT def_bool y - depends on KVM && COMPAT && !(S390 || ARM64 || RISCV) + depends on KVM && COMPAT && !(S390 || RISCV || (ARM64 &&!ARM64_MORELLO))
config HAVE_KVM_IRQ_BYPASS bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d79fd04d8b9e..bbddb168430c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -120,7 +120,14 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, #ifdef CONFIG_KVM_COMPAT static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); +#ifndef CONFIG_COMPAT64 #define KVM_COMPAT(c) .compat_ioctl = (c) +#define KVM_COMPAT_DIRECT(c) KVM_COMPAT(c) +#else +#define KVM_COMPAT(c) .compat_ioctl = compat_ptr_ioctl, \ + .unlocked_ioctl = (c) +#define KVM_COMPAT_DIRECT(c) .compat_ioctl = (c) +#endif #else /* * For architectures that don't implement a compat infrastructure, @@ -138,6 +145,7 @@ static int kvm_no_compat_open(struct inode *inode, struct file *file) } #define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl, \ .open = kvm_no_compat_open +#define KVM_COMPAT_DIRECT(c) KVM_COMPAT(c) #endif static int hardware_enable_all(void); static void hardware_disable_all(void); @@ -3885,7 +3893,7 @@ static struct file_operations kvm_vcpu_fops = { .unlocked_ioctl = kvm_vcpu_ioctl, .mmap = kvm_vcpu_mmap, .llseek = noop_llseek, - KVM_COMPAT(kvm_vcpu_compat_ioctl), + KVM_COMPAT_DIRECT(kvm_vcpu_compat_ioctl), };
/* @@ -4110,7 +4118,7 @@ static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, user_uintptr_t arg) { struct kvm_vcpu *vcpu = filp->private_data; - void __user *argp = (void __user *)arg; + void __user *argp; int r; struct kvm_fpu *fpu = NULL; struct kvm_sregs *kvm_sregs = NULL; @@ -4121,6 +4129,7 @@ static long kvm_vcpu_ioctl(struct file *filp, if (unlikely(_IOC_TYPE(ioctl) != KVMIO)) return -EINVAL;
+ argp = in_compat64_syscall() ? compat_ptr(arg) : (void __user *)arg; /* * Some architectures have vcpu ioctls that are asynchronous to vcpu * execution; mutex_lock() would break them. @@ -4317,6 +4326,45 @@ static long kvm_vcpu_ioctl(struct file *filp, return r; }
+int kvm_get_device_attr_compat(unsigned int ioctl, unsigned long arg, + struct kvm_device_attr *attr) +{ + int ret = -EINVAL; +#ifdef CONFIG_KVM_COMPAT + if (!in_compat64_syscall()) + return ret; + + switch (_IOC_NR(ioctl)) { + case _IOC_NR(KVM_HAS_DEVICE_ATTR): + case _IOC_NR(KVM_GET_DEVICE_ATTR): + case _IOC_NR(KVM_SET_DEVICE_ATTR): { + struct { + __u32 flags; + __u32 group; + __u64 attr; + compat_uptr_t addr; + } __attr; + + if (_IOC_SIZE(ioctl) != sizeof(__attr)) + break; + + ret = -EFAULT; + if (copy_from_user(&__attr, compat_ptr(arg), sizeof(__attr))) + break; + + attr->flags = __attr.flags; + attr->group = __attr.group; + attr->attr = __attr.attr; + attr->addr = (__kernel_uintptr_t)compat_ptr(__attr.addr); + ret = 0; + } + default: + break; + } +#endif + return ret; +} + #ifdef CONFIG_KVM_COMPAT static long kvm_vcpu_compat_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) @@ -4370,37 +4418,38 @@ static int kvm_device_mmap(struct file *filp, struct vm_area_struct *vma) return -ENODEV; }
-static int kvm_device_ioctl_attr(struct kvm_device *dev, +static inline int kvm_device_ioctl_attr(struct kvm_device *dev, int (*accessor)(struct kvm_device *dev, struct kvm_device_attr *attr), - user_uintptr_t arg) + struct kvm_device_attr *attr) { - struct kvm_device_attr attr; - if (!accessor) return -EPERM;
- if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) - return -EFAULT; - - return accessor(dev, &attr); + return accessor(dev, attr); }
static long kvm_device_ioctl(struct file *filp, unsigned int ioctl, user_uintptr_t arg) { struct kvm_device *dev = filp->private_data; + struct kvm_device_attr attr; + int ret;
if (dev->kvm->mm != current->mm || dev->kvm->vm_dead) return -EIO;
- switch (ioctl) { - case KVM_SET_DEVICE_ATTR: - return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); - case KVM_GET_DEVICE_ATTR: - return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg); - case KVM_HAS_DEVICE_ATTR: - return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg); + ret = kvm_get_device_attr_compat(ioctl, arg, &attr); + if (ret) + return ret; + + switch (_IOC_NR(ioctl)) { + case _IOC_NR(KVM_SET_DEVICE_ATTR): + return kvm_device_ioctl_attr(dev, dev->ops->set_attr, &attr); + case _IOC_NR(KVM_GET_DEVICE_ATTR): + return kvm_device_ioctl_attr(dev, dev->ops->get_attr, &attr); + case _IOC_NR(KVM_HAS_DEVICE_ATTR): + return kvm_device_ioctl_attr(dev, dev->ops->has_attr, &attr); default: if (dev->ops->ioctl) return dev->ops->ioctl(dev, ioctl, arg); @@ -4787,11 +4836,13 @@ static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, user_uintptr_t arg) { struct kvm *kvm = filp->private_data; - void __user *argp = (void __user *)arg; + void __user *argp; int r;
if (kvm->mm != current->mm || kvm->vm_dead) return -EIO; + + argp = in_compat64_syscall() ? compat_ptr(arg) : (void __user *)arg; switch (ioctl) { case KVM_CREATE_VCPU: r = kvm_vm_ioctl_create_vcpu(kvm, arg); @@ -5014,13 +5065,13 @@ static long kvm_vm_compat_ioctl(struct file *filp, if (r != -ENOTTY) return r;
- switch (ioctl) { + switch (_IOC_NR(ioctl)) { #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT - case KVM_CLEAR_DIRTY_LOG: { + case _IOC_NR(KVM_CLEAR_DIRTY_LOG): { struct compat_kvm_clear_dirty_log compat_log; struct kvm_clear_dirty_log log;
- if (copy_from_user(&compat_log, (void __user *)arg, + if (copy_from_user(&compat_log, compat_ptr(arg), sizeof(compat_log))) return -EFAULT; log.slot = compat_log.slot; @@ -5033,11 +5084,11 @@ static long kvm_vm_compat_ioctl(struct file *filp, break; } #endif - case KVM_GET_DIRTY_LOG: { + case _IOC_NR(KVM_GET_DIRTY_LOG): { struct compat_kvm_dirty_log compat_log; struct kvm_dirty_log log;
- if (copy_from_user(&compat_log, (void __user *)arg, + if (copy_from_user(&compat_log, compat_ptr(arg), sizeof(compat_log))) return -EFAULT; log.slot = compat_log.slot; @@ -5059,7 +5110,7 @@ static struct file_operations kvm_vm_fops = { .release = kvm_vm_release, .unlocked_ioctl = kvm_vm_ioctl, .llseek = noop_llseek, - KVM_COMPAT(kvm_vm_compat_ioctl), + KVM_COMPAT_DIRECT(kvm_vm_compat_ioctl), };
bool file_is_kvm(struct file *file) diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index ca24ce120906..304abc96272a 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -303,7 +303,7 @@ static int kvm_vfio_set_attr(struct kvm_device *dev, switch (attr->group) { case KVM_DEV_VFIO_FILE: return kvm_vfio_set_file(dev, attr->attr, - u64_to_user_ptr(attr->addr)); + as_user_ptr(attr->addr)); }
return -ENXIO;
On 26/02/2024 10:20, Beata Michalska wrote:
Add support for managing kvm virtual machines through plain aarch64 tools. No support for purecap guests just yet.
What this patch really does is add purecap support to the KVM ioctls, as well as compat64 handling to keep them working. I would prefer doing each step in its own patch (first compat64 handling, then purecap support), like we've done for io_uring, BPF, etc.
[...]
@@ -786,7 +786,7 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) return -EINVAL; switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
- case KVM_REG_ARM_CORE: return set_core_reg(vcpu, reg);
- case KVM_REG_ARM_CORE: return set_core_reg(vcpu, reg);
Let's avoid unnecessary whitespace changes (however logical).
case KVM_REG_ARM_FW: case KVM_REG_ARM_FW_FEAT_BMAP: return kvm_arm_set_fw_reg(vcpu, reg);
[...] diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b011b4ec2ddf..c2f743462148 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1215,9 +1215,10 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, #define __kvm_get_guest(kvm, gfn, offset, v) \ ({ \ unsigned long __addr = gfn_to_hva(kvm, gfn); \
- typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \
- typeof(v) __user *__uaddr; \ int __ret = -EFAULT; \ \
- __uaddr = uaddr_to_user_ptr(__addr + offset); \ if (!kvm_is_error_hva(__addr)) \ __ret = get_user(v, __uaddr); \ __ret; \
@@ -1235,9 +1236,10 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, #define __kvm_put_guest(kvm, gfn, offset, v) \ ({ \ unsigned long __addr = gfn_to_hva(kvm, gfn); \
- typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \
- typeof(v) __user *__uaddr; \ int __ret = -EFAULT; \ \
- __uaddr = uaddr_to_user_ptr(__addr + offset); \
It does look like we have to create a capability ourselves here, and we're in control of the address, so that's OK. uaddr_to_user_ptr() should not be used in such situations, though; it is only meant as a temporary workaround for user-provided addresses. Nowadays we should use make_user_ptr_*() instead (which restricts both bounds and permissions), see [1].
Also these changes look unrelated to the ioctl adaptations, I think they belong to a separate patch.
[1] https://git.morello-project.org/morello/kernel/linux/-/blob/morello/master/D...
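For instance, in __kvm_get_guest() that could be something along these lines (rough sketch; I'm assuming a read-uaccess variant taking an address and a length, the exact helper name and arguments should be checked against [1]):

	/* helper name assumed, see [1] */
	__uaddr = make_user_ptr_for_read_uaccess(__addr + offset, sizeof(v));

and the put_user() side in __kvm_put_guest() would use the corresponding write variant.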
if (!kvm_is_error_hva(__addr)) \ __ret = put_user(v, __uaddr); \ if (!__ret) \
[...]
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 484d0873061c..b6ed8ed3cc4d 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -70,7 +70,7 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT config KVM_COMPAT def_bool y
depends on KVM && COMPAT && !(S390 || ARM64 || RISCV)
depends on KVM && COMPAT && !(S390 || RISCV || (ARM64 &&!ARM64_MORELLO))
What matters is not so much that ARM64_MORELLO is selected, but rather that PCuABI is selected (regardless of the architecture). I think the condition could be something like:
KVM && COMPAT && (CHERI_PURECAP_UABI || !(S390 || ARM64 || RISCV))
config HAVE_KVM_IRQ_BYPASS bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d79fd04d8b9e..bbddb168430c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -120,7 +120,14 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, #ifdef CONFIG_KVM_COMPAT static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); +#ifndef CONFIG_COMPAT64 #define KVM_COMPAT(c) .compat_ioctl = (c) +#define KVM_COMPAT_DIRECT(c) KVM_COMPAT(c) +#else +#define KVM_COMPAT(c) .compat_ioctl = compat_ptr_ioctl, \
I don't think there is any issue with using compat_ptr_ioctl regardless of the kind of compat. The compat_ptr() conversion is not actually specific to compat64; it just happens that very few compat ioctl handlers already do the right thing.
Specifically, what I'm suggesting is to leave all of these macros unchanged, and for kvm_device_ioctl and kvm_dev_ioctl, use KVM_COMPAT(compat_ptr_ioctl).
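E.g. in kvm_device_fops that would be roughly (sketch, assuming it currently uses KVM_COMPAT(kvm_device_ioctl)):

-	KVM_COMPAT(kvm_device_ioctl),
+	KVM_COMPAT(compat_ptr_ioctl),

and similarly for the fops that wire up kvm_dev_ioctl.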
.unlocked_ioctl = (c)
It seems to me that this additional .unlocked_ioctl assignment does nothing (it is already present in the struct definitions where KVM_COMPAT() is used). Also worth noting that overriding an initialiser can trigger a warning (though not in the default build configuration).
+#define KVM_COMPAT_DIRECT(c) .compat_ioctl = (c) +#endif #else /*
- For architectures that don't implement a compat infrastructure,
@@ -138,6 +145,7 @@ static int kvm_no_compat_open(struct inode *inode, struct file *file) } #define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl, \ .open = kvm_no_compat_open +#define KVM_COMPAT_DIRECT(c) KVM_COMPAT(c) #endif static int hardware_enable_all(void); static void hardware_disable_all(void); @@ -3885,7 +3893,7 @@ static struct file_operations kvm_vcpu_fops = { .unlocked_ioctl = kvm_vcpu_ioctl, .mmap = kvm_vcpu_mmap, .llseek = noop_llseek,
- KVM_COMPAT(kvm_vcpu_compat_ioctl),
- KVM_COMPAT_DIRECT(kvm_vcpu_compat_ioctl),
}; /* @@ -4110,7 +4118,7 @@ static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, user_uintptr_t arg) { struct kvm_vcpu *vcpu = filp->private_data;
- void __user *argp = (void __user *)arg;
- void __user *argp; int r; struct kvm_fpu *fpu = NULL; struct kvm_sregs *kvm_sregs = NULL;
@@ -4121,6 +4129,7 @@ static long kvm_vcpu_ioctl(struct file *filp, if (unlikely(_IOC_TYPE(ioctl) != KVMIO)) return -EINVAL;
- argp = in_compat64_syscall() ? compat_ptr(arg) : (void __user *)arg;
Instead of this, I would suggest doing the easy thing: have kvm_vcpu_compat_ioctl() call kvm_vcpu_ioctl(filp, ioctl, (user_uintptr_t)argp), so that the native handler can assume that arg is a valid pointer. This patch effectively does this already for kvm_device_ioctl() and kvm_dev_ioctl() by using compat_ptr_ioctl, and we might as well do it for all the handlers. It is not 100% correct in the sense that some commands do not expect a pointer, but at this point I am really not convinced that the additional complexity created by using compat_ptr() only when the argument is actually a pointer is justified. The overhead of calling compat_ptr() should be fairly minimal, and it is very difficult to imagine a case where it would be a security issue.
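Roughly speaking, the fallthrough case in kvm_vcpu_compat_ioctl() would then become something like (sketch):

	default:
		r = kvm_vcpu_ioctl(filp, ioctl, (user_uintptr_t)compat_ptr(arg));

and kvm_vcpu_ioctl() itself could keep the plain (void __user *)arg conversion, without checking in_compat64_syscall().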
/* * Some architectures have vcpu ioctls that are asynchronous to vcpu * execution; mutex_lock() would break them. @@ -4317,6 +4326,45 @@ static long kvm_vcpu_ioctl(struct file *filp, return r; } +int kvm_get_device_attr_compat(unsigned int ioctl, unsigned long arg,
struct kvm_device_attr *attr)
+{
- int ret = -EINVAL;
+#ifdef CONFIG_KVM_COMPAT
- if (!in_compat64_syscall())
return ret;
- switch (_IOC_NR(ioctl)) {
- case _IOC_NR(KVM_HAS_DEVICE_ATTR):
- case _IOC_NR(KVM_GET_DEVICE_ATTR):
- case _IOC_NR(KVM_SET_DEVICE_ATTR): {
struct {
__u32 flags;
__u32 group;
__u64 attr;
compat_uptr_t addr;
} __attr;
if (_IOC_SIZE(ioctl) != sizeof(__attr))
We do need this check indeed. At the moment I don't think we are checking this systematically, though; it should be done whenever we transform a switch to use _IOC_NR(), both in native and in compat. That does beg the question of whether this is actually simpler / shorter than having a separate switch for compat64... This works well in the DRM case because the driver knows what the expected size is just based on _IOC_NR(cmd), but we don't have such infrastructure here. Maybe we can figure out some clever wrapper for copy_from_user that also does some command size checking, though.
A further issue is that we should check the remaining fields of the command value as well, i.e. _IOC_TYPE() and _IOC_DIR(). The former can be checked unconditionally at the beginning, but not the latter. Here as well it gets rather cumbersome without DRM's infrastructure.
If we go for a separate switch for compat64, we can make our lives easier by introducing a macro that replaces the size field in a command value (like KVM_HAS_DEVICE_ATTR) with a new value, i.e. the size of the compat struct.
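Something along these lines could do (hypothetical names, sketch only):

/*
 * Rebuild an ioctl command value with the size field replaced by the
 * size of the compat layout, e.g.
 * COMPAT_IOCTL_SIZE(KVM_SET_DEVICE_ATTR, struct compat_kvm_device_attr).
 */
#define COMPAT_IOCTL_SIZE(cmd, type) \
	_IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))

so the compat64 switch could still match on the full command value rather than just _IOC_NR().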
break;
ret = -EFAULT;
if (copy_from_user(&__attr, compat_ptr(arg), sizeof(__attr)))
break;
attr->flags = __attr.flags;
attr->group = __attr.group;
attr->attr = __attr.attr;
attr->addr = (__kernel_uintptr_t)compat_ptr(__attr.addr);
ret = 0;
- }
- default:
break;
- }
+#endif
- return ret;
+}
#ifdef CONFIG_KVM_COMPAT static long kvm_vcpu_compat_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) @@ -4370,37 +4418,38 @@ static int kvm_device_mmap(struct file *filp, struct vm_area_struct *vma) return -ENODEV; } -static int kvm_device_ioctl_attr(struct kvm_device *dev, +static inline int kvm_device_ioctl_attr(struct kvm_device *dev, int (*accessor)(struct kvm_device *dev, struct kvm_device_attr *attr),
user_uintptr_t arg)
struct kvm_device_attr *attr)
{
- struct kvm_device_attr attr;
- if (!accessor) return -EPERM;
- if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
return -EFAULT;
- return accessor(dev, &attr);
- return accessor(dev, attr);
} static long kvm_device_ioctl(struct file *filp, unsigned int ioctl, user_uintptr_t arg) { struct kvm_device *dev = filp->private_data;
- struct kvm_device_attr attr;
- int ret;
if (dev->kvm->mm != current->mm || dev->kvm->vm_dead) return -EIO;
- switch (ioctl) {
- case KVM_SET_DEVICE_ATTR:
return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg);
- case KVM_GET_DEVICE_ATTR:
return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg);
- case KVM_HAS_DEVICE_ATTR:
return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg);
- ret = kvm_get_device_attr_compat(ioctl, arg, &attr);
- if (ret)
return ret;
- switch (_IOC_NR(ioctl)) {
- case _IOC_NR(KVM_SET_DEVICE_ATTR):
return kvm_device_ioctl_attr(dev, dev->ops->set_attr, &attr);
- case _IOC_NR(KVM_GET_DEVICE_ATTR):
return kvm_device_ioctl_attr(dev, dev->ops->get_attr, &attr);
- case _IOC_NR(KVM_HAS_DEVICE_ATTR):
default: if (dev->ops->ioctl) return dev->ops->ioctl(dev, ioctl, arg);return kvm_device_ioctl_attr(dev, dev->ops->has_attr, &attr);
@@ -4787,11 +4836,13 @@ static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, user_uintptr_t arg) { struct kvm *kvm = filp->private_data;
- void __user *argp = (void __user *)arg;
- void __user *argp; int r;
if (kvm->mm != current->mm || kvm->vm_dead) return -EIO;
- argp = in_compat64_syscall() ? compat_ptr(arg) : (void __user *)arg; switch (ioctl) { case KVM_CREATE_VCPU: r = kvm_vm_ioctl_create_vcpu(kvm, arg);
@@ -5014,13 +5065,13 @@ static long kvm_vm_compat_ioctl(struct file *filp, if (r != -ENOTTY) return r;
- switch (ioctl) {
- switch (_IOC_NR(ioctl)) {
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
- case KVM_CLEAR_DIRTY_LOG: {
- case _IOC_NR(KVM_CLEAR_DIRTY_LOG): { struct compat_kvm_clear_dirty_log compat_log; struct kvm_clear_dirty_log log;
if (copy_from_user(&compat_log, (void __user *)arg,
log.slot = compat_log.slot;if (copy_from_user(&compat_log, compat_ptr(arg), sizeof(compat_log))) return -EFAULT;
@@ -5033,11 +5084,11 @@ static long kvm_vm_compat_ioctl(struct file *filp, break; } #endif
- case KVM_GET_DIRTY_LOG: {
- case _IOC_NR(KVM_GET_DIRTY_LOG): { struct compat_kvm_dirty_log compat_log; struct kvm_dirty_log log;
if (copy_from_user(&compat_log, (void __user *)arg,
log.slot = compat_log.slot;if (copy_from_user(&compat_log, compat_ptr(arg), sizeof(compat_log))) return -EFAULT;
@@ -5059,7 +5110,7 @@ static struct file_operations kvm_vm_fops = { .release = kvm_vm_release, .unlocked_ioctl = kvm_vm_ioctl, .llseek = noop_llseek,
- KVM_COMPAT(kvm_vm_compat_ioctl),
- KVM_COMPAT_DIRECT(kvm_vm_compat_ioctl),
}; bool file_is_kvm(struct file *file) diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index ca24ce120906..304abc96272a 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -303,7 +303,7 @@ static int kvm_vfio_set_attr(struct kvm_device *dev, switch (attr->group) { case KVM_DEV_VFIO_FILE: return kvm_vfio_set_file(dev, attr->attr,
u64_to_user_ptr(attr->addr));
as_user_ptr(attr->addr));
Not sure I understand this. I would expect a simple cast to void __user *.
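i.e. something like (sketch):

		return kvm_vfio_set_file(dev, attr->attr,
					 (void __user *)attr->addr);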
Kevin
} return -ENXIO;
Signed-off-by: Beata Michalska beata.michalska@arm.com --- arch/arm64/kvm/Kconfig | 1 - 1 file changed, 1 deletion(-)
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index d49ece801a7a..83c1e09be42e 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -21,7 +21,6 @@ if VIRTUALIZATION menuconfig KVM bool "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM - depends on !ARM64_MORELLO select KVM_GENERIC_HARDWARE_ENABLING select MMU_NOTIFIER select PREEMPT_NOTIFIERS
On 26/02/2024 10:20, Beata Michalska wrote:
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/kvm/Kconfig | 1 - 1 file changed, 1 deletion(-)
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index d49ece801a7a..83c1e09be42e 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -21,7 +21,6 @@ if VIRTUALIZATION menuconfig KVM bool "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM
- depends on !ARM64_MORELLO
Let's update the help text of ARM64_MORELLO in the arm64 Kconfig too.
In itself this patch doesn't enable KVM for the Morello defconfig; should we add CONFIG_KVM there as well?
Kevin
select KVM_GENERIC_HARDWARE_ENABLING select MMU_NOTIFIER select PREEMPT_NOTIFIERS
linux-morello@op-lists.linaro.org