On Fri, Mar 01, 2024 at 11:15:26AM +0100, Kevin Brodsky wrote:
On 26/02/2024 09:20, Beata Michalska wrote:
Extend the definition of kvm_pgtabe_prot enum to incorporate PBHA bits for both stage-1 and stage-2 page tables. At this point, those are being enabled only for Morello, which implements FEAT_HPDS2 feature by default and as such does not require any additional checks for the above mentioned feature status.
Signed-off-by: Beata Michalska beata.michalska@arm.com
arch/arm64/include/asm/kvm_arm.h | 3 +++ arch/arm64/include/asm/kvm_pgtable.h | 20 ++++++++++++++++++++ arch/arm64/kvm/arm.c | 2 ++ arch/arm64/kvm/hyp/pgtable.c | 10 ++++++++-- 4 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index e80e1b7a91e0..010d837f8f1d 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -132,6 +132,9 @@ #define VTCR_EL2_HWU61 TCR_EL2_HWU61 #define VTCR_EL2_HWU60 TCR_EL2_HWU60 #define VTCR_EL2_HWU59 TCR_EL2_HWU59 +#define VTCR_EL2_PBHA_SHIFT 25
Maybe use VTCR_EL2_HWU59 instead of 25?
Could do though in majority of cases the XXX_SHIFT(s) are being defined explicitely across this file, though no strong objections to the idea. Will update that in v2, but will use VTCR_EL2_HWU59 instead.
+#define VTCR_EL2_PBHA_MASK (0xf << VTCR_EL2_PBHA_SHIFT) +#define VTCR_EL2_PBHA VTCR_EL2_PBHA_MASK #define VTCR_EL2_HD (1 << 22) #define VTCR_EL2_HA (1 << 21) #define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index d3e354bb8351..97ff8a2634d5 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -173,6 +173,10 @@ enum kvm_pgtable_stage2_flags {
- @KVM_PGTABLE_PROT_SW1: Software bit 1.
- @KVM_PGTABLE_PROT_SW2: Software bit 2.
- @KVM_PGTABLE_PROT_SW3: Software bit 3.
- @KVM_PGTABLE_PROT_HW0: PBHA[0]
- @KVM_PGTABLE_PROT_HW1: PBHA[1]
- @KVM_PGTABLE_PROT_HW2: PBHA[2]
*/
- @KVM_PGTABLE_PROT_HW3: PBHA[3]
enum kvm_pgtable_prot { KVM_PGTABLE_PROT_X = BIT(0), @@ -185,17 +189,33 @@ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_SW1 = BIT(56), KVM_PGTABLE_PROT_SW2 = BIT(57), KVM_PGTABLE_PROT_SW3 = BIT(58),
- KVM_PGTABLE_PROT_HW0 = BIT(59),
- KVM_PGTABLE_PROT_HW1 = BIT(60),
- KVM_PGTABLE_PROT_HW2 = BIT(61),
- KVM_PGTABLE_PROT_HW3 = BIT(62),
}; +#ifdef CONFIG_ARM64_MORELLO
To reduce the #ifdef'ing, we could use a similar approach as in <asm/pgtable-prot.h>, that is define MAYBE macros that expand to 0 in !MORELLO. The other advantage would be to make it clearer what each bit does, e.g. KVM_PGTABLE_PROT_MAYBE_LS_CAPS. Getting pretty long though, maybe we could shorten the prefix...
Right, not really a big fan of XXX_MAYBE_YYY really: what MAYBE is referring to actually ? also we would need separate defines for RO and still that requires #ifdef'ing any way so not sure what we will be gaining here ? (apart from trading off one #ifdef block ). Having the HW bits being somehow mapped through the naming to their actual meaning is probably a good idea. Let me ponder a bit to see if I can improve it somehow.
+#define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W | \
KVM_PGTABLE_PROT_HW1 | KVM_PGTABLE_PROT_HW2)
+#else #define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W) +#endif #define KVM_PGTABLE_PROT_RWX (KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X) #define PKVM_HOST_MEM_PROT KVM_PGTABLE_PROT_RWX #define PKVM_HOST_MMIO_PROT KVM_PGTABLE_PROT_RW #define PAGE_HYP KVM_PGTABLE_PROT_RW +#ifdef CONFIG_ARM64_MORELLO +#define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X | \
KVM_PGTABLE_PROT_HW2)
+#define PAGE_HYP_RO (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_HW2) +#else #define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X) #define PAGE_HYP_RO (KVM_PGTABLE_PROT_R) +#endif #define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE) typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end, diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 53ad8de28f48..bc945eac681d 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1859,6 +1859,8 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) } tcr &= ~TCR_T0SZ_MASK; tcr |= TCR_T0SZ(hyp_va_bits);
- if (IS_ENABLED(CONFIG_ARM64_MORELLO))
params->tcr_el2 = tcr;tcr |= TCR_EL2_HWU61 | TCR_EL2_HWU60 | TCR_EL2_HPD;
params->pgd_pa = kvm_mmu_get_httbr(); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 1966fdee740e..88d002d7152f 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -38,6 +38,7 @@ #define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50) +#define KVM_PTE_LEAF_ATTR_HI_HW GENMASK(62, 59) #define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55) #define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) @@ -411,6 +412,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF; attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
- attr |= prot & KVM_PTE_LEAF_ATTR_HI_HW; *ptep = attr;
return 0; @@ -418,7 +420,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) {
- enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
- enum kvm_pgtable_prot prot = pte & (KVM_PTE_LEAF_ATTR_HI_SW | KVM_PTE_LEAF_ATTR_HI_HW); u32 ap;
if (!kvm_pte_valid(pte)) @@ -654,6 +656,9 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) vtcr |= VTCR_EL2_HA; #endif /* CONFIG_ARM64_HW_AFDBM */
- if (IS_ENABLED(CONFIG_ARM64_MORELLO))
vtcr |= VTCR_EL2_PBHA;
I'm not sure I understand the rationale behind the choice of HWU bits to set. Right now it looks like we're doing this:
- Stage 1: 60, 61 (SC, LC[0])
- Stage 2: 59, 60, 61, 62 (CDBM, SC, LC, <unused>)
IIUC, the stage 1 configuration only matters for host / KVM mappings. We don't use LC[1] or CDBM so it's ok to set only HWU{60,61}, though it wouldn't hurt to set them all (like we do in the kernel). OTOH we're setting all bits at stage 2, even though HWU62 has no meaning for Morello (and we don't use CDBM either). It is a detail, as only HWU{60,61} matter in both cases for our usage, but it would be nice to be more consistent.
Right, that's a fair point. Will update stage 1 setup accordingly (did end up mixing the approach taken for EL1 vs EL2)
- /* Set the vmid bits */ vtcr |= (get_vmid_bits(mmfr1) == 16) ? VTCR_EL2_VS_16BIT :
@@ -714,6 +719,7 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF; attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
- attr |= prot & KVM_PTE_LEAF_ATTR_HI_HW; *ptep = attr;
return 0; @@ -1299,7 +1305,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, u32 level; kvm_pte_t set = 0, clr = 0;
- if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
- if (prot & KVM_PTE_LEAF_ATTR_HI_SW || prot & KVM_PTE_LEAF_ATTR_HI_HW)
Nit: could be prot & (SW | HW), like above.
Done.
Thanks.
--- BR Beata
Kevin
return -EINVAL;
if (prot & KVM_PGTABLE_PROT_R)