[linux-morello] Re: [PATCH 11/12] KVM: arm64: morello: Enable support for COMPAT64

1 Mar 2024

On 26/02/2024 10:20, Beata Michalska wrote:
...
Add support for managing kvm virtual machines through plain aarch64
tools. No support for purecap guests just yet.
What this patch really does is adding purecap support to the KVM ioctls,
as well as compat64 handling to keep it working. I would prefer doing
each step in its own patch (first compat64 handling then purecap
support), like we've done for io_uring, BPF, etc.
...
[...]
@@ -786,7 +786,7 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
   	return -EINVAL;
 
   switch (reg->id & KVM_REG_ARM_COPROC_MASK) {

case KVM_REG_ARM_CORE:	return set_core_reg(vcpu, reg);


case KVM_REG_ARM_CORE: return set_core_reg(vcpu, reg);

Let's avoid unnecessary whitespace changes (however logical).
...
case KVM_REG_ARM_FW:
   case KVM_REG_ARM_FW_FEAT_BMAP:
   	return kvm_arm_set_fw_reg(vcpu, reg);
[...]

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b011b4ec2ddf..c2f743462148 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1215,9 +1215,10 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 #define __kvm_get_guest(kvm, gfn, offset, v)				\
 ({									\
   unsigned long __addr = gfn_to_hva(kvm, gfn);			\

typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset);	\


typeof(v) __user *__uaddr;					\
int __ret = -EFAULT;						\
							\
__uaddr = uaddr_to_user_ptr(__addr + offset);			\
if (!kvm_is_error_hva(__addr))					\
__ret = get_user(v, __uaddr);				\
__ret;								\

@@ -1235,9 +1236,10 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 #define __kvm_put_guest(kvm, gfn, offset, v)				\
 ({									\
   unsigned long __addr = gfn_to_hva(kvm, gfn);			\

typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset);	\


typeof(v) __user *__uaddr;					\
int __ret = -EFAULT;						\
							\
__uaddr = uaddr_to_user_ptr(__addr + offset);			\

It does look like we have to create a capability ourselves here, and
we're in control of the address so that's ok. uaddr_to_user_ptr() should
not be used in such situations though, it is only meant as a temporary
workaround for user-provided addresses. Nowadays we should use
make_user_ptr_*() instead (which restricts both bounds and permissions),
see [1].
Also these changes look unrelated to the ioctl adaptations, I think they
belong to a separate patch.
[1]
https://git.morello-project.org/morello/kernel/linux/-/blob/morello/master/D...
...
if (!kvm_is_error_hva(__addr))					\
   	__ret = put_user(v, __uaddr);				\
   if (!__ret)							\
[...]
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 484d0873061c..b6ed8ed3cc4d 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -70,7 +70,7 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT
 
 config KVM_COMPAT
        def_bool y

  depends on KVM && COMPAT && !(S390 || ARM64 || RISCV)




  depends on KVM && COMPAT && !(S390 || RISCV || (ARM64 &&!ARM64_MORELLO))



What matters is not so much that ARM64_MORELLO is selected, but rather
that PCuABI is selected (regardless of the architecture). I think the
condition could be something like:
KVM && COMPAT && (CHERI_PURECAP_UABI || !(S390 || ARM64 || RISCV))
...
config HAVE_KVM_IRQ_BYPASS
        bool
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d79fd04d8b9e..bbddb168430c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -120,7 +120,14 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 #ifdef CONFIG_KVM_COMPAT
 static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
   			  unsigned long arg);
+#ifndef CONFIG_COMPAT64
 #define KVM_COMPAT(c)	.compat_ioctl	= (c)
+#define KVM_COMPAT_DIRECT(c) KVM_COMPAT(c)
+#else
+#define KVM_COMPAT(c)   .compat_ioctl  = compat_ptr_ioctl,     \
I don't think there is any issue with using compat_ptr_ioctl regardless
of the kind of compat. The compat_ptr() conversion is not actually
specific to compat64, it just happens that very few compat ioctl
handlers already do the right thing.
Specifically, what I'm suggesting is to leave all of these macros
unchanged, and for kvm_device_ioctl and kvm_dev_ioctl, use
KVM_COMPAT(compat_ptr_ioctl).
...

	.unlocked_ioctl = (c)



It seems to me that this additional .unlocked_ioctl assignment does
nothing (it is already present in the struct definitions where
KVM_COMPAT() is used). Also worth noting that overriding an initialiser
can trigger a warning (though not in the default build configuration).
...
+#define KVM_COMPAT_DIRECT(c)   .compat_ioctl   = (c)
+#endif
 #else
 /*

For architectures that don't implement a compat infrastructure,

@@ -138,6 +145,7 @@ static int kvm_no_compat_open(struct inode *inode, struct file *file)
 }
 #define KVM_COMPAT(c)	.compat_ioctl	= kvm_no_compat_ioctl,	\
   		.open		= kvm_no_compat_open
+#define KVM_COMPAT_DIRECT(c)	KVM_COMPAT(c)
 #endif
 static int hardware_enable_all(void);
 static void hardware_disable_all(void);
@@ -3885,7 +3893,7 @@ static struct file_operations kvm_vcpu_fops = {
   .unlocked_ioctl = kvm_vcpu_ioctl,
   .mmap           = kvm_vcpu_mmap,
   .llseek		= noop_llseek,

KVM_COMPAT(kvm_vcpu_compat_ioctl),


KVM_COMPAT_DIRECT(kvm_vcpu_compat_ioctl),

};
 
 /*
@@ -4110,7 +4118,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
   		   unsigned int ioctl, user_uintptr_t arg)
 {
   struct kvm_vcpu *vcpu = filp->private_data;

void __user *argp = (void __user *)arg;


void __user *argp;
int r;
struct kvm_fpu *fpu = NULL;
struct kvm_sregs *kvm_sregs = NULL;

@@ -4121,6 +4129,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
   if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
   	return -EINVAL;

argp = in_compat64_syscall() ? compat_ptr(arg) : (void __user *)arg;

Instead of this, I would suggest doing the easy thing: have
kvm_vcpu_compat_ioctl() call kvm_vcpu_ioctl(filp, ioctl,
(user_uintptr_t)argp), so that the native handler can assume that arg is
a valid pointer. This patch effectively does this already for
kvm_device_ioctl() and kvm_dev_ioctl() by using compat_ptr_ioctl, and we
might as well do it for all the handlers. It is not 100% correct in the
sense that some commands do not expect a pointer, but at this point I am
really not convinced that the additional complexity created by using
compat_ptr() only when the argument is actually a pointer is justified.
The overhead of calling compat_ptr() should be fairly minimal, and it is
very difficult to imagine a case where it would be a security issue.
...
/*
    * Some architectures have vcpu ioctls that are asynchronous to vcpu
    * execution; mutex_lock() would break them.
@@ -4317,6 +4326,45 @@ static long kvm_vcpu_ioctl(struct file *filp,
   return r;
 }
 
+int kvm_get_device_attr_compat(unsigned int ioctl, unsigned long arg,

	       struct kvm_device_attr *attr)



+{

int ret = -EINVAL;

+#ifdef CONFIG_KVM_COMPAT

if (!in_compat64_syscall())
return ret;



switch (_IOC_NR(ioctl)) {
case _IOC_NR(KVM_HAS_DEVICE_ATTR):
case _IOC_NR(KVM_GET_DEVICE_ATTR):
case _IOC_NR(KVM_SET_DEVICE_ATTR): {
struct {


	__u32		flags;


	__u32		group;


	__u64		attr;


	compat_uptr_t	addr;


} __attr;



if (_IOC_SIZE(ioctl) != sizeof(__attr))



We do need this check indeed. At the moment I don't think we are
checking this systematically though, it should be done whenever we
transform a switch to use _IOC_NR(), both in native and in compat. That
does beg the question of whether this is actually simpler / shorter than
having a separate switch for compat64... This works well in the DRM case
because the driver knows what the expected size is just based on
_IOC_NR(cmd), but we don't have such infrastructure here. Maybe we can
figure out some clever wrapper for copy_from_user that also does some
command size checking, though.
A further issue is that we should check the remaining fields of the
command value as well, i.e. _IOC_TYPE() and _IOC_DIR(). The former can
be checked unconditionally at the beginning, but not the latter. Here as
well it gets rather cumbersome without DRM's infrastructure.
If we go for a separate switch for compat64, we can make our lives
easier by introducing a macro that replaces the size field in a command
value (like KVM_HAS_DEVICE_ATTR) with a new value, i.e. the size of the
compat struct.
...

	break;



ret = -EFAULT;


if (copy_from_user(&__attr, compat_ptr(arg), sizeof(__attr)))


	break;



attr->flags = __attr.flags;


attr->group = __attr.group;


attr->attr  = __attr.attr;


attr->addr  = (__kernel_uintptr_t)compat_ptr(__attr.addr);


ret = 0;


}
default:
break;


}

+#endif

return ret;

+}



#ifdef CONFIG_KVM_COMPAT
 static long kvm_vcpu_compat_ioctl(struct file *filp,
   			  unsigned int ioctl, unsigned long arg)
@@ -4370,37 +4418,38 @@ static int kvm_device_mmap(struct file *filp, struct vm_area_struct *vma)
   return -ENODEV;
 }
 
-static int kvm_device_ioctl_attr(struct kvm_device *dev,
+static inline int kvm_device_ioctl_attr(struct kvm_device *dev,
   			 int (*accessor)(struct kvm_device *dev,
   					 struct kvm_device_attr *attr),

		 user_uintptr_t arg)




		 struct kvm_device_attr *attr)



{

struct kvm_device_attr attr;
if (!accessor)
  return -EPERM;


if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
return -EFAULT;



return accessor(dev, &attr);


return accessor(dev, attr);

}
 
 static long kvm_device_ioctl(struct file *filp, unsigned int ioctl,
   		     user_uintptr_t arg)
 {
   struct kvm_device *dev = filp->private_data;

struct kvm_device_attr attr;
int ret;

if (dev->kvm->mm != current->mm || dev->kvm->vm_dead)
   	return -EIO;

switch (ioctl) {
case KVM_SET_DEVICE_ATTR:
return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg);


case KVM_GET_DEVICE_ATTR:
return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg);


case KVM_HAS_DEVICE_ATTR:
return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg);




ret = kvm_get_device_attr_compat(ioctl, arg, &attr);
if (ret)
return ret;



switch (_IOC_NR(ioctl)) {
case _IOC_NR(KVM_SET_DEVICE_ATTR):
return kvm_device_ioctl_attr(dev, dev->ops->set_attr, &attr);


case _IOC_NR(KVM_GET_DEVICE_ATTR):
return kvm_device_ioctl_attr(dev, dev->ops->get_attr, &attr);


case _IOC_NR(KVM_HAS_DEVICE_ATTR):
return kvm_device_ioctl_attr(dev, dev->ops->has_attr, &attr);

default:
if (dev->ops->ioctl)
	return dev->ops->ioctl(dev, ioctl, arg);

@@ -4787,11 +4836,13 @@ static long kvm_vm_ioctl(struct file *filp,
   		   unsigned int ioctl, user_uintptr_t arg)
 {
   struct kvm *kvm = filp->private_data;

void __user *argp = (void __user *)arg;


void __user *argp;
int r;

if (kvm->mm != current->mm || kvm->vm_dead)
   	return -EIO;


argp = in_compat64_syscall() ? compat_ptr(arg) : (void __user *)arg;
switch (ioctl) {
case KVM_CREATE_VCPU:
r = kvm_vm_ioctl_create_vcpu(kvm, arg);

@@ -5014,13 +5065,13 @@ static long kvm_vm_compat_ioctl(struct file *filp,
   if (r != -ENOTTY)
   	return r;

switch (ioctl) {


switch (_IOC_NR(ioctl)) {

#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT

case KVM_CLEAR_DIRTY_LOG: {


case _IOC_NR(KVM_CLEAR_DIRTY_LOG): {
struct compat_kvm_clear_dirty_log compat_log;
struct kvm_clear_dirty_log log;


if (copy_from_user(&compat_log, (void __user *)arg,




if (copy_from_user(&compat_log, compat_ptr(arg),
	   sizeof(compat_log)))
return -EFAULT;

log.slot	 = compat_log.slot;

@@ -5033,11 +5084,11 @@ static long kvm_vm_compat_ioctl(struct file *filp,
   	break;
   }
 #endif

case KVM_GET_DIRTY_LOG: {


case _IOC_NR(KVM_GET_DIRTY_LOG): {
struct compat_kvm_dirty_log compat_log;
struct kvm_dirty_log log;


if (copy_from_user(&compat_log, (void __user *)arg,




if (copy_from_user(&compat_log, compat_ptr(arg),
	   sizeof(compat_log)))
return -EFAULT;

log.slot	 = compat_log.slot;

@@ -5059,7 +5110,7 @@ static struct file_operations kvm_vm_fops = {
   .release        = kvm_vm_release,
   .unlocked_ioctl = kvm_vm_ioctl,
   .llseek		= noop_llseek,

KVM_COMPAT(kvm_vm_compat_ioctl),


KVM_COMPAT_DIRECT(kvm_vm_compat_ioctl),

};
 
 bool file_is_kvm(struct file *file)
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index ca24ce120906..304abc96272a 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -303,7 +303,7 @@ static int kvm_vfio_set_attr(struct kvm_device *dev,
   switch (attr->group) {
   case KVM_DEV_VFIO_FILE:
   	return kvm_vfio_set_file(dev, attr->attr,

			 u64_to_user_ptr(attr->addr));




			 as_user_ptr(attr->addr));



Not sure I understand this. I would expect a simple cast to void __user *.
Kevin
...
}
 
   return -ENXIO;

    

2025

2024

2023

2022

[linux-morello] Re: [PATCH 11/12] KVM: arm64: morello: Enable support for COMPAT64