On 26/02/2024 10:20, Beata Michalska wrote:
Add support for managing kvm virtual machines through plain aarch64 tools. No support for purecap guests just yet.
What this patch really does is adding purecap support to the KVM ioctls, as well as compat64 handling to keep it working. I would prefer doing each step in its own patch (first compat64 handling then purecap support), like we've done for io_uring, BPF, etc.
[...]
@@ -786,7 +786,7 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) return -EINVAL; switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
- case KVM_REG_ARM_CORE: return set_core_reg(vcpu, reg);
- case KVM_REG_ARM_CORE: return set_core_reg(vcpu, reg);
Let's avoid unnecessary whitespace changes (however logical).
case KVM_REG_ARM_FW: case KVM_REG_ARM_FW_FEAT_BMAP: return kvm_arm_set_fw_reg(vcpu, reg);
[...] diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b011b4ec2ddf..c2f743462148 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1215,9 +1215,10 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, #define __kvm_get_guest(kvm, gfn, offset, v) \ ({ \ unsigned long __addr = gfn_to_hva(kvm, gfn); \
- typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \
- typeof(v) __user *__uaddr; \ int __ret = -EFAULT; \ \
- __uaddr = uaddr_to_user_ptr(__addr + offset); \ if (!kvm_is_error_hva(__addr)) \ __ret = get_user(v, __uaddr); \ __ret; \
@@ -1235,9 +1236,10 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, #define __kvm_put_guest(kvm, gfn, offset, v) \ ({ \ unsigned long __addr = gfn_to_hva(kvm, gfn); \
- typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \
- typeof(v) __user *__uaddr; \ int __ret = -EFAULT; \ \
- __uaddr = uaddr_to_user_ptr(__addr + offset); \
It does look like we have to create a capability ourselves here, and we're in control of the address so that's ok. uaddr_to_user_ptr() should not be used in such situations though, it is only meant as a temporary workaround for user-provided addresses. Nowadays we should use make_user_ptr_*() instead (which restricts both bounds and permissions), see [1].
Also these changes look unrelated to the ioctl adaptations, I think they belong to a separate patch.
[1] https://git.morello-project.org/morello/kernel/linux/-/blob/morello/master/D...
if (!kvm_is_error_hva(__addr)) \ __ret = put_user(v, __uaddr); \ if (!__ret) \
[...]
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 484d0873061c..b6ed8ed3cc4d 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -70,7 +70,7 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT config KVM_COMPAT def_bool y
depends on KVM && COMPAT && !(S390 || ARM64 || RISCV)
depends on KVM && COMPAT && !(S390 || RISCV || (ARM64 &&!ARM64_MORELLO))
What matters is not so much that ARM64_MORELLO is selected, but rather that PCuABI is selected (regardless of the architecture). I think the condition could be something like:
KVM && COMPAT && (CHERI_PURECAP_UABI || !(S390 || ARM64 || RISCV))
config HAVE_KVM_IRQ_BYPASS bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d79fd04d8b9e..bbddb168430c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -120,7 +120,14 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, #ifdef CONFIG_KVM_COMPAT static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); +#ifndef CONFIG_COMPAT64 #define KVM_COMPAT(c) .compat_ioctl = (c) +#define KVM_COMPAT_DIRECT(c) KVM_COMPAT(c) +#else +#define KVM_COMPAT(c) .compat_ioctl = compat_ptr_ioctl, \
I don't think there is any issue with using compat_ptr_ioctl regardless of the kind of compat. The compat_ptr() conversion is not actually specific to compat64, it just happens that very few compat ioctl handlers already do the right thing.
Specifically, what I'm suggesting is to leave all of these macros unchanged, and for kvm_device_ioctl and kvm_dev_ioctl, use KVM_COMPAT(compat_ptr_ioctl).
.unlocked_ioctl = (c)
It seems to me that this additional .unlocked_ioctl assignment does nothing (it is already present in the struct definitions where KVM_COMPAT() is used). Also worth noting that overriding an initialiser can trigger a warning (though not in the default build configuration).
+#define KVM_COMPAT_DIRECT(c) .compat_ioctl = (c) +#endif #else /*
- For architectures that don't implement a compat infrastructure,
@@ -138,6 +145,7 @@ static int kvm_no_compat_open(struct inode *inode, struct file *file) } #define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl, \ .open = kvm_no_compat_open +#define KVM_COMPAT_DIRECT(c) KVM_COMPAT(c) #endif static int hardware_enable_all(void); static void hardware_disable_all(void); @@ -3885,7 +3893,7 @@ static struct file_operations kvm_vcpu_fops = { .unlocked_ioctl = kvm_vcpu_ioctl, .mmap = kvm_vcpu_mmap, .llseek = noop_llseek,
- KVM_COMPAT(kvm_vcpu_compat_ioctl),
- KVM_COMPAT_DIRECT(kvm_vcpu_compat_ioctl),
}; /* @@ -4110,7 +4118,7 @@ static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, user_uintptr_t arg) { struct kvm_vcpu *vcpu = filp->private_data;
- void __user *argp = (void __user *)arg;
- void __user *argp; int r; struct kvm_fpu *fpu = NULL; struct kvm_sregs *kvm_sregs = NULL;
@@ -4121,6 +4129,7 @@ static long kvm_vcpu_ioctl(struct file *filp, if (unlikely(_IOC_TYPE(ioctl) != KVMIO)) return -EINVAL;
- argp = in_compat64_syscall() ? compat_ptr(arg) : (void __user *)arg;
Instead of this, I would suggest doing the easy thing: have kvm_vcpu_compat_ioctl() call kvm_vcpu_ioctl(filp, ioctl, (user_uintptr_t)argp), so that the native handler can assume that arg is a valid pointer. This patch effectively does this already for kvm_device_ioctl() and kvm_dev_ioctl() by using compat_ptr_ioctl, and we might as well do it for all the handlers. It is not 100% correct in the sense that some commands do not expect a pointer, but at this point I am really not convinced that the additional complexity created by using compat_ptr() only when the argument is actually a pointer is justified. The overhead of calling compat_ptr() should be fairly minimal, and it is very difficult to imagine a case where it would be a security issue.
/* * Some architectures have vcpu ioctls that are asynchronous to vcpu * execution; mutex_lock() would break them. @@ -4317,6 +4326,45 @@ static long kvm_vcpu_ioctl(struct file *filp, return r; } +int kvm_get_device_attr_compat(unsigned int ioctl, unsigned long arg,
struct kvm_device_attr *attr)
+{
- int ret = -EINVAL;
+#ifdef CONFIG_KVM_COMPAT
- if (!in_compat64_syscall())
return ret;
- switch (_IOC_NR(ioctl)) {
- case _IOC_NR(KVM_HAS_DEVICE_ATTR):
- case _IOC_NR(KVM_GET_DEVICE_ATTR):
- case _IOC_NR(KVM_SET_DEVICE_ATTR): {
struct {
__u32 flags;
__u32 group;
__u64 attr;
compat_uptr_t addr;
} __attr;
if (_IOC_SIZE(ioctl) != sizeof(__attr))
We do need this check indeed. At the moment I don't think we are checking this systematically though, it should be done whenever we transform a switch to use _IOC_NR(), both in native and in compat. That does beg the question of whether this is actually simpler / shorter than having a separate switch for compat64... This works well in the DRM case because the driver knows what the expected size is just based on _IOC_NR(cmd), but we don't have such infrastructure here. Maybe we can figure out some clever wrapper for copy_from_user that also does some command size checking, though.
A further issue is that we should check the remaining fields of the command value as well, i.e. _IOC_TYPE() and _IOC_DIR(). The former can be checked unconditionally at the beginning, but not the latter. Here as well it gets rather cumbersome without DRM's infrastructure.
If we go for a separate switch for compat64, we can make our lives easier by introducing a macro that replaces the size field in a command value (like KVM_HAS_DEVICE_ATTR) with a new value, i.e. the size of the compat struct.
break;
ret = -EFAULT;
if (copy_from_user(&__attr, compat_ptr(arg), sizeof(__attr)))
break;
attr->flags = __attr.flags;
attr->group = __attr.group;
attr->attr = __attr.attr;
attr->addr = (__kernel_uintptr_t)compat_ptr(__attr.addr);
ret = 0;
- }
- default:
break;
- }
+#endif
- return ret;
+}
#ifdef CONFIG_KVM_COMPAT static long kvm_vcpu_compat_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) @@ -4370,37 +4418,38 @@ static int kvm_device_mmap(struct file *filp, struct vm_area_struct *vma) return -ENODEV; } -static int kvm_device_ioctl_attr(struct kvm_device *dev, +static inline int kvm_device_ioctl_attr(struct kvm_device *dev, int (*accessor)(struct kvm_device *dev, struct kvm_device_attr *attr),
user_uintptr_t arg)
struct kvm_device_attr *attr)
{
- struct kvm_device_attr attr;
- if (!accessor) return -EPERM;
- if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
return -EFAULT;
- return accessor(dev, &attr);
- return accessor(dev, attr);
} static long kvm_device_ioctl(struct file *filp, unsigned int ioctl, user_uintptr_t arg) { struct kvm_device *dev = filp->private_data;
- struct kvm_device_attr attr;
- int ret;
if (dev->kvm->mm != current->mm || dev->kvm->vm_dead) return -EIO;
- switch (ioctl) {
- case KVM_SET_DEVICE_ATTR:
return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg);
- case KVM_GET_DEVICE_ATTR:
return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg);
- case KVM_HAS_DEVICE_ATTR:
return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg);
- ret = kvm_get_device_attr_compat(ioctl, arg, &attr);
- if (ret)
return ret;
- switch (_IOC_NR(ioctl)) {
- case _IOC_NR(KVM_SET_DEVICE_ATTR):
return kvm_device_ioctl_attr(dev, dev->ops->set_attr, &attr);
- case _IOC_NR(KVM_GET_DEVICE_ATTR):
return kvm_device_ioctl_attr(dev, dev->ops->get_attr, &attr);
- case _IOC_NR(KVM_HAS_DEVICE_ATTR):
default: if (dev->ops->ioctl) return dev->ops->ioctl(dev, ioctl, arg);return kvm_device_ioctl_attr(dev, dev->ops->has_attr, &attr);
@@ -4787,11 +4836,13 @@ static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, user_uintptr_t arg) { struct kvm *kvm = filp->private_data;
- void __user *argp = (void __user *)arg;
- void __user *argp; int r;
if (kvm->mm != current->mm || kvm->vm_dead) return -EIO;
- argp = in_compat64_syscall() ? compat_ptr(arg) : (void __user *)arg; switch (ioctl) { case KVM_CREATE_VCPU: r = kvm_vm_ioctl_create_vcpu(kvm, arg);
@@ -5014,13 +5065,13 @@ static long kvm_vm_compat_ioctl(struct file *filp, if (r != -ENOTTY) return r;
- switch (ioctl) {
- switch (_IOC_NR(ioctl)) {
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
- case KVM_CLEAR_DIRTY_LOG: {
- case _IOC_NR(KVM_CLEAR_DIRTY_LOG): { struct compat_kvm_clear_dirty_log compat_log; struct kvm_clear_dirty_log log;
if (copy_from_user(&compat_log, (void __user *)arg,
log.slot = compat_log.slot;if (copy_from_user(&compat_log, compat_ptr(arg), sizeof(compat_log))) return -EFAULT;
@@ -5033,11 +5084,11 @@ static long kvm_vm_compat_ioctl(struct file *filp, break; } #endif
- case KVM_GET_DIRTY_LOG: {
- case _IOC_NR(KVM_GET_DIRTY_LOG): { struct compat_kvm_dirty_log compat_log; struct kvm_dirty_log log;
if (copy_from_user(&compat_log, (void __user *)arg,
log.slot = compat_log.slot;if (copy_from_user(&compat_log, compat_ptr(arg), sizeof(compat_log))) return -EFAULT;
@@ -5059,7 +5110,7 @@ static struct file_operations kvm_vm_fops = { .release = kvm_vm_release, .unlocked_ioctl = kvm_vm_ioctl, .llseek = noop_llseek,
- KVM_COMPAT(kvm_vm_compat_ioctl),
- KVM_COMPAT_DIRECT(kvm_vm_compat_ioctl),
}; bool file_is_kvm(struct file *file) diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index ca24ce120906..304abc96272a 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -303,7 +303,7 @@ static int kvm_vfio_set_attr(struct kvm_device *dev, switch (attr->group) { case KVM_DEV_VFIO_FILE: return kvm_vfio_set_file(dev, attr->attr,
u64_to_user_ptr(attr->addr));
as_user_ptr(attr->addr));
Not sure I understand this. I would expect a simple cast to void __user *.
Kevin
} return -ENXIO;