PCuABI needs address space reservation interfaces to manage the owning
capability of allocated addresses. These interfaces prevent two unrelated
owning capabilities created by the kernel from overlapping.
The reservation interface stores the ranges of different virtual addresses
as reservation entries, which match the bounds of the capabilities the
kernel provides to userspace. It also stores the owning capability
permissions, so that future syscall requests to update permissions can be
validated.
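For reference, each reservation entry added by this patch is stored per
VMA as the following tuple (see the mm_types.h hunk below):

	struct reserv_struct {
		ptraddr_t reserv_start;       /* CHERI representable base */
		size_t reserv_len;            /* CHERI representable length */
		user_ptr_perms_t reserv_perm; /* owning capability permissions */
	};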
The reservation interfaces follow a few basic rules:
- Reservations can only be created or destroyed, never expanded or shrunk.
  A reservation is created when a new memory mapping is made outside of an
  existing reservation.
- A single reservation can have many mappings. However, unused regions of
  the reservation cannot be reused.
- The reservation start address is aligned to the CHERI representable base.
- The reservation length is aligned to the CHERI representable length
  (see the sketch below).
More rules about the address space reservation interface can be found in the PCuABI specification.
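To illustrate the last two rules, the sketch below shows how a requested
page-aligned range would be widened to its reservation bounds. It mirrors
what reserv_vma_set_reserv() does in this patch; the wrapper name
reserv_bounds_example() is purely illustrative:

	/* Illustrative only: compute the reservation bounds for a range,
	 * using the <linux/cheri.h> helpers this patch relies on.
	 */
	static void reserv_bounds_example(ptraddr_t start, size_t len,
					  ptraddr_t *rstart, size_t *rlen)
	{
		/* Align the base down to a CHERI representable address */
		*rstart = start & cheri_representable_alignment_mask(len);
		/* Round the length up to a CHERI representable length */
		*rlen = cheri_representable_length(len);
	}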
This commit introduces the APIs reserv_vma_set_reserv(),
reserv_range_set_reserv(), reserv_vmi_range_mapped(),
reserv_vmi_cap_within_reserv(), reserv_vma_cap_within_reserv(),
reserv_vma_range_within_reserv(), reserv_is_supported() and reserv_fork().
All of them except reserv_range_set_reserv() operate on a single VMA.
These interfaces will be used in the various memory management syscalls in
subsequent patches.
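As a rough sketch of the intended use (the exact call sites land in
subsequent patches, so the surrounding variables here are hypothetical),
a syscall handler would validate an incoming owning capability against the
reservation before acting on it, and create a reservation when mapping
outside any existing one:

	/* Hypothetical caller, with mmap_lock held: reject a request whose
	 * capability bounds do not fall within the VMA's reservation.
	 */
	if (!reserv_vma_cap_within_reserv(vma, user_cap))
		return -ERESERVATION;

	/* Hypothetical caller: reserve a fresh, representable range and
	 * obtain the owning capability to return to userspace.
	 */
	user_ptr = reserv_range_set_reserv(reserv_representable_base(addr, len),
					   reserv_representable_length(len),
					   perms, false);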
Signed-off-by: Amit Daniel Kachhap <amitdaniel.kachhap@arm.com>
---
 include/linux/cap_addr_mgmt.h | 217 ++++++++++++++++++++++++++++++++++
 include/linux/mm_types.h      |   9 ++
 include/linux/user_ptr.h      |   5 +
 mm/Makefile                   |   2 +-
 mm/cap_addr_mgmt.c            | 152 ++++++++++++++++++++++++
 5 files changed, 384 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/cap_addr_mgmt.h
 create mode 100644 mm/cap_addr_mgmt.c
diff --git a/include/linux/cap_addr_mgmt.h b/include/linux/cap_addr_mgmt.h
new file mode 100644
index 000000000000..3cb45e41f36c
--- /dev/null
+++ b/include/linux/cap_addr_mgmt.h
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_CAP_ADDR_MGMT_H
+#define _LINUX_CAP_ADDR_MGMT_H
+
+#include <linux/cheri.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/sched/coredump.h>
+#include <linux/types.h>
+#include <linux/user_ptr.h>
+
+#ifdef CONFIG_CHERI_PURECAP_UABI
+#define reserv_representable_alignment(len) \
+	(test_bit(MMF_PCUABI_RESERV, &current->mm->flags) \
+	 ? ~cheri_representable_alignment_mask(len) : 0)
+
+#define reserv_representable_base(base, len) \
+	(test_bit(MMF_PCUABI_RESERV, &current->mm->flags) \
+	 ? ((base) & cheri_representable_alignment_mask(len)) : (base))
+
+#define reserv_representable_length(len) \
+	(test_bit(MMF_PCUABI_RESERV, &current->mm->flags) \
+	 ? cheri_representable_length(len) : (len))
+
+#define reserv_vma_reserv_start(vma) \
+	(test_bit(MMF_PCUABI_RESERV, &(vma)->vm_mm->flags) \
+	 ? (vma)->reserv_data.reserv_start : 0)
+
+#define reserv_vma_reserv_len(vma) \
+	(test_bit(MMF_PCUABI_RESERV, &(vma)->vm_mm->flags) \
+	 ? (vma)->reserv_data.reserv_len : 0)
+
+#define reserv_vma_reserv_perm(vma) \
+	(test_bit(MMF_PCUABI_RESERV, &(vma)->vm_mm->flags) \
+	 ? (vma)->reserv_data.reserv_perm : 0)
+
+/**
+ * reserv_vma_set_reserv() - Sets the reservation details in the VMA for the
+ * virtual address range from start to (start + len) with the perm permission
+ * as the entry. The start address is stored as the CHERI representable base
+ * and the length as the CHERI representable length. They are expected not to
+ * interfere with the successive VMA. This function should be called with
+ * mmap_lock held.
+ * @vma: The VMA pointer to insert the reservation entry.
+ * @start: Reservation start value.
+ * @len: Reservation length.
+ * @perm: Capability permission for the reserved range.
+ *
+ * Return: 0 if the reservation entry is added successfully or a negative
+ * error code otherwise.
+ */
+int reserv_vma_set_reserv(struct vm_area_struct *vma, ptraddr_t start,
+			  size_t len, user_ptr_perms_t perm);
+
+/**
+ * reserv_range_set_reserv() - Sets the reservation details across the VMAs
+ * for the virtual address range from start to (start + len) with the perm
+ * permission as the entry. The start address is expected to be a CHERI
+ * representable base and the length a CHERI representable length. This
+ * function internally uses mmap_lock to synchronize the VMA updates if
+ * mmap_lock is not already held.
+ * @start: Reservation start value.
+ * @len: Reservation length.
+ * @perm: Capability permission for the reserved range.
+ * @locked: Flag to indicate if mmap_lock is already held.
+ *
+ * Return: A valid capability with bounded range and the requested permissions
+ * or a negative error code otherwise.
+ */
+user_uintptr_t reserv_range_set_reserv(ptraddr_t start, size_t len,
+				       user_ptr_perms_t perm, bool locked);
+
+/**
+ * reserv_vmi_range_mapped() - Searches the reservation interface for the
+ * virtual address range from start to (start + len). This is useful to find
+ * out whether the requested range is mapped completely without
+ * fragmentation. This function internally uses mmap_lock to synchronize the
+ * VMA updates if mmap_lock is not already held.
+ * @vmi: The VMA iterator pointing at the VMA.
+ * @start: Virtual address start value.
+ * @len: Virtual address length.
+ * @locked: Flag to indicate if mmap_lock is already held.
+ *
+ * Return: 0 if the VMA mapping fully matches the given range or a negative
+ * error code otherwise.
+ */
+int reserv_vmi_range_mapped(struct vma_iterator *vmi, ptraddr_t start,
+			    size_t len, bool locked);
+
+/**
+ * reserv_vmi_cap_within_reserv() - Searches and matches the input VMI for
+ * the capability bound values falling within the reserved virtual address
+ * range. This function internally uses mmap_lock to synchronize the VMA
+ * updates if mmap_lock is not already held.
+ * @vmi: The VMA iterator pointing at the VMA.
+ * @cap: Reservation capability value.
+ * @locked: Flag to indicate if mmap_lock is already held.
+ *
+ * Return: True if the input capability bound values fall within the reserved
+ * virtual address range or false otherwise.
+ */
+bool reserv_vmi_cap_within_reserv(struct vma_iterator *vmi, user_uintptr_t cap,
+				  bool locked);
+
+/**
+ * reserv_vma_cap_within_reserv() - Searches and matches the input VMA for
+ * the capability bound values falling within the reserved virtual address
+ * range. This function should be called with mmap_lock held.
+ * @vma: The VMA pointer.
+ * @cap: Reservation capability value.
+ *
+ * Return: True if the input capability bound values fall within the reserved
+ * virtual address range or false otherwise.
+ */
+bool reserv_vma_cap_within_reserv(struct vm_area_struct *vma, user_uintptr_t cap);
+
+/**
+ * reserv_vma_range_within_reserv() - Searches and matches the input VMA for
+ * the input address range falling within the reserved virtual address range.
+ * This function should be called with mmap_lock held.
+ * @vma: The VMA pointer.
+ * @start: Virtual address start value.
+ * @len: Virtual address length.
+ *
+ * Return: True if the input address range falls within the reserved virtual
+ * address range or false otherwise.
+ */
+bool reserv_vma_range_within_reserv(struct vm_area_struct *vma, ptraddr_t start,
+				    size_t len);
+
+/**
+ * reserv_is_supported() - Checks if the reservation property exists for the mm.
+ * @mm: The mm pointer.
+ *
+ * Return: True if the mm has the reservation property set or false otherwise.
+ */
+static inline bool reserv_is_supported(struct mm_struct *mm)
+{
+	return mm && test_bit(MMF_PCUABI_RESERV, &mm->flags);
+}
+
+/**
+ * reserv_fork() - Checks and copies the MMF_PCUABI_RESERV bit to the new mm
+ * during fork.
+ * @mm: New mm pointer.
+ * @oldmm: Old mm pointer.
+ *
+ * Return: None.
+ */
+static inline void reserv_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+	if (test_bit(MMF_PCUABI_RESERV, &oldmm->flags))
+		set_bit(MMF_PCUABI_RESERV, &mm->flags);
+}
+
+#else /* CONFIG_CHERI_PURECAP_UABI */
+
+#define reserv_representable_alignment(len) 0
+
+#define reserv_representable_base(base, len) (base)
+
+#define reserv_representable_length(len) (len)
+
+#define reserv_vma_reserv_start(vma) 0
+
+#define reserv_vma_reserv_len(vma) 0
+
+#define reserv_vma_reserv_perm(vma) 0
+
+static inline int reserv_vma_set_reserv(struct vm_area_struct *vma, ptraddr_t start,
+					size_t len, user_ptr_perms_t perm)
+{
+	return 0;
+}
+
+static inline user_uintptr_t reserv_range_set_reserv(ptraddr_t start, size_t len,
+						     user_ptr_perms_t perm, bool locked)
+{
+	return (user_uintptr_t)start;
+}
+
+static inline int reserv_vmi_range_mapped(struct vma_iterator *vmi, ptraddr_t start,
+					  size_t len, bool locked)
+{
+	return 0;
+}
+
+static inline bool reserv_vmi_cap_within_reserv(struct vma_iterator *vmi, user_uintptr_t cap,
+						bool locked)
+{
+	return true;
+}
+
+static inline bool reserv_vma_cap_within_reserv(struct vm_area_struct *vma, user_uintptr_t cap)
+{
+	return true;
+}
+
+static inline bool reserv_vma_range_within_reserv(struct vm_area_struct *vma, ptraddr_t start,
+						  size_t len)
+{
+	return true;
+}
+
+static inline bool reserv_is_supported(struct mm_struct *mm)
+{
+	return false;
+}
+
+static inline void reserv_fork(struct mm_struct *mm, struct mm_struct *oldmm) {}
+
+#endif /* CONFIG_CHERI_PURECAP_UABI */
+
+#endif /* _LINUX_CAP_ADDR_MGMT_H */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 774bd7d6ad60..5182848f4228 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -607,6 +607,12 @@ struct vma_numab_state {
 	int prev_scan_seq;
 };
 
+struct reserv_struct {
+	ptraddr_t reserv_start;
+	size_t reserv_len;
+	user_ptr_perms_t reserv_perm;
+};
+
 /*
  * This struct describes a virtual memory area. There is one of these
  * per VM-area/task. A VM area is any part of the process virtual memory
@@ -711,6 +717,9 @@ struct vm_area_struct {
 	struct vma_numab_state *numab_state;	/* NUMA Balancing state */
 #endif
 	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
+#ifdef CONFIG_CHERI_PURECAP_UABI
+	struct reserv_struct reserv_data;
+#endif
 } __randomize_layout;
 
 #ifdef CONFIG_NUMA
diff --git a/include/linux/user_ptr.h b/include/linux/user_ptr.h
index 685586bc0d89..d663c6105d54 100644
--- a/include/linux/user_ptr.h
+++ b/include/linux/user_ptr.h
@@ -2,6 +2,7 @@
 #ifndef _LINUX_USER_PTR_H
 #define _LINUX_USER_PTR_H
 
+#include <linux/cheri.h>
 #include <linux/limits.h>
 #include <linux/typecheck.h>
 
@@ -27,6 +28,8 @@
 
 #ifdef CONFIG_CHERI_PURECAP_UABI
 
+#define user_ptr_perms_t cheri_perms_t
+
 /**
  * uaddr_to_user_ptr() - Convert a user-provided address to a user pointer.
  * @addr: The address to set the pointer to.
@@ -109,6 +112,8 @@ bool check_user_ptr_rw(void __user *ptr, size_t len);
 
 #else /* CONFIG_CHERI_PURECAP_UABI */
 
+#define user_ptr_perms_t int
+
 static inline void __user *uaddr_to_user_ptr(ptraddr_t addr)
 {
 	return as_user_ptr(addr);
diff --git a/mm/Makefile b/mm/Makefile
index 33873c8aedb3..6f994a1664e4 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -39,7 +39,7 @@ mmu-y			:= nommu.o
 mmu-$(CONFIG_MMU)	:= highmem.o memory.o mincore.o \
 			   mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \
 			   msync.o page_vma_mapped.o pagewalk.o \
-			   pgtable-generic.o rmap.o vmalloc.o
+			   pgtable-generic.o rmap.o vmalloc.o cap_addr_mgmt.o
 
 ifdef CONFIG_CROSS_MEMORY_ATTACH
diff --git a/mm/cap_addr_mgmt.c b/mm/cap_addr_mgmt.c
new file mode 100644
index 000000000000..5586fde34d0a
--- /dev/null
+++ b/mm/cap_addr_mgmt.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bug.h>
+#include <linux/cap_addr_mgmt.h>
+#include <linux/cheri.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#ifdef CONFIG_CHERI_PURECAP_UABI
+
+int reserv_vma_set_reserv(struct vm_area_struct *vma, ptraddr_t start,
+			  size_t len, user_ptr_perms_t perm)
+{
+	if (!reserv_is_supported(vma->vm_mm))
+		return 0;
+	if (start + len < start)
+		return -EINVAL;
+	/* Reservation base/length is expected to be page aligned */
+	VM_BUG_ON(start & ~PAGE_MASK || len % PAGE_SIZE);
+
+	vma->reserv_data.reserv_start = start & cheri_representable_alignment_mask(len);
+	vma->reserv_data.reserv_len = cheri_representable_length(len);
+	if (perm)
+		vma->reserv_data.reserv_perm = perm;
+
+	return 0;
+}
+
+user_uintptr_t reserv_range_set_reserv(ptraddr_t start, size_t len, user_ptr_perms_t perm,
+				       bool locked)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	ptraddr_t end = start + len;
+	user_uintptr_t ret = 0;
+	VMA_ITERATOR(vmi, mm, start);
+
+	if (!reserv_is_supported(mm))
+		return start;
+	if (end < start)
+		return -EINVAL;
+
+	/* Check if the reservation range is representable and bail out if not */
+	if (start & ~cheri_representable_alignment_mask(len) ||
+	    len != cheri_representable_length(len) ||
+	    start & ~PAGE_MASK || len % PAGE_SIZE) {
+		printk(KERN_WARNING "Reservation range (0x%lx)-(0x%lx) is not representable\n",
+		       start, start + len - 1);
+		return -ERESERVATION;
+	}
+	if (!locked && mmap_write_lock_killable(mm))
+		return -EINTR;
+
+	for_each_vma_range(vmi, vma, end) {
+		WRITE_ONCE(vma->reserv_data.reserv_start, start);
+		WRITE_ONCE(vma->reserv_data.reserv_len, len);
+		WRITE_ONCE(vma->reserv_data.reserv_perm, perm);
+	}
+	if (!locked)
+		mmap_write_unlock(mm);
+	ret = (user_uintptr_t)uaddr_to_user_ptr_safe(start);
+
+	return ret;
+}
+
+int reserv_vmi_range_mapped(struct vma_iterator *vmi, ptraddr_t start,
+			    size_t len, bool locked)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	int ret = -ENOMEM;
+
+	if (!reserv_is_supported(mm))
+		return 0;
+	if (!locked && mmap_read_lock_killable(mm))
+		return -EINTR;
+
+	start = round_down(start, PAGE_SIZE);
+	len = round_up(len, PAGE_SIZE);
+	mas_set_range(&vmi->mas, start, start);
+	/* Try walking the given range */
+	vma = mas_find(&vmi->mas, start + len - 1);
+	if (!vma)
+		goto out;
+
+	/* If a gap fits in the range then it is not fully mapped */
+	if (!mas_empty_area(&vmi->mas, start, start + len - 1, 1))
+		goto out;
+	ret = 0;
+out:
+	if (!locked)
+		mmap_read_unlock(mm);
+	return ret;
+}
+
+bool reserv_vmi_cap_within_reserv(struct vma_iterator *vmi, user_uintptr_t cap, bool locked)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	ptraddr_t cap_start = cheri_base_get(cap);
+	ptraddr_t cap_end = cap_start + cheri_length_get(cap);
+	bool ret = false;
+
+	if (!reserv_is_supported(mm))
+		return true;
+
+	if (!locked && mmap_read_lock_killable(mm))
+		return false;
+
+	/* Check if there is a match with the existing reservations */
+	vma = mas_find(&vmi->mas, cap_end);
+	if (!vma)
+		goto out;
+
+	if (vma->reserv_data.reserv_start <= cap_start &&
+	    vma->reserv_data.reserv_start + vma->reserv_data.reserv_len >= cap_end)
+		ret = true;
+out:
+	if (!locked)
+		mmap_read_unlock(mm);
+
+	return ret;
+}
+
+bool reserv_vma_cap_within_reserv(struct vm_area_struct *vma, user_uintptr_t cap)
+{
+	ptraddr_t cap_start = cheri_base_get(cap);
+	ptraddr_t cap_end = cap_start + cheri_length_get(cap);
+
+	if (!reserv_is_supported(vma->vm_mm))
+		return true;
+
+	/* Check if there is a match with the existing reservations */
+	return vma->reserv_data.reserv_start <= cap_start &&
+	       vma->reserv_data.reserv_start + vma->reserv_data.reserv_len >= cap_end;
+}
+
+bool reserv_vma_range_within_reserv(struct vm_area_struct *vma, ptraddr_t start, size_t len)
+{
+	if (!reserv_is_supported(vma->vm_mm))
+		return true;
+
+	start = untagged_addr(start);
+
+	/* Check if there is a match with the existing reservations */
+	return vma->reserv_data.reserv_start <= start &&
+	       vma->reserv_data.reserv_start + vma->reserv_data.reserv_len >= start + len;
+}
+
+#endif /* CONFIG_CHERI_PURECAP_UABI */