PCuABI needs address space reservation interfaces to manage the owning capability of the allocated addresses. These interfaces prevent two unrelated owning capabilities created by the kernel from overlapping.
The reservation interface stores the ranges of different virtual addresses as reservation entries; each entry matches the bounds of the capability that the kernel provides to userspace. It also stores the owning capability's permissions, so that future syscall requests to update permissions can be validated.
The reservation interfaces follow a few basic rules:
- Reservations can only be created or destroyed, never expanded or shrunk.
  A reservation is created when a new memory mapping is made outside of an
  existing reservation.
- A single reservation can hold many mappings. However, unused regions of a
  reservation cannot be reused.
- The reservation start address is aligned to the CHERI representable base.
- The reservation length is aligned to the CHERI representable length (see
  the sketch below).
More rules about the address space reservation interface can be found in the PCuABI specification.
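
To make the two alignment rules concrete, here is a minimal userspace-flavoured
sketch (illustration only, not part of this patch). It assumes the CHERI Clang
builtins __builtin_cheri_round_representable_length() and
__builtin_cheri_representable_alignment_mask(), which correspond to the
cheri_representable_length() and cheri_representable_alignment_mask() helpers
used by this patch:

  #include <stddef.h>
  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
          /* Page-aligned request; the values are arbitrary examples */
          size_t len = 0x12345000;
          uintptr_t base = 0xfffff7000000;

          /* Length is rounded up until the capability bounds are representable */
          size_t rlen = __builtin_cheri_round_representable_length(len);
          /* Base is aligned down so that [rbase, rbase + rlen) is representable */
          uintptr_t rbase = base & __builtin_cheri_representable_alignment_mask(len);

          /* rbase <= base and rlen >= len, so the reservation covers the request */
          printf("reservation base %#lx, length %#zx\n", (unsigned long)rbase, rlen);
          return 0;
  }

The resulting [rbase, rbase + rlen) range is what gets recorded as the
reservation entry.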
This commit introduces the APIs reserv_vma_set_reserv(), reserv_range_set_reserv(), reserv_vmi_range_mapped(), reserv_vmi_cap_within_reserv(), reserv_vma_cap_within_reserv(), reserv_vma_range_within_reserv(), reserv_is_supported() and reserv_fork(). All of them except reserv_range_set_reserv() operate on a single VMA. These interfaces will be used by different memory management syscalls in subsequent patches.
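
As a rough sketch of the intended call flow (hypothetical caller; only the
reserv_*() interfaces, the reserv_representable_*() macros and -ERESERVATION
below come from this patch, and cheri_tag_get() is assumed from the CHERI C
API), an mmap()-style syscall could validate an owning capability against its
reservation, or create a new reservation for a null-derived address:

  /* Hypothetical sketch, not part of this patch */
  static user_uintptr_t example_mmap_flow(user_uintptr_t user_ptr, size_t len,
                                          user_ptr_perms_t perms)
  {
          ptraddr_t addr = (ptraddr_t)user_ptr;
          VMA_ITERATOR(vmi, current->mm, addr);

          if (cheri_tag_get(user_ptr)) {
                  /* Valid capability: the request must stay within its reservation */
                  if (!reserv_vmi_cap_within_reserv(&vmi, user_ptr, false))
                          return -ERESERVATION;
                  /* ... perform the mapping inside the existing reservation ... */
                  return user_ptr;
          }

          /* Null-derived address: create a reservation covering the range and
           * return a fresh owning capability for it. */
          return reserv_range_set_reserv(reserv_representable_base(addr, len),
                                         reserv_representable_length(len),
                                         perms, false);
  }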
Signed-off-by: Amit Daniel Kachhap <amitdaniel.kachhap@arm.com>
---
 include/linux/cap_addr_mgmt.h | 227 ++++++++++++++++++++++++++++++++++
 include/linux/mm_types.h      |   9 ++
 mm/Makefile                   |   1 +
 mm/cap_addr_mgmt.c            | 150 ++++++++++++++++++++++
 4 files changed, 387 insertions(+)
 create mode 100644 include/linux/cap_addr_mgmt.h
 create mode 100644 mm/cap_addr_mgmt.c
diff --git a/include/linux/cap_addr_mgmt.h b/include/linux/cap_addr_mgmt.h
new file mode 100644
index 000000000000..015d9f0f77eb
--- /dev/null
+++ b/include/linux/cap_addr_mgmt.h
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_CAP_ADDR_MGMT_H
+#define _LINUX_CAP_ADDR_MGMT_H
+
+#include <linux/cheri.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/sched/coredump.h>
+#include <linux/types.h>
+#include <linux/user_ptr.h>
+
+#ifdef CONFIG_CHERI_PURECAP_UABI
+#define reserv_representable_alignment(len) \
+	(test_bit(MMF_PCUABI_RESERV, &current->mm->flags) \
+	 ? (PAGE_MASK & ~cheri_representable_alignment_mask(len)) : 0)
+
+#define reserv_representable_base(base, len) \
+	(test_bit(MMF_PCUABI_RESERV, &current->mm->flags) \
+	 ? (base & cheri_representable_alignment_mask(len)) : base)
+
+#define reserv_representable_length(len) \
+	(test_bit(MMF_PCUABI_RESERV, &current->mm->flags) \
+	 ? cheri_representable_length(len) : len)
+
+#define reserv_vma_reserv_start(vma) \
+	(test_bit(MMF_PCUABI_RESERV, &vma->vm_mm->flags) \
+	 ? vma->reserv_data.start : vma->vm_start)
+
+#define reserv_vma_reserv_len(vma) \
+	(test_bit(MMF_PCUABI_RESERV, &vma->vm_mm->flags) \
+	 ? vma->reserv_data.len : (vma->vm_end - vma->vm_start))
+
+#define reserv_vma_reserv_perms(vma) \
+	(test_bit(MMF_PCUABI_RESERV, &vma->vm_mm->flags) \
+	 ? vma->reserv_data.perms : 0)
+
+#define reserv_vma_reserv_info(vma) \
+({ \
+	struct reserv_struct __tmp = {0}; \
+	test_bit(MMF_PCUABI_RESERV, &vma->vm_mm->flags) \
+		? vma->reserv_data : __tmp; \
+})
+
+/**
+ * reserv_vma_set_reserv() - Sets the reservation details in the VMA for the
+ * virtual address range from start to (start + len) with perms permissions as
+ * the entry. The start address is stored as the CHERI representable base and
+ * the length as the CHERI representable length. They are expected not to
+ * interfere with the successive VMA. This function should be called with
+ * mmap_lock held.
+ * @vma: The VMA pointer to insert the reservation entry.
+ * @start: Reservation start value.
+ * @len: Reservation length.
+ * @perms: Capability permissions for the reserved range.
+ *
+ * Return: 0 if the reservation entry was added successfully, or a negative
+ * error code otherwise.
+ */
+int reserv_vma_set_reserv(struct vm_area_struct *vma, ptraddr_t start,
+			  size_t len, user_ptr_perms_t perms);
+
+/**
+ * reserv_range_set_reserv() - Sets the reservation details across the VMAs
+ * for the virtual address range from start to (start + len) with the perms
+ * permissions as the entry. The start address is expected to be a CHERI
+ * representable base and the length a CHERI representable length.
+ * This function internally uses mmap_lock to synchronize the VMA updates
+ * if mmap_lock is not already held.
+ * @start: Reservation start value.
+ * @len: Reservation length.
+ * @perms: Capability permissions for the reserved range.
+ * @locked: Flag to indicate if mmap_lock is already held.
+ *
+ * Return: A valid capability with the bounded range and requested permissions,
+ * or a negative error code otherwise.
+ */
+user_uintptr_t reserv_range_set_reserv(ptraddr_t start, size_t len,
+				       user_ptr_perms_t perms, bool locked);
+
+/**
+ * reserv_vmi_range_mapped() - Searches the reservation interface for
+ * the virtual address range from start to (start + len). This is useful to
+ * find out whether the requested range is mapped completely and without
+ * fragmentation.
+ * This function internally uses mmap_lock to synchronize the VMA updates
+ * if mmap_lock is not already held.
+ * @vmi: The VMA iterator pointing at the VMA.
+ * @start: Virtual address start value.
+ * @len: Virtual address length.
+ * @locked: Flag to indicate if mmap_lock is already held.
+ *
+ * Return: 0 if the VMA mapping matches fully with the given range, or a
+ * negative error code otherwise.
+ */
+int reserv_vmi_range_mapped(struct vma_iterator *vmi, ptraddr_t start,
+			    size_t len, bool locked);
+
+/**
+ * reserv_vmi_cap_within_reserv() - Searches and matches the input VMI for the
+ * capability bound values falling within the reserved virtual address range.
+ * This function internally uses mmap_lock to synchronize the VMA updates
+ * if mmap_lock is not already held.
+ * @vmi: The VMA iterator pointing at the VMA.
+ * @cap: Reservation capability value.
+ * @locked: Flag to indicate if mmap_lock is already held.
+ *
+ * Return: True if the input capability bound values fall within the reserved
+ * virtual address range, or false otherwise.
+ */
+bool reserv_vmi_cap_within_reserv(struct vma_iterator *vmi, user_uintptr_t cap,
+				  bool locked);
+
+/**
+ * reserv_vma_cap_within_reserv() - Searches and matches the input VMA for the
+ * capability bound values falling within the reserved virtual address range.
+ * This function should be called with mmap_lock held.
+ * @vma: The VMA pointer.
+ * @cap: Reservation capability value.
+ *
+ * Return: True if the input capability bound values fall within the reserved
+ * virtual address range, or false otherwise.
+ */
+bool reserv_vma_cap_within_reserv(struct vm_area_struct *vma, user_uintptr_t cap);
+
+/**
+ * reserv_vma_range_within_reserv() - Searches and matches the input VMA for the
+ * input address range falling within the reserved virtual address range. This
+ * function should be called with mmap_lock held.
+ * @vma: The VMA pointer.
+ * @start: Virtual address start value.
+ * @len: Virtual address length.
+ *
+ * Return: True if the input address range falls within the reserved virtual
+ * address range, or false otherwise.
+ */
+bool reserv_vma_range_within_reserv(struct vm_area_struct *vma, ptraddr_t start, size_t len);
+
+/**
+ * reserv_is_supported() - Checks if the reservation property exists for the mm.
+ * @mm: The mm pointer.
+ *
+ * Return: True if the mm has the reservation property set, or false otherwise.
+ */
+static inline bool reserv_is_supported(struct mm_struct *mm)
+{
+	return test_bit(MMF_PCUABI_RESERV, &mm->flags);
+}
+
+/**
+ * reserv_fork() - Checks and copies the MMF_PCUABI_RESERV bit to the new mm
+ * during fork.
+ * @mm: New mm pointer.
+ * @oldmm: Old mm pointer.
+ *
+ * Return: None.
+ */
+static inline void reserv_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+	if (test_bit(MMF_PCUABI_RESERV, &oldmm->flags))
+		set_bit(MMF_PCUABI_RESERV, &mm->flags);
+}
+
+#else /* CONFIG_CHERI_PURECAP_UABI */
+
+#define reserv_representable_alignment(len) 0
+
+#define reserv_representable_base(base, len) base
+
+#define reserv_representable_length(len) len
+
+#define reserv_vma_reserv_start(vma) vma->vm_start
+
+#define reserv_vma_reserv_len(vma) (vma->vm_end - vma->vm_start)
+
+#define reserv_vma_reserv_perms(vma) 0
+
+#define reserv_vma_reserv_info(vma) \
+({ \
+	struct reserv_struct __tmp = {0}; \
+	__tmp; \
+})
+
+static inline int reserv_vma_set_reserv(struct vm_area_struct *vma, ptraddr_t start,
+					size_t len, user_ptr_perms_t perms)
+{
+	return 0;
+}
+
+static inline user_uintptr_t reserv_range_set_reserv(ptraddr_t start, size_t len,
+						     user_ptr_perms_t perms, bool locked)
+{
+	return (user_uintptr_t)start;
+}
+
+static inline int reserv_vmi_range_mapped(struct vma_iterator *vmi, ptraddr_t start,
+					  size_t len, bool locked)
+{
+	return 0;
+}
+
+static inline bool reserv_vmi_cap_within_reserv(struct vma_iterator *vmi, user_uintptr_t cap,
+						bool locked)
+{
+	return true;
+}
+
+static inline bool reserv_vma_cap_within_reserv(struct vm_area_struct *vma, user_uintptr_t cap)
+{
+	return true;
+}
+
+static inline bool reserv_vma_range_within_reserv(struct vm_area_struct *vma, ptraddr_t start,
+						  size_t len)
+{
+	return true;
+}
+
+static inline bool reserv_is_supported(struct mm_struct *mm)
+{
+	return false;
+}
+
+static inline void reserv_fork(struct mm_struct *mm, struct mm_struct *oldmm) {}
+
+#endif /* CONFIG_CHERI_PURECAP_UABI */
+
+#endif /* _LINUX_CAP_ADDR_MGMT_H */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 774bd7d6ad60..25cbbe18f5b8 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -607,6 +607,12 @@ struct vma_numab_state {
 	int prev_scan_seq;
 };
 
+struct reserv_struct {
+	ptraddr_t start;
+	size_t len;
+	user_ptr_perms_t perms;
+};
+
 /*
  * This struct describes a virtual memory area. There is one of these
  * per VM-area/task. A VM area is any part of the process virtual memory
@@ -711,6 +717,9 @@ struct vm_area_struct {
 	struct vma_numab_state *numab_state;	/* NUMA Balancing state */
 #endif
 	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
+#ifdef CONFIG_CHERI_PURECAP_UABI
+	struct reserv_struct reserv_data;
+#endif
 } __randomize_layout;
 
 #ifdef CONFIG_NUMA
diff --git a/mm/Makefile b/mm/Makefile
index 33873c8aedb3..780befc2500f 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -41,6 +41,7 @@ mmu-$(CONFIG_MMU)	:= highmem.o memory.o mincore.o \
 			   msync.o page_vma_mapped.o pagewalk.o \
 			   pgtable-generic.o rmap.o vmalloc.o
+mmu-$(CONFIG_CHERI_PURECAP_UABI) += cap_addr_mgmt.o
 
 ifdef CONFIG_CROSS_MEMORY_ATTACH
 mmu-$(CONFIG_MMU)	+= process_vm_access.o
diff --git a/mm/cap_addr_mgmt.c b/mm/cap_addr_mgmt.c
new file mode 100644
index 000000000000..a8d41c7a5fbb
--- /dev/null
+++ b/mm/cap_addr_mgmt.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bug.h>
+#include <linux/cap_addr_mgmt.h>
+#include <linux/cheri.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+int reserv_vma_set_reserv(struct vm_area_struct *vma, ptraddr_t start,
+			  size_t len, user_ptr_perms_t perms)
+{
+	if (!reserv_is_supported(vma->vm_mm))
+		return 0;
+	if (start + len < start)
+		return -EINVAL;
+	/* Reservation base/length is expected to be page aligned */
+	VM_BUG_ON(start & ~PAGE_MASK || len % PAGE_SIZE);
+
+	vma->reserv_data.start = start & cheri_representable_alignment_mask(len);
+	vma->reserv_data.len = cheri_representable_length(len);
+	if (perms)
+		vma->reserv_data.perms = perms;
+
+	return 0;
+}
+
+user_uintptr_t reserv_range_set_reserv(ptraddr_t start, size_t len, user_ptr_perms_t perms,
+				       bool locked)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	ptraddr_t end = start + len;
+	user_uintptr_t ret = 0;
+	VMA_ITERATOR(vmi, mm, start);
+
+	if (!reserv_is_supported(mm))
+		return start;
+	if (end < start)
+		return -EINVAL;
+
+	/* Check if the reservation range is representable and return an error if not */
+	if (start & ~cheri_representable_alignment_mask(len) ||
+	    len != cheri_representable_length(len) ||
+	    start & ~PAGE_MASK || len % PAGE_SIZE) {
+		printk(KERN_WARNING "Reservation range (0x%lx)-(0x%lx) is not representable\n",
+		       start, start + len - 1);
+		return -ERESERVATION;
+	}
+	if (!locked && mmap_write_lock_killable(mm))
+		return -EINTR;
+
+	for_each_vma_range(vmi, vma, end) {
+		WRITE_ONCE(vma->reserv_data.start, start);
+		WRITE_ONCE(vma->reserv_data.len, len);
+		WRITE_ONCE(vma->reserv_data.perms, perms);
+	}
+	if (!locked)
+		mmap_write_unlock(current->mm);
+	ret = (user_uintptr_t)uaddr_to_user_ptr_safe(start);
+
+	return ret;
+}
+
+int reserv_vmi_range_mapped(struct vma_iterator *vmi, ptraddr_t start,
+			    size_t len, bool locked)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	int ret = -ENOMEM;
+
+	if (!reserv_is_supported(mm))
+		return 0;
+	if (!locked && mmap_read_lock_killable(mm))
+		return -EINTR;
+
+	start = untagged_addr(start);
+	start = round_down(start, PAGE_SIZE);
+	len = round_up(len, PAGE_SIZE);
+	vma_iter_set(vmi, start);
+	/* Try walking the given range */
+	vma = mas_find(&vmi->mas, start + len - 1);
+	if (!vma)
+		goto out;
+
+	/* If the range is fully mapped then no gap exists */
+	if (mas_empty_area(&vmi->mas, start, start + len - 1, 1))
+		goto out;
+	ret = 0;
+out:
+	if (!locked)
+		mmap_read_unlock(mm);
+	return ret;
+}
+
+bool reserv_vmi_cap_within_reserv(struct vma_iterator *vmi, user_uintptr_t cap, bool locked)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	ptraddr_t cap_start = untagged_addr(cheri_base_get(cap));
+	ptraddr_t cap_end = cap_start + cheri_length_get(cap);
+	bool ret = false;
+
+	if (!reserv_is_supported(mm))
+		return true;
+	if (!locked && mmap_read_lock_killable(mm))
+		return false;
+
+	/* Check if there is a match with the existing reservations */
+	vma_iter_set(vmi, cap_start);
+	vma = mas_find(&vmi->mas, cap_end);
+	if (!vma)
+		goto out;
+
+	if (vma->reserv_data.start <= cap_start &&
+	    vma->reserv_data.start + vma->reserv_data.len >= cap_end)
+		ret = true;
+out:
+	if (!locked)
+		mmap_read_unlock(mm);
+
+	return ret;
+}
+
+bool reserv_vma_cap_within_reserv(struct vm_area_struct *vma, user_uintptr_t cap)
+{
+	ptraddr_t start = untagged_addr(cheri_base_get(cap));
+
+	if (!reserv_is_supported(vma->vm_mm))
+		return true;
+
+	/* Check if there is a match with the existing reservations */
+	if (vma->reserv_data.start <= start &&
+	    vma->reserv_data.start + vma->reserv_data.len >= start + cheri_length_get(cap))
+		return true;
+
+	return false;
+}
+
+bool reserv_vma_range_within_reserv(struct vm_area_struct *vma, ptraddr_t start, size_t len)
+{
+	if (!reserv_is_supported(vma->vm_mm))
+		return true;
+
+	/* Check if there is a match with the existing reservations */
+	if (vma->reserv_data.start <= start &&
+	    vma->reserv_data.start + vma->reserv_data.len >= start + len)
+		return true;
+
+	return false;
+}
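
For reference, the reserv_vma_cap_within_reserv() and
reserv_vma_range_within_reserv() checks above reduce to a plain interval
containment test; a standalone sketch with a hypothetical helper name:

  #include <stdbool.h>
  #include <stdint.h>

  /* True if [start, start + len) lies inside [rstart, rstart + rlen) */
  static bool range_within(uint64_t rstart, uint64_t rlen,
                           uint64_t start, uint64_t len)
  {
          return rstart <= start && rstart + rlen >= start + len;
  }

reserv_vma_cap_within_reserv() applies this test to the capability's base and
length after untagging the base address.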