Hi All,
This patch series introduces the mm reservation interface to manage the owning capabilities of allocated addresses. The series adds reservation details to the VMA structure along with various capability/reservation constraint checks. Feedback on API names, directory structure, etc. is welcome.
Details of the rules implemented can be found in the PCuABI specification [1].
This series is based on tree [2].
Changes in v3 compared with v2 (based on suggestions from Kevin):
1) Implemented all of Kevin's suggestions on the v2 version. However, the suggestion to use (unsigned long __user *) instead of user_uintptr_t was not taken up, as it brought little benefit and time was short.
2) vm_mmap() now takes the address as unsigned long instead of user_uintptr_t.
3) get_unmapped_area() now checks whether the address range falls within a reservation in the MAP_FIXED case.
4) New function check_pcuabi_params() added to sanity-check parameters in the mmap and mremap syscalls.
5) Several fixes and cleanups.
6) Patches 3, 24 and 25 are new.
Testing:
1) All of Chaitanya's tests in the v8 selftests [3] pass.
2) Purecap/compat BusyBox boot passes after adding the [WIP] patches present in [4].
The whole series can be found here [4].
[1]: https://git.morello-project.org/morello/kernel/linux/-/wikis/Morello-pure-ca...
[2]: https://git.morello-project.org/morello/kernel/linux morello/next
[3]: https://git.morello-project.org/chaitanya_prakash/linux.git review/purecap_mmap_testcases_v8
[4]: https://git.morello-project.org/amitdaniel/linux.git review/purecap_mm_reservation_v3
Thanks, Amit Daniel
Amit Daniel Kachhap (25):
  uapi: errno.h: Introduce PCuABI memory reservation error
  linux/sched/coredump.h: Add MMF_PCUABI_RESERV mm flag
  linux/user_ptr.h: Add a typedef user_ptr_perms_t
  mm/cap_addr_mgmt: Add capability reservation interfaces in VMA
  linux/user_ptr.h: Add two helpers to operate on user pointers
  lib/user_ptr: Add helpers to be used by mm syscalls
  mm/mmap: Modify get free unmapped address space management code
  mm/(mmap,mremap): Add PCuABI reservation during VMA operation
  mm/mmap: Add reservation constraints in mmap/munmap parameters
  mm/mremap: Add reservation constraints in mremap parameters
  mm/mprotect: Add the PCuABI reservation constraints
  mm/madvise: Add the PCuABI reservation constraints
  mm/mlock: Add the PCuABI reservation constraints
  mm/msync: Add the PCuABI reservation constraints
  mm/mmap: Disable MAP_GROWSDOWN mapping flag for PCuABI
  uapi: mman-common.h: Macros for maximum capability permissions
  lib/user_ptr: Add user pointer permission helpers for PCuABI
  arm64: user_ptr: Implement morello capability permission helpers
  mm/mmap: Add capability permission constraints for PCuABI
  mm/mremap: Add capability permission constraints for PCuABI
  mm/mprotect: Add capability permission constraints for PCuABI
  mm/mincore: Add PCuABI reservation/capability constraints
  fs/binfmt_elf: Add PCuABI reservation constraints
  ipc/shm: Add the PCuABI reservation constraints
  arm64: vDSO: Add appropriate capability bounds
 Documentation/core-api/user_ptr.rst    |  28 +++
 arch/Kconfig                           |   3 +
 arch/arm64/Kconfig                     |   1 +
 arch/arm64/include/asm/elf.h           |   5 +-
 arch/arm64/include/asm/mmu.h           |   2 +-
 arch/arm64/include/asm/user_ptr.h      |  34 ++++
 arch/arm64/kernel/signal.c             |   2 +-
 arch/arm64/kernel/vdso.c               |  29 +++-
 fs/binfmt_elf.c                        |  78 ++++++---
 include/linux/cap_addr_mgmt.h          | 227 +++++++++++++++++++++++++
 include/linux/mm.h                     |  16 +-
 include/linux/mm_types.h               |   9 +
 include/linux/sched/coredump.h         |   2 +
 include/linux/shm.h                    |   4 +-
 include/linux/user_ptr.h               | 101 +++++++++++
 include/uapi/asm-generic/errno.h       |   2 +
 include/uapi/asm-generic/mman-common.h |   6 +
 io_uring/advise.c                      |   2 +-
 ipc/shm.c                              |  27 +--
 kernel/fork.c                          |   3 +
 lib/user_ptr.c                         |  74 ++++++++
 mm/Makefile                            |   1 +
 mm/cap_addr_mgmt.c                     | 150 ++++++++++++++++
 mm/damon/vaddr.c                       |   2 +-
 mm/internal.h                          |   2 +-
 mm/madvise.c                           |  27 ++-
 mm/mincore.c                           |  46 ++++-
 mm/mlock.c                             |  36 +++-
 mm/mmap.c                              | 188 +++++++++++++++++---
 mm/mprotect.c                          |  26 ++-
 mm/mremap.c                            |  98 ++++++++---
 mm/msync.c                             |  13 +-
 mm/util.c                              |   9 +-
 33 files changed, 1119 insertions(+), 134 deletions(-)
 create mode 100644 arch/arm64/include/asm/user_ptr.h
 create mode 100644 include/linux/cap_addr_mgmt.h
 create mode 100644 mm/cap_addr_mgmt.c
The PCuABI specification introduces this error code, which is used to denote any error that occurs while managing memory reservations.
Signed-off-by: Amit Daniel Kachhap <amitdaniel.kachhap@arm.com>
---
 include/uapi/asm-generic/errno.h | 2 ++
 1 file changed, 2 insertions(+)
diff --git a/include/uapi/asm-generic/errno.h b/include/uapi/asm-generic/errno.h
index cf9c51ac49f9..4589a3165fe1 100644
--- a/include/uapi/asm-generic/errno.h
+++ b/include/uapi/asm-generic/errno.h
@@ -120,4 +120,6 @@
 #define EHWPOISON 133 /* Memory page has hardware error */
 
+#define ERESERVATION 192 /* PCuABI memory reservation error */
+
 #endif
The PCuABI specification introduces memory reservations, so add a flag MMF_PCUABI_RESERV to represent such memory mappings. As memory reservations are mm specific, this flag helps to differentiate between purecap and compat process memory mappings.
Signed-off-by: Amit Daniel Kachhap <amitdaniel.kachhap@arm.com>
---
 include/linux/sched/coredump.h | 2 ++
 1 file changed, 2 insertions(+)
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index 02f5090ffea2..87b686ae8b0c 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -92,6 +92,8 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_VM_MERGE_ANY 30
 #define MMF_VM_MERGE_ANY_MASK (1 << MMF_VM_MERGE_ANY)
 
+#define MMF_PCUABI_RESERV 31 /* PCuABI memory reservation feature */
+
 #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
 		MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\
 		MMF_VM_MERGE_ANY_MASK)
Add a typedef user_ptr_perms_t, defined as cheri_perms_t in the PCuABI case and defaulting to int otherwise. This allows user_ptr_perms_t to be used unconditionally.
Note: this change causes linux/cheri.h to be included everywhere, as linux/kernel.h includes linux/user_ptr.h.
Signed-off-by: Amit Daniel Kachhap <amitdaniel.kachhap@arm.com>
---
 include/linux/user_ptr.h | 5 +++++
 1 file changed, 5 insertions(+)
diff --git a/include/linux/user_ptr.h b/include/linux/user_ptr.h
index 685586bc0d89..0e40b9850bf3 100644
--- a/include/linux/user_ptr.h
+++ b/include/linux/user_ptr.h
@@ -2,6 +2,7 @@
 #ifndef _LINUX_USER_PTR_H
 #define _LINUX_USER_PTR_H
 
+#include <linux/cheri.h>
 #include <linux/limits.h>
 #include <linux/typecheck.h>
 
@@ -27,6 +28,8 @@
 
 #ifdef CONFIG_CHERI_PURECAP_UABI
 
+typedef cheri_perms_t user_ptr_perms_t;
+
 /**
  * uaddr_to_user_ptr() - Convert a user-provided address to a user pointer.
  * @addr: The address to set the pointer to.
@@ -109,6 +112,8 @@ bool check_user_ptr_rw(void __user *ptr, size_t len);
 
 #else /* CONFIG_CHERI_PURECAP_UABI */
 
+typedef int user_ptr_perms_t;
+
 static inline void __user *uaddr_to_user_ptr(ptraddr_t addr)
 {
 	return as_user_ptr(addr);
PCuABI needs an address space reservation interface to manage the owning capabilities of allocated addresses. This interface prevents two unrelated owning capabilities created by the kernel from overlapping.
The reservation interface stores virtual address ranges as reservation entries, each matching the bounds of the capability that the kernel provides to userspace. It also stores the owning capability permissions, so that future syscall requests to update permissions can be managed.
The reservation interfaces follow a few basic rules:
- Reservations can only be created or destroyed, never expanded or shrunk. A reservation is created when a new memory mapping is made outside of an existing reservation.
- A single reservation can have many mappings. However, unused regions of the reservation cannot be reused again.
- The reservation start address is aligned to the CHERI representable base.
- The reservation length value is aligned to the CHERI representable length.
More rules about the address space reservation interface can be found in the PCuABI specification.
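To illustrate the last two rules, here is a minimal sketch of how the reservation bounds are derived from a page-aligned start/length pair, using the cheri_* helpers from <linux/cheri.h> that this patch builds on (illustrative only, not part of the patch):

    ptraddr_t reserv_start = start & cheri_representable_alignment_mask(len);
    size_t reserv_len = cheri_representable_length(len);

    /*
     * For small lengths both values equal the page-aligned inputs; for
     * larger lengths the base is aligned down and the length padded up so
     * that the resulting capability bounds are exactly representable.
     */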
This commit introduces the APIs reserv_vma_set_reserv(), reserv_range_set_reserv(), reserv_vmi_range_mapped(), reserv_vmi_cap_within_reserv(), reserv_vma_cap_within_reserv(), reserv_vma_range_within_reserv(), reserv_is_supported() and reserv_fork(). Except for reserv_range_set_reserv(), all of them operate on a single VMA. All of these interfaces will be used in different memory management syscalls in subsequent patches.
Signed-off-by: Amit Daniel Kachhap <amitdaniel.kachhap@arm.com>
---
 include/linux/cap_addr_mgmt.h | 227 ++++++++++++++++++++++++++++++++++
 include/linux/mm_types.h      |   9 ++
 mm/Makefile                   |   1 +
 mm/cap_addr_mgmt.c            | 150 ++++++++++++++++
 4 files changed, 387 insertions(+)
 create mode 100644 include/linux/cap_addr_mgmt.h
 create mode 100644 mm/cap_addr_mgmt.c
diff --git a/include/linux/cap_addr_mgmt.h b/include/linux/cap_addr_mgmt.h new file mode 100644 index 000000000000..015d9f0f77eb --- /dev/null +++ b/include/linux/cap_addr_mgmt.h @@ -0,0 +1,227 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_CAP_ADDR_MGMT_H +#define _LINUX_CAP_ADDR_MGMT_H + +#include <linux/cheri.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/mm_types.h> +#include <linux/sched/coredump.h> +#include <linux/types.h> +#include <linux/user_ptr.h> + +#ifdef CONFIG_CHERI_PURECAP_UABI +#define reserv_representable_alignment(len) \ + (test_bit(MMF_PCUABI_RESERV, ¤t->mm->flags) \ + ? (PAGE_MASK & ~cheri_representable_alignment_mask(len)) : 0) + +#define reserv_representable_base(base, len) \ + (test_bit(MMF_PCUABI_RESERV, ¤t->mm->flags) \ + ? (base & cheri_representable_alignment_mask(len)) : base) + +#define reserv_representable_length(len) \ + (test_bit(MMF_PCUABI_RESERV, ¤t->mm->flags) \ + ? cheri_representable_length(len) : len) + +#define reserv_vma_reserv_start(vma) \ + (test_bit(MMF_PCUABI_RESERV, &vma->vm_mm->flags) \ + ? vma->reserv_data.start : vma->vm_start) + +#define reserv_vma_reserv_len(vma) \ + (test_bit(MMF_PCUABI_RESERV, &vma->vm_mm->flags) \ + ? vma->reserv_data.len : (vma->vm_end - vma->vm_start)) + +#define reserv_vma_reserv_perms(vma) \ + (test_bit(MMF_PCUABI_RESERV, &vma->vm_mm->flags) \ + ? vma->reserv_data.perms : 0) + +#define reserv_vma_reserv_info(vma) \ +({ \ + struct reserv_struct __tmp = {0}; \ + test_bit(MMF_PCUABI_RESERV, &vma->vm_mm->flags) \ + ? vma->reserv_data : __tmp; \ +}) + +/** + * reserv_vma_set_reserv() - Sets the reservation details in the VMA for the + * virtual address range from start to (start + len) with perms permission as + * the entry. The start address are stored as CHERI representable base and the + * length as CHERI representable length. They are expected to not interfere + * with the successive VMA. This function should be called with mmap_lock + * held. + * @vma: The VMA pointer to insert the reservation entry. + * @start: Reservation start value. + * @len: Reservation length. + * @perms: Capability permission for the reserved range. + * + * Return: 0 if reservation entry added successfully or negative errorcode + * otherwise. + */ +int reserv_vma_set_reserv(struct vm_area_struct *vma, ptraddr_t start, + size_t len, user_ptr_perms_t perms); + +/** + * reserv_range_set_reserv() - Sets the reservation details across the VMA's + * for the virtual address range from start to (start + len) with the perms + * permission as the entry. The start address is expected to be CHERI + * representable base and the length to be CHERI representable length. + * This function internally uses mmap_lock to synchronize the VMA updates + * if mmap_lock is not already held. + * @start: Reservation start value. + * @len: Reservation length. + * @perms: Capability permission for the reserved range. + * @locked: Flag to indicate if mmap_lock is already held. + * + * Return: valid capability with bounded range and requested permission or + * negative error code otherwise. + */ +user_uintptr_t reserv_range_set_reserv(ptraddr_t start, size_t len, + user_ptr_perms_t perms, bool locked); + +/** + * reserv_vmi_range_mapped() - Searches the reservation interface for + * the virtual address range from start to (start + len). This is useful to + * find if the requested range maps completely and there is no fragmentation. 
+ * This function internally uses mmap_lock to synchronize the VMA updates + * if mmap_lock is not already held. + * @vmi: The VMA iterator pointing at the VMA. + * @start: Virtual address start value. + * @len: Virtual address length. + * @locked: Flag to indicate if mmap_lock is already held. + * + * Return: 0 if the VMA mapping matches fully with the given range or negative + * error code otherwise. + */ +int reserv_vmi_range_mapped(struct vma_iterator *vmi, ptraddr_t start, + size_t len, bool locked); + +/** + * reserv_vmi_cap_within_reserv() - Searches and matches the input VMI for the + * for the capability bound values falling within the reserved virtual address + * range. This function internally uses mmap_lock to synchronize the VMA updates + * if mmap_lock is not already held. + * @vmi: The VMA iterator pointing at the VMA. + * @cap: Reservation capability value. + * @locked: Flag to indicate if mmap_lock is already held. + * + * Return: True if the input capability bound values within the reserved virtual + * address range or false otherwise. + */ +bool reserv_vmi_cap_within_reserv(struct vma_iterator *vmi, user_uintptr_t cap, + bool locked); + +/** + * reserv_vma_cap_within_reserv() - Searches and matches the input VMA for the + * capability bound values falling within the reserved virtual address range. + * This function should be called with mmap_lock held. + * @vma: The VMA pointer. + * @cap: Reservation capability value. + * + * Return: True if the input capability bound values within the reserved virtual + * address range or false otherwise. + */ +bool reserv_vma_cap_within_reserv(struct vm_area_struct *vma, user_uintptr_t cap); + +/** + * reserv_vma_range_within_reserv() - Searches and matches the input VMA for the input + * address range falling within the reserved virtual address range. This function + * should be called with mmap_lock held. + * @vma: The VMA pointer. + * @start: Virtual address start value. + * @len: Virtual address length. + * + * Return: True if the input address range within the reserved virtual address + * range or false otherwise. + */ +bool reserv_vma_range_within_reserv(struct vm_area_struct *vma, ptraddr_t start, size_t len); + +/** + * reserv_is_supported() - Checks if the reservation property exists for the mm. + * @mm: The mm pointer. + * + * Return: True if mm has the reservation property set or false otherwise. + */ +static inline bool reserv_is_supported(struct mm_struct *mm) +{ + return test_bit(MMF_PCUABI_RESERV, &mm->flags); +} + +/** + * reserv_fork() - Checks and copies the MMF_PCUABI_RESERV bit in the new mm during fork. + * @mm: New mm pointer. + * @oldmm: Old mm pointer. + * + * Return: None. 
+ */ +static inline void reserv_fork(struct mm_struct *mm, struct mm_struct *oldmm) +{ + if (test_bit(MMF_PCUABI_RESERV, &oldmm->flags)) + set_bit(MMF_PCUABI_RESERV, &mm->flags); +} + +#else /* CONFIG_CHERI_PURECAP_UABI */ + +#define reserv_representable_alignment(len) 0 + +#define reserv_representable_base(base, len) base + +#define reserv_representable_length(len) len + +#define reserv_vma_reserv_start(vma) vma->vm_start + +#define reserv_vma_reserv_len(vma) (vma->vm_end - vma->vm_start) + +#define reserv_vma_reserv_perms(vma) 0 + +#define reserv_vma_reserv_info(vma) \ +({ \ + struct reserv_struct __tmp = {0}; \ + __tmp; \ +}) + +static inline int reserv_vma_set_reserv(struct vm_area_struct *vma, ptraddr_t start, + size_t len, user_ptr_perms_t perms) +{ + return 0; +} + +static inline user_uintptr_t reserv_range_set_reserv(ptraddr_t start, size_t len, + user_ptr_perms_t perms, bool locked) +{ + return (user_uintptr_t)start; +} + +static inline int reserv_vmi_range_mapped(struct vma_iterator *vmi, ptraddr_t start, + size_t len, bool locked) +{ + return 0; +} + +static inline bool reserv_vmi_cap_within_reserv(struct vma_iterator *vmi, user_uintptr_t cap, + bool locked) +{ + return true; +} + +static inline bool reserv_vma_cap_within_reserv(struct vm_area_struct *vma, user_uintptr_t cap) +{ + return true; +} + +static inline bool reserv_vma_range_within_reserv(struct vm_area_struct *vma, ptraddr_t start, + size_t len) +{ + return true; +} + +static inline bool reserv_is_supported(struct mm_struct *mm) +{ + return false; +} + +static inline void reserv_fork(struct mm_struct *mm, struct mm_struct *oldmm) {} + +#endif /* CONFIG_CHERI_PURECAP_UABI */ + +#endif /* _LINUX_CAP_ADDR_MGMT_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 774bd7d6ad60..25cbbe18f5b8 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -607,6 +607,12 @@ struct vma_numab_state { int prev_scan_seq; };
+struct reserv_struct { + ptraddr_t start; + size_t len; + user_ptr_perms_t perms; +}; + /* * This struct describes a virtual memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory @@ -711,6 +717,9 @@ struct vm_area_struct { struct vma_numab_state *numab_state; /* NUMA Balancing state */ #endif struct vm_userfaultfd_ctx vm_userfaultfd_ctx; +#ifdef CONFIG_CHERI_PURECAP_UABI + struct reserv_struct reserv_data; +#endif } __randomize_layout;
#ifdef CONFIG_NUMA diff --git a/mm/Makefile b/mm/Makefile index 33873c8aedb3..780befc2500f 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -41,6 +41,7 @@ mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o \ msync.o page_vma_mapped.o pagewalk.o \ pgtable-generic.o rmap.o vmalloc.o
+mmu-$(CONFIG_CHERI_PURECAP_UABI) += cap_addr_mgmt.o
ifdef CONFIG_CROSS_MEMORY_ATTACH mmu-$(CONFIG_MMU) += process_vm_access.o diff --git a/mm/cap_addr_mgmt.c b/mm/cap_addr_mgmt.c new file mode 100644 index 000000000000..a8d41c7a5fbb --- /dev/null +++ b/mm/cap_addr_mgmt.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bug.h> +#include <linux/cap_addr_mgmt.h> +#include <linux/cheri.h> +#include <linux/mm.h> +#include <linux/slab.h> + +int reserv_vma_set_reserv(struct vm_area_struct *vma, ptraddr_t start, + size_t len, user_ptr_perms_t perms) +{ + if (!reserv_is_supported(vma->vm_mm)) + return 0; + if (start + len < start) + return -EINVAL; + /* Reservation base/length is expected as page aligned */ + VM_BUG_ON(start & ~PAGE_MASK || len % PAGE_SIZE); + + vma->reserv_data.start = start & cheri_representable_alignment_mask(len); + vma->reserv_data.len = cheri_representable_length(len); + if (perms) + vma->reserv_data.perms = perms; + + return 0; +} + +user_uintptr_t reserv_range_set_reserv(ptraddr_t start, size_t len, user_ptr_perms_t perms, + bool locked) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + ptraddr_t end = start + len; + user_uintptr_t ret = 0; + VMA_ITERATOR(vmi, mm, start); + + if (!reserv_is_supported(mm)) + return start; + if (end < start) + return -EINVAL; + + /* Check if the reservation range is representable and throw error if not */ + if (start & ~cheri_representable_alignment_mask(len) || + len != cheri_representable_length(len) || + start & ~PAGE_MASK || len % PAGE_SIZE) { + printk(KERN_WARNING "Reservation range (0x%lx)-(0x%lx) is not representable\n", + start, start + len - 1); + return -ERESERVATION; + } + if (!locked && mmap_write_lock_killable(mm)) + return -EINTR; + + for_each_vma_range(vmi, vma, end) { + WRITE_ONCE(vma->reserv_data.start, start); + WRITE_ONCE(vma->reserv_data.len, len); + WRITE_ONCE(vma->reserv_data.perms, perms); + } + if (!locked) + mmap_write_unlock(current->mm); + ret = (user_uintptr_t)uaddr_to_user_ptr_safe(start); + + return ret; +} + +int reserv_vmi_range_mapped(struct vma_iterator *vmi, ptraddr_t start, + size_t len, bool locked) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + int ret = -ENOMEM; + + if (!reserv_is_supported(mm)) + return 0; + if (!locked && mmap_read_lock_killable(mm)) + return -EINTR; + + start = untagged_addr(start); + start = round_down(start, PAGE_SIZE); + len = round_up(len, PAGE_SIZE); + vma_iter_set(vmi, start); + /* Try walking the given range */ + vma = mas_find(&vmi->mas, start + len - 1); + if (!vma) + goto out; + + /* If the range is fully mapped then no gap exists */ + if (mas_empty_area(&vmi->mas, start, start + len - 1, 1)) + goto out; + ret = 0; +out: + if (!locked) + mmap_read_unlock(mm); + return ret; +} + +bool reserv_vmi_cap_within_reserv(struct vma_iterator *vmi, user_uintptr_t cap, bool locked) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + ptraddr_t cap_start = untagged_addr(cheri_base_get(cap)); + ptraddr_t cap_end = cap_start + cheri_length_get(cap); + bool ret = false; + + if (!reserv_is_supported(mm)) + return true; + if (!locked && mmap_read_lock_killable(mm)) + return false; + + /* Check if there is match with the existing reservations */ + vma_iter_set(vmi, cap_start); + vma = mas_find(&vmi->mas, cap_end); + if (!vma) + goto out; + + if (vma->reserv_data.start <= cap_start && + vma->reserv_data.start + vma->reserv_data.len >= cap_end) + ret = true; +out: + if (!locked) + mmap_read_unlock(mm); + + return ret; +} + +bool 
reserv_vma_cap_within_reserv(struct vm_area_struct *vma, user_uintptr_t cap) +{ + ptraddr_t start = untagged_addr(cheri_base_get(cap)); + + if (!reserv_is_supported(vma->vm_mm)) + return true; + + /* Check if there is match with the existing reservations */ + if (vma->reserv_data.start <= start && + vma->reserv_data.start + vma->reserv_data.len >= start + cheri_length_get(cap)) + return true; + + return false; +} + +bool reserv_vma_range_within_reserv(struct vm_area_struct *vma, ptraddr_t start, size_t len) +{ + if (!reserv_is_supported(vma->vm_mm)) + return true; + + /* Check if there is match with the existing reservations */ + if (vma->reserv_data.start <= start && + vma->reserv_data.start + vma->reserv_data.len >= start + len) + return true; + + return false; +}
On 15/04/2024 15:21, Amit Daniel Kachhap wrote:
+#define reserv_vma_reserv_info(vma) \
+({ \
+	struct reserv_struct __tmp = {0}; \
+	test_bit(MMF_PCUABI_RESERV, &vma->vm_mm->flags) \
+	? vma->reserv_data : __tmp; \
reserv_data is always zero-initialised, so I think we can return vma->reserv_data unconditionally. Besides calls to this function are always guarded by reserv_is_supported().
Kevin
+})
On 4/17/24 12:59, Kevin Brodsky wrote:
On 15/04/2024 15:21, Amit Daniel Kachhap wrote:
+#define reserv_vma_reserv_info(vma) \
+({ \
+	struct reserv_struct __tmp = {0}; \
+	test_bit(MMF_PCUABI_RESERV, &vma->vm_mm->flags) \
+	? vma->reserv_data : __tmp; \
reserv_data is always zero-initialised, so I think we can return vma->reserv_data unconditionally. Besides calls to this function are always guarded by reserv_is_supported().
I agree with your suggestion here.
Amit
Kevin
+})
Add the user_ptr_is_valid() and user_ptr_set_addr() helpers to operate on user pointers; they will be used in different situations in subsequent commits.
* user_ptr_is_valid() validates the user pointer by fetching the tag.
* user_ptr_set_addr() sets the address field of the user pointer.
Both of the above helpers use CHERI compiler builtins in the PCuABI case.
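As a rough usage sketch (rebase_user_ptr() is a hypothetical caller, not part of this patch), a path that needs to move a user pointer to a new address while preserving its capability metadata could do:

    static inline void __user *rebase_user_ptr(void __user *ptr, ptraddr_t new_addr)
    {
        /* Only reuse the capability metadata if the tag is still set. */
        if (user_ptr_is_valid(ptr))
            return user_ptr_set_addr(ptr, new_addr);
        /* Otherwise fall back to a plain address-only user pointer. */
        return as_user_ptr(new_addr);
    }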
Signed-off-by: Amit Daniel Kachhap <amitdaniel.kachhap@arm.com>
---
 Documentation/core-api/user_ptr.rst | 12 +++++++++++
 include/linux/user_ptr.h            | 33 +++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)
diff --git a/Documentation/core-api/user_ptr.rst b/Documentation/core-api/user_ptr.rst index 9db5e9271578..1427c4701af8 100644 --- a/Documentation/core-api/user_ptr.rst +++ b/Documentation/core-api/user_ptr.rst @@ -222,6 +222,18 @@ equal without being identical. To check whether two user pointers are truly identical, ``user_ptr_is_same(p1, p2)`` (``<linux/user_ptr.h>``) should be used.
+Validity +---------- + +To check whether a user pointer is valid, +``user_ptr_is_valid(p)`` (``<linux/user_ptr.h>``) should be used. + +Setting the address +------------------- + +To set the address field of the user pointers, +``user_ptr_set_addr(p)`` (``<linux/user_ptr.h>``) should be used. + Alignment ---------
diff --git a/include/linux/user_ptr.h b/include/linux/user_ptr.h index 0e40b9850bf3..85137f0fc23e 100644 --- a/include/linux/user_ptr.h +++ b/include/linux/user_ptr.h @@ -226,4 +226,37 @@ static inline bool user_ptr_is_same(const void __user *p1, const void __user *p2 #endif }
+/** + * user_ptr_is_valid() - Checks if the user pointer is valid. + * @ptr: The user pointer to check. + * + * Return: true if @ptr is valid. + * + * This function returns the tag of user pointer @ptr. + */ +static inline bool user_ptr_is_valid(const void __user *ptr) +{ +#ifdef CONFIG_CHERI_PURECAP_UABI + return __builtin_cheri_tag_get(ptr); +#else + return 0; +#endif +} + +/** + * user_ptr_set_addr() - Sets the address of the user pointer. + * @ptr: The user pointer to set address. + * @addr: The address to set the pointer to. + * + * Return: A user pointer with its address set to @addr. + */ +static inline void __user *user_ptr_set_addr(void __user *ptr, ptraddr_t addr) +{ +#ifdef CONFIG_CHERI_PURECAP_UABI + return __builtin_cheri_address_set(ptr, addr); +#else + return as_user_ptr(addr); +#endif +} + #endif /* _LINUX_USER_PTR_H */
Helper functions check_user_ptr_owning(), make_user_ptr_owning() and user_ptr_owning_perms_from_prot() are added to manage owning capability constraints as per the PCuABI specification. These helpers will mostly be used by memory management syscalls to apply the different capability constraints.
* check_user_ptr_owning() checks that the capability owns the input range (after page-aligning it) and has the CHERI_PERM_SW_VMEM owning permission set.
* make_user_ptr_owning() creates the relevant owning capability from the input range and permissions. The input range is first page aligned and then CHERI representable aligned.
Both of these functions are implemented on top of the cheri_* helpers in linux/cheri.h.
* user_ptr_owning_perms_from_prot() converts memory mapping protections to capability permissions.
Note: These helper functions currently check only capability bounds and not capability permission constraints; full support will be added incrementally in subsequent commits.
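A rough sketch of how a memory management syscall is expected to use these helpers (illustrative only; user_ptr, addr, len and prot stand in for the syscall's own parameters):

    /* Reject the request if user_ptr does not own [addr, addr + len). */
    if (!check_user_ptr_owning(user_ptr, addr, len))
        return -EINVAL;

    /* Derive a new owning capability for a freshly created private mapping. */
    retval = make_user_ptr_owning(addr, len,
                                  user_ptr_owning_perms_from_prot(prot, true));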
Signed-off-by: Amit Daniel Kachhap <amitdaniel.kachhap@arm.com>
---
 Documentation/core-api/user_ptr.rst | 15 +++++++++
 include/linux/user_ptr.h            | 47 +++++++++++++++++++++++++++++
 lib/user_ptr.c                      | 30 ++++++++++++++++++
 mm/cap_addr_mgmt.c                  |  2 +-
 4 files changed, 93 insertions(+), 1 deletion(-)
diff --git a/Documentation/core-api/user_ptr.rst b/Documentation/core-api/user_ptr.rst index 1427c4701af8..0ad6e14e14c4 100644 --- a/Documentation/core-api/user_ptr.rst +++ b/Documentation/core-api/user_ptr.rst @@ -345,3 +345,18 @@ accidentally providing capabilities to userspace in PCuABI. | routines suffixed with ``with_captags``. See ``<linux/uaccess.h>`` | | for details. | +-----------------------------------------------------------------------+ + +Managing user pointers by mm subsystem +====================================== + +The user pointers created by the Linux mm subsystem are referred to as the +owning capability in PCuABI and have the owning bit CHERI_PERM_SW_VMEM set +as the permission. Also, the user pointers and memory length managed in mm +subsystem are page aligned or sometimes CHERI representable aligned. Below, +APIs consider those requirements while creating and checking user pointers. + +* ``check_user_ptr_owning(ptr, addr, n)`` +* ``make_user_ptr_owning(addr, n, perm)`` +* ``user_ptr_owning_perms_from_prot(prot, tag_perm)`` + +See ``<linux/user_ptr.h>`` for details on how to use them. diff --git a/include/linux/user_ptr.h b/include/linux/user_ptr.h index 85137f0fc23e..41ab156653c7 100644 --- a/include/linux/user_ptr.h +++ b/include/linux/user_ptr.h @@ -110,6 +110,38 @@ bool check_user_ptr_read(const void __user *ptr, size_t len); bool check_user_ptr_write(void __user *ptr, size_t len); bool check_user_ptr_rw(void __user *ptr, size_t len);
+/** + * check_user_ptr_owning() - Check if the address range is within the valid + * user pointer capability bound. + * @user_ptr: User pointer. + * @addr: Address start value. + * @len: Address length. + * + * Return: True if address within the capability bound or false otherwise. + */ +bool check_user_ptr_owning(user_uintptr_t user_ptr, ptraddr_t addr, size_t len); + +/** + * make_user_ptr_owning() - Creates a userspace capability from the + * requested base address, length and memory permission flags. + * @addr: Requested capability address. + * @len: Requested capability length. + * @perm: Requested capability permission flags. + * + * Return: A new capability derived from cheri_user_root_cap. + */ +user_uintptr_t make_user_ptr_owning(ptraddr_t addr, size_t len, user_ptr_perms_t perm); + +/** + * user_ptr_owning_perms_from_prot() - Converts memory mapping protection flags to + * capability permission flags. + * @prot: Memory protection flags. + * @has_tag_access: Capability permissions to have tag check flags. + * + * Return: Capability permission flags + */ +user_ptr_perms_t user_ptr_owning_perms_from_prot(int prot, bool has_tag_access); + #else /* CONFIG_CHERI_PURECAP_UABI */
typedef int user_ptr_perms_t; @@ -150,6 +182,21 @@ static inline bool check_user_ptr_rw(void __user *ptr, size_t len) return true; }
+static inline bool check_user_ptr_owning(user_uintptr_t user_ptr, ptraddr_t addr, size_t len) +{ + return true; +} + +static inline user_uintptr_t make_user_ptr_owning(ptraddr_t addr, size_t len, user_ptr_perms_t perm) +{ + return addr; +} + +static inline user_ptr_perms_t user_ptr_owning_perms_from_prot(int prot, bool has_tag_access) +{ + return 0; +} + #endif /* CONFIG_CHERI_PURECAP_UABI */
/** diff --git a/lib/user_ptr.c b/lib/user_ptr.c index 115efc9fe678..2ef58193fdad 100644 --- a/lib/user_ptr.c +++ b/lib/user_ptr.c @@ -1,6 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include <linux/bug.h> +#include <linux/cap_addr_mgmt.h> #include <linux/cheri.h> +#include <linux/sched.h> #include <linux/user_ptr.h>
void __user *uaddr_to_user_ptr(ptraddr_t addr) @@ -70,3 +72,31 @@ bool check_user_ptr_rw(void __user *ptr, size_t len) { return cheri_check_cap(ptr, len, CHERI_PERM_LOAD | CHERI_PERM_STORE); } + +bool check_user_ptr_owning(user_uintptr_t user_ptr, ptraddr_t addr, size_t len) +{ + addr = round_down(addr, PAGE_SIZE); + len = round_up(len, PAGE_SIZE); + + return cheri_check_cap((const void * __capability)cheri_address_set(user_ptr, addr), + len, CHERI_PERM_GLOBAL | CHERI_PERM_SW_VMEM); +} + +user_uintptr_t make_user_ptr_owning(ptraddr_t addr, size_t len, user_ptr_perms_t perm) +{ + ptraddr_t align_addr; + user_uintptr_t user_ptr; + + align_addr = reserv_representable_base(round_down(addr, PAGE_SIZE), len); + len = cheri_representable_length(round_up(len, PAGE_SIZE)); + user_ptr = (user_uintptr_t)cheri_build_user_cap(align_addr, len, perm); + + return cheri_address_set(user_ptr, addr); +} + +user_ptr_perms_t user_ptr_owning_perms_from_prot(int prot, bool has_tag_access) +{ + /* TODO [PCuABI] - capability permission conversion from memory permission */ + return (CHERI_PERMS_READ | CHERI_PERMS_WRITE | + CHERI_PERMS_EXEC | CHERI_PERMS_ROOTCAP); +} diff --git a/mm/cap_addr_mgmt.c b/mm/cap_addr_mgmt.c index a8d41c7a5fbb..845e4a99556e 100644 --- a/mm/cap_addr_mgmt.c +++ b/mm/cap_addr_mgmt.c @@ -56,7 +56,7 @@ user_uintptr_t reserv_range_set_reserv(ptraddr_t start, size_t len, user_ptr_per } if (!locked) mmap_write_unlock(current->mm); - ret = (user_uintptr_t)uaddr_to_user_ptr_safe(start); + ret = make_user_ptr_owning(start, len, perms);
return ret; }
In the CHERI architecture, not all ranges can be represented by a capability, so add the necessary CHERI base and length alignment checks when generating a free unmapped virtual address or evaluating a fixed input address.
The PCuABI reservation interface stores the unusable alignment gaps at the start and end. These gaps should be considered when finding the free unmapped address space.
In the case of fixed addresses, the requested address range must either reside completely within an existing reservation or not overlap with any existing reservation.
Signed-off-by: Amit Daniel Kachhap <amitdaniel.kachhap@arm.com>
---
 include/linux/mm.h |  5 ++--
 mm/mmap.c          | 71 ++++++++++++++++++++++++++++++++++++----------
 2 files changed, 59 insertions(+), 17 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h index ce2501062292..73dc5ca47b55 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -30,6 +30,7 @@ #include <linux/kasan.h> #include <linux/memremap.h> #include <linux/slab.h> +#include <linux/cap_addr_mgmt.h>
struct mempolicy; struct anon_vma; @@ -3470,7 +3471,7 @@ static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma) static inline unsigned long vm_start_gap(struct vm_area_struct *vma) { unsigned long gap = stack_guard_start_gap(vma); - unsigned long vm_start = vma->vm_start; + unsigned long vm_start = reserv_vma_reserv_start(vma);
vm_start -= gap; if (vm_start > vma->vm_start) @@ -3480,7 +3481,7 @@ static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
static inline unsigned long vm_end_gap(struct vm_area_struct *vma) { - unsigned long vm_end = vma->vm_end; + unsigned long vm_end = reserv_vma_reserv_start(vma) + reserv_vma_reserv_len(vma);
if (vma->vm_flags & VM_GROWSUP) { vm_end += stack_guard_gap; diff --git a/mm/mmap.c b/mm/mmap.c index bec26ad4fdb0..64e64ab5e819 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -48,6 +48,7 @@ #include <linux/sched/mm.h> #include <linux/ksm.h>
+#include <linux/cap_addr_mgmt.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> #include <asm/tlb.h> @@ -1655,7 +1656,7 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) } else { tmp = mas_prev(&mas, 0); if (tmp && vm_end_gap(tmp) > gap) { - high_limit = tmp->vm_start; + high_limit = reserv_vma_reserv_start(tmp); mas_reset(&mas); goto retry; } @@ -1706,27 +1707,47 @@ generic_get_unmapped_area(struct file *filp, unsigned long addr, struct vm_area_struct *vma, *prev; struct vm_unmapped_area_info info; const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags); + unsigned long align_len; + unsigned long align_addr;
- if (len > mmap_end - mmap_min_addr) + align_len = reserv_representable_length(len); + if (align_len > mmap_end - mmap_min_addr) return -ENOMEM;
- if (flags & MAP_FIXED) + /* + * In case of PCuABI reservation, fixed should not overlap with any + * existing reservation or completely contained inside the reservation. + * Let this scenario fallthrough below for such checks. + */ + if ((flags & MAP_FIXED) && !reserv_is_supported(mm)) return addr;
if (addr) { addr = PAGE_ALIGN(addr); + /* + * Here CHERI representable address is aligned down as reservation + * layer holds this unusable aligned down gap. + */ + align_addr = reserv_representable_base(addr, len); vma = find_vma_prev(mm, addr, &prev); - if (mmap_end - len >= addr && addr >= mmap_min_addr && - (!vma || addr + len <= vm_start_gap(vma)) && - (!prev || addr >= vm_end_gap(prev))) + if (mmap_end - align_len >= align_addr && align_addr >= mmap_min_addr && + (!vma || align_addr + align_len <= vm_start_gap(vma)) && + (!prev || align_addr >= vm_end_gap(prev))) return addr; + else if (flags & MAP_FIXED) { + if ((vma && reserv_vma_range_within_reserv(vma, align_addr, align_len)) || + (prev && reserv_vma_range_within_reserv(prev, align_addr, align_len))) + return addr; + return -ERESERVATION; + } }
info.flags = 0; - info.length = len; + info.length = align_len; info.low_limit = mm->mmap_base; info.high_limit = mmap_end; info.align_mask = 0; + info.align_mask = reserv_representable_alignment(len); info.align_offset = 0; return vm_unmapped_area(&info); } @@ -1754,29 +1775,49 @@ generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr, struct mm_struct *mm = current->mm; struct vm_unmapped_area_info info; const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags); + unsigned long align_len; + unsigned long align_addr;
+ align_len = reserv_representable_length(len); /* requested length too big for entire address space */ - if (len > mmap_end - mmap_min_addr) + if (align_len > mmap_end - mmap_min_addr) return -ENOMEM; - - if (flags & MAP_FIXED) + /* + * In case of PCuABI reservation, fixed should not overlap with any + * existing reservation or completely contained inside the reservation. + * Let this scenario fallthrough below for such checks. + */ + if ((flags & MAP_FIXED) && !reserv_is_supported(mm)) return addr;
/* requesting a specific address */ if (addr) { addr = PAGE_ALIGN(addr); - vma = find_vma_prev(mm, addr, &prev); - if (mmap_end - len >= addr && addr >= mmap_min_addr && - (!vma || addr + len <= vm_start_gap(vma)) && - (!prev || addr >= vm_end_gap(prev))) + /* + * Here CHERI representable address is aligned down as reservation + * layer holds this unusable aligned down gap. + */ + align_addr = reserv_representable_base(addr, len); + vma = find_vma_prev(mm, align_addr, &prev); + if (mmap_end - align_len >= align_addr && align_addr >= mmap_min_addr && + (!vma || align_addr + align_len <= vm_start_gap(vma)) && + (!prev || align_addr >= vm_end_gap(prev))) return addr; + else if (flags & MAP_FIXED) { + if ((vma && reserv_vma_range_within_reserv(vma, align_addr, align_len)) || + (prev && reserv_vma_range_within_reserv(prev, align_addr, align_len))) { + return addr; + } + return -ERESERVATION; + } }
info.flags = VM_UNMAPPED_AREA_TOPDOWN; - info.length = len; + info.length = align_len; info.low_limit = PAGE_SIZE; info.high_limit = arch_get_mmap_base(addr, mm->mmap_base); info.align_mask = 0; + info.align_mask = reserv_representable_alignment(len); info.align_offset = 0; addr = vm_unmapped_area(&info);
On 15/04/2024 15:21, Amit Daniel Kachhap wrote:
@@ -1706,27 +1707,47 @@ generic_get_unmapped_area(struct file *filp, unsigned long addr, struct vm_area_struct *vma, *prev; struct vm_unmapped_area_info info; const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
- unsigned long align_len;
- unsigned long align_addr;
- if (len > mmap_end - mmap_min_addr)
- align_len = reserv_representable_length(len);
- if (align_len > mmap_end - mmap_min_addr) return -ENOMEM;
- if (flags & MAP_FIXED)
- /*
* In case of PCuABI reservation, fixed should not overlap with any
* existing reservation or completely contained inside the reservation.
* Let this scenario fallthrough below for such checks.
*/
- if ((flags & MAP_FIXED) && !reserv_is_supported(mm)) return addr;
if (addr) {
There is a corner case where MAP_FIXED is passed and addr is null. It doesn't look like this is forbidden in theory. At the moment this will result in MAP_FIXED being ignored. I can't think of a particularly elegant way to handle this, we could add || (flags & MAP_FIXED) in this if.
addr = PAGE_ALIGN(addr);
/*
* Here CHERI representable address is aligned down as reservation
* layer holds this unusable aligned down gap.
*/
vma = find_vma_prev(mm, addr, &prev);align_addr = reserv_representable_base(addr, len);
if (mmap_end - len >= addr && addr >= mmap_min_addr &&
(!vma || addr + len <= vm_start_gap(vma)) &&
(!prev || addr >= vm_end_gap(prev)))
if (mmap_end - align_len >= align_addr && align_addr >= mmap_min_addr &&
(!vma || align_addr + align_len <= vm_start_gap(vma)) &&
(!prev || align_addr >= vm_end_gap(prev))) return addr;
else if (flags & MAP_FIXED) {
if ((vma && reserv_vma_range_within_reserv(vma, align_addr, align_len)) ||
(prev && reserv_vma_range_within_reserv(prev, align_addr, align_len)))
If the condition holds, no reservation is created, we just want to check that the mapping bounds fit in either reservation. For that reason it makes more sense to use the unaligned addr and len here (it should make no difference in practice).
Kevin
return addr;
return -ERESERVATION;
}}
info.flags = 0;
- info.length = len;
- info.length = align_len; info.low_limit = mm->mmap_base; info.high_limit = mmap_end; info.align_mask = 0;
- info.align_mask = reserv_representable_alignment(len); info.align_offset = 0; return vm_unmapped_area(&info);
}
On 4/17/24 12:59, Kevin Brodsky wrote:
On 15/04/2024 15:21, Amit Daniel Kachhap wrote:
@@ -1706,27 +1707,47 @@ generic_get_unmapped_area(struct file *filp, unsigned long addr, struct vm_area_struct *vma, *prev; struct vm_unmapped_area_info info; const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
- unsigned long align_len;
- unsigned long align_addr;
- if (len > mmap_end - mmap_min_addr)
- align_len = reserv_representable_length(len);
- if (align_len > mmap_end - mmap_min_addr) return -ENOMEM;
- if (flags & MAP_FIXED)
- /*
* In case of PCuABI reservation, fixed should not overlap with any
* existing reservation or completely contained inside the reservation.
* Let this scenario fallthrough below for such checks.
*/
- if ((flags & MAP_FIXED) && !reserv_is_supported(mm)) return addr;
if (addr) {
There is a corner case where MAP_FIXED is passed and addr is null. It doesn't look like this is forbidden in theory. At the moment this will result in MAP_FIXED being ignored. I can't think of a particularly elegant way to handle this, we could add || (flags & MAP_FIXED) in this if.
addr = PAGE_ALIGN(addr);
/*
* Here CHERI representable address is aligned down as reservation
* layer holds this unusable aligned down gap.
*/
vma = find_vma_prev(mm, addr, &prev);align_addr = reserv_representable_base(addr, len);
if (mmap_end - len >= addr && addr >= mmap_min_addr &&
(!vma || addr + len <= vm_start_gap(vma)) &&
(!prev || addr >= vm_end_gap(prev)))
if (mmap_end - align_len >= align_addr && align_addr >= mmap_min_addr &&
(!vma || align_addr + align_len <= vm_start_gap(vma)) &&
(!prev || align_addr >= vm_end_gap(prev))) return addr;
else if (flags & MAP_FIXED) {
if ((vma && reserv_vma_range_within_reserv(vma, align_addr, align_len)) ||
(prev && reserv_vma_range_within_reserv(prev, align_addr, align_len)))
If the condition holds, no reservation is created, we just want to check that the mapping bounds fit in either reservation. For that reason it makes more sense to use the unaligned addr and len here (it should make no difference in practice).
Both of the above comments make sense.
Amit
Kevin
return addr;
return -ERESERVATION;
}}
info.flags = 0;
- info.length = len;
- info.length = align_len; info.low_limit = mm->mmap_base; info.high_limit = mmap_end; info.align_mask = 0;
- info.align_mask = reserv_representable_alignment(len); info.align_offset = 0; return vm_unmapped_area(&info); }
PCuABI memory reservation requires reservation properties to be added while creating and modifying VMAs. The reserv_vma_set_reserv() interface is used to update those reservation details. Currently, these properties are added only for the mmap/mremap syscalls; later commits will add them for other special VMA mappings.
PCuABI memory reservation also requires that VMAs are merged or expanded only within their original reservation. Use the suitable reservation interfaces to check this before performing such operations on a VMA.
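A minimal sketch of the additional merge condition (simplified; the real code in vma_merge()/mmap_region() keeps all the existing can_vma_merge_*() checks as well):

    /*
     * Only allow merging with the preceding VMA if the new range lies
     * entirely within that VMA's reservation.
     */
    bool may_merge_prev = (addr == prev->vm_end) &&
                          reserv_vma_range_within_reserv(prev, addr, end - addr);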
Signed-off-by: Amit Daniel Kachhap <amitdaniel.kachhap@arm.com>
---
 include/linux/mm.h |  4 ++--
 kernel/fork.c      |  3 +++
 mm/mmap.c          | 44 +++++++++++++++++++++++++++++++++++++-------
 mm/mremap.c        | 23 ++++++++++++++++++-----
 4 files changed, 60 insertions(+), 14 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h index 73dc5ca47b55..6d62e91676cb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3259,7 +3259,7 @@ extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff, - bool *need_rmap_locks); + bool *need_rmap_locks, struct reserv_struct *reserv_info); extern void exit_mmap(struct mm_struct *); struct vm_area_struct *vma_modify(struct vma_iterator *vmi, struct vm_area_struct *prev, @@ -3365,7 +3365,7 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo
extern unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, - struct list_head *uf); + struct list_head *uf, unsigned long prot); extern unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate, diff --git a/kernel/fork.c b/kernel/fork.c index a460a65624d7..9ee78c76fd4a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -99,6 +99,7 @@ #include <linux/stackprotector.h> #include <linux/user_events.h> #include <linux/iommu.h> +#include <linux/cap_addr_mgmt.h>
#include <asm/pgalloc.h> #include <linux/uaccess.h> @@ -678,6 +679,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, goto out; khugepaged_fork(mm, oldmm);
+ reserv_fork(mm, oldmm); + retval = vma_iter_bulk_alloc(&vmi, oldmm->map_count); if (retval) goto out; diff --git a/mm/mmap.c b/mm/mmap.c index 64e64ab5e819..84e26bb7b203 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -911,7 +911,8 @@ static struct vm_area_struct /* Can we merge the predecessor? */ if (addr == prev->vm_end && mpol_equal(vma_policy(prev), policy) && can_vma_merge_after(prev, vm_flags, anon_vma, file, - pgoff, vm_userfaultfd_ctx, anon_name)) { + pgoff, vm_userfaultfd_ctx, anon_name) + && reserv_vma_range_within_reserv(prev, addr, end - addr)) { merge_prev = true; vma_prev(vmi); } @@ -920,7 +921,8 @@ static struct vm_area_struct /* Can we merge the successor? */ if (next && mpol_equal(policy, vma_policy(next)) && can_vma_merge_before(next, vm_flags, anon_vma, file, pgoff+pglen, - vm_userfaultfd_ctx, anon_name)) { + vm_userfaultfd_ctx, anon_name) + && reserv_vma_range_within_reserv(next, addr, end - addr)) { merge_next = true; }
@@ -1382,7 +1384,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, vm_flags |= VM_NORESERVE; }
- addr = mmap_region(file, addr, len, vm_flags, pgoff, uf); + addr = mmap_region(file, addr, len, vm_flags, pgoff, uf, prot); if (!IS_ERR_VALUE(addr) && ((vm_flags & VM_LOCKED) || (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE)) @@ -2792,7 +2794,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, - struct list_head *uf) + struct list_head *uf, unsigned long prot) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma = NULL; @@ -2802,8 +2804,11 @@ unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long end = addr + len; unsigned long merge_start = addr, merge_end = end; bool writable_file_mapping = false; + struct reserv_struct reserv_info; pgoff_t vm_pgoff; int error; + bool new_reserv = true; + user_ptr_perms_t perms = 0; VMA_ITERATOR(vmi, mm, addr);
/* Check against address space limit. */ @@ -2821,6 +2826,20 @@ unsigned long mmap_region(struct file *file, unsigned long addr, return -ENOMEM; }
+ if (reserv_is_supported(mm)) { + next = find_vma_prev(mm, addr, &prev); + if (next && reserv_vma_range_within_reserv(next, addr, len)) { + reserv_info = reserv_vma_reserv_info(next); + new_reserv = false; + } else if (prev && reserv_vma_range_within_reserv(prev, addr, len)) { + reserv_info = reserv_vma_reserv_info(prev); + new_reserv = false; + } + if (new_reserv) + perms = user_ptr_owning_perms_from_prot(prot, (vm_flags & VM_SHARED) ? + false : true); + } + /* Unmap any existing mapping in the area */ if (do_vmi_munmap(&vmi, mm, addr, len, uf, false)) return -ENOMEM; @@ -2847,7 +2866,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr, /* Check next */ if (next && next->vm_start == end && !vma_policy(next) && can_vma_merge_before(next, vm_flags, NULL, file, pgoff+pglen, - NULL_VM_UFFD_CTX, NULL)) { + NULL_VM_UFFD_CTX, NULL) && + reserv_vma_range_within_reserv(next, addr, len)) { merge_end = next->vm_end; vma = next; vm_pgoff = next->vm_pgoff - pglen; @@ -2858,7 +2878,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr, (vma ? can_vma_merge_after(prev, vm_flags, vma->anon_vma, file, pgoff, vma->vm_userfaultfd_ctx, NULL) : can_vma_merge_after(prev, vm_flags, NULL, file, pgoff, - NULL_VM_UFFD_CTX, NULL))) { + NULL_VM_UFFD_CTX, NULL)) && + reserv_vma_range_within_reserv(prev, addr, len)) { merge_start = prev->vm_start; vma = prev; vm_pgoff = prev->vm_pgoff; @@ -2894,6 +2915,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr, vm_flags_init(vma, vm_flags); vma->vm_page_prot = vm_get_page_prot(vm_flags); vma->vm_pgoff = pgoff; + if (new_reserv) + reserv_vma_set_reserv(vma, addr, len, perms); + else + reserv_vma_set_reserv(vma, reserv_info.start, reserv_info.len, reserv_info.perms);
if (file) { vma->vm_file = get_file(file); @@ -3439,7 +3464,7 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) */ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, unsigned long addr, unsigned long len, pgoff_t pgoff, - bool *need_rmap_locks) + bool *need_rmap_locks, struct reserv_struct *reserv_info) { struct vm_area_struct *vma = *vmap; unsigned long vma_start = vma->vm_start; @@ -3491,6 +3516,11 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, new_vma->vm_start = addr; new_vma->vm_end = addr + len; new_vma->vm_pgoff = pgoff; + if (reserv_info) + reserv_vma_set_reserv(new_vma, reserv_info->start, + reserv_info->len, reserv_info->perms); + else + reserv_vma_set_reserv(new_vma, addr, len, 0); if (vma_dup_policy(vma, new_vma)) goto out_free_vma; if (anon_vma_clone(new_vma, vma)) diff --git a/mm/mremap.c b/mm/mremap.c index 515217a95293..1e0dd63c35a6 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -651,7 +651,8 @@ static unsigned long move_vma(struct vm_area_struct *vma, unsigned long old_addr, unsigned long old_len, unsigned long new_len, unsigned long new_addr, bool *locked, unsigned long flags, - struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap) + struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap, + struct reserv_struct *reserv_info) { long to_account = new_len - old_len; struct mm_struct *mm = vma->vm_mm; @@ -705,7 +706,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, vma_start_write(vma); new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT); new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff, - &need_rmap_locks); + &need_rmap_locks, reserv_info); if (!new_vma) { if (vm_flags & VM_ACCOUNT) vm_unacct_memory(to_account >> PAGE_SHIFT); @@ -871,9 +872,10 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, struct list_head *uf_unmap) { struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; + struct vm_area_struct *vma, *prev; unsigned long ret = -EINVAL; unsigned long map_flags = 0; + struct reserv_struct reserv_info, *reserv_ptr = NULL;
if (offset_in_page(new_addr)) goto out; @@ -902,6 +904,17 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, if ((mm->map_count + 2) >= sysctl_max_map_count - 3) return -ENOMEM;
+ if (reserv_is_supported(mm)) { + vma = find_vma_prev(mm, new_addr, &prev); + if (vma && reserv_vma_range_within_reserv(vma, new_addr, new_len)) { + reserv_info = reserv_vma_reserv_info(vma); + reserv_ptr = &reserv_info; + } else if (prev && reserv_vma_range_within_reserv(prev, new_addr, new_len)) { + reserv_info = reserv_vma_reserv_info(prev); + reserv_ptr = &reserv_info; + } + } + if (flags & MREMAP_FIXED) { ret = do_munmap(mm, new_addr, new_len, uf_unmap_early); if (ret) @@ -945,7 +958,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, new_addr = ret;
ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf, - uf_unmap); + uf_unmap, reserv_ptr);
out: return ret; @@ -1160,7 +1173,7 @@ SYSCALL_DEFINE5(__retptr__(mremap), user_uintptr_t, addr, unsigned long, old_len }
ret = move_vma(vma, addr, old_len, new_len, new_addr, - &locked, flags, &uf, &uf_unmap); + &locked, flags, &uf, &uf_unmap, NULL); } out: if (offset_in_page(ret))
Use the recently introduced PCuABI reservation interfaces to add parameter constraints for the mmap/munmap syscalls. The capability returned by the mmap syscall is now bounded and matches the reservation range. The in-kernel memory mapping function vm_mmap() does not check these constraints on its parameters. The added reservation checks do not affect the compat64 code path.
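For illustration, from a purecap userspace point of view (not part of the patch; cheri_base_get()/cheri_length_get() are the usual CHERI C API accessors), the returned capability now reflects the reservation bounds:

    void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p != MAP_FAILED) {
        /*
         * cheri_base_get(p) and cheri_length_get(p) match the reservation
         * created for this mapping, i.e. the CHERI representable
         * base/length of the page-aligned range.
         */
    }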
Signed-off-by: Amit Daniel Kachhap <amitdaniel.kachhap@arm.com>
---
 include/linux/mm.h |  4 ++++
 mm/internal.h      |  2 +-
 mm/mmap.c          | 56 ++++++++++++++++++++++++++++++++++++++++++----
 mm/util.c          |  9 +-------
 4 files changed, 58 insertions(+), 13 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h index 6d62e91676cb..137dbd27db55 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3411,6 +3411,10 @@ struct vm_unmapped_area_info {
extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info);
+int check_pcuabi_params(user_uintptr_t user_ptr, unsigned long len, + unsigned long flags, bool enforce_cap_validity, + bool enforce_range_mapped, bool reserv_lock); + /* truncate.c */ extern void truncate_inode_pages(struct address_space *, loff_t); extern void truncate_inode_pages_range(struct address_space *, diff --git a/mm/internal.h b/mm/internal.h index 58df037c3824..3a88f1e2ffee 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -861,7 +861,7 @@ extern u64 hwpoison_filter_flags_value; extern u64 hwpoison_filter_memcg; extern u32 hwpoison_filter_enable;
-extern user_uintptr_t __must_check vm_mmap_pgoff(struct file *, user_uintptr_t, +extern unsigned long __must_check vm_mmap_pgoff(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/mm/mmap.c b/mm/mmap.c index 84e26bb7b203..cb069b76d761 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1392,12 +1392,40 @@ unsigned long do_mmap(struct file *file, unsigned long addr, return addr; }
-user_uintptr_t ksys_mmap_pgoff(user_uintptr_t addr, unsigned long len, +int check_pcuabi_params(user_uintptr_t user_ptr, unsigned long len, + unsigned long flags, bool enforce_cap_validity, + bool enforce_range_mapped, bool reserv_lock) +{ + ptraddr_t addr = (ptraddr_t)user_ptr; + int ret = -EINVAL; + VMA_ITERATOR(vmi, current->mm, addr); + + if (!reserv_is_supported(current->mm)) + return 0; + if (!check_user_ptr_owning(user_ptr, addr, len)) { + if (enforce_cap_validity || !user_ptr_is_same((const void __user *)user_ptr, + (const void __user *)(user_uintptr_t)addr)) + return ret; + return 0; + } + if (!reserv_vmi_cap_within_reserv(&vmi, user_ptr, reserv_lock)) + return -ERESERVATION; + if (!(flags & MREMAP_FIXED || flags & MAP_FIXED)) + return ret; + if (enforce_range_mapped && !reserv_vmi_range_mapped(&vmi, addr, len, reserv_lock)) + return -ENOMEM; + + return 0; +} + +user_uintptr_t ksys_mmap_pgoff(user_uintptr_t user_ptr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) { struct file *file = NULL; - user_uintptr_t retval; + user_uintptr_t retval = -EINVAL; + ptraddr_t addr = (ptraddr_t)user_ptr; + bool new_reserv = true;
if (!(flags & MAP_ANONYMOUS)) { audit_mmap_fd(fd, flags); @@ -1430,7 +1458,21 @@ user_uintptr_t ksys_mmap_pgoff(user_uintptr_t addr, unsigned long len, return PTR_ERR(file); }
+ retval = check_pcuabi_params(user_ptr, len, flags, false, true, false); + if (retval) + goto out_fput; + if (user_ptr_is_valid((const void __user *)user_ptr)) + new_reserv = true; + retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); + if (!IS_ERR_VALUE(retval) && reserv_is_supported(current->mm)) { + if (new_reserv) + retval = make_user_ptr_owning(retval, len, + user_ptr_owning_perms_from_prot(prot, + (flags & MAP_SHARED) ? false : true)); + else + retval = (user_uintptr_t)user_ptr_set_addr((void __user *)user_ptr, retval); + } out_fput: if (file) fput(file); @@ -3097,9 +3139,15 @@ int vm_munmap(unsigned long start, size_t len) } EXPORT_SYMBOL(vm_munmap);
-SYSCALL_DEFINE2(munmap, user_uintptr_t, addr, size_t, len) +SYSCALL_DEFINE2(munmap, user_uintptr_t, user_ptr, size_t, len) { - addr = untagged_addr(addr); + ptraddr_t addr = untagged_addr((ptraddr_t)user_ptr); + VMA_ITERATOR(vmi, current->mm, addr); + + if (reserv_is_supported(current->mm) && !check_user_ptr_owning(user_ptr, addr, len)) + return -EINVAL; + if (!reserv_vmi_cap_within_reserv(&vmi, user_ptr, false)) + return -ERESERVATION; return __vm_munmap(addr, len, true); }
diff --git a/mm/util.c b/mm/util.c index afd40ed9c3c8..bd69a417c6a9 100644 --- a/mm/util.c +++ b/mm/util.c @@ -540,7 +540,7 @@ int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc) } EXPORT_SYMBOL_GPL(account_locked_vm);
-user_uintptr_t vm_mmap_pgoff(struct file *file, user_uintptr_t addr, +unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long pgoff) { @@ -553,19 +553,12 @@ user_uintptr_t vm_mmap_pgoff(struct file *file, user_uintptr_t addr, if (!ret) { if (mmap_write_lock_killable(mm)) return -EINTR; - /* - * TODO [PCuABI] - might need propagating uintcap further down - * to do_mmap to properly handle capabilities - */ ret = do_mmap(file, addr, len, prot, flag, 0, pgoff, &populate, &uf); mmap_write_unlock(mm); userfaultfd_unmap_complete(mm, &uf); if (populate) mm_populate(ret, populate); - /* TODO [PCuABI] - derive proper capability */ - if (!IS_ERR_VALUE(ret)) - ret = (user_uintptr_t)uaddr_to_user_ptr_safe((ptraddr_t)ret); } return ret; }
On 15/04/2024 15:21, Amit Daniel Kachhap wrote:
Use the recently introduced PCuABI reservation interfaces to add parameter constraints for the mmap/munmap syscalls. The capability returned by the mmap syscall is now bounded, with bounds matching the reservation range. The in-kernel memory mapping function vm_mmap() does not check the constraints on its parameters. The reservation checks added here do not affect the compat64 code path.
Signed-off-by: Amit Daniel Kachhap amitdaniel.kachhap@arm.com
include/linux/mm.h | 4 ++++ mm/internal.h | 2 +- mm/mmap.c | 56 ++++++++++++++++++++++++++++++++++++++++++---- mm/util.c | 9 +------- 4 files changed, 58 insertions(+), 13 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h index 6d62e91676cb..137dbd27db55 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3411,6 +3411,10 @@ struct vm_unmapped_area_info { extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info); +int check_pcuabi_params(user_uintptr_t user_ptr, unsigned long len,
unsigned long flags, bool enforce_cap_validity,
bool enforce_range_mapped, bool reserv_lock);
/* truncate.c */ extern void truncate_inode_pages(struct address_space *, loff_t); extern void truncate_inode_pages_range(struct address_space *, diff --git a/mm/internal.h b/mm/internal.h index 58df037c3824..3a88f1e2ffee 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -861,7 +861,7 @@ extern u64 hwpoison_filter_flags_value; extern u64 hwpoison_filter_memcg; extern u32 hwpoison_filter_enable; -extern user_uintptr_t __must_check vm_mmap_pgoff(struct file *, user_uintptr_t, +extern unsigned long __must_check vm_mmap_pgoff(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); diff --git a/mm/mmap.c b/mm/mmap.c index 84e26bb7b203..cb069b76d761 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1392,12 +1392,40 @@ unsigned long do_mmap(struct file *file, unsigned long addr, return addr; } -user_uintptr_t ksys_mmap_pgoff(user_uintptr_t addr, unsigned long len, +int check_pcuabi_params(user_uintptr_t user_ptr, unsigned long len,
unsigned long flags, bool enforce_cap_validity,
bool enforce_range_mapped, bool reserv_lock)
The combination of enforce_cap_validity + enforce_range_mapped is pretty hard to read, especially as the function is directly called with true/false as parameters.
In practice, I think only enforce_cap_validity == false && enforce_range_mapped == true is useful. That corresponds to the standard mmap() case, as well as the new_addr case in mremap(), and shmat() (which is equivalent to mmap()). The old_addr case in mremap() corresponds in fact to the same checks as standard syscalls like munmap(), so it shouldn't be handled here.
+{
- ptraddr_t addr = (ptraddr_t)user_ptr;
- int ret = -EINVAL;
- VMA_ITERATOR(vmi, current->mm, addr);
- if (!reserv_is_supported(current->mm))
return 0;
- if (!check_user_ptr_owning(user_ptr, addr, len)) {
if (enforce_cap_validity || !user_ptr_is_same((const void __user *)user_ptr,
(const void __user *)(user_uintptr_t)addr))
return ret;
return 0;
- }
- if (!reserv_vmi_cap_within_reserv(&vmi, user_ptr, reserv_lock))
return -ERESERVATION;
- if (!(flags & MREMAP_FIXED || flags & MAP_FIXED))
We cannot do this. MREMAP_FIXED is the same value as MAP_PRIVATE. The caller should check its own flags, we could pass a boolean instead.
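Purely for illustration (editor's sketch combining the two remarks above, untested): with the redundant booleans dropped and the caller passing a pre-evaluated 'fixed' flag, the helper could reduce to something like:

	int check_pcuabi_params(user_uintptr_t user_ptr, unsigned long len,
				bool fixed, bool reserv_lock)
	{
		ptraddr_t addr = (ptraddr_t)user_ptr;
		VMA_ITERATOR(vmi, current->mm, addr);

		if (!reserv_is_supported(current->mm))
			return 0;
		/* Null-derived pointer: no owning capability expected */
		if (!check_user_ptr_owning(user_ptr, addr, len))
			return user_ptr_is_same((const void __user *)user_ptr,
					(const void __user *)(user_uintptr_t)addr) ?
				0 : -EINVAL;
		if (!reserv_vmi_cap_within_reserv(&vmi, user_ptr, reserv_lock))
			return -ERESERVATION;
		/* An owning capability is only meaningful with a fixed address */
		if (!fixed)
			return -EINVAL;
		if (!reserv_vmi_range_mapped(&vmi, addr, len, reserv_lock))
			return -ENOMEM;
		return 0;
	}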
return ret;
- if (enforce_range_mapped && !reserv_vmi_range_mapped(&vmi, addr, len, reserv_lock))
return -ENOMEM;
- return 0;
+}
+user_uintptr_t ksys_mmap_pgoff(user_uintptr_t user_ptr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) { struct file *file = NULL;
- user_uintptr_t retval;
- user_uintptr_t retval = -EINVAL;
- ptraddr_t addr = (ptraddr_t)user_ptr;
- bool new_reserv = true;
if (!(flags & MAP_ANONYMOUS)) { audit_mmap_fd(fd, flags); @@ -1430,7 +1458,21 @@ user_uintptr_t ksys_mmap_pgoff(user_uintptr_t addr, unsigned long len, return PTR_ERR(file); }
- retval = check_pcuabi_params(user_ptr, len, flags, false, true, false);
- if (retval)
goto out_fput;
- if (user_ptr_is_valid((const void __user *)user_ptr))
new_reserv = true;
new_reserv = false surely?
- retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
- if (!IS_ERR_VALUE(retval) && reserv_is_supported(current->mm)) {
if (new_reserv)
retval = make_user_ptr_owning(retval, len,
user_ptr_owning_perms_from_prot(prot,
(flags & MAP_SHARED) ? false : true));
else
retval = (user_uintptr_t)user_ptr_set_addr((void __user *)user_ptr, retval);
There's nothing to do in this case, the return value is exactly user_ptr as per the spec (in other words this operation should be a no-op).
Kevin
- }
out_fput: if (file) fput(file); @@ -3097,9 +3139,15 @@ int vm_munmap(unsigned long start, size_t len) } EXPORT_SYMBOL(vm_munmap); -SYSCALL_DEFINE2(munmap, user_uintptr_t, addr, size_t, len) +SYSCALL_DEFINE2(munmap, user_uintptr_t, user_ptr, size_t, len) {
- addr = untagged_addr(addr);
- ptraddr_t addr = untagged_addr((ptraddr_t)user_ptr);
- VMA_ITERATOR(vmi, current->mm, addr);
- if (reserv_is_supported(current->mm) && !check_user_ptr_owning(user_ptr, addr, len))
return -EINVAL;
- if (!reserv_vmi_cap_within_reserv(&vmi, user_ptr, false))
		return -ERESERVATION;
	return __vm_munmap(addr, len, true);
} diff --git a/mm/util.c b/mm/util.c index afd40ed9c3c8..bd69a417c6a9 100644 --- a/mm/util.c +++ b/mm/util.c @@ -540,7 +540,7 @@ int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc) } EXPORT_SYMBOL_GPL(account_locked_vm); -user_uintptr_t vm_mmap_pgoff(struct file *file, user_uintptr_t addr, +unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long pgoff) { @@ -553,19 +553,12 @@ user_uintptr_t vm_mmap_pgoff(struct file *file, user_uintptr_t addr, if (!ret) { if (mmap_write_lock_killable(mm)) return -EINTR;
		/*
		 * TODO [PCuABI] - might need propagating uintcap further down
		 * to do_mmap to properly handle capabilities
		 */
		ret = do_mmap(file, addr, len, prot, flag, 0, pgoff,
			      &populate, &uf);
		mmap_write_unlock(mm);
		userfaultfd_unmap_complete(mm, &uf);
		if (populate)
			mm_populate(ret, populate);
		/* TODO [PCuABI] - derive proper capability */
		if (!IS_ERR_VALUE(ret))
			ret = (user_uintptr_t)uaddr_to_user_ptr_safe((ptraddr_t)ret);
	}
	return ret;
}
Use the recently introduced PCuABI reservation interfaces to add parameter constraints for the mremap syscall. The capability returned by the mremap syscall is either the same as the input capability, when the existing reservation is reused, or a newly created capability when a new reservation is required.
Signed-off-by: Amit Daniel Kachhap amitdaniel.kachhap@arm.com --- mm/mremap.c | 56 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 14 deletions(-)
diff --git a/mm/mremap.c b/mm/mremap.c index 1e0dd63c35a6..8a6a90d3c40e 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -25,6 +25,7 @@ #include <linux/uaccess.h> #include <linux/userfaultfd_k.h> #include <linux/mempolicy.h> +#include <linux/cap_addr_mgmt.h>
#include <asm/cacheflush.h> #include <asm/tlb.h> @@ -865,17 +866,20 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr, return vma; }
-static unsigned long mremap_to(unsigned long addr, unsigned long old_len, - unsigned long new_addr, unsigned long new_len, bool *locked, +static user_uintptr_t mremap_to(user_uintptr_t user_ptr, unsigned long old_len, + user_uintptr_t new_user_ptr, unsigned long new_len, bool *locked, unsigned long flags, struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap_early, struct list_head *uf_unmap) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; - unsigned long ret = -EINVAL; + user_uintptr_t ret = -EINVAL; unsigned long map_flags = 0; struct reserv_struct reserv_info, *reserv_ptr = NULL; + ptraddr_t addr = (ptraddr_t)user_ptr; + ptraddr_t new_addr = (ptraddr_t)new_user_ptr; + unsigned long old_perm = 0;
if (offset_in_page(new_addr)) goto out; @@ -957,9 +961,18 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, if (!(flags & MREMAP_FIXED)) new_addr = ret;
+ old_perm = reserv_vma_reserv_perms(vma); ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf, uf_unmap, reserv_ptr);
+ if (!IS_ERR_VALUE(ret)) { + if (reserv_is_supported(mm)) { + if (!(flags & MREMAP_FIXED)) + ret = make_user_ptr_owning(new_addr, new_len, old_perm); + else + ret = new_user_ptr; + } + } out: return ret; } @@ -985,19 +998,21 @@ static int vma_expandable(struct vm_area_struct *vma, unsigned long delta) * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise * This option implies MREMAP_MAYMOVE. */ -SYSCALL_DEFINE5(__retptr__(mremap), user_uintptr_t, addr, unsigned long, old_len, +SYSCALL_DEFINE5(__retptr__(mremap), user_uintptr_t, user_ptr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, - user_uintptr_t, new_addr) + user_uintptr_t, new_user_ptr) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; user_uintptr_t ret = -EINVAL; bool locked = false; struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX; + ptraddr_t addr = (ptraddr_t)user_ptr; + ptraddr_t new_addr = (ptraddr_t)new_user_ptr; + unsigned long old_perm = 0; LIST_HEAD(uf_unmap_early); LIST_HEAD(uf_unmap);
- /* @TODO [PCuABI] - capability validation */ /* * There is a deliberate asymmetry here: we strip the pointer tag * from the old address but leave the new address alone. This is @@ -1010,6 +1025,7 @@ SYSCALL_DEFINE5(__retptr__(mremap), user_uintptr_t, addr, unsigned long, old_len * information. */ addr = untagged_addr(addr); + user_ptr = (user_uintptr_t)user_ptr_set_addr((void __user *)user_ptr, addr);
if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP)) return ret; @@ -1048,6 +1064,15 @@ SYSCALL_DEFINE5(__retptr__(mremap), user_uintptr_t, addr, unsigned long, old_len goto out; }
+ ret = check_pcuabi_params(user_ptr, old_len ? old_len : new_len, + flags | MREMAP_FIXED, true, false, true); + if (ret) + goto out; + ret = check_pcuabi_params(new_user_ptr, new_len, flags, false, true, true); + if (ret) + goto out; + old_perm = reserv_vma_reserv_perms(vma); + if (is_vm_hugetlb_page(vma)) { struct hstate *h __maybe_unused = hstate_vma(vma);
@@ -1069,7 +1094,7 @@ SYSCALL_DEFINE5(__retptr__(mremap), user_uintptr_t, addr, unsigned long, old_len }
if (flags & (MREMAP_FIXED | MREMAP_DONTUNMAP)) { - ret = mremap_to(addr, old_len, new_addr, new_len, + ret = mremap_to(user_ptr, old_len, new_user_ptr, new_len, &locked, flags, &uf, &uf_unmap_early, &uf_unmap); goto out; @@ -1094,7 +1119,7 @@ SYSCALL_DEFINE5(__retptr__(mremap), user_uintptr_t, addr, unsigned long, old_len if (ret) goto out;
- ret = addr; + ret = user_ptr; goto out_unlocked; }
@@ -1148,7 +1173,7 @@ SYSCALL_DEFINE5(__retptr__(mremap), user_uintptr_t, addr, unsigned long, old_len locked = true; new_addr = addr; } - ret = addr; + ret = user_ptr; goto out; } } @@ -1172,8 +1197,14 @@ SYSCALL_DEFINE5(__retptr__(mremap), user_uintptr_t, addr, unsigned long, old_len goto out; }
- ret = move_vma(vma, addr, old_len, new_len, new_addr, + ret = move_vma(vma, user_ptr, old_len, new_len, new_addr, &locked, flags, &uf, &uf_unmap, NULL); + if (!IS_ERR_VALUE(ret)) { + if (reserv_is_supported(mm)) + ret = make_user_ptr_owning(new_addr, new_len, old_perm); + else + ret = (user_uintptr_t)new_addr; + } } out: if (offset_in_page(ret)) @@ -1185,8 +1216,5 @@ SYSCALL_DEFINE5(__retptr__(mremap), user_uintptr_t, addr, unsigned long, old_len userfaultfd_unmap_complete(mm, &uf_unmap_early); mremap_userfaultfd_complete(&uf, addr, ret, old_len); userfaultfd_unmap_complete(mm, &uf_unmap); - /* TODO [PCuABI] - derive proper capability */ - return IS_ERR_VALUE(ret) ? - ret : - (user_intptr_t)uaddr_to_user_ptr_safe((ptraddr_t)ret); + return ret; }
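To summarise the intended return value, an illustrative (editor's) userspace view of the cases the patch distinguishes:

	void *q = mremap(p, old_len, new_len, MREMAP_MAYMOVE);
	/* Expected with this series applied:
	 *  - in-place expansion or shrink: q is p's capability, unchanged;
	 *  - move to a kernel-chosen location: q is a new owning capability
	 *    bounded to the new reservation, with permissions inherited from
	 *    the old reservation;
	 *  - MREMAP_FIXED: q is the caller-supplied new-address capability. */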
Use the recently introduced PCuABI reservation interfaces and add the relevant capability/reservation constraint checks on the mprotect syscall parameters.
Signed-off-by: Amit Daniel Kachhap amitdaniel.kachhap@arm.com --- mm/mprotect.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-)
diff --git a/mm/mprotect.c b/mm/mprotect.c index 4dffb34f62fd..1c64a9df53a4 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -32,6 +32,7 @@ #include <linux/sched/sysctl.h> #include <linux/userfaultfd_k.h> #include <linux/memory-tiers.h> +#include <linux/cap_addr_mgmt.h> #include <asm/cacheflush.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> @@ -677,7 +678,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb, /* * pkey==-1 when doing a legacy mprotect() */ -static int do_mprotect_pkey(user_uintptr_t start, size_t len, +static int do_mprotect_pkey(user_uintptr_t user_ptr, size_t len, unsigned long prot, int pkey) { unsigned long nstart, end, tmp, reqprot; @@ -688,9 +689,7 @@ static int do_mprotect_pkey(user_uintptr_t start, size_t len, (prot & PROT_READ); struct mmu_gather tlb; struct vma_iterator vmi; - - /* TODO [PCuABI] - capability checks for uaccess */ - start = untagged_addr(start); + unsigned long start = untagged_addr(user_ptr);
prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP); if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */ @@ -704,6 +703,9 @@ static int do_mprotect_pkey(user_uintptr_t start, size_t len, end = start + len; if (end <= start) return -ENOMEM; + + if (reserv_is_supported(current->mm) && !check_user_ptr_owning(user_ptr, start, len)) + return -EINVAL; if (!arch_validate_prot(prot, start)) return -EINVAL;
@@ -761,6 +763,12 @@ static int do_mprotect_pkey(user_uintptr_t start, size_t len, break; }
+ /* Check if the capability range is valid with mmap lock. */ + if (!reserv_vma_cap_within_reserv(vma, user_ptr)) { + error = -ERESERVATION; + break; + } + /* Does the application expect PROT_READ to imply PROT_EXEC */ if (rier && (vma->vm_flags & VM_MAYEXEC)) prot |= PROT_EXEC; @@ -825,18 +833,18 @@ static int do_mprotect_pkey(user_uintptr_t start, size_t len, return error; }
-SYSCALL_DEFINE3(mprotect, user_uintptr_t, start, size_t, len, +SYSCALL_DEFINE3(mprotect, user_uintptr_t, user_ptr, size_t, len, unsigned long, prot) { - return do_mprotect_pkey(start, len, prot, -1); + return do_mprotect_pkey(user_ptr, len, prot, -1); }
#ifdef CONFIG_ARCH_HAS_PKEYS
-SYSCALL_DEFINE4(pkey_mprotect, user_uintptr_t, start, size_t, len, +SYSCALL_DEFINE4(pkey_mprotect, user_uintptr_t, user_ptr, size_t, len, unsigned long, prot, int, pkey) { - return do_mprotect_pkey(start, len, prot, pkey); + return do_mprotect_pkey(user_ptr, len, prot, pkey); }
SYSCALL_DEFINE2(pkey_alloc, unsigned long, flags, unsigned long, init_val)
Use the recently introduced PCuABI reservation interfaces to verify the address range for madvise syscall.
The do_madvise() function is also used by the virtual address monitoring daemon (DAMON), whose requests may not satisfy the reservation range criteria, so add a parameter to skip the reservation checks.
Signed-off-by: Amit Daniel Kachhap amitdaniel.kachhap@arm.com --- include/linux/mm.h | 3 ++- io_uring/advise.c | 2 +- mm/damon/vaddr.c | 2 +- mm/madvise.c | 27 ++++++++++++++++++++++----- 4 files changed, 26 insertions(+), 8 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h index 137dbd27db55..be713afe0ef2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3375,7 +3375,8 @@ extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, bool unlock); extern int do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf); -extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior); +extern int do_madvise(struct mm_struct *mm, user_uintptr_t user_ptr, size_t len_in, + int behavior, bool reserv_ignore);
#ifdef CONFIG_MMU extern int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, diff --git a/io_uring/advise.c b/io_uring/advise.c index 952d9289a311..2e43142cf4df 100644 --- a/io_uring/advise.c +++ b/io_uring/advise.c @@ -55,7 +55,7 @@ int io_madvise(struct io_kiocb *req, unsigned int issue_flags) WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
/* TODO [PCuABI] - capability checks for uaccess */ - ret = do_madvise(current->mm, user_ptr_addr(ma->addr), ma->len, ma->advice); + ret = do_madvise(current->mm, (user_uintptr_t)ma->addr, ma->len, ma->advice, false); io_req_set_res(req, ret, 0); return IOU_OK; #else diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index a4d1f63c5b23..3138da113117 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -643,7 +643,7 @@ static unsigned long damos_madvise(struct damon_target *target, if (!mm) return 0;
- applied = do_madvise(mm, start, len, behavior) ? 0 : len; + applied = do_madvise(mm, start, len, behavior, true) ? 0 : len; mmput(mm);
return applied; diff --git a/mm/madvise.c b/mm/madvise.c index d0c8e854636e..7a70482b401f 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -31,6 +31,7 @@ #include <linux/swapops.h> #include <linux/shmem_fs.h> #include <linux/mmu_notifier.h> +#include <linux/cap_addr_mgmt.h>
#include <asm/tlb.h>
@@ -1394,13 +1395,16 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, * -EBADF - map exists, but area maps something that isn't a file. * -EAGAIN - a kernel resource was temporarily unavailable. */ -int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior) +int do_madvise(struct mm_struct *mm, user_uintptr_t user_ptr, size_t len_in, + int behavior, bool reserv_ignore) { unsigned long end; int error; int write; size_t len; struct blk_plug plug; + unsigned long start = (ptraddr_t)user_ptr; + struct vma_iterator vmi;
if (!madvise_behavior_valid(behavior)) return -EINVAL; @@ -1433,14 +1437,27 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh mmap_read_lock(mm); }
- /* TODO [PCuABI] - capability checks for uaccess */ start = untagged_addr_remote(mm, start); end = start + len;
+ if (!reserv_ignore) { + vma_iter_init(&vmi, current->mm, start); + if (reserv_is_supported(current->mm) && + !check_user_ptr_owning(user_ptr, start, len)) { + error = -EINVAL; + goto out; + } + /* Check if the range exists within the reservation with mmap lock. */ + if (!reserv_vmi_cap_within_reserv(&vmi, user_ptr, true)) { + error = -ERESERVATION; + goto out; + } + } blk_start_plug(&plug); error = madvise_walk_vmas(mm, start, end, behavior, madvise_vma_behavior); blk_finish_plug(&plug); +out: if (write) mmap_write_unlock(mm); else @@ -1449,9 +1466,9 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh return error; }
-SYSCALL_DEFINE3(madvise, user_uintptr_t, start, size_t, len_in, int, behavior) +SYSCALL_DEFINE3(madvise, user_uintptr_t, user_ptr, size_t, len_in, int, behavior) { - return do_madvise(current->mm, start, len_in, behavior); + return do_madvise(current->mm, user_ptr, len_in, behavior, false); }
SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, @@ -1506,7 +1523,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
while (iov_iter_count(&iter)) { ret = do_madvise(mm, user_ptr_addr(iter_iov_addr(&iter)), - iter_iov_len(&iter), behavior); + iter_iov_len(&iter), behavior, false); if (ret < 0) break; iov_iter_advance(&iter, iter_iov_len(&iter));
On 15/04/2024 15:21, Amit Daniel Kachhap wrote:
SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, @@ -1506,7 +1523,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, while (iov_iter_count(&iter)) { ret = do_madvise(mm, user_ptr_addr(iter_iov_addr(&iter)),
iter_iov_len(&iter), behavior);
iter_iov_len(&iter), behavior, false);
process_madvise() should only check capabilities if targeting the current process (see the spec). For now let's just skip the checks by passing true.
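i.e., for now something like (editor's illustration):

	ret = do_madvise(mm, user_ptr_addr(iter_iov_addr(&iter)),
			 iter_iov_len(&iter), behavior, true);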
Kevin
if (ret < 0) break; iov_iter_advance(&iter, iter_iov_len(&iter));
Use the recently introduced PCuABI reservation interfaces to verify the address range for mlock, mlock2, and munlock syscalls.
Signed-off-by: Amit Daniel Kachhap amitdaniel.kachhap@arm.com --- mm/mlock.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-)
diff --git a/mm/mlock.c b/mm/mlock.c index 086546ac5766..7d4c84265a84 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -25,6 +25,7 @@ #include <linux/memcontrol.h> #include <linux/mm_inline.h> #include <linux/secretmem.h> +#include <linux/cap_addr_mgmt.h>
#include "internal.h"
@@ -621,14 +622,16 @@ static int __mlock_posix_error_return(long retval) return retval; }
-static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags) +static __must_check int do_mlock(user_uintptr_t user_ptr, size_t len, vm_flags_t flags) { unsigned long locked; unsigned long lock_limit; int error = -ENOMEM; + unsigned long start = untagged_addr(user_ptr); + struct vma_iterator vmi;
- start = untagged_addr(start); - + if (reserv_is_supported(current->mm) && !check_user_ptr_owning(user_ptr, start, len)) + return -EINVAL; if (!can_do_mlock()) return -EPERM;
@@ -642,6 +645,12 @@ static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t fla if (mmap_write_lock_killable(current->mm)) return -EINTR;
+ vma_iter_init(&vmi, current->mm, start); + /* Check if the range exists within the reservation with mmap lock. */ + if (!reserv_vmi_cap_within_reserv(&vmi, user_ptr, true)) { + mmap_write_unlock(current->mm); + return -ERESERVATION; + } locked += current->mm->locked_vm; if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) { /* @@ -668,12 +677,12 @@ static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t fla return 0; }
-SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) +SYSCALL_DEFINE2(mlock, user_uintptr_t, user_ptr, size_t, len) { - return do_mlock(start, len, VM_LOCKED); + return do_mlock(user_ptr, len, VM_LOCKED); }
-SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags) +SYSCALL_DEFINE3(mlock2, user_uintptr_t, user_ptr, size_t, len, int, flags) { vm_flags_t vm_flags = VM_LOCKED;
@@ -683,20 +692,29 @@ SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags) if (flags & MLOCK_ONFAULT) vm_flags |= VM_LOCKONFAULT;
- return do_mlock(start, len, vm_flags); + return do_mlock(user_ptr, len, vm_flags); }
-SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) +SYSCALL_DEFINE2(munlock, user_uintptr_t, user_ptr, size_t, len) { int ret; + unsigned long start = untagged_addr(user_ptr); + struct vma_iterator vmi;
- start = untagged_addr(start); + if (reserv_is_supported(current->mm) && !check_user_ptr_owning(user_ptr, start, len)) + return -EINVAL;
len = PAGE_ALIGN(len + (offset_in_page(start))); start &= PAGE_MASK;
if (mmap_write_lock_killable(current->mm)) return -EINTR; + vma_iter_init(&vmi, current->mm, start); + /* Check if the range exists within the reservation with mmap lock. */ + if (!reserv_vmi_cap_within_reserv(&vmi, user_ptr, true)) { + mmap_write_unlock(current->mm); + return -ERESERVATION; + } ret = apply_vma_lock_flags(start, len, 0); mmap_write_unlock(current->mm);
Use the recently introduced PCuABI reservation interfaces to verify the address range for msync syscall.
Signed-off-by: Amit Daniel Kachhap amitdaniel.kachhap@arm.com --- mm/msync.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/mm/msync.c b/mm/msync.c index ac4c9bfea2e7..7a571181c371 100644 --- a/mm/msync.c +++ b/mm/msync.c @@ -14,6 +14,7 @@ #include <linux/file.h> #include <linux/syscalls.h> #include <linux/sched.h> +#include <linux/cap_addr_mgmt.h>
/* * MS_SYNC syncs the entire file - including mappings. @@ -29,16 +30,17 @@ * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to * applications. */ -SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags) +SYSCALL_DEFINE3(msync, user_uintptr_t, user_ptr, size_t, len, int, flags) { unsigned long end; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; int unmapped_error = 0; int error = -EINVAL; + unsigned long start = untagged_addr(user_ptr);
- start = untagged_addr(start); - + if (reserv_is_supported(mm) && !check_user_ptr_owning(user_ptr, start, len)) + return -EINVAL; if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC)) goto out; if (offset_in_page(start)) @@ -61,6 +63,11 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags) */ mmap_read_lock(mm); vma = find_vma(mm, start); + /* Check if the range exists within the reservation with mmap lock. */ + if (vma && !reserv_vma_cap_within_reserv(vma, user_ptr)) { + error = -ERESERVATION; + goto out_unlock; + } for (;;) { struct file *file; loff_t fstart, fend;
The MAP_GROWSDOWN flag is not supported by the PCuABI specification, so reject such requests with -EOPNOTSUPP.
Signed-off-by: Amit Daniel Kachhap amitdaniel.kachhap@arm.com --- mm/mmap.c | 11 +++++++++++ 1 file changed, 11 insertions(+)
diff --git a/mm/mmap.c b/mm/mmap.c index cb069b76d761..67a208465d0d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1458,6 +1458,17 @@ user_uintptr_t ksys_mmap_pgoff(user_uintptr_t user_ptr, unsigned long len, return PTR_ERR(file); }
+ /* + * Introduce checks for PCuABI: + * - MAP_GROWSDOWN flag has no fixed bounds and hence is not supported + * in the PCuABI reservation model. + */ + if (reserv_is_supported(current->mm)) { + if (flags & MAP_GROWSDOWN) { + retval = -EOPNOTSUPP; + goto out_fput; + } + } retval = check_pcuabi_params(user_ptr, len, flags, false, true, false); if (retval) goto out_fput;
The PCuABI specification restricts expanding capability permissions through the mprotect() system call. This requires capabilities to be created initially with the maximum permissions that their memory mappings may possess during their lifetime.
Signed-off-by: Amit Daniel Kachhap amitdaniel.kachhap@arm.com --- include/uapi/asm-generic/mman-common.h | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index 6ce1f1ceb432..e7ba511c2bad 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -17,6 +17,12 @@ #define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ #define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */
+/* PCuABI mapping and capability permissions */ +#define _PROT_MAX_SHIFT 16 +#define PROT_MAX(prot) ((prot) << _PROT_MAX_SHIFT) +#define PROT_EXTRACT(prot) ((prot) & (PROT_READ | PROT_WRITE | PROT_EXEC)) +#define PROT_MAX_EXTRACT(prot) (((prot) >> _PROT_MAX_SHIFT) & (PROT_READ | PROT_WRITE | PROT_EXEC)) + /* 0x01 - 0x03 are defined in linux/mman.h */ #define MAP_TYPE 0x0f /* Mask for type of mapping */ #define MAP_FIXED 0x10 /* Interpret addr exactly */
The helper functions user_ptr_may_set_prot() and user_ptr_owning_perms_from_prot() are added/modified to manage capability permissions in memory management syscalls as per the PCuABI specification.
Also, add an arch-specific hook arch_user_ptr_owning_perms_from_prot() to convert architecture-specific mapping protections to capability permissions.
Signed-off-by: Amit Daniel Kachhap amitdaniel.kachhap@arm.com --- Documentation/core-api/user_ptr.rst | 1 + arch/Kconfig | 3 ++ include/linux/user_ptr.h | 16 +++++++++ lib/user_ptr.c | 50 +++++++++++++++++++++++++++-- 4 files changed, 67 insertions(+), 3 deletions(-)
diff --git a/Documentation/core-api/user_ptr.rst b/Documentation/core-api/user_ptr.rst index 0ad6e14e14c4..4d7188792a13 100644 --- a/Documentation/core-api/user_ptr.rst +++ b/Documentation/core-api/user_ptr.rst @@ -358,5 +358,6 @@ APIs consider those requirements while creating and checking user pointers. * ``check_user_ptr_owning(ptr, addr, n)`` * ``make_user_ptr_owning(addr, n, perm)`` * ``user_ptr_owning_perms_from_prot(prot, tag_perm)`` +* ``user_ptr_may_set_prot(ptr, prot)``
See ``<linux/user_ptr.h>`` for details on how to use them. diff --git a/arch/Kconfig b/arch/Kconfig index 19f7bbb20a41..161f7002b0ab 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1502,6 +1502,9 @@ config CHERI_PURECAP_UABI availability of CHERI capabilities at compile-time; the resulting kernel image will not boot on incompatible hardware.
+config HAVE_ARCH_USER_PTR_H + bool + source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig" diff --git a/include/linux/user_ptr.h b/include/linux/user_ptr.h index 41ab156653c7..ffb55e67f704 100644 --- a/include/linux/user_ptr.h +++ b/include/linux/user_ptr.h @@ -142,6 +142,17 @@ user_uintptr_t make_user_ptr_owning(ptraddr_t addr, size_t len, user_ptr_perms_t */ user_ptr_perms_t user_ptr_owning_perms_from_prot(int prot, bool has_tag_access);
+/** + * user_ptr_may_set_prot() - Verify that the mapping protection flags conform + * to the capability permission flags. + * @user_ptr: User pointer. + * @prot: Memory protection flag. + * + * Return: True if the capability permissions include the protection flags, + * false otherwise. + */ +bool user_ptr_may_set_prot(user_uintptr_t user_ptr, int prot); + #else /* CONFIG_CHERI_PURECAP_UABI */
typedef int user_ptr_perms_t; @@ -197,6 +208,11 @@ static inline user_ptr_perms_t user_ptr_owning_perms_from_prot(int prot, bool ha return 0; }
+static inline bool user_ptr_may_set_prot(user_uintptr_t user_ptr, int prot) +{ + return true; +} + #endif /* CONFIG_CHERI_PURECAP_UABI */
/** diff --git a/lib/user_ptr.c b/lib/user_ptr.c index 2ef58193fdad..a96866143349 100644 --- a/lib/user_ptr.c +++ b/lib/user_ptr.c @@ -2,9 +2,14 @@ #include <linux/bug.h> #include <linux/cap_addr_mgmt.h> #include <linux/cheri.h> +#include <linux/mman.h> #include <linux/sched.h> #include <linux/user_ptr.h>
+#ifdef CONFIG_HAVE_ARCH_USER_PTR_H +#include <asm/user_ptr.h> +#endif + void __user *uaddr_to_user_ptr(ptraddr_t addr) { /* @@ -94,9 +99,48 @@ user_uintptr_t make_user_ptr_owning(ptraddr_t addr, size_t len, user_ptr_perms_t return cheri_address_set(user_ptr, addr); }
+#ifndef arch_user_ptr_owning_perms_from_prot +static __always_inline +user_ptr_perms_t arch_user_ptr_owning_perms_from_prot(int prot, bool has_tag_access) +{ + return 0; +} +#define arch_user_ptr_owning_perms_from_prot arch_user_ptr_owning_perms_from_prot +#endif /* arch_user_ptr_owning_perms_from_prot */ + user_ptr_perms_t user_ptr_owning_perms_from_prot(int prot, bool has_tag_access) { - /* TODO [PCuABI] - capability permission conversion from memory permission */ - return (CHERI_PERMS_READ | CHERI_PERMS_WRITE | - CHERI_PERMS_EXEC | CHERI_PERMS_ROOTCAP); + user_ptr_perms_t perms = 0; + int used_prot = PROT_MAX_EXTRACT(prot) ? PROT_MAX_EXTRACT(prot) : prot; + + if (used_prot & PROT_READ) { + perms |= CHERI_PERM_LOAD; + if (has_tag_access) + perms |= CHERI_PERM_LOAD_CAP; + } + if (used_prot & PROT_WRITE) { + perms |= CHERI_PERM_STORE; + if (has_tag_access) + perms |= (CHERI_PERM_STORE_CAP | CHERI_PERM_STORE_LOCAL_CAP); + } + if (used_prot & PROT_EXEC) + perms |= CHERI_PERM_EXECUTE; + + /* Fetch any extra architecture specific permissions */ + perms |= arch_user_ptr_owning_perms_from_prot(used_prot, has_tag_access); + perms |= CHERI_PERMS_ROOTCAP; + + return perms; +} + +bool user_ptr_may_set_prot(user_uintptr_t user_ptr, int prot) +{ + user_ptr_perms_t perms = cheri_perms_get(user_ptr); + + if (((prot & PROT_READ) && !(perms & CHERI_PERM_LOAD)) || + ((prot & PROT_WRITE) && !(perms & CHERI_PERM_STORE)) || + ((prot & PROT_EXEC) && !(perms & CHERI_PERM_EXECUTE))) + return false; + + return true; }
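A worked example of what the generic helper above derives (editor's note; any extra arch-specific permissions are ignored here):

	/* user_ptr_owning_perms_from_prot(PROT_READ | PROT_WRITE, true) ==
	 *	CHERI_PERM_LOAD | CHERI_PERM_LOAD_CAP |
	 *	CHERI_PERM_STORE | CHERI_PERM_STORE_CAP |
	 *	CHERI_PERM_STORE_LOCAL_CAP | CHERI_PERMS_ROOTCAP
	 *
	 * user_ptr_owning_perms_from_prot(PROT_MAX(PROT_READ | PROT_EXEC) | PROT_READ, false) ==
	 *	CHERI_PERM_LOAD | CHERI_PERM_EXECUTE | CHERI_PERMS_ROOTCAP
	 */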
Add the arm64 Morello-specific hook arch_user_ptr_owning_perms_from_prot() to convert architecture-specific memory mapping permissions to capability permissions.
Signed-off-by: Amit Daniel Kachhap amitdaniel.kachhap@arm.com --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/user_ptr.h | 34 +++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 arch/arm64/include/asm/user_ptr.h
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 83a5817afa7d..fbf4ed6c6b5b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -31,6 +31,7 @@ config ARM64 select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV select HAVE_ARCH_CHERI_H + select HAVE_ARCH_USER_PTR_H select ARCH_HAS_KEEPINITRD select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS diff --git a/arch/arm64/include/asm/user_ptr.h b/arch/arm64/include/asm/user_ptr.h new file mode 100644 index 000000000000..d0a3e86cb3eb --- /dev/null +++ b/arch/arm64/include/asm/user_ptr.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_USER_PTR_H +#define __ASM_USER_PTR_H + +#include <linux/cheri.h> +#include <linux/mman.h> +#include <linux/sched/task_stack.h> +#include <asm/processor.h> + +#ifdef CONFIG_CHERI_PURECAP_UABI + +static __always_inline +cheri_perms_t arch_user_ptr_owning_perms_from_prot(int prot, bool has_tag_access) +{ + struct pt_regs *regs = task_pt_regs(current); + cheri_perms_t perms = 0; + + if ((prot & PROT_READ) && has_tag_access) + perms |= ARM_CAP_PERMISSION_MUTABLE_LOAD; + + if (prot & PROT_EXEC) { + if (cheri_perms_get(regs->pcc) & CHERI_PERM_SYSTEM_REGS) + perms |= CHERI_PERM_SYSTEM_REGS; + if (cheri_perms_get(regs->pcc) & ARM_CAP_PERMISSION_EXECUTIVE) + perms |= ARM_CAP_PERMISSION_EXECUTIVE; + } + + return perms; +} +#define arch_user_ptr_owning_perms_from_prot arch_user_ptr_owning_perms_from_prot + +#endif /* CONFIG_CHERI_PURECAP_UABI */ + +#endif /* __ASM_USER_PTR_H */
Add a check that the requested protection bits do not exceed the maximum protection bits.
Signed-off-by: Amit Daniel Kachhap amitdaniel.kachhap@arm.com --- mm/mmap.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/mm/mmap.c b/mm/mmap.c index 67a208465d0d..9b72d4769c7a 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1462,12 +1462,18 @@ user_uintptr_t ksys_mmap_pgoff(user_uintptr_t user_ptr, unsigned long len, * Introduce checks for PCuABI: * - MAP_GROWSDOWN flag has no fixed bounds and hence is not supported * in the PCuABI reservation model. + * - PCuABI reservation model introduces the concept of maximum + * protection the mappings can have. Add a check to make sure the + * requested protection does not exceed the maximum protection. */ if (reserv_is_supported(current->mm)) { if (flags & MAP_GROWSDOWN) { retval = -EOPNOTSUPP; goto out_fput; } + if ((PROT_MAX_EXTRACT(prot) != 0) && + ((PROT_EXTRACT(prot) & PROT_MAX_EXTRACT(prot)) != PROT_EXTRACT(prot))) + goto out_fput; } retval = check_pcuabi_params(user_ptr, len, flags, false, true, false); if (retval)
Check that the permissions of the new user address capability do not exceed the permissions of the old user address capability for the mremap syscall.
Signed-off-by: Amit Daniel Kachhap amitdaniel.kachhap@arm.com --- mm/mremap.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+)
diff --git a/mm/mremap.c b/mm/mremap.c index 8a6a90d3c40e..ca14aa8588bf 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -991,6 +991,20 @@ static int vma_expandable(struct vm_area_struct *vma, unsigned long delta) return 1; }
+static int check_mremap_user_ptr_perms(user_uintptr_t user_ptr, user_uintptr_t new_user_ptr, + unsigned long flags) +{ +#ifdef CONFIG_CHERI_PURECAP_UABI + if (!reserv_is_supported(current->mm) || !(flags & MREMAP_FIXED)) + return 0; + + if ((cheri_perms_get(user_ptr) | cheri_perms_get(new_user_ptr)) + != cheri_perms_get(user_ptr)) + return -EINVAL; +#endif + return 0; +} + /* * Expand (or shrink) an existing mapping, potentially moving it at the * same time (controlled by the MREMAP_MAYMOVE flag and available VM space) @@ -1069,6 +1083,9 @@ SYSCALL_DEFINE5(__retptr__(mremap), user_uintptr_t, user_ptr, unsigned long, old if (ret) goto out; ret = check_pcuabi_params(new_user_ptr, new_len, flags, false, true, true); + if (ret) + goto out; + ret = check_mremap_user_ptr_perms(user_ptr, new_user_ptr, flags); if (ret) goto out; old_perm = reserv_vma_reserv_perms(vma);
Check that the requested permissions match the constraints of the input user capability pointer for the mprotect syscall.
Signed-off-by: Amit Daniel Kachhap amitdaniel.kachhap@arm.com --- mm/mprotect.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/mm/mprotect.c b/mm/mprotect.c index 1c64a9df53a4..fbfa082cec6c 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -704,8 +704,10 @@ static int do_mprotect_pkey(user_uintptr_t user_ptr, size_t len, if (end <= start) return -ENOMEM;
- if (reserv_is_supported(current->mm) && !check_user_ptr_owning(user_ptr, start, len)) + if (reserv_is_supported(current->mm) && (!check_user_ptr_owning(user_ptr, start, len) || + !user_ptr_may_set_prot(user_ptr, prot))) return -EINVAL; + if (!arch_validate_prot(prot, start)) return -EINVAL;
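The user-visible consequence, as an illustrative (editor's) snippet: widening permissions beyond what the mapping's capability carries is now rejected.

	void *p = mmap(NULL, len, PROT_READ,	/* no PROT_MAX() given */
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	int ret = mprotect(p, len, PROT_READ | PROT_WRITE);
	/* ret == -1 with errno == EINVAL: the capability in p carries no store
	 * permission, so user_ptr_may_set_prot() rejects the request. */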
Different capability permission and bound constraints are added for the mincore() syscall as per the PCuABI specification. mincore() does not require the VMem permission and only needs at least one of the RWX memory permissions, so the standard check_user_ptr_owning() interface is not used and the permissions are verified explicitly.
Also, as mincore() allows the address range to not span whole pages, checking only a single byte at the page intersection is sufficient.
Signed-off-by: Amit Daniel Kachhap amitdaniel.kachhap@arm.com --- mm/mincore.c | 46 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-)
diff --git a/mm/mincore.c b/mm/mincore.c index dd164cb84ba8..23156caa01f2 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -19,6 +19,7 @@ #include <linux/hugetlb.h> #include <linux/pgtable.h>
+#include <linux/cap_addr_mgmt.h> #include <linux/uaccess.h> #include "swap.h"
@@ -184,15 +185,19 @@ static const struct mm_walk_ops mincore_walk_ops = { * all the arguments, we hold the mmap semaphore: we should * just return the amount of info we're asked for. */ -static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec) +static long do_mincore(user_uintptr_t user_ptr, unsigned long pages, unsigned char *vec) { struct vm_area_struct *vma; unsigned long end; + unsigned long addr = (ptraddr_t)user_ptr; int err;
vma = vma_lookup(current->mm, addr); if (!vma) return -ENOMEM; + /* Check if the capability range is valid with mmap lock. */ + if (!reserv_vma_cap_within_reserv(vma, user_ptr)) + return -ERESERVATION; end = min(vma->vm_end, addr + (pages << PAGE_SHIFT)); if (!can_do_mincore(vma)) { unsigned long pages = DIV_ROUND_UP(end - addr, PAGE_SIZE); @@ -229,14 +234,16 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v * mapped * -EAGAIN - A kernel resource was temporarily unavailable. */ -SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len, +SYSCALL_DEFINE3(mincore, user_uintptr_t, user_ptr, size_t, len, unsigned char __user *, vec) { long retval; unsigned long pages; unsigned char *tmp; - - start = untagged_addr(start); + unsigned long start = untagged_addr((ptraddr_t)user_ptr); +#ifdef CONFIG_CHERI_PURECAP_UABI + unsigned long cap_start, cap_len; +#endif
/* Check the start address: needs to be page-aligned.. */ if (start & ~PAGE_MASK) @@ -253,6 +260,35 @@ SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len, if (!access_ok(vec, pages)) return -EFAULT;
+#ifdef CONFIG_CHERI_PURECAP_UABI + if (!reserv_is_supported(current->mm)) + goto skip_pcuabi_checks; + /* + * mincore syscall does not need VMem permission so as to allow ordinary pages. + * Also at least one of the standard memory permissions RWX will help to reject + * non memory capabilities. + */ + user_ptr = cheri_address_set(user_ptr, start); + if (cheri_is_invalid(user_ptr) || cheri_is_sealed(user_ptr) || + !(CHERI_PERM_GLOBAL & cheri_perms_get(user_ptr)) || + !((CHERI_PERM_LOAD | CHERI_PERM_STORE | CHERI_PERM_EXECUTE) + & cheri_perms_get(user_ptr))) + return -EINVAL; + /* + * mincore syscall can be invoked as: + * mincore(align_down(p, PAGE_SIZE), sz + (p.addr % PAGE_SIZE), vec) + * Hence, the capability might not consider the increased range due to + * alignment. In this scenario, check only the single byte at the page + * intersection. + */ + cap_start = cheri_base_get(user_ptr); + cap_len = cheri_length_get(user_ptr); + if ((start + PAGE_SIZE <= cap_start) || + (cap_start + cap_len < start + len - offset_in_page(len))) + return -EINVAL; +skip_pcuabi_checks: +#endif + tmp = (void *) __get_free_page(GFP_USER); if (!tmp) return -EAGAIN; @@ -264,7 +300,7 @@ SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len, * the temporary buffer size. */ mmap_read_lock(current->mm); - retval = do_mincore(start, min(pages, PAGE_SIZE), tmp); + retval = do_mincore(user_ptr, min(pages, PAGE_SIZE), tmp); mmap_read_unlock(current->mm);
if (retval <= 0)
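An illustrative userspace sketch of the invocation pattern described in the commit message (editor's example; assumes sz plus the page offset fits within one page, and cheri_address_get() from <cheriintrin.h>):

	size_t off = cheri_address_get(p) % page_size;
	unsigned char vec[1];

	/* The capability in p does not cover the bytes gained by aligning
	 * down, so the kernel only checks a single byte at the page
	 * intersection. */
	if (mincore((char *)p - off, sz + off, vec) == 0)
		resident = vec[0] & 1;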
Use the recently introduced PCuABI reservation interfaces to create the appropriate bounded capability for executable/interpreter load segments.
Signed-off-by: Amit Daniel Kachhap amitdaniel.kachhap@arm.com --- fs/binfmt_elf.c | 78 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 19 deletions(-)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index c10ba610be50..df272015490f 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -48,6 +48,7 @@ #include <linux/uaccess.h> #include <linux/rseq.h> #include <linux/cheri.h> +#include <linux/cap_addr_mgmt.h> #include <asm/param.h> #include <asm/page.h>
@@ -119,15 +120,14 @@ static struct linux_binfmt elf_format = { * p_filesz when it ends before the page ends (e.g. bss), otherwise this * memory will contain the junk from the file that should not be present. */ -static int padzero(unsigned long address) +static int padzero(user_uintptr_t user_ptr) { unsigned long nbyte;
- nbyte = ELF_PAGEOFFSET(address); + nbyte = ELF_PAGEOFFSET((ptraddr_t)user_ptr); if (nbyte) { nbyte = ELF_MIN_ALIGN - nbyte; - if (clear_user(make_user_ptr_for_write_uaccess(address, nbyte), - nbyte)) + if (clear_user((void __user *)user_ptr, nbyte)) return -EFAULT; } return 0; @@ -163,6 +163,7 @@ struct elf_load_info { unsigned long end_elf_rx; unsigned long start_elf_rw; unsigned long end_elf_rw; + user_uintptr_t user_ptr_elf; };
static int @@ -303,17 +304,21 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, */ NEW_AUX_ENT(AT_CHERI_EXEC_RW_CAP, (exec_load_info->start_elf_rw != ~0UL ? - elf_uaddr_to_user_ptr(exec_load_info->start_elf_rw) : + (void __user *)cheri_address_set(exec_load_info->user_ptr_elf, + exec_load_info->start_elf_rw) : NULL)); NEW_AUX_ENT(AT_CHERI_EXEC_RX_CAP, - elf_uaddr_to_user_ptr(exec_load_info->start_elf_rx)); + (void __user *)cheri_address_set(exec_load_info->user_ptr_elf, + exec_load_info->start_elf_rx)); NEW_AUX_ENT(AT_CHERI_INTERP_RW_CAP, ((interp_load_addr && interp_load_info->start_elf_rw != ~0UL) ? - elf_uaddr_to_user_ptr(interp_load_info->start_elf_rw) : + (void __user *)cheri_address_set(interp_load_info->user_ptr_elf, + interp_load_info->start_elf_rw) : NULL)); NEW_AUX_ENT(AT_CHERI_INTERP_RX_CAP, (interp_load_addr ? - elf_uaddr_to_user_ptr(interp_load_info->start_elf_rx) : + (void __user *)cheri_address_set(interp_load_info->user_ptr_elf, + interp_load_info->start_elf_rx) : NULL)); NEW_AUX_ENT(AT_CHERI_STACK_CAP, elf_uaddr_to_user_ptr(0)); NEW_AUX_ENT(AT_CHERI_SEAL_CAP, cheri_user_root_seal_cap); @@ -464,28 +469,41 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, * into memory at "addr". Memory from "p_filesz" through "p_memsz" * rounded up to the next page is zeroed. */ -static unsigned long elf_load(struct file *filep, unsigned long addr, - const struct elf_phdr *eppnt, int prot, int type, - unsigned long total_size) +static unsigned long elf_load(struct elf_load_info *load_info, struct file *filep, + unsigned long addr, const struct elf_phdr *eppnt, + int prot, int type, unsigned long total_size) { unsigned long zero_start, zero_end; unsigned long map_addr; + user_uintptr_t map_user_ptr;
if (eppnt->p_filesz) { map_addr = elf_map(filep, addr, eppnt, prot, type, total_size); if (BAD_ADDR(map_addr)) return map_addr; + if (reserv_is_supported(current->mm) && total_size) { + load_info->user_ptr_elf = + reserv_range_set_reserv(map_addr, ELF_PAGEALIGN(total_size), + user_ptr_owning_perms_from_prot(PROT_READ | PROT_WRITE | PROT_EXEC, + true), false); + if (IS_ERR_VALUE(load_info->user_ptr_elf)) + return (long)load_info->user_ptr_elf; + } if (eppnt->p_memsz > eppnt->p_filesz) { zero_start = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) + eppnt->p_filesz; zero_end = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) + eppnt->p_memsz; - + if (reserv_is_supported(current->mm)) + map_user_ptr = (user_uintptr_t)user_ptr_set_addr((void __user *)load_info->user_ptr_elf, + zero_start); + else + map_user_ptr = zero_start; /* * Zero the end of the last mapped page but ignore * any errors if the segment isn't writable. */ - if (padzero(zero_start) && (prot & PROT_WRITE)) + if (padzero(map_user_ptr) && (prot & PROT_WRITE)) return -EFAULT; } } else { @@ -499,15 +517,24 @@ static unsigned long elf_load(struct file *filep, unsigned long addr, * If the header is requesting these pages to be * executable, honour that (ppc32 needs this). */ - int error;
zero_start = ELF_PAGEALIGN(zero_start); zero_end = ELF_PAGEALIGN(zero_end);
- error = vm_brk_flags(zero_start, zero_end - zero_start, + if (!reserv_is_supported(current->mm)) + return vm_brk_flags(zero_start, zero_end - zero_start, prot & PROT_EXEC ? VM_EXEC : 0); - if (error) - map_addr = error; + + if (zero_end <= zero_start) + return map_addr; + map_user_ptr = (user_uintptr_t)user_ptr_set_addr((void __user *)load_info->user_ptr_elf, + zero_start); + map_addr = vm_mmap(0, map_user_ptr, zero_end - zero_start, prot, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, 0); + if (BAD_ADDR(map_addr)) + return (int)map_addr; + if (padzero(map_user_ptr)) + map_addr = -EFAULT; } return map_addr; } @@ -745,7 +772,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, else if (no_base && interp_elf_ex->e_type == ET_DYN) load_addr = -vaddr;
- map_addr = elf_load(interpreter, load_addr + vaddr, + map_addr = elf_load(load_info, interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type, total_size); total_size = 0; error = map_addr; @@ -1090,6 +1117,11 @@ static int load_elf_binary(struct linux_binprm *bprm)
setup_new_exec(bprm);
+#if defined(CONFIG_CHERI_PURECAP_UABI) && (ELF_COMPAT == 0) + set_bit(MMF_PCUABI_RESERV, ¤t->mm->flags); +#else + clear_bit(MMF_PCUABI_RESERV, ¤t->mm->flags); +#endif /* Do this so that we can load the interpreter, if need be. We will change some of these later */ retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP), @@ -1217,7 +1249,15 @@ static int load_elf_binary(struct linux_binprm *bprm) } }
- error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt, + if (reserv_is_supported(current->mm) && first_pt_load && !total_size) { + total_size = total_mapping_size(elf_phdata, elf_ex->e_phnum); + if (!total_size) { + retval = -EINVAL; + goto out_free_dentry; + } + } + + error = elf_load(&exec_load_info, bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags, total_size); if (BAD_ADDR(error)) { retval = IS_ERR_VALUE(error) ?
Use the recently introduced PCuABI reservation interfaces to verify the address range for shmat syscall.
Signed-off-by: Amit Daniel Kachhap amitdaniel.kachhap@arm.com --- include/linux/shm.h | 4 ++-- ipc/shm.c | 27 ++++++++++++++++----------- 2 files changed, 18 insertions(+), 13 deletions(-)
diff --git a/include/linux/shm.h b/include/linux/shm.h index d8e69aed3d32..bf5b2e5cbd0c 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -14,7 +14,7 @@ struct sysv_shm { struct list_head shm_clist; };
-long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, +long do_shmat(int shmid, char __user *shmaddr, int shmflg, user_uintptr_t *user_ptr, unsigned long shmlba); bool is_file_shm_hugepages(struct file *file); void exit_shm(struct task_struct *task); @@ -25,7 +25,7 @@ struct sysv_shm { };
static inline long do_shmat(int shmid, char __user *shmaddr, - int shmflg, unsigned long *addr, + int shmflg, user_uintptr_t *user_ptr, unsigned long shmlba) { return -ENOSYS; diff --git a/ipc/shm.c b/ipc/shm.c index 7bb7c4bbc383..231b68d6c281 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -44,6 +44,7 @@ #include <linux/mount.h> #include <linux/ipc_namespace.h> #include <linux/rhashtable.h> +#include <linux/cap_addr_mgmt.h>
#include <linux/uaccess.h>
@@ -1519,14 +1520,13 @@ COMPAT_SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, void __user *, uptr) * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists. * * NOTE! Despite the name, this is NOT a direct system call entrypoint. The - * "raddr" thing points to kernel space, and there has to be a wrapper around + * "ruser_ptr" thing points to kernel space, and there has to be a wrapper around * this. */ long do_shmat(int shmid, char __user *shmaddr, int shmflg, - ulong *raddr, unsigned long shmlba) + user_uintptr_t *ruser_ptr, unsigned long shmlba) { struct shmid_kernel *shp; - /* TODO [PCuABI] - capability checks for address space management */ unsigned long addr = user_ptr_addr(shmaddr); unsigned long size; struct file *file, *base; @@ -1538,6 +1538,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, struct shm_file_data *sfd; int f_flags; unsigned long populate = 0; + user_uintptr_t user_ptr = (user_uintptr_t)shmaddr;
err = -EINVAL; if (shmid < 0) @@ -1666,11 +1667,16 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, goto invalid; }
- addr = do_mmap(file, addr, size, prot, flags, 0, 0, &populate, NULL); - *raddr = addr; + user_ptr = (user_uintptr_t)user_ptr_set_addr(shmaddr, addr); + err = check_pcuabi_params(user_ptr, size, MAP_FIXED, false, false, true); + if (err) + goto invalid; + + user_ptr = do_mmap(file, user_ptr, size, prot, flags, 0, 0, &populate, NULL); + *ruser_ptr = user_ptr; err = 0; - if (IS_ERR_VALUE(addr)) - err = (long)addr; + if (IS_ERR_VALUE(user_ptr)) + err = (long)user_ptr; invalid: mmap_write_unlock(current->mm); if (populate) @@ -1699,15 +1705,14 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
SYSCALL_DEFINE3(__retptr__(shmat), int, shmid, char __user *, shmaddr, int, shmflg) { - unsigned long ret; + user_uintptr_t ret; long err;
err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA); if (err) return err; force_successful_syscall_return(); - /* TODO [PCuABI] - derive proper capability */ - return (user_uintptr_t)uaddr_to_user_ptr_safe(ret); + return ret; }
#ifdef CONFIG_COMPAT @@ -1718,7 +1723,7 @@ SYSCALL_DEFINE3(__retptr__(shmat), int, shmid, char __user *, shmaddr, int, shmf
COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg) { - unsigned long ret; + user_uintptr_t ret; long err;
err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA);
On 15/04/2024 15:21, Amit Daniel Kachhap wrote:
Use the recently introduced PCuABI reservation interfaces to verify the address range for shmat syscall.
Signed-off-by: Amit Daniel Kachhap amitdaniel.kachhap@arm.com
include/linux/shm.h | 4 ++-- ipc/shm.c | 27 ++++++++++++++++----------- 2 files changed, 18 insertions(+), 13 deletions(-)
diff --git a/include/linux/shm.h b/include/linux/shm.h index d8e69aed3d32..bf5b2e5cbd0c 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -14,7 +14,7 @@ struct sysv_shm { struct list_head shm_clist; }; -long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, +long do_shmat(int shmid, char __user *shmaddr, int shmflg, user_uintptr_t *user_ptr, unsigned long shmlba); bool is_file_shm_hugepages(struct file *file); void exit_shm(struct task_struct *task); @@ -25,7 +25,7 @@ struct sysv_shm { }; static inline long do_shmat(int shmid, char __user *shmaddr,
int shmflg, unsigned long *addr,
int shmflg, user_uintptr_t *user_ptr, unsigned long shmlba)
{ return -ENOSYS; diff --git a/ipc/shm.c b/ipc/shm.c index 7bb7c4bbc383..231b68d6c281 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -44,6 +44,7 @@ #include <linux/mount.h> #include <linux/ipc_namespace.h> #include <linux/rhashtable.h> +#include <linux/cap_addr_mgmt.h> #include <linux/uaccess.h> @@ -1519,14 +1520,13 @@ COMPAT_SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, void __user *, uptr)
- Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
- NOTE! Despite the name, this is NOT a direct system call entrypoint. The
- "raddr" thing points to kernel space, and there has to be a wrapper around
*/
- "ruser_ptr" thing points to kernel space, and there has to be a wrapper around
- this.
long do_shmat(int shmid, char __user *shmaddr, int shmflg,
ulong *raddr, unsigned long shmlba)
user_uintptr_t *ruser_ptr, unsigned long shmlba)
{ struct shmid_kernel *shp;
- /* TODO [PCuABI] - capability checks for address space management */ unsigned long addr = user_ptr_addr(shmaddr); unsigned long size; struct file *file, *base;
@@ -1538,6 +1538,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, struct shm_file_data *sfd; int f_flags; unsigned long populate = 0;
- user_uintptr_t user_ptr = (user_uintptr_t)shmaddr;
err = -EINVAL; if (shmid < 0) @@ -1666,11 +1667,16 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, goto invalid; }
- addr = do_mmap(file, addr, size, prot, flags, 0, 0, &populate, NULL);
- *raddr = addr;
- user_ptr = (user_uintptr_t)user_ptr_set_addr(shmaddr, addr);
- err = check_pcuabi_params(user_ptr, size, MAP_FIXED, false, false, true);
- if (err)
goto invalid;
- user_ptr = do_mmap(file, user_ptr, size, prot, flags, 0, 0, &populate, NULL);
do_mmap() returns an address. I suppose we need to use the same logic as in ksys_mmap_pgoff() to return a capability.
Kevin
- *ruser_ptr = user_ptr; err = 0;
- if (IS_ERR_VALUE(addr))
err = (long)addr;
- if (IS_ERR_VALUE(user_ptr))
err = (long)user_ptr;
invalid: mmap_write_unlock(current->mm); if (populate) @@ -1699,15 +1705,14 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, SYSCALL_DEFINE3(__retptr__(shmat), int, shmid, char __user *, shmaddr, int, shmflg) {
- unsigned long ret;
- user_uintptr_t ret; long err;
err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA); if (err) return err; force_successful_syscall_return();
- /* TODO [PCuABI] - derive proper capability */
- return (user_uintptr_t)uaddr_to_user_ptr_safe(ret);
- return ret;
} #ifdef CONFIG_COMPAT @@ -1718,7 +1723,7 @@ SYSCALL_DEFINE3(__retptr__(shmat), int, shmid, char __user *, shmaddr, int, shmf COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg) {
- unsigned long ret;
- user_uintptr_t ret; long err;
err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA);
Use the recently introduced PCuABI reservation interfaces to verify the address range for shmat syscall.
Signed-off-by: Amit Daniel Kachhap amitdaniel.kachhap@arm.com --- include/linux/shm.h | 4 ++-- ipc/shm.c | 24 ++++++++++++++++-------- 2 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/include/linux/shm.h b/include/linux/shm.h
index d8e69aed3d32..bf5b2e5cbd0c 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -14,7 +14,7 @@ struct sysv_shm {
 	struct list_head shm_clist;
 };
 
-long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr,
+long do_shmat(int shmid, char __user *shmaddr, int shmflg, user_uintptr_t *user_ptr,
 	      unsigned long shmlba);
 bool is_file_shm_hugepages(struct file *file);
 void exit_shm(struct task_struct *task);
@@ -25,7 +25,7 @@ struct sysv_shm {
 };
 
 static inline long do_shmat(int shmid, char __user *shmaddr,
-			    int shmflg, unsigned long *addr,
+			    int shmflg, user_uintptr_t *user_ptr,
 			    unsigned long shmlba)
 {
 	return -ENOSYS;
diff --git a/ipc/shm.c b/ipc/shm.c
index 7bb7c4bbc383..d028a8ce8b39 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -44,6 +44,7 @@
 #include <linux/mount.h>
 #include <linux/ipc_namespace.h>
 #include <linux/rhashtable.h>
+#include <linux/cap_addr_mgmt.h>
 
 #include <linux/uaccess.h>
 
@@ -1519,14 +1520,13 @@ COMPAT_SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, void __user *, uptr)
  * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
  *
  * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
- * "raddr" thing points to kernel space, and there has to be a wrapper around
+ * "ruser_ptr" thing points to kernel space, and there has to be a wrapper around
  * this.
  */
 long do_shmat(int shmid, char __user *shmaddr, int shmflg,
-	      ulong *raddr, unsigned long shmlba)
+	      user_uintptr_t *ruser_ptr, unsigned long shmlba)
 {
 	struct shmid_kernel *shp;
-	/* TODO [PCuABI] - capability checks for address space management */
 	unsigned long addr = user_ptr_addr(shmaddr);
 	unsigned long size;
 	struct file *file, *base;
@@ -1666,11 +1666,20 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
 		goto invalid;
 	}
 
+	err = check_pcuabi_params((user_uintptr_t)shmaddr, size,
+				  (flags & MAP_FIXED) ? true : false, true);
+	if (err)
+		goto invalid;
+
 	addr = do_mmap(file, addr, size, prot, flags, 0, 0, &populate, NULL);
-	*raddr = addr;
 	err = 0;
 	if (IS_ERR_VALUE(addr))
 		err = (long)addr;
+	else if (!user_ptr_is_valid(shmaddr))
+		*ruser_ptr = make_user_ptr_owning(addr, size,
+				user_ptr_owning_perms_from_prot(prot, false));
+	else
+		*ruser_ptr = (user_uintptr_t)shmaddr;
 invalid:
 	mmap_write_unlock(current->mm);
 	if (populate)
@@ -1699,15 +1708,14 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
 
 SYSCALL_DEFINE3(__retptr__(shmat), int, shmid, char __user *, shmaddr, int, shmflg)
 {
-	unsigned long ret;
+	user_uintptr_t ret;
 	long err;
 
 	err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
 	if (err)
 		return err;
 	force_successful_syscall_return();
-	/* TODO [PCuABI] - derive proper capability */
-	return (user_uintptr_t)uaddr_to_user_ptr_safe(ret);
+	return ret;
 }
 
 #ifdef CONFIG_COMPAT
@@ -1718,7 +1726,7 @@ SYSCALL_DEFINE3(__retptr__(shmat), int, shmid, char __user *, shmaddr, int, shmf
 
 COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg)
 {
-	unsigned long ret;
+	user_uintptr_t ret;
 	long err;
 
 	err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA);
Change the vdso pointer to a user pointer and provide a suitably bounded capability to purecap userspace using the memory reservation interface.
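For illustration only (not part of this patch): userspace discovers the vDSO through the AT_SYSINFO_EHDR auxv entry, which ARCH_DLINFO populates from mm->context.vdso (see the elf.h hunk below). The sketch shows the conventional lookup; how a purecap C library surfaces the capability-sized auxv value is libc-specific, so it only prints the address part.

/* Minimal sketch, not part of the patch: conventional vDSO base lookup. */
#include <elf.h>
#include <stdio.h>
#include <sys/auxv.h>

int main(void)
{
	/*
	 * AT_SYSINFO_EHDR carries the vDSO ELF header location. With this
	 * change, the value handed to a purecap process is derived from the
	 * bounded vDSO reservation rather than the root user capability.
	 */
	unsigned long vdso = getauxval(AT_SYSINFO_EHDR);

	if (!vdso) {
		puts("no vDSO advertised");
		return 1;
	}

	printf("vDSO base: %#lx\n", vdso);
	return 0;
}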
Signed-off-by: Amit Daniel Kachhap <amitdaniel.kachhap@arm.com>
---
 arch/arm64/include/asm/elf.h |  5 ++---
 arch/arm64/include/asm/mmu.h |  2 +-
 arch/arm64/kernel/signal.c   |  2 +-
 arch/arm64/kernel/vdso.c     | 29 ++++++++++++++++++++++++-----
 4 files changed, 28 insertions(+), 10 deletions(-)
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index b54070e80867..55dcbda6b052 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -178,8 +178,7 @@ extern int purecap_setup_additional_pages(struct linux_binprm *bprm,
  * TODO [PCuABI]: Look into restricting the bounds of this capability to just
  * the vDSO pages, as currently the bounds are of the root user capability.
  */
-#define ARCH_DLINFO	SETUP_DLINFO(uaddr_to_user_ptr_safe( \
-			(elf_addr_t)current->mm->context.vdso))
+#define ARCH_DLINFO	SETUP_DLINFO((user_uintptr_t)current->mm->context.vdso)
 #else /* !CONFIG_CHERI_PURECAP_UABI */
 #define arch_setup_additional_pages	aarch64_setup_additional_pages
 #define ARCH_DLINFO	SETUP_DLINFO((elf_addr_t)current->mm->context.vdso)
@@ -224,7 +223,7 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
 	SET_PERSONALITY_AARCH64();		\
 })
 
-#define COMPAT_ARCH_DLINFO	SETUP_DLINFO((elf_addr_t)current->mm->context.vdso)
+#define COMPAT_ARCH_DLINFO	SETUP_DLINFO((user_uintptr_t)current->mm->context.vdso)
 
 #define compat_arch_setup_additional_pages	aarch64_setup_additional_pages
 
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 12a7889f0a37..5febbcb23b48 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -23,7 +23,7 @@ typedef struct {
 	void *sigpage;
 #endif
 	refcount_t pinned;
-	void *vdso;
+	void __user *vdso;
 	unsigned long flags;
 } mm_context_t;
 
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index a585d005f797..948766111204 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -1265,7 +1265,7 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
 	if (ka->sa.sa_flags & SA_RESTORER)
 		sigtramp = user_ptr_addr(ka->sa.sa_restorer);
 	else
-		sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp);
+		sigtramp = VDSO_SYMBOL((user_uintptr_t)current->mm->context.vdso, sigtramp);
 
 	regs->regs[30] = sigtramp;
 }
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 3121e70f598a..1fe873e9ef6c 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/cache.h>
+#include <linux/cap_addr_mgmt.h>
 #include <linux/clocksource.h>
 #include <linux/elf.h>
 #include <linux/err.h>
@@ -15,6 +16,7 @@
 #include <linux/gfp.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/mman.h>
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/slab.h>
@@ -86,7 +88,16 @@ struct vdso_data *vdso_data = vdso_data_store.data;
 static int vdso_mremap(const struct vm_special_mapping *sm,
 		struct vm_area_struct *new_vma)
 {
-	current->mm->context.vdso = (void *)new_vma->vm_start;
+	if (reserv_is_supported(new_vma->vm_mm)) {
+		current->mm->context.vdso = (void __user *)make_user_ptr_owning(
+					reserv_vma_reserv_start(new_vma),
+					reserv_vma_reserv_len(new_vma),
+					reserv_vma_reserv_perms(new_vma));
+		current->mm->context.vdso = user_ptr_set_addr(current->mm->context.vdso,
+					new_vma->vm_start);
+	} else {
+		current->mm->context.vdso = as_user_ptr(new_vma->vm_start);
+	}
 
 	return 0;
 }
@@ -202,8 +213,9 @@ static int __setup_additional_pages(enum vdso_abi abi,
 				    struct linux_binprm *bprm,
 				    int uses_interp)
 {
-	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
+	unsigned long vdso_base, vdso_text_base, vdso_text_len, vdso_mapping_len;
 	unsigned long gp_flags = 0;
+	user_uintptr_t user_ptr;
 	void *ret;
 
 	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
@@ -227,14 +239,21 @@ static int __setup_additional_pages(enum vdso_abi abi,
 	if (system_supports_bti_kernel())
 		gp_flags = VM_ARM64_BTI;
 
-	vdso_base += VVAR_NR_PAGES * PAGE_SIZE;
-	mm->context.vdso = (void *)vdso_base;
-	ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
+	vdso_text_base = vdso_base + VVAR_NR_PAGES * PAGE_SIZE;
+	ret = _install_special_mapping(mm, vdso_text_base, vdso_text_len,
 				       VM_READ|VM_EXEC|gp_flags|
 				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
 				       vdso_info[abi].cm);
 	if (IS_ERR(ret))
 		goto up_fail;
+	user_ptr = reserv_range_set_reserv(vdso_base, vdso_mapping_len,
+				user_ptr_owning_perms_from_prot(PROT_READ | PROT_EXEC,
+								true), true);
+	if (IS_ERR_VALUE(user_ptr)) {
+		ret = (void *)(ptraddr_t)user_ptr;
+		goto up_fail;
+	}
+	mm->context.vdso = (void __user *)(user_ptr + VVAR_NR_PAGES * PAGE_SIZE);
 
 	return 0;