On 1/12/24 20:26, Kevin Brodsky wrote:
On 08/01/2024 11:23, Amit Daniel Kachhap wrote:
PCuABI memory reservation requires adding reservation properties while creating and modifying the vma. reserv_vma_init_reserv() is added to initialize them, and reserv_vma_set_reserv() is used to update those details. As of now, these properties are added only for the mmap/mremap syscalls; later commits will add them for other special vma mappings.
PCuABI memory reservation also requires merging or expanding vmas that belong only to the original reservation. Use suitable reservation interfaces to check those properties before performing such operations on the vma.
The address parameter type is modified from unsigned long to user_uintptr_t in do_mmap() and several other upstream functions, so that it is possible to determine whether a new reservation is required.
Signed-off-by: Amit Daniel Kachhap <amitdaniel.kachhap@arm.com>
include/linux/mm.h | 24 +++++++++++++++--- kernel/fork.c | 3 +++ mm/mmap.c | 63 ++++++++++++++++++++++++++++++++++++++-------- mm/util.c | 5 ++-- 4 files changed, 80 insertions(+), 15 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h index 5b79120c999a..06d2aee83aa4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -747,6 +747,21 @@ static inline void vma_mark_detached(struct vm_area_struct *vma, #endif /* CONFIG_PER_VMA_LOCK */ +#ifdef CONFIG_CHERI_PURECAP_UABI
+static void reserv_vma_init_reserv(struct vm_area_struct *vma) +{
- vma->reserv_start = 0;
- vma->reserv_len = 0;
- vma->reserv_perm = 0;
vma_init() already zero-inits the vma.
Yes good catch.
+}
+#else /* CONFIG_CHERI_PURECAP_UABI */
+static void reserv_vma_init_reserv(struct vm_area_struct *vma) {}
+#endif /* CONFIG_CHERI_PURECAP_UABI */ [...]
-unsigned long do_mmap(struct file *file, unsigned long addr, +user_uintptr_t do_mmap(struct file *file, user_uintptr_t usrptr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate, struct list_head *uf) @@ -1233,6 +1236,8 @@ unsigned long do_mmap(struct file *file, unsigned long addr, struct mm_struct *mm = current->mm; vm_flags_t vm_flags; int pkey = 0;
- unsigned long addr = (ptraddr_t)usrptr;
- bool new_caps = true;
It's not really obvious what "caps" means (especially with an "s"). Maybe "new_reserv" like the mmap_region() parameter.
Also I'd suggest s/usrptr/user_ptr/ (since that's the abbreviation we've generally used so far).
ok
validate_mm(mm); *populate = 0; @@ -1240,6 +1245,12 @@ unsigned long do_mmap(struct file *file, unsigned long addr, if (!len) return -EINVAL; +#ifdef CONFIG_CHERI_PURECAP_UABI
- if (test_bit(MMF_PCUABI_RESERVE, &mm->flags)) {
if (cheri_tag_get(usrptr))
new_caps = false;
- }
+#endif /* * Does the application expect PROT_READ to imply PROT_EXEC? * @@ -1397,20 +1408,25 @@ unsigned long do_mmap(struct file *file, unsigned long addr, vm_flags |= VM_NORESERVE; }
- addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
- if (new_caps)
usrptr = addr;
usrptr isn't actually used in this patch; maybe the changes in this function belong to the next patch. That said, these two lines don't seem to have any effect even after patch 8.
Looks like some code got missed in patch preparation.
- addr = mmap_region(file, addr, len, vm_flags, pgoff, uf, prot, new_caps); if (!IS_ERR_VALUE(addr) && ((vm_flags & VM_LOCKED) || (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE)) *populate = len;
- return addr; }
-user_uintptr_t ksys_mmap_pgoff(user_uintptr_t addr, unsigned long len, +user_uintptr_t ksys_mmap_pgoff(user_uintptr_t usrptr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) { struct file *file = NULL; user_uintptr_t retval;
- ptraddr_t addr = (ptraddr_t)usrptr;
addr isn't used in this patch. Those changes probably belong to the next patch too.
if (!(flags & MAP_ANONYMOUS)) { audit_mmap_fd(fd, flags); @@ -1443,7 +1459,7 @@ user_uintptr_t ksys_mmap_pgoff(user_uintptr_t addr, unsigned long len, return PTR_ERR(file); }
- retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
- retval = vm_mmap_pgoff(file, usrptr, len, prot, flags, pgoff); out_fput: if (file) fput(file);
@@ -2628,15 +2644,18 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
struct list_head *uf)
{ struct mm_struct *mm = current->mm; struct vm_area_struct *vma = NULL;struct list_head *uf, unsigned long prot, bool new_reserv)
- struct vm_area_struct *next, *prev, *merge;
- struct vm_area_struct *next, *prev, *merge, *old_vma; pgoff_t pglen = len >> PAGE_SHIFT; unsigned long charged = 0; unsigned long end = addr + len; unsigned long merge_start = addr, merge_end = end;
- unsigned long reserv_start = 0;
- unsigned long reserv_len = 0;
- unsigned long reserv_perm = 0; pgoff_t vm_pgoff; int error; VMA_ITERATOR(vmi, mm, addr);
@@ -2656,6 +2675,20 @@ unsigned long mmap_region(struct file *file, unsigned long addr, return -ENOMEM; }
- if (!new_reserv) {
old_vma = vma_find(&vmi, end);
if (!old_vma)
I initially thought the check was insufficient, but then realised that the actual check happens by calling reserv_vmi_range_mapped() in ksys_mmap_pgoff(). So I think looking up the reservation information this way makes sense, but we definitely want a comment saying that the actual check has already happened (in principle this condition can never be true).
ok
return -ENOMEM;
+#ifdef CONFIG_CHERI_PURECAP_UABI
if (reserv_vma_is_supported(old_vma)) {
reserv_start = old_vma->reserv_start;
reserv_len = old_vma->reserv_len;
reserv_perm = old_vma->reserv_perm;
}
+#endif
vma_iter_set(&vmi, addr);
- }
- /* Unmap any existing mapping in the area */ if (do_vmi_munmap(&vmi, mm, addr, len, uf, false)) return -ENOMEM;
@@ -2679,7 +2712,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr, /* Check next */ if (next && next->vm_start == end && !vma_policy(next) && can_vma_merge_before(next, vm_flags, NULL, file, pgoff+pglen,
NULL_VM_UFFD_CTX, NULL)) {
NULL_VM_UFFD_CTX, NULL) &&
merge_end = next->vm_end; vma = next; vm_pgoff = next->vm_pgoff - pglen;reserv_vma_valid_address(next, addr, len)) {
@@ -2690,7 +2724,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr, (vma ? can_vma_merge_after(prev, vm_flags, vma->anon_vma, file, pgoff, vma->vm_userfaultfd_ctx, NULL) : can_vma_merge_after(prev, vm_flags, NULL, file, pgoff,
NULL_VM_UFFD_CTX, NULL))) {
NULL_VM_UFFD_CTX, NULL)) &&
merge_start = prev->vm_start; vma = prev; vm_pgoff = prev->vm_pgoff;reserv_vma_valid_address(prev, addr, len)) {
@@ -2722,6 +2757,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr, vm_flags_init(vma, vm_flags); vma->vm_page_prot = vm_get_page_prot(vm_flags); vma->vm_pgoff = pgoff;
- if (new_reserv)
reserv_vma_set_reserv(vma, addr, len,
mapping_to_capability_perm(prot, (vm_flags & VM_SHARED)
? false : true));
- else
reserv_vma_set_reserv(vma, reserv_start, reserv_len, reserv_perm);
if (file) { if (vm_flags & VM_SHARED) { @@ -3320,6 +3361,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, *vmap = vma = new_vma; } *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
reserv_vma_set_reserv(new_vma, addr, len, 0);
AFAICT we should do nothing in this case. An existing vma has been expanded when duplicating the target vma, and the reservation it is in remains unchanged.
} else { new_vma = vm_area_dup(vma); if (!new_vma) @@ -3327,6 +3369,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, new_vma->vm_start = addr; new_vma->vm_end = addr + len; new_vma->vm_pgoff = pgoff;
reserv_vma_set_reserv(new_vma, addr, len, 0);
We cannot use the (addr, len) range as reservation bounds unconditionally. This should be correct if we are actually creating a new reservation, but if MREMAP_FIXED was passed we might actually be moving the mapping into an existing reservation, in which case we should copy reserv_* from another vma in that reservation.
Ok I will check this.
if (vma_dup_policy(vma, new_vma)) goto out_free_vma; if (anon_vma_clone(new_vma, vma))
diff --git a/mm/util.c b/mm/util.c index 61de3bf7712b..f6fbfa7d014e 100644 --- a/mm/util.c +++ b/mm/util.c @@ -557,7 +557,8 @@ user_uintptr_t vm_mmap_pgoff(struct file *file, user_uintptr_t addr, return ret; } -unsigned long vm_mmap(struct file *file, unsigned long addr, +/* TODO [PCuABI] - Update the users of vm_mmap */ +user_uintptr_t vm_mmap(struct file *file, user_uintptr_t usrptr,
Provided that in-kernel users do not manipulate mappings with capabilities, do we need to change the prototypes of vm_* at all?
As discussed earlier, elf_map() in binfmt_elf.c will now use MAP_FIXED to change the mapping of individual ELF segments, so a full capability has to be supplied. In the case of vm_munmap(), a full capability may not be required, but for consistency's sake a capability can perhaps be used there too.
Thanks, Amit
Kevin
unsigned long len, unsigned long prot, unsigned long flag, unsigned long offset) { @@ -566,7 +567,7 @@ unsigned long vm_mmap(struct file *file, unsigned long addr, if (unlikely(offset_in_page(offset))) return -EINVAL;
- return vm_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
- return vm_mmap_pgoff(file, usrptr, len, prot, flag, offset >> PAGE_SHIFT); } EXPORT_SYMBOL(vm_mmap);