On 16/11/2022 15:32, Tudor Cretu wrote:
On 15-11-2022 09:08, Zachary Leaf wrote:
The bpf syscall on a native 64-bit system originally did not need a separate compat handler for compat32: since union bpf_attr stores pointers as __u64 and is 8-byte aligned, the struct offsets/memory layout are the same for both 64-bit and 32-bit.
On some new architectures, e.g. Morello PCuABI, userspace pointers are 128-bit (plus 1 out-of-band tag bit), so storing pointers as __u64 is not possible. bpf_attr is therefore extended to hold 128-bit pointers on these architectures, and keeps __u64 members on those that do not have them.
In order to support a 64-bit compat layer (compat64) in PCuABI, add a new union compat_bpf_attr with __u64 members. This retains the offsets and alignment of the original bpf_attr struct.
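To illustrate the layout argument, a compile-time sketch could assert that the BPF_PROG_LOAD fields of compat_bpf_attr sit at the same offsets regardless of whether the binary is built for 32-bit or 64-bit userspace, since every member is a fixed-size __u32/__u64 and the union is 8-byte aligned. This check (and the offsets in it) is illustrative only and not part of the patch:

/* hypothetical layout check, for illustration only */
#include <linux/bpf.h>
#include <linux/build_bug.h>
#include <linux/stddef.h>

static inline void compat_bpf_attr_layout_check(void)
{
	/* fixed-size members give identical offsets for 32-bit and 64-bit builds */
	BUILD_BUG_ON(offsetof(union compat_bpf_attr, insn_cnt) != 4);
	BUILD_BUG_ON(offsetof(union compat_bpf_attr, insns) != 8);
	BUILD_BUG_ON(offsetof(union compat_bpf_attr, license) != 16);
	BUILD_BUG_ON(offsetof(union compat_bpf_attr, log_buf) != 32);
	BUILD_BUG_ON(__alignof__(union compat_bpf_attr) != 8);
}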
In order to handle compat64, use a generic void * in __sys_bpf(...) to represent either bpf_attr or compat_bpf_attr. Then, in each of the multiplexed options, e.g. BPF_PROG_LOAD, if in a compat syscall, convert compat_bpf_attr into a native bpf_attr struct and handle as normal.
Since the bpf_attr union contains a number of anonymous structs, converting each option inside the option handlers, e.g. bpf_prog_load(), seems to be the best way to do it. After the conversion there should be minimal additional compat handling/in_compat_syscall() conditions etc.
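Schematically, each converted option handler ends up looking roughly like the below (hypothetical names bpf_foo_cmd/convert_compat_bpf_foo_cmd/do_foo_cmd; this is a condensed sketch of the pattern used for BPF_PROG_LOAD in the diff, not a complete handler):

static int bpf_foo_cmd(void *vattr, bpfptr_t uattr)
{
	union bpf_attr *attr;
#ifdef CONFIG_COMPAT
	union bpf_attr native;

	if (in_compat_syscall()) {
		/* expand the __u64-based compat_bpf_attr into a native bpf_attr */
		convert_compat_bpf_foo_cmd(vattr, &native);
		attr = &native;
	} else {
		attr = vattr;
	}
#else
	attr = vattr;
#endif
	/* from this point on, only the native union bpf_attr is used */
	return do_foo_cmd(attr, uattr);
}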
Currently, unrestricted capabilities are created in the kernel here with compat_ptr(), so some care needs to be taken to check that none of these are returned to userspace at any point.
This patch should come before PATCH 4, I think. That way, compat64 wouldn't be broken between PATCH 4 and PATCH 5.
Sure, but then this uses compat_ptr() to add capabilities to pointers when converting to the new bpf_attr. How would that work without first amending bpf_attr to accept capabilities, i.e. using compat_ptr() and putting the result in a __u64 member? Would compat64 actually work?
Seems to me it would be broken either way, but I might be thinking about it the wrong way.
Signed-off-by: Zachary Leaf <zachary.leaf@arm.com>
re: compat_ptr, there is probably another case to be made for having some kind of kernel-only flag for capabilities here, to avoid them being returned to userspace by accident (as discussed for make_user_ptr_unsafe).
 include/uapi/linux/bpf.h | 39 ++++++++++++++++++++++
 kernel/bpf/syscall.c     | 71 +++++++++++++++++++++++++++++++++++-----
 2 files changed, 102 insertions(+), 8 deletions(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 98a23f9a540e..752a520e1481 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1269,6 +1269,45 @@ struct bpf_stack_build_id {
#define BPF_OBJ_NAME_LEN 16U
+union compat_bpf_attr {
+	struct { /* anonymous struct used by BPF_PROG_LOAD command */
+		__u32		prog_type;	/* one of enum bpf_prog_type */
+		__u32		insn_cnt;
+		__aligned_u64	insns;
+		__aligned_u64	license;
+		__u32		log_level;	/* verbosity level of verifier */
+		__u32		log_size;	/* size of user buffer */
+		__aligned_u64	log_buf;	/* user supplied buffer */
+		__u32		kern_version;	/* not used */
+		__u32		prog_flags;
+		char		prog_name[BPF_OBJ_NAME_LEN];
+		__u32		prog_ifindex;	/* ifindex of netdev to prep for */
+		/* For some prog types expected attach type must be known at
+		 * load time to verify attach type specific parts of prog
+		 * (context accesses, allowed helpers, etc).
+		 */
+		__u32		expected_attach_type;
+		__u32		prog_btf_fd;	/* fd pointing to BTF type data */
+		__u32		func_info_rec_size;	/* userspace bpf_func_info size */
+		__aligned_u64	func_info;	/* func info */
+		__u32		func_info_cnt;	/* number of bpf_func_info records */
+		__u32		line_info_rec_size;	/* userspace bpf_line_info size */
+		__aligned_u64	line_info;	/* line info */
+		__u32		line_info_cnt;	/* number of bpf_line_info records */
+		__u32		attach_btf_id;	/* in-kernel BTF type id to attach to */
+		union {
+			/* valid prog_fd to attach to bpf prog */
+			__u32	attach_prog_fd;
+			/* or valid module BTF object fd or 0 to attach to vmlinux */
+			__u32	attach_btf_obj_fd;
+		};
+		__u32		core_relo_cnt;	/* number of bpf_core_relo */
+		__aligned_u64	fd_array;	/* array of FDs */
+		__aligned_u64	core_relos;
+		__u32		core_relo_rec_size; /* sizeof(struct bpf_core_relo) */
+	};
+} __attribute__((aligned(8)));
+
 union bpf_attr {
 	struct { /* anonymous struct used by BPF_MAP_CREATE command */
 		__u32	map_type;	/* one of enum bpf_map_type */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 518ab0e9f2a4..6e3b567d254a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2203,21 +2203,68 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
 	}
 }
+#ifdef CONFIG_COMPAT
+static void convert_compat_bpf_prog_load(const void *vattr, union bpf_attr *dest)
+{
+	const union compat_bpf_attr *cattr = (union compat_bpf_attr *)vattr;
+	dest->prog_type = READ_ONCE(cattr->prog_type);
+	dest->insn_cnt = READ_ONCE(cattr->insn_cnt);
+	dest->insns = (__kernel_aligned_uintptr_t)compat_ptr(READ_ONCE(cattr->insns));
+	dest->license = (__kernel_aligned_uintptr_t)compat_ptr(READ_ONCE(cattr->license));
+	dest->log_level = READ_ONCE(cattr->log_level);
+	dest->log_size = READ_ONCE(cattr->log_size);
+	dest->log_buf = (__kernel_aligned_uintptr_t)compat_ptr(READ_ONCE(cattr->log_buf));
+	dest->kern_version = READ_ONCE(cattr->kern_version);
+	dest->prog_flags = READ_ONCE(cattr->prog_flags);
+	strncpy(dest->prog_name, cattr->prog_name, BPF_OBJ_NAME_LEN);
+	dest->prog_ifindex = READ_ONCE(cattr->prog_ifindex);
+	dest->expected_attach_type = READ_ONCE(cattr->expected_attach_type);
+	dest->prog_btf_fd = READ_ONCE(cattr->prog_btf_fd);
+	dest->func_info_rec_size = READ_ONCE(cattr->func_info_rec_size);
+	dest->func_info = (__kernel_aligned_uintptr_t)compat_ptr(READ_ONCE(cattr->func_info));
+	dest->func_info_cnt = READ_ONCE(cattr->func_info_cnt);
+	dest->line_info_rec_size = READ_ONCE(cattr->line_info_rec_size);
+	dest->line_info = (__kernel_aligned_uintptr_t)compat_ptr(READ_ONCE(cattr->line_info));
+	dest->line_info_cnt = READ_ONCE(cattr->line_info_cnt);
+	dest->attach_btf_id = READ_ONCE(cattr->attach_btf_id);
+	dest->attach_prog_fd = READ_ONCE(cattr->attach_prog_fd);
+	/* u32 attach_btf_obj_fd is in a union with u32 attach_prog_fd */
+	dest->core_relo_cnt = READ_ONCE(cattr->core_relo_cnt);
+	dest->fd_array = (__kernel_aligned_uintptr_t)compat_ptr(READ_ONCE(cattr->fd_array));
+	dest->core_relos = (__kernel_aligned_uintptr_t)compat_ptr(READ_ONCE(cattr->core_relos));
+	dest->core_relo_rec_size = READ_ONCE(cattr->core_relo_rec_size);
+}
+#endif
 /* last field in 'union bpf_attr' used by this command */
 #define	BPF_PROG_LOAD_LAST_FIELD core_relo_rec_size
-static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
+static int bpf_prog_load(void *vattr, bpfptr_t uattr)
 {
-	enum bpf_prog_type type = attr->prog_type;
+	enum bpf_prog_type type;
 	struct bpf_prog *prog, *dst_prog = NULL;
 	struct btf *attach_btf = NULL;
 	int err;
 	char license[128];
 	bool is_gpl;
+	union bpf_attr *attr = NULL;
+#ifdef CONFIG_COMPAT
+	union bpf_attr compat_attr;
+#endif
 	if (CHECK_ATTR(BPF_PROG_LOAD))
 		return -EINVAL;
+#ifdef CONFIG_COMPAT
I think we should use CONFIG_COMPAT64 for newly added compat code. Maybe; I'm not sure, so I'm leaving this comment here since I meant to change CONFIG_COMPAT -> CONFIG_COMPAT64 in the io_uring series as well.
Yeah, that was something I meant to ask about for io_uring as well. CONFIG_COMPAT64 implies CONFIG_CHERI_PURECAP_UABI, so where it's not set the normal native 64/compat32 handling can apply without using the extra conversion structs etc.
In that case CONFIG_COMPAT64 is more appropriate here and in io_uring.
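i.e. the guard in bpf_prog_load() would become something like the below (a sketch, assuming CONFIG_COMPAT64 is available as elsewhere in the Morello tree; convert_compat_bpf_prog_load/compat_attr/vattr are as introduced by this patch, and the other CONFIG_COMPAT guards added here would change the same way):

#ifdef CONFIG_COMPAT64
	if (in_compat_syscall())
		convert_compat_bpf_prog_load(vattr, &compat_attr);
	attr = in_compat_syscall() ? &compat_attr : (union bpf_attr *)vattr;
#else
	attr = (union bpf_attr *)vattr;
#endif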
+	if (in_compat_syscall())
+		convert_compat_bpf_prog_load(vattr, &compat_attr);
+	attr = in_compat_syscall() ? &compat_attr : (union bpf_attr *) vattr;
+#else
+	attr = (union bpf_attr *) vattr;
+#endif
+	type = attr->prog_type;
 	if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
 				 BPF_F_ANY_ALIGNMENT |
 				 BPF_F_TEST_STATE_FREQ |
@@ -4621,22 +4668,30 @@ static int bpf_prog_bind_map(union bpf_attr *attr)
 static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
 {
 	union bpf_attr attr;
+#ifdef CONFIG_COMPAT
+	union compat_bpf_attr cattr;
+	void *vattr = in_compat_syscall() ? (void *)&cattr : (void *)&attr;
+	size_t attrsz = in_compat_syscall() ? sizeof(cattr) : sizeof(attr);
+#else
+	void *vattr = &attr;
+	size_t attrsz = sizeof(attr);
+#endif
 	int err;
 	if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
 		return -EPERM;
-	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
+	err = bpf_check_uarg_tail_zero(uattr, attrsz, size);
 	if (err)
 		return err;
-	size = min_t(u32, size, sizeof(attr));
+	size = min_t(u32, size, attrsz);
 	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
-	memset(&attr, 0, sizeof(attr));
-	if (copy_from_bpfptr_with_ptr(&attr, uattr, size) != 0)
+	memset(vattr, 0, attrsz);
+	if (copy_from_bpfptr_with_ptr(vattr, uattr, size) != 0)
 		return -EFAULT;
-	err = security_bpf(cmd, &attr, size);
+	err = security_bpf(cmd, vattr, size);
 	if (err < 0)
 		return err;
@@ -4660,7 +4715,7 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
 		err = map_freeze(&attr);
 		break;
 	case BPF_PROG_LOAD:
-		err = bpf_prog_load(&attr, uattr);
+		err = bpf_prog_load(vattr, uattr);
 		break;
 	case BPF_OBJ_PIN:
 		err = bpf_obj_pin(&attr);
--
2.34.1
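For completeness, a minimal compat64 userspace caller that would exercise this path might look like the below. This is illustrative only and not part of the patch or its tests; it is a plain 64-bit binary built against the standard uapi headers (and needs the usual privileges to load a program):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	/* trivial program: r0 = 0; exit */
	struct bpf_insn insns[] = {
		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = 0 },
		{ .code = BPF_JMP | BPF_EXIT },
	};
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
	attr.insns = (__u64)(unsigned long)insns;	/* plain 64-bit address, no capability */
	attr.insn_cnt = sizeof(insns) / sizeof(insns[0]);
	attr.license = (__u64)(unsigned long)"GPL";

	/* on a PCuABI kernel this lands in the compat64 path and gets converted */
	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)) < 0;
}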