[linux-morello] Re: [RFC PATCH 5/9] bpf: add compat64 handling of bpf syscall

28 Nov 2022

On 23/11/2022 09:57, Kevin Brodsky wrote:
...
On 15/11/2022 10:08, Zachary Leaf wrote:
...
The bpf syscall on a native 64 bit system originally did not need a
separate compat handler for compat32. Since the union bpf_attr stored
pointers as __u64 and was 8B aligned it results in the same struct
offsets/memory layout for both 64/32b.
On some new architectures, e.g. Morello PCuABI, userspace pointers are
128 bit (plus 1 out-of-band tag bit), so storing pointers as __u64 is
not possible. bpf_attr is therefore extended to hold 128b pointers on
these archs, and remains __u64 on those that do not.
In order to support a 64b compat layer (compat64) in PCuABI, add a new
union compat_bpf_attr with __u64 members. This retains the offsets and
alignment of the original bpf_attr struct.
In order to handle compat64, use generic void* in __sys_bpf(...) to
represent either bpf_attr or compat_bpf_attr. Then in each of the
multiplexed options e.g. BPF_PROG_LOAD, if in a compat syscall, convert
compat_bpf_atr into a native bpf_attr struct and handle as normal.
Since the bpf_attr union contains a number of anonymous structs,
converting each option inside the option handlers e.g. bpf_prof_load()
seems to be the best way to do it. After the conversion there should be
minimal additional compat handling/in_compat_syscall conditions etc.
Currently unrestricted capabilities are created in the kernel here with
compat_ptr so some care needs to be taken to check none of these are
returned to userspace at any point.
Signed-off-by: Zachary Leaf zachary.leaf@arm.com
re: compat_ptr, there is probably another case to be made for having
some kind of kernel only flag for capabilities here, to avoid them being
returned to userspace by accident (as discussed for
make_user_ptr_unsafe)
include/uapi/linux/bpf.h | 39 ++++++++++++++++++++++
 kernel/bpf/syscall.c     | 71 +++++++++++++++++++++++++++++++++++-----
 2 files changed, 102 insertions(+), 8 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 98a23f9a540e..752a520e1481 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
compat_* types do not belong to uapi headers, because they are
irrelevant to userspace. There is unfortunately no agreed place to put
them (they are often found in a kernel header or in the .c next to the
compat handling), so you can choose to move it wherever makes more sense
/ is easier.
Ack
...
...
@@ -1269,6 +1269,45 @@ struct bpf_stack_build_id {
#define BPF_OBJ_NAME_LEN 16U
+union compat_bpf_attr {

struct { /* anonymous struct used by BPF_PROG_LOAD command */
__u32		prog_type;	/* one of enum bpf_prog_type */


__u32		insn_cnt;


__aligned_u64	insns;


__aligned_u64	license;


__u32		log_level;	/* verbosity level of verifier */


__u32		log_size;	/* size of user buffer */


__aligned_u64	log_buf;	/* user supplied buffer */


__u32		kern_version;	/* not used */


__u32		prog_flags;


char		prog_name[BPF_OBJ_NAME_LEN];


__u32		prog_ifindex;	/* ifindex of netdev to prep for */


/* For some prog types expected attach type must be known at


 * load time to verify attach type specific parts of prog


 * (context accesses, allowed helpers, etc).


 */


__u32		expected_attach_type;


__u32		prog_btf_fd;	/* fd pointing to BTF type data */


__u32		func_info_rec_size;	/* userspace bpf_func_info size */


__aligned_u64	func_info;	/* func info */


__u32		func_info_cnt;	/* number of bpf_func_info records */


__u32		line_info_rec_size;	/* userspace bpf_line_info size */


__aligned_u64	line_info;	/* line info */


__u32		line_info_cnt;	/* number of bpf_line_info records */


__u32		attach_btf_id;	/* in-kernel BTF type id to attach to */


union {


	/* valid prog_fd to attach to bpf prog */


	__u32		attach_prog_fd;


	/* or valid module BTF object fd or 0 to attach to vmlinux */


	__u32		attach_btf_obj_fd;


};


__u32		core_relo_cnt;	/* number of bpf_core_relo */


__aligned_u64	fd_array;	/* array of FDs */


__aligned_u64	core_relos;


__u32		core_relo_rec_size; /* sizeof(struct bpf_core_relo) */


};

+} __attribute__((aligned(8)));



union bpf_attr {
   struct { /* anonymous struct used by BPF_MAP_CREATE command */
   	__u32	map_type;	/* one of enum bpf_map_type */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 518ab0e9f2a4..6e3b567d254a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2203,21 +2203,68 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
   }
 }
+#ifdef CONFIG_COMPAT
+static void convert_compat_bpf_prog_load(const void *vattr, union bpf_attr *dest)
+{

const union compat_bpf_attr *cattr = (union compat_bpf_attr *)vattr;

dest->prog_type = READ_ONCE(cattr->prog_type);
dest->insn_cnt = READ_ONCE(cattr->insn_cnt);
dest->insns = (__kernel_aligned_uintptr_t)compat_ptr(READ_ONCE(cattr->insns));
dest->license = (__kernel_aligned_uintptr_t)compat_ptr(READ_ONCE(cattr->license));
dest->log_level = READ_ONCE(cattr->log_level);
dest->log_size = READ_ONCE(cattr->log_size);
dest->log_buf = (__kernel_aligned_uintptr_t)compat_ptr(READ_ONCE(cattr->log_buf));
dest->kern_version = READ_ONCE(cattr->kern_version);
dest->prog_flags = READ_ONCE(cattr->prog_flags);
strncpy(dest->prog_name, cattr->prog_name, BPF_OBJ_NAME_LEN);
dest->prog_ifindex = READ_ONCE(cattr->prog_ifindex);
dest->expected_attach_type = READ_ONCE(cattr->expected_attach_type);
dest->prog_btf_fd = READ_ONCE(cattr->prog_btf_fd);
dest->func_info_rec_size = READ_ONCE(cattr->func_info_rec_size);
dest->func_info = (__kernel_aligned_uintptr_t)compat_ptr(READ_ONCE(cattr->func_info));
dest->func_info_cnt = READ_ONCE(cattr->func_info_cnt);
dest->line_info_rec_size = READ_ONCE(cattr->line_info_rec_size);
dest->line_info = (__kernel_aligned_uintptr_t)compat_ptr(READ_ONCE(cattr->line_info));
dest->line_info_cnt = READ_ONCE(cattr->line_info_cnt);
dest->attach_btf_id = READ_ONCE(cattr->attach_btf_id);
dest->attach_prog_fd = READ_ONCE(cattr->attach_prog_fd);
/* u32 attach_btf_obj_fd is in a union with u32 attach_prog_fd */
dest->core_relo_cnt = READ_ONCE(cattr->core_relo_cnt);
dest->fd_array = (__kernel_aligned_uintptr_t)compat_ptr(READ_ONCE(cattr->fd_array));
dest->core_relos = (__kernel_aligned_uintptr_t)compat_ptr(READ_ONCE(cattr->core_relos));
dest->core_relo_rec_size = READ_ONCE(cattr->core_relo_rec_size);

There is no need for special handling with READ_ONCE() here. io_uring is
a very special case where the source is a piece of memory that is shared
with userspace and extra care is required. Here everything is stored on
the stack (as usual) and normal accesses are perfectly fine.
Ah yep. I knew this or should have known it from the io_uring review.
...
...
+}
+#endif



/* last field in 'union bpf_attr' used by this command */
 #define	BPF_PROG_LOAD_LAST_FIELD core_relo_rec_size
-static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
+static int bpf_prog_load(void *vattr, bpfptr_t uattr)
 {

enum bpf_prog_type type = attr->prog_type;


enum bpf_prog_type type;
struct bpf_prog *prog, *dst_prog = NULL;
struct btf *attach_btf = NULL;
int err;
char license[128];
bool is_gpl;
union bpf_attr *attr = NULL;

+#ifdef CONFIG_COMPAT

union bpf_attr compat_attr;

+#endif
if (CHECK_ATTR(BPF_PROG_LOAD))
   	return -EINVAL;
+#ifdef CONFIG_COMPAT

if (in_compat_syscall())
convert_compat_bpf_prog_load(vattr, &compat_attr);


attr = in_compat_syscall() ? &compat_attr : (union bpf_attr *) vattr;

+#else

attr = (union bpf_attr *) vattr;

+#endif

type = attr->prog_type;
if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
  		 BPF_F_ANY_ALIGNMENT |
  		 BPF_F_TEST_STATE_FREQ |

@@ -4621,22 +4668,30 @@ static int bpf_prog_bind_map(union bpf_attr *attr)
 static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
 {
   union bpf_attr attr;
+#ifdef CONFIG_COMPAT

union compat_bpf_attr cattr;
void *vattr = in_compat_syscall() ? (void *)&cattr : (void *)&attr;

The fact that we are using in_compat_syscall() here reinforces my
concern with the ABI that actually applies to bpf_sys_bpf(). If it is
somehow obtaining bpf_attr from the current process, then this approach
makes sense, otherwise we might have a problem here.
Still will need to look into this more. I'd be surprised if what you're
suggesting is the case, since that would mean we're mis-handling user
pointers, not storing them with a __user tag, and possibly using them
without going through uaccess. That may be possible so will check either
way.
...
...

size_t attrsz = in_compat_syscall() ? sizeof(cattr) : sizeof(attr);

+#else

void *vattr = &attr;
size_t attrsz = sizeof(attr);

+#endif
   int err;
if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
   	return -EPERM;

err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);


err = bpf_check_uarg_tail_zero(uattr, attrsz, size);
if (err)
return err;


size = min_t(u32, size, sizeof(attr));


size = min_t(u32, size, attrsz);
/* copy attributes from user space, may be less than sizeof(bpf_attr) */



memset(&attr, 0, sizeof(attr));
if (copy_from_bpfptr_with_ptr(&attr, uattr, size) != 0)


memset(vattr, 0, attrsz);
if (copy_from_bpfptr_with_ptr(vattr, uattr, size) != 0)

Note that _with_ptr is only appropriate for native, in compat there is
no actual user pointer in the struct.
Ack. I assume you mean in compat there *is* user pointers, it just
doesn't have a capability so makes no sense to go through the capability
preserving _with_ptr variant.
...
...
return -EFAULT;



err = security_bpf(cmd, &attr, size);


err = security_bpf(cmd, vattr, size);
if (err < 0)
return err;

@@ -4660,7 +4715,7 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
   	err = map_freeze(&attr);
   	break;
   case BPF_PROG_LOAD:

err = bpf_prog_load(&attr, uattr);




err = bpf_prog_load(vattr, uattr);

break;
case BPF_OBJ_PIN:
err = bpf_obj_pin(&attr);

Considering you will end up touching more than half of the lines in
__sys_bpf(), I am tempted to recommend to actually create a compat
handler for bpf and a new __sys_compat_bpf() or similar. The duplication
this would create is minimal as there is barely any logic in __sys_bpf()

it's all nicely abstracted in helper functions - and this way you can

keep all the compat logic in just one function (plus helpers for the
struct layout conversions). bpf_prog_load() and friends can remain
unchanged if all the conversion work happens in __sys_compat_bpf(), in a
similar fashion to more "traditional" compat handlers.
A bit more rationale on why I'm recommending this: most syscalls that
take a struct read the same members in that struct in any case, so
converting from compat to native is straightforward (just one conversion
helper). However here we effectively have a dozen structs stuffed into a
union (only marginally nicer than the syscall taking a void *), so each
subcommand needs its own conversion. What this means is that you need
compat code to 1. read the whole compat union and 2. convert the layout
of each struct. If all this compat code is scattered in the native
syscall / subcommand handlers, that's an awful lot of conditionals (on
in_compat_syscall()). Conversely if all of this can happen in the same
function, that's a big win in readability (and maybe a bit faster).
Finally if you have just one function doing everything, you might as
well have a compat handler calling it (but that's a detail really).
Agreed on that - see response on [RFC PATCH 9/9] bpf/security: move lsm
hook point.
Thanks,
Zach
...
Kevin
...
--
2.34.1

linux-morello mailing list -- linux-morello@op-lists.linaro.org
To unsubscribe send an email to linux-morello-leave@op-lists.linaro.org

    

2025

2024

2023

2022

[linux-morello] Re: [RFC PATCH 5/9] bpf: add compat64 handling of bpf syscall

Signed-off-by: Zachary Leaf zachary.leaf@arm.com