The bpf syscall does not require a compat handler for 32-bit compat. This is achieved by using u64 instead of pointer types in the bpf_attr union used to pass arguments to the syscall. This means that in a system where pointers are 32-bit, the struct/union layouts and offsets are the same as in a 64-bit arch, since the u64 field is split into two u32 fields/registers.
This greatly simplifies 32-bit compat at the small cost of requiring casting pointers passed in through the uAPI to u64 (generally via ptr_to_u64() helper functions).
This poses a problem in architectures where user pointers are longer than 64b such as Morello/PCuABI where pointers are represented as 129b capabilities. In order to extend the bpf syscall interface to accept capabilities and still retain compatibility with the existing 64/32b ABI, a 64-bit compat layer and appropriate conversions must be added to handle the different union/struct sizes caused by this pointer size mismatch.
Before extending the number of bits in union bpf_attr to accept capabilities, set the groundwork with a compat64 handler and conversion function to take a compat64 sized bpf_attr and convert it to what will be the new native offsets.
Inbound conversion is handled upfront to minimise impact on existing code and reduce overall diff size. After dispatch_bpf the majority of code can remain unchanged. The cases where conversion back out to userspace is required are handled in subsequent commits.
Signed-off-by: Zachary Leaf <zachary.leaf@arm.com> --- arch/arm64/kernel/sys_compat64.c | 4 + include/linux/bpf_compat.h | 274 +++++++++++++++++++ kernel/bpf/syscall.c | 438 ++++++++++++++++++++++++++----- 3 files changed, 652 insertions(+), 64 deletions(-) create mode 100644 include/linux/bpf_compat.h
diff --git a/arch/arm64/kernel/sys_compat64.c b/arch/arm64/kernel/sys_compat64.c index 1442581ec292..0687f88baa16 100644 --- a/arch/arm64/kernel/sys_compat64.c +++ b/arch/arm64/kernel/sys_compat64.c @@ -13,6 +13,10 @@
#include <asm/syscall.h>
+#ifdef CONFIG_COMPAT64 +#define __arm64_compatentry_sys_bpf __arm64_compatentry_compat_sys_bpf +#endif + #define __arm64_compatentry_sys_personality __arm64_compatentry_sys_arm64_personality
/* diff --git a/include/linux/bpf_compat.h b/include/linux/bpf_compat.h new file mode 100644 index 000000000000..cc12f2e3b204 --- /dev/null +++ b/include/linux/bpf_compat.h @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2023 Arm Ltd */ + +#ifdef CONFIG_COMPAT64 + +union compat_bpf_attr { + struct { /* anonymous struct used by BPF_MAP_CREATE command */ + __u32 map_type; /* one of enum bpf_map_type */ + __u32 key_size; /* size of key in bytes */ + __u32 value_size; /* size of value in bytes */ + __u32 max_entries; /* max number of entries in a map */ + __u32 map_flags; /* BPF_MAP_CREATE related + * flags defined above. + */ + __u32 inner_map_fd; /* fd pointing to the inner map */ + __u32 numa_node; /* numa node (effective only if + * BPF_F_NUMA_NODE is set). + */ + char map_name[BPF_OBJ_NAME_LEN]; + __u32 map_ifindex; /* ifindex of netdev to create on */ + __u32 btf_fd; /* fd pointing to a BTF type data */ + __u32 btf_key_type_id; /* BTF type_id of the key */ + __u32 btf_value_type_id; /* BTF type_id of the value */ + __u32 btf_vmlinux_value_type_id;/* BTF type_id of a kernel- + * struct stored as the + * map value + */ + /* Any per-map-type extra fields + * + * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the + * number of hash functions (if 0, the bloom filter will default + * to using 5 hash functions). 
+ */ + __u64 map_extra; + }; + + struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ + __u32 map_fd; + __aligned_u64 key; + union { + __aligned_u64 value; + __aligned_u64 next_key; + }; + __u64 flags; + }; + + struct { /* struct used by BPF_MAP_*_BATCH commands */ + __aligned_u64 in_batch; /* start batch, + * NULL to start from beginning + */ + __aligned_u64 out_batch; /* output: next start batch */ + __aligned_u64 keys; + __aligned_u64 values; + __u32 count; /* input/output: + * input: # of key/value + * elements + * output: # of filled elements + */ + __u32 map_fd; + __u64 elem_flags; + __u64 flags; + } batch; + + struct { /* anonymous struct used by BPF_PROG_LOAD command */ + __u32 prog_type; /* one of enum bpf_prog_type */ + __u32 insn_cnt; + __aligned_u64 insns; + __aligned_u64 license; + __u32 log_level; /* verbosity level of verifier */ + __u32 log_size; /* size of user buffer */ + __aligned_u64 log_buf; /* user supplied buffer */ + __u32 kern_version; /* not used */ + __u32 prog_flags; + char prog_name[BPF_OBJ_NAME_LEN]; + __u32 prog_ifindex; /* ifindex of netdev to prep for */ + /* For some prog types expected attach type must be known at + * load time to verify attach type specific parts of prog + * (context accesses, allowed helpers, etc). 
+ */ + __u32 expected_attach_type; + __u32 prog_btf_fd; /* fd pointing to BTF type data */ + __u32 func_info_rec_size; /* userspace bpf_func_info size */ + __aligned_u64 func_info; /* func info */ + __u32 func_info_cnt; /* number of bpf_func_info records */ + __u32 line_info_rec_size; /* userspace bpf_line_info size */ + __aligned_u64 line_info; /* line info */ + __u32 line_info_cnt; /* number of bpf_line_info records */ + __u32 attach_btf_id; /* in-kernel BTF type id to attach to */ + union { + /* valid prog_fd to attach to bpf prog */ + __u32 attach_prog_fd; + /* or valid module BTF object fd or 0 to attach to vmlinux */ + __u32 attach_btf_obj_fd; + }; + __u32 core_relo_cnt; /* number of bpf_core_relo */ + __aligned_u64 fd_array; /* array of FDs */ + __aligned_u64 core_relos; + __u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */ + }; + + struct { /* anonymous struct used by BPF_OBJ_* commands */ + __aligned_u64 pathname; + __u32 bpf_fd; + __u32 file_flags; + }; + + struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ + __u32 target_fd; /* container object to attach to */ + __u32 attach_bpf_fd; /* eBPF program to attach */ + __u32 attach_type; + __u32 attach_flags; + __u32 replace_bpf_fd; /* previously attached eBPF + * program to replace if + * BPF_F_REPLACE is used + */ + }; + + struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ + __u32 prog_fd; + __u32 retval; + __u32 data_size_in; /* input: len of data_in */ + __u32 data_size_out; /* input/output: len of data_out + * returns ENOSPC if data_out + * is too small. + */ + __aligned_u64 data_in; + __aligned_u64 data_out; + __u32 repeat; + __u32 duration; + __u32 ctx_size_in; /* input: len of ctx_in */ + __u32 ctx_size_out; /* input/output: len of ctx_out + * returns ENOSPC if ctx_out + * is too small. 
+ */ + __aligned_u64 ctx_in; + __aligned_u64 ctx_out; + __u32 flags; + __u32 cpu; + __u32 batch_size; + } test; + + struct { /* anonymous struct used by BPF_*_GET_*_ID */ + union { + __u32 start_id; + __u32 prog_id; + __u32 map_id; + __u32 btf_id; + __u32 link_id; + }; + __u32 next_id; + __u32 open_flags; + }; + + struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ + __u32 bpf_fd; + __u32 info_len; + __aligned_u64 info; + } info; + + struct { /* anonymous struct used by BPF_PROG_QUERY command */ + __u32 target_fd; /* container object to query */ + __u32 attach_type; + __u32 query_flags; + __u32 attach_flags; + __aligned_u64 prog_ids; + __u32 prog_cnt; + /* output: per-program attach_flags. + * not allowed to be set during effective query. + */ + __aligned_u64 prog_attach_flags; + } query; + + struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ + __u64 name; + __u32 prog_fd; + } raw_tracepoint; + + struct { /* anonymous struct for BPF_BTF_LOAD */ + __aligned_u64 btf; + __aligned_u64 btf_log_buf; + __u32 btf_size; + __u32 btf_log_size; + __u32 btf_log_level; + }; + + struct { + __u32 pid; /* input: pid */ + __u32 fd; /* input: fd */ + __u32 flags; /* input: flags */ + __u32 buf_len; /* input/output: buf len */ + __aligned_u64 buf; /* input/output: + * tp_name for tracepoint + * symbol for kprobe + * filename for uprobe + */ + __u32 prog_id; /* output: prod_id */ + __u32 fd_type; /* output: BPF_FD_TYPE_* */ + __u64 probe_offset; /* output: probe_offset */ + __u64 probe_addr; /* output: probe_addr */ + } task_fd_query; + + struct { /* struct used by BPF_LINK_CREATE command */ + __u32 prog_fd; /* eBPF program to attach */ + union { + __u32 target_fd; /* object to attach to */ + __u32 target_ifindex; /* target ifindex */ + }; + __u32 attach_type; /* attach type */ + __u32 flags; /* extra flags */ + union { + __u32 target_btf_id; /* btf_id of target to attach to */ + struct { + __aligned_u64 iter_info; /* extra bpf_iter_link_info */ + __u32 
iter_info_len; /* iter_info length */ + }; + struct { + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 bpf_cookie; + } perf_event; + struct { + __u32 flags; + __u32 cnt; + __aligned_u64 syms; + __aligned_u64 addrs; + __aligned_u64 cookies; + } kprobe_multi; + struct { + /* this is overlaid with the target_btf_id above. */ + __u32 target_btf_id; + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 cookie; + } tracing; + }; + } link_create; + + struct { /* struct used by BPF_LINK_UPDATE command */ + __u32 link_fd; /* link fd */ + /* new program fd to update link with */ + __u32 new_prog_fd; + __u32 flags; /* extra flags */ + /* expected link's program fd; is specified only if + * BPF_F_REPLACE flag is set in flags */ + __u32 old_prog_fd; + } link_update; + + struct { + __u32 link_fd; + } link_detach; + + struct { /* struct used by BPF_ENABLE_STATS command */ + __u32 type; + } enable_stats; + + struct { /* struct used by BPF_ITER_CREATE command */ + __u32 link_fd; + __u32 flags; + } iter_create; + + struct { /* struct used by BPF_PROG_BIND_MAP command */ + __u32 prog_fd; + __u32 map_fd; + __u32 flags; /* extra flags */ + } prog_bind_map; + +} __attribute__((aligned(8))); + +#endif /* CONFIG_COMPAT64 */ + diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7b373a5e861f..818ca8b63295 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3,6 +3,7 @@ */ #include <linux/bpf.h> #include <linux/bpf-cgroup.h> +#include <linux/bpf_compat.h> #include <linux/bpf_trace.h> #include <linux/bpf_lirc.h> #include <linux/bpf_verifier.h> @@ -4908,153 +4909,127 @@ static int bpf_prog_bind_map(union bpf_attr *attr) return ret; }
-static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) +static int dispatch_bpf(int cmd, union bpf_attr *attr, bpfptr_t uattr, int size) { - union bpf_attr attr; - bool capable; int err;
- capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled; - - /* Intent here is for unprivileged_bpf_disabled to block key object - * creation commands for unprivileged users; other actions depend - * of fd availability and access to bpffs, so are dependent on - * object creation success. Capabilities are later verified for - * operations such as load and map create, so even with unprivileged - * BPF disabled, capability checks are still carried out for these - * and other operations. - */ - if (!capable && - (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD)) - return -EPERM; - - err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); - if (err) - return err; - size = min_t(u32, size, sizeof(attr)); - - /* copy attributes from user space, may be less than sizeof(bpf_attr) */ - memset(&attr, 0, sizeof(attr)); - if (copy_from_bpfptr(&attr, uattr, size) != 0) - return -EFAULT; - - err = security_bpf(cmd, &attr, size); + err = security_bpf(cmd, attr, size); if (err < 0) return err;
switch (cmd) { case BPF_MAP_CREATE: - err = map_create(&attr); + err = map_create(attr); break; case BPF_MAP_LOOKUP_ELEM: - err = map_lookup_elem(&attr); + err = map_lookup_elem(attr); break; case BPF_MAP_UPDATE_ELEM: - err = map_update_elem(&attr, uattr); + err = map_update_elem(attr, uattr); break; case BPF_MAP_DELETE_ELEM: - err = map_delete_elem(&attr, uattr); + err = map_delete_elem(attr, uattr); break; case BPF_MAP_GET_NEXT_KEY: - err = map_get_next_key(&attr); + err = map_get_next_key(attr); break; case BPF_MAP_FREEZE: - err = map_freeze(&attr); + err = map_freeze(attr); break; case BPF_PROG_LOAD: - err = bpf_prog_load(&attr, uattr); + err = bpf_prog_load(attr, uattr); break; case BPF_OBJ_PIN: - err = bpf_obj_pin(&attr); + err = bpf_obj_pin(attr); break; case BPF_OBJ_GET: - err = bpf_obj_get(&attr); + err = bpf_obj_get(attr); break; case BPF_PROG_ATTACH: - err = bpf_prog_attach(&attr); + err = bpf_prog_attach(attr); break; case BPF_PROG_DETACH: - err = bpf_prog_detach(&attr); + err = bpf_prog_detach(attr); break; case BPF_PROG_QUERY: - err = bpf_prog_query(&attr, uattr.user); + err = bpf_prog_query(attr, uattr.user); break; case BPF_PROG_TEST_RUN: - err = bpf_prog_test_run(&attr, uattr.user); + err = bpf_prog_test_run(attr, uattr.user); break; case BPF_PROG_GET_NEXT_ID: - err = bpf_obj_get_next_id(&attr, uattr.user, + err = bpf_obj_get_next_id(attr, uattr.user, &prog_idr, &prog_idr_lock); break; case BPF_MAP_GET_NEXT_ID: - err = bpf_obj_get_next_id(&attr, uattr.user, + err = bpf_obj_get_next_id(attr, uattr.user, &map_idr, &map_idr_lock); break; case BPF_BTF_GET_NEXT_ID: - err = bpf_obj_get_next_id(&attr, uattr.user, + err = bpf_obj_get_next_id(attr, uattr.user, &btf_idr, &btf_idr_lock); break; case BPF_PROG_GET_FD_BY_ID: - err = bpf_prog_get_fd_by_id(&attr); + err = bpf_prog_get_fd_by_id(attr); break; case BPF_MAP_GET_FD_BY_ID: - err = bpf_map_get_fd_by_id(&attr); + err = bpf_map_get_fd_by_id(attr); break; case BPF_OBJ_GET_INFO_BY_FD: - err = 
bpf_obj_get_info_by_fd(&attr, uattr.user); + err = bpf_obj_get_info_by_fd(attr, uattr.user); break; case BPF_RAW_TRACEPOINT_OPEN: - err = bpf_raw_tracepoint_open(&attr); + err = bpf_raw_tracepoint_open(attr); break; case BPF_BTF_LOAD: - err = bpf_btf_load(&attr, uattr); + err = bpf_btf_load(attr, uattr); break; case BPF_BTF_GET_FD_BY_ID: - err = bpf_btf_get_fd_by_id(&attr); + err = bpf_btf_get_fd_by_id(attr); break; case BPF_TASK_FD_QUERY: - err = bpf_task_fd_query(&attr, uattr.user); + err = bpf_task_fd_query(attr, uattr.user); break; case BPF_MAP_LOOKUP_AND_DELETE_ELEM: - err = map_lookup_and_delete_elem(&attr); + err = map_lookup_and_delete_elem(attr); break; case BPF_MAP_LOOKUP_BATCH: - err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH); + err = bpf_map_do_batch(attr, uattr.user, BPF_MAP_LOOKUP_BATCH); break; case BPF_MAP_LOOKUP_AND_DELETE_BATCH: - err = bpf_map_do_batch(&attr, uattr.user, + err = bpf_map_do_batch(attr, uattr.user, BPF_MAP_LOOKUP_AND_DELETE_BATCH); break; case BPF_MAP_UPDATE_BATCH: - err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH); + err = bpf_map_do_batch(attr, uattr.user, BPF_MAP_UPDATE_BATCH); break; case BPF_MAP_DELETE_BATCH: - err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH); + err = bpf_map_do_batch(attr, uattr.user, BPF_MAP_DELETE_BATCH); break; case BPF_LINK_CREATE: - err = link_create(&attr, uattr); + err = link_create(attr, uattr); break; case BPF_LINK_UPDATE: - err = link_update(&attr); + err = link_update(attr); break; case BPF_LINK_GET_FD_BY_ID: - err = bpf_link_get_fd_by_id(&attr); + err = bpf_link_get_fd_by_id(attr); break; case BPF_LINK_GET_NEXT_ID: - err = bpf_obj_get_next_id(&attr, uattr.user, + err = bpf_obj_get_next_id(attr, uattr.user, &link_idr, &link_idr_lock); break; case BPF_ENABLE_STATS: - err = bpf_enable_stats(&attr); + err = bpf_enable_stats(attr); break; case BPF_ITER_CREATE: - err = bpf_iter_create(&attr); + err = bpf_iter_create(attr); break; case BPF_LINK_DETACH: - err 
= link_detach(&attr); + err = link_detach(attr); break; case BPF_PROG_BIND_MAP: - err = bpf_prog_bind_map(&attr); + err = bpf_prog_bind_map(attr); break; default: err = -EINVAL; @@ -5064,11 +5039,346 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) return err; }
+#ifdef CONFIG_COMPAT64 +static void convert_compat_bpf_attr(union bpf_attr *dest, const union compat_bpf_attr *cattr, int cmd) +{ + struct bpf_prog *prog; + + switch (cmd) { + case BPF_MAP_CREATE: + dest->map_type = cattr->map_type; + dest->key_size = cattr->key_size; + dest->value_size = cattr->value_size; + dest->max_entries = cattr->max_entries; + dest->map_flags = cattr->map_flags; + dest->inner_map_fd = cattr->inner_map_fd; + dest->numa_node = cattr->numa_node; + strncpy(dest->map_name, cattr->map_name, BPF_OBJ_NAME_LEN); + dest->map_ifindex = cattr->map_ifindex; + dest->btf_fd = cattr->btf_fd; + dest->btf_key_type_id = cattr->btf_key_type_id; + dest->btf_value_type_id = cattr->btf_value_type_id; + dest->btf_vmlinux_value_type_id = cattr->btf_vmlinux_value_type_id; + dest->map_extra = cattr->map_extra; + break; + case BPF_MAP_LOOKUP_ELEM: + case BPF_MAP_UPDATE_ELEM: + case BPF_MAP_DELETE_ELEM: + case BPF_MAP_LOOKUP_AND_DELETE_ELEM: + dest->map_fd = cattr->map_fd; + dest->key = cattr->key; + dest->value = cattr->value; + /* u64 next_key is in a union with u64 value */ + dest->flags = cattr->flags; + break; + case BPF_MAP_LOOKUP_BATCH: + case BPF_MAP_LOOKUP_AND_DELETE_BATCH: + case BPF_MAP_UPDATE_BATCH: + case BPF_MAP_DELETE_BATCH: + dest->batch.in_batch = cattr->batch.in_batch; + dest->batch.out_batch = cattr->batch.out_batch; + dest->batch.keys = cattr->batch.keys; + dest->batch.values = cattr->batch.values; + dest->batch.count = cattr->batch.count; + dest->batch.map_fd = cattr->batch.map_fd; + dest->batch.elem_flags = cattr->batch.elem_flags; + dest->batch.flags = cattr->batch.flags; + break; + case BPF_PROG_LOAD: + dest->prog_type = cattr->prog_type; + dest->insn_cnt = cattr->insn_cnt; + dest->insns = cattr->insns; + dest->license = cattr->license; + dest->log_level = cattr->log_level; + dest->log_size = cattr->log_size; + dest->log_buf = cattr->log_buf; + dest->kern_version = cattr->kern_version; + dest->prog_flags = cattr->prog_flags; + 
strncpy(dest->prog_name, cattr->prog_name, BPF_OBJ_NAME_LEN); + dest->prog_ifindex = cattr->prog_ifindex; + dest->expected_attach_type = cattr->expected_attach_type; + dest->prog_btf_fd = cattr->prog_btf_fd; + dest->func_info_rec_size = cattr->func_info_rec_size; + dest->func_info = cattr->func_info; + dest->func_info_cnt = cattr->func_info_cnt; + dest->line_info_rec_size = cattr->line_info_rec_size; + dest->line_info = cattr->line_info; + dest->line_info_cnt = cattr->line_info_cnt; + dest->attach_btf_id = cattr->attach_btf_id; + dest->attach_prog_fd = cattr->attach_prog_fd; + /* u32 attach_btf_obj_fd is in a union with u32 attach_prog_fd */ + dest->core_relo_cnt = cattr->core_relo_cnt; + dest->fd_array = cattr->fd_array; + dest->core_relos = cattr->core_relos; + dest->core_relo_rec_size = cattr->core_relo_rec_size; + break; + case BPF_OBJ_PIN: + case BPF_OBJ_GET: + dest->pathname = cattr->pathname; + dest->bpf_fd = cattr->bpf_fd; + dest->file_flags = cattr->file_flags; + break; + case BPF_PROG_ATTACH: + case BPF_PROG_DETACH: + dest->target_fd = cattr->target_fd; + dest->attach_bpf_fd = cattr->attach_bpf_fd; + dest->attach_type = cattr->attach_type; + dest->attach_flags = cattr->attach_flags; + dest->replace_bpf_fd = cattr->replace_bpf_fd; + break; + case BPF_PROG_RUN: /* same as BPF_PROG_TEST_RUN */ + dest->test.prog_fd = cattr->test.prog_fd; + dest->test.retval = cattr->test.retval; + dest->test.data_size_in = cattr->test.data_size_in; + dest->test.data_size_out = cattr->test.data_size_out; + dest->test.data_in = cattr->test.data_in; + dest->test.data_out = cattr->test.data_out; + dest->test.repeat = cattr->test.repeat; + dest->test.duration = cattr->test.duration; + dest->test.ctx_size_in = cattr->test.ctx_size_in; + dest->test.ctx_size_out = cattr->test.ctx_size_out; + dest->test.ctx_in = cattr->test.ctx_in; + dest->test.ctx_out = cattr->test.ctx_out; + dest->test.flags = cattr->test.flags; + dest->test.cpu = cattr->test.cpu; + dest->test.batch_size = 
cattr->test.batch_size; + break; + case BPF_PROG_GET_NEXT_ID: + case BPF_MAP_GET_NEXT_ID: + case BPF_PROG_GET_FD_BY_ID: + case BPF_MAP_GET_FD_BY_ID: + case BPF_BTF_GET_FD_BY_ID: + case BPF_BTF_GET_NEXT_ID: + case BPF_LINK_GET_FD_BY_ID: + case BPF_LINK_GET_NEXT_ID: + /* u32 prog_id, map_id, btf_id + link_id are in a union with + * u32 start_id */ + dest->start_id = cattr->start_id; + dest->next_id = cattr->next_id; + dest->open_flags = cattr->open_flags; + break; + case BPF_OBJ_GET_INFO_BY_FD: + dest->info.bpf_fd = cattr->info.bpf_fd; + dest->info.info_len = cattr->info.info_len; + dest->info.info = cattr->info.info; + break; + case BPF_PROG_QUERY: + dest->query.target_fd = cattr->query.target_fd; + dest->query.attach_type = cattr->query.attach_type; + dest->query.query_flags = cattr->query.query_flags; + dest->query.attach_flags = cattr->query.attach_flags; + dest->query.prog_ids = cattr->query.prog_ids; + dest->query.prog_cnt = cattr->query.prog_cnt; + dest->query.prog_attach_flags = cattr->query.prog_attach_flags; + break; + case BPF_RAW_TRACEPOINT_OPEN: + dest->raw_tracepoint.name = cattr->raw_tracepoint.name; + dest->raw_tracepoint.prog_fd = cattr->raw_tracepoint.prog_fd; + break; + case BPF_BTF_LOAD: + dest->btf = cattr->btf; + dest->btf_log_buf = cattr->btf_log_buf; + dest->btf_size = cattr->btf_size; + dest->btf_log_size = cattr->btf_log_size; + dest->btf_log_level = cattr->btf_log_level; + break; + case BPF_TASK_FD_QUERY: + dest->task_fd_query.pid = cattr->task_fd_query.pid; + dest->task_fd_query.fd = cattr->task_fd_query.fd; + dest->task_fd_query.flags = cattr->task_fd_query.flags; + dest->task_fd_query.buf_len = cattr->task_fd_query.buf_len; + dest->task_fd_query.buf = cattr->task_fd_query.buf; + dest->task_fd_query.prog_id = cattr->task_fd_query.prog_id; + dest->task_fd_query.fd_type = cattr->task_fd_query.fd_type; + dest->task_fd_query.probe_offset = cattr->task_fd_query.probe_offset; + dest->task_fd_query.probe_addr = cattr->task_fd_query.probe_addr; + 
break; + case BPF_LINK_CREATE: + dest->link_create.prog_fd = cattr->link_create.prog_fd; + dest->link_create.target_fd = cattr->link_create.target_fd; + /* u32 target_ifindex is in a union with u32 target_fd */ + dest->link_create.attach_type = cattr->link_create.attach_type; + dest->link_create.flags = cattr->link_create.flags; + + prog = bpf_prog_get(cattr->link_create.prog_fd); + + if (prog->type == BPF_PROG_TYPE_CGROUP_SKB || + prog->type == BPF_PROG_TYPE_CGROUP_SOCK || + prog->type == BPF_PROG_TYPE_CGROUP_SOCK_ADDR || + prog->type == BPF_PROG_TYPE_SOCK_OPS || + prog->type == BPF_PROG_TYPE_CGROUP_DEVICE || + prog->type == BPF_PROG_TYPE_CGROUP_SYSCTL || + prog->type == BPF_PROG_TYPE_CGROUP_SOCKOPT) + break; + + if (prog->type == BPF_PROG_TYPE_EXT) { + dest->link_create.tracing.target_btf_id = + cattr->link_create.tracing.target_btf_id; + dest->link_create.tracing.cookie = + cattr->link_create.tracing.cookie; + break; + } + + if (prog->type == BPF_PROG_TYPE_LSM || + prog->type == BPF_PROG_TYPE_TRACING) { + if (prog->expected_attach_type == BPF_TRACE_ITER) { + /* iter_info is a user pointer to union + * bpf_iter_link_info however since this union + * contains no pointers, the size/offsets are + * the same for compat64/purecap; hence no + * conversion needed */ + dest->link_create.iter_info = + cattr->link_create.iter_info; + dest->link_create.iter_info_len = + cattr->link_create.iter_info_len; + break; + } else if (prog->expected_attach_type == BPF_TRACE_RAW_TP + || prog->expected_attach_type == BPF_LSM_CGROUP) { + /* only uses common fields above */ + break; + } else { + dest->link_create.target_btf_id = + cattr->link_create.target_btf_id; + dest->link_create.tracing.cookie = + cattr->link_create.tracing.cookie; + break; + } + } + + if (prog->type == BPF_PROG_TYPE_FLOW_DISSECTOR || + prog->type == BPF_PROG_TYPE_SK_LOOKUP || + prog->type == BPF_PROG_TYPE_XDP) + break; + + /* bpf_cookie is used in bpf_perf_link_attach() */ + if (prog->type == 
BPF_PROG_TYPE_PERF_EVENT || + prog->type == BPF_PROG_TYPE_TRACEPOINT || + (prog->type == BPF_PROG_TYPE_KPROBE && + cattr->link_create.attach_type == BPF_PERF_EVENT)) { + dest->link_create.perf_event.bpf_cookie = + cattr->link_create.perf_event.bpf_cookie; + break; + } + + /* kprobe_multi is used in bpf_kprobe_multi_link_attach() */ + if (prog->type == BPF_PROG_TYPE_KPROBE && + cattr->link_create.attach_type != BPF_PERF_EVENT) { + dest->link_create.kprobe_multi.flags = + cattr->link_create.kprobe_multi.flags; + dest->link_create.kprobe_multi.cnt = + cattr->link_create.kprobe_multi.cnt; + dest->link_create.kprobe_multi.syms = + cattr->link_create.kprobe_multi.syms; + dest->link_create.kprobe_multi.addrs = + cattr->link_create.kprobe_multi.addrs; + dest->link_create.kprobe_multi.cookies = + cattr->link_create.kprobe_multi.cookies; + break; + } + break; + case BPF_LINK_UPDATE: + dest->link_update.link_fd = cattr->link_update.link_fd; + dest->link_update.new_prog_fd = cattr->link_update.new_prog_fd; + dest->link_update.flags = cattr->link_update.flags; + dest->link_update.old_prog_fd = cattr->link_update.old_prog_fd; + break; + case BPF_LINK_DETACH: + dest->link_detach.link_fd = cattr->link_detach.link_fd; + break; + case BPF_ENABLE_STATS: + dest->enable_stats.type = cattr->enable_stats.type; + break; + case BPF_ITER_CREATE: + dest->iter_create.link_fd = cattr->iter_create.link_fd; + dest->iter_create.flags = cattr->iter_create.flags; + break; + case BPF_PROG_BIND_MAP: + dest->prog_bind_map.prog_fd = cattr->prog_bind_map.prog_fd; + dest->prog_bind_map.map_fd = cattr->prog_bind_map.map_fd; + dest->prog_bind_map.flags = cattr->prog_bind_map.flags; + break; + }; +} +#endif /* CONFIG_COMPAT64 */ + +static int bpf_check_perms(int cmd) +{ + bool capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled; + + /* Intent here is for unprivileged_bpf_disabled to block key object + * creation commands for unprivileged users; other actions depend + * of fd availability and access 
to bpffs, so are dependent on + * object creation success. Capabilities are later verified for + * operations such as load and map create, so even with unprivileged + * BPF disabled, capability checks are still carried out for these + * and other operations. + */ + if (!capable && + (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD)) + return -EPERM; + + return 0; +} + +static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) +{ + union bpf_attr attr; + int err; + + err = bpf_check_perms(cmd); + if (err) + return err; + + err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); + if (err) + return err; + size = min_t(u32, size, sizeof(attr)); + + /* copy attributes from user space, may be less than sizeof(bpf_attr) */ + memset(&attr, 0, sizeof(attr)); + if (copy_from_bpfptr_with_ptr(&attr, uattr, size) != 0) + return -EFAULT; + + return dispatch_bpf(cmd, &attr, uattr, size); +} + SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { return __sys_bpf(cmd, USER_BPFPTR(uattr), size); }
+#ifdef CONFIG_COMPAT64 +static int __sys_compat_bpf(int cmd, bpfptr_t uattr, unsigned int size) +{ + union bpf_attr attr; + union compat_bpf_attr cattr; + int err; + + err = bpf_check_perms(cmd); + if (err) + return err; + + err = bpf_check_uarg_tail_zero(uattr, sizeof(cattr), size); + if (err) + return err; + size = min_t(u32, size, sizeof(cattr)); + + /* copy attributes from user space, may be less than sizeof(bpf_attr) */ + memset(&cattr, 0, sizeof(cattr)); + if (copy_from_bpfptr_with_ptr(&cattr, uattr, size) != 0) + return -EFAULT; + + convert_compat_bpf_attr(&attr, &cattr, cmd); + + return dispatch_bpf(cmd, &attr, uattr, sizeof(attr)); +} + +COMPAT_SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) +{ + return __sys_compat_bpf(cmd, USER_BPFPTR(uattr), size); +} +#endif /* CONFIG_COMPAT64 */ + static bool syscall_prog_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog,