The bpf syscall does not require a compat handler for 32-bit userspace. This is achieved by using u64 instead of pointer types in the bpf_attr union used to pass arguments to the syscall. As a result, on a system where pointers are 32-bit, the struct/union layouts and offsets are the same as on a 64-bit arch, since each u64 field is simply split into two u32 fields/registers.
This greatly simplifies 32-bit compat, at the small cost of requiring pointers passed in through the uAPI to be cast to u64 (generally via a ptr_to_u64() helper function).
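For reference, the helper in question is typically a one-liner on the userspace side; libbpf and the kernel samples carry variants along these lines:

  /* Userspace side: squeeze a pointer into the u64 field of union
   * bpf_attr. On 32-bit architectures the upper half is zero, so the
   * kernel sees one fixed layout regardless of pointer width.
   */
  static inline __u64 ptr_to_u64(const void *ptr)
  {
          return (__u64)(unsigned long)ptr;
  }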
This poses a problem on architectures where user pointers are wider than 64 bits, such as Morello/PCuABI, where pointers are represented as 129-bit capabilities. In order to extend the bpf syscall interface to accept capabilities and still retain compatibility with the existing 64/32-bit ABIs, 64-bit compat handling and appropriate conversions must be added to handle the different union/struct sizes caused by this pointer size mismatch.
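To make the layout problem concrete, here is an illustrative example (not taken from this patch) of how a pointer-bearing struct diverges between the two ABIs, assuming 16-byte user pointers under PCuABI:

  /* Illustrative only: with 16-byte capability pointers, every member
   * following a pointer shifts, so the native and compat64 layouts of
   * union bpf_attr would no longer line up.
   */
  struct example_compat64 {       /* existing 64/32-bit ABI */
          __u64 key;              /* offset 0,  size 8        */
          __u64 flags;            /* offset 8;  sizeof == 16  */
  };

  struct example_purecap {        /* PCuABI */
          void __user *key;       /* offset 0,  size 16       */
          __u64 flags;            /* offset 16; sizeof == 32 (padded) */
  };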
Before extending the number of bits in union bpf_attr to accept capabilities, lay the groundwork for handling compat64. When in_compat64_syscall(), take the compat64-sized bpf_attr and convert it to what will become the new native offsets.
Inbound conversion is handled up front to minimise the impact on existing code and reduce the overall diff size. After dispatch_bpf() the majority of the code can remain unchanged. The cases where conversion back out to userspace is required are handled in subsequent commits.
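The conversion added below is driven by a copy_field() helper whose definition is not visible in this hunk; a minimal sketch of the kind of macro assumed here (the real definition may differ):

  /* Hypothetical sketch: copy one identically-named member from the
   * compat64 layout into the native layout. Offsets may differ between
   * the two unions; member names and types do not.
   */
  #define copy_field(DEST, SRC, FIELD)    ((DEST)->FIELD = (SRC)->FIELD)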
Signed-off-by: Zachary Leaf <zachary.leaf@arm.com>
---
 include/linux/bpf_compat.h | 300 ++++++++++++++++++++++++++
 kernel/bpf/syscall.c       | 427 +++++++++++++++++++++++++++++++------
 2 files changed, 663 insertions(+), 64 deletions(-)
 create mode 100644 include/linux/bpf_compat.h
diff --git a/include/linux/bpf_compat.h b/include/linux/bpf_compat.h
new file mode 100644
index 000000000000..85e0198bede7
--- /dev/null
+++ b/include/linux/bpf_compat.h
@@ -0,0 +1,300 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2023 Arm Ltd */
+
+union compat_bpf_attr {
+	struct { /* anonymous struct used by BPF_MAP_CREATE command */
+		__u32	map_type;	/* one of enum bpf_map_type */
+		__u32	key_size;	/* size of key in bytes */
+		__u32	value_size;	/* size of value in bytes */
+		__u32	max_entries;	/* max number of entries in a map */
+		__u32	map_flags;	/* BPF_MAP_CREATE related
+					 * flags defined above.
+					 */
+		__u32	inner_map_fd;	/* fd pointing to the inner map */
+		__u32	numa_node;	/* numa node (effective only if
+					 * BPF_F_NUMA_NODE is set).
+					 */
+		char	map_name[BPF_OBJ_NAME_LEN];
+		__u32	map_ifindex;	/* ifindex of netdev to create on */
+		__u32	btf_fd;		/* fd pointing to a BTF type data */
+		__u32	btf_key_type_id;	/* BTF type_id of the key */
+		__u32	btf_value_type_id;	/* BTF type_id of the value */
+		__u32	btf_vmlinux_value_type_id;/* BTF type_id of a kernel-
+						   * struct stored as the
+						   * map value
+						   */
+		/* Any per-map-type extra fields
+		 *
+		 * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
+		 * number of hash functions (if 0, the bloom filter will default
+		 * to using 5 hash functions).
+		 */
+		__u64	map_extra;
+	};
+
+	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+		__u32		map_fd;
+		__aligned_u64	key;
+		union {
+			__aligned_u64 value;
+			__aligned_u64 next_key;
+		};
+		__u64		flags;
+	};
+
+	struct { /* struct used by BPF_MAP_*_BATCH commands */
+		__aligned_u64	in_batch;	/* start batch,
+						 * NULL to start from beginning
+						 */
+		__aligned_u64	out_batch;	/* output: next start batch */
+		__aligned_u64	keys;
+		__aligned_u64	values;
+		__u32		count;		/* input/output:
+						 * input: # of key/value
+						 * elements
+						 * output: # of filled elements
+						 */
+		__u32		map_fd;
+		__u64		elem_flags;
+		__u64		flags;
+	} batch;
+
+	struct { /* anonymous struct used by BPF_PROG_LOAD command */
+		__u32		prog_type;	/* one of enum bpf_prog_type */
+		__u32		insn_cnt;
+		__aligned_u64	insns;
+		__aligned_u64	license;
+		__u32		log_level;	/* verbosity level of verifier */
+		__u32		log_size;	/* size of user buffer */
+		__aligned_u64	log_buf;	/* user supplied buffer */
+		__u32		kern_version;	/* not used */
+		__u32		prog_flags;
+		char		prog_name[BPF_OBJ_NAME_LEN];
+		__u32		prog_ifindex;	/* ifindex of netdev to prep for */
+		/* For some prog types expected attach type must be known at
+		 * load time to verify attach type specific parts of prog
+		 * (context accesses, allowed helpers, etc).
+		 */
+		__u32		expected_attach_type;
+		__u32		prog_btf_fd;	/* fd pointing to BTF type data */
+		__u32		func_info_rec_size;	/* userspace bpf_func_info size */
+		__aligned_u64	func_info;	/* func info */
+		__u32		func_info_cnt;	/* number of bpf_func_info records */
+		__u32		line_info_rec_size;	/* userspace bpf_line_info size */
+		__aligned_u64	line_info;	/* line info */
+		__u32		line_info_cnt;	/* number of bpf_line_info records */
+		__u32		attach_btf_id;	/* in-kernel BTF type id to attach to */
+		union {
+			/* valid prog_fd to attach to bpf prog */
+			__u32		attach_prog_fd;
+			/* or valid module BTF object fd or 0 to attach to vmlinux */
+			__u32		attach_btf_obj_fd;
+		};
+		__u32		core_relo_cnt;	/* number of bpf_core_relo */
+		__aligned_u64	fd_array;	/* array of FDs */
+		__aligned_u64	core_relos;
+		__u32		core_relo_rec_size; /* sizeof(struct bpf_core_relo) */
+		/* output: actual total log contents size (including terminating zero).
+		 * It could be both larger than original log_size (if log was
+		 * truncated), or smaller (if log buffer wasn't filled completely).
+		 */
+		__u32		log_true_size;
+	};
+
+	struct { /* anonymous struct used by BPF_OBJ_* commands */
+		__aligned_u64	pathname;
+		__u32		bpf_fd;
+		__u32		file_flags;
+	};
+
+	struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+		__u32	target_fd;	/* container object to attach to */
+		__u32	attach_bpf_fd;	/* eBPF program to attach */
+		__u32	attach_type;
+		__u32	attach_flags;
+		__u32	replace_bpf_fd;	/* previously attached eBPF
+					 * program to replace if
+					 * BPF_F_REPLACE is used
+					 */
+	};
+
+	struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
+		__u32		prog_fd;
+		__u32		retval;
+		__u32		data_size_in;	/* input: len of data_in */
+		__u32		data_size_out;	/* input/output: len of data_out
+						 * returns ENOSPC if data_out
+						 * is too small.
+						 */
+		__aligned_u64	data_in;
+		__aligned_u64	data_out;
+		__u32		repeat;
+		__u32		duration;
+		__u32		ctx_size_in;	/* input: len of ctx_in */
+		__u32		ctx_size_out;	/* input/output: len of ctx_out
+						 * returns ENOSPC if ctx_out
+						 * is too small.
+						 */
+		__aligned_u64	ctx_in;
+		__aligned_u64	ctx_out;
+		__u32		flags;
+		__u32		cpu;
+		__u32		batch_size;
+	} test;
+
+	struct { /* anonymous struct used by BPF_*_GET_*_ID */
+		union {
+			__u32	start_id;
+			__u32	prog_id;
+			__u32	map_id;
+			__u32	btf_id;
+			__u32	link_id;
+		};
+		__u32		next_id;
+		__u32		open_flags;
+	};
+
+	struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
+		__u32		bpf_fd;
+		__u32		info_len;
+		__aligned_u64	info;
+	} info;
+
+	struct { /* anonymous struct used by BPF_PROG_QUERY command */
+		__u32		target_fd;	/* container object to query */
+		__u32		attach_type;
+		__u32		query_flags;
+		__u32		attach_flags;
+		__aligned_u64	prog_ids;
+		__u32		prog_cnt;
+		/* output: per-program attach_flags.
+		 * not allowed to be set during effective query.
+		 */
+		__aligned_u64	prog_attach_flags;
+	} query;
+
+	struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
+		__u64		name;
+		__u32		prog_fd;
+	} raw_tracepoint;
+
+	struct { /* anonymous struct for BPF_BTF_LOAD */
+		__aligned_u64	btf;
+		__aligned_u64	btf_log_buf;
+		__u32		btf_size;
+		__u32		btf_log_size;
+		__u32		btf_log_level;
+		/* output: actual total log contents size (including terminating zero).
+		 * It could be both larger than original log_size (if log was
+		 * truncated), or smaller (if log buffer wasn't filled completely).
+		 */
+		__u32		btf_log_true_size;
+	};
+
+	struct {
+		__u32		pid;		/* input: pid */
+		__u32		fd;		/* input: fd */
+		__u32		flags;		/* input: flags */
+		__u32		buf_len;	/* input/output: buf len */
+		__aligned_u64	buf;		/* input/output:
+						 * tp_name for tracepoint
+						 * symbol for kprobe
+						 * filename for uprobe
+						 */
+		__u32		prog_id;	/* output: prog_id */
+		__u32		fd_type;	/* output: BPF_FD_TYPE_* */
+		__u64		probe_offset;	/* output: probe_offset */
+		__u64		probe_addr;	/* output: probe_addr */
+	} task_fd_query;
+
+	struct { /* struct used by BPF_LINK_CREATE command */
+		union {
+			__u32	prog_fd;	/* eBPF program to attach */
+			__u32	map_fd;		/* struct_ops to attach */
+		};
+		union {
+			__u32	target_fd;	/* object to attach to */
+			__u32	target_ifindex;	/* target ifindex */
+		};
+		__u32		attach_type;	/* attach type */
+		__u32		flags;		/* extra flags */
+		union {
+			__u32	target_btf_id;	/* btf_id of target to attach to */
+			struct {
+				__aligned_u64	iter_info;	/* extra bpf_iter_link_info */
+				__u32		iter_info_len;	/* iter_info length */
+			};
+			struct {
+				/* black box user-provided value passed through
+				 * to BPF program at the execution time and
+				 * accessible through bpf_get_attach_cookie() BPF helper
+				 */
+				__u64		bpf_cookie;
+			} perf_event;
+			struct {
+				__u32		flags;
+				__u32		cnt;
+				__aligned_u64	syms;
+				__aligned_u64	addrs;
+				__aligned_u64	cookies;
+			} kprobe_multi;
+			struct {
+				/* this is overlaid with the target_btf_id above. */
+				__u32		target_btf_id;
+				/* black box user-provided value passed through
+				 * to BPF program at the execution time and
+				 * accessible through bpf_get_attach_cookie() BPF helper
+				 */
+				__u64		cookie;
+			} tracing;
+			struct {
+				__u32		pf;
+				__u32		hooknum;
+				__s32		priority;
+				__u32		flags;
+			} netfilter;
+		};
+	} link_create;
+
+	struct { /* struct used by BPF_LINK_UPDATE command */
+		__u32		link_fd;	/* link fd */
+		union {
+			/* new program fd to update link with */
+			__u32	new_prog_fd;
+			/* new struct_ops map fd to update link with */
+			__u32	new_map_fd;
+		};
+		__u32		flags;		/* extra flags */
+		union {
+			/* expected link's program fd; is specified only if
+			 * BPF_F_REPLACE flag is set in flags.
+			 */
+			__u32	old_prog_fd;
+			/* expected link's map fd; is specified only
+			 * if BPF_F_REPLACE flag is set.
+			 */
+			__u32	old_map_fd;
+		};
+	} link_update;
+
+	struct {
+		__u32		link_fd;
+	} link_detach;
+
+	struct { /* struct used by BPF_ENABLE_STATS command */
+		__u32		type;
+	} enable_stats;
+
+	struct { /* struct used by BPF_ITER_CREATE command */
+		__u32		link_fd;
+		__u32		flags;
+	} iter_create;
+
+	struct { /* struct used by BPF_PROG_BIND_MAP command */
+		__u32		prog_fd;
+		__u32		map_fd;
+		__u32		flags;		/* extra flags */
+	} prog_bind_map;
+
+} __attribute__((aligned(8)));
+
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f1c8733f76b8..867c51d45a83 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3,6 +3,7 @@
  */
 #include <linux/bpf.h>
 #include <linux/bpf-cgroup.h>
+#include <linux/bpf_compat.h>
 #include <linux/bpf_trace.h>
 #include <linux/bpf_lirc.h>
 #include <linux/bpf_verifier.h>
@@ -11,6 +12,7 @@
 #include <linux/syscalls.h>
 #include <linux/slab.h>
 #include <linux/sched/signal.h>
+#include <linux/stddef.h>
 #include <linux/vmalloc.h>
 #include <linux/mmzone.h>
 #include <linux/anon_inodes.h>
@@ -5015,153 +5017,128 @@ static int bpf_prog_bind_map(union bpf_attr *attr)
 	return ret;
 }
-static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
+static int dispatch_bpf(int cmd, union bpf_attr *attr, bpfptr_t uattr,
+			unsigned int size)
 {
-	union bpf_attr attr;
-	bool capable;
 	int err;
 
-	capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled;
-
-	/* Intent here is for unprivileged_bpf_disabled to block key object
-	 * creation commands for unprivileged users; other actions depend
-	 * of fd availability and access to bpffs, so are dependent on
-	 * object creation success. Capabilities are later verified for
-	 * operations such as load and map create, so even with unprivileged
-	 * BPF disabled, capability checks are still carried out for these
-	 * and other operations.
-	 */
-	if (!capable &&
-	    (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD))
-		return -EPERM;
-
-	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
-	if (err)
-		return err;
-	size = min_t(u32, size, sizeof(attr));
-
-	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
-	memset(&attr, 0, sizeof(attr));
-	if (copy_from_bpfptr(&attr, uattr, size) != 0)
-		return -EFAULT;
-
-	err = security_bpf(cmd, &attr, size);
+	err = security_bpf(cmd, attr, size);
 	if (err < 0)
 		return err;
 
 	switch (cmd) {
 	case BPF_MAP_CREATE:
-		err = map_create(&attr);
+		err = map_create(attr);
 		break;
 	case BPF_MAP_LOOKUP_ELEM:
-		err = map_lookup_elem(&attr);
+		err = map_lookup_elem(attr);
 		break;
 	case BPF_MAP_UPDATE_ELEM:
-		err = map_update_elem(&attr, uattr);
+		err = map_update_elem(attr, uattr);
 		break;
 	case BPF_MAP_DELETE_ELEM:
-		err = map_delete_elem(&attr, uattr);
+		err = map_delete_elem(attr, uattr);
 		break;
 	case BPF_MAP_GET_NEXT_KEY:
-		err = map_get_next_key(&attr);
+		err = map_get_next_key(attr);
 		break;
 	case BPF_MAP_FREEZE:
-		err = map_freeze(&attr);
+		err = map_freeze(attr);
 		break;
 	case BPF_PROG_LOAD:
-		err = bpf_prog_load(&attr, uattr, size);
+		err = bpf_prog_load(attr, uattr, size);
 		break;
 	case BPF_OBJ_PIN:
-		err = bpf_obj_pin(&attr);
+		err = bpf_obj_pin(attr);
 		break;
 	case BPF_OBJ_GET:
-		err = bpf_obj_get(&attr);
+		err = bpf_obj_get(attr);
 		break;
 	case BPF_PROG_ATTACH:
-		err = bpf_prog_attach(&attr);
+		err = bpf_prog_attach(attr);
 		break;
 	case BPF_PROG_DETACH:
-		err = bpf_prog_detach(&attr);
+		err = bpf_prog_detach(attr);
 		break;
 	case BPF_PROG_QUERY:
-		err = bpf_prog_query(&attr, uattr.user);
+		err = bpf_prog_query(attr, uattr.user);
 		break;
 	case BPF_PROG_TEST_RUN:
-		err = bpf_prog_test_run(&attr, uattr.user);
+		err = bpf_prog_test_run(attr, uattr.user);
 		break;
 	case BPF_PROG_GET_NEXT_ID:
-		err = bpf_obj_get_next_id(&attr, uattr.user,
+		err = bpf_obj_get_next_id(attr, uattr.user,
 					  &prog_idr, &prog_idr_lock);
 		break;
 	case BPF_MAP_GET_NEXT_ID:
-		err = bpf_obj_get_next_id(&attr, uattr.user,
+		err = bpf_obj_get_next_id(attr, uattr.user,
 					  &map_idr, &map_idr_lock);
 		break;
 	case BPF_BTF_GET_NEXT_ID:
-		err = bpf_obj_get_next_id(&attr, uattr.user,
+		err = bpf_obj_get_next_id(attr, uattr.user,
 					  &btf_idr, &btf_idr_lock);
 		break;
 	case BPF_PROG_GET_FD_BY_ID:
-		err = bpf_prog_get_fd_by_id(&attr);
+		err = bpf_prog_get_fd_by_id(attr);
 		break;
 	case BPF_MAP_GET_FD_BY_ID:
-		err = bpf_map_get_fd_by_id(&attr);
+		err = bpf_map_get_fd_by_id(attr);
 		break;
 	case BPF_OBJ_GET_INFO_BY_FD:
-		err = bpf_obj_get_info_by_fd(&attr, uattr.user);
+		err = bpf_obj_get_info_by_fd(attr, uattr.user);
 		break;
 	case BPF_RAW_TRACEPOINT_OPEN:
-		err = bpf_raw_tracepoint_open(&attr);
+		err = bpf_raw_tracepoint_open(attr);
 		break;
 	case BPF_BTF_LOAD:
-		err = bpf_btf_load(&attr, uattr, size);
+		err = bpf_btf_load(attr, uattr, size);
 		break;
 	case BPF_BTF_GET_FD_BY_ID:
-		err = bpf_btf_get_fd_by_id(&attr);
+		err = bpf_btf_get_fd_by_id(attr);
 		break;
 	case BPF_TASK_FD_QUERY:
-		err = bpf_task_fd_query(&attr, uattr.user);
+		err = bpf_task_fd_query(attr, uattr.user);
 		break;
 	case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
-		err = map_lookup_and_delete_elem(&attr);
+		err = map_lookup_and_delete_elem(attr);
 		break;
 	case BPF_MAP_LOOKUP_BATCH:
-		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH);
+		err = bpf_map_do_batch(attr, uattr.user, BPF_MAP_LOOKUP_BATCH);
 		break;
 	case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
-		err = bpf_map_do_batch(&attr, uattr.user,
+		err = bpf_map_do_batch(attr, uattr.user,
 				       BPF_MAP_LOOKUP_AND_DELETE_BATCH);
 		break;
 	case BPF_MAP_UPDATE_BATCH:
-		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH);
+		err = bpf_map_do_batch(attr, uattr.user, BPF_MAP_UPDATE_BATCH);
 		break;
 	case BPF_MAP_DELETE_BATCH:
-		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH);
+		err = bpf_map_do_batch(attr, uattr.user, BPF_MAP_DELETE_BATCH);
 		break;
 	case BPF_LINK_CREATE:
-		err = link_create(&attr, uattr);
+		err = link_create(attr, uattr);
 		break;
 	case BPF_LINK_UPDATE:
-		err = link_update(&attr);
+		err = link_update(attr);
 		break;
 	case BPF_LINK_GET_FD_BY_ID:
-		err = bpf_link_get_fd_by_id(&attr);
+		err = bpf_link_get_fd_by_id(attr);
 		break;
 	case BPF_LINK_GET_NEXT_ID:
-		err = bpf_obj_get_next_id(&attr, uattr.user,
+		err = bpf_obj_get_next_id(attr, uattr.user,
 					  &link_idr, &link_idr_lock);
 		break;
 	case BPF_ENABLE_STATS:
-		err = bpf_enable_stats(&attr);
+		err = bpf_enable_stats(attr);
 		break;
 	case BPF_ITER_CREATE:
-		err = bpf_iter_create(&attr);
+		err = bpf_iter_create(attr);
 		break;
 	case BPF_LINK_DETACH:
-		err = link_detach(&attr);
+		err = link_detach(attr);
 		break;
 	case BPF_PROG_BIND_MAP:
-		err = bpf_prog_bind_map(&attr);
+		err = bpf_prog_bind_map(attr);
 		break;
 	default:
 		err = -EINVAL;
@@ -5171,6 +5148,328 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
 	return err;
 }
 
+static void convert_compat_bpf_attr(union bpf_attr *dest,
+				    const union compat_bpf_attr *cattr, int cmd)
+{
+	struct bpf_prog *prog;
+
+	memset(dest, 0, sizeof(union bpf_attr));
+
+	switch (cmd) {
+	case BPF_MAP_CREATE:
+		copy_field(dest, cattr, map_type);
+		copy_field(dest, cattr, key_size);
+		copy_field(dest, cattr, value_size);
+		copy_field(dest, cattr, max_entries);
+		copy_field(dest, cattr, map_flags);
+		copy_field(dest, cattr, inner_map_fd);
+		copy_field(dest, cattr, numa_node);
+		strncpy(dest->map_name, cattr->map_name, BPF_OBJ_NAME_LEN);
+		copy_field(dest, cattr, map_ifindex);
+		copy_field(dest, cattr, btf_fd);
+		copy_field(dest, cattr, btf_key_type_id);
+		copy_field(dest, cattr, btf_value_type_id);
+		copy_field(dest, cattr, btf_vmlinux_value_type_id);
+		copy_field(dest, cattr, map_extra);
+		break;
+	case BPF_MAP_LOOKUP_ELEM:
+	case BPF_MAP_UPDATE_ELEM:
+	case BPF_MAP_DELETE_ELEM:
+	case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
+		copy_field(dest, cattr, map_fd);
+		copy_field(dest, cattr, key);
+		copy_field(dest, cattr, value);
+		/* u64 next_key is in a union with u64 value */
+		copy_field(dest, cattr, flags);
+		break;
+	case BPF_MAP_LOOKUP_BATCH:
+	case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
+	case BPF_MAP_UPDATE_BATCH:
+	case BPF_MAP_DELETE_BATCH:
+		copy_field(dest, cattr, batch.in_batch);
+		copy_field(dest, cattr, batch.out_batch);
+		copy_field(dest, cattr, batch.keys);
+		copy_field(dest, cattr, batch.values);
+		copy_field(dest, cattr, batch.count);
+		copy_field(dest, cattr, batch.map_fd);
+		copy_field(dest, cattr, batch.elem_flags);
+		copy_field(dest, cattr, batch.flags);
+		break;
+	case BPF_PROG_LOAD:
+		copy_field(dest, cattr, prog_type);
+		copy_field(dest, cattr, insn_cnt);
+		copy_field(dest, cattr, insns);
+		copy_field(dest, cattr, license);
+		copy_field(dest, cattr, log_level);
+		copy_field(dest, cattr, log_size);
+		copy_field(dest, cattr, log_buf);
+		copy_field(dest, cattr, kern_version);
+		copy_field(dest, cattr, prog_flags);
+		strncpy(dest->prog_name, cattr->prog_name, BPF_OBJ_NAME_LEN);
+		copy_field(dest, cattr, prog_ifindex);
+		copy_field(dest, cattr, expected_attach_type);
+		copy_field(dest, cattr, prog_btf_fd);
+		copy_field(dest, cattr, func_info_rec_size);
+		copy_field(dest, cattr, func_info);
+		copy_field(dest, cattr, func_info_cnt);
+		copy_field(dest, cattr, line_info_rec_size);
+		copy_field(dest, cattr, line_info);
+		copy_field(dest, cattr, line_info_cnt);
+		copy_field(dest, cattr, attach_btf_id);
+		copy_field(dest, cattr, attach_prog_fd);
+		/* u32 attach_btf_obj_fd is in a union with u32 attach_prog_fd */
+		copy_field(dest, cattr, core_relo_cnt);
+		copy_field(dest, cattr, fd_array);
+		copy_field(dest, cattr, core_relos);
+		copy_field(dest, cattr, core_relo_rec_size);
+		copy_field(dest, cattr, log_true_size);
+		break;
+	case BPF_OBJ_PIN:
+	case BPF_OBJ_GET:
+		copy_field(dest, cattr, pathname);
+		copy_field(dest, cattr, bpf_fd);
+		copy_field(dest, cattr, file_flags);
+		break;
+	case BPF_PROG_ATTACH:
+	case BPF_PROG_DETACH:
+		copy_field(dest, cattr, target_fd);
+		copy_field(dest, cattr, attach_bpf_fd);
+		copy_field(dest, cattr, attach_type);
+		copy_field(dest, cattr, attach_flags);
+		copy_field(dest, cattr, replace_bpf_fd);
+		break;
+	case BPF_PROG_RUN: /* same as BPF_PROG_TEST_RUN */
+		copy_field(dest, cattr, test.prog_fd);
+		copy_field(dest, cattr, test.retval);
+		copy_field(dest, cattr, test.data_size_in);
+		copy_field(dest, cattr, test.data_size_out);
+		copy_field(dest, cattr, test.data_in);
+		copy_field(dest, cattr, test.data_out);
+		copy_field(dest, cattr, test.repeat);
+		copy_field(dest, cattr, test.duration);
+		copy_field(dest, cattr, test.ctx_size_in);
+		copy_field(dest, cattr, test.ctx_size_out);
+		copy_field(dest, cattr, test.ctx_in);
+		copy_field(dest, cattr, test.ctx_out);
+		copy_field(dest, cattr, test.flags);
+		copy_field(dest, cattr, test.cpu);
+		copy_field(dest, cattr, test.batch_size);
+		break;
+	case BPF_PROG_GET_NEXT_ID:
+	case BPF_MAP_GET_NEXT_ID:
+	case BPF_PROG_GET_FD_BY_ID:
+	case BPF_MAP_GET_FD_BY_ID:
+	case BPF_BTF_GET_FD_BY_ID:
+	case BPF_BTF_GET_NEXT_ID:
+	case BPF_LINK_GET_FD_BY_ID:
+	case BPF_LINK_GET_NEXT_ID:
+		/* u32 prog_id, map_id, btf_id + link_id are in a union with
+		 * u32 start_id */
+		copy_field(dest, cattr, start_id);
+		copy_field(dest, cattr, next_id);
+		copy_field(dest, cattr, open_flags);
+		break;
+	case BPF_OBJ_GET_INFO_BY_FD:
+		copy_field(dest, cattr, info.bpf_fd);
+		copy_field(dest, cattr, info.info_len);
+		copy_field(dest, cattr, info.info);
+		break;
+	case BPF_PROG_QUERY:
+		copy_field(dest, cattr, query.target_fd);
+		copy_field(dest, cattr, query.attach_type);
+		copy_field(dest, cattr, query.query_flags);
+		copy_field(dest, cattr, query.attach_flags);
+		copy_field(dest, cattr, query.prog_ids);
+		copy_field(dest, cattr, query.prog_cnt);
+		copy_field(dest, cattr, query.prog_attach_flags);
+		break;
+	case BPF_RAW_TRACEPOINT_OPEN:
+		copy_field(dest, cattr, raw_tracepoint.name);
+		copy_field(dest, cattr, raw_tracepoint.prog_fd);
+		break;
+	case BPF_BTF_LOAD:
+		copy_field(dest, cattr, btf);
+		copy_field(dest, cattr, btf_log_buf);
+		copy_field(dest, cattr, btf_size);
+		copy_field(dest, cattr, btf_log_size);
+		copy_field(dest, cattr, btf_log_level);
+		copy_field(dest, cattr, btf_log_true_size);
+		break;
+	case BPF_TASK_FD_QUERY:
+		copy_field(dest, cattr, task_fd_query.pid);
+		copy_field(dest, cattr, task_fd_query.fd);
+		copy_field(dest, cattr, task_fd_query.flags);
+		copy_field(dest, cattr, task_fd_query.buf_len);
+		copy_field(dest, cattr, task_fd_query.buf);
+		copy_field(dest, cattr, task_fd_query.prog_id);
+		copy_field(dest, cattr, task_fd_query.fd_type);
+		copy_field(dest, cattr, task_fd_query.probe_offset);
+		copy_field(dest, cattr, task_fd_query.probe_addr);
+		break;
+	case BPF_LINK_CREATE:
+		copy_field(dest, cattr, link_create.prog_fd);
+		copy_field(dest, cattr, link_create.target_fd);
+		/* u32 target_ifindex is in a union with u32 target_fd */
+		copy_field(dest, cattr, link_create.attach_type);
+		copy_field(dest, cattr, link_create.flags);
+
+		prog = bpf_prog_get(cattr->link_create.prog_fd);
+		/* an invalid prog_fd is caught later in link_create() */
+		if (IS_ERR(prog))
+			break;
+
+		if (prog->type == BPF_PROG_TYPE_CGROUP_SKB ||
+		    prog->type == BPF_PROG_TYPE_CGROUP_SOCK ||
+		    prog->type == BPF_PROG_TYPE_CGROUP_SOCK_ADDR ||
+		    prog->type == BPF_PROG_TYPE_SOCK_OPS ||
+		    prog->type == BPF_PROG_TYPE_CGROUP_DEVICE ||
+		    prog->type == BPF_PROG_TYPE_CGROUP_SYSCTL ||
+		    prog->type == BPF_PROG_TYPE_CGROUP_SOCKOPT)
+			break;
+
+		if (prog->type == BPF_PROG_TYPE_EXT) {
+			copy_field(dest, cattr, link_create.tracing.target_btf_id);
+			copy_field(dest, cattr, link_create.tracing.cookie);
+			break;
+		}
+
+		if (prog->type == BPF_PROG_TYPE_LSM ||
+		    prog->type == BPF_PROG_TYPE_TRACING) {
+			if (prog->expected_attach_type == BPF_TRACE_ITER) {
+				/* iter_info is a user pointer to union
+				 * bpf_iter_link_info however since this union
+				 * contains no pointers, the size/offsets are
+				 * the same for compat64/purecap; hence no
+				 * conversion needed
+				 */
+				copy_field(dest, cattr, link_create.iter_info);
+				copy_field(dest, cattr, link_create.iter_info_len);
+				break;
+			} else if (prog->expected_attach_type == BPF_TRACE_RAW_TP ||
+				   prog->expected_attach_type == BPF_LSM_CGROUP) {
+				/* only uses common fields above */
+				break;
+			} else {
+				copy_field(dest, cattr, link_create.target_btf_id);
+				copy_field(dest, cattr, link_create.tracing.cookie);
+				break;
+			}
+		}
+
+		if (prog->type == BPF_PROG_TYPE_FLOW_DISSECTOR ||
+		    prog->type == BPF_PROG_TYPE_SK_LOOKUP ||
+		    prog->type == BPF_PROG_TYPE_XDP)
+			break;
+
+		/* bpf_cookie is used in bpf_perf_link_attach() */
+		if (prog->type == BPF_PROG_TYPE_PERF_EVENT ||
+		    prog->type == BPF_PROG_TYPE_TRACEPOINT ||
+		    (prog->type == BPF_PROG_TYPE_KPROBE &&
+		     cattr->link_create.attach_type == BPF_PERF_EVENT)) {
+			copy_field(dest, cattr, link_create.perf_event.bpf_cookie);
+			break;
+		}
+
+		/* kprobe_multi is used in bpf_kprobe_multi_link_attach() */
+		if (prog->type == BPF_PROG_TYPE_KPROBE &&
+		    cattr->link_create.attach_type != BPF_PERF_EVENT) {
+			copy_field(dest, cattr, link_create.kprobe_multi.flags);
+			copy_field(dest, cattr, link_create.kprobe_multi.cnt);
+			copy_field(dest, cattr, link_create.kprobe_multi.syms);
+			copy_field(dest, cattr, link_create.kprobe_multi.addrs);
+			copy_field(dest, cattr, link_create.kprobe_multi.cookies);
+			break;
+		}
+
+		if (prog->type == BPF_PROG_TYPE_NETFILTER) {
+			copy_field(dest, cattr, link_create.netfilter.pf);
+			copy_field(dest, cattr, link_create.netfilter.hooknum);
+			copy_field(dest, cattr, link_create.netfilter.priority);
+			copy_field(dest, cattr, link_create.netfilter.flags);
+			break;
+		}
+		break;
+	case BPF_LINK_UPDATE:
+		copy_field(dest, cattr, link_update.link_fd);
+		copy_field(dest, cattr, link_update.new_prog_fd);
+		copy_field(dest, cattr, link_update.flags);
+		copy_field(dest, cattr, link_update.old_prog_fd);
+		break;
+	case BPF_LINK_DETACH:
+		copy_field(dest, cattr, link_detach.link_fd);
+		break;
+	case BPF_ENABLE_STATS:
+		copy_field(dest, cattr, enable_stats.type);
+		break;
+	case BPF_ITER_CREATE:
+		copy_field(dest, cattr, iter_create.link_fd);
+		copy_field(dest, cattr, iter_create.flags);
+		break;
+	case BPF_PROG_BIND_MAP:
+		copy_field(dest, cattr, prog_bind_map.prog_fd);
+		copy_field(dest, cattr, prog_bind_map.map_fd);
+		copy_field(dest, cattr, prog_bind_map.flags);
+		break;
+	}
+}
+
+static int bpf_check_perms(int cmd)
+{
+	bool capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled;
+
+	/* Intent here is for unprivileged_bpf_disabled to block key object
+	 * creation commands for unprivileged users; other actions depend
+	 * of fd availability and access to bpffs, so are dependent on
+	 * object creation success. Capabilities are later verified for
+	 * operations such as load and map create, so even with unprivileged
+	 * BPF disabled, capability checks are still carried out for these
+	 * and other operations.
+	 */
+	if (!capable &&
+	    (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD))
+		return -EPERM;
+
+	return 0;
+}
+
+static int copy_bpf_attr_from_user(union bpf_attr *attr, int cmd,
+				   bpfptr_t uattr, unsigned int *size)
+{
+	union compat_bpf_attr cattr;
+	void *select_attr = in_compat64_syscall() ? (void *)&cattr
+						  : (void *)attr;
+	size_t attr_size = in_compat64_syscall() ? sizeof(union compat_bpf_attr)
+						 : sizeof(union bpf_attr);
+	int err;
+
+	err = bpf_check_uarg_tail_zero(uattr, attr_size, *size);
+	if (err)
+		return err;
+	*size = min_t(u32, *size, attr_size);
+
+	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
+	memset(select_attr, 0, attr_size);
+	if (copy_from_bpfptr(select_attr, uattr, *size) != 0)
+		return -EFAULT;
+
+	if (in_compat64_syscall())
+		convert_compat_bpf_attr(attr, &cattr, cmd);
+
+	return 0;
+}
+
+static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
+{
+	union bpf_attr attr;
+	int err;
+
+	err = bpf_check_perms(cmd);
+	if (err)
+		return err;
+
+	err = copy_bpf_attr_from_user(&attr, cmd, uattr, &size);
+	if (err)
+		return err;
+
+	return dispatch_bpf(cmd, &attr, uattr, size);
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr,
 		unsigned int, size)
 {
 	return __sys_bpf(cmd, USER_BPFPTR(uattr), size);