From 88d1d4a7eebea2836859246d91fe9d141789dfc3 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 6 Mar 2024 19:12:23 -0800 Subject: [PATCH 1/6] bpf: Allow kfuncs return 'void *' Recognize return of 'void *' from kfunc as returning unknown scalar. Acked-by: Andrii Nakryiko Acked-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20240307031228.42896-2-alexei.starovoitov@gmail.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/verifier.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ee86e4d7d5fc..9c2a36b1373b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -12364,6 +12364,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, meta.func_name); return -EFAULT; } + } else if (btf_type_is_void(ptr_type)) { + /* kfunc returning 'void *' is equivalent to returning scalar */ + mark_reg_unknown(env, regs, BPF_REG_0); } else if (!__btf_type_is_struct(ptr_type)) { if (!meta.r0_size) { __u32 sz; From 8d94f1357c00d7706c1f3d0bb568e054cef6aea1 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 6 Mar 2024 19:12:24 -0800 Subject: [PATCH 2/6] bpf: Recognize '__map' suffix in kfunc arguments Recognize 'void *p__map' kfunc argument as 'struct bpf_map *p__map'. It allows kfunc to have 'void *' argument for maps, since bpf progs will call them as: struct { __uint(type, BPF_MAP_TYPE_ARENA); ... } arena SEC(".maps"); bpf_kfunc_with_map(... &arena ...); Underneath libbpf will load CONST_PTR_TO_MAP into the register via ld_imm64 insn. If kfunc was defined with 'struct bpf_map *' it would pass the verifier as well, but bpf prog would need to type cast the argument (void *)&arena, which is not clean. Acked-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20240307031228.42896-3-alexei.starovoitov@gmail.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/verifier.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9c2a36b1373b..bf084c693507 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10752,6 +10752,11 @@ static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *a return btf_param_match_suffix(btf, arg, "__ign"); } +static bool is_kfunc_arg_map(const struct btf *btf, const struct btf_param *arg) +{ + return btf_param_match_suffix(btf, arg, "__map"); +} + static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg) { return btf_param_match_suffix(btf, arg, "__alloc"); @@ -10921,6 +10926,7 @@ enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_RB_NODE, KF_ARG_PTR_TO_NULL, KF_ARG_PTR_TO_CONST_STR, + KF_ARG_PTR_TO_MAP, }; enum special_kfunc_type { @@ -11074,6 +11080,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_const_str(meta->btf, &args[argno])) return KF_ARG_PTR_TO_CONST_STR; + if (is_kfunc_arg_map(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_MAP; + if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) { if (!btf_type_is_struct(ref_t)) { verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n", @@ -11674,6 +11683,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ switch (kf_arg_type) { case KF_ARG_PTR_TO_NULL: continue; + case KF_ARG_PTR_TO_MAP: case KF_ARG_PTR_TO_ALLOC_BTF_ID: case KF_ARG_PTR_TO_BTF_ID: if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta)) @@ -11890,6 +11900,12 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (ret < 0) return ret; break; + case KF_ARG_PTR_TO_MAP: + /* If argument has '__map' suffix expect 'struct bpf_map *' */ + ref_id = *reg2btf_ids[CONST_PTR_TO_MAP]; + ref_t = btf_type_by_id(btf_vmlinux, ref_id); + ref_tname = btf_name_by_offset(btf, ref_t->name_off); + fallthrough; case KF_ARG_PTR_TO_BTF_ID: /* Only base_type is checked, further checks are done here */ if ((base_type(reg->type) != PTR_TO_BTF_ID || From cf2c2e4a3d910270903d50462aaa75140cdb2c96 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 6 Mar 2024 19:12:25 -0800 Subject: [PATCH 3/6] bpf: Plumb get_unmapped_area() callback into bpf_map_ops Subsequent patches introduce bpf_arena that imposes special alignment requirements on address selection. Acked-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20240307031228.42896-4-alexei.starovoitov@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 3 +++ kernel/bpf/syscall.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 785660810e6a..95e07673cdc1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -139,6 +139,9 @@ struct bpf_map_ops { int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma); __poll_t (*map_poll)(struct bpf_map *map, struct file *filp, struct poll_table_struct *pts); + unsigned long (*map_get_unmapped_area)(struct file *filep, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); /* Functions called by bpf_local_storage maps */ int (*map_local_storage_charge)(struct bpf_local_storage_map *smap, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b2750b79ac80..f63f4da4db5e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -937,6 +937,21 @@ static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts) return EPOLLERR; } +static unsigned long bpf_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + struct bpf_map *map = filp->private_data; + + if (map->ops->map_get_unmapped_area) + return map->ops->map_get_unmapped_area(filp, addr, len, pgoff, flags); +#ifdef CONFIG_MMU + return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); +#else + return addr; +#endif +} + const struct file_operations bpf_map_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = bpf_map_show_fdinfo, @@ -946,6 +961,7 @@ const struct file_operations bpf_map_fops = { .write = bpf_dummy_write, .mmap = bpf_map_mmap, .poll = bpf_map_poll, + .get_unmapped_area = bpf_get_unmapped_area, }; int bpf_map_new_fd(struct bpf_map *map, int flags) From d147357e2e5977c5fe9218457a1e359fd1d36609 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 6 Mar 2024 19:12:26 -0800 Subject: [PATCH 4/6] libbpf: Allow specifying 64-bit integers in map BTF. __uint() macro that is used to specify map attributes like: __uint(type, BPF_MAP_TYPE_ARRAY); __uint(map_flags, BPF_F_MMAPABLE); It is limited to 32-bit, since BTF_KIND_ARRAY has u32 "number of elements" field in "struct btf_array". Introduce __ulong() macro that allows specifying values bigger than 32-bit. In map definition "map_extra" is the only u64 field, so far. Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20240307031228.42896-5-alexei.starovoitov@gmail.com Signed-off-by: Martin KaFai Lau --- tools/lib/bpf/bpf_helpers.h | 1 + tools/lib/bpf/libbpf.c | 44 +++++++++++++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 79eaa581be98..112b1504e072 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -13,6 +13,7 @@ #define __uint(name, val) int (*name)[val] #define __type(name, val) typeof(val) *name #define __array(name, val) typeof(val) *name[] +#define __ulong(name, val) enum { ___bpf_concat(__unique_value, __COUNTER__) = val } name /* * Helper macro to place programs, maps, license in diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 672fca94ff53..567ad367e7aa 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2335,6 +2335,46 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf, return true; } +static bool get_map_field_long(const char *map_name, const struct btf *btf, + const struct btf_member *m, __u64 *res) +{ + const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); + const char *name = btf__name_by_offset(btf, m->name_off); + + if (btf_is_ptr(t)) { + __u32 res32; + bool ret; + + ret = get_map_field_int(map_name, btf, m, &res32); + if (ret) + *res = (__u64)res32; + return ret; + } + + if (!btf_is_enum(t) && !btf_is_enum64(t)) { + pr_warn("map '%s': attr '%s': expected ENUM or ENUM64, got %s.\n", + map_name, name, btf_kind_str(t)); + return false; + } + + if (btf_vlen(t) != 1) { + pr_warn("map '%s': attr '%s': invalid __ulong\n", + map_name, name); + return false; + } + + if (btf_is_enum(t)) { + const struct btf_enum *e = btf_enum(t); + + *res = e->val; + } else { + const struct btf_enum64 *e = btf_enum64(t); + + *res = btf_enum64_value(e); + } + return true; +} + static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name) { int len; @@ -2568,9 +2608,9 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, map_def->pinning = val; map_def->parts |= MAP_DEF_PINNING; } else if (strcmp(name, "map_extra") == 0) { - __u32 map_extra; + __u64 map_extra; - if (!get_map_field_int(map_name, btf, m, &map_extra)) + if (!get_map_field_long(map_name, btf, m, &map_extra)) return -EINVAL; map_def->map_extra = map_extra; map_def->parts |= MAP_DEF_MAP_EXTRA; From 1576b07961971d4eeb0e269c7133e9a6d430daf8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 6 Mar 2024 19:12:27 -0800 Subject: [PATCH 5/6] bpftool: rename is_internal_mmapable_map into is_mmapable_map It's not restricted to working with "internal" maps, it cares about any map that can be mmap'ed. Reflect that in more succinct and generic name. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Quentin Monnet Link: https://lore.kernel.org/r/20240307031228.42896-6-alexei.starovoitov@gmail.com Signed-off-by: Martin KaFai Lau --- tools/bpf/bpftool/gen.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 1f579eacd9d4..a3d72be347b0 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -248,7 +248,7 @@ static const struct btf_type *find_type_for_map(struct btf *btf, const char *map return NULL; } -static bool is_internal_mmapable_map(const struct bpf_map *map, char *buf, size_t sz) +static bool is_mmapable_map(const struct bpf_map *map, char *buf, size_t sz) { if (!bpf_map__is_internal(map) || !(bpf_map__map_flags(map) & BPF_F_MMAPABLE)) return false; @@ -274,7 +274,7 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) bpf_object__for_each_map(map, obj) { /* only generate definitions for memory-mapped internal maps */ - if (!is_internal_mmapable_map(map, map_ident, sizeof(map_ident))) + if (!is_mmapable_map(map, map_ident, sizeof(map_ident))) continue; sec = find_type_for_map(btf, map_ident); @@ -327,7 +327,7 @@ static int codegen_subskel_datasecs(struct bpf_object *obj, const char *obj_name bpf_object__for_each_map(map, obj) { /* only generate definitions for memory-mapped internal maps */ - if (!is_internal_mmapable_map(map, map_ident, sizeof(map_ident))) + if (!is_mmapable_map(map, map_ident, sizeof(map_ident))) continue; sec = find_type_for_map(btf, map_ident); @@ -504,7 +504,7 @@ static void codegen_asserts(struct bpf_object *obj, const char *obj_name) ", obj_name); bpf_object__for_each_map(map, obj) { - if (!is_internal_mmapable_map(map, map_ident, sizeof(map_ident))) + if (!is_mmapable_map(map, map_ident, sizeof(map_ident))) continue; sec = find_type_for_map(btf, map_ident); @@ -720,7 +720,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h const void *mmap_data = NULL; size_t mmap_size = 0; - if (!is_internal_mmapable_map(map, ident, sizeof(ident))) + if (!is_mmapable_map(map, ident, sizeof(ident))) continue; codegen("\ @@ -782,7 +782,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h bpf_object__for_each_map(map, obj) { const char *mmap_flags; - if (!is_internal_mmapable_map(map, ident, sizeof(ident))) + if (!is_mmapable_map(map, ident, sizeof(ident))) continue; if (bpf_map__map_flags(map) & BPF_F_RDONLY_PROG) @@ -871,7 +871,7 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped) ", i, bpf_map__name(map), i, ident); /* memory-mapped internal maps */ - if (mmaped && is_internal_mmapable_map(map, ident, sizeof(ident))) { + if (mmaped && is_mmapable_map(map, ident, sizeof(ident))) { printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n", i, ident); } @@ -1617,7 +1617,7 @@ static int do_subskeleton(int argc, char **argv) /* Also count all maps that have a name */ map_cnt++; - if (!is_internal_mmapable_map(map, ident, sizeof(ident))) + if (!is_mmapable_map(map, ident, sizeof(ident))) continue; map_type_id = bpf_map__btf_value_type_id(map); @@ -1739,7 +1739,7 @@ static int do_subskeleton(int argc, char **argv) /* walk through each symbol and emit the runtime representation */ bpf_object__for_each_map(map, obj) { - if (!is_internal_mmapable_map(map, ident, sizeof(ident))) + if (!is_mmapable_map(map, ident, sizeof(ident))) continue; map_type_id = bpf_map__btf_value_type_id(map); From fe5064158c561b807af5708c868f6c7cb5144e01 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 6 Mar 2024 19:12:28 -0800 Subject: [PATCH 6/6] bpf: Tell bpf programs kernel's PAGE_SIZE vmlinux BTF includes all kernel enums. Add __PAGE_SIZE = PAGE_SIZE enum, so that bpf programs that include vmlinux.h can easily access it. Acked-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20240307031228.42896-7-alexei.starovoitov@gmail.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 9ee4536d0a09..134b7979f537 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -88,13 +88,18 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns return NULL; } +/* tell bpf programs that include vmlinux.h kernel's PAGE_SIZE */ +enum page_size_enum { + __PAGE_SIZE = PAGE_SIZE +}; + struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags) { gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags); struct bpf_prog_aux *aux; struct bpf_prog *fp; - size = round_up(size, PAGE_SIZE); + size = round_up(size, __PAGE_SIZE); fp = __vmalloc(size, gfp_flags); if (fp == NULL) return NULL;