// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Facebook */

#include <uapi/linux/btf.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/bpf_perf_event.h>
#include <uapi/linux/types.h>
#include <linux/seq_file.h>
#include <linux/compiler.h>
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/sort.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/bpf.h>
#include <linux/bpf_lsm.h>
#include <linux/skmsg.h>
#include <linux/perf_event.h>
#include <linux/bsearch.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <net/netfilter/nf_bpf_link.h>
#include <net/sock.h>
#include <net/xdp.h>
#include "../tools/lib/bpf/relo_core.h"

/* BTF (BPF Type Format) is the metadata format which describes
 * the data types used by BPF programs and maps. Hence, it basically
 * focuses on the C programming language, which modern BPF primarily
 * targets.
 *
 * ELF Section:
 * ~~~~~~~~~~~
 * The BTF data is stored under the ".BTF" ELF section.
 *
 * struct btf_type:
 * ~~~~~~~~~~~~~~~
 * Each 'struct btf_type' object describes a C data type.
 * Depending on the type it is describing, a 'struct btf_type'
 * object may be followed by more data, e.g. to describe an array,
 * 'struct btf_type' is followed by 'struct btf_array'.
 *
 * 'struct btf_type' and any extra data following it are
 * 4-byte aligned.
 *
 * Type section:
 * ~~~~~~~~~~~~~
 * The BTF type section contains a list of 'struct btf_type' objects.
 * Each one describes a C type. Recall from the above section
 * that a 'struct btf_type' object may be immediately followed by extra
 * data in order to describe some particular C types.
 *
 * type_id:
 * ~~~~~~~
 * Each btf_type object is identified by a type_id. The type_id
 * is implied by the location of the btf_type object in
 * the BTF type section. The first one has type_id 1, the second
 * one has type_id 2, etc. Hence, an earlier btf_type has
 * a smaller type_id.
 *
 * A btf_type object may refer to another btf_type object by its
 * type_id (i.e. the "type" in the "struct btf_type").
 *
 * NOTE that we cannot assume any reference order.
 * A btf_type object can refer to an earlier btf_type object
 * but it can also refer to a later btf_type object.
 *
 * For example, to describe "const void *", a btf_type object
 * describing "const" may refer to another btf_type object
 * describing "void *". This type reference is done
 * by specifying a type_id:
 *
 * [1] CONST (anon) type_id=2
 * [2] PTR (anon) type_id=0
 *
 * The above is the btf_verifier debug log:
 *   - Each line starting with "[?]" is a btf_type object
 *   - [?] is the type_id of the btf_type object.
 *   - CONST/PTR is the BTF_KIND_XXX
 *   - "(anon)" is the name of the type. It just
 *     happens that CONST and PTR have no name.
 *   - type_id=XXX is the 'u32 type' in btf_type
 *
 * NOTE: "void" has type_id 0
 *
 * String section:
 * ~~~~~~~~~~~~~~
 * The BTF string section contains the names used by the type section.
 * Each string is referred to by an "offset" from the beginning of the
 * string section.
 *
 * Each string is '\0' terminated.
 *
 * The first character in the string section must be '\0',
 * which is used to mean 'anonymous'. Some btf_type may not
 * have a name.
 */
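
/* Because the extra data directly follows 'struct btf_type', accessors
 * simply step past the fixed-size header; e.g. for an array, a sketch:
 *
 *	static const struct btf_array *btf_type_array(const struct btf_type *t)
 *	{
 *		return (const struct btf_array *)(t + 1);
 *	}
 */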

/* BTF verification:
 *
 * To verify BTF data, two passes are needed.
 *
 * Pass #1
 * ~~~~~~~
 * The first pass is to collect all btf_type objects into
 * an array: "btf->types".
 *
 * Depending on the C type that a btf_type is describing,
 * a btf_type may be followed by extra data. We don't know
 * how many btf_types there are, and more importantly we don't
 * know where each btf_type is located in the type section.
 *
 * Without knowing the location of each type_id, most verifications
 * cannot be done, e.g. an earlier btf_type may refer to a later
 * btf_type (recall the "const void *" above), so we cannot
 * check this type reference in the first pass.
 *
 * The first pass still does some verification (e.g.
 * checking that the name is a valid offset into the string section).
 *
 * Pass #2
 * ~~~~~~~
 * The main focus is to resolve a btf_type that is referring
 * to another type.
 *
 * We have to ensure the referring type:
 * 1) does exist in the BTF (i.e. in btf->types[])
 * 2) does not cause a loop:
 *	struct A {
 *		struct B b;
 *	};
 *
 *	struct B {
 *		struct A a;
 *	};
 *
 * btf_type_needs_resolve() decides if a btf_type needs
 * to be resolved.
 *
 * The needs_resolve type implements the "resolve()" ops which
 * essentially does a DFS and detects back edges.
 *
 * During resolve (or DFS), different C types have different
 * "RESOLVED" conditions.
 *
 * When resolving a BTF_KIND_STRUCT, we need to resolve all its
 * members because a member is always referring to another
 * type. A struct's member can be treated as "RESOLVED" if
 * it is referring to a BTF_KIND_PTR. Otherwise, the
 * following valid C struct would be rejected:
 *
 *	struct A {
 *		int m;
 *		struct A *a;
 *	};
 *
 * When resolving a BTF_KIND_PTR, it needs to keep resolving if
 * it is referring to another BTF_KIND_PTR. Otherwise, we cannot
 * detect a pointer loop, e.g.:
 * BTF_KIND_CONST -> BTF_KIND_PTR -> BTF_KIND_CONST -> BTF_KIND_PTR +
 *                        ^                                         |
 *                        +-----------------------------------------+
 *
 */
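
/* The resolve DFS described above is made iterative and bounded: each
 * type being resolved is pushed onto a fixed-depth stack (see
 * struct btf_verifier_env further below). A sketch of the push step:
 *
 *	if (env->top_stack == MAX_RESOLVE_DEPTH)
 *		return -E2BIG;
 *	if (env->visit_states[type_id] != NOT_VISITED)
 *		return -EEXIST;		// already seen on this pass
 *	env->visit_states[type_id] = VISITED;
 *	v = &env->stack[env->top_stack++];
 *	v->t = t;
 *	v->type_id = type_id;
 *	v->next_member = 0;
 */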

#define BITS_PER_U128 (sizeof(u64) * BITS_PER_BYTE * 2)
#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
#define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3)
#define BITS_ROUNDUP_BYTES(bits) \
	(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
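
/* For example (illustrative): a 17-bit bitfield spans
 * BITS_ROUNDDOWN_BYTES(17) == 2 whole bytes plus a partial byte
 * (BITS_PER_BYTE_MASKED(17) == 1), so BITS_ROUNDUP_BYTES(17) == 3
 * bytes must be copied to capture all of its bits.
 */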

#define BTF_INFO_MASK 0x9f00ffff
#define BTF_INT_MASK 0x0fffffff
#define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE)
#define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET)
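
/* BTF_INFO_MASK covers the valid bits of btf_type.info: vlen in bits 0-15,
 * kind in bits 24-28 and kind_flag in bit 31; BTF_INT_MASK plays the same
 * role for the int-type encoding word. A set bit outside a mask fails the
 * metadata check, roughly:
 *
 *	if (t->info & ~BTF_INFO_MASK)
 *		return -EINVAL;		// reserved bits must be zero
 */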

/* 16MB for 64k structs (each having 16 members) and
 * a few MB of space for the string section.
 * The hard limit is S32_MAX.
 */
#define BTF_MAX_SIZE (16 * 1024 * 1024)

#define for_each_member_from(i, from, struct_type, member)		\
	for (i = from, member = btf_type_member(struct_type) + from;	\
	     i < btf_type_vlen(struct_type);				\
	     i++, member++)
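
/* Illustrative use: resume walking a struct's members where the previous
 * resolve attempt left off, e.g.:
 *
 *	const struct btf_member *member;
 *	u16 i;
 *
 *	for_each_member_from(i, v->next_member, v->t, member) {
 *		// member->type refers to another type_id to resolve
 *	}
 */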

#define for_each_vsi_from(i, from, struct_type, member)				\
	for (i = from, member = btf_type_var_secinfo(struct_type) + from;	\
	     i < btf_type_vlen(struct_type);					\
	     i++, member++)

DEFINE_IDR(btf_idr);
DEFINE_SPINLOCK(btf_idr_lock);

enum btf_kfunc_hook {
	BTF_KFUNC_HOOK_COMMON,
	BTF_KFUNC_HOOK_XDP,
	BTF_KFUNC_HOOK_TC,
	BTF_KFUNC_HOOK_STRUCT_OPS,
	BTF_KFUNC_HOOK_TRACING,
	BTF_KFUNC_HOOK_SYSCALL,
	BTF_KFUNC_HOOK_FMODRET,
	BTF_KFUNC_HOOK_CGROUP,
	BTF_KFUNC_HOOK_SCHED_ACT,
	BTF_KFUNC_HOOK_SK_SKB,
	BTF_KFUNC_HOOK_SOCKET_FILTER,
	BTF_KFUNC_HOOK_LWT,
	BTF_KFUNC_HOOK_NETFILTER,
	BTF_KFUNC_HOOK_KPROBE,
	BTF_KFUNC_HOOK_MAX,
};

enum {
	BTF_KFUNC_SET_MAX_CNT = 256,
	BTF_DTOR_KFUNC_MAX_CNT = 256,
	BTF_KFUNC_FILTER_MAX_CNT = 16,
};

struct btf_kfunc_hook_filter {
	btf_kfunc_filter_t filters[BTF_KFUNC_FILTER_MAX_CNT];
	u32 nr_filters;
};

struct btf_kfunc_set_tab {
	struct btf_id_set8 *sets[BTF_KFUNC_HOOK_MAX];
	struct btf_kfunc_hook_filter hook_filters[BTF_KFUNC_HOOK_MAX];
};
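
/* The per-hook sets above are filled in via register_btf_kfunc_id_set()
 * calls from subsystem initcalls. A minimal registration sketch, using a
 * hypothetical kfunc name and the usual btf_ids.h helpers:
 *
 *	BTF_SET8_START(my_kfunc_ids)
 *	BTF_ID_FLAGS(func, bpf_my_kfunc, KF_ACQUIRE)
 *	BTF_SET8_END(my_kfunc_ids)
 *
 *	static const struct btf_kfunc_id_set my_kfunc_set = {
 *		.owner = THIS_MODULE,
 *		.set   = &my_kfunc_ids,
 *	};
 *
 *	register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &my_kfunc_set);
 */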

struct btf_id_dtor_kfunc_tab {
	u32 cnt;
	struct btf_id_dtor_kfunc dtors[];
};

struct btf_struct_ops_tab {
	u32 cnt;
	u32 capacity;
	struct bpf_struct_ops_desc ops[];
};

struct btf {
	void *data;
	struct btf_type **types;
	u32 *resolved_ids;
	u32 *resolved_sizes;
	const char *strings;
	void *nohdr_data;
	struct btf_header hdr;
	u32 nr_types; /* includes VOID for base BTF */
	u32 types_size;
	u32 data_size;
	refcount_t refcnt;
	u32 id;
	struct rcu_head rcu;
	struct btf_kfunc_set_tab *kfunc_set_tab;
	struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab;
	struct btf_struct_metas *struct_meta_tab;
	struct btf_struct_ops_tab *struct_ops_tab;

	/* split BTF support */
	struct btf *base_btf;
	u32 start_id; /* first type ID in this BTF (0 for base BTF) */
	u32 start_str_off; /* first string offset (0 for base BTF) */
	char name[MODULE_NAME_LEN];
	bool kernel_btf;
	__u32 *base_id_map; /* map from distilled base BTF -> vmlinux BTF ids */
};
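
/* With split BTF, type_ids below start_id live in the base (vmlinux) BTF,
 * so a lookup first walks down the base_btf chain. A simplified sketch of
 * the lookup logic:
 *
 *	while (type_id < btf->start_id)
 *		btf = btf->base_btf;
 *	t = btf->types[type_id - btf->start_id];
 */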

enum verifier_phase {
	CHECK_META,
	CHECK_TYPE,
};

struct resolve_vertex {
	const struct btf_type *t;
	u32 type_id;
	u16 next_member;
};

enum visit_state {
	NOT_VISITED,
	VISITED,
	RESOLVED,
};
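
/* One visit state is tracked per type_id (see visit_states in
 * struct btf_verifier_env below): a type moves from NOT_VISITED to
 * VISITED when it is pushed for DFS, and to RESOLVED once every type it
 * refers to is settled; meeting a VISITED type again on the current DFS
 * path indicates a loop.
 */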

enum resolve_mode {
	RESOLVE_TBD,			/* To Be Determined */
	RESOLVE_PTR,			/* Resolving for Pointer */
	RESOLVE_STRUCT_OR_ARRAY,	/* Resolving for struct/union
					 * or array
					 */
};

#define MAX_RESOLVE_DEPTH 32

struct btf_sec_info {
	u32 off;
	u32 len;
};

struct btf_verifier_env {
	struct btf *btf;
	u8 *visit_states;
	struct resolve_vertex stack[MAX_RESOLVE_DEPTH];
	struct bpf_verifier_log log;
	u32 log_type_id;
	u32 top_stack;
	enum verifier_phase phase;
	enum resolve_mode resolve_mode;
};

static const char * const btf_kind_str[NR_BTF_KINDS] = {
	[BTF_KIND_UNKN]		= "UNKNOWN",
	[BTF_KIND_INT]		= "INT",
	[BTF_KIND_PTR]		= "PTR",
	[BTF_KIND_ARRAY]	= "ARRAY",
	[BTF_KIND_STRUCT]	= "STRUCT",
	[BTF_KIND_UNION]	= "UNION",
	[BTF_KIND_ENUM]		= "ENUM",
	[BTF_KIND_FWD]		= "FWD",
	[BTF_KIND_TYPEDEF]	= "TYPEDEF",
	[BTF_KIND_VOLATILE]	= "VOLATILE",
	[BTF_KIND_CONST]	= "CONST",
	[BTF_KIND_RESTRICT]	= "RESTRICT",
	[BTF_KIND_FUNC]		= "FUNC",
	[BTF_KIND_FUNC_PROTO]	= "FUNC_PROTO",
	[BTF_KIND_VAR]		= "VAR",
	[BTF_KIND_DATASEC]	= "DATASEC",
	[BTF_KIND_FLOAT]	= "FLOAT",
	[BTF_KIND_DECL_TAG]	= "DECL_TAG",
	[BTF_KIND_TYPE_TAG]	= "TYPE_TAG",
	[BTF_KIND_ENUM64]	= "ENUM64",
};

const char *btf_type_str(const struct btf_type *t)
{
	return btf_kind_str[BTF_INFO_KIND(t->info)];
}
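
/* Illustrative use in verifier logging (btf_verifier_log() is defined
 * later in this file):
 *
 *	btf_verifier_log(env, "Unsupported kind: %s", btf_type_str(t));
 */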

/* Chunk size we use in safe copy of data to be shown. */
#define BTF_SHOW_OBJ_SAFE_SIZE		32

/*
 * This is the maximum size of a base type value (equivalent to a
 * 128-bit int); if we are at the end of our safe buffer and have
 * less than 16 bytes of space we can't be assured of being able
 * to copy the next type safely, so in such cases we will initiate
 * a new copy.
 */
#define BTF_SHOW_OBJ_BASE_TYPE_SIZE	16

/* Type name size */
#define BTF_SHOW_NAME_SIZE		80

/*
 * The suffix of a type that indicates it cannot alias another type when
 * comparing BTF IDs for kfunc invocations.
 */
#define NOCAST_ALIAS_SUFFIX		"___init"
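
/* For example, "struct nf_conn___init" is a no-cast alias of
 * "struct nf_conn": the two are bitwise compatible, but the suffix makes
 * kfunc argument matching treat them as distinct, so a not-yet-initialized
 * conntrack entry cannot be passed where a fully initialized one is
 * expected.
 */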

/*
 * Common data to all BTF show operations. Private show functions can add
 * their own data to a structure containing a struct btf_show and consult it
 * in the show callback. See btf_type_show() below.
 *
 * One challenge with showing nested data is we want to skip 0-valued
 * data, but in order to figure out whether a nested object is all zeros
 * we need to walk through it. As a result, we need to make two passes
 * when handling structs, unions and arrays; the first pass simply looks
 * for nonzero data, while the second actually does the display. The first
 * pass is signalled by show->state.depth_check being set, and if we
 * encounter a non-zero value we set show->state.depth_to_show to
 * the depth at which we encountered it. When we have completed the
 * first pass, we will know if anything needs to be displayed if
 * depth_to_show > depth. See btf_[struct,array]_show() for the
 * implementation of this.
 *
 * Another problem is we want to ensure the data for display is safe to
 * access. To support this, the anonymous "struct {} obj" tracks the data
 * object and our safe copy of it. We copy portions of the data needed
 * to the object "safe" buffer, but because its size is limited to
 * BTF_SHOW_OBJ_SAFE_SIZE bytes, multiple copies may be required as we
 * traverse larger objects for display.
 *
 * The various data type show functions all start with a call to
 * btf_show_start_type() which returns a pointer to the safe copy
 * of the data needed (or if BTF_SHOW_UNSAFE is specified, to the
 * raw data itself). btf_show_obj_safe() is responsible for
 * using copy_from_kernel_nofault() to update the safe data if necessary
 * as we traverse the object's data. skbuff-like semantics are
 * used:
 *
 * - obj.head points to the start of the toplevel object for display
 * - obj.size is the size of the toplevel object
 * - obj.data points to the current point in the original data at
 *   which our safe data starts. obj.data will advance as we copy
 *   portions of the data.
 *
 * In most cases a single copy will suffice, but larger data structures
 * such as "struct task_struct" will require many copies. The logic in
 * btf_show_obj_safe() determines when a new
 * copy_from_kernel_nofault() is needed.
 */

struct btf_show {
	u64 flags;
	void *target;	/* target of show operation (seq file, buffer) */
	__printf(2, 0) void (*showfn)(struct btf_show *show, const char *fmt, va_list args);
	const struct btf *btf;
	/* below are used during iteration */
	struct {
		u8 depth;
		u8 depth_to_show;
		u8 depth_check;
		u8 array_member:1,
		   array_terminated:1;
		u16 array_encoding;
		u32 type_id;
		int status;			/* non-zero for error */
		const struct btf_type *type;
		const struct btf_member *member;
		char name[BTF_SHOW_NAME_SIZE];	/* space for member name/type */
	} state;
	struct {
		u32 size;
		void *head;
		void *data;
		u8 safe[BTF_SHOW_OBJ_SAFE_SIZE];
	} obj;
};
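
/* A showfn typically just forwards to its target; a sketch of the
 * seq-file flavour:
 *
 *	static void btf_seq_show(struct btf_show *show, const char *fmt,
 *				 va_list args)
 *	{
 *		seq_vprintf((struct seq_file *)show->target, fmt, args);
 *	}
 */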

struct btf_kind_operations {
	s32 (*check_meta)(struct btf_verifier_env *env,
			  const struct btf_type *t,
			  u32 meta_left);
	int (*resolve)(struct btf_verifier_env *env,
		       const struct resolve_vertex *v);
	int (*check_member)(struct btf_verifier_env *env,
			    const struct btf_type *struct_type,
			    const struct btf_member *member,
			    const struct btf_type *member_type);
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-16 14:13:51 +08:00
|
|
|
int (*check_kflag_member)(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *struct_type,
|
|
|
|
const struct btf_member *member,
|
|
|
|
const struct btf_type *member_type);
|
2018-04-19 06:55:57 +08:00
|
|
|
void (*log_details)(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t);
|
2020-09-28 19:31:04 +08:00
|
|
|
void (*show)(const struct btf *btf, const struct btf_type *t,
|
2018-04-19 06:56:00 +08:00
|
|
|
u32 type_id, void *data, u8 bits_offsets,
|
2020-09-28 19:31:04 +08:00
|
|
|
struct btf_show *show);
|
2018-04-19 06:55:57 +08:00
|
|
|
};
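
/*
 * Dispatch sketch (editor's illustration; "int_ops" and its member
 * functions are named here only as an example): each BTF kind provides
 * one operations table, and callers reach it through kind_ops[] indexed
 * by the kind in t->info, e.g. btf_type_ops(t)->show(...):
 *
 *	static const struct btf_kind_operations int_ops = {
 *		.check_meta	= btf_int_check_meta,
 *		.resolve	= btf_df_resolve,
 *		.show		= btf_int_show,
 *	};
 */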

static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS];
static struct btf_type btf_void;

static int btf_resolve(struct btf_verifier_env *env,
		       const struct btf_type *t, u32 type_id);

static int btf_func_check(struct btf_verifier_env *env,
			  const struct btf_type *t);

static bool btf_type_is_modifier(const struct btf_type *t)
{
	/* Some of these are not strictly C modifiers, but they are
	 * grouped into the same bucket for BTF purposes:
	 * a type (t) that refers to another type through t->type
	 * AND whose size cannot be determined without following
	 * that t->type.
	 *
	 * ptr does not fall into this bucket
	 * because its size is always sizeof(void *).
	 */
	switch (BTF_INFO_KIND(t->info)) {
	case BTF_KIND_TYPEDEF:
	case BTF_KIND_VOLATILE:
	case BTF_KIND_CONST:
	case BTF_KIND_RESTRICT:
	case BTF_KIND_TYPE_TAG:
		return true;
	}

	return false;
}

bool btf_type_is_void(const struct btf_type *t)
{
	return t == &btf_void;
}

static bool btf_type_is_fwd(const struct btf_type *t)
{
	return BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
}

static bool btf_type_is_datasec(const struct btf_type *t)
{
	return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
}

static bool btf_type_is_decl_tag(const struct btf_type *t)
{
	return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG;
}

static bool btf_type_nosize(const struct btf_type *t)
{
	return btf_type_is_void(t) || btf_type_is_fwd(t) ||
	       btf_type_is_func(t) || btf_type_is_func_proto(t) ||
	       btf_type_is_decl_tag(t);
}

static bool btf_type_nosize_or_null(const struct btf_type *t)
{
	return !t || btf_type_nosize(t);
}

static bool btf_type_is_decl_tag_target(const struct btf_type *t)
{
	return btf_type_is_func(t) || btf_type_is_struct(t) ||
	       btf_type_is_var(t) || btf_type_is_typedef(t);
}

bool btf_is_vmlinux(const struct btf *btf)
{
	return btf->kernel_btf && !btf->base_btf;
}

u32 btf_nr_types(const struct btf *btf)
{
	u32 total = 0;

	while (btf) {
		total += btf->nr_types;
		btf = btf->base_btf;
	}

	return total;
}

s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
{
	const struct btf_type *t;
	const char *tname;
	u32 i, total;

	total = btf_nr_types(btf);
	for (i = 1; i < total; i++) {
		t = btf_type_by_id(btf, i);
		if (BTF_INFO_KIND(t->info) != kind)
			continue;

		tname = btf_name_by_offset(btf, t->name_off);
		if (!strcmp(tname, name))
			return i;
	}

	return -ENOENT;
}
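
/*
 * Usage sketch (editor's illustration; the caller context is assumed):
 * type_id 0 is void, so a successful lookup is always > 0 and failure
 * is a negative errno:
 *
 *	const struct btf *btf = bpf_get_btf_vmlinux();
 *	s32 id = btf_find_by_name_kind(btf, "task_struct", BTF_KIND_STRUCT);
 *
 *	if (id < 0)
 *		return id;
 */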

s32 bpf_find_btf_id(const char *name, u32 kind, struct btf **btf_p)
{
	struct btf *btf;
	s32 ret;
	int id;

	btf = bpf_get_btf_vmlinux();
	if (IS_ERR(btf))
		return PTR_ERR(btf);
	if (!btf)
		return -EINVAL;

	ret = btf_find_by_name_kind(btf, name, kind);
	/* ret is never zero, since btf_find_by_name_kind returns
	 * positive btf_id or negative error.
	 */
	if (ret > 0) {
		btf_get(btf);
		*btf_p = btf;
		return ret;
	}

	/* If name is not found in vmlinux's BTF then search in module's BTFs */
	spin_lock_bh(&btf_idr_lock);
	idr_for_each_entry(&btf_idr, btf, id) {
		if (!btf_is_module(btf))
			continue;
		/* linear search could be slow hence unlock/lock
		 * the IDR to avoid holding it for too long
		 */
		btf_get(btf);
		spin_unlock_bh(&btf_idr_lock);
		ret = btf_find_by_name_kind(btf, name, kind);
		if (ret > 0) {
			*btf_p = btf;
			return ret;
		}
		btf_put(btf);
		spin_lock_bh(&btf_idr_lock);
	}
	spin_unlock_bh(&btf_idr_lock);
	return ret;
}
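
/*
 * Usage sketch (editor's illustration): on success the returned btf has
 * had its refcount taken, so the caller must pair the lookup with
 * btf_put():
 *
 *	struct btf *btf;
 *	s32 id = bpf_find_btf_id("nf_conn", BTF_KIND_STRUCT, &btf);
 *
 *	if (id > 0) {
 *		const struct btf_type *t = btf_type_by_id(btf, id);
 *		...
 *		btf_put(btf);
 *	}
 */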

const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
					       u32 id, u32 *res_id)
{
	const struct btf_type *t = btf_type_by_id(btf, id);

	while (btf_type_is_modifier(t)) {
		id = t->type;
		t = btf_type_by_id(btf, t->type);
	}

	if (res_id)
		*res_id = id;

	return t;
}
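
/*
 * Example (editor's illustration): for "const volatile u32", starting
 * from the CONST node, btf_type_skip_modifiers() steps CONST ->
 * VOLATILE -> TYPEDEF(u32) -> INT and returns the INT type, with
 * *res_id set to the INT's type_id.  PTR is not a modifier, so starting
 * from a PTR node returns the pointer itself.
 */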

const struct btf_type *btf_type_resolve_ptr(const struct btf *btf,
					    u32 id, u32 *res_id)
{
	const struct btf_type *t;

	t = btf_type_skip_modifiers(btf, id, NULL);
	if (!btf_type_is_ptr(t))
		return NULL;

	return btf_type_skip_modifiers(btf, t->type, res_id);
}

const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf,
						 u32 id, u32 *res_id)
{
	const struct btf_type *ptype;

	ptype = btf_type_resolve_ptr(btf, id, res_id);
	if (ptype && btf_type_is_func_proto(ptype))
		return ptype;

	return NULL;
}

/* Types that act only as a source, not sink or intermediate
 * type when resolving.
 */
static bool btf_type_is_resolve_source_only(const struct btf_type *t)
{
	return btf_type_is_var(t) ||
	       btf_type_is_decl_tag(t) ||
	       btf_type_is_datasec(t);
}

/* What types need to be resolved?
 *
 * btf_type_is_modifier() is an obvious one.
 *
 * btf_type_is_struct() because its member refers to
 * another type (through member->type).
 *
 * btf_type_is_var() because the variable refers to
 * another type.  btf_type_is_datasec() holds multiple
 * btf_type_is_var() types that need resolving.
 *
 * btf_type_is_array() because its element (array->type)
 * refers to another type.  An array can be thought of as a
 * special case of struct: the same member type repeated
 * array->nelems times.
 */
static bool btf_type_needs_resolve(const struct btf_type *t)
{
	return btf_type_is_modifier(t) ||
	       btf_type_is_ptr(t) ||
	       btf_type_is_struct(t) ||
	       btf_type_is_array(t) ||
	       btf_type_is_var(t) ||
	       btf_type_is_func(t) ||
	       btf_type_is_decl_tag(t) ||
	       btf_type_is_datasec(t);
}

/* t->size can be used */
static bool btf_type_has_size(const struct btf_type *t)
{
	switch (BTF_INFO_KIND(t->info)) {
	case BTF_KIND_INT:
	case BTF_KIND_STRUCT:
	case BTF_KIND_UNION:
	case BTF_KIND_ENUM:
	case BTF_KIND_DATASEC:
	case BTF_KIND_FLOAT:
	case BTF_KIND_ENUM64:
		return true;
	}

	return false;
}

static const char *btf_int_encoding_str(u8 encoding)
{
	if (encoding == 0)
		return "(none)";
	else if (encoding == BTF_INT_SIGNED)
		return "SIGNED";
	else if (encoding == BTF_INT_CHAR)
		return "CHAR";
	else if (encoding == BTF_INT_BOOL)
		return "BOOL";
	else
		return "UNKN";
}

static u32 btf_type_int(const struct btf_type *t)
{
	return *(u32 *)(t + 1);
}
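
/*
 * Example (editor's illustration): the u32 returned by btf_type_int()
 * packs encoding, bit offset and bit size, unpacked with the uapi
 * macros:
 *
 *	u32 int_data = btf_type_int(t);
 *	u8 encoding  = BTF_INT_ENCODING(int_data);
 *	u8 offset    = BTF_INT_OFFSET(int_data);
 *	u8 nr_bits   = BTF_INT_BITS(int_data);
 *
 * A plain "int" yields encoding=BTF_INT_SIGNED, offset=0, nr_bits=32.
 */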

static const struct btf_array *btf_type_array(const struct btf_type *t)
{
	return (const struct btf_array *)(t + 1);
}

static const struct btf_enum *btf_type_enum(const struct btf_type *t)
{
	return (const struct btf_enum *)(t + 1);
}

static const struct btf_var *btf_type_var(const struct btf_type *t)
{
	return (const struct btf_var *)(t + 1);
}

static const struct btf_decl_tag *btf_type_decl_tag(const struct btf_type *t)
{
	return (const struct btf_decl_tag *)(t + 1);
}

/* BTF_KIND_ENUM64 stores each 64-bit value as a lo32/hi32 pair so the
 * type section keeps its 4-byte alignment.
 */
static const struct btf_enum64 *btf_type_enum64(const struct btf_type *t)
{
	return (const struct btf_enum64 *)(t + 1);
}
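
/*
 * Example (editor's illustration): callers reassemble a 64-bit
 * enumerator value from the split halves:
 *
 *	const struct btf_enum64 *e = btf_type_enum64(t);
 *	u64 val = ((u64)e->val_hi32 << 32) | e->val_lo32;
 */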

static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
{
	return kind_ops[BTF_INFO_KIND(t->info)];
}

static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
{
	if (!BTF_STR_OFFSET_VALID(offset))
		return false;

	while (offset < btf->start_str_off)
		btf = btf->base_btf;

	offset -= btf->start_str_off;
	return offset < btf->hdr.str_len;
}

/* Accept '.' in names so that compiler-generated symbols such as
 * "asan.module_ctor" still validate as identifiers.
 */
static bool __btf_name_char_ok(char c, bool first)
{
	if ((first ? !isalpha(c) :
		     !isalnum(c)) &&
	    c != '_' &&
	    c != '.')
		return false;
	return true;
}

const char *btf_str_by_offset(const struct btf *btf, u32 offset)
{
	while (offset < btf->start_str_off)
		btf = btf->base_btf;

	offset -= btf->start_str_off;
	if (offset < btf->hdr.str_len)
		return &btf->strings[offset];

	return NULL;
}

static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
{
	/* offset must be valid */
	const char *src = btf_str_by_offset(btf, offset);
	const char *src_limit;

	if (!__btf_name_char_ok(*src, true))
		return false;

	/* set a limit on identifier length */
	src_limit = src + KSYM_NAME_LEN;
	src++;
	while (*src && src < src_limit) {
		if (!__btf_name_char_ok(*src, false))
			return false;
		src++;
	}

	return !*src;
}

/* Allow any printable character in DATASEC names */
static bool btf_name_valid_section(const struct btf *btf, u32 offset)
{
	/* offset must be valid */
	const char *src = btf_str_by_offset(btf, offset);
	const char *src_limit;

	if (!*src)
		return false;

	/* set a limit on identifier length */
	src_limit = src + KSYM_NAME_LEN;
	while (*src && src < src_limit) {
		if (!isprint(*src))
			return false;
		src++;
	}

	return !*src;
}

static const char *__btf_name_by_offset(const struct btf *btf, u32 offset)
{
	const char *name;

	if (!offset)
		return "(anon)";

	name = btf_str_by_offset(btf, offset);
	return name ?: "(invalid-name-offset)";
}

const char *btf_name_by_offset(const struct btf *btf, u32 offset)
{
	return btf_str_by_offset(btf, offset);
}

const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
{
	while (type_id < btf->start_id)
		btf = btf->base_btf;

	type_id -= btf->start_id;
	if (type_id >= btf->nr_types)
		return NULL;
	return btf->types[type_id];
}
EXPORT_SYMBOL_GPL(btf_type_by_id);
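
/*
 * Example (editor's illustration, with made-up numbers): with split BTF,
 * a module's btf has base_btf pointing at vmlinux BTF and start_id just
 * past the last vmlinux type id.  A lookup of id 80000 on a module btf
 * whose start_id is 70000 indexes that btf->types[10000]; a lookup of
 * id 500 first walks down to the vmlinux btf (start_id 1) and indexes
 * its types[499].
 */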

/*
 * Regular int is not a bit field and it must be either
 * u8/u16/u32/u64 or __int128.
 */
static bool btf_type_int_is_regular(const struct btf_type *t)
{
	u8 nr_bits, nr_bytes;
	u32 int_data;

	int_data = btf_type_int(t);
	nr_bits = BTF_INT_BITS(int_data);
	nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
	if (BITS_PER_BYTE_MASKED(nr_bits) ||
	    BTF_INT_OFFSET(int_data) ||
	    (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
	     nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64) &&
	     nr_bytes != (2 * sizeof(u64)))) {
		return false;
	}

	return true;
}

/*
 * Check that given struct member is a regular int with expected
 * offset and size.
 */
bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
			   const struct btf_member *m,
			   u32 expected_offset, u32 expected_size)
{
	const struct btf_type *t;
	u32 id, int_data;
	u8 nr_bits;

	id = m->type;
	t = btf_type_id_size(btf, &id, NULL);
	if (!t || !btf_type_is_int(t))
		return false;

	int_data = btf_type_int(t);
	nr_bits = BTF_INT_BITS(int_data);
	if (btf_type_kflag(s)) {
		u32 bitfield_size = BTF_MEMBER_BITFIELD_SIZE(m->offset);
		u32 bit_offset = BTF_MEMBER_BIT_OFFSET(m->offset);

		/* if kflag set, int should be a regular int and
		 * bit offset should be at byte boundary.
		 */
		return !bitfield_size &&
		       BITS_ROUNDUP_BYTES(bit_offset) == expected_offset &&
		       BITS_ROUNDUP_BYTES(nr_bits) == expected_size;
	}

	if (BTF_INT_OFFSET(int_data) ||
	    BITS_PER_BYTE_MASKED(m->offset) ||
	    BITS_ROUNDUP_BYTES(m->offset) != expected_offset ||
	    BITS_PER_BYTE_MASKED(nr_bits) ||
	    BITS_ROUNDUP_BYTES(nr_bits) != expected_size)
		return false;

	return true;
}
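
/*
 * Example (editor's illustration): with a struct's kind_flag set,
 * btf_member->offset packs the bitfield size in the top byte and the
 * bit offset in the low 24 bits:
 *
 *	u32 bitfield_size = BTF_MEMBER_BITFIELD_SIZE(m->offset);
 *	u32 bit_offset	  = BTF_MEMBER_BIT_OFFSET(m->offset);
 *
 * so a member "int b:4" at bit 160 is stored as bitfield_size=4,
 * bit_offset=160.
 */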

/* Similar to btf_type_skip_modifiers() but does not skip typedefs. */
static const struct btf_type *btf_type_skip_qualifiers(const struct btf *btf,
						       u32 id)
{
	const struct btf_type *t = btf_type_by_id(btf, id);

	while (btf_type_is_modifier(t) &&
	       BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF) {
		t = btf_type_by_id(btf, t->type);
	}

	return t;
}

#define BTF_SHOW_MAX_ITER	10

#define BTF_KIND_BIT(kind)	(1ULL << kind)

/*
 * Populate show->state.name with type name information.
 * Format of type name is
 *
 * [.member_name = ] (type_name)
 */
static const char *btf_show_name(struct btf_show *show)
{
	/* BTF_SHOW_MAX_ITER array suffixes "[]" */
	const char *array_suffixes = "[][][][][][][][][][]";
	const char *array_suffix = &array_suffixes[strlen(array_suffixes)];
	/* BTF_SHOW_MAX_ITER pointer suffixes "*" */
	const char *ptr_suffixes = "**********";
	const char *ptr_suffix = &ptr_suffixes[strlen(ptr_suffixes)];
	const char *name = NULL, *prefix = "", *parens = "";
	const struct btf_member *m = show->state.member;
	const struct btf_type *t;
	const struct btf_array *array;
	u32 id = show->state.type_id;
	const char *member = NULL;
	bool show_member = false;
	u64 kinds = 0;
	int i;

	show->state.name[0] = '\0';

	/*
	 * Don't show type name if we're showing an array member;
	 * in that case we show the array type so don't need to repeat
	 * ourselves for each member.
	 */
	if (show->state.array_member)
		return "";

	/* Retrieve member name, if any. */
	if (m) {
		member = btf_name_by_offset(show->btf, m->name_off);
		show_member = strlen(member) > 0;
		id = m->type;
	}

	/*
	 * Start with type_id, as we have resolved the struct btf_type *
	 * via btf_modifier_show() past the parent typedef to the child
	 * struct, int etc it is defined as.  In such cases, the type_id
	 * still represents the starting type while the struct btf_type *
	 * in our show->state points at the resolved type of the typedef.
	 */
	t = btf_type_by_id(show->btf, id);
	if (!t)
		return "";

	/*
	 * The goal here is to build up the right number of pointer and
	 * array suffixes while ensuring the type name for a typedef
	 * is represented.  Along the way we accumulate a list of
	 * BTF kinds we have encountered, since these will inform later
	 * display; for example, pointer types will not require an
	 * opening "{" for struct, we will just display the pointer value.
	 *
	 * We also want to accumulate the right number of pointer or array
	 * indices in the format string while iterating until we get to
	 * the typedef/pointee/array member target type.
	 *
	 * We start by pointing at the end of pointer and array suffix
	 * strings; as we accumulate pointers and arrays we move the pointer
	 * or array string backwards so it will show the expected number of
	 * '*' or '[]' for the type.  Up to BTF_SHOW_MAX_ITER levels of
	 * nesting of pointers, arrays and typedefs are supported as a
	 * precaution.
	 *
	 * We also want to get the typedef name while proceeding to resolve
	 * the type it points to, so that we can add parentheses if it is a
	 * "typedef struct" etc.
	 */
	for (i = 0; i < BTF_SHOW_MAX_ITER; i++) {

		switch (BTF_INFO_KIND(t->info)) {
		case BTF_KIND_TYPEDEF:
			if (!name)
				name = btf_name_by_offset(show->btf,
							  t->name_off);
			kinds |= BTF_KIND_BIT(BTF_KIND_TYPEDEF);
			id = t->type;
			break;
		case BTF_KIND_ARRAY:
			kinds |= BTF_KIND_BIT(BTF_KIND_ARRAY);
			parens = "[";
			if (!t)
				return "";
			array = btf_type_array(t);
			if (array_suffix > array_suffixes)
				array_suffix -= 2;
			id = array->type;
			break;
		case BTF_KIND_PTR:
			kinds |= BTF_KIND_BIT(BTF_KIND_PTR);
			if (ptr_suffix > ptr_suffixes)
				ptr_suffix -= 1;
			id = t->type;
			break;
		default:
			id = 0;
			break;
		}
		if (!id)
			break;
		t = btf_type_skip_qualifiers(show->btf, id);
	}
	/* We may not be able to represent this type; bail to be safe */
	if (i == BTF_SHOW_MAX_ITER)
		return "";

	if (!name)
		name = btf_name_by_offset(show->btf, t->name_off);

	switch (BTF_INFO_KIND(t->info)) {
	case BTF_KIND_STRUCT:
	case BTF_KIND_UNION:
		prefix = BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT ?
			 "struct" : "union";
		/* if it's an array of struct/union, parens is already set */
		if (!(kinds & (BTF_KIND_BIT(BTF_KIND_ARRAY))))
			parens = "{";
		break;
	case BTF_KIND_ENUM:
	case BTF_KIND_ENUM64:
		prefix = "enum";
		break;
	default:
		break;
	}

	/* pointer does not require parens */
	if (kinds & BTF_KIND_BIT(BTF_KIND_PTR))
		parens = "";
	/* typedef does not require struct/union/enum prefix */
	if (kinds & BTF_KIND_BIT(BTF_KIND_TYPEDEF))
		prefix = "";

	if (!name)
		name = "";

	/* Even if we don't want type name info, we want parentheses etc */
	if (show->flags & BTF_SHOW_NONAME)
		snprintf(show->state.name, sizeof(show->state.name), "%s",
			 parens);
	else
		snprintf(show->state.name, sizeof(show->state.name),
			 "%s%s%s(%s%s%s%s%s%s)%s",
			 /* first 3 strings comprise ".member = " */
			 show_member ? "." : "",
			 show_member ? member : "",
			 show_member ? " = " : "",
			 /* ...next is our prefix (struct, enum, etc) */
			 prefix,
			 strlen(prefix) > 0 && strlen(name) > 0 ? " " : "",
			 /* ...this is the type name itself */
			 name,
			 /* ...suffixed by the appropriate '*', '[]' suffixes */
			 strlen(ptr_suffix) > 0 ? " " : "", ptr_suffix,
			 array_suffix, parens);

	return show->state.name;
}
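
/*
 * Example (editor's illustration): for a member "next" of type
 * "struct foo *", the name built above reads ".next = (struct foo *)".
 * The trailing parens component is empty for pointers, but supplies the
 * opening "{" or "[" when an aggregate's members follow.
 */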

static const char *__btf_show_indent(struct btf_show *show)
{
	const char *indents = "                                ";
	const char *indent = &indents[strlen(indents)];

	if ((indent - show->state.depth) >= indents)
		return indent - show->state.depth;
	return indents;
}

static const char *btf_show_indent(struct btf_show *show)
{
	return show->flags & BTF_SHOW_COMPACT ? "" : __btf_show_indent(show);
}

static const char *btf_show_newline(struct btf_show *show)
{
	return show->flags & BTF_SHOW_COMPACT ? "" : "\n";
}

static const char *btf_show_delim(struct btf_show *show)
{
	if (show->state.depth == 0)
		return "";

	if ((show->flags & BTF_SHOW_COMPACT) && show->state.type &&
	    BTF_INFO_KIND(show->state.type->info) == BTF_KIND_UNION)
		return "|";

	return ",";
}

__printf(2, 3) static void btf_show(struct btf_show *show, const char *fmt, ...)
{
	va_list args;

	if (!show->state.depth_check) {
		va_start(args, fmt);
		show->showfn(show, fmt, args);
		va_end(args);
	}
}

/* Macros are used here as btf_show_type_value[s]() prepends and appends
 * format specifiers to the format specifier passed in; these do the work of
 * adding indentation, delimiters etc while the caller simply has to specify
 * the type value(s) in the format specifier + value(s).
 */
#define btf_show_type_value(show, fmt, value)				       \
	do {								       \
		if ((value) != (__typeof__(value))0 ||			       \
		    (show->flags & BTF_SHOW_ZERO) ||			       \
		    show->state.depth == 0) {				       \
			btf_show(show, "%s%s" fmt "%s%s",		       \
				 btf_show_indent(show),			       \
				 btf_show_name(show),			       \
				 value, btf_show_delim(show),		       \
				 btf_show_newline(show));		       \
			if (show->state.depth > show->state.depth_to_show)     \
				show->state.depth_to_show = show->state.depth; \
		}							       \
	} while (0)

#define btf_show_type_values(show, fmt, ...)				       \
	do {								       \
		btf_show(show, "%s%s" fmt "%s%s", btf_show_indent(show),       \
			 btf_show_name(show),				       \
			 __VA_ARGS__, btf_show_delim(show),		       \
			 btf_show_newline(show));			       \
		if (show->state.depth > show->state.depth_to_show)	       \
			show->state.depth_to_show = show->state.depth;	       \
	} while (0)
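
/*
 * Usage sketch (editor's illustration; the helper name is hypothetical):
 * a kind-specific show function emits one value per call; zero values
 * are suppressed unless BTF_SHOW_ZERO is set or we are at the top level:
 *
 *	static void example_show_u32(struct btf_show *show, void *data)
 *	{
 *		btf_show_type_value(show, "%u", *(u32 *)data);
 *	}
 */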

/* How much is left to copy to safe buffer after @data? */
static int btf_show_obj_size_left(struct btf_show *show, void *data)
{
	return show->obj.head + show->obj.size - data;
}

/* Is object pointed to by @data of @size already copied to our safe buffer? */
static bool btf_show_obj_is_safe(struct btf_show *show, void *data, int size)
{
	return data >= show->obj.data &&
	       (data + size) < (show->obj.data + BTF_SHOW_OBJ_SAFE_SIZE);
}

/*
 * If object pointed to by @data of @size falls within our safe buffer, return
 * the equivalent pointer to the same safe data.  Assumes
 * copy_from_kernel_nofault() has already happened and our safe buffer is
 * populated.
 */
static void *__btf_show_obj_safe(struct btf_show *show, void *data, int size)
{
	if (btf_show_obj_is_safe(show, data, size))
		return show->obj.safe + (data - show->obj.data);
	return NULL;
}

/*
 * Return a safe-to-access version of data pointed to by @data.
 * We do this by copying the relevant amount of information
 * to the struct btf_show obj.safe buffer using copy_from_kernel_nofault().
 *
 * If BTF_SHOW_UNSAFE is specified, just return data as-is; no
 * safe copy is needed.
 *
 * Otherwise we need to determine if we have the required amount
 * of data (determined by the @data pointer and the size of the
 * largest base type we can encounter (represented by
 * BTF_SHOW_OBJ_BASE_TYPE_SIZE)).  Having that much data ensures
 * that we will be able to print some of the current object,
 * and if more is needed a copy will be triggered.
 * Some objects such as structs will not fit into the buffer;
 * in such cases additional copies when we iterate over their
 * members may be needed.
 *
 * btf_show_obj_safe() is used to return a safe buffer for
 * btf_show_start_type(); this ensures that as we recurse into
 * nested types we always have safe data for the given type.
 * This approach is somewhat wasteful; it's possible for example
 * that when iterating over a large union we'll end up copying the
 * same data repeatedly, but the goal is safety not performance.
 * We use stack data as opposed to per-CPU buffers because the
 * iteration over a type can take some time, and preemption handling
 * would greatly complicate use of the safe buffer.
 */
static void *btf_show_obj_safe(struct btf_show *show,
			       const struct btf_type *t,
			       void *data)
{
	const struct btf_type *rt;
	int size_left, size;
	void *safe = NULL;

	if (show->flags & BTF_SHOW_UNSAFE)
		return data;

	rt = btf_resolve_size(show->btf, t, &size);
	if (IS_ERR(rt)) {
		show->state.status = PTR_ERR(rt);
		return NULL;
	}

	/*
	 * Is this a toplevel object? If so, set total object size and
	 * initialize pointers. Otherwise check if we still fall within
	 * our safe object data.
	 */
	if (show->state.depth == 0) {
		show->obj.size = size;
		show->obj.head = data;
	} else {
		/*
		 * If the size of the current object is > our remaining
		 * safe buffer we _may_ need to do a new copy. However
		 * consider the case of a nested struct; its size pushes
		 * us over the safe buffer limit, but showing any individual
		 * struct members does not. In such cases, we don't need
		 * to initiate a fresh copy yet; however we definitely need
		 * at least BTF_SHOW_OBJ_BASE_TYPE_SIZE bytes left
		 * in our buffer, regardless of the current object size.
		 * The logic here is that as we resolve types we will
		 * hit a base type at some point, and we need to be sure
		 * the next chunk of data is safely available to display
		 * that type info safely. We cannot rely on the size of
		 * the current object here because it may be much larger
		 * than our current buffer (e.g. task_struct is 8k).
		 * All we want to do here is ensure that we can print the
		 * next basic type, which we can if either
		 * - the current type size is within the safe buffer; or
		 * - at least BTF_SHOW_OBJ_BASE_TYPE_SIZE bytes are left in
		 *   the safe buffer.
		 */
		safe = __btf_show_obj_safe(show, data,
					   min(size,
					       BTF_SHOW_OBJ_BASE_TYPE_SIZE));
	}

	/*
	 * We need a new copy to our safe object, either because we haven't
	 * yet copied and are initializing safe data, or because the data
	 * we want falls outside the boundaries of the safe object.
	 */
	if (!safe) {
		size_left = btf_show_obj_size_left(show, data);
		if (size_left > BTF_SHOW_OBJ_SAFE_SIZE)
			size_left = BTF_SHOW_OBJ_SAFE_SIZE;
		show->state.status = copy_from_kernel_nofault(show->obj.safe,
							      data, size_left);
		if (!show->state.status) {
			show->obj.data = data;
			safe = show->obj.safe;
		}
	}

	return safe;
}

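/* Typical flow for the show helpers below: btf_show_start_type() records the
 * type being shown and hands back a safe pointer via btf_show_obj_safe();
 * the per-kind show routines then read the object only through that pointer,
 * and btf_show_end_type() clears the per-type state again.
 */
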
/*
 * Set the type we are starting to show and return a safe data pointer
 * to be used for showing the associated data.
 */
static void *btf_show_start_type(struct btf_show *show,
				 const struct btf_type *t,
				 u32 type_id, void *data)
{
	show->state.type = t;
	show->state.type_id = type_id;
	show->state.name[0] = '\0';

	return btf_show_obj_safe(show, t, data);
}

static void btf_show_end_type(struct btf_show *show)
{
	show->state.type = NULL;
	show->state.type_id = 0;
	show->state.name[0] = '\0';
}

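/* Start showing an aggregate type (struct, union or array): print the
 * current indentation and name, then descend a level so that members are
 * shown at increased depth.
 */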
static void *btf_show_start_aggr_type(struct btf_show *show,
				      const struct btf_type *t,
				      u32 type_id, void *data)
{
	void *safe_data = btf_show_start_type(show, t, type_id, data);

	if (!safe_data)
		return safe_data;

	btf_show(show, "%s%s%s", btf_show_indent(show),
		 btf_show_name(show),
		 btf_show_newline(show));
	show->state.depth++;
	return safe_data;
}

static void btf_show_end_aggr_type(struct btf_show *show,
				   const char *suffix)
{
	show->state.depth--;
	btf_show(show, "%s%s%s%s", btf_show_indent(show), suffix,
		 btf_show_delim(show), btf_show_newline(show));
	btf_show_end_type(show);
}

static void btf_show_start_member(struct btf_show *show,
				  const struct btf_member *m)
{
	show->state.member = m;
}

static void btf_show_start_array_member(struct btf_show *show)
{
	show->state.array_member = 1;
	btf_show_start_member(show, NULL);
}

static void btf_show_end_member(struct btf_show *show)
{
	show->state.member = NULL;
}

static void btf_show_end_array_member(struct btf_show *show)
{
	show->state.array_member = 0;
	btf_show_end_member(show);
}

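/* Arrays additionally track the element encoding (e.g. BTF_INT_CHAR) and a
 * "terminated" flag, which is used when character arrays are shown as
 * NUL-terminated strings.
 */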
static void *btf_show_start_array_type(struct btf_show *show,
				       const struct btf_type *t,
				       u32 type_id,
				       u16 array_encoding,
				       void *data)
{
	show->state.array_encoding = array_encoding;
	show->state.array_terminated = 0;
	return btf_show_start_aggr_type(show, t, type_id, data);
}

static void btf_show_end_array_type(struct btf_show *show)
{
	show->state.array_encoding = 0;
	show->state.array_terminated = 0;
	btf_show_end_aggr_type(show, "]");
}

static void *btf_show_start_struct_type(struct btf_show *show,
					const struct btf_type *t,
					u32 type_id,
					void *data)
{
	return btf_show_start_aggr_type(show, t, type_id, data);
}

static void btf_show_end_struct_type(struct btf_show *show)
{
	btf_show_end_aggr_type(show, "}");
}

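/* Log to the verifier log unconditionally; callers are expected to have
 * checked bpf_verifier_log_needed() already.
 */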
__printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log,
					      const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	bpf_verifier_vlog(log, fmt, args);
	va_end(args);
}

__printf(2, 3) static void btf_verifier_log(struct btf_verifier_env *env,
					    const char *fmt, ...)
{
	struct bpf_verifier_log *log = &env->log;
	va_list args;

	if (!bpf_verifier_log_needed(log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(log, fmt, args);
	va_end(args);
}

__printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env,
						   const struct btf_type *t,
						   bool log_details,
						   const char *fmt, ...)
{
	struct bpf_verifier_log *log = &env->log;
	struct btf *btf = env->btf;
	va_list args;

	if (!bpf_verifier_log_needed(log))
		return;

	if (log->level == BPF_LOG_KERNEL) {
		/* btf verifier prints all types it is processing via
		 * btf_verifier_log_type(..., fmt = NULL).
		 * Skip those prints for in-kernel BTF verification.
		 */
		if (!fmt)
			return;

		/* Skip logging when loading module BTF with mismatches permitted */
		if (env->btf->base_btf && IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH))
			return;
	}

	__btf_verifier_log(log, "[%u] %s %s%s",
			   env->log_type_id,
			   btf_type_str(t),
			   __btf_name_by_offset(btf, t->name_off),
			   log_details ? " " : "");

	if (log_details)
		btf_type_ops(t)->log_details(env, t);

	if (fmt && *fmt) {
		__btf_verifier_log(log, " ");
		va_start(args, fmt);
		bpf_verifier_vlog(log, fmt, args);
		va_end(args);
	}

	__btf_verifier_log(log, "\n");
}

#define btf_verifier_log_type(env, t, ...) \
	__btf_verifier_log_type((env), (t), true, __VA_ARGS__)
#define btf_verifier_log_basic(env, t, ...) \
	__btf_verifier_log_type((env), (t), false, __VA_ARGS__)

__printf(4, 5)
static void btf_verifier_log_member(struct btf_verifier_env *env,
				    const struct btf_type *struct_type,
				    const struct btf_member *member,
				    const char *fmt, ...)
{
	struct bpf_verifier_log *log = &env->log;
	struct btf *btf = env->btf;
	va_list args;

	if (!bpf_verifier_log_needed(log))
		return;

	if (log->level == BPF_LOG_KERNEL) {
		if (!fmt)
			return;

		/* Skip logging when loading module BTF with mismatches permitted */
		if (env->btf->base_btf && IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH))
			return;
	}

	/* The CHECK_META phase already did a btf dump.
	 *
	 * If a member is logged again, it must have hit an error in
	 * parsing this member. It is useful to print out which
	 * struct this member belongs to.
	 */
	if (env->phase != CHECK_META)
		btf_verifier_log_type(env, struct_type, NULL);

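	/* With kind_flag set, member->offset packs two values: bits 0-23
	 * carry the bit offset and bits 24-31 carry the bitfield size
	 * (0 for non-bitfield members), decoded via BTF_MEMBER_BIT_OFFSET()
	 * and BTF_MEMBER_BITFIELD_SIZE() from uapi/linux/btf.h. E.g. a
	 * 4-bit field at bit offset 160 is encoded as (4 << 24) | 160.
	 */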
	if (btf_type_kflag(struct_type))
		__btf_verifier_log(log,
				   "\t%s type_id=%u bitfield_size=%u bits_offset=%u",
				   __btf_name_by_offset(btf, member->name_off),
				   member->type,
				   BTF_MEMBER_BITFIELD_SIZE(member->offset),
				   BTF_MEMBER_BIT_OFFSET(member->offset));
	else
		__btf_verifier_log(log, "\t%s type_id=%u bits_offset=%u",
				   __btf_name_by_offset(btf, member->name_off),
				   member->type, member->offset);

	if (fmt && *fmt) {
		__btf_verifier_log(log, " ");
		va_start(args, fmt);
		bpf_verifier_vlog(log, fmt, args);
		va_end(args);
	}

	__btf_verifier_log(log, "\n");
}

__printf(4, 5)
static void btf_verifier_log_vsi(struct btf_verifier_env *env,
				 const struct btf_type *datasec_type,
				 const struct btf_var_secinfo *vsi,
				 const char *fmt, ...)
{
	struct bpf_verifier_log *log = &env->log;
	va_list args;

	if (!bpf_verifier_log_needed(log))
		return;
	if (log->level == BPF_LOG_KERNEL && !fmt)
		return;
	if (env->phase != CHECK_META)
		btf_verifier_log_type(env, datasec_type, NULL);

	__btf_verifier_log(log, "\t type_id=%u offset=%u size=%u",
			   vsi->type, vsi->offset, vsi->size);
	if (fmt && *fmt) {
		__btf_verifier_log(log, " ");
		va_start(args, fmt);
		bpf_verifier_vlog(log, fmt, args);
		va_end(args);
	}

	__btf_verifier_log(log, "\n");
}

static void btf_verifier_log_hdr(struct btf_verifier_env *env,
				 u32 btf_data_size)
{
	struct bpf_verifier_log *log = &env->log;
	const struct btf *btf = env->btf;
	const struct btf_header *hdr;

	if (!bpf_verifier_log_needed(log))
		return;

	if (log->level == BPF_LOG_KERNEL)
		return;
	hdr = &btf->hdr;
	__btf_verifier_log(log, "magic: 0x%x\n", hdr->magic);
	__btf_verifier_log(log, "version: %u\n", hdr->version);
	__btf_verifier_log(log, "flags: 0x%x\n", hdr->flags);
	__btf_verifier_log(log, "hdr_len: %u\n", hdr->hdr_len);
	__btf_verifier_log(log, "type_off: %u\n", hdr->type_off);
	__btf_verifier_log(log, "type_len: %u\n", hdr->type_len);
	__btf_verifier_log(log, "str_off: %u\n", hdr->str_off);
	__btf_verifier_log(log, "str_len: %u\n", hdr->str_len);
	__btf_verifier_log(log, "btf_total_size: %u\n", btf_data_size);
}

static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
{
	struct btf *btf = env->btf;

	if (btf->types_size == btf->nr_types) {
		/* Expand 'types' array */

		struct btf_type **new_types;
		u32 expand_by, new_size;

		if (btf->start_id + btf->types_size == BTF_MAX_TYPE) {
			btf_verifier_log(env, "Exceeded max num of types");
			return -E2BIG;
		}

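		/* Grow by 25% of the current capacity, but by at least 16
		 * entries, capped so the total never exceeds BTF_MAX_TYPE.
		 */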
		expand_by = max_t(u32, btf->types_size >> 2, 16);
		new_size = min_t(u32, BTF_MAX_TYPE,
				 btf->types_size + expand_by);

		new_types = kvcalloc(new_size, sizeof(*new_types),
				     GFP_KERNEL | __GFP_NOWARN);
		if (!new_types)
			return -ENOMEM;

		if (btf->nr_types == 0) {
			if (!btf->base_btf) {
				/* lazily init VOID type */
				new_types[0] = &btf_void;
				btf->nr_types++;
			}
		} else {
			memcpy(new_types, btf->types,
			       sizeof(*btf->types) * btf->nr_types);
		}

		kvfree(btf->types);
		btf->types = new_types;
		btf->types_size = new_size;
	}

	btf->types[btf->nr_types++] = t;

	return 0;
}

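/* BTF object IDs are handed out cyclically (idr_alloc_cyclic) so that a
 * just-freed ID is not immediately reused, which makes stale ID references
 * from userspace easier to detect.
 */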
static int btf_alloc_id(struct btf *btf)
{
	int id;

	idr_preload(GFP_KERNEL);
	spin_lock_bh(&btf_idr_lock);
	id = idr_alloc_cyclic(&btf_idr, btf, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		btf->id = id;
	spin_unlock_bh(&btf_idr_lock);
	idr_preload_end();

	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

static void btf_free_id(struct btf *btf)
{
	unsigned long flags;

	/*
	 * In map-in-map, calling map_delete_elem() on the outer
	 * map will call bpf_map_put on the inner map.
	 * It will then eventually call btf_free_id()
	 * on the inner map. Some map_delete_elem()
	 * implementations may run with irqs disabled, so
	 * we need to use the _irqsave() version instead
	 * of the _bh() version.
	 */
	spin_lock_irqsave(&btf_idr_lock, flags);
	idr_remove(&btf_idr, btf->id);
	spin_unlock_irqrestore(&btf_idr_lock, flags);
}

static void btf_free_kfunc_set_tab(struct btf *btf)
{
	struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab;
	int hook;

	if (!tab)
		return;
	for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++)
		kfree(tab->sets[hook]);
	kfree(tab);
	btf->kfunc_set_tab = NULL;
}

static void btf_free_dtor_kfunc_tab(struct btf *btf)
{
	struct btf_id_dtor_kfunc_tab *tab = btf->dtor_kfunc_tab;

	if (!tab)
		return;
	kfree(tab);
	btf->dtor_kfunc_tab = NULL;
}

static void btf_struct_metas_free(struct btf_struct_metas *tab)
{
	int i;

	if (!tab)
		return;
	for (i = 0; i < tab->cnt; i++)
		btf_record_free(tab->types[i].record);
	kfree(tab);
}

static void btf_free_struct_meta_tab(struct btf *btf)
{
	struct btf_struct_metas *tab = btf->struct_meta_tab;

	btf_struct_metas_free(tab);
	btf->struct_meta_tab = NULL;
}

static void btf_free_struct_ops_tab(struct btf *btf)
{
	struct btf_struct_ops_tab *tab = btf->struct_ops_tab;
	u32 i;

	if (!tab)
		return;

	for (i = 0; i < tab->cnt; i++)
		bpf_struct_ops_desc_release(&tab->ops[i]);

	kfree(tab);
	btf->struct_ops_tab = NULL;
}

static void btf_free(struct btf *btf)
{
	btf_free_struct_meta_tab(btf);
	btf_free_dtor_kfunc_tab(btf);
	btf_free_kfunc_set_tab(btf);
	btf_free_struct_ops_tab(btf);
	kvfree(btf->types);
	kvfree(btf->resolved_sizes);
	kvfree(btf->resolved_ids);
	/* vmlinux does not allocate btf->data, it simply points it at
	 * __start_BTF.
	 */
	if (!btf_is_vmlinux(btf))
		kvfree(btf->data);
	kvfree(btf->base_id_map);
	kfree(btf);
}

static void btf_free_rcu(struct rcu_head *rcu)
{
	struct btf *btf = container_of(rcu, struct btf, rcu);

	btf_free(btf);
}

const char *btf_get_name(const struct btf *btf)
{
	return btf->name;
}

void btf_get(struct btf *btf)
{
	refcount_inc(&btf->refcnt);
}

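/* Drop a reference. On the final put, the ID is removed first so the BTF
 * object is no longer discoverable, and the actual free is deferred past an
 * RCU grace period for the benefit of lockless readers.
 */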
void btf_put(struct btf *btf)
{
	if (btf && refcount_dec_and_test(&btf->refcnt)) {
		btf_free_id(btf);
		call_rcu(&btf->rcu, btf_free_rcu);
	}
}

struct btf *btf_base_btf(const struct btf *btf)
{
	return btf->base_btf;
}

const struct btf_header *btf_header(const struct btf *btf)
{
	return &btf->hdr;
}

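/* For split BTF, the new types and strings logically continue the base
 * (e.g. vmlinux) BTF: local type IDs start at btf_nr_types(base_btf) and
 * local string offsets start right after the base string section.
 */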
void btf_set_base_btf(struct btf *btf, const struct btf *base_btf)
{
	btf->base_btf = (struct btf *)base_btf;
	btf->start_id = btf_nr_types(base_btf);
	btf->start_str_off = base_btf->hdr.str_len;
}

static int env_resolve_init(struct btf_verifier_env *env)
{
	struct btf *btf = env->btf;
	u32 nr_types = btf->nr_types;
	u32 *resolved_sizes = NULL;
	u32 *resolved_ids = NULL;
	u8 *visit_states = NULL;

	resolved_sizes = kvcalloc(nr_types, sizeof(*resolved_sizes),
				  GFP_KERNEL | __GFP_NOWARN);
	if (!resolved_sizes)
		goto nomem;

	resolved_ids = kvcalloc(nr_types, sizeof(*resolved_ids),
				GFP_KERNEL | __GFP_NOWARN);
	if (!resolved_ids)
		goto nomem;

	visit_states = kvcalloc(nr_types, sizeof(*visit_states),
				GFP_KERNEL | __GFP_NOWARN);
	if (!visit_states)
		goto nomem;

	btf->resolved_sizes = resolved_sizes;
	btf->resolved_ids = resolved_ids;
	env->visit_states = visit_states;

	return 0;

nomem:
	kvfree(resolved_sizes);
	kvfree(resolved_ids);
	kvfree(visit_states);
	return -ENOMEM;
}

static void btf_verifier_env_free(struct btf_verifier_env *env)
{
	kvfree(env->visit_states);
	kfree(env);
}

|
|
|
|
|
static bool env_type_is_resolve_sink(const struct btf_verifier_env *env,
				     const struct btf_type *next_type)
{
	switch (env->resolve_mode) {
	case RESOLVE_TBD:
		/* int, enum or void is a sink */
		return !btf_type_needs_resolve(next_type);
	case RESOLVE_PTR:
		/* int, enum, void, struct, array, func or func_proto is a
		 * sink for ptr
		 */
		return !btf_type_is_modifier(next_type) &&
			!btf_type_is_ptr(next_type);
	case RESOLVE_STRUCT_OR_ARRAY:
		/* int, enum, void, ptr, func or func_proto is a sink
		 * for struct and array
		 */
		return !btf_type_is_modifier(next_type) &&
			!btf_type_is_array(next_type) &&
			!btf_type_is_struct(next_type);
	default:
		BUG();
	}
}
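
/* Example: resolving "const int *p" walks PTR -> CONST -> INT.  In
 * RESOLVE_PTR mode the CONST modifier must still be followed, but the
 * INT is a sink, since a pointer's size never depends on its pointee.
 */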

static bool env_type_is_resolved(const struct btf_verifier_env *env,
				 u32 type_id)
{
	/* base BTF types should be resolved by now */
	if (type_id < env->btf->start_id)
		return true;

	return env->visit_states[type_id - env->btf->start_id] == RESOLVED;
}
static int env_stack_push(struct btf_verifier_env *env,
			  const struct btf_type *t, u32 type_id)
{
	const struct btf *btf = env->btf;
	struct resolve_vertex *v;

	if (env->top_stack == MAX_RESOLVE_DEPTH)
		return -E2BIG;

	if (type_id < btf->start_id ||
	    env->visit_states[type_id - btf->start_id] != NOT_VISITED)
		return -EEXIST;

	env->visit_states[type_id - btf->start_id] = VISITED;

	v = &env->stack[env->top_stack++];
	v->t = t;
	v->type_id = type_id;
	v->next_member = 0;

	if (env->resolve_mode == RESOLVE_TBD) {
		if (btf_type_is_ptr(t))
			env->resolve_mode = RESOLVE_PTR;
		else if (btf_type_is_struct(t) || btf_type_is_array(t))
			env->resolve_mode = RESOLVE_STRUCT_OR_ARRAY;
	}

	return 0;
}
static void env_stack_set_next_member(struct btf_verifier_env *env,
				      u16 next_member)
{
	env->stack[env->top_stack - 1].next_member = next_member;
}

static void env_stack_pop_resolved(struct btf_verifier_env *env,
				   u32 resolved_type_id,
				   u32 resolved_size)
{
	u32 type_id = env->stack[--(env->top_stack)].type_id;
	struct btf *btf = env->btf;

	type_id -= btf->start_id; /* adjust to local type id */
	btf->resolved_sizes[type_id] = resolved_size;
	btf->resolved_ids[type_id] = resolved_type_id;
	env->visit_states[type_id] = RESOLVED;
}

static const struct resolve_vertex *env_stack_peak(struct btf_verifier_env *env)
{
	return env->top_stack ? &env->stack[env->top_stack - 1] : NULL;
}
/* Resolve the size of a passed-in "type"
 *
 * type: is an array (e.g. u32 array[x][y])
 * return type: type "u32[x][y]", i.e. BTF_KIND_ARRAY,
 * *type_size: (x * y * sizeof(u32)).  Hence, *type_size always
 *             corresponds to the return type.
 * *elem_type: u32
 * *elem_id: id of u32
 * *total_nelems: (x * y).  Hence, individual elem size is
 *                (*type_size / *total_nelems)
 * *type_id: id of type if it's changed within the function, 0 if not
 *
 * type: is not an array (e.g. const struct X)
 * return type: type "struct X"
 * *type_size: sizeof(struct X)
 * *elem_type: same as return type ("struct X")
 * *elem_id: 0
 * *total_nelems: 1
 * *type_id: id of type if it's changed within the function, 0 if not
 */
static const struct btf_type *
__btf_resolve_size(const struct btf *btf, const struct btf_type *type,
		   u32 *type_size, const struct btf_type **elem_type,
		   u32 *elem_id, u32 *total_nelems, u32 *type_id)
{
	const struct btf_type *array_type = NULL;
	const struct btf_array *array = NULL;
	u32 i, size, nelems = 1, id = 0;

	for (i = 0; i < MAX_RESOLVE_DEPTH; i++) {
		switch (BTF_INFO_KIND(type->info)) {
		/* type->size can be used */
		case BTF_KIND_INT:
		case BTF_KIND_STRUCT:
		case BTF_KIND_UNION:
		case BTF_KIND_ENUM:
		case BTF_KIND_FLOAT:
bpf: Add btf enum64 support
Currently, BTF only supports up to 32-bit enum values with BTF_KIND_ENUM.
But in the kernel, some enums indeed have 64-bit values, e.g.,
in uapi bpf.h, we have
	enum {
		BPF_F_INDEX_MASK = 0xffffffffULL,
		BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
		BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
	};
In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK
as 0, which certainly is incorrect.
This patch added a new BTF kind, BTF_KIND_ENUM64, which permits
64-bit values to cover the above use case. BTF_KIND_ENUM64 has
the following three fields following the common type:
	struct btf_enum64 {
		__u32 name_off;
		__u32 val_lo32;
		__u32 val_hi32;
	};
Currently, the BTF type section has an alignment of 4 as all element types
are u32. Representing the value with a __u64 would introduce a pad
for btf_enum64 and may also introduce misalignment for the 64-bit value.
Hence, the two members val_hi32 and val_lo32 are chosen to avoid these issues.
The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64
to indicate whether the value is signed or unsigned. The kflag intends
to provide consistent output of the BTF C format with the original
source code. For example, suppose an original BTF_KIND_ENUM bit value is
0xffffffff. The C format has two choices, printing out 0xffffffff or -1,
and current libbpf prints it out as an unsigned value. But if the signedness
is preserved in BTF, the value can be printed the same as in the original
source code. The kflag value 0 means unsigned values, which is consistent
with the default used by libbpf and should also cover most cases.
The new BTF_KIND_ENUM64 is intended to support enum values represented as
64-bit values, but it can represent all BTF_KIND_ENUM values as well.
The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value
has to be represented with 64 bits.
In addition, a static inline function btf_kind_core_compat() is introduced,
which will be used later when libbpf's relo_core.c is changed. Here the
kernel shares the same relo_core.c with libbpf.
[1] https://reviews.llvm.org/D124641
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-06-07 14:26:00 +08:00
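		/* Illustrative sketch of the encoding described above: a
		 * BTF_KIND_ENUM64 value is reassembled from its two 32-bit
		 * halves as
		 *
		 *	const struct btf_enum64 *e = btf_enum64(t) + i;
		 *	u64 val = ((u64)e->val_hi32 << 32) | e->val_lo32;
		 *
		 * where btf_enum64() returns the member array that follows
		 * the common struct btf_type.
		 */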
		case BTF_KIND_ENUM64:
			size = type->size;
			goto resolved;

		case BTF_KIND_PTR:
			size = sizeof(void *);
			goto resolved;

		/* Modifiers */
		case BTF_KIND_TYPEDEF:
		case BTF_KIND_VOLATILE:
		case BTF_KIND_CONST:
		case BTF_KIND_RESTRICT:
		case BTF_KIND_TYPE_TAG:
			id = type->type;
			type = btf_type_by_id(btf, type->type);
			break;

		case BTF_KIND_ARRAY:
			if (!array_type)
				array_type = type;
			array = btf_type_array(type);
			if (nelems && array->nelems > U32_MAX / nelems)
				return ERR_PTR(-EINVAL);
			nelems *= array->nelems;
			type = btf_type_by_id(btf, array->type);
			break;

		/* type without size */
		default:
			return ERR_PTR(-EINVAL);
		}
	}

	return ERR_PTR(-EINVAL);

resolved:
	if (nelems && size > U32_MAX / nelems)
		return ERR_PTR(-EINVAL);

	*type_size = nelems * size;
	if (total_nelems)
		*total_nelems = nelems;
	if (elem_type)
		*elem_type = type;
	if (elem_id)
		*elem_id = array ? array->type : 0;
	if (type_id && id)
		*type_id = id;

	return array_type ? : type;
}

bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS
The patch introduces BPF_MAP_TYPE_STRUCT_OPS. The map value
is a kernel struct with its func ptrs implemented in bpf progs.
This new map is the interface to register/unregister/introspect
a bpf implemented kernel struct.
The kernel struct is actually embedded inside another new struct
(or called the "value" struct in the code). For example,
"struct tcp_congestion_ops" is embedded in:
	struct bpf_struct_ops_tcp_congestion_ops {
		refcount_t refcnt;
		enum bpf_struct_ops_state state;
		struct tcp_congestion_ops data; /* <-- kernel subsystem struct here */
	}
The map value is "struct bpf_struct_ops_tcp_congestion_ops".
The "bpftool map dump" will then be able to show the
state ("inuse"/"tobefree") and the number of subsystem's refcnt (e.g.
the number of tcp_sock in the tcp_congestion_ops case). This "value" struct
is created automatically by a macro. Having a separate "value" struct
will also make extending "struct bpf_struct_ops_XYZ" easier (e.g. adding
"void (*init)(void)" to "struct bpf_struct_ops_XYZ" to do some
initialization work before registering the struct_ops to the kernel
subsystem). The libbpf will take care of finding and populating the
"struct bpf_struct_ops_XYZ" from "struct XYZ".
Register a struct_ops to a kernel subsystem:
1. Load all needed BPF_PROG_TYPE_STRUCT_OPS prog(s)
2. Create a BPF_MAP_TYPE_STRUCT_OPS with attr->btf_vmlinux_value_type_id
   set to the btf id of "struct bpf_struct_ops_tcp_congestion_ops" of the
   running kernel.
   Instead of reusing the attr->btf_value_type_id,
   btf_vmlinux_value_type_id is added such that attr->btf_fd can still be
   used as the "user" btf which could store other useful sysadmin/debug
   info that may be introduced in the future,
   e.g. creation-date/compiler-details/map-creator...etc.
3. Create a "struct bpf_struct_ops_tcp_congestion_ops" object as described
   in the running kernel btf. Populate the value of this object.
   The function ptrs should be populated with the prog fds.
4. Call BPF_MAP_UPDATE with the object created in (3) as
   the map value. The key is always "0".
During BPF_MAP_UPDATE, the code that saves the kernel-func-ptr's
args as an array of u64 is generated. BPF_MAP_UPDATE also allows
the specific struct_ops to do some final checks in "st_ops->init_member()"
(e.g. ensure all mandatory func ptrs are implemented).
If everything looks good, it will register this kernel struct
to the kernel subsystem. The map will not allow further updates
from this point.
Unregister a struct_ops from the kernel subsystem:
BPF_MAP_DELETE with key "0".
Introspect a struct_ops:
BPF_MAP_LOOKUP_ELEM with key "0". The map value returned will
have the prog _id_ populated as the func ptr.
The map value state (enum bpf_struct_ops_state) will transit from:
	INIT (map created) =>
	INUSE (map updated, i.e. reg) =>
	TOBEFREE (map value deleted, i.e. unreg)
The kernel subsystem needs to call bpf_struct_ops_get() and
bpf_struct_ops_put() to manage the "refcnt" in the
"struct bpf_struct_ops_XYZ". This patch uses a separate refcnt
for the purpose of tracking the subsystem usage. Another approach
is to reuse the map->refcnt and then "show" (i.e. during map_lookup)
the subsystem's usage by doing map->refcnt - map->usercnt to filter out
the map-fd/pinned-map usage. However, that will also tie down the
future semantics of map->refcnt and map->usercnt.
The very first subsystem's refcnt (during reg()) holds one
count to map->refcnt. When the very last subsystem's refcnt
is gone, it will also release the map->refcnt. All bpf_prog will be
freed when the map->refcnt reaches 0 (i.e. during map_free()).
Here is how the bpftool map command will look like:
	[root@arch-fb-vm1 bpf]# bpftool map show
	6: struct_ops  name dctcp  flags 0x0
		key 4B  value 256B  max_entries 1  memlock 4096B
		btf_id 6
	[root@arch-fb-vm1 bpf]# bpftool map dump id 6
	[{
		"value": {
			"refcnt": {
				"refs": {
					"counter": 1
				}
			},
			"state": 1,
			"data": {
				"list": {
					"next": 0,
					"prev": 0
				},
				"key": 0,
				"flags": 2,
				"init": 24,
				"release": 0,
				"ssthresh": 25,
				"cong_avoid": 30,
				"set_state": 27,
				"cwnd_event": 28,
				"in_ack_event": 26,
				"undo_cwnd": 29,
				"pkts_acked": 0,
				"min_tso_segs": 0,
				"sndbuf_expand": 0,
				"cong_control": 0,
				"get_info": 0,
				"name": [98,112,102,95,100,99,116,99,112,0,0,0,0,0,0,0
				],
				"owner": 0
			}
		}
	}
	]
Misc Notes:
* bpf_struct_ops_map_sys_lookup_elem() is added for syscall lookup.
  It does an in-place update on "*value" instead of returning a pointer
  to syscall.c. Otherwise, it needs a separate copy of the "zero" value
  for BPF_STRUCT_OPS_STATE_INIT to avoid races.
* bpf_struct_ops_map_delete_elem() is also called without
  preempt_disable() from map_delete_elem(). It is because
  the "->unreg()" may require a sleepable context, e.g.
  "tcp_unregister_congestion_control()".
* "const" is added to some of the existing "struct btf_func_model *"
  function args to avoid a compiler warning caused by this patch.
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200109003505.3855919-1-kafai@fb.com
2020-01-09 08:35:05 +08:00
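
A sketch of the pieces described above, using the tcp_congestion_ops example;
the real "value" struct is macro-generated and the state enum lives in the
kernel's struct_ops implementation, so treat this as illustrative:

enum bpf_struct_ops_state {
	BPF_STRUCT_OPS_STATE_INIT,	/* map created */
	BPF_STRUCT_OPS_STATE_INUSE,	/* map updated, i.e. registered */
	BPF_STRUCT_OPS_STATE_TOBEFREE,	/* map value deleted, i.e. unregistered */
};

struct bpf_struct_ops_tcp_congestion_ops {
	refcount_t refcnt;
	enum bpf_struct_ops_state state;
	struct tcp_congestion_ops data;	/* embedded kernel subsystem struct */
};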
const struct btf_type *
btf_resolve_size(const struct btf *btf, const struct btf_type *type,
		 u32 *type_size)
{
	return __btf_resolve_size(btf, type, type_size, NULL, NULL, NULL, NULL);
}
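
A hypothetical caller of btf_resolve_size(), to make the contract documented
above concrete: for a "u32 arr[4][5]" type it returns the ARRAY type and sets
*type_size to 4 * 5 * sizeof(u32) == 80.

static int resolve_size_example(const struct btf *btf,
				const struct btf_type *t)
{
	u32 size;
	const struct btf_type *rt;

	rt = btf_resolve_size(btf, t, &size);
	if (IS_ERR(rt))
		return PTR_ERR(rt);
	/* size now holds the flattened byte size of 't' */
	return 0;
}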
static u32 btf_resolved_type_id(const struct btf *btf, u32 type_id)
{
	while (type_id < btf->start_id)
		btf = btf->base_btf;

	return btf->resolved_ids[type_id - btf->start_id];
}

/* The input param "type_id" must point to a needs_resolve type */
static const struct btf_type *btf_type_id_resolve(const struct btf *btf,
						  u32 *type_id)
{
	*type_id = btf_resolved_type_id(btf, *type_id);
	return btf_type_by_id(btf, *type_id);
}

static u32 btf_resolved_type_size(const struct btf *btf, u32 type_id)
{
	while (type_id < btf->start_id)
		btf = btf->base_btf;

	return btf->resolved_sizes[type_id - btf->start_id];
}
const struct btf_type *btf_type_id_size(const struct btf *btf,
					u32 *type_id, u32 *ret_size)
{
	const struct btf_type *size_type;
	u32 size_type_id = *type_id;
	u32 size = 0;

	size_type = btf_type_by_id(btf, size_type_id);
	if (btf_type_nosize_or_null(size_type))
		return NULL;

	if (btf_type_has_size(size_type)) {
		size = size_type->size;
	} else if (btf_type_is_array(size_type)) {
		size = btf_resolved_type_size(btf, size_type_id);
	} else if (btf_type_is_ptr(size_type)) {
		size = sizeof(void *);
	} else {
		if (WARN_ON_ONCE(!btf_type_is_modifier(size_type) &&
				 !btf_type_is_var(size_type)))
			return NULL;

		size_type_id = btf_resolved_type_id(btf, size_type_id);
		size_type = btf_type_by_id(btf, size_type_id);
		if (btf_type_nosize_or_null(size_type))
			return NULL;
		else if (btf_type_has_size(size_type))
			size = size_type->size;
		else if (btf_type_is_array(size_type))
			size = btf_resolved_type_size(btf, size_type_id);
		else if (btf_type_is_ptr(size_type))
			size = sizeof(void *);
		else
			return NULL;
	}

	*type_id = size_type_id;
	if (ret_size)
		*ret_size = size;

	return size_type;
}

bpf: fix BTF verifier size resolution logic
The BTF verifier has a size resolution bug which in some circumstances leads
to invalid size resolution for, e.g., a TYPEDEF modifier. This happens if we
have [1] PTR -> [2] TYPEDEF -> [3] ARRAY, in which case, due to being in
pointer context, the ARRAY size won't be resolved (for a pointer it doesn't
matter, so it's a sink in pointer context), but it will be permanently
remembered as zero for the TYPEDEF, and the TYPEDEF will be marked as
RESOLVED. Eventually the ARRAY size will be resolved correctly, but the
TYPEDEF's resolved_size won't be updated anymore.
This, subsequently, will lead to erroneous map creation failure, if that
TYPEDEF is specified as either key or value, as key_size/value_size won't
correspond to the resolved size of the TYPEDEF (the kernel will believe it's
zero).
Note that if the BTF were ordered as [1] ARRAY <- [2] TYPEDEF <- [3] PTR,
this wouldn't be a problem, as by the time we get to the TYPEDEF, the ARRAY's
size is already calculated and stored.
This bug manifests itself in rejecting BTF-defined maps that use an array
typedef as a value type:
	typedef int array_t[16];
	struct {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__type(value, array_t); /* i.e., array_t *value; */
	} test_map SEC(".maps");
The fix consists of not relying on the modifier's resolved_size and instead
using the modifier's resolved_id (the type ID of the "concrete" type to which
the modifier eventually resolves) and doing size determination for that
resolved type. This allows preserving the existing "early DFS termination"
logic for PTR or STRUCT_OR_ARRAY contexts, while still doing correct size
determination for modifier types.
Fixes: eb3f595dab40 ("bpf: btf: Validate type reference")
Cc: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-07-13 01:25:55 +08:00
static int btf_df_check_member(struct btf_verifier_env *env,
			       const struct btf_type *struct_type,
			       const struct btf_member *member,
			       const struct btf_type *member_type)
{
	btf_verifier_log_basic(env, struct_type,
			       "Unsupported check_member");
	return -EINVAL;
}
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
the forward type.
Issue #1 and solution:
======================
The current BTF encoding of bitfields follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode a bit size,
pahole works around the issue by generating
an int type whenever the enum bit size is not 32.
For example,
	-bash-4.4$ cat t.c
	typedef int ___int;
	enum A { A1, A2, A3 };
	struct t {
		int a[5];
		___int b:4;
		volatile enum A c:4;
	} g;
	-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
	$ pahole -JV t.o
	[1] TYPEDEF ___int type_id=2
	[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
	[3] ENUM A size=4 vlen=3
		A1 val=0
		A2 val=1
		A3 val=2
	[4] STRUCT t size=24 vlen=3
		a type_id=5 bits_offset=0
		b type_id=9 bits_offset=160
		c type_id=11 bits_offset=164
	[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
	[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
	[7] VOLATILE (anon) type_id=3
	[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
	[9] TYPEDEF ___int type_id=8
	[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
	[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing the enum type to int, we lost the original
  type information, and this will not be ideal later
  when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
  BTF bloat. Duplicated types cannot be deduplicated
  later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. bit 31 of struct_type->info, previously reserved,
  is now used to indicate whether bitfield_size is
  encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
  btf_member->offset will encode like:
	bit 0 - 23: bit offset
	bit 24 - 31: bitfield size
  if bit 31 is not set, the old behavior is preserved:
	bit 0 - 31: bit offset
So if the struct contains a bitfield, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256, which is enough for today, as the maximum
bitfield size in compilers is 128 where the int128 type is supported.
This kernel patch intends to support the new BTF encoding:
	$ pahole -JV t.o
	[1] TYPEDEF ___int type_id=2
	[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
	[3] ENUM A size=4 vlen=3
		A1 val=0
		A2 val=1
		A3 val=2
	[4] STRUCT t kind_flag=1 size=24 vlen=3
		a type_id=5 bitfield_size=0 bits_offset=0
		b type_id=1 bitfield_size=4 bits_offset=160
		c type_id=7 bitfield_size=4 bits_offset=164
	[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
	[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
	[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
The current forward type in BTF does not specify whether the original
type is a struct or union. This will not work for type pretty printing
and BTF-to-header-file conversion, as struct/union must be specified.
	$ cat tt.c
	struct t;
	union u;
	int foo(struct t *t, union u *u) { return 0; }
	$ gcc -c -g -O2 tt.c
	$ pahole -JV tt.o
	[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
	[2] FWD t type_id=0
	[3] PTR (anon) type_id=2
	[4] FWD u type_id=0
	[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is a union type. Otherwise, it is
a struct type.
	$ pahole -JV tt.o
	[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
	[2] FWD t kind_flag=0 type_id=0
	[3] PTR (anon) kind_flag=0 type_id=2
	[4] FWD u kind_flag=1 type_id=0
	[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
	https://github.com/yonghong-song/pahole/tree/bitfield
	https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented the func/func_proto kinds
and .BTF.ext. So to print function signatures with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-16 14:13:51 +08:00
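
For reference, the kflag member-offset decoding described above corresponds to
the BTF_MEMBER_* accessors in uapi/linux/btf.h:

#define BTF_MEMBER_BITFIELD_SIZE(val)	((val) >> 24)		/* bits 24-31 */
#define BTF_MEMBER_BIT_OFFSET(val)	((val) & 0xffffff)	/* bits 0-23 */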

static int btf_df_check_kflag_member(struct btf_verifier_env *env,
				     const struct btf_type *struct_type,
				     const struct btf_member *member,
				     const struct btf_type *member_type)
{
	btf_verifier_log_basic(env, struct_type,
			       "Unsupported check_kflag_member");
	return -EINVAL;
}
/* Used for ptr, array struct/union and float type members.
 * int, enum and modifier types have their specific callback functions.
 */
static int btf_generic_check_kflag_member(struct btf_verifier_env *env,
					  const struct btf_type *struct_type,
					  const struct btf_member *member,
					  const struct btf_type *member_type)
{
	if (BTF_MEMBER_BITFIELD_SIZE(member->offset)) {
		btf_verifier_log_member(env, struct_type, member,
					"Invalid member bitfield_size");
		return -EINVAL;
	}

	/* bitfield size is 0, so member->offset represents bit offset only.
	 * It is safe to call non kflag check_member variants.
	 */
	return btf_type_ops(member_type)->check_member(env, struct_type,
						       member,
						       member_type);
}
static int btf_df_resolve(struct btf_verifier_env *env,
			  const struct resolve_vertex *v)
{
	btf_verifier_log_basic(env, v->t, "Unsupported resolve");
	return -EINVAL;
}

static void btf_df_show(const struct btf *btf, const struct btf_type *t,
			u32 type_id, void *data, u8 bits_offsets,
			struct btf_show *show)
{
	btf_show(show, "<unsupported kind:%u>", BTF_INFO_KIND(t->info));
}
static int btf_int_check_member(struct btf_verifier_env *env,
				const struct btf_type *struct_type,
				const struct btf_member *member,
				const struct btf_type *member_type)
{
	u32 int_data = btf_type_int(member_type);
	u32 struct_bits_off = member->offset;
	u32 struct_size = struct_type->size;
	u32 nr_copy_bits;
	u32 bytes_offset;

	if (U32_MAX - struct_bits_off < BTF_INT_OFFSET(int_data)) {
		btf_verifier_log_member(env, struct_type, member,
					"bits_offset exceeds U32_MAX");
		return -EINVAL;
	}

	struct_bits_off += BTF_INT_OFFSET(int_data);
	bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
	nr_copy_bits = BTF_INT_BITS(int_data) +
		BITS_PER_BYTE_MASKED(struct_bits_off);
	if (nr_copy_bits > BITS_PER_U128) {
		btf_verifier_log_member(env, struct_type, member,
					"nr_copy_bits exceeds 128");
		return -EINVAL;
	}

	if (struct_size < bytes_offset ||
	    struct_size - bytes_offset < BITS_ROUNDUP_BYTES(nr_copy_bits)) {
		btf_verifier_log_member(env, struct_type, member,
					"Member exceeds struct_size");
		return -EINVAL;
	}

	return 0;
}

bpf: btf: support 128 bit integer type
Currently, BTF only supports up to 64-bit integers.
On the other hand, 128-bit support in gcc and clang
has existed for a long time. For example, both gcc 4.8
and llvm 3.7 support the types "__int128" and
"unsigned __int128" for virtually all 64-bit architectures,
including bpf.
The requirement for __int128 support comes from two areas:
. a bpf program may use __int128. For example, some bcc tools
  (https://github.com/iovisor/bcc/tree/master/tools),
  mostly tcp v6 related ones such as tcpstates.py and tcpaccept.py,
  are using __int128 to represent ipv6 addresses.
. Linux itself is using __int128 types. Hence supporting
  the __int128 type in BTF is required for vmlinux BTF,
  which will be used by "compile once and run everywhere"
  and other projects.
For 128-bit integers, instead of base 10, hex numbers are pretty-printed,
as large decimal numbers are hard to decipher, e.g.,
for ipv6 addresses.
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-01-16 09:07:47 +08:00
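A sketch of the hex pretty-printing mentioned above, mirroring the approach of
the kernel's 128-bit int printing (the upper and lower u64 halves are emitted
back to back; treat the helper below as illustrative, not this file's API):

static void int128_print_sketch(struct btf_show *show, u64 upper, u64 lower)
{
	if (!upper)	/* fits in 64 bits, no zero padding needed */
		btf_show(show, "0x%llx", lower);
	else		/* lower half must be zero-padded to 16 hex digits */
		btf_show(show, "0x%llx%016llx", upper, lower);
}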
static int btf_int_check_kflag_member(struct btf_verifier_env *env,
				      const struct btf_type *struct_type,
				      const struct btf_member *member,
				      const struct btf_type *member_type)
{
	u32 struct_bits_off, nr_bits, nr_int_data_bits, bytes_offset;
	u32 int_data = btf_type_int(member_type);
	u32 struct_size = struct_type->size;
	u32 nr_copy_bits;

	/* a regular int type is required for the kflag int member */
	if (!btf_type_int_is_regular(member_type)) {
		btf_verifier_log_member(env, struct_type, member,
					"Invalid member base type");
		return -EINVAL;
	}

	/* check sanity of bitfield size */
	nr_bits = BTF_MEMBER_BITFIELD_SIZE(member->offset);
	struct_bits_off = BTF_MEMBER_BIT_OFFSET(member->offset);
	nr_int_data_bits = BTF_INT_BITS(int_data);
	if (!nr_bits) {
		/* Not a bitfield member, member offset must be at byte
		 * boundary.
		 */
		if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
			btf_verifier_log_member(env, struct_type, member,
						"Invalid member offset");
			return -EINVAL;
		}

		nr_bits = nr_int_data_bits;
	} else if (nr_bits > nr_int_data_bits) {
		btf_verifier_log_member(env, struct_type, member,
					"Invalid member bitfield_size");
		return -EINVAL;
	}

	bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
	nr_copy_bits = nr_bits + BITS_PER_BYTE_MASKED(struct_bits_off);
	if (nr_copy_bits > BITS_PER_U128) {
		btf_verifier_log_member(env, struct_type, member,
					"nr_copy_bits exceeds 128");
		return -EINVAL;
	}

	if (struct_size < bytes_offset ||
	    struct_size - bytes_offset < BITS_ROUNDUP_BYTES(nr_copy_bits)) {
		btf_verifier_log_member(env, struct_type, member,
					"Member exceeds struct_size");
		return -EINVAL;
	}

	return 0;
}
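
/* Worked example under the kind_flag encoding, using "struct t" from the
 * commit message above (member "___int b:4", bits_offset 160,
 * bitfield_size 4, struct size 24):
 *
 *	struct_bits_off = 160, nr_bits = 4
 *	bytes_offset    = BITS_ROUNDDOWN_BYTES(160) = 20
 *	nr_copy_bits    = 4 + BITS_PER_BYTE_MASKED(160) = 4 + 0 = 4
 *	BITS_ROUNDUP_BYTES(4) = 1, and 24 - 20 >= 1, so the member fits.
 */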
static s32 btf_int_check_meta(struct btf_verifier_env *env,
			      const struct btf_type *t,
			      u32 meta_left)
{
	u32 int_data, nr_bits, meta_needed = sizeof(int_data);
	u16 encoding;

	if (meta_left < meta_needed) {
		btf_verifier_log_basic(env, t,
				       "meta_left:%u meta_needed:%u",
				       meta_left, meta_needed);
		return -EINVAL;
	}

	if (btf_type_vlen(t)) {
		btf_verifier_log_type(env, t, "vlen != 0");
		return -EINVAL;
	}
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented the func/func_proto kinds
or .BTF.ext, so to print function signatures with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-16 14:13:51 +08:00
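A minimal sketch of how a consumer decodes btf_member->offset under
both schemes (uses the BTF_INFO_KFLAG()/BTF_MEMBER_*() macros from
uapi <linux/btf.h>; the helper name member_bits is illustrative):

#include <linux/btf.h>

/* Decode a member's bit offset and bitfield size, honoring kind_flag.
 * When kind_flag is set, bits 24-31 of btf_member->offset carry the
 * bitfield size and bits 0-23 carry the bit offset; otherwise all
 * 32 bits are the bit offset.
 */
static void member_bits(const struct btf_type *struct_type,
			const struct btf_member *m,
			u32 *bit_offset, u32 *bitfield_size)
{
	if (BTF_INFO_KFLAG(struct_type->info)) {
		*bit_offset = BTF_MEMBER_BIT_OFFSET(m->offset);
		*bitfield_size = BTF_MEMBER_BITFIELD_SIZE(m->offset);
	} else {
		*bit_offset = m->offset;
		*bitfield_size = 0;	/* size comes from the member's type */
	}
}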

	if (btf_type_kflag(t)) {
		btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
		return -EINVAL;
	}

	int_data = btf_type_int(t);
	if (int_data & ~BTF_INT_MASK) {
		btf_verifier_log_basic(env, t, "Invalid int_data:%x",
				       int_data);
		return -EINVAL;
	}

	nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data);

bpf: btf: support 128 bit integer type
Currently, btf only supports up to 64-bit integers.
On the other hand, 128-bit support in gcc and clang
has existed for a long time. For example, both gcc 4.8
and llvm 3.7 support the types "__int128" and
"unsigned __int128" for virtually all 64-bit architectures,
including bpf.
The requirement for __int128 support comes from two areas:
. bpf programs may use __int128. For example, some bcc tools
  (https://github.com/iovisor/bcc/tree/master/tools),
  mostly tcp v6 related, tcpstates.py, tcpaccept.py, etc.,
  use __int128 to represent ipv6 addresses.
. linux itself uses __int128 types. Hence supporting
  the __int128 type in BTF is required for vmlinux BTF,
  which will be used by "compile once and run everywhere"
  and other projects.
128-bit integers are pretty printed as hex rather than
base-10, since large decimal numbers are hard to decipher,
e.g., for ipv6 addresses.
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-01-16 09:07:47 +08:00
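As a hedged userspace illustration of the hex formatting described
above (a standalone sketch, not kernel code; the upper/lower split
mirrors what the kernel's 128-bit printer does on raw bytes):

#include <stdio.h>
#include <stdint.h>

/* Print an unsigned __int128 as hex by splitting it into two u64
 * halves, skipping the leading half when it is zero.
 */
static void print_u128_hex(unsigned __int128 v)
{
	uint64_t upper = (uint64_t)(v >> 64);
	uint64_t lower = (uint64_t)v;

	if (upper == 0)
		printf("0x%llx\n", (unsigned long long)lower);
	else
		printf("0x%llx%016llx\n", (unsigned long long)upper,
		       (unsigned long long)lower);
}

int main(void)
{
	/* the ipv6-style value 2001:db8::1 */
	unsigned __int128 v = ((unsigned __int128)0x20010db8 << 96) | 0x1;

	print_u128_hex(v);	/* prints 0x20010db8000000000000000000000001 */
	return 0;
}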
	if (nr_bits > BITS_PER_U128) {
		btf_verifier_log_type(env, t, "nr_bits exceeds %zu",
				      BITS_PER_U128);
		return -EINVAL;
	}

	if (BITS_ROUNDUP_BYTES(nr_bits) > t->size) {
		btf_verifier_log_type(env, t, "nr_bits exceeds type_size");
		return -EINVAL;
	}

	/*
	 * Only one of the encoding bits is allowed and it
	 * should be sufficient for the pretty print purpose (i.e. decoding).
	 * Multiple bits can be allowed later if it is found
	 * to be insufficient.
	 */
	encoding = BTF_INT_ENCODING(int_data);
	if (encoding &&
	    encoding != BTF_INT_SIGNED &&
	    encoding != BTF_INT_CHAR &&
	    encoding != BTF_INT_BOOL) {
		btf_verifier_log_type(env, t, "Unsupported encoding");
		return -ENOTSUPP;
	}

	btf_verifier_log_type(env, t, NULL);

	return meta_needed;
}

static void btf_int_log(struct btf_verifier_env *env,
			const struct btf_type *t)
{
	int int_data = btf_type_int(t);

	btf_verifier_log(env,
			 "size=%u bits_offset=%u nr_bits=%u encoding=%s",
			 t->size, BTF_INT_OFFSET(int_data),
			 BTF_INT_BITS(int_data),
			 btf_int_encoding_str(BTF_INT_ENCODING(int_data)));
}

static void btf_int128_print(struct btf_show *show, void *data)
{
	/* data points to a __int128 number.
	 * Suppose
	 *     int128_num = *(__int128 *)data;
	 * The formulas below show what upper_num and lower_num represent:
	 *     upper_num = int128_num >> 64;
	 *     lower_num = int128_num & 0xffffffffFFFFFFFFULL;
	 */
	u64 upper_num, lower_num;

#ifdef __BIG_ENDIAN_BITFIELD
	upper_num = *(u64 *)data;
	lower_num = *(u64 *)(data + 8);
#else
	upper_num = *(u64 *)(data + 8);
	lower_num = *(u64 *)data;
#endif
	if (upper_num == 0)
		btf_show_type_value(show, "0x%llx", lower_num);
	else
		btf_show_type_values(show, "0x%llx%016llx", upper_num,
				     lower_num);
}

static void btf_int128_shift(u64 *print_num, u16 left_shift_bits,
			     u16 right_shift_bits)
{
	u64 upper_num, lower_num;

#ifdef __BIG_ENDIAN_BITFIELD
	upper_num = print_num[0];
	lower_num = print_num[1];
#else
	upper_num = print_num[1];
	lower_num = print_num[0];
#endif

	/* shake out unneeded bits by shift/or operations */
	if (left_shift_bits >= 64) {
		upper_num = lower_num << (left_shift_bits - 64);
		lower_num = 0;
	} else {
		upper_num = (upper_num << left_shift_bits) |
			    (lower_num >> (64 - left_shift_bits));
		lower_num = lower_num << left_shift_bits;
	}

	if (right_shift_bits >= 64) {
		lower_num = upper_num >> (right_shift_bits - 64);
		upper_num = 0;
	} else {
		lower_num = (lower_num >> right_shift_bits) |
			    (upper_num << (64 - right_shift_bits));
		upper_num = upper_num >> right_shift_bits;
	}

#ifdef __BIG_ENDIAN_BITFIELD
	print_num[0] = upper_num;
	print_num[1] = lower_num;
#else
	print_num[0] = lower_num;
	print_num[1] = upper_num;
#endif
}

static void btf_bitfield_show(void *data, u8 bits_offset,
			      u8 nr_bits, struct btf_show *show)
{
	u16 left_shift_bits, right_shift_bits;
	u8 nr_copy_bytes;
	u8 nr_copy_bits;
	u64 print_num[2] = {};

	nr_copy_bits = nr_bits + bits_offset;
	nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits);

	memcpy(print_num, data, nr_copy_bytes);

#ifdef __BIG_ENDIAN_BITFIELD
	left_shift_bits = bits_offset;
#else
	left_shift_bits = BITS_PER_U128 - nr_copy_bits;
#endif
	right_shift_bits = BITS_PER_U128 - nr_bits;

	btf_int128_shift(print_num, left_shift_bits, right_shift_bits);
	btf_int128_print(show, print_num);
}
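To make the shift arithmetic concrete, a worked example with assumed
values (little-endian, a 4-bit field at bit offset 4): nr_copy_bits is 8,
so one byte lands in print_num; left_shift_bits = 128 - 8 pushes the
field's top bit to bit 127 and discards everything above it, and
right_shift_bits = 128 - 4 then right-aligns the field, discarding the
4 offset bits below it. The same extraction collapsed to 64 bits, as a
standalone sketch:

#include <stdint.h>
#include <stdio.h>

/* Extract nr_bits starting at bits_offset from a little-endian word,
 * using the same left-then-right shift trick as btf_bitfield_show(),
 * collapsed to 64 bits for illustration. Assumes 1 <= nr_bits and
 * bits_offset + nr_bits <= 64.
 */
static uint64_t bitfield_extract(uint64_t word, unsigned int bits_offset,
				 unsigned int nr_bits)
{
	unsigned int left = 64 - (bits_offset + nr_bits);	/* drop high bits */
	unsigned int right = 64 - nr_bits;			/* right-align */

	return (word << left) >> right;
}

int main(void)
{
	uint64_t word = 0xA5;	/* bits 4-7 hold the value 0xa */

	printf("0x%llx\n",
	       (unsigned long long)bitfield_extract(word, 4, 4));	/* 0xa */
	return 0;
}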

static void btf_int_bits_show(const struct btf *btf,
			      const struct btf_type *t,
			      void *data, u8 bits_offset,
			      struct btf_show *show)
{
	u32 int_data = btf_type_int(t);
	u8 nr_bits = BTF_INT_BITS(int_data);
	u8 total_bits_offset;

	/*
	 * bits_offset is at most 7.
	 * BTF_INT_OFFSET() cannot exceed 128 bits.
	 */
	total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data);
	data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
	bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
	btf_bitfield_show(data, bits_offset, nr_bits, show);
}

static void btf_int_show(const struct btf *btf, const struct btf_type *t,
			 u32 type_id, void *data, u8 bits_offset,
			 struct btf_show *show)
{
	u32 int_data = btf_type_int(t);
	u8 encoding = BTF_INT_ENCODING(int_data);
	bool sign = encoding & BTF_INT_SIGNED;
	u8 nr_bits = BTF_INT_BITS(int_data);
	void *safe_data;

	safe_data = btf_show_start_type(show, t, type_id, data);
	if (!safe_data)
		return;

	if (bits_offset || BTF_INT_OFFSET(int_data) ||
	    BITS_PER_BYTE_MASKED(nr_bits)) {
		btf_int_bits_show(btf, t, safe_data, bits_offset, show);
		goto out;
	}

	switch (nr_bits) {
	case 128:
		btf_int128_print(show, safe_data);
		break;
	case 64:
		if (sign)
			btf_show_type_value(show, "%lld", *(s64 *)safe_data);
		else
			btf_show_type_value(show, "%llu", *(u64 *)safe_data);
		break;
	case 32:
		if (sign)
			btf_show_type_value(show, "%d", *(s32 *)safe_data);
		else
			btf_show_type_value(show, "%u", *(u32 *)safe_data);
		break;
	case 16:
		if (sign)
			btf_show_type_value(show, "%d", *(s16 *)safe_data);
		else
			btf_show_type_value(show, "%u", *(u16 *)safe_data);
		break;
	case 8:
		if (show->state.array_encoding == BTF_INT_CHAR) {
			/* check for null terminator */
			if (show->state.array_terminated)
				break;
			if (*(char *)data == '\0') {
				show->state.array_terminated = 1;
				break;
			}
			if (isprint(*(char *)data)) {
				btf_show_type_value(show, "'%c'",
						    *(char *)safe_data);
				break;
			}
		}
		if (sign)
			btf_show_type_value(show, "%d", *(s8 *)safe_data);
		else
			btf_show_type_value(show, "%u", *(u8 *)safe_data);
		break;
	default:
		btf_int_bits_show(btf, t, safe_data, bits_offset, show);
		break;
	}
out:
	btf_show_end_type(show);
}

static const struct btf_kind_operations int_ops = {
	.check_meta = btf_int_check_meta,
	.resolve = btf_df_resolve,
	.check_member = btf_int_check_member,
	.check_kflag_member = btf_int_check_kflag_member,
	.log_details = btf_int_log,
	.show = btf_int_show,
};
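Every BTF kind registers an operations table like this one; generic
verifier code then dispatches through btf_type_ops() rather than
switching on the kind inline, as btf_modifier_check_member() below
does for check_member. A minimal sketch of the pattern (the wrapper
name check_meta_generic is illustrative):

/* Sketch: resolve the per-kind ops table for a type and call through
 * it; every kind supplies the same callback set.
 */
static s32 check_meta_generic(struct btf_verifier_env *env,
			      const struct btf_type *t, u32 meta_left)
{
	return btf_type_ops(t)->check_meta(env, t, meta_left);
}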

static int btf_modifier_check_member(struct btf_verifier_env *env,
				     const struct btf_type *struct_type,
				     const struct btf_member *member,
				     const struct btf_type *member_type)
{
	const struct btf_type *resolved_type;
	u32 resolved_type_id = member->type;
	struct btf_member resolved_member;
	struct btf *btf = env->btf;

	resolved_type = btf_type_id_size(btf, &resolved_type_id, NULL);
	if (!resolved_type) {
		btf_verifier_log_member(env, struct_type, member,
					"Invalid member");
		return -EINVAL;
	}

	resolved_member = *member;
	resolved_member.type = resolved_type_id;

	return btf_type_ops(resolved_type)->check_member(env, struct_type,
							 &resolved_member,
							 resolved_type);
}

static int btf_modifier_check_kflag_member(struct btf_verifier_env *env,
					   const struct btf_type *struct_type,
					   const struct btf_member *member,
					   const struct btf_type *member_type)
{
	const struct btf_type *resolved_type;
	u32 resolved_type_id = member->type;
	struct btf_member resolved_member;
	struct btf *btf = env->btf;

	resolved_type = btf_type_id_size(btf, &resolved_type_id, NULL);
	if (!resolved_type) {
		btf_verifier_log_member(env, struct_type, member,
					"Invalid member");
		return -EINVAL;
	}

	resolved_member = *member;
	resolved_member.type = resolved_type_id;

	return btf_type_ops(resolved_type)->check_kflag_member(env, struct_type,
							       &resolved_member,
							       resolved_type);
}

static int btf_ptr_check_member(struct btf_verifier_env *env,
				const struct btf_type *struct_type,
				const struct btf_member *member,
				const struct btf_type *member_type)
{
	u32 struct_size, struct_bits_off, bytes_offset;

	struct_size = struct_type->size;
	struct_bits_off = member->offset;
	bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);

	if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
		btf_verifier_log_member(env, struct_type, member,
					"Member is not byte aligned");
		return -EINVAL;
	}

	if (struct_size - bytes_offset < sizeof(void *)) {
		btf_verifier_log_member(env, struct_type, member,
					"Member exceeds struct_size");
		return -EINVAL;
	}

	return 0;
}

static int btf_ref_type_check_meta(struct btf_verifier_env *env,
				   const struct btf_type *t,
				   u32 meta_left)
{
	const char *value;

	if (btf_type_vlen(t)) {
		btf_verifier_log_type(env, t, "vlen != 0");
		return -EINVAL;
	}

	if (btf_type_kflag(t)) {
		btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
		return -EINVAL;
	}

	if (!BTF_TYPE_ID_VALID(t->type)) {
		btf_verifier_log_type(env, t, "Invalid type_id");
		return -EINVAL;
	}

	/* typedef/type_tag type must have a valid name, and other ref types,
	 * volatile, const, restrict, should have a null name.
	 */
	if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF) {
		if (!t->name_off ||
		    !btf_name_valid_identifier(env->btf, t->name_off)) {
			btf_verifier_log_type(env, t, "Invalid name");
			return -EINVAL;
		}
	} else if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPE_TAG) {
		value = btf_name_by_offset(env->btf, t->name_off);
		if (!value || !value[0]) {
			btf_verifier_log_type(env, t, "Invalid name");
			return -EINVAL;
		}
	} else {
		if (t->name_off) {
			btf_verifier_log_type(env, t, "Invalid name");
			return -EINVAL;
		}
	}

	btf_verifier_log_type(env, t, NULL);

	return 0;
}

static int btf_modifier_resolve(struct btf_verifier_env *env,
				const struct resolve_vertex *v)
{
	const struct btf_type *t = v->t;
	const struct btf_type *next_type;
	u32 next_type_id = t->type;
	struct btf *btf = env->btf;

	next_type = btf_type_by_id(btf, next_type_id);
	if (!next_type || btf_type_is_resolve_source_only(next_type)) {
		btf_verifier_log_type(env, v->t, "Invalid type_id");
		return -EINVAL;
	}

	if (!env_type_is_resolve_sink(env, next_type) &&
	    !env_type_is_resolved(env, next_type_id))
		return env_stack_push(env, next_type, next_type_id);

	/* Figure out the resolved next_type_id with size.
	 * They will be stored in the current modifier's
	 * resolved_ids and resolved_sizes such that it can
	 * save us a few type-following when we use it later (e.g. in
	 * pretty print).
	 */
bpf: fix BTF verifier size resolution logic
The BTF verifier has a size resolution bug which in some circumstances leads
to invalid size resolution for, e.g., a TYPEDEF modifier. This happens if we
have [1] PTR -> [2] TYPEDEF -> [3] ARRAY, in which case, due to being in
pointer context, the ARRAY size won't be resolved (for a pointer it doesn't
matter, so it's a sink in pointer context), but it will be permanently
remembered as zero for the TYPEDEF, and the TYPEDEF will be marked as
RESOLVED. Eventually the ARRAY size will be resolved correctly, but the
TYPEDEF's resolved_size won't be updated anymore.
This subsequently leads to erroneous map creation failure, if that TYPEDEF
is specified as either key or value, as key_size/value_size won't correspond
to the resolved size of the TYPEDEF (the kernel will believe it's zero).
Note that if the BTF were ordered as [1] ARRAY <- [2] TYPEDEF <- [3] PTR,
this wouldn't be a problem, as by the time we get to the TYPEDEF, the
ARRAY's size is already calculated and stored.
This bug manifests itself in rejecting BTF-defined maps that use an array
typedef as a value type:
typedef int array_t[16];
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(value, array_t); /* i.e., array_t *value; */
} test_map SEC(".maps");
The fix consists of not relying on the modifier's resolved_size and instead
using the modifier's resolved_id (the type ID of the "concrete" type to
which the modifier eventually resolves) and doing size determination for
that resolved type. This allows us to preserve the existing "early DFS
termination" logic for PTR or STRUCT_OR_ARRAY contexts, while still doing
correct size determination for modifier types.
Fixes: eb3f595dab40 ("bpf: btf: Validate type reference")
Cc: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-07-13 01:25:55 +08:00
	if (!btf_type_id_size(btf, &next_type_id, NULL)) {
		if (env_type_is_resolved(env, next_type_id))
			next_type = btf_type_id_resolve(btf, &next_type_id);

		/* "typedef void new_void", "const void"...etc */
		if (!btf_type_is_void(next_type) &&
		    !btf_type_is_fwd(next_type) &&
		    !btf_type_is_func_proto(next_type)) {
			btf_verifier_log_type(env, v->t, "Invalid type_id");
			return -EINVAL;
		}
	}

	env_stack_pop_resolved(env, next_type_id, 0);

	return 0;
}
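The hazardous ordering described in the size-resolution fix above,
written in the pahole -JV notation used earlier (illustrative type IDs):
[1] PTR (anon) type_id=2
[2] TYPEDEF array_t type_id=3
[3] ARRAY (anon) type_id=4 index_type_id=5 nr_elems=16
Visiting the TYPEDEF first in pointer context would have cached its
resolved_size as zero before the ARRAY's size was known; resolving
sizes through resolved_id avoids trusting that stale zero.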

static int btf_var_resolve(struct btf_verifier_env *env,
			   const struct resolve_vertex *v)
{
	const struct btf_type *next_type;
	const struct btf_type *t = v->t;
	u32 next_type_id = t->type;
	struct btf *btf = env->btf;

	next_type = btf_type_by_id(btf, next_type_id);
	if (!next_type || btf_type_is_resolve_source_only(next_type)) {
		btf_verifier_log_type(env, v->t, "Invalid type_id");
		return -EINVAL;
	}

	if (!env_type_is_resolve_sink(env, next_type) &&
	    !env_type_is_resolved(env, next_type_id))
		return env_stack_push(env, next_type, next_type_id);

	if (btf_type_is_modifier(next_type)) {
		const struct btf_type *resolved_type;
		u32 resolved_type_id;

		resolved_type_id = next_type_id;
		resolved_type = btf_type_id_resolve(btf, &resolved_type_id);

		if (btf_type_is_ptr(resolved_type) &&
		    !env_type_is_resolve_sink(env, resolved_type) &&
		    !env_type_is_resolved(env, resolved_type_id))
			return env_stack_push(env, resolved_type,
					      resolved_type_id);
	}

	/* We must resolve to something concrete at this point; no
	 * forward types or similar that would resolve to a size of
	 * zero are allowed.
	 */
	if (!btf_type_id_size(btf, &next_type_id, NULL)) {
		btf_verifier_log_type(env, v->t, "Invalid type_id");
		return -EINVAL;
	}

	env_stack_pop_resolved(env, next_type_id, 0);

	return 0;
}
|
|
|
|
|
2018-04-19 06:55:58 +08:00
|
|
|
static int btf_ptr_resolve(struct btf_verifier_env *env,
|
|
|
|
const struct resolve_vertex *v)
|
|
|
|
{
|
|
|
|
const struct btf_type *next_type;
|
|
|
|
const struct btf_type *t = v->t;
|
|
|
|
u32 next_type_id = t->type;
|
|
|
|
struct btf *btf = env->btf;
|
|
|
|
|
|
|
|
next_type = btf_type_by_id(btf, next_type_id);
|
2019-04-10 05:20:09 +08:00
|
|
|
if (!next_type || btf_type_is_resolve_source_only(next_type)) {
|
2018-04-19 06:55:58 +08:00
|
|
|
btf_verifier_log_type(env, v->t, "Invalid type_id");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!env_type_is_resolve_sink(env, next_type) &&
|
|
|
|
!env_type_is_resolved(env, next_type_id))
|
|
|
|
return env_stack_push(env, next_type, next_type_id);
|
|
|
|
|
|
|
|
/* If the modifier was RESOLVED during RESOLVE_STRUCT_OR_ARRAY,
|
|
|
|
* the modifier may have stopped resolving when it was resolved
|
|
|
|
* to a ptr (last-resolved-ptr).
|
|
|
|
*
|
|
|
|
* We now need to continue from the last-resolved-ptr to
|
|
|
|
* ensure the last-resolved-ptr will not refer back to
|
2022-02-21 02:40:55 +08:00
|
|
|
* the current ptr (t).
|
2018-04-19 06:55:58 +08:00
|
|
|
*/
|
|
|
|
if (btf_type_is_modifier(next_type)) {
|
|
|
|
const struct btf_type *resolved_type;
|
|
|
|
u32 resolved_type_id;
|
|
|
|
|
|
|
|
resolved_type_id = next_type_id;
|
|
|
|
resolved_type = btf_type_id_resolve(btf, &resolved_type_id);
|
|
|
|
|
|
|
|
if (btf_type_is_ptr(resolved_type) &&
|
|
|
|
!env_type_is_resolve_sink(env, resolved_type) &&
|
|
|
|
!env_type_is_resolved(env, resolved_type_id))
|
|
|
|
return env_stack_push(env, resolved_type,
|
|
|
|
resolved_type_id);
|
|
|
|
}
|
|
|
|
|
2018-11-20 07:29:08 +08:00
|
|
|
if (!btf_type_id_size(btf, &next_type_id, NULL)) {
|
|
|
|
if (env_type_is_resolved(env, next_type_id))
|
|
|
|
next_type = btf_type_id_resolve(btf, &next_type_id);
|
|
|
|
|
|
|
|
if (!btf_type_is_void(next_type) &&
|
|
|
|
!btf_type_is_fwd(next_type) &&
|
|
|
|
!btf_type_is_func_proto(next_type)) {
|
|
|
|
btf_verifier_log_type(env, v->t, "Invalid type_id");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2018-04-19 06:55:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
env_stack_pop_resolved(env, next_type_id, 0);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-09-28 19:31:04 +08:00
|
|
|
static void btf_modifier_show(const struct btf *btf,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 type_id, void *data,
|
|
|
|
u8 bits_offset, struct btf_show *show)
|
2018-04-19 06:56:00 +08:00
|
|
|
{
|
bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS
The patch introduces BPF_MAP_TYPE_STRUCT_OPS. The map value
is a kernel struct with its func ptr implemented in bpf prog.
This new map is the interface to register/unregister/introspect
a bpf implemented kernel struct.
The kernel struct is actually embedded inside another new struct
(or called the "value" struct in the code). For example,
"struct tcp_congestion_ops" is embbeded in:
struct bpf_struct_ops_tcp_congestion_ops {
refcount_t refcnt;
enum bpf_struct_ops_state state;
struct tcp_congestion_ops data; /* <-- kernel subsystem struct here */
}
The map value is "struct bpf_struct_ops_tcp_congestion_ops".
The "bpftool map dump" will then be able to show the
state ("inuse"/"tobefree") and the number of subsystem's refcnt (e.g.
number of tcp_sock in the tcp_congestion_ops case). This "value" struct
is created automatically by a macro. Having a separate "value" struct
will also make extending "struct bpf_struct_ops_XYZ" easier (e.g. adding
"void (*init)(void)" to "struct bpf_struct_ops_XYZ" to do some
initialization work before registering the struct_ops with the kernel
subsystem). libbpf will take care of finding and populating the
"struct bpf_struct_ops_XYZ" from "struct XYZ".
Register a struct_ops to a kernel subsystem:
1. Load all needed BPF_PROG_TYPE_STRUCT_OPS prog(s)
2. Create a BPF_MAP_TYPE_STRUCT_OPS with attr->btf_vmlinux_value_type_id
set to the btf id "struct bpf_struct_ops_tcp_congestion_ops" of the
running kernel.
Instead of reusing attr->btf_value_type_id,
btf_vmlinux_value_type_id is added so that attr->btf_fd can still be
used as the "user" BTF, which could store other useful sysadmin/debug
info that may be introduced in the future,
e.g. creation-date/compiler-details/map-creator...etc.
3. Create a "struct bpf_struct_ops_tcp_congestion_ops" object as described
in the running kernel btf. Populate the value of this object.
The function ptr should be populated with the prog fds.
4. Call BPF_MAP_UPDATE with the object created in (3) as
the map value. The key is always "0".
During BPF_MAP_UPDATE, the code that saves the kernel-func-ptr's
args as an array of u64 is generated. BPF_MAP_UPDATE also allows
the specific struct_ops to do some final checks in "st_ops->init_member()"
(e.g. ensure all mandatory func ptrs are implemented).
If everything looks good, it will register this kernel struct
to the kernel subsystem. The map will not allow further update
from this point.
Unregister a struct_ops from the kernel subsystem:
BPF_MAP_DELETE with key "0".
Introspect a struct_ops:
BPF_MAP_LOOKUP_ELEM with key "0". The map value returned will
have the prog _id_ populated as the func ptr.
The map value state (enum bpf_struct_ops_state) will transit from:
INIT (map created) =>
INUSE (map updated, i.e. reg) =>
TOBEFREE (map value deleted, i.e. unreg)
The kernel subsystem needs to call bpf_struct_ops_get() and
bpf_struct_ops_put() to manage the "refcnt" in the
"struct bpf_struct_ops_XYZ". This patch uses a separate refcnt
for the purpose of tracking the subsystem usage. Another approach
is to reuse the map->refcnt and then "show" (i.e. during map_lookup)
the subsystem's usage by doing map->refcnt - map->usercnt to filter out
the map-fd/pinned-map usage. However, that will also tie down the
future semantics of map->refcnt and map->usercnt.
The very first subsystem's refcnt (during reg()) holds one
count to map->refcnt. When the very last subsystem's refcnt
is gone, it will also release the map->refcnt. All bpf_prog will be
freed when the map->refcnt reaches 0 (i.e. during map_free()).
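A hedged user-space sketch of this lifecycle (assuming a libbpf
environment, an already-created struct_ops map fd, and a "value" buffer
laid out as the kernel-described value struct; not code from this patch):
#include <bpf/bpf.h>

/* Sketch only: drive register/introspect/unregister through the plain
 * libbpf syscall wrappers, always with key 0.
 */
static int struct_ops_lifecycle(int map_fd, void *value)
{
	__u32 key = 0;
	int err;

	err = bpf_map_update_elem(map_fd, &key, value, 0); /* reg */
	if (err)
		return err;
	err = bpf_map_lookup_elem(map_fd, &key, value);    /* introspect */
	if (err)
		return err;
	return bpf_map_delete_elem(map_fd, &key);          /* unreg */
}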
Here is how the bpftool map command output looks:
[root@arch-fb-vm1 bpf]# bpftool map show
6: struct_ops name dctcp flags 0x0
key 4B value 256B max_entries 1 memlock 4096B
btf_id 6
[root@arch-fb-vm1 bpf]# bpftool map dump id 6
[{
"value": {
"refcnt": {
"refs": {
"counter": 1
}
},
"state": 1,
"data": {
"list": {
"next": 0,
"prev": 0
},
"key": 0,
"flags": 2,
"init": 24,
"release": 0,
"ssthresh": 25,
"cong_avoid": 30,
"set_state": 27,
"cwnd_event": 28,
"in_ack_event": 26,
"undo_cwnd": 29,
"pkts_acked": 0,
"min_tso_segs": 0,
"sndbuf_expand": 0,
"cong_control": 0,
"get_info": 0,
"name": [98,112,102,95,100,99,116,99,112,0,0,0,0,0,0,0
],
"owner": 0
}
}
}
]
Misc Notes:
* bpf_struct_ops_map_sys_lookup_elem() is added for syscall lookup.
It does an in-place update on "*value" instead of returning a pointer
to syscall.c. Otherwise, it would need a separate copy of the "zero" value
for the BPF_STRUCT_OPS_STATE_INIT to avoid races.
* The bpf_struct_ops_map_delete_elem() is also called without
preempt_disable() from map_delete_elem(). This is because
"->unreg()" may require a sleepable context, e.g.
the "tcp_unregister_congestion_control()".
* "const" is added to some of the existing "struct btf_func_model *"
function arg to avoid a compiler warning caused by this patch.
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200109003505.3855919-1-kafai@fb.com
2020-01-09 08:35:05 +08:00
|
|
|
if (btf->resolved_ids)
|
|
|
|
t = btf_type_id_resolve(btf, &type_id);
|
|
|
|
else
|
|
|
|
t = btf_type_skip_modifiers(btf, type_id, NULL);
|
2018-04-19 06:56:00 +08:00
|
|
|
|
2020-09-28 19:31:04 +08:00
|
|
|
btf_type_ops(t)->show(btf, t, type_id, data, bits_offset, show);
|
2018-04-19 06:56:00 +08:00
|
|
|
}
|
|
|
|
|
2020-09-28 19:31:04 +08:00
|
|
|
static void btf_var_show(const struct btf *btf, const struct btf_type *t,
|
|
|
|
u32 type_id, void *data, u8 bits_offset,
|
|
|
|
struct btf_show *show)
|
2019-04-10 05:20:09 +08:00
|
|
|
{
|
|
|
|
t = btf_type_id_resolve(btf, &type_id);
|
|
|
|
|
2020-09-28 19:31:04 +08:00
|
|
|
btf_type_ops(t)->show(btf, t, type_id, data, bits_offset, show);
|
2019-04-10 05:20:09 +08:00
|
|
|
}
|
|
|
|
|
2020-09-28 19:31:04 +08:00
|
|
|
static void btf_ptr_show(const struct btf *btf, const struct btf_type *t,
|
|
|
|
u32 type_id, void *data, u8 bits_offset,
|
|
|
|
struct btf_show *show)
|
2018-04-19 06:56:00 +08:00
|
|
|
{
|
2020-09-28 19:31:04 +08:00
|
|
|
void *safe_data;
|
|
|
|
|
|
|
|
safe_data = btf_show_start_type(show, t, type_id, data);
|
|
|
|
if (!safe_data)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* It is a hashed value unless BTF_SHOW_PTR_RAW is specified */
|
|
|
|
if (show->flags & BTF_SHOW_PTR_RAW)
|
|
|
|
btf_show_type_value(show, "0x%px", *(void **)safe_data);
|
|
|
|
else
|
|
|
|
btf_show_type_value(show, "0x%p", *(void **)safe_data);
|
|
|
|
btf_show_end_type(show);
|
2018-04-19 06:56:00 +08:00
|
|
|
}
|
|
|
|
|
2018-04-19 06:55:57 +08:00
|
|
|
static void btf_ref_type_log(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t)
|
|
|
|
{
|
|
|
|
btf_verifier_log(env, "type_id=%u", t->type);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct btf_kind_operations modifier_ops = {
|
|
|
|
.check_meta = btf_ref_type_check_meta,
|
2018-04-19 06:55:58 +08:00
|
|
|
.resolve = btf_modifier_resolve,
|
2018-04-19 06:55:59 +08:00
|
|
|
.check_member = btf_modifier_check_member,
|
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixes two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
the forward type.
Issue #1 and solution:
======================
The current BTF encoding of bitfields follows what pahole generates.
For each bitfield, pahole duplicates the type chain and
puts the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode a bit size,
pahole works around the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
There are two issues in the above:
. by changing the enum type to int, we lose the original
type information, which will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield sizes differ.
To fix this issue, this patch implements a compatible
change to the BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset is encoded as:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bitfield, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256, which is enough for today, as the largest
bitfield a compiler can produce is 128 bits (where the int128 type is
supported).
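A small sketch of decoding a member under the new layout, using the uapi
macros from linux/btf.h (BTF_INFO_KFLAG(), BTF_MEMBER_BIT_OFFSET(),
BTF_MEMBER_BITFIELD_SIZE()); the kernel keeps equivalent internal
accessors for this:
#include <linux/btf.h>

/* Sketch: with kind_flag set, btf_member->offset packs both fields;
 * without it, all 32 bits are the bit offset.
 */
static __u32 member_bit_offset(const struct btf_type *struct_type,
			       const struct btf_member *m)
{
	return BTF_INFO_KFLAG(struct_type->info) ?
	       BTF_MEMBER_BIT_OFFSET(m->offset) : m->offset;
}

static __u32 member_bitfield_size(const struct btf_type *struct_type,
				  const struct btf_member *m)
{
	return BTF_INFO_KFLAG(struct_type->info) ?
	       BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0; /* 0: not a bitfield */
}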
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
The current forward type in BTF does not specify whether the original
type is a struct or a union. This will not work for type pretty-printing
and BTF-to-header-file conversion, as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is a union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
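The same bit doubles as the struct/union discriminator on the FWD kind; a
one-line sketch (this mirrors what btf_fwd_type_log() further below
prints):
/* Sketch: kind_flag on a FWD selects the keyword to print. */
static const char *fwd_keyword(const struct btf_type *t)
{
	return BTF_INFO_KFLAG(t->info) ? "union" : "struct";
}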
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented the func/func_proto kinds
or .BTF.ext, so to print function signatures with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-16 14:13:51 +08:00
|
|
|
.check_kflag_member = btf_modifier_check_kflag_member,
|
2018-04-19 06:55:57 +08:00
|
|
|
.log_details = btf_ref_type_log,
|
2020-09-28 19:31:04 +08:00
|
|
|
.show = btf_modifier_show,
|
2018-04-19 06:55:57 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static struct btf_kind_operations ptr_ops = {
|
|
|
|
.check_meta = btf_ref_type_check_meta,
|
2018-04-19 06:55:58 +08:00
|
|
|
.resolve = btf_ptr_resolve,
|
2018-04-19 06:55:59 +08:00
|
|
|
.check_member = btf_ptr_check_member,
|
bpf: btf: fix struct/union/fwd types with kind_flag
2018-12-16 14:13:51 +08:00
|
|
|
.check_kflag_member = btf_generic_check_kflag_member,
|
2018-04-19 06:55:57 +08:00
|
|
|
.log_details = btf_ref_type_log,
|
2020-09-28 19:31:04 +08:00
|
|
|
.show = btf_ptr_show,
|
2018-04-19 06:55:57 +08:00
|
|
|
};
|
|
|
|
|
2018-06-03 00:06:51 +08:00
|
|
|
static s32 btf_fwd_check_meta(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 meta_left)
|
|
|
|
{
|
|
|
|
if (btf_type_vlen(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "vlen != 0");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (t->type) {
|
|
|
|
btf_verifier_log_type(env, t, "type != 0");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-11-28 05:23:28 +08:00
|
|
|
/* fwd type must have a valid name */
|
|
|
|
if (!t->name_off ||
|
|
|
|
!btf_name_valid_identifier(env->btf, t->name_off)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid name");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-06-03 00:06:51 +08:00
|
|
|
btf_verifier_log_type(env, t, NULL);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-12-19 05:43:58 +08:00
|
|
|
static void btf_fwd_type_log(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t)
|
|
|
|
{
|
|
|
|
btf_verifier_log(env, "%s", btf_type_kflag(t) ? "union" : "struct");
|
|
|
|
}
|
|
|
|
|
2018-04-19 06:55:57 +08:00
|
|
|
static struct btf_kind_operations fwd_ops = {
|
2018-06-03 00:06:51 +08:00
|
|
|
.check_meta = btf_fwd_check_meta,
|
2018-04-19 06:55:58 +08:00
|
|
|
.resolve = btf_df_resolve,
|
2018-04-19 06:55:59 +08:00
|
|
|
.check_member = btf_df_check_member,
|
bpf: btf: fix struct/union/fwd types with kind_flag
2018-12-16 14:13:51 +08:00
|
|
|
.check_kflag_member = btf_df_check_kflag_member,
|
2018-12-19 05:43:58 +08:00
|
|
|
.log_details = btf_fwd_type_log,
|
2020-09-28 19:31:04 +08:00
|
|
|
.show = btf_df_show,
|
2018-04-19 06:55:57 +08:00
|
|
|
};
|
|
|
|
|
2018-04-19 06:55:59 +08:00
|
|
|
static int btf_array_check_member(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *struct_type,
|
|
|
|
const struct btf_member *member,
|
|
|
|
const struct btf_type *member_type)
|
|
|
|
{
|
|
|
|
u32 struct_bits_off = member->offset;
|
|
|
|
u32 struct_size, bytes_offset;
|
|
|
|
u32 array_type_id, array_size;
|
|
|
|
struct btf *btf = env->btf;
|
|
|
|
|
|
|
|
if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member is not byte aligned");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
array_type_id = member->type;
|
|
|
|
btf_type_id_size(btf, &array_type_id, &array_size);
|
|
|
|
struct_size = struct_type->size;
|
|
|
|
bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
|
|
|
|
if (struct_size - bytes_offset < array_size) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member exceeds struct_size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-04-19 06:55:57 +08:00
|
|
|
static s32 btf_array_check_meta(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 meta_left)
|
|
|
|
{
|
|
|
|
const struct btf_array *array = btf_type_array(t);
|
|
|
|
u32 meta_needed = sizeof(*array);
|
|
|
|
|
|
|
|
if (meta_left < meta_needed) {
|
|
|
|
btf_verifier_log_basic(env, t,
|
|
|
|
"meta_left:%u meta_needed:%u",
|
|
|
|
meta_left, meta_needed);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-11-28 05:23:28 +08:00
|
|
|
/* array type should not have a name */
|
|
|
|
if (t->name_off) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid name");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-04-19 06:55:57 +08:00
|
|
|
if (btf_type_vlen(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "vlen != 0");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
bpf: btf: fix struct/union/fwd types with kind_flag
2018-12-16 14:13:51 +08:00
|
|
|
if (btf_type_kflag(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-06-03 00:06:50 +08:00
|
|
|
if (t->size) {
|
|
|
|
btf_verifier_log_type(env, t, "size != 0");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-05-23 05:57:19 +08:00
|
|
|
/* Array elem type and index type cannot be of type void,
|
|
|
|
* so !array->type and !array->index_type are not allowed.
|
2018-04-19 06:55:57 +08:00
|
|
|
*/
|
2018-05-23 05:57:20 +08:00
|
|
|
if (!array->type || !BTF_TYPE_ID_VALID(array->type)) {
|
2018-05-23 05:57:19 +08:00
|
|
|
btf_verifier_log_type(env, t, "Invalid elem");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-05-23 05:57:20 +08:00
|
|
|
if (!array->index_type || !BTF_TYPE_ID_VALID(array->index_type)) {
|
2018-05-23 05:57:19 +08:00
|
|
|
btf_verifier_log_type(env, t, "Invalid index");
|
2018-04-19 06:55:57 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
btf_verifier_log_type(env, t, NULL);
|
|
|
|
|
|
|
|
return meta_needed;
|
|
|
|
}
|
|
|
|
|
2018-04-19 06:55:58 +08:00
|
|
|
static int btf_array_resolve(struct btf_verifier_env *env,
|
|
|
|
const struct resolve_vertex *v)
|
|
|
|
{
|
|
|
|
const struct btf_array *array = btf_type_array(v->t);
|
2018-05-23 05:57:19 +08:00
|
|
|
const struct btf_type *elem_type, *index_type;
|
|
|
|
u32 elem_type_id, index_type_id;
|
2018-04-19 06:55:58 +08:00
|
|
|
struct btf *btf = env->btf;
|
|
|
|
u32 elem_size;
|
|
|
|
|
2018-05-23 05:57:19 +08:00
|
|
|
/* Check array->index_type */
|
|
|
|
index_type_id = array->index_type;
|
|
|
|
index_type = btf_type_by_id(btf, index_type_id);
|
2019-06-20 03:01:05 +08:00
|
|
|
if (btf_type_nosize_or_null(index_type) ||
|
|
|
|
btf_type_is_resolve_source_only(index_type)) {
|
2018-05-23 05:57:19 +08:00
|
|
|
btf_verifier_log_type(env, v->t, "Invalid index");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!env_type_is_resolve_sink(env, index_type) &&
|
|
|
|
!env_type_is_resolved(env, index_type_id))
|
|
|
|
return env_stack_push(env, index_type, index_type_id);
|
|
|
|
|
|
|
|
index_type = btf_type_id_size(btf, &index_type_id, NULL);
|
|
|
|
if (!index_type || !btf_type_is_int(index_type) ||
|
|
|
|
!btf_type_int_is_regular(index_type)) {
|
|
|
|
btf_verifier_log_type(env, v->t, "Invalid index");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check array->type */
|
|
|
|
elem_type_id = array->type;
|
2018-04-19 06:55:58 +08:00
|
|
|
elem_type = btf_type_by_id(btf, elem_type_id);
|
2019-06-20 03:01:05 +08:00
|
|
|
if (btf_type_nosize_or_null(elem_type) ||
|
|
|
|
btf_type_is_resolve_source_only(elem_type)) {
|
2018-04-19 06:55:58 +08:00
|
|
|
btf_verifier_log_type(env, v->t,
|
|
|
|
"Invalid elem");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!env_type_is_resolve_sink(env, elem_type) &&
|
|
|
|
!env_type_is_resolved(env, elem_type_id))
|
|
|
|
return env_stack_push(env, elem_type, elem_type_id);
|
|
|
|
|
|
|
|
elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size);
|
|
|
|
if (!elem_type) {
|
|
|
|
btf_verifier_log_type(env, v->t, "Invalid elem");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-05-23 05:57:19 +08:00
|
|
|
if (btf_type_is_int(elem_type) && !btf_type_int_is_regular(elem_type)) {
|
|
|
|
btf_verifier_log_type(env, v->t, "Invalid array of int");
|
|
|
|
return -EINVAL;
|
2018-04-19 06:55:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (array->nelems && elem_size > U32_MAX / array->nelems) {
|
|
|
|
btf_verifier_log_type(env, v->t,
|
|
|
|
"Array size overflows U32_MAX");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
env_stack_pop_resolved(env, elem_type_id, elem_size * array->nelems);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-04-19 06:55:57 +08:00
|
|
|
static void btf_array_log(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t)
|
|
|
|
{
|
|
|
|
const struct btf_array *array = btf_type_array(t);
|
|
|
|
|
|
|
|
btf_verifier_log(env, "type_id=%u index_type_id=%u nr_elems=%u",
|
|
|
|
array->type, array->index_type, array->nelems);
|
|
|
|
}
|
|
|
|
|
2020-09-28 19:31:04 +08:00
|
|
|
static void __btf_array_show(const struct btf *btf, const struct btf_type *t,
|
|
|
|
u32 type_id, void *data, u8 bits_offset,
|
|
|
|
struct btf_show *show)
|
2018-04-19 06:56:00 +08:00
|
|
|
{
|
|
|
|
const struct btf_array *array = btf_type_array(t);
|
|
|
|
const struct btf_kind_operations *elem_ops;
|
|
|
|
const struct btf_type *elem_type;
|
2020-09-28 19:31:04 +08:00
|
|
|
u32 i, elem_size = 0, elem_type_id;
|
|
|
|
u16 encoding = 0;
|
2018-04-19 06:56:00 +08:00
|
|
|
|
|
|
|
elem_type_id = array->type;
|
2020-09-28 19:31:04 +08:00
|
|
|
elem_type = btf_type_skip_modifiers(btf, elem_type_id, NULL);
|
|
|
|
if (elem_type && btf_type_has_size(elem_type))
|
|
|
|
elem_size = elem_type->size;
|
|
|
|
|
|
|
|
if (elem_type && btf_type_is_int(elem_type)) {
|
|
|
|
u32 int_type = btf_type_int(elem_type);
|
|
|
|
|
|
|
|
encoding = BTF_INT_ENCODING(int_type);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* BTF_INT_CHAR encoding never seems to be set for
|
|
|
|
* char arrays, so if size is 1 and element is
|
|
|
|
* printable as a char, we'll do that.
|
|
|
|
*/
|
|
|
|
if (elem_size == 1)
|
|
|
|
encoding = BTF_INT_CHAR;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!btf_show_start_array_type(show, t, type_id, encoding, data))
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (!elem_type)
|
|
|
|
goto out;
|
2018-04-19 06:56:00 +08:00
|
|
|
elem_ops = btf_type_ops(elem_type);
|
2020-09-28 19:31:04 +08:00
|
|
|
|
2018-04-19 06:56:00 +08:00
|
|
|
for (i = 0; i < array->nelems; i++) {
|
|
|
|
|
2020-09-28 19:31:04 +08:00
|
|
|
btf_show_start_array_member(show);
|
|
|
|
|
|
|
|
elem_ops->show(btf, elem_type, elem_type_id, data,
|
|
|
|
bits_offset, show);
|
2018-04-19 06:56:00 +08:00
|
|
|
data += elem_size;
|
2020-09-28 19:31:04 +08:00
|
|
|
|
|
|
|
btf_show_end_array_member(show);
|
|
|
|
|
|
|
|
if (show->state.array_terminated)
|
|
|
|
break;
|
2018-04-19 06:56:00 +08:00
|
|
|
}
|
2020-09-28 19:31:04 +08:00
|
|
|
out:
|
|
|
|
btf_show_end_array_type(show);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_array_show(const struct btf *btf, const struct btf_type *t,
|
|
|
|
u32 type_id, void *data, u8 bits_offset,
|
|
|
|
struct btf_show *show)
|
|
|
|
{
|
|
|
|
const struct btf_member *m = show->state.member;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* First check if any members would be shown (are non-zero).
|
|
|
|
* See comments above "struct btf_show" definition for more
|
|
|
|
* details on how this works at a high-level.
|
|
|
|
*/
|
|
|
|
if (show->state.depth > 0 && !(show->flags & BTF_SHOW_ZERO)) {
|
|
|
|
if (!show->state.depth_check) {
|
|
|
|
show->state.depth_check = show->state.depth + 1;
|
|
|
|
show->state.depth_to_show = 0;
|
|
|
|
}
|
|
|
|
__btf_array_show(btf, t, type_id, data, bits_offset, show);
|
|
|
|
show->state.member = m;
|
|
|
|
|
|
|
|
if (show->state.depth_check != show->state.depth + 1)
|
|
|
|
return;
|
|
|
|
show->state.depth_check = 0;
|
|
|
|
|
|
|
|
if (show->state.depth_to_show <= show->state.depth)
|
|
|
|
return;
|
|
|
|
/*
|
|
|
|
* Reaching here indicates we have recursed and found
|
|
|
|
* non-zero array member(s).
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
__btf_array_show(btf, t, type_id, data, bits_offset, show);
|
2018-04-19 06:56:00 +08:00
|
|
|
}
|
|
|
|
|
2018-04-19 06:55:57 +08:00
|
|
|
static struct btf_kind_operations array_ops = {
|
|
|
|
.check_meta = btf_array_check_meta,
|
2018-04-19 06:55:58 +08:00
|
|
|
.resolve = btf_array_resolve,
|
2018-04-19 06:55:59 +08:00
|
|
|
.check_member = btf_array_check_member,
|
bpf: btf: fix struct/union/fwd types with kind_flag
2018-12-16 14:13:51 +08:00
|
|
|
.check_kflag_member = btf_generic_check_kflag_member,
|
2018-04-19 06:55:57 +08:00
|
|
|
.log_details = btf_array_log,
|
2020-09-28 19:31:04 +08:00
|
|
|
.show = btf_array_show,
|
2018-04-19 06:55:57 +08:00
|
|
|
};
|
|
|
|
|
2018-04-19 06:55:59 +08:00
|
|
|
static int btf_struct_check_member(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *struct_type,
|
|
|
|
const struct btf_member *member,
|
|
|
|
const struct btf_type *member_type)
|
|
|
|
{
|
|
|
|
u32 struct_bits_off = member->offset;
|
|
|
|
u32 struct_size, bytes_offset;
|
|
|
|
|
|
|
|
if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member is not byte aligned");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct_size = struct_type->size;
|
|
|
|
bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
|
|
|
|
if (struct_size - bytes_offset < member_type->size) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member exceeds struct_size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-04-19 06:55:57 +08:00
|
|
|
static s32 btf_struct_check_meta(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 meta_left)
|
|
|
|
{
|
|
|
|
bool is_union = BTF_INFO_KIND(t->info) == BTF_KIND_UNION;
|
|
|
|
const struct btf_member *member;
|
2018-07-21 08:38:37 +08:00
|
|
|
u32 meta_needed, last_offset;
|
2018-04-19 06:55:57 +08:00
|
|
|
struct btf *btf = env->btf;
|
|
|
|
u32 struct_size = t->size;
|
bpf: btf: fix struct/union/fwd types with kind_flag
2018-12-16 14:13:51 +08:00
|
|
|
u32 offset;
|
2018-04-19 06:55:57 +08:00
|
|
|
u16 i;
|
|
|
|
|
|
|
|
meta_needed = btf_type_vlen(t) * sizeof(*member);
|
|
|
|
if (meta_left < meta_needed) {
|
|
|
|
btf_verifier_log_basic(env, t,
|
|
|
|
"meta_left:%u meta_needed:%u",
|
|
|
|
meta_left, meta_needed);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-11-28 05:23:28 +08:00
|
|
|
/* struct type has either no name or a valid one */
|
|
|
|
if (t->name_off &&
|
|
|
|
!btf_name_valid_identifier(env->btf, t->name_off)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid name");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-04-19 06:55:57 +08:00
|
|
|
btf_verifier_log_type(env, t, NULL);
|
|
|
|
|
2018-07-21 08:38:37 +08:00
|
|
|
last_offset = 0;
|
2018-04-19 06:55:57 +08:00
|
|
|
for_each_member(i, t, member) {
|
2018-04-22 00:48:23 +08:00
|
|
|
if (!btf_name_offset_valid(btf, member->name_off)) {
|
2018-04-19 06:55:57 +08:00
|
|
|
btf_verifier_log_member(env, t, member,
|
|
|
|
"Invalid member name_offset:%u",
|
2018-04-22 00:48:23 +08:00
|
|
|
member->name_off);
|
2018-04-19 06:55:57 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-11-28 05:23:28 +08:00
|
|
|
/* struct member has either no name or a valid one */
|
|
|
|
if (member->name_off &&
|
|
|
|
!btf_name_valid_identifier(btf, member->name_off)) {
|
|
|
|
btf_verifier_log_member(env, t, member, "Invalid name");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2018-04-19 06:55:57 +08:00
|
|
|
/* A member cannot be of type void */
|
2018-05-23 05:57:20 +08:00
|
|
|
if (!member->type || !BTF_TYPE_ID_VALID(member->type)) {
|
2018-04-19 06:55:57 +08:00
|
|
|
btf_verifier_log_member(env, t, member,
|
|
|
|
"Invalid type_id");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2021-12-02 02:10:25 +08:00
|
|
|
offset = __btf_member_bit_offset(t, member);
|
bpf: btf: fix struct/union/fwd types with kind_flag
2018-12-16 14:13:51 +08:00
|
|
|
if (is_union && offset) {
|
2018-04-19 06:55:57 +08:00
|
|
|
btf_verifier_log_member(env, t, member,
|
|
|
|
"Invalid member bits_offset");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-07-21 08:38:37 +08:00
|
|
|
/*
|
|
|
|
* ">" instead of ">=" because the last member could be
|
|
|
|
* "char a[0];"
|
|
|
|
*/
|
bpf: btf: fix struct/union/fwd types with kind_flag
2018-12-16 14:13:51 +08:00
|
|
|
if (last_offset > offset) {
|
2018-07-21 08:38:37 +08:00
|
|
|
btf_verifier_log_member(env, t, member,
|
|
|
|
"Invalid member bits_offset");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-12-16 14:13:51 +08:00
|
|
|
if (BITS_ROUNDUP_BYTES(offset) > struct_size) {
|
2018-04-19 06:55:57 +08:00
|
|
|
btf_verifier_log_member(env, t, member,
|
2018-11-26 07:32:51 +08:00
|
|
|
"Member bits_offset exceeds its struct size");
|
2018-04-19 06:55:57 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
btf_verifier_log_member(env, t, member, NULL);
|
2018-12-16 14:13:51 +08:00
|
|
|
last_offset = offset;
|
2018-04-19 06:55:57 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return meta_needed;
|
|
|
|
}
|
|
|
|
|
2018-04-19 06:55:58 +08:00
|
|
|
static int btf_struct_resolve(struct btf_verifier_env *env,
|
|
|
|
const struct resolve_vertex *v)
|
|
|
|
{
|
|
|
|
const struct btf_member *member;
|
2018-04-19 06:55:59 +08:00
|
|
|
int err;
|
2018-04-19 06:55:58 +08:00
|
|
|
u16 i;
|
|
|
|
|
|
|
|
/* Before continuing to resolve the next_member,
|
|
|
|
* ensure the last member is indeed resolved to a
|
|
|
|
* type with size info.
|
|
|
|
*/
|
|
|
|
if (v->next_member) {
|
2018-04-19 06:55:59 +08:00
|
|
|
const struct btf_type *last_member_type;
|
2018-04-19 06:55:58 +08:00
|
|
|
const struct btf_member *last_member;
|
2022-09-10 19:01:20 +08:00
|
|
|
u32 last_member_type_id;
|
2018-04-19 06:55:58 +08:00
|
|
|
|
|
|
|
last_member = btf_type_member(v->t) + v->next_member - 1;
|
|
|
|
last_member_type_id = last_member->type;
|
|
|
|
if (WARN_ON_ONCE(!env_type_is_resolved(env,
|
|
|
|
last_member_type_id)))
|
|
|
|
return -EINVAL;
|
2018-04-19 06:55:59 +08:00
|
|
|
|
|
|
|
last_member_type = btf_type_by_id(env->btf,
|
|
|
|
last_member_type_id);
|
2018-12-16 14:13:51 +08:00
|
|
|
if (btf_type_kflag(v->t))
|
|
|
|
err = btf_type_ops(last_member_type)->check_kflag_member(env, v->t,
|
|
|
|
last_member,
|
|
|
|
last_member_type);
|
|
|
|
else
|
|
|
|
err = btf_type_ops(last_member_type)->check_member(env, v->t,
|
|
|
|
last_member,
|
|
|
|
last_member_type);
|
2018-04-19 06:55:59 +08:00
|
|
|
if (err)
|
|
|
|
return err;
|
2018-04-19 06:55:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
for_each_member_from(i, v->next_member, v->t, member) {
|
|
|
|
u32 member_type_id = member->type;
|
|
|
|
const struct btf_type *member_type = btf_type_by_id(env->btf,
|
|
|
|
member_type_id);
|
|
|
|
|
2019-06-20 03:01:05 +08:00
|
|
|
if (btf_type_nosize_or_null(member_type) ||
|
|
|
|
btf_type_is_resolve_source_only(member_type)) {
|
2018-04-19 06:55:58 +08:00
|
|
|
btf_verifier_log_member(env, v->t, member,
|
|
|
|
"Invalid member");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!env_type_is_resolve_sink(env, member_type) &&
|
|
|
|
!env_type_is_resolved(env, member_type_id)) {
|
|
|
|
env_stack_set_next_member(env, i + 1);
|
|
|
|
return env_stack_push(env, member_type, member_type_id);
|
|
|
|
}
|
2018-04-19 06:55:59 +08:00
|
|
|
|
2018-12-16 14:13:51 +08:00
|
|
|
if (btf_type_kflag(v->t))
|
|
|
|
err = btf_type_ops(member_type)->check_kflag_member(env, v->t,
|
|
|
|
member,
|
|
|
|
member_type);
|
|
|
|
else
|
|
|
|
err = btf_type_ops(member_type)->check_member(env, v->t,
|
|
|
|
member,
|
|
|
|
member_type);
|
2018-04-19 06:55:59 +08:00
|
|
|
if (err)
|
|
|
|
return err;
|
2018-04-19 06:55:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
env_stack_pop_resolved(env, 0, 0);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-04-19 06:55:57 +08:00
|
|
|
static void btf_struct_log(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t)
|
|
|
|
{
|
|
|
|
btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
|
|
|
|
}
|
|
|
|
|
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptrs are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow the
user to still access such an invalid pointer, but passing such pointers
into BPF helpers and kfuncs should not be permitted. A future patch in
this series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, the user may store a PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL; as
the verifier cannot know statically whether the value is NULL, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map-in-map support, the kptr_off_tab for the outer map is copied
from the inner map's kptr_off_tab. A deep copy was chosen over
introducing a refcount to kptr_off_tab, because the copy only needs
to be done when parameterizing using inner_map_fd in the map-in-map
case, and hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
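To make the flow concrete, here is a minimal BPF-side sketch (map name,
hook point, and program name are illustrative; it assumes vmlinux.h plus
libbpf's bpf_helpers.h/bpf_tracing.h for SEC(), BPF_PROG() and the map
definition macros):

#define __kptr __attribute__((btf_type_tag("kptr")))

struct map_value {
	struct task_struct __kptr *task;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, struct map_value);
} kptr_map SEC(".maps");

SEC("tp_btf/task_newtask")
int BPF_PROG(record_task, struct task_struct *task, u64 clone_flags)
{
	struct map_value *v;
	int key = 0;

	v = bpf_map_lookup_elem(&kptr_map, &key);
	if (v)
		v->task = task; /* BPF_STX of a PTR_TO_BTF_ID, type-checked against the map BTF */
	return 0;
}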
2022-04-25 05:48:49 +08:00
|
|
|
enum {
|
|
|
|
BTF_FIELD_IGNORE = 0,
|
|
|
|
BTF_FIELD_FOUND = 1,
|
2022-04-16 00:03:42 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct btf_field_info {
|
bpf: Refactor kptr_off_tab into btf_record
To prepare the BPF verifier to handle special fields in both map values
and program allocated types coming from program BTF, we need to refactor
the kptr_off_tab handling code into something more generic and reusable
across both cases to avoid code duplication.
Later patches also require passing this data to helpers at runtime, so
that they can work on user-defined types, initialize them, destruct
them, etc.
The main observation is that both map values and such allocated types
point to a type in program BTF, hence they can be handled similarly. We
can prepare a field metadata table for both cases and store them in
struct bpf_map or struct btf depending on the use case.
Hence, refactor the code into generic btf_record and btf_field member
structs. The btf_record represents the fields of a specific btf_type in
user BTF. The cnt indicates the number of special fields we successfully
recognized, and field_mask is a bitmask of fields that were found, to
enable quick determination of availability of a certain field.
Subsequently, refactor the rest of the code to work with these generic
types, remove assumptions about kptr and kptr_off_tab, rename variables
to more meaningful names, etc.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221103191013.1236066-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-04 03:09:55 +08:00
|
|
|
enum btf_field_type type;
|
2022-04-16 00:03:42 +08:00
|
|
|
u32 off;
|
bpf: Support bpf_list_head in map values
Add the support on the map side to parse, recognize, verify, and build
metadata table for a new special field of the type struct bpf_list_head.
To parameterize the bpf_list_head for a certain value type and the
list_node member it will accept in that value type, we use BTF
declaration tags.
The definition of bpf_list_head in a map value will be done as follows:
struct foo {
struct bpf_list_node node;
int data;
};
struct map_value {
struct bpf_list_head head __contains(foo, node);
};
Then, the bpf_list_head only allows adding to the list 'head' using the
bpf_list_node 'node' for the type struct foo.
The 'contains' annotation is a BTF declaration tag composed of three
parts, "contains:name:node", where the name is used to look up the
type in the map BTF, with its kind hardcoded to BTF_KIND_STRUCT during
the lookup. The node part gives the name of the member in this type
that has the type struct bpf_list_node, which is actually used for
linking into the linked list. For now, the 'kind' part is implicit and
hardcoded as struct.
This allows building intrusive linked lists in BPF, using container_of
to obtain pointer to entry, while being completely type safe from the
perspective of the verifier. The verifier knows exactly the type of the
nodes, and knows that list helpers return that type at some fixed offset
where the bpf_list_node member used for this list exists. The verifier
also uses this information to disallow adding types that are not
accepted by a certain list.
For now, no elements can be added to such lists. Support for that is
coming in future patches, hence draining and freeing items is done with
a TODO that will be resolved in a future patch.
Note that the bpf_list_head_free function moves the list out to a local
variable under the lock and releases it, doing the actual draining of
the list items outside the lock. While this helps avoid holding the
lock for too long and pessimizing other concurrent list operations, it is
also necessary for deadlock prevention: unless every function called in
the critical section would be notrace, a fentry/fexit program could
attach and call bpf_map_update_elem again on the map, leading to the
same lock being acquired if the key matches and lead to a deadlock.
While this requires some special effort on the part of the BPF programmer to
trigger and is highly unlikely to occur in practice, it is always better
if we can avoid such a condition.
While notrace would prevent this, doing the draining outside the lock
has advantages of its own, hence it is used to also fix the deadlock
related problem.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221114191547.1694267-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
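The __contains() annotation used above is not compiler magic; it expands to
a plain BTF declaration tag. A sketch of the macro (mirroring the helper
later carried in the selftests' bpf_experimental.h):

/* Expands to btf_decl_tag("contains:foo:node") on the field, which the
 * kernel parses to find the value type and its bpf_list_node member.
 */
#define __contains(name, node) \
	__attribute__((btf_decl_tag("contains:" #name ":" #node)))

struct foo {
	struct bpf_list_node node;
	int data;
};

struct map_value {
	struct bpf_list_head head __contains(foo, node);
};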
2022-11-15 03:15:25 +08:00
|
|
|
union {
|
|
|
|
struct {
|
|
|
|
u32 type_id;
|
|
|
|
} kptr;
|
|
|
|
struct {
|
|
|
|
const char *node_name;
|
|
|
|
u32 value_btf_id;
|
2022-12-17 16:24:57 +08:00
|
|
|
} graph_root;
|
2022-11-15 03:15:25 +08:00
|
|
|
};
|
2022-04-16 00:03:42 +08:00
|
|
|
};
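Pieced together from the annotated fragments above, the resulting record
reads as follows (a consolidated view, not a new definition):

struct btf_field_info {
	enum btf_field_type type;	/* which special field kind was found */
	u32 off;			/* its offset within the value */
	union {
		struct {
			u32 type_id;		/* target type for kptr fields */
		} kptr;
		struct {
			const char *node_name;	/* bpf_list_node member name */
			u32 value_btf_id;	/* BTF id of the value type */
		} graph_root;
	};
};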
|
|
|
|
|
|
|
|
static int btf_find_struct(const struct btf *btf, const struct btf_type *t,
|
2022-11-04 03:09:56 +08:00
|
|
|
u32 off, int sz, enum btf_field_type field_type,
|
|
|
|
struct btf_field_info *info)
|
2022-04-16 00:03:42 +08:00
|
|
|
{
|
|
|
|
if (!__btf_type_is_struct(t))
|
2022-04-25 05:48:49 +08:00
|
|
|
return BTF_FIELD_IGNORE;
|
2022-04-16 00:03:42 +08:00
|
|
|
if (t->size != sz)
|
2022-04-25 05:48:49 +08:00
|
|
|
return BTF_FIELD_IGNORE;
|
2022-11-04 03:09:56 +08:00
|
|
|
info->type = field_type;
|
2022-04-16 00:03:42 +08:00
|
|
|
info->off = off;
|
2022-04-25 05:48:49 +08:00
|
|
|
return BTF_FIELD_FOUND;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
|
|
|
|
u32 off, int sz, struct btf_field_info *info)
|
|
|
|
{
|
2022-11-04 03:09:55 +08:00
|
|
|
enum btf_field_type type;
|
2022-04-25 05:48:49 +08:00
|
|
|
u32 res_id;
|
|
|
|
|
2022-11-04 03:09:51 +08:00
|
|
|
/* Permit modifiers on the pointer itself */
|
|
|
|
if (btf_type_is_volatile(t))
|
|
|
|
t = btf_type_by_id(btf, t->type);
|
2022-04-25 05:48:49 +08:00
|
|
|
/* For PTR, sz is always == 8 */
|
|
|
|
if (!btf_type_is_ptr(t))
|
|
|
|
return BTF_FIELD_IGNORE;
|
|
|
|
t = btf_type_by_id(btf, t->type);
|
|
|
|
|
|
|
|
if (!btf_type_is_type_tag(t))
|
|
|
|
return BTF_FIELD_IGNORE;
|
|
|
|
/* Reject extra tags */
|
|
|
|
if (btf_type_is_type_tag(btf_type_by_id(btf, t->type)))
|
|
|
|
return -EINVAL;
|
2023-03-03 12:14:41 +08:00
|
|
|
if (!strcmp("kptr_untrusted", __btf_name_by_offset(btf, t->name_off)))
|
bpf: Allow storing referenced kptr in map
Extending the code in previous commits, introduce referenced kptr
support, which needs to be tagged using the 'kptr_ref' tag instead.
Unlike unreferenced kptrs, referenced kptrs have a lot more
restrictions. In addition to the type matching, only the newly
introduced bpf_kptr_xchg helper is allowed to modify the map value at
that offset. This transfers the referenced pointer being stored into the
map, releases the reference state held by the program, returns the old
value, and creates new reference state for the returned pointer.
Similar to the unreferenced pointer case, the return value for this case
will also be PTR_TO_BTF_ID_OR_NULL. The reference for the returned
pointer must eventually either be released by calling the corresponding
release function, or be transferred into another map.
It is also allowed to call bpf_kptr_xchg with a NULL pointer, to clear
the value, and obtain the old value if any.
BPF_LDX, BPF_STX, and BPF_ST cannot access a referenced kptr. A future
commit will permit using BPF_LDX for such pointers, while attempting to
make it safe, since the lifetime of the object won't be guaranteed.
There are valid reasons to enforce the restriction of permitting only
bpf_kptr_xchg to operate on referenced kptrs. The pointer value must be
consistent in the face of concurrent modification, and any prior values
contained in the map must also be released before a new one is moved
into the map. To ensure proper transfer of this ownership, bpf_kptr_xchg
returns the old value, which the verifier would require the user to
either free or move into another map, and releases the reference held
for the pointer being moved in.
In the future, direct BPF_XCHG instruction may also be permitted to work
like bpf_kptr_xchg helper.
Note that process_kptr_func doesn't have to call
check_helper_mem_access, since we already disallow rdonly/wronly flags
for the map, which is what check_map_access_type checks, and we already
ensure the PTR_TO_MAP_VALUE refers to a kptr by obtaining its off_desc,
so check_map_access is not required either.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-4-memxor@gmail.com
2022-04-25 05:48:51 +08:00
|
|
|
type = BPF_KPTR_UNREF;
|
2023-03-03 12:14:41 +08:00
|
|
|
else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off)))
|
bpf: Allow storing referenced kptr in map
2022-04-25 05:48:51 +08:00
|
|
|
type = BPF_KPTR_REF;
|
2023-08-27 23:27:39 +08:00
|
|
|
else if (!strcmp("percpu_kptr", __btf_name_by_offset(btf, t->name_off)))
|
|
|
|
type = BPF_KPTR_PERCPU;
|
bpf: Allow storing referenced kptr in map
2022-04-25 05:48:51 +08:00
|
|
|
else
|
bpf: Allow storing unreferenced kptr in map
2022-04-25 05:48:49 +08:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* Get the base type */
|
|
|
|
t = btf_type_skip_modifiers(btf, t->type, &res_id);
|
|
|
|
/* Only pointer to struct is allowed */
|
|
|
|
if (!__btf_type_is_struct(t))
|
|
|
|
return -EINVAL;
|
|
|
|
|
bpf: Allow storing referenced kptr in map
2022-04-25 05:48:51 +08:00
|
|
|
info->type = type;
|
2022-11-04 03:09:56 +08:00
|
|
|
info->off = off;
|
|
|
|
info->kptr.type_id = res_id;
|
bpf: Allow storing unreferenced kptr in map
2022-04-25 05:48:49 +08:00
|
|
|
return BTF_FIELD_FOUND;
|
2022-04-16 00:03:42 +08:00
|
|
|
}
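On the program side, the three tag names matched above map to the
btf_type_tag annotations sketched below (the macro spellings mirror
libbpf's bpf_helpers.h; struct pcpu_data is a hypothetical type, and the
2022 commit messages above predate the rename of 'kptr' to
'kptr_untrusted' and of 'kptr_ref' to 'kptr'). For the referenced case,
the sketch also shows the bpf_kptr_xchg ownership protocol, with
bpf_task_acquire()/bpf_task_release() standing in for whatever
acquire/release functions the stored type provides:

	#define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted")))
	#define __kptr           __attribute__((btf_type_tag("kptr")))
	#define __percpu_kptr    __attribute__((btf_type_tag("percpu_kptr")))

	struct map_value {
		struct task_struct __kptr_untrusted *unref; /* BPF_KPTR_UNREF */
		struct task_struct __kptr *owned;           /* BPF_KPTR_REF */
		struct pcpu_data __percpu_kptr *pc;         /* BPF_KPTR_PERCPU */
	};

	/* In a BPF program, with v pointing at a map value: only
	 * bpf_kptr_xchg() may modify the referenced 'owned' slot.
	 */
	struct task_struct *acquired, *old;

	acquired = bpf_task_acquire(task);
	if (acquired) {
		/* Move 'acquired' into the map; the returned old value is
		 * PTR_TO_BTF_ID_OR_NULL and carries a reference that must
		 * be released or transferred before the program exits.
		 */
		old = bpf_kptr_xchg(&v->owned, acquired);
		if (old)
			bpf_task_release(old);
	}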
|
|
|
|
|
2024-01-05 08:09:05 +08:00
|
|
|
int btf_find_next_decl_tag(const struct btf *btf, const struct btf_type *pt,
|
|
|
|
int comp_idx, const char *tag_key, int last_id)
|
bpf: Support bpf_list_head in map values
Add support on the map side to parse, recognize, verify, and build a
metadata table for a new special field of the type struct bpf_list_head.
To parameterize the bpf_list_head for a certain value type and the
list_node member it will accept in that value type, we use BTF
declaration tags.
The definition of bpf_list_head in a map value will be done as follows:
struct foo {
struct bpf_list_node node;
int data;
};
struct map_value {
struct bpf_list_head head __contains(foo, node);
};
Then, the bpf_list_head only allows adding to the list 'head' using the
bpf_list_node 'node' for the type struct foo.
The 'contains' annotation is a BTF declaration tag of the form
"contains:name:node", where the name is used to look up the type in the
map BTF, with its kind hardcoded to BTF_KIND_STRUCT during the lookup.
The node part defines the name of the member in this type that has the
type struct bpf_list_node, which is actually used for linking into the
linked list. For now, the 'kind' part is implicit and hardcoded as
struct.
This allows building intrusive linked lists in BPF, using container_of
to obtain a pointer to the entry, while being completely type safe from
the perspective of the verifier. The verifier knows exactly the type of
the nodes, and knows that list helpers return that type at some fixed offset
where the bpf_list_node member used for this list exists. The verifier
also uses this information to disallow adding types that are not
accepted by a certain list.
For now, no elements can be added to such lists. Support for that is
coming in future patches, hence draining and freeing items is done with
a TODO that will be resolved in a future patch.
Note that the bpf_list_head_free function moves the list out to a local
variable under the lock and releases it, doing the actual draining of
the list items outside the lock. While this helps avoid holding the lock
for too long and pessimizing other concurrent list operations, it is
also necessary for deadlock prevention: unless every function called in
the critical section were notrace, a fentry/fexit program could attach
and call bpf_map_update_elem again on the map, leading to the same lock
being acquired if the key matches, and hence to a deadlock.
While this requires some special effort on the part of the BPF
programmer to trigger and is highly unlikely to occur in practice, it is
always better if we can avoid such a condition.
While notrace would prevent this, doing the draining outside the lock
has advantages of its own, hence it is also used to fix the
deadlock-related problem.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221114191547.1694267-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-15 03:15:25 +08:00
|
|
|
{
|
2024-01-05 08:09:05 +08:00
|
|
|
int len = strlen(tag_key);
|
|
|
|
int i, n;
|
bpf: Support bpf_list_head in map values
2022-11-15 03:15:25 +08:00
|
|
|
|
2024-01-05 08:09:05 +08:00
|
|
|
for (i = last_id + 1, n = btf_nr_types(btf); i < n; i++) {
|
bpf: Support bpf_list_head in map values
2022-11-15 03:15:25 +08:00
|
|
|
const struct btf_type *t = btf_type_by_id(btf, i);
|
|
|
|
|
|
|
|
if (!btf_type_is_decl_tag(t))
|
|
|
|
continue;
|
2024-01-05 08:09:05 +08:00
|
|
|
if (pt != btf_type_by_id(btf, t->type))
|
|
|
|
continue;
|
|
|
|
if (btf_type_decl_tag(t)->component_idx != comp_idx)
|
bpf: Support bpf_list_head in map values
2022-11-15 03:15:25 +08:00
|
|
|
continue;
|
|
|
|
if (strncmp(__btf_name_by_offset(btf, t->name_off), tag_key, len))
|
|
|
|
continue;
|
2024-01-05 08:09:05 +08:00
|
|
|
return i;
|
bpf: Support bpf_list_head in map values
2022-11-15 03:15:25 +08:00
|
|
|
}
|
2024-01-05 08:09:05 +08:00
|
|
|
return -ENOENT;
|
|
|
|
}
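To illustrate what the loop above matches: for a tagged map value
member, the BTF contains a DECL_TAG entry whose type points back at the
struct and whose component_idx selects the member. A hypothetical
bpftool-style dump (type IDs are made up):

	/* struct map_value { struct bpf_list_head head __contains(foo, node); };
	 *
	 * [5] STRUCT 'map_value' size=16 vlen=1
	 *	'head' type_id=4 bits_offset=0
	 * [6] DECL_TAG 'contains:foo:node' type_id=5 component_idx=0
	 *
	 * btf_find_next_decl_tag(btf, pt = [5], comp_idx = 0,
	 *			  "contains:", last_id = 0) returns 6.
	 */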
|
|
|
|
|
|
|
|
const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt,
|
|
|
|
int comp_idx, const char *tag_key)
|
|
|
|
{
|
|
|
|
const char *value = NULL;
|
|
|
|
const struct btf_type *t;
|
|
|
|
int len, id;
|
|
|
|
|
|
|
|
id = btf_find_next_decl_tag(btf, pt, comp_idx, tag_key, 0);
|
|
|
|
if (id < 0)
|
|
|
|
return ERR_PTR(id);
|
|
|
|
|
|
|
|
t = btf_type_by_id(btf, id);
|
|
|
|
len = strlen(tag_key);
|
|
|
|
value = __btf_name_by_offset(btf, t->name_off) + len;
|
|
|
|
|
|
|
|
/* Prevent duplicate entries for same type */
|
|
|
|
id = btf_find_next_decl_tag(btf, pt, comp_idx, tag_key, id);
|
|
|
|
if (id >= 0)
|
|
|
|
return ERR_PTR(-EEXIST);
|
|
|
|
|
bpf: Add support for custom exception callbacks
By default, the subprog generated by the verifier to handle a thrown
exception hardcodes a return value of 0. To allow user-defined logic
and modification of the return value when an exception is thrown,
introduce the 'exception_callback:' declaration tag, which marks a
callback as the default exception handler for the program.
The format of the declaration tag is 'exception_callback:<value>', where
<value> is the name of the exception callback. Each main program can be
tagged using this BTF declaration tag to associate it with an exception
callback. In case the tag is absent, the default callback is used.
As such, the exception callback cannot be modified at runtime, only set
during verification.
Allowing modification of the callback for the current program execution
at runtime leads to issues when the programs begin to nest, as any
per-CPU state maintaining this information will have to be saved and
restored. We don't want it to stay in bpf_prog_aux as this would have a
global effect for all programs. An alternative solution is spilling
the callback pointer at a known location on the program stack on entry,
and then passing this location to bpf_throw as a parameter.
However, since exceptions are geared more towards a use case where they
are ideally never invoked, optimizing for this use case and adding to
the complexity has diminishing returns.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20230912233214.1518551-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-09-13 07:32:03 +08:00
|
|
|
return value;
|
bpf: Support bpf_list_head in map values
2022-11-15 03:15:25 +08:00
|
|
|
}
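The "tag_key" prefixes parsed by these helpers are emitted from BPF
programs via btf_decl_tag annotations. Below is a sketch of the
'contains:' case from the commit message above, with the macros spelled
as in the selftests' bpf_experimental.h (the 'exception_callback:' tag
described earlier uses the same mechanism):

	#define __contains(name, node) \
		__attribute__((btf_decl_tag("contains:" #name ":" #node)))
	#define __exception_cb(name) \
		__attribute__((btf_decl_tag("exception_callback:" #name)))

	struct foo {
		struct bpf_list_node node;
		int data;
	};

	struct map_value {
		struct bpf_spin_lock lock; /* lists need an associated lock */
		struct bpf_list_head head __contains(foo, node);
	};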
|
|
|
|
|
bpf: Add basic bpf_rb_{root,node} support
This patch adds special BPF_RB_{ROOT,NODE} btf_field_types similar to
BPF_LIST_{HEAD,NODE}, adds the necessary plumbing to detect the new
types, and adds bpf_rb_root_free function for freeing bpf_rb_root in
map_values.
structs bpf_rb_root and bpf_rb_node are opaque types meant to
obscure structs rb_root_cached and rb_node, respectively.
btf_struct_access will prevent BPF programs from touching these special
fields automatically now that they're recognized.
btf_check_and_fixup_fields now groups list_head and rb_root together as
"graph root" fields and {list,rb}_node as "graph node", and does same
ownership cycle checking as before. Note that this function does _not_
prevent ownership type mixups (e.g. rb_root owning list_node) - that's
handled by btf_parse_graph_root.
After this patch, a bpf program can have a struct bpf_rb_root in a
map_value, but cannot add anything to it nor do anything useful with it.
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230214004017.2534011-2-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-02-14 08:40:10 +08:00
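For symmetry with the list case, a sketch of the rbtree declaration this
commit makes recognizable (node_data is a hypothetical value type;
__contains as in the sketch above):

	struct node_data {
		struct bpf_rb_node node;
		__u32 key;
	};

	struct map_value {
		struct bpf_spin_lock lock;
		struct bpf_rb_root root __contains(node_data, node);
	};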
|
|
|
static int
|
|
|
|
btf_find_graph_root(const struct btf *btf, const struct btf_type *pt,
|
|
|
|
const struct btf_type *t, int comp_idx, u32 off,
|
|
|
|
int sz, struct btf_field_info *info,
|
|
|
|
enum btf_field_type head_type)
|
bpf: Support bpf_list_head in map values
2022-11-15 03:15:25 +08:00
|
|
|
{
|
bpf: Add basic bpf_rb_{root,node} support
2023-02-14 08:40:10 +08:00
|
|
|
const char *node_field_name;
|
bpf: Support bpf_list_head in map values
2022-11-15 03:15:25 +08:00
|
|
|
const char *value_type;
|
|
|
|
s32 id;
|
|
|
|
|
|
|
|
if (!__btf_type_is_struct(t))
|
|
|
|
return BTF_FIELD_IGNORE;
|
|
|
|
if (t->size != sz)
|
|
|
|
return BTF_FIELD_IGNORE;
|
|
|
|
value_type = btf_find_decl_tag_value(btf, pt, comp_idx, "contains:");
|
bpf: Add support for custom exception callbacks
2023-09-13 07:32:03 +08:00
|
|
|
if (IS_ERR(value_type))
|
bpf: Support bpf_list_head in map values
2022-11-15 03:15:25 +08:00
|
|
|
return -EINVAL;
|
bpf: Add basic bpf_rb_{root,node} support
2023-02-14 08:40:10 +08:00
|
|
|
node_field_name = strstr(value_type, ":");
|
|
|
|
if (!node_field_name)
|
bpf: Support bpf_list_head in map values
2022-11-15 03:15:25 +08:00
|
|
|
return -EINVAL;
|
bpf: Add basic bpf_rb_{root,node} support
2023-02-14 08:40:10 +08:00
|
|
|
value_type = kstrndup(value_type, node_field_name - value_type, GFP_KERNEL | __GFP_NOWARN);
|
bpf: Support bpf_list_head in map values
2022-11-15 03:15:25 +08:00
|
|
|
if (!value_type)
|
|
|
|
return -ENOMEM;
|
|
|
|
id = btf_find_by_name_kind(btf, value_type, BTF_KIND_STRUCT);
|
|
|
|
kfree(value_type);
|
|
|
|
if (id < 0)
|
|
|
|
return id;
|
2023-02-14 08:40:10 +08:00
|
|
|
node_field_name++;
|
|
|
|
if (str_is_empty(node_field_name))
|
2022-11-15 03:15:25 +08:00
|
|
|
return -EINVAL;
|
2023-02-14 08:40:10 +08:00
|
|
|
info->type = head_type;
|
2022-11-15 03:15:25 +08:00
|
|
|
info->off = off;
|
2022-12-17 16:24:57 +08:00
|
|
|
info->graph_root.value_btf_id = id;
|
2023-02-14 08:40:10 +08:00
|
|
|
info->graph_root.node_name = node_field_name;
|
2022-11-15 03:15:25 +08:00
|
|
|
return BTF_FIELD_FOUND;
|
|
|
|
}
|
|
|
|
|
2023-02-14 08:40:10 +08:00
|
|
|
#define field_mask_test_name(field_type, field_type_str) \
|
|
|
|
if (field_mask & field_type && !strcmp(name, field_type_str)) { \
|
|
|
|
type = field_type; \
|
|
|
|
goto end; \
|
|
|
|
}
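	/* Illustrative expansion: field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root")
	 * becomes:
	 *	if (field_mask & BPF_RB_ROOT && !strcmp(name, "bpf_rb_root")) {
	 *		type = BPF_RB_ROOT;
	 *		goto end;
	 *	}
	 */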
|
|
|
|
|
2024-05-24 01:41:58 +08:00
|
|
|
static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_type,
|
|
|
|
u32 field_mask, u32 *seen_mask,
|
2022-11-04 03:09:56 +08:00
|
|
|
int *align, int *sz)
|
|
|
|
{
|
|
|
|
int type = 0;
|
2024-05-24 01:41:58 +08:00
|
|
|
const char *name = __btf_name_by_offset(btf, var_type->name_off);
|
2022-11-04 03:09:56 +08:00
|
|
|
|
|
|
|
if (field_mask & BPF_SPIN_LOCK) {
|
|
|
|
if (!strcmp(name, "bpf_spin_lock")) {
|
|
|
|
if (*seen_mask & BPF_SPIN_LOCK)
|
|
|
|
return -E2BIG;
|
|
|
|
*seen_mask |= BPF_SPIN_LOCK;
|
|
|
|
type = BPF_SPIN_LOCK;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (field_mask & BPF_TIMER) {
|
|
|
|
if (!strcmp(name, "bpf_timer")) {
|
|
|
|
if (*seen_mask & BPF_TIMER)
|
|
|
|
return -E2BIG;
|
|
|
|
*seen_mask |= BPF_TIMER;
|
|
|
|
type = BPF_TIMER;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
}
|
2024-04-20 17:09:05 +08:00
|
|
|
if (field_mask & BPF_WORKQUEUE) {
|
|
|
|
if (!strcmp(name, "bpf_wq")) {
|
|
|
|
if (*seen_mask & BPF_WORKQUEUE)
|
|
|
|
return -E2BIG;
|
|
|
|
*seen_mask |= BPF_WORKQUEUE;
|
|
|
|
type = BPF_WORKQUEUE;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
}
|
2023-02-14 08:40:10 +08:00
|
|
|
field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head");
|
|
|
|
field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
|
|
|
|
field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root");
|
|
|
|
field_mask_test_name(BPF_RB_NODE, "bpf_rb_node");
|
2023-04-16 04:18:04 +08:00
|
|
|
field_mask_test_name(BPF_REFCOUNT, "bpf_refcount");
|
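	/* Illustrative map value exercising the names tested above
	 * (member names are placeholders):
	 *	struct map_value {
	 *		struct bpf_spin_lock lock;	(BPF_SPIN_LOCK)
	 *		struct bpf_timer t;		(BPF_TIMER)
	 *		struct bpf_wq wq;		(BPF_WORKQUEUE)
	 *		struct bpf_rb_root root __contains(elem, rbnode);
	 *						(BPF_RB_ROOT)
	 *	};
	 */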
2023-02-14 08:40:10 +08:00
|
|
|
|
2022-11-04 03:09:56 +08:00
|
|
|
/* Only return BPF_KPTR when all other types with matchable names fail */
|
2024-05-24 01:41:58 +08:00
|
|
|
if (field_mask & BPF_KPTR && !__btf_type_is_struct(var_type)) {
|
2022-11-04 03:09:56 +08:00
|
|
|
type = BPF_KPTR_REF;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
end:
|
|
|
|
*sz = btf_field_type_size(type);
|
|
|
|
*align = btf_field_type_align(type);
|
|
|
|
return type;
|
|
|
|
}
|
|
|
|
|
2023-02-14 08:40:10 +08:00
|
|
|
#undef field_mask_test_name
|
|
|
|
|
2024-05-24 01:41:58 +08:00
|
|
|
/* Repeat a number of fields a specified number of times.
|
2024-05-24 01:41:57 +08:00
|
|
|
*
|
2024-05-24 01:41:58 +08:00
|
|
|
* Copy the fields starting from the first field and repeat them for
|
|
|
|
* repeat_cnt times. The fields are repeated by increasing the offset of each
|
|
|
|
* field by
|
2024-05-24 01:41:57 +08:00
|
|
|
* (i + 1) * elem_size
|
|
|
|
* where i is the repeat index and elem_size is the size of an element.
|
|
|
|
*/
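/* Worked example (illustrative): for 'struct elem arr[3]' where
 * 'struct elem' is 16 bytes and carries one special field at offset 8,
 * repeat_cnt is 2 and the copies land at 8 + 1 * 16 = 24 and
 * 8 + 2 * 16 = 40.
 */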
|
2024-10-08 15:11:13 +08:00
|
|
|
static int btf_repeat_fields(struct btf_field_info *info, int info_cnt,
|
2024-05-24 01:41:58 +08:00
|
|
|
u32 field_cnt, u32 repeat_cnt, u32 elem_size)
|
2024-05-24 01:41:57 +08:00
|
|
|
{
|
2024-05-24 01:41:58 +08:00
|
|
|
u32 i, j;
|
2024-05-24 01:41:57 +08:00
|
|
|
u32 cur;
|
|
|
|
|
|
|
|
/* Ensure not repeating fields that should not be repeated. */
|
2024-05-24 01:41:58 +08:00
|
|
|
for (i = 0; i < field_cnt; i++) {
|
|
|
|
switch (info[i].type) {
|
|
|
|
case BPF_KPTR_UNREF:
|
|
|
|
case BPF_KPTR_REF:
|
|
|
|
case BPF_KPTR_PERCPU:
|
|
|
|
case BPF_LIST_HEAD:
|
|
|
|
case BPF_RB_ROOT:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2024-05-24 01:41:57 +08:00
|
|
|
}
|
|
|
|
|
2024-10-08 15:11:13 +08:00
|
|
|
/* The struct size or variable size is a u32,
|
|
|
|
* so the multiplication will not overflow.
|
|
|
|
*/
|
|
|
|
if (field_cnt * (repeat_cnt + 1) > info_cnt)
|
|
|
|
return -E2BIG;
|
|
|
|
|
2024-05-24 01:41:58 +08:00
|
|
|
cur = field_cnt;
|
2024-05-24 01:41:57 +08:00
|
|
|
for (i = 0; i < repeat_cnt; i++) {
|
2024-05-24 01:41:58 +08:00
|
|
|
memcpy(&info[cur], &info[0], field_cnt * sizeof(info[0]));
|
|
|
|
for (j = 0; j < field_cnt; j++)
|
|
|
|
info[cur++].off += (i + 1) * elem_size;
|
2024-05-24 01:41:57 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2024-05-24 01:41:58 +08:00
|
|
|
static int btf_find_struct_field(const struct btf *btf,
|
|
|
|
const struct btf_type *t, u32 field_mask,
|
2024-05-24 01:41:59 +08:00
|
|
|
struct btf_field_info *info, int info_cnt,
|
|
|
|
u32 level);
|
2024-05-24 01:41:58 +08:00
|
|
|
|
|
|
|
/* Find special fields in the struct type of a field.
|
|
|
|
*
|
|
|
|
* This function is used to find fields of special types that are not a
|
|
|
|
* global variable or a direct field of a struct type. It also handles the
|
|
|
|
* repetition if it is the element type of an array.
|
|
|
|
*/
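/* Illustrative: given
 *	struct inner { struct bpf_timer t; };
 *	struct map_value { struct inner arr[2]; };
 * the timer is found at offset 0 inside 'struct inner', shifted by
 * 'off' to its position in the map value, and repeated once more at
 * off + sizeof(struct inner) via btf_repeat_fields().
 */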
|
|
|
|
static int btf_find_nested_struct(const struct btf *btf, const struct btf_type *t,
|
|
|
|
u32 off, u32 nelems,
|
|
|
|
u32 field_mask, struct btf_field_info *info,
|
2024-05-24 01:41:59 +08:00
|
|
|
int info_cnt, u32 level)
|
2024-05-24 01:41:58 +08:00
|
|
|
{
|
|
|
|
int ret, err, i;
|
|
|
|
|
2024-05-24 01:41:59 +08:00
|
|
|
level++;
|
|
|
|
if (level >= MAX_RESOLVE_DEPTH)
|
|
|
|
return -E2BIG;
|
|
|
|
|
|
|
|
ret = btf_find_struct_field(btf, t, field_mask, info, info_cnt, level);
|
2024-05-24 01:41:58 +08:00
|
|
|
|
|
|
|
if (ret <= 0)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
/* Shift the offsets of the nested struct fields to the offsets
|
|
|
|
* relative to the container.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < ret; i++)
|
|
|
|
info[i].off += off;
|
|
|
|
|
|
|
|
if (nelems > 1) {
|
2024-10-08 15:11:13 +08:00
|
|
|
err = btf_repeat_fields(info, info_cnt, ret, nelems - 1, t->size);
|
2024-05-24 01:41:58 +08:00
|
|
|
if (err == 0)
|
|
|
|
ret *= nelems;
|
|
|
|
else
|
|
|
|
ret = err;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2024-05-24 01:41:56 +08:00
|
|
|
static int btf_find_field_one(const struct btf *btf,
|
|
|
|
const struct btf_type *var,
|
|
|
|
const struct btf_type *var_type,
|
|
|
|
int var_idx,
|
|
|
|
u32 off, u32 expected_size,
|
|
|
|
u32 field_mask, u32 *seen_mask,
|
2024-05-24 01:41:59 +08:00
|
|
|
struct btf_field_info *info, int info_cnt,
|
|
|
|
u32 level)
|
2024-05-24 01:41:56 +08:00
|
|
|
{
|
|
|
|
int ret, align, sz, field_type;
|
|
|
|
struct btf_field_info tmp;
|
2024-05-24 01:41:57 +08:00
|
|
|
const struct btf_array *array;
|
|
|
|
u32 i, nelems = 1;
|
|
|
|
|
|
|
|
/* Walk into array types to find the element type and the number of
|
|
|
|
* elements in the (flattened) array.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < MAX_RESOLVE_DEPTH && btf_type_is_array(var_type); i++) {
|
|
|
|
array = btf_array(var_type);
|
|
|
|
nelems *= array->nelems;
|
|
|
|
var_type = btf_type_by_id(btf, array->type);
|
|
|
|
}
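	/* Illustrative: for 'struct elem grid[2][3]' the walk above
	 * terminates with var_type == 'struct elem' and nelems == 6.
	 */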
|
|
|
|
if (i == MAX_RESOLVE_DEPTH)
|
|
|
|
return -E2BIG;
|
|
|
|
if (nelems == 0)
|
|
|
|
return 0;
|
2024-05-24 01:41:56 +08:00
|
|
|
|
2024-05-24 01:41:58 +08:00
|
|
|
field_type = btf_get_field_type(btf, var_type,
|
2024-05-24 01:41:56 +08:00
|
|
|
field_mask, seen_mask, &align, &sz);
|
2024-05-24 01:41:58 +08:00
|
|
|
/* Look into variables of struct types */
|
|
|
|
if (!field_type && __btf_type_is_struct(var_type)) {
|
|
|
|
sz = var_type->size;
|
|
|
|
if (expected_size && expected_size != sz * nelems)
|
|
|
|
return 0;
|
|
|
|
ret = btf_find_nested_struct(btf, var_type, off, nelems, field_mask,
|
2024-05-24 01:41:59 +08:00
|
|
|
&info[0], info_cnt, level);
|
2024-05-24 01:41:58 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2024-05-24 01:41:56 +08:00
|
|
|
if (field_type == 0)
|
|
|
|
return 0;
|
|
|
|
if (field_type < 0)
|
|
|
|
return field_type;
|
|
|
|
|
2024-05-24 01:41:57 +08:00
|
|
|
if (expected_size && expected_size != sz * nelems)
|
2024-05-24 01:41:56 +08:00
|
|
|
return 0;
|
|
|
|
if (off % align)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
switch (field_type) {
|
|
|
|
case BPF_SPIN_LOCK:
|
|
|
|
case BPF_TIMER:
|
|
|
|
case BPF_WORKQUEUE:
|
|
|
|
case BPF_LIST_NODE:
|
|
|
|
case BPF_RB_NODE:
|
|
|
|
case BPF_REFCOUNT:
|
|
|
|
ret = btf_find_struct(btf, var_type, off, sz, field_type,
|
|
|
|
info_cnt ? &info[0] : &tmp);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
break;
|
|
|
|
case BPF_KPTR_UNREF:
|
|
|
|
case BPF_KPTR_REF:
|
|
|
|
case BPF_KPTR_PERCPU:
|
|
|
|
ret = btf_find_kptr(btf, var_type, off, sz,
|
|
|
|
info_cnt ? &info[0] : &tmp);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
break;
|
|
|
|
case BPF_LIST_HEAD:
|
|
|
|
case BPF_RB_ROOT:
|
|
|
|
ret = btf_find_graph_root(btf, var, var_type,
|
|
|
|
var_idx, off, sz,
|
|
|
|
info_cnt ? &info[0] : &tmp,
|
|
|
|
field_type);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ret == BTF_FIELD_IGNORE)
|
|
|
|
return 0;
|
2024-10-08 15:11:13 +08:00
|
|
|
if (!info_cnt)
|
2024-05-24 01:41:56 +08:00
|
|
|
return -E2BIG;
|
2024-05-24 01:41:57 +08:00
|
|
|
if (nelems > 1) {
|
2024-10-08 15:11:13 +08:00
|
|
|
ret = btf_repeat_fields(info, info_cnt, 1, nelems - 1, sz);
|
2024-05-24 01:41:57 +08:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
return nelems;
|
2024-05-24 01:41:56 +08:00
|
|
|
}
|
|
|
|
|
2022-11-04 03:09:56 +08:00
|
|
|
static int btf_find_struct_field(const struct btf *btf,
|
|
|
|
const struct btf_type *t, u32 field_mask,
|
2024-05-24 01:41:59 +08:00
|
|
|
struct btf_field_info *info, int info_cnt,
|
|
|
|
u32 level)
|
2019-02-01 07:40:04 +08:00
|
|
|
{
|
2024-05-24 01:41:56 +08:00
|
|
|
int ret, idx = 0;
|
2019-02-01 07:40:04 +08:00
|
|
|
const struct btf_member *member;
|
2022-11-04 03:09:56 +08:00
|
|
|
u32 i, off, seen_mask = 0;
|
2019-02-01 07:40:04 +08:00
|
|
|
|
|
|
|
for_each_member(i, t, member) {
|
|
|
|
const struct btf_type *member_type = btf_type_by_id(btf,
|
|
|
|
member->type);
|
2022-04-16 00:03:42 +08:00
|
|
|
|
2021-12-02 02:10:25 +08:00
|
|
|
off = __btf_member_bit_offset(t, member);
|
2019-02-01 07:40:04 +08:00
|
|
|
if (off % 8)
|
|
|
|
/* valid C code cannot generate such BTF */
|
|
|
|
return -EINVAL;
|
|
|
|
off /= 8;
|
2022-04-16 00:03:42 +08:00
|
|
|
|
2024-05-24 01:41:56 +08:00
|
|
|
ret = btf_find_field_one(btf, t, member_type, i,
|
|
|
|
off, 0,
|
|
|
|
field_mask, &seen_mask,
|
2024-05-24 01:41:59 +08:00
|
|
|
&info[idx], info_cnt - idx, level);
|
2024-05-24 01:41:56 +08:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
idx += ret;
|
2021-07-15 08:54:10 +08:00
|
|
|
}
|
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptrs are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow
the user to still access such an invalid pointer, but passing it into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are the only instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
the same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 05:48:49 +08:00
|
|
|
return idx;
|
2021-07-15 08:54:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
|
2022-11-04 03:09:56 +08:00
|
|
|
u32 field_mask, struct btf_field_info *info,
|
2024-05-24 01:41:59 +08:00
|
|
|
int info_cnt, u32 level)
|
2021-07-15 08:54:10 +08:00
|
|
|
{
|
2024-05-24 01:41:56 +08:00
|
|
|
int ret, idx = 0;
|
2021-07-15 08:54:10 +08:00
|
|
|
const struct btf_var_secinfo *vsi;
|
2022-11-04 03:09:56 +08:00
|
|
|
u32 i, off, seen_mask = 0;
|
2021-07-15 08:54:10 +08:00
|
|
|
|
|
|
|
for_each_vsi(i, t, vsi) {
|
|
|
|
const struct btf_type *var = btf_type_by_id(btf, vsi->type);
|
|
|
|
const struct btf_type *var_type = btf_type_by_id(btf, var->type);
|
|
|
|
|
2022-11-04 03:09:56 +08:00
|
|
|
off = vsi->offset;
|
2024-05-24 01:41:56 +08:00
|
|
|
ret = btf_find_field_one(btf, var, var_type, -1, off, vsi->size,
|
|
|
|
field_mask, &seen_mask,
|
2024-05-24 01:41:59 +08:00
|
|
|
&info[idx], info_cnt - idx,
|
|
|
|
level);
|
2024-05-24 01:41:56 +08:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
idx += ret;
|
2019-02-01 07:40:04 +08:00
|
|
|
}
|
2022-04-25 05:48:49 +08:00
|
|
|
return idx;
|
2019-02-01 07:40:04 +08:00
|
|
|
}
|
|
|
|
|
2021-07-15 08:54:10 +08:00
|
|
|
static int btf_find_field(const struct btf *btf, const struct btf_type *t,
|
2022-11-04 03:09:56 +08:00
|
|
|
u32 field_mask, struct btf_field_info *info,
|
|
|
|
int info_cnt)
|
2021-07-15 08:54:10 +08:00
|
|
|
{
|
|
|
|
if (__btf_type_is_struct(t))
|
2024-05-24 01:41:59 +08:00
|
|
|
return btf_find_struct_field(btf, t, field_mask, info, info_cnt, 0);
|
2021-07-15 08:54:10 +08:00
|
|
|
else if (btf_type_is_datasec(t))
|
2024-05-24 01:41:59 +08:00
|
|
|
return btf_find_datasec_var(btf, t, field_mask, info, info_cnt, 0);
|
2021-07-15 08:54:10 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2024-08-14 05:24:20 +08:00
|
|
|
/* Callers have to ensure the life cycle of btf if it is program BTF */
|
2022-11-04 03:09:56 +08:00
|
|
|
static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
|
|
|
|
struct btf_field_info *info)
|
2021-07-15 08:54:10 +08:00
|
|
|
{
|
2022-11-04 03:09:56 +08:00
|
|
|
struct module *mod = NULL;
|
|
|
|
const struct btf_type *t;
|
bpf: Support __kptr to local kptrs
If a PTR_TO_BTF_ID type comes from program BTF - not vmlinux or module
BTF - it must have been allocated by bpf_obj_new and therefore must be
freed with bpf_obj_drop. Such a PTR_TO_BTF_ID is considered a "local
kptr" and is tagged with MEM_ALLOC type tag by bpf_obj_new.
This patch adds support for treating __kptr-tagged pointers to "local
kptrs" as having an implicit bpf_obj_drop destructor for referenced kptr
acquire / release semantics. Consider the following example:
struct node_data {
long key;
long data;
struct bpf_rb_node node;
};
struct map_value {
struct node_data __kptr *node;
};
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, int);
__type(value, struct map_value);
__uint(max_entries, 1);
} some_nodes SEC(".maps");
If struct node_data had a matching definition in kernel BTF, the verifier would
expect a destructor for the type to be registered. Since struct node_data does
not match any type in kernel BTF, the verifier knows that there is no kfunc
that provides a PTR_TO_BTF_ID to this type, and that such a PTR_TO_BTF_ID can
only come from bpf_obj_new. So instead of searching for a registered dtor,
a bpf_obj_drop dtor can be assumed.
This allows the runtime to properly destruct such kptrs in
bpf_obj_free_fields, which enables maps to clean up map_vals w/ such
kptrs when going away.
Implementation notes:
* "kernel_btf" variable is renamed to "kptr_btf" in btf_parse_kptr.
Before this patch, the variable would only ever point to vmlinux or
module BTFs, but now it can point to some program BTF for local kptr
type. It's later used to populate the (btf, btf_id) pair in kptr btf
field.
* It's necessary to btf_get the program BTF when populating btf_field
for local kptr. btf_record_free later does a btf_put.
* Behavior for non-local referenced kptrs is not modified, as
bpf_find_btf_id helper only searches vmlinux and module BTFs for
matching BTF type. If such a type is found, btf_field_kptr's btf will
pass btf_is_kernel check, and the associated release function is
some one-argument dtor. If btf_is_kernel check fails, associated
release function is two-arg bpf_obj_drop_impl. Before this patch
only btf_field_kptr's w/ kernel or module BTFs were created.
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230310230743.2320707-2-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-11 07:07:41 +08:00
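A BPF-program-side sketch of the flow described above (illustrative;
bpf_obj_new()/bpf_obj_drop() are the kfunc wrappers from the selftests'
bpf_experimental.h, bpf_kptr_xchg() is the existing helper, and 'v' is
assumed to be a struct map_value * obtained from a map lookup):

	struct node_data *n = bpf_obj_new(typeof(*n));

	if (!n)
		return 0;
	/* exchange the kptr stored in the map value */
	n = bpf_kptr_xchg(&v->node, n);
	if (n)			/* an old node was stored there */
		bpf_obj_drop(n);

When the map itself is freed, bpf_obj_free_fields() drops any node still
stored in v->node using the implicit bpf_obj_drop destructor described
in the commit message above.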
|
|
|
/* If a matching btf type is found in kernel or module BTFs, kptr_btf
|
|
|
|
* is that BTF, otherwise it's program BTF
|
|
|
|
*/
|
|
|
|
struct btf *kptr_btf;
|
2022-04-16 00:03:42 +08:00
|
|
|
int ret;
|
2022-11-04 03:09:56 +08:00
|
|
|
s32 id;
|
2022-04-16 00:03:42 +08:00
|
|
|
|
2022-11-04 03:09:56 +08:00
|
|
|
/* Find type in map BTF, and use it to look up the matching type
|
|
|
|
* in vmlinux or module BTFs, by name and kind.
|
|
|
|
*/
|
|
|
|
t = btf_type_by_id(btf, info->kptr.type_id);
|
|
|
|
id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info),
|
2023-03-11 07:07:41 +08:00
|
|
|
&kptr_btf);
|
|
|
|
if (id == -ENOENT) {
|
|
|
|
/* btf_parse_kptr should only be called w/ btf = program BTF */
|
|
|
|
WARN_ON_ONCE(btf_is_kernel(btf));
|
|
|
|
|
|
|
|
/* Type exists only in program BTF. Assume that it's a MEM_ALLOC
|
|
|
|
* kptr allocated via bpf_obj_new
|
|
|
|
*/
|
2023-03-14 05:46:41 +08:00
|
|
|
field->kptr.dtor = NULL;
|
2023-03-11 07:07:41 +08:00
|
|
|
id = info->kptr.type_id;
|
|
|
|
kptr_btf = (struct btf *)btf;
|
|
|
|
goto found_dtor;
|
|
|
|
}
|
2022-11-04 03:09:56 +08:00
|
|
|
if (id < 0)
|
|
|
|
return id;
|
|
|
|
|
|
|
|
/* Find and stash the function pointer for the destruction function that
|
|
|
|
* needs to be eventually invoked from the map free path.
|
|
|
|
*/
|
|
|
|
if (info->type == BPF_KPTR_REF) {
|
|
|
|
const struct btf_type *dtor_func;
|
|
|
|
const char *dtor_func_name;
|
|
|
|
unsigned long addr;
|
|
|
|
s32 dtor_btf_id;
|
|
|
|
|
|
|
|
/* This call also serves as a whitelist of allowed objects that
|
|
|
|
* can be used as a referenced pointer and be stored in a map at
|
|
|
|
* the same time.
|
|
|
|
*/
|
2023-03-11 07:07:41 +08:00
|
|
|
dtor_btf_id = btf_find_dtor_kfunc(kptr_btf, id);
|
2022-11-04 03:09:56 +08:00
|
|
|
if (dtor_btf_id < 0) {
|
|
|
|
ret = dtor_btf_id;
|
|
|
|
goto end_btf;
|
|
|
|
}
|
2021-07-15 08:54:10 +08:00
|
|
|
|
2023-03-11 07:07:41 +08:00
|
|
|
dtor_func = btf_type_by_id(kptr_btf, dtor_btf_id);
|
2022-11-04 03:09:56 +08:00
|
|
|
if (!dtor_func) {
|
|
|
|
ret = -ENOENT;
|
|
|
|
goto end_btf;
|
|
|
|
}
|
2022-04-16 00:03:42 +08:00
|
|
|
|
2023-03-11 07:07:41 +08:00
|
|
|
if (btf_is_module(kptr_btf)) {
|
|
|
|
mod = btf_try_get_module(kptr_btf);
|
2022-11-04 03:09:56 +08:00
|
|
|
if (!mod) {
|
|
|
|
ret = -ENXIO;
|
|
|
|
goto end_btf;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We already verified dtor_func to be btf_type_is_func
|
|
|
|
* in register_btf_id_dtor_kfuncs.
|
|
|
|
*/
|
2023-03-11 07:07:41 +08:00
|
|
|
dtor_func_name = __btf_name_by_offset(kptr_btf, dtor_func->name_off);
|
2022-11-04 03:09:56 +08:00
|
|
|
addr = kallsyms_lookup_name(dtor_func_name);
|
|
|
|
if (!addr) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto end_mod;
|
|
|
|
}
|
|
|
|
field->kptr.dtor = (void *)addr;
|
|
|
|
}
|
|
|
|
|
2023-03-11 07:07:41 +08:00
|
|
|
found_dtor:
|
2022-11-04 03:09:56 +08:00
|
|
|
field->kptr.btf_id = id;
|
2023-03-11 07:07:41 +08:00
|
|
|
field->kptr.btf = kptr_btf;
|
2022-11-04 03:09:56 +08:00
|
|
|
field->kptr.module = mod;
|
|
|
|
return 0;
|
|
|
|
end_mod:
|
|
|
|
module_put(mod);
|
|
|
|
end_btf:
|
2023-03-11 07:07:41 +08:00
|
|
|
btf_put(kptr_btf);
|
2022-11-04 03:09:56 +08:00
|
|
|
return ret;
|
2021-07-15 08:54:10 +08:00
|
|
|
}
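For contrast with the assumed-dtor path above, a kernel or module type's dtor must be registered explicitly. A hedged sketch of that registration, where my_obj and my_obj_release are hypothetical names standing in for a real type and its one-argument release kfunc:
#include <linux/btf.h>
#include <linux/btf_ids.h>

BTF_ID_LIST(my_dtor_ids)
BTF_ID(struct, my_obj)
BTF_ID(func, my_obj_release)

static const struct btf_id_dtor_kfunc my_dtors[] = {
	{
		.btf_id       = my_dtor_ids[0],	/* type the dtor applies to */
		.kfunc_btf_id = my_dtor_ids[1],	/* its release kfunc */
	},
};

/* e.g. from a module init path: */
return register_btf_id_dtor_kfuncs(my_dtors, ARRAY_SIZE(my_dtors), THIS_MODULE);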
|
|
|
|
|
bpf: Add basic bpf_rb_{root,node} support
This patch adds special BPF_RB_{ROOT,NODE} btf_field_types similar to
BPF_LIST_{HEAD,NODE}, adds the necessary plumbing to detect the new
types, and adds bpf_rb_root_free function for freeing bpf_rb_root in
map_values.
structs bpf_rb_root and bpf_rb_node are opaque types meant to
obscure structs rb_root_cached and rb_node, respectively.
btf_struct_access will prevent BPF programs from touching these special
fields automatically now that they're recognized.
btf_check_and_fixup_fields now groups list_head and rb_root together as
"graph root" fields and {list,rb}_node as "graph node", and does same
ownership cycle checking as before. Note that this function does _not_
prevent ownership type mixups (e.g. rb_root owning list_node) - that's
handled by btf_parse_graph_root.
After this patch, a bpf program can have a struct bpf_rb_root in a
map_value, but cannot yet add anything to it or do anything useful with it.
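As an illustration (not taken from the patch itself), such a map_value might be declared on the program side roughly like this, borrowing the __contains tag described in the bpf_list_head commit below; the lock placement follows the usual graph-root convention:
struct node_data {
	long key;
	struct bpf_rb_node node;
};

struct map_value {
	struct bpf_spin_lock lock;	/* guards the tree */
	struct bpf_rb_root root __contains(node_data, node);
};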
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230214004017.2534011-2-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-02-14 08:40:10 +08:00
|
|
|
static int btf_parse_graph_root(const struct btf *btf,
|
|
|
|
struct btf_field *field,
|
|
|
|
struct btf_field_info *info,
|
|
|
|
const char *node_type_name,
|
|
|
|
size_t node_type_align)
|
bpf: Support bpf_list_head in map values
Add the support on the map side to parse, recognize, verify, and build
metadata table for a new special field of the type struct bpf_list_head.
To parameterize the bpf_list_head for a certain value type and the
list_node member it will accept in that value type, we use BTF
declaration tags.
The definition of bpf_list_head in a map value will be done as follows:
struct foo {
struct bpf_list_node node;
int data;
};
struct map_value {
struct bpf_list_head head __contains(foo, node);
};
Then, the bpf_list_head only allows adding to the list 'head' using the
bpf_list_node 'node' for the type struct foo.
The 'contains' annotation is a BTF declaration tag of the form
"contains:name:node", where the name is used to look up the
type in the map BTF, with its kind hardcoded to BTF_KIND_STRUCT during
the lookup. The node part gives the name of the member in this type that has
the type struct bpf_list_node, which is actually used for linking into
the linked list. For now, the 'kind' part is hardcoded as struct.
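For reference, the tag is typically emitted through a small wrapper macro; a plausible definition matching the format described above (as shipped later in the selftests' bpf_experimental.h) is:
#define __contains(name, node) \
	__attribute__((btf_decl_tag("contains:" #name ":" #node)))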
This allows building intrusive linked lists in BPF, using container_of
to obtain pointer to entry, while being completely type safe from the
perspective of the verifier. The verifier knows exactly the type of the
nodes, and knows that list helpers return that type at some fixed offset
where the bpf_list_node member used for this list exists. The verifier
also uses this information to disallow adding types that are not
accepted by a certain list.
For now, no elements can be added to such lists. Support for that is
coming in future patches, hence draining and freeing items is done with
a TODO that will be resolved in a future patch.
Note that the bpf_list_head_free function moves the list out to a local
variable under the lock and releases it, doing the actual draining of
the list items outside the lock. While this helps with not holding the
lock for too long pessimizing other concurrent list operations, it is
also necessary for deadlock prevention: unless every function called in
the critical section would be notrace, a fentry/fexit program could
attach and call bpf_map_update_elem again on the map, leading to the
same lock being acquired if the key matches and lead to a deadlock.
While this requires some special effort on the part of the BPF programmer to
trigger and is highly unlikely to occur in practice, it is always better
if we can avoid such a condition.
While notrace would prevent this, doing the draining outside the lock
has advantages of its own, hence it is also used to fix the
deadlock-related problem.
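A hedged sketch of the splice-then-drain shape described above; the real bpf_list_head_free open-codes the walk differently, and the function and parameter names here are illustrative only:
static void graph_root_free_sketch(struct bpf_spin_lock *lock,
				   struct list_head *head)
{
	LIST_HEAD(off_list);

	__bpf_spin_lock_irqsave(lock);
	list_splice_init(head, &off_list);	/* detach under the lock */
	__bpf_spin_unlock_irqrestore(lock);

	/* drain outside the lock: a traced callee can no longer
	 * re-enter map_update and re-acquire the same lock
	 */
	while (!list_empty(&off_list)) {
		struct list_head *n = off_list.next;

		list_del_init(n);
		/* free the object containing n here */
	}
}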
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221114191547.1694267-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-15 03:15:25 +08:00
|
|
|
{
|
|
|
|
const struct btf_type *t, *n = NULL;
|
|
|
|
const struct btf_member *member;
|
|
|
|
u32 offset;
|
|
|
|
int i;
|
|
|
|
|
2022-12-17 16:24:57 +08:00
|
|
|
t = btf_type_by_id(btf, info->graph_root.value_btf_id);
|
2022-11-15 03:15:25 +08:00
|
|
|
/* We've already checked that value_btf_id is a struct type. We
|
|
|
|
* just need to figure out the offset of the list_node, and
|
|
|
|
* verify its type.
|
|
|
|
*/
|
|
|
|
for_each_member(i, t, member) {
|
2022-12-17 16:24:57 +08:00
|
|
|
if (strcmp(info->graph_root.node_name,
|
|
|
|
__btf_name_by_offset(btf, member->name_off)))
|
2022-11-15 03:15:25 +08:00
|
|
|
continue;
|
|
|
|
/* Invalid BTF, two members with same name */
|
|
|
|
if (n)
|
|
|
|
return -EINVAL;
|
|
|
|
n = btf_type_by_id(btf, member->type);
|
|
|
|
if (!__btf_type_is_struct(n))
|
|
|
|
return -EINVAL;
|
2023-02-14 08:40:10 +08:00
|
|
|
if (strcmp(node_type_name, __btf_name_by_offset(btf, n->name_off)))
|
2022-11-15 03:15:25 +08:00
|
|
|
return -EINVAL;
|
|
|
|
offset = __btf_member_bit_offset(n, member);
|
|
|
|
if (offset % 8)
|
|
|
|
return -EINVAL;
|
|
|
|
offset /= 8;
|
2023-02-14 08:40:10 +08:00
|
|
|
if (offset % node_type_align)
|
2022-11-15 03:15:25 +08:00
|
|
|
return -EINVAL;
|
|
|
|
|
2022-12-17 16:24:57 +08:00
|
|
|
field->graph_root.btf = (struct btf *)btf;
|
|
|
|
field->graph_root.value_btf_id = info->graph_root.value_btf_id;
|
|
|
|
field->graph_root.node_offset = offset;
|
2022-11-15 03:15:25 +08:00
|
|
|
}
|
|
|
|
if (!n)
|
|
|
|
return -ENOENT;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-02-14 08:40:10 +08:00
|
|
|
static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
|
|
|
|
struct btf_field_info *info)
|
|
|
|
{
|
|
|
|
return btf_parse_graph_root(btf, field, info, "bpf_list_node",
|
|
|
|
__alignof__(struct bpf_list_node));
|
|
|
|
}
|
|
|
|
|
|
|
|
static int btf_parse_rb_root(const struct btf *btf, struct btf_field *field,
|
|
|
|
struct btf_field_info *info)
|
|
|
|
{
|
|
|
|
return btf_parse_graph_root(btf, field, info, "bpf_rb_node",
|
|
|
|
__alignof__(struct bpf_rb_node));
|
|
|
|
}
|
|
|
|
|
bpf: Remove btf_field_offs, use btf_record's fields instead
The btf_field_offs struct contains (offset, size) for btf_record fields,
sorted by offset. btf_field_offs is always used in conjunction with
btf_record, which has btf_field 'fields' array with (offset, type), the
latter of which btf_field_offs' size is derived from via
btf_field_type_size.
This patch adds a size field to struct btf_field and sorts btf_record's
fields by offset, making it possible to get rid of btf_field_offs. The
result is less data duplication and less code complexity.
Since btf_field_offs' lifetime closely followed the btf_record used to
populate it, most complexity wins are from removal of initialization
code like:
if (btf_record_successfully_initialized) {
foffs = btf_parse_field_offs(rec);
if (IS_ERR_OR_NULL(foffs))
// free the btf_record and return err
}
Other changes in this patch are pretty mechanical:
* foffs->field_off[i] -> rec->fields[i].offset
* foffs->field_sz[i] -> rec->fields[i].size
* Sort rec->fields in btf_parse_fields before returning (see the sketch
after this list)
* It's possible that this is necessary independently of other
changes in this patch. btf_record_find in syscall.c expects
btf_record's fields to be sorted by offset, yet there's no
explicit sorting of them before this patch; the record's fields are
populated in the order they're read from the BTF struct definition.
BTF docs don't say anything about the sortedness of struct fields.
* All functions taking struct btf_field_offs * input now instead take
struct btf_record *. All callsites of these functions already have
access to the correct btf_record.
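The sort mentioned above is effectively a one-liner at the end of btf_parse_fields; a sketch using lib/sort.c's sort_r() with btf_field_cmp (shown just below) as the comparator, and the priv argument unused:
sort_r(rec->fields, rec->cnt, sizeof(struct btf_field),
       btf_field_cmp, NULL, rec);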
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230415201811.343116-2-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-04-16 04:18:03 +08:00
|
|
|
static int btf_field_cmp(const void *_a, const void *_b, const void *priv)
|
|
|
|
{
|
|
|
|
const struct btf_field *a = (const struct btf_field *)_a;
|
|
|
|
const struct btf_field *b = (const struct btf_field *)_b;
|
|
|
|
|
|
|
|
if (a->offset < b->offset)
|
|
|
|
return -1;
|
|
|
|
else if (a->offset > b->offset)
|
|
|
|
return 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-11-04 03:09:56 +08:00
|
|
|
struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
|
|
|
|
u32 field_mask, u32 value_size)
|
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptrs are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it is safe to allow the user
to still access such an invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user can ensure that the
object remains valid, the data the program reads from the kernel object
is valid as well.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, the user may store a PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
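A hedged sketch of that store/load pair; the map name is hypothetical, and bpf_get_current_task_btf() merely stands in for any helper returning a PTR_TO_BTF_ID of task_struct:
struct map_value *v;
struct task_struct *t;
int key = 0;

v = bpf_map_lookup_elem(&task_map, &key);
if (v) {
	v->task = bpf_get_current_task_btf(); /* BPF_STX of a PTR_TO_BTF_ID */
	t = v->task;			      /* BPF_LDX -> PTR_TO_BTF_ID_OR_NULL */
	if (t)
		bpf_printk("pid %d", t->pid); /* deref is PROBE_MEM-protected */
}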
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 05:48:49 +08:00
|
|
|
{
|
bpf: Refactor kptr_off_tab into btf_record
To prepare the BPF verifier to handle special fields in both map values
and program allocated types coming from program BTF, we need to refactor
the kptr_off_tab handling code into something more generic and reusable
across both cases to avoid code duplication.
Later patches also require passing this data to helpers at runtime, so
that they can work on user defined types, initialize them, destruct
them, etc.
The main observation is that both map values and such allocated types
point to a type in program BTF, hence they can be handled similarly. We
can prepare a field metadata table for both cases and store them in
struct bpf_map or struct btf depending on the use case.
Hence, refactor the code into generic btf_record and btf_field member
structs. The btf_record represents the fields of a specific btf_type in
user BTF. The cnt indicates the number of special fields we successfully
recognized, and field_mask is a bitmask of fields that were found, to
enable quick determination of availability of a certain field.
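For orientation, a condensed sketch of the resulting layout, roughly as it appears in include/linux/bpf.h once the later patches in this section have landed (union members abbreviated, later additions like refcount_off omitted):
struct btf_field {
	u32 offset;
	u32 size;
	enum btf_field_type type;
	union {
		struct btf_field_kptr kptr;
		struct btf_field_graph_root graph_root;
	};
};

struct btf_record {
	u32 cnt;		/* number of recognized special fields */
	u32 field_mask;		/* bitmask of found field types */
	int spin_lock_off;
	int timer_off;
	struct btf_field fields[];
};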
Subsequently, refactor the rest of the code to work with these generic
types, remove assumptions about kptr and kptr_off_tab, rename variables
to more meaningful names, etc.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221103191013.1236066-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-04 03:09:55 +08:00
|
|
|
struct btf_field_info info_arr[BTF_FIELDS_MAX];
|
2023-04-16 04:18:03 +08:00
|
|
|
u32 next_off = 0, field_type_size;
|
2022-11-04 03:09:55 +08:00
|
|
|
struct btf_record *rec;
|
|
|
|
int ret, i, cnt;
|
2022-04-25 05:48:49 +08:00
|
|
|
|
2022-11-04 03:09:56 +08:00
|
|
|
ret = btf_find_field(btf, t, field_mask, info_arr, ARRAY_SIZE(info_arr));
|
2022-04-25 05:48:49 +08:00
|
|
|
if (ret < 0)
|
|
|
|
return ERR_PTR(ret);
|
|
|
|
if (!ret)
|
|
|
|
return NULL;
|
|
|
|
|
2022-11-04 03:09:55 +08:00
|
|
|
cnt = ret;
|
bpf: Add comments for map BTF matching requirement for bpf_list_head
The old behavior of bpf_map_meta_equal was that it compared timer_off
to be equal (but not spin_lock_off, because that was not allowed), and
did memcmp of kptr_off_tab.
Now, we memcmp the btf_record of two bpf_map structs, which has all
fields.
We preserve backwards compat as we kzalloc the array, so if only spin
lock and timer exist in the map, we only compare offsets while the rest
of the unused members in the btf_field struct are zeroed out.
In the case of kptr, the btf and everything else come from vmlinux or a
module, so as long as the type is the same it will match, since the
kernel btf, module, and dtor pointer will be the same across maps.
Now with list_head in the mix, things are a bit complicated. We
implicitly add a requirement that both BTFs are same, because struct
btf_field_list_head has btf and value_rec members.
We obviously shouldn't force BTFs to be equal by default, as that breaks
backwards compatibility.
Currently it is only implicitly required due to list_head matching
struct btf and value_rec member. value_rec points back into a btf_record
stashed in the map BTF (btf member of btf_field_list_head). So that
pointer and the btf member have to match exactly.
Document all these subtle details so that things don't break in the
future when touching this code.
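The comparison itself then reduces to a memcmp over the whole record, which is exactly why the zeroed padding matters; a condensed sketch of btf_record_equal's core, with rec_a and rec_b the two records being compared:
int size;

if (!!rec_a != !!rec_b)
	return false;
if (!rec_a)
	return true;
if (rec_a->cnt != rec_b->cnt)
	return false;
size = offsetof(struct btf_record, fields[rec_a->cnt]);
/* memcmp is safe only because btf_parse_fields kzalloc'd the records,
 * zeroing padding and unused union members
 */
return !memcmp(rec_a, rec_b, size);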
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-19-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 09:56:08 +08:00
|
|
|
/* This needs to be kzalloc to zero out padding and unused fields, see
|
|
|
|
* comment in btf_record_equal.
|
|
|
|
*/
|
2022-11-04 03:09:55 +08:00
|
|
|
rec = kzalloc(offsetof(struct btf_record, fields[cnt]), GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!rec)
|
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may not longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user ensures that the
object remains valid, the data read from the kernel object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, the user may store a PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL:
since the verifier cannot statically know whether the value is NULL, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for the outer map is copied
from the inner map's kptr_off_tab. A deep copy was chosen over
introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, and hence would be unnecessary for all other users.
It is not permitted to use the MAP_FREEZE command or mmap on a BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
the BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called both from
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 05:48:49 +08:00
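A hedged usage sketch of the kptr mechanism on the BPF program side (the
map name, section placement, and field access are illustrative, not taken
from the patch):

    #define __kptr __attribute__((btf_type_tag("kptr")))

    struct map_value {
            struct task_struct __kptr *task;
    };

    struct {
            __uint(type, BPF_MAP_TYPE_ARRAY);
            __uint(max_entries, 1);
            __type(key, int);
            __type(value, struct map_value);
    } kptr_map SEC(".maps");

    /* In the program: only BPF_LDX/BPF_STX/BPF_ST at the kptr offset, size BPF_DW */
    int key = 0;
    struct map_value *v = bpf_map_lookup_elem(&kptr_map, &key);

    if (v) {
            struct task_struct *t = v->task; /* load: PTR_TO_BTF_ID_OR_NULL */

            if (t)
                    bpf_printk("pid %d", t->pid); /* read patched to PROBE_MEM */
            v->task = NULL;                  /* store: BPF_ST with imm = 0 */
    }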
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
2022-11-04 03:09:56 +08:00
|
|
|
rec->spin_lock_off = -EINVAL;
|
|
|
|
rec->timer_off = -EINVAL;
|
2024-04-20 17:09:05 +08:00
|
|
|
rec->wq_off = -EINVAL;
|
2023-04-16 04:18:04 +08:00
|
|
|
rec->refcount_off = -EINVAL;
|
2022-11-04 03:09:56 +08:00
|
|
|
for (i = 0; i < cnt; i++) {
|
bpf: Remove btf_field_offs, use btf_record's fields instead
The btf_field_offs struct contains (offset, size) for btf_record fields,
sorted by offset. btf_field_offs is always used in conjunction with
btf_record, which has a btf_field 'fields' array with (offset, type);
btf_field_offs' sizes are derived from the type via
btf_field_type_size.
This patch adds a size field to struct btf_field and sorts btf_record's
fields by offset, making it possible to get rid of btf_field_offs. Less
data duplication and less code complexity results.
Since btf_field_offs' lifetime closely followed the btf_record used to
populate it, most complexity wins are from removal of initialization
code like:
if (btf_record_successfully_initialized) {
foffs = btf_parse_field_offs(rec);
if (IS_ERR_OR_NULL(foffs))
// free the btf_record and return err
}
Other changes in this patch are pretty mechanical:
* foffs->field_off[i] -> rec->fields[i].offset
* foffs->field_sz[i] -> rec->fields[i].size
* Sort rec->fields in btf_parse_fields before returning
* It's possible that this is necessary independently of other
changes in this patch. btf_record_find in syscall.c expects
btf_record's fields to be sorted by offset, yet there's no
explicit sorting of them before this patch; the record's fields are
populated in the order they're read from the BTF struct definition.
BTF docs don't say anything about the sortedness of struct fields.
* All functions taking struct btf_field_offs * input now instead take
struct btf_record *. All callsites of these functions already have
access to the correct btf_record.
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230415201811.343116-2-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-04-16 04:18:03 +08:00
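The sorting this patch adds happens via the sort_r() call near the end of
btf_parse_fields (shown further below); a comparator sketch consistent
with that call, though the in-tree btf_field_cmp may differ in detail:

    static int btf_field_cmp(const void *a, const void *b, const void *priv)
    {
            const struct btf_field *f1 = a, *f2 = b;

            /* Order fields by ascending byte offset within the value */
            if (f1->offset < f2->offset)
                    return -1;
            if (f1->offset > f2->offset)
                    return 1;
            return 0;
    }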
|
|
|
field_type_size = btf_field_type_size(info_arr[i].type);
|
|
|
|
if (info_arr[i].off + field_type_size > value_size) {
|
2022-11-04 03:09:56 +08:00
|
|
|
WARN_ONCE(1, "verifier bug off %d size %d", info_arr[i].off, value_size);
|
|
|
|
ret = -EFAULT;
|
bpf: Allow storing unreferenced kptr in map
2022-04-25 05:48:49 +08:00
|
|
|
goto end;
|
|
|
|
}
|
bpf: Support bpf_list_head in map values
Add the support on the map side to parse, recognize, verify, and build
metadata table for a new special field of the type struct bpf_list_head.
To parameterize the bpf_list_head for a certain value type and the
list_node member it will accept in that value type, we use BTF
declaration tags.
The definition of bpf_list_head in a map value will be done as follows:
struct foo {
struct bpf_list_node node;
int data;
};
struct map_value {
struct bpf_list_head head __contains(foo, node);
};
Then, the bpf_list_head only allows adding to the list 'head' using the
bpf_list_node 'node' for the type struct foo.
The 'contains' annotation is a BTF declaration tag of the form
"contains:name:node", where the name is used to look up the
type in the map BTF, with its kind hardcoded to BTF_KIND_STRUCT during
the lookup. The node part names the member in this type that has
the type struct bpf_list_node, which is actually used for linking into
the linked list. For now, the 'kind' part is hardcoded as struct.
This allows building intrusive linked lists in BPF, using container_of
to obtain pointer to entry, while being completely type safe from the
perspective of the verifier. The verifier knows exactly the type of the
nodes, and knows that list helpers return that type at some fixed offset
where the bpf_list_node member used for this list exists. The verifier
also uses this information to disallow adding types that are not
accepted by a certain list.
For now, no elements can be added to such lists. Support for that is
coming in future patches, hence draining and freeing items is done with
a TODO that will be resolved in a future patch.
Note that the bpf_list_head_free function moves the list out to a local
variable under the lock and releases it, doing the actual draining of
the list items outside the lock. While this helps with not holding the
lock for too long pessimizing other concurrent list operations, it is
also necessary for deadlock prevention: unless every function called in
the critical section were notrace, a fentry/fexit program could
attach and call bpf_map_update_elem again on the map, leading to the
same lock being acquired if the key matches, and hence to a deadlock.
While this requires some special effort on the part of the BPF programmer
to trigger and is highly unlikely to occur in practice, it is always
better if we can avoid such a condition.
While notrace would prevent this, doing the draining outside the lock
has advantages of its own, hence it is used to also fix the deadlock
related problem.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221114191547.1694267-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-15 03:15:25 +08:00
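On the program side, the __contains annotation used above is a thin
declaration-tag macro; a sketch of how it can be defined (its exact
placement in a selftests helper header is an assumption):

    #define __contains(name, node) \
            __attribute__((btf_decl_tag("contains:" #name ":" #node)))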
|
|
|
if (info_arr[i].off < next_off) {
|
|
|
|
ret = -EEXIST;
|
|
|
|
goto end;
|
|
|
|
}
|
bpf: Remove btf_field_offs, use btf_record's fields instead
2023-04-16 04:18:03 +08:00
|
|
|
next_off = info_arr[i].off + field_type_size;
|
bpf: Allow storing unreferenced kptr in map
2022-04-25 05:48:49 +08:00
|
|
|
|
bpf: Refactor kptr_off_tab into btf_record
2022-11-04 03:09:55 +08:00
|
|
|
rec->field_mask |= info_arr[i].type;
|
|
|
|
rec->fields[i].offset = info_arr[i].off;
|
|
|
|
rec->fields[i].type = info_arr[i].type;
|
bpf: Remove btf_field_offs, use btf_record's fields instead
2023-04-16 04:18:03 +08:00
|
|
|
rec->fields[i].size = field_type_size;
|
2022-11-04 03:09:56 +08:00
|
|
|
|
|
|
|
switch (info_arr[i].type) {
|
|
|
|
case BPF_SPIN_LOCK:
|
|
|
|
WARN_ON_ONCE(rec->spin_lock_off >= 0);
|
|
|
|
/* Cache offset for faster lookup at runtime */
|
|
|
|
rec->spin_lock_off = rec->fields[i].offset;
|
|
|
|
break;
|
|
|
|
case BPF_TIMER:
|
|
|
|
WARN_ON_ONCE(rec->timer_off >= 0);
|
|
|
|
/* Cache offset for faster lookup at runtime */
|
|
|
|
rec->timer_off = rec->fields[i].offset;
|
|
|
|
break;
|
2024-04-20 17:09:05 +08:00
|
|
|
case BPF_WORKQUEUE:
|
|
|
|
WARN_ON_ONCE(rec->wq_off >= 0);
|
|
|
|
/* Cache offset for faster lookup at runtime */
|
|
|
|
rec->wq_off = rec->fields[i].offset;
|
|
|
|
break;
|
2023-04-16 04:18:04 +08:00
|
|
|
case BPF_REFCOUNT:
|
|
|
|
WARN_ON_ONCE(rec->refcount_off >= 0);
|
|
|
|
/* Cache offset for faster lookup at runtime */
|
|
|
|
rec->refcount_off = rec->fields[i].offset;
|
|
|
|
break;
|
2022-11-04 03:09:56 +08:00
|
|
|
case BPF_KPTR_UNREF:
|
|
|
|
case BPF_KPTR_REF:
|
2023-08-27 23:27:39 +08:00
|
|
|
case BPF_KPTR_PERCPU:
|
2022-11-04 03:09:56 +08:00
|
|
|
ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]);
|
|
|
|
if (ret < 0)
|
|
|
|
goto end;
|
|
|
|
break;
|
bpf: Support bpf_list_head in map values
2022-11-15 03:15:25 +08:00
|
|
|
case BPF_LIST_HEAD:
|
|
|
|
ret = btf_parse_list_head(btf, &rec->fields[i], &info_arr[i]);
|
|
|
|
if (ret < 0)
|
|
|
|
goto end;
|
|
|
|
break;
|
bpf: Add basic bpf_rb_{root,node} support
This patch adds special BPF_RB_{ROOT,NODE} btf_field_types similar to
BPF_LIST_{HEAD,NODE}, adds the necessary plumbing to detect the new
types, and adds bpf_rb_root_free function for freeing bpf_rb_root in
map_values.
structs bpf_rb_root and bpf_rb_node are opaque types meant to
obscure structs rb_root_cached and rb_node, respectively.
btf_struct_access will prevent BPF programs from touching these special
fields automatically now that they're recognized.
btf_check_and_fixup_fields now groups list_head and rb_root together as
"graph root" fields and {list,rb}_node as "graph node", and does same
ownership cycle checking as before. Note that this function does _not_
prevent ownership type mixups (e.g. rb_root owning list_node) - that's
handled by btf_parse_graph_root.
After this patch, a bpf program can have a struct bpf_rb_root in a
map_value, but not add anything to nor do anything useful with it.
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230214004017.2534011-2-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-02-14 08:40:10 +08:00
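A sketch of the opaque UAPI types this commit describes, sized with
anonymous bitfields to cover struct rb_root_cached and struct rb_node
respectively (the exact field counts here are an assumption; the
authoritative definitions live in uapi/linux/bpf.h):

    struct bpf_rb_root {
            __u64 :64;
            __u64 :64;
    } __attribute__((aligned(8)));

    struct bpf_rb_node {
            __u64 :64;
            __u64 :64;
            __u64 :64;
    } __attribute__((aligned(8)));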
|
|
|
case BPF_RB_ROOT:
|
|
|
|
ret = btf_parse_rb_root(btf, &rec->fields[i], &info_arr[i]);
|
|
|
|
if (ret < 0)
|
|
|
|
goto end;
|
|
|
|
break;
|
bpf: Recognize lock and list fields in allocated objects
Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in an
allocated object.
Also update btf_struct_access to reject direct access to these special
fields.
A bpf_list_head allows implementing map-in-map style use cases, where an
allocated object with bpf_list_head is linked into a list in a map
value. This would require embedding a bpf_list_node, support for which
is also included. The bpf_spin_lock is used to protect the bpf_list_head
and other data.
While we strictly don't require holding a bpf_spin_lock while touching
the bpf_list_head in such objects, since when we have access to it we
have complete ownership of the object, the locking constraint is still
kept and may be conditionally lifted in the future.
Note that the specification of such types can be done just like map
values, e.g.:
struct bar {
struct bpf_list_node node;
};
struct foo {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(bar, node);
struct bpf_list_node node;
};
struct map_value {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(foo, node);
};
To recognize such types in user BTF, we build a btf_struct_metas array
of metadata items corresponding to each BTF ID. This is done once during
the btf_parse stage to avoid having to recompute it each time the
verification process needs to inspect the metadata.
Moreover, the computed metadata needs to be passed to some helpers in
future patches, which requires allocating it and storing it in the
BTF that is pinned by the program itself, so that valid access to such
data can be assumed during program runtime.
A key thing to note is that once a btf_struct_meta is available for a
type, both the btf_record and btf_field_offs should be available. It is
critical that btf_field_offs is available in case special fields are
present, as we extensively rely on special fields being zeroed out in
map values and allocated objects in later patches. The code ensures that
by bailing out in case of errors and ensuring both are available
together. If the record is not available, the special fields won't be
recognized, so not having both is also fine (in terms of being a
verification error and not a runtime bug).
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 09:55:56 +08:00
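A sketch of the per-BTF-ID metadata entry described above, with members
inferred from the text (at the time this also carried the btf_field_offs
pointer that a later patch removes):

    struct btf_struct_meta {
            u32 btf_id;                /* BTF ID of the struct being described */
            struct btf_record *record; /* its special fields (lock, list, ...) */
    };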
|
|
|
case BPF_LIST_NODE:
|
bpf: Add basic bpf_rb_{root,node} support
2023-02-14 08:40:10 +08:00
|
|
|
case BPF_RB_NODE:
|
bpf: Recognize lock and list fields in allocated objects
2022-11-18 09:55:56 +08:00
|
|
|
break;
|
2022-11-04 03:09:56 +08:00
|
|
|
default:
|
|
|
|
ret = -EFAULT;
|
|
|
|
goto end;
|
|
|
|
}
|
bpf: Refactor kptr_off_tab into btf_record
2022-11-04 03:09:55 +08:00
|
|
|
rec->cnt++;
|
bpf: Allow storing unreferenced kptr in map
2022-04-25 05:48:49 +08:00
|
|
|
}
|
bpf: Support bpf_list_head in map values
2022-11-15 03:15:25 +08:00
|
|
|
|
bpf: Add basic bpf_rb_{root,node} support
2023-02-14 08:40:10 +08:00
|
|
|
/* bpf_{list_head, rb_node} require bpf_spin_lock */
|
|
|
|
if ((btf_record_has_field(rec, BPF_LIST_HEAD) ||
|
|
|
|
btf_record_has_field(rec, BPF_RB_ROOT)) && rec->spin_lock_off < 0) {
|
bpf: Support bpf_list_head in map values
2022-11-15 03:15:25 +08:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto end;
|
|
|
|
}
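The check above relies on btf_record_has_field(); a minimal sketch
matching its use here as a field_mask test:

    static inline bool btf_record_has_field(const struct btf_record *rec,
                                            enum btf_field_type type)
    {
            if (IS_ERR_OR_NULL(rec))
                    return false;
            return rec->field_mask & type;
    }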
|
|
|
|
|
bpf: Migrate bpf_rbtree_remove to possibly fail
This patch modifies bpf_rbtree_remove to account for possible failure
due to the input rb_node already not being in any collection.
The function can now return NULL, and does when the aforementioned
scenario occurs. As before, on successful removal an owning reference to
the removed node is returned.
Adding KF_RET_NULL to bpf_rbtree_remove's kfunc flags - now KF_RET_NULL |
KF_ACQUIRE - provides the desired verifier semantics:
* retval must be checked for NULL before use
* if NULL, retval's ref_obj_id is released
* retval is a "maybe acquired" owning ref, not a non-owning ref,
so it will live past the end of the critical section (bpf_spin_unlock), and
thus can be checked for NULL after the end of the CS
BPF programs must add checks
============================
This does change bpf_rbtree_remove's verifier behavior. BPF program
writers will need to add NULL checks to their programs, but the
resulting UX looks natural:
bpf_spin_lock(&glock);
n = bpf_rbtree_first(&ghead);
if (!n) { /* ... */}
res = bpf_rbtree_remove(&ghead, &n->node);
bpf_spin_unlock(&glock);
if (!res) /* Newly-added check after this patch */
return 1;
n = container_of(res, /* ... */);
/* Do something else with n */
bpf_obj_drop(n);
return 0;
The "if (!res)" check above is the only addition necessary for the above
program to pass verification after this patch.
bpf_rbtree_remove no longer clobbers non-owning refs
====================================================
An issue arises when bpf_rbtree_remove fails, though. Consider this
example:
struct node_data {
long key;
struct bpf_list_node l;
struct bpf_rb_node r;
struct bpf_refcount ref;
};
long failed_sum;
void bpf_prog()
{
struct node_data *n = bpf_obj_new(/* ... */);
struct bpf_rb_node *res;
n->key = 10;
bpf_spin_lock(&glock);
bpf_list_push_back(&some_list, &n->l); /* n is now a non-owning ref */
res = bpf_rbtree_remove(&some_tree, &n->r, /* ... */);
if (!res)
failed_sum += n->key; /* not possible */
bpf_spin_unlock(&glock);
/* if (res) { do something useful and drop } ... */
}
The bpf_rbtree_remove in this example will always fail. Similarly to
bpf_spin_unlock, bpf_rbtree_remove is a non-owning reference
invalidation point. The verifier clobbers all non-owning refs after a
bpf_rbtree_remove call, so the "failed_sum += n->key" line will fail
verification, and in fact there's no good way to get information about
the node which failed to be removed after the invalidation. This patch removes
non-owning reference invalidation from bpf_rbtree_remove to allow the
above usecase to pass verification. The logic for why this is now
possible is as follows:
Before this series, bpf_rbtree_remove couldn't fail and thus assumed that
its input, a non-owning reference, was in the tree. But it's easy to
construct an example where two non-owning references pointing to the same
underlying memory are acquired and passed to rbtree_remove one after
another (see rbtree_api_release_aliasing in
selftests/bpf/progs/rbtree_fail.c).
So it was necessary to clobber non-owning refs to prevent this
case and, more generally, to enforce "non-owning ref is definitely
in some collection" invariant. This series removes that invariant and
the failure / runtime checking added in this patch provide a clean way
to deal with the aliasing issue - just fail to remove.
Because the aliasing issue prevented by clobbering non-owning refs is no
longer an issue, this patch removes the invalidate_non_owning_refs
call from verifier handling of bpf_rbtree_remove. Note that
bpf_spin_unlock - the other caller of invalidate_non_owning_refs -
clobbers non-owning refs for a different reason, so its clobbering
behavior remains unchanged.
No BPF program changes are necessary for programs to remain valid as a
result of this clobbering change. A valid program before this patch
passed verification with its non-owning refs having shorter (or equal)
lifetimes due to more aggressive clobbering.
Also, update existing tests to check bpf_rbtree_remove retval for NULL
where necessary, and move rbtree_api_release_aliasing from
progs/rbtree_fail.c to progs/rbtree.c since it's now expected to pass
verification.
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230415201811.343116-8-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-04-16 04:18:09 +08:00
|
|
|
if (rec->refcount_off < 0 &&
|
|
|
|
btf_record_has_field(rec, BPF_LIST_NODE) &&
|
bpf: Special verifier handling for bpf_rbtree_{remove, first}
Newly-added bpf_rbtree_{remove,first} kfuncs have some special properties
that require handling in the verifier:
* both bpf_rbtree_remove and bpf_rbtree_first return the type containing
the bpf_rb_node field, with the offset set to that field's offset,
instead of a struct bpf_rb_node *
* mark_reg_graph_node helper added in previous patch generalizes
this logic, use it
* bpf_rbtree_remove's node input is a node that's been inserted
in the tree - a non-owning reference.
* bpf_rbtree_remove must invalidate non-owning references in order to
avoid aliasing issue. Use previously-added
invalidate_non_owning_refs helper to mark this function as a
non-owning ref invalidation point.
* Unlike other functions, which convert one of their input arg regs to
a non-owning reference, bpf_rbtree_first takes no arguments and just
returns a non-owning reference (possibly null)
* For now verifier logic for this is special-cased instead of
adding new kfunc flag.
This patch, along with the previous one, complete special verifier
handling for all rbtree API functions added in this series.
With functional verifier handling of rbtree_remove, under current
non-owning reference scheme, a node type with both bpf_{list,rb}_node
fields could cause the verifier to accept programs which remove such
nodes from collections they haven't been added to.
In order to prevent this, this patch adds a check to btf_parse_fields
which rejects structs with both bpf_{list,rb}_node fields. This is a
temporary measure that can be removed after "collection identity"
followup. See comment added in btf_parse_fields. A linked_list BTF test
exercising the new check is added in this patch as well.
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230214004017.2534011-6-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-02-14 08:40:14 +08:00
|
|
|
btf_record_has_field(rec, BPF_RB_NODE)) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
bpf: Remove btf_field_offs, use btf_record's fields instead
2023-04-16 04:18:03 +08:00
|
|
|
sort_r(rec->fields, rec->cnt, sizeof(struct btf_field), btf_field_cmp,
|
|
|
|
NULL, rec);
|
|
|
|
|
bpf: Refactor kptr_off_tab into btf_record
2022-11-04 03:09:55 +08:00
|
|
|
return rec;
|
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may not longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map-in-map
case, and hence would be unnecessary for all other users.
It is not permitted to use the MAP_FREEZE command or mmap for a BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
the BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called both from
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 05:48:49 +08:00
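To make the flow concrete, here is a minimal, hypothetical BPF program sketch of the usage enabled above. The map name array_map, the tracepoint hook, and the use of bpf_get_current_task_btf() as the source of a PTR_TO_BTF_ID are illustrative assumptions, not part of this patch:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

#define __kptr __attribute__((btf_type_tag("kptr")))

struct map_value {
	struct task_struct __kptr *task;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, struct map_value);
} array_map SEC(".maps");

SEC("tp/syscalls/sys_enter_getpgid")
int store_task(void *ctx)
{
	struct task_struct *t;
	struct map_value *v;
	int key = 0;

	v = bpf_map_lookup_elem(&array_map, &key);
	if (!v)
		return 0;
	/* BPF_STX: the source register is a PTR_TO_BTF_ID whose BTF type
	 * matches the declared map value type (task_struct).
	 */
	v->task = bpf_get_current_task_btf();
	/* BPF_LDX: the destination register becomes PTR_TO_BTF_ID_OR_NULL,
	 * so a NULL check is required before dereferencing.
	 */
	t = v->task;
	if (t)
		bpf_printk("stored task pid %d", t->pid);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";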
|
|
|
end:
|
bpf: Refactor kptr_off_tab into btf_record
To prepare the BPF verifier to handle special fields in both map values
and program allocated types coming from program BTF, we need to refactor
the kptr_off_tab handling code into something more generic and reusable
across both cases to avoid code duplication.
Later patches also require passing this data to helpers at runtime, so
that they can work on user-defined types, initialize them, destroy
them, etc.
The main observation is that both map values and such allocated types
point to a type in program BTF, hence they can be handled similarly. We
can prepare a field metadata table for both cases and store them in
struct bpf_map or struct btf depending on the use case.
Hence, refactor the code into generic btf_record and btf_field member
structs. The btf_record represents the fields of a specific btf_type in
user BTF. The cnt indicates the number of special fields we successfully
recognized, and field_mask is a bitmask of the fields that were found,
enabling a quick check for the presence of a certain field.
Subsequently, refactor the rest of the code to work with these generic
types, remove assumptions about kptr and kptr_off_tab, rename variables
to more meaningful names, etc.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221103191013.1236066-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-04 03:09:55 +08:00
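For reference, the generic metadata described above has roughly the following shape (an abridged sketch of the include/linux/bpf.h definitions; per-field-type members and cached offsets are elided):

struct btf_field {
	u32 offset;			/* byte offset of the special field */
	u32 size;			/* size of the special field */
	enum btf_field_type type;	/* BPF_KPTR_UNREF, BPF_LIST_HEAD, ... */
	/* per-type info (kptr target, graph root, ...) elided */
};

struct btf_record {
	u32 cnt;			/* number of recognized special fields */
	u32 field_mask;			/* OR of the btf_field_type bits found */
	/* cached offsets (spin_lock, timer, ...) elided */
	struct btf_field fields[];	/* sorted by offset */
};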
|
|
|
btf_record_free(rec);
|
2022-04-25 05:48:49 +08:00
|
|
|
return ERR_PTR(ret);
|
|
|
|
}
|
|
|
|
|
2022-11-18 09:55:57 +08:00
|
|
|
int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
bpf: Add basic bpf_rb_{root,node} support
This patch adds special BPF_RB_{ROOT,NODE} btf_field_types similar to
BPF_LIST_{HEAD,NODE}, adds the necessary plumbing to detect the new
types, and adds bpf_rb_root_free function for freeing bpf_rb_root in
map_values.
structs bpf_rb_root and bpf_rb_node are opaque types meant to
obscure structs rb_root_cached and rb_node, respectively.
btf_struct_access will prevent BPF programs from touching these special
fields automatically now that they're recognized.
btf_check_and_fixup_fields now groups list_head and rb_root together as
"graph root" fields and {list,rb}_node as "graph node", and does same
ownership cycle checking as before. Note that this function does _not_
prevent ownership type mixups (e.g. rb_root owning list_node) - that's
handled by btf_parse_graph_root.
After this patch, a BPF program can have a struct bpf_rb_root in a
map_value, but cannot add anything to it nor do anything useful with it.
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230214004017.2534011-2-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-02-14 08:40:10 +08:00
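As a hypothetical illustration of the state after this patch, program BTF can declare the opaque root inside a map value; the __contains annotation follows the bpf_experimental.h convention used by the BPF selftests to tie a graph root to its node type:

struct node_data {
	long key;
	struct bpf_rb_node node;	/* "graph node" field */
};

struct map_value {
	struct bpf_spin_lock lock;
	struct bpf_rb_root root __contains(node_data, node);	/* "graph root" field */
};

/* The verifier now recognizes and protects these fields, but no kfuncs
 * exist yet to insert into or traverse the tree.
 */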
|
|
|
/* There are three types that signify ownership of some other type:
|
|
|
|
* kptr_ref, bpf_list_head, bpf_rb_root.
|
|
|
|
* kptr_ref only supports storing kernel types, which can't store
|
|
|
|
* references to program allocated local types.
|
|
|
|
*
|
|
|
|
* Hence we only need to ensure that bpf_{list_head,rb_root} ownership
|
|
|
|
* does not form cycles.
|
2022-11-18 09:55:57 +08:00
|
|
|
*/
|
2023-11-07 16:56:37 +08:00
|
|
|
if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_GRAPH_ROOT))
|
2022-11-18 09:55:57 +08:00
|
|
|
return 0;
|
|
|
|
for (i = 0; i < rec->cnt; i++) {
|
|
|
|
struct btf_struct_meta *meta;
|
|
|
|
u32 btf_id;
|
|
|
|
|
2023-11-07 16:56:37 +08:00
|
|
|
if (!(rec->fields[i].type & BPF_GRAPH_ROOT))
|
2022-11-18 09:55:57 +08:00
|
|
|
continue;
|
2022-12-17 16:24:57 +08:00
|
|
|
btf_id = rec->fields[i].graph_root.value_btf_id;
|
2022-11-18 09:55:57 +08:00
|
|
|
meta = btf_find_struct_meta(btf, btf_id);
|
|
|
|
if (!meta)
|
|
|
|
return -EFAULT;
|
2022-12-17 16:24:57 +08:00
|
|
|
rec->fields[i].graph_root.value_rec = meta->record;
|
2022-11-18 09:55:57 +08:00
|
|
|
|
2023-02-14 08:40:10 +08:00
|
|
|
/* We need to set value_rec for all root types, but no need
|
|
|
|
* to check ownership cycle for a type unless it's also a
|
|
|
|
* node type.
|
|
|
|
*/
|
2023-11-07 16:56:37 +08:00
|
|
|
if (!(rec->field_mask & BPF_GRAPH_NODE))
|
2022-11-18 09:55:57 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
/* We need to ensure ownership acyclicity among all types. The
|
|
|
|
* proper way to do it would be to topologically sort all BTF
|
|
|
|
* IDs based on the ownership edges, since there can be multiple
|
2023-02-14 08:40:10 +08:00
|
|
|
* bpf_{list_head,rb_node} in a type. Instead, we use the
|
|
|
|
* following reasoning:
|
2022-11-18 09:55:57 +08:00
|
|
|
*
|
|
|
|
* - A type can only be owned by another type in user BTF if it
|
2023-02-14 08:40:10 +08:00
|
|
|
* has a bpf_{list,rb}_node. Let's call these node types.
|
2022-11-18 09:55:57 +08:00
|
|
|
* - A type can only _own_ another type in user BTF if it has a
|
2023-02-14 08:40:10 +08:00
|
|
|
* bpf_{list_head,rb_root}. Let's call these root types.
|
2022-11-18 09:55:57 +08:00
|
|
|
*
|
2023-02-14 08:40:10 +08:00
|
|
|
* We ensure that if a type is both a root and node, its
|
|
|
|
* element types cannot be root types.
|
2022-11-18 09:55:57 +08:00
|
|
|
*
|
|
|
|
* To ensure acyclicity:
|
|
|
|
*
|
2023-02-14 08:40:10 +08:00
|
|
|
* When A is a root type but not a node, its ownership
|
|
|
|
* chain can be:
|
2022-11-18 09:55:57 +08:00
|
|
|
* A -> B -> C
|
|
|
|
* Where:
|
2023-02-14 08:40:10 +08:00
|
|
|
* - A is a root, e.g. has bpf_rb_root.
|
|
|
|
* - B is both a root and node, e.g. has bpf_rb_node and
|
|
|
|
* bpf_list_head.
|
|
|
|
* - C is only a node, e.g. has bpf_list_node.
|
2022-11-18 09:55:57 +08:00
|
|
|
*
|
2023-02-14 08:40:10 +08:00
|
|
|
* When A is both a root and node, some other type already
|
|
|
|
* owns it in the BTF domain, hence it cannot own
|
|
|
|
* another root type through any of the ownership edges.
|
2022-11-18 09:55:57 +08:00
|
|
|
* A -> B
|
|
|
|
* Where:
|
2023-02-14 08:40:10 +08:00
|
|
|
* - A is both a root and a node.
|
|
|
|
* - B is only a node.
|
2022-11-18 09:55:57 +08:00
|
|
|
*/
|
2023-11-07 16:56:37 +08:00
|
|
|
if (meta->record->field_mask & BPF_GRAPH_ROOT)
|
2022-11-18 09:55:57 +08:00
|
|
|
return -ELOOP;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
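As a sketch of what the -ELOOP check above rejects, suppose (hypothetically) user BTF declares struct foo as both a root and a node, with its element type bar itself carrying a root; the __contains annotation is again the bpf_experimental.h convention:

struct bar;

struct foo {				/* both a root and a node */
	struct bpf_list_head head __contains(bar, node);
	struct bpf_list_node node;
};

struct bar {				/* owned element that is itself a root */
	struct bpf_list_head head __contains(foo, node);
	struct bpf_list_node node;
};

/* While fixing up foo's graph root field, btf_check_and_fixup_fields()
 * sees that foo is also a node and that the value type bar has
 * BPF_GRAPH_ROOT set in its field_mask, and returns -ELOOP.
 */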
|
|
|
|
|
2020-09-28 19:31:04 +08:00
|
|
|
static void __btf_struct_show(const struct btf *btf, const struct btf_type *t,
|
|
|
|
u32 type_id, void *data, u8 bits_offset,
|
|
|
|
struct btf_show *show)
|
2018-04-19 06:56:00 +08:00
|
|
|
{
|
|
|
|
const struct btf_member *member;
|
2020-09-28 19:31:04 +08:00
|
|
|
void *safe_data;
|
2018-04-19 06:56:00 +08:00
|
|
|
u32 i;
|
|
|
|
|
2020-09-28 19:31:04 +08:00
|
|
|
safe_data = btf_show_start_struct_type(show, t, type_id, data);
|
|
|
|
if (!safe_data)
|
|
|
|
return;
|
|
|
|
|
2018-04-19 06:56:00 +08:00
|
|
|
for_each_member(i, t, member) {
|
|
|
|
const struct btf_type *member_type = btf_type_by_id(btf,
|
|
|
|
member->type);
|
|
|
|
const struct btf_kind_operations *ops;
|
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixes two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
The current BTF encoding of bitfields follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode a bit size,
pahole works around the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
There are two issues in the above:
. by changing the enum type to int, we lose the original
type information, which is not ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields causes
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield sizes differ.
To fix this issue, this patch implements a compatible
change to the BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset is encoded as:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bitfield, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256, which is enough for today, as the maximum
bitfield size compilers support is 128 (where the int128 type is supported).
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
The current forward type in BTF does not specify whether the original
type is a struct or a union. This will not work for type pretty printing
and BTF-to-header-file conversion, as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is a union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented the func/func_proto kinds
and .BTF.ext, so to print function signatures with bpftool,
the LLVM compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-16 14:13:51 +08:00
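The uapi accessors for the kind_flag member encoding described above are (from include/uapi/linux/btf.h):

#define BTF_MEMBER_BITFIELD_SIZE(val)	((val) >> 24)
#define BTF_MEMBER_BIT_OFFSET(val)	((val) & 0xffffff)

/* e.g. member 'b' from the commit message (bitfield_size=4,
 * bits_offset=160) is encoded as member->offset = (4 << 24) | 160.
 */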
|
|
|
u32 member_offset, bitfield_size;
|
|
|
|
u32 bytes_offset;
|
|
|
|
u8 bits8_offset;
|
2018-04-19 06:56:00 +08:00
|
|
|
|
2020-09-28 19:31:04 +08:00
|
|
|
btf_show_start_member(show, member);
|
2018-04-19 06:56:00 +08:00
|
|
|
|
2021-12-02 02:10:25 +08:00
|
|
|
member_offset = __btf_member_bit_offset(t, member);
|
|
|
|
bitfield_size = __btf_member_bitfield_size(t, member);
|
2019-01-11 03:14:00 +08:00
|
|
|
bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset);
|
|
|
|
bits8_offset = BITS_PER_BYTE_MASKED(member_offset);
|
2018-12-16 14:13:51 +08:00
|
|
|
if (bitfield_size) {
|
2020-09-28 19:31:04 +08:00
|
|
|
safe_data = btf_show_start_type(show, member_type,
|
|
|
|
member->type,
|
|
|
|
data + bytes_offset);
|
|
|
|
if (safe_data)
|
|
|
|
btf_bitfield_show(safe_data,
|
|
|
|
bits8_offset,
|
|
|
|
bitfield_size, show);
|
|
|
|
btf_show_end_type(show);
|
2018-12-16 14:13:51 +08:00
|
|
|
} else {
|
|
|
|
ops = btf_type_ops(member_type);
|
2020-09-28 19:31:04 +08:00
|
|
|
ops->show(btf, member_type, member->type,
|
|
|
|
data + bytes_offset, bits8_offset, show);
|
2018-12-16 14:13:51 +08:00
|
|
|
}
|
2020-09-28 19:31:04 +08:00
|
|
|
|
|
|
|
btf_show_end_member(show);
|
2018-04-19 06:56:00 +08:00
|
|
|
}
|
2020-09-28 19:31:04 +08:00
|
|
|
|
|
|
|
btf_show_end_struct_type(show);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_struct_show(const struct btf *btf, const struct btf_type *t,
|
|
|
|
u32 type_id, void *data, u8 bits_offset,
|
|
|
|
struct btf_show *show)
|
|
|
|
{
|
|
|
|
const struct btf_member *m = show->state.member;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* First check if any members would be shown (are non-zero).
|
|
|
|
* See comments above "struct btf_show" definition for more
|
|
|
|
* details on how this works at a high level.
|
|
|
|
*/
|
|
|
|
if (show->state.depth > 0 && !(show->flags & BTF_SHOW_ZERO)) {
|
|
|
|
if (!show->state.depth_check) {
|
|
|
|
show->state.depth_check = show->state.depth + 1;
|
|
|
|
show->state.depth_to_show = 0;
|
|
|
|
}
|
|
|
|
__btf_struct_show(btf, t, type_id, data, bits_offset, show);
|
|
|
|
/* Restore saved member data here */
|
|
|
|
show->state.member = m;
|
|
|
|
if (show->state.depth_check != show->state.depth + 1)
|
|
|
|
return;
|
|
|
|
show->state.depth_check = 0;
|
|
|
|
|
|
|
|
if (show->state.depth_to_show <= show->state.depth)
|
|
|
|
return;
|
|
|
|
/*
|
|
|
|
* Reaching here indicates we have recursed and found
|
|
|
|
* non-zero child values.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
|
|
|
__btf_struct_show(btf, t, type_id, data, bits_offset, show);
|
2018-04-19 06:56:00 +08:00
|
|
|
}
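A minimal caller sketch of how the two-pass zero-suppression above is driven in practice; dump_map_value() is a hypothetical wrapper, while btf_type_seq_show() is the existing seq_file entry point into this show machinery:

#include <linux/btf.h>
#include <linux/seq_file.h>

static void dump_map_value(const struct btf *btf, u32 value_type_id,
			   void *value, struct seq_file *m)
{
	/* With default flags (no BTF_SHOW_ZERO), btf_struct_show() takes
	 * the two-pass path and elides members whose subtree is all zero.
	 */
	btf_type_seq_show(btf, value_type_id, value, m);
}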
|
|
|
|
|
2018-04-19 06:55:57 +08:00
|
|
|
static struct btf_kind_operations struct_ops = {
|
|
|
|
.check_meta = btf_struct_check_meta,
|
2018-04-19 06:55:58 +08:00
|
|
|
.resolve = btf_struct_resolve,
|
2018-04-19 06:55:59 +08:00
|
|
|
.check_member = btf_struct_check_member,
|
2018-12-16 14:13:51 +08:00
|
|
|
.check_kflag_member = btf_generic_check_kflag_member,
|
2018-04-19 06:55:57 +08:00
|
|
|
.log_details = btf_struct_log,
|
2020-09-28 19:31:04 +08:00
|
|
|
.show = btf_struct_show,
|
2018-04-19 06:55:57 +08:00
|
|
|
};
|
|
|
|
|
2018-04-19 06:55:59 +08:00
|
|
|
static int btf_enum_check_member(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *struct_type,
|
|
|
|
const struct btf_member *member,
|
|
|
|
const struct btf_type *member_type)
|
|
|
|
{
|
|
|
|
u32 struct_bits_off = member->offset;
|
|
|
|
u32 struct_size, bytes_offset;
|
|
|
|
|
|
|
|
if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member is not byte aligned");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct_size = struct_type->size;
|
|
|
|
bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
|
2020-03-10 15:32:29 +08:00
|
|
|
if (struct_size - bytes_offset < member_type->size) {
|
2018-04-19 06:55:59 +08:00
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member exceeds struct_size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
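A worked example of the bound check above, for a hypothetical malformed BTF input:

/* struct_type->size = 4, member->offset = 16 bits, member_type->size = 4:
 *   bytes_offset = BITS_ROUNDDOWN_BYTES(16) = 2
 *   struct_size - bytes_offset = 4 - 2 = 2 < 4  ->  -EINVAL
 * The 4-byte enum member would spill past the end of the struct.
 */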
|
|
|
|
|
2018-12-16 14:13:51 +08:00
|
|
|
static int btf_enum_check_kflag_member(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *struct_type,
|
|
|
|
const struct btf_member *member,
|
|
|
|
const struct btf_type *member_type)
|
|
|
|
{
|
|
|
|
u32 struct_bits_off, nr_bits, bytes_end, struct_size;
|
|
|
|
u32 int_bitsize = sizeof(int) * BITS_PER_BYTE;
|
|
|
|
|
|
|
|
struct_bits_off = BTF_MEMBER_BIT_OFFSET(member->offset);
|
|
|
|
nr_bits = BTF_MEMBER_BITFIELD_SIZE(member->offset);
|
|
|
|
if (!nr_bits) {
|
|
|
|
if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member is not byte aligned");
|
2019-09-25 17:38:35 +08:00
|
|
|
return -EINVAL;
|
2018-12-16 14:13:51 +08:00
		}

		nr_bits = int_bitsize;
	} else if (nr_bits > int_bitsize) {
		btf_verifier_log_member(env, struct_type, member,
					"Invalid member bitfield_size");
		return -EINVAL;
	}

	struct_size = struct_type->size;
	bytes_end = BITS_ROUNDUP_BYTES(struct_bits_off + nr_bits);
	if (struct_size < bytes_end) {
		btf_verifier_log_member(env, struct_type, member,
					"Member exceeds struct_size");
		return -EINVAL;
	}

	return 0;
}
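/* Check the metadata of a BTF_KIND_ENUM type: the vlen enumerator
 * entries must fit within the remaining metadata, the enum size must
 * be a power of 2 of at most 8 bytes, and every enumerator needs a
 * valid identifier as its name.
 */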
static s32 btf_enum_check_meta(struct btf_verifier_env *env,
			       const struct btf_type *t,
			       u32 meta_left)
{
	const struct btf_enum *enums = btf_type_enum(t);
	struct btf *btf = env->btf;
	const char *fmt_str;
	u16 i, nr_enums;
	u32 meta_needed;

	nr_enums = btf_type_vlen(t);
	meta_needed = nr_enums * sizeof(*enums);

	if (meta_left < meta_needed) {
		btf_verifier_log_basic(env, t,
				       "meta_left:%u meta_needed:%u",
				       meta_left, meta_needed);
		return -EINVAL;
	}

	if (t->size > 8 || !is_power_of_2(t->size)) {
		btf_verifier_log_type(env, t, "Unexpected size");
		return -EINVAL;
	}

	/* enum type either no name or a valid one */
	if (t->name_off &&
	    !btf_name_valid_identifier(env->btf, t->name_off)) {
		btf_verifier_log_type(env, t, "Invalid name");
		return -EINVAL;
	}

	btf_verifier_log_type(env, t, NULL);

	for (i = 0; i < nr_enums; i++) {
		if (!btf_name_offset_valid(btf, enums[i].name_off)) {
			btf_verifier_log(env, "\tInvalid name_offset:%u",
					 enums[i].name_off);
			return -EINVAL;
		}

		/* enum member must have a valid name */
		if (!enums[i].name_off ||
		    !btf_name_valid_identifier(btf, enums[i].name_off)) {
			btf_verifier_log_type(env, t, "Invalid name");
			return -EINVAL;
		}

		if (env->log.level == BPF_LOG_KERNEL)
			continue;
		fmt_str = btf_type_kflag(t) ? "\t%s val=%d\n" : "\t%s val=%u\n";
		btf_verifier_log(env, fmt_str,
				 __btf_name_by_offset(btf, enums[i].name_off),
				 enums[i].val);
	}

	return meta_needed;
}

static void btf_enum_log(struct btf_verifier_env *env,
			 const struct btf_type *t)
{
	btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
}
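/* Pretty-print an enum value: look up the enumerator whose value
 * matches the (safely copied) data and print its name; if none
 * matches, fall back to printing the raw number.
 */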
static void btf_enum_show(const struct btf *btf, const struct btf_type *t,
			  u32 type_id, void *data, u8 bits_offset,
			  struct btf_show *show)
{
	const struct btf_enum *enums = btf_type_enum(t);
	u32 i, nr_enums = btf_type_vlen(t);
	void *safe_data;
	int v;

	safe_data = btf_show_start_type(show, t, type_id, data);
	if (!safe_data)
		return;

	v = *(int *)safe_data;

	for (i = 0; i < nr_enums; i++) {
		if (v != enums[i].val)
			continue;

		btf_show_type_value(show, "%s",
				    __btf_name_by_offset(btf,
							 enums[i].name_off));

		btf_show_end_type(show);
		return;
	}
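	/* No enumerator matched: print the raw value, as signed if the
	 * enum's kflag marks its values as signed.
	 */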
	if (btf_type_kflag(t))
		btf_show_type_value(show, "%d", v);
	else
		btf_show_type_value(show, "%u", v);

	btf_show_end_type(show);
}

static struct btf_kind_operations enum_ops = {
	.check_meta = btf_enum_check_meta,
	.resolve = btf_df_resolve,
	.check_member = btf_enum_check_member,
	.check_kflag_member = btf_enum_check_kflag_member,
	.log_details = btf_enum_log,
	.show = btf_enum_show,
};
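/* BTF_KIND_ENUM64 carries each 64-bit enumerator value as a
 * val_lo32/val_hi32 pair so that the type section keeps its 4-byte
 * alignment; btf_enum64_value() reassembles the full value. As with
 * BTF_KIND_ENUM, the kflag indicates whether the values are signed.
 */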
static s32 btf_enum64_check_meta(struct btf_verifier_env *env,
				 const struct btf_type *t,
				 u32 meta_left)
{
	const struct btf_enum64 *enums = btf_type_enum64(t);
	struct btf *btf = env->btf;
	const char *fmt_str;
	u16 i, nr_enums;
	u32 meta_needed;

	nr_enums = btf_type_vlen(t);
	meta_needed = nr_enums * sizeof(*enums);

	if (meta_left < meta_needed) {
		btf_verifier_log_basic(env, t,
				       "meta_left:%u meta_needed:%u",
				       meta_left, meta_needed);
		return -EINVAL;
	}

	if (t->size > 8 || !is_power_of_2(t->size)) {
		btf_verifier_log_type(env, t, "Unexpected size");
		return -EINVAL;
	}

	/* enum type either no name or a valid one */
	if (t->name_off &&
	    !btf_name_valid_identifier(env->btf, t->name_off)) {
		btf_verifier_log_type(env, t, "Invalid name");
		return -EINVAL;
	}

	btf_verifier_log_type(env, t, NULL);

	for (i = 0; i < nr_enums; i++) {
		if (!btf_name_offset_valid(btf, enums[i].name_off)) {
			btf_verifier_log(env, "\tInvalid name_offset:%u",
					 enums[i].name_off);
			return -EINVAL;
		}

		/* enum member must have a valid name */
		if (!enums[i].name_off ||
		    !btf_name_valid_identifier(btf, enums[i].name_off)) {
			btf_verifier_log_type(env, t, "Invalid name");
			return -EINVAL;
		}

		if (env->log.level == BPF_LOG_KERNEL)
			continue;

		fmt_str = btf_type_kflag(t) ? "\t%s val=%lld\n" : "\t%s val=%llu\n";
		btf_verifier_log(env, fmt_str,
				 __btf_name_by_offset(btf, enums[i].name_off),
				 btf_enum64_value(enums + i));
	}

	return meta_needed;
}
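/* Same lookup-by-value printing as btf_enum_show(), but comparing
 * against the reassembled 64-bit enumerator values.
 */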
static void btf_enum64_show(const struct btf *btf, const struct btf_type *t,
			    u32 type_id, void *data, u8 bits_offset,
			    struct btf_show *show)
{
	const struct btf_enum64 *enums = btf_type_enum64(t);
	u32 i, nr_enums = btf_type_vlen(t);
	void *safe_data;
	s64 v;

	safe_data = btf_show_start_type(show, t, type_id, data);
	if (!safe_data)
		return;

	v = *(u64 *)safe_data;

	for (i = 0; i < nr_enums; i++) {
		if (v != btf_enum64_value(enums + i))
			continue;

		btf_show_type_value(show, "%s",
				    __btf_name_by_offset(btf,
							 enums[i].name_off));

		btf_show_end_type(show);
		return;
	}

	if (btf_type_kflag(t))
		btf_show_type_value(show, "%lld", v);
	else
		btf_show_type_value(show, "%llu", v);

	btf_show_end_type(show);
}

static struct btf_kind_operations enum64_ops = {
	.check_meta = btf_enum64_check_meta,
	.resolve = btf_df_resolve,
	.check_member = btf_enum_check_member,
	.check_kflag_member = btf_enum_check_kflag_member,
	.log_details = btf_enum_log,
	.show = btf_enum64_show,
};
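/* A BTF_KIND_FUNC_PROTO must be anonymous; its vlen is the number of
 * btf_param entries that follow the common type header.
 */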
static s32 btf_func_proto_check_meta(struct btf_verifier_env *env,
				     const struct btf_type *t,
				     u32 meta_left)
{
	u32 meta_needed = btf_type_vlen(t) * sizeof(struct btf_param);

	if (meta_left < meta_needed) {
		btf_verifier_log_basic(env, t,
				       "meta_left:%u meta_needed:%u",
				       meta_left, meta_needed);
		return -EINVAL;
	}

	if (t->name_off) {
		btf_verifier_log_type(env, t, "Invalid name");
		return -EINVAL;
	}
	if (btf_type_kflag(t)) {
		btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
		return -EINVAL;
	}

	btf_verifier_log_type(env, t, NULL);

	return meta_needed;
}

static void btf_func_proto_log(struct btf_verifier_env *env,
			       const struct btf_type *t)
{
	const struct btf_param *args = (const struct btf_param *)(t + 1);
	u16 nr_args = btf_type_vlen(t), i;

	btf_verifier_log(env, "return=%u args=(", t->type);
	if (!nr_args) {
		btf_verifier_log(env, "void");
		goto done;
	}

	if (nr_args == 1 && !args[0].type) {
		/* Only one vararg */
		btf_verifier_log(env, "vararg");
		goto done;
	}

	btf_verifier_log(env, "%u %s", args[0].type,
			 __btf_name_by_offset(env->btf,
					      args[0].name_off));
	for (i = 1; i < nr_args - 1; i++)
		btf_verifier_log(env, ", %u %s", args[i].type,
				 __btf_name_by_offset(env->btf,
						      args[i].name_off));

	if (nr_args > 1) {
		const struct btf_param *last_arg = &args[nr_args - 1];

		if (last_arg->type)
			btf_verifier_log(env, ", %u %s", last_arg->type,
					 __btf_name_by_offset(env->btf,
							      last_arg->name_off));
		else
			btf_verifier_log(env, ", vararg");
	}

done:
	btf_verifier_log(env, ")");
}
static struct btf_kind_operations func_proto_ops = {
|
|
|
|
.check_meta = btf_func_proto_check_meta,
|
|
|
|
.resolve = btf_df_resolve,
|
|
|
|
/*
|
|
|
|
* BTF_KIND_FUNC_PROTO cannot be directly referred by
|
|
|
|
* a struct's member.
|
|
|
|
*
|
2021-05-25 10:56:59 +08:00
|
|
|
* It should be a function pointer instead.
|
2018-11-20 07:29:08 +08:00
|
|
|
* (i.e. struct's member -> BTF_KIND_PTR -> BTF_KIND_FUNC_PROTO)
|
|
|
|
*
|
|
|
|
* Hence, there is no btf_func_check_member().
|
|
|
|
*/
|
|
|
|
.check_member = btf_df_check_member,
|
	.check_kflag_member = btf_df_check_kflag_member,
	.log_details = btf_func_proto_log,
	.show = btf_df_show,
};
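/* A BTF_KIND_FUNC must have a valid identifier as its name, and its
 * vlen encodes the function linkage, which may be at most
 * BTF_FUNC_GLOBAL.
 */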
static s32 btf_func_check_meta(struct btf_verifier_env *env,
			       const struct btf_type *t,
			       u32 meta_left)
{
	if (!t->name_off ||
	    !btf_name_valid_identifier(env->btf, t->name_off)) {
		btf_verifier_log_type(env, t, "Invalid name");
		return -EINVAL;
	}

	if (btf_type_vlen(t) > BTF_FUNC_GLOBAL) {
		btf_verifier_log_type(env, t, "Invalid func linkage");
		return -EINVAL;
	}
	if (btf_type_kflag(t)) {
		btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
		return -EINVAL;
	}

	btf_verifier_log_type(env, t, NULL);

	return 0;
}

static int btf_func_resolve(struct btf_verifier_env *env,
			    const struct resolve_vertex *v)
{
	const struct btf_type *t = v->t;
	u32 next_type_id = t->type;
	int err;

	err = btf_func_check(env, t);
	if (err)
		return err;

	env_stack_pop_resolved(env, next_type_id, 0);
	return 0;
}

static struct btf_kind_operations func_ops = {
	.check_meta = btf_func_check_meta,
	.resolve = btf_func_resolve,
	.check_member = btf_df_check_member,
	.check_kflag_member = btf_df_check_kflag_member,
	.log_details = btf_ref_type_log,
	.show = btf_df_show,
};
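/* A BTF_KIND_VAR must have a valid identifier as its name, must not
 * be of type void, and only static or global-allocated linkage is
 * accepted.
 */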
static s32 btf_var_check_meta(struct btf_verifier_env *env,
			      const struct btf_type *t,
			      u32 meta_left)
{
	const struct btf_var *var;
	u32 meta_needed = sizeof(*var);

	if (meta_left < meta_needed) {
		btf_verifier_log_basic(env, t,
				       "meta_left:%u meta_needed:%u",
				       meta_left, meta_needed);
		return -EINVAL;
	}

	if (btf_type_vlen(t)) {
		btf_verifier_log_type(env, t, "vlen != 0");
		return -EINVAL;
	}

	if (btf_type_kflag(t)) {
		btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
		return -EINVAL;
	}

	if (!t->name_off ||
	    !btf_name_valid_identifier(env->btf, t->name_off)) {
		btf_verifier_log_type(env, t, "Invalid name");
		return -EINVAL;
	}

	/* A var cannot be in type void */
	if (!t->type || !BTF_TYPE_ID_VALID(t->type)) {
		btf_verifier_log_type(env, t, "Invalid type_id");
		return -EINVAL;
	}

	var = btf_type_var(t);
	if (var->linkage != BTF_VAR_STATIC &&
	    var->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
		btf_verifier_log_type(env, t, "Linkage not supported");
		return -EINVAL;
	}

	btf_verifier_log_type(env, t, NULL);

	return meta_needed;
}

static void btf_var_log(struct btf_verifier_env *env, const struct btf_type *t)
{
	const struct btf_var *var = btf_type_var(t);

	btf_verifier_log(env, "type_id=%u linkage=%u", t->type, var->linkage);
}

static const struct btf_kind_operations var_ops = {
	.check_meta = btf_var_check_meta,
	.resolve = btf_var_resolve,
	.check_member = btf_df_check_member,
	.check_kflag_member = btf_df_check_kflag_member,
	.log_details = btf_var_log,
	.show = btf_var_show,
};

static s32 btf_datasec_check_meta(struct btf_verifier_env *env,
				  const struct btf_type *t,
				  u32 meta_left)
{
	const struct btf_var_secinfo *vsi;
	u64 last_vsi_end_off = 0, sum = 0;
	u32 i, meta_needed;

	meta_needed = btf_type_vlen(t) * sizeof(*vsi);
	if (meta_left < meta_needed) {
		btf_verifier_log_basic(env, t,
				       "meta_left:%u meta_needed:%u",
				       meta_left, meta_needed);
		return -EINVAL;
	}

	if (!t->size) {
		btf_verifier_log_type(env, t, "size == 0");
		return -EINVAL;
	}

	if (btf_type_kflag(t)) {
		btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
		return -EINVAL;
	}

	if (!t->name_off ||
	    !btf_name_valid_section(env->btf, t->name_off)) {
		btf_verifier_log_type(env, t, "Invalid name");
		return -EINVAL;
	}

	btf_verifier_log_type(env, t, NULL);

	for_each_vsi(i, t, vsi) {
		/* A var cannot be in type void */
		if (!vsi->type || !BTF_TYPE_ID_VALID(vsi->type)) {
			btf_verifier_log_vsi(env, t, vsi,
					     "Invalid type_id");
			return -EINVAL;
		}

		if (vsi->offset < last_vsi_end_off || vsi->offset >= t->size) {
			btf_verifier_log_vsi(env, t, vsi,
					     "Invalid offset");
			return -EINVAL;
		}

		if (!vsi->size || vsi->size > t->size) {
			btf_verifier_log_vsi(env, t, vsi,
					     "Invalid size");
			return -EINVAL;
		}

		last_vsi_end_off = vsi->offset + vsi->size;
		if (last_vsi_end_off > t->size) {
			btf_verifier_log_vsi(env, t, vsi,
					     "Invalid offset+size");
			return -EINVAL;
		}

		btf_verifier_log_vsi(env, t, vsi, NULL);
		sum += vsi->size;
	}

	if (t->size < sum) {
		btf_verifier_log_type(env, t, "Invalid btf_info size");
		return -EINVAL;
	}

	return meta_needed;
}

static int btf_datasec_resolve(struct btf_verifier_env *env,
			       const struct resolve_vertex *v)
{
	const struct btf_var_secinfo *vsi;
	struct btf *btf = env->btf;
	u16 i;
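	/* Reset resolve_mode before each VAR member: a previous VAR may
	 * have left it at RESOLVE_PTR, in which case a following VAR
	 * that points at an unresolved modifier would never be pushed
	 * onto the resolve stack and would fail with "Invalid type".
	 */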
	env->resolve_mode = RESOLVE_TBD;
	for_each_vsi_from(i, v->next_member, v->t, vsi) {
		u32 var_type_id = vsi->type, type_id, type_size = 0;
		const struct btf_type *var_type = btf_type_by_id(env->btf,
								 var_type_id);
		if (!var_type || !btf_type_is_var(var_type)) {
			btf_verifier_log_vsi(env, v->t, vsi,
					     "Not a VAR kind member");
			return -EINVAL;
		}

		if (!env_type_is_resolve_sink(env, var_type) &&
		    !env_type_is_resolved(env, var_type_id)) {
			env_stack_set_next_member(env, i + 1);
			return env_stack_push(env, var_type, var_type_id);
		}

		type_id = var_type->type;
		if (!btf_type_id_size(btf, &type_id, &type_size)) {
			btf_verifier_log_vsi(env, v->t, vsi, "Invalid type");
			return -EINVAL;
		}

		if (vsi->size < type_size) {
			btf_verifier_log_vsi(env, v->t, vsi, "Invalid size");
			return -EINVAL;
		}
	}

	env_stack_pop_resolved(env, 0, 0);
	return 0;
}
static void btf_datasec_log(struct btf_verifier_env *env,
			    const struct btf_type *t)
{
	btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
}
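/* Print a DATASEC as 'section ("<name>") = { <var>, <var>, ... }',
 * delegating each member to the show() callback of its VAR type.
 */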
static void btf_datasec_show(const struct btf *btf,
			     const struct btf_type *t, u32 type_id,
			     void *data, u8 bits_offset,
			     struct btf_show *show)
{
	const struct btf_var_secinfo *vsi;
	const struct btf_type *var;
	u32 i;

	if (!btf_show_start_type(show, t, type_id, data))
		return;

	btf_show_type_value(show, "section (\"%s\") = {",
			    __btf_name_by_offset(btf, t->name_off));
	for_each_vsi(i, t, vsi) {
		var = btf_type_by_id(btf, vsi->type);
		if (i)
			btf_show(show, ",");
		btf_type_ops(var)->show(btf, var, vsi->type,
					data + vsi->offset, bits_offset, show);
	}
	btf_show_end_type(show);
}

static const struct btf_kind_operations datasec_ops = {
	.check_meta = btf_datasec_check_meta,
	.resolve = btf_datasec_resolve,
	.check_member = btf_df_check_member,
	.check_kflag_member = btf_df_check_kflag_member,
	.log_details = btf_datasec_log,
	.show = btf_datasec_show,
};
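/* BTF_KIND_FLOAT carries no extra metadata; only sizes of 2, 4, 8,
 * 12 and 16 bytes are accepted.
 */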
static s32 btf_float_check_meta(struct btf_verifier_env *env,
				const struct btf_type *t,
				u32 meta_left)
{
	if (btf_type_vlen(t)) {
		btf_verifier_log_type(env, t, "vlen != 0");
		return -EINVAL;
	}

	if (btf_type_kflag(t)) {
		btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
		return -EINVAL;
	}

	if (t->size != 2 && t->size != 4 && t->size != 8 && t->size != 12 &&
	    t->size != 16) {
		btf_verifier_log_type(env, t, "Invalid type_size");
		return -EINVAL;
	}

	btf_verifier_log_type(env, t, NULL);

	return 0;
}
static int btf_float_check_member(struct btf_verifier_env *env,
				  const struct btf_type *struct_type,
				  const struct btf_member *member,
				  const struct btf_type *member_type)
{
	u64 start_offset_bytes;
	u64 end_offset_bytes;
	u64 misalign_bits;
	u64 align_bytes;
	u64 align_bits;

	/* Different architectures have different alignment requirements, so
	 * here we check only for the reasonable minimum. This way we ensure
	 * that types after CO-RE can pass the kernel BTF verifier.
	 */
	align_bytes = min_t(u64, sizeof(void *), member_type->size);
	align_bits = align_bytes * BITS_PER_BYTE;
	div64_u64_rem(member->offset, align_bits, &misalign_bits);
	if (misalign_bits) {
		btf_verifier_log_member(env, struct_type, member,
					"Member is not properly aligned");
		return -EINVAL;
	}

	start_offset_bytes = member->offset / BITS_PER_BYTE;
	end_offset_bytes = start_offset_bytes + member_type->size;
	if (end_offset_bytes > struct_type->size) {
		btf_verifier_log_member(env, struct_type, member,
					"Member exceeds struct_size");
		return -EINVAL;
	}

	return 0;
}

static void btf_float_log(struct btf_verifier_env *env,
			  const struct btf_type *t)
{
	btf_verifier_log(env, "size=%u", t->size);
}

static const struct btf_kind_operations float_ops = {
	.check_meta = btf_float_check_meta,
	.resolve = btf_df_resolve,
	.check_member = btf_float_check_member,
	.check_kflag_member = btf_generic_check_kflag_member,
	.log_details = btf_float_log,
	.show = btf_df_show,
};
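/* A BTF_KIND_DECL_TAG stores the tag string in its name. A
 * component_idx of -1 tags the target type as a whole, while a
 * non-negative index selects a struct/union member or a function
 * parameter of the target; the index is validated in
 * btf_decl_tag_resolve().
 */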
2021-10-13 00:48:38 +08:00
|
|
|
static s32 btf_decl_tag_check_meta(struct btf_verifier_env *env,
|
2021-09-15 06:30:15 +08:00
|
|
|
const struct btf_type *t,
|
|
|
|
u32 meta_left)
|
|
|
|
{
|
2021-10-13 00:48:38 +08:00
|
|
|
const struct btf_decl_tag *tag;
|
2021-09-15 06:30:15 +08:00
|
|
|
u32 meta_needed = sizeof(*tag);
|
|
|
|
s32 component_idx;
|
|
|
|
const char *value;
|
|
|
|
|
|
|
|
if (meta_left < meta_needed) {
|
|
|
|
btf_verifier_log_basic(env, t,
|
|
|
|
"meta_left:%u meta_needed:%u",
|
|
|
|
meta_left, meta_needed);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
value = btf_name_by_offset(env->btf, t->name_off);
|
|
|
|
if (!value || !value[0]) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid value");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (btf_type_vlen(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "vlen != 0");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (btf_type_kflag(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2021-10-13 00:48:38 +08:00
|
|
|
component_idx = btf_type_decl_tag(t)->component_idx;
|
2021-09-15 06:30:15 +08:00
|
|
|
if (component_idx < -1) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid component_idx");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
btf_verifier_log_type(env, t, NULL);
|
|
|
|
|
|
|
|
return meta_needed;
|
|
|
|
}
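
/* Illustrative example, an assumption rather than original text: given
 *
 *	struct s {
 *		int a;
 *		int b __attribute__((btf_decl_tag("m")));
 *	} v __attribute__((btf_decl_tag("whole")));
 *
 * the tag "whole" targets the variable with component_idx == -1 (it applies
 * to the declaration itself), while "m" targets struct s with
 * component_idx == 1, indexing member 'b'.
 */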

static int btf_decl_tag_resolve(struct btf_verifier_env *env,
				const struct resolve_vertex *v)
{
	const struct btf_type *next_type;
	const struct btf_type *t = v->t;
	u32 next_type_id = t->type;
	struct btf *btf = env->btf;
	s32 component_idx;
	u32 vlen;

	next_type = btf_type_by_id(btf, next_type_id);
	if (!next_type || !btf_type_is_decl_tag_target(next_type)) {
		btf_verifier_log_type(env, v->t, "Invalid type_id");
		return -EINVAL;
	}

	if (!env_type_is_resolve_sink(env, next_type) &&
	    !env_type_is_resolved(env, next_type_id))
		return env_stack_push(env, next_type, next_type_id);

	component_idx = btf_type_decl_tag(t)->component_idx;
	if (component_idx != -1) {
		if (btf_type_is_var(next_type) || btf_type_is_typedef(next_type)) {
			btf_verifier_log_type(env, v->t, "Invalid component_idx");
			return -EINVAL;
		}

		if (btf_type_is_struct(next_type)) {
			vlen = btf_type_vlen(next_type);
		} else {
			/* next_type should be a function */
			next_type = btf_type_by_id(btf, next_type->type);
			vlen = btf_type_vlen(next_type);
		}

		if ((u32)component_idx >= vlen) {
			btf_verifier_log_type(env, v->t, "Invalid component_idx");
			return -EINVAL;
		}
	}

	env_stack_pop_resolved(env, next_type_id, 0);

	return 0;
}

static void btf_decl_tag_log(struct btf_verifier_env *env, const struct btf_type *t)
{
	btf_verifier_log(env, "type=%u component_idx=%d", t->type,
			 btf_type_decl_tag(t)->component_idx);
}

static const struct btf_kind_operations decl_tag_ops = {
	.check_meta = btf_decl_tag_check_meta,
	.resolve = btf_decl_tag_resolve,
	.check_member = btf_df_check_member,
	.check_kflag_member = btf_df_check_kflag_member,
	.log_details = btf_decl_tag_log,
	.show = btf_df_show,
};
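
/* Illustrative note, not part of the original file: in a BTF_KIND_FUNC_PROTO,
 * a trailing btf_param with both type == 0 and name_off == 0 stands for the
 * "..." of a vararg prototype, e.g.:
 *
 *	int log_fn(const char *fmt, ...);
 *
 * btf_func_proto_check() below drops that marker before validating the
 * remaining, fully-typed arguments.
 */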

static int btf_func_proto_check(struct btf_verifier_env *env,
				const struct btf_type *t)
{
	const struct btf_type *ret_type;
	const struct btf_param *args;
	const struct btf *btf;
	u16 nr_args, i;
	int err;

	btf = env->btf;
	args = (const struct btf_param *)(t + 1);
	nr_args = btf_type_vlen(t);

	/* Check func return type which could be "void" (t->type == 0) */
	if (t->type) {
		u32 ret_type_id = t->type;

		ret_type = btf_type_by_id(btf, ret_type_id);
		if (!ret_type) {
			btf_verifier_log_type(env, t, "Invalid return type");
			return -EINVAL;
		}

		if (btf_type_is_resolve_source_only(ret_type)) {
			btf_verifier_log_type(env, t, "Invalid return type");
			return -EINVAL;
		}

		if (btf_type_needs_resolve(ret_type) &&
		    !env_type_is_resolved(env, ret_type_id)) {
			err = btf_resolve(env, ret_type, ret_type_id);
			if (err)
				return err;
		}

		/* Ensure the return type is a type that has a size */
		if (!btf_type_id_size(btf, &ret_type_id, NULL)) {
			btf_verifier_log_type(env, t, "Invalid return type");
			return -EINVAL;
		}
	}

	if (!nr_args)
		return 0;

	/* Last func arg type_id could be 0 if it is a vararg */
	if (!args[nr_args - 1].type) {
		if (args[nr_args - 1].name_off) {
			btf_verifier_log_type(env, t, "Invalid arg#%u",
					      nr_args);
			return -EINVAL;
		}
		nr_args--;
	}

	for (i = 0; i < nr_args; i++) {
		const struct btf_type *arg_type;
		u32 arg_type_id;

		arg_type_id = args[i].type;
		arg_type = btf_type_by_id(btf, arg_type_id);
		if (!arg_type) {
			btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1);
			return -EINVAL;
		}

		if (btf_type_is_resolve_source_only(arg_type)) {
			btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1);
			return -EINVAL;
		}

		if (args[i].name_off &&
		    (!btf_name_offset_valid(btf, args[i].name_off) ||
		     !btf_name_valid_identifier(btf, args[i].name_off))) {
			btf_verifier_log_type(env, t,
					      "Invalid arg#%u", i + 1);
			return -EINVAL;
		}

		if (btf_type_needs_resolve(arg_type) &&
		    !env_type_is_resolved(env, arg_type_id)) {
			err = btf_resolve(env, arg_type, arg_type_id);
			if (err)
				return err;
		}

		if (!btf_type_id_size(btf, &arg_type_id, NULL)) {
			btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1);
			return -EINVAL;
		}
	}

	return 0;
}
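
/* Illustrative note, an assumption rather than original text: a BTF_KIND_FUNC
 * is stricter than the FUNC_PROTO it points to - every typed argument must
 * also be named, since consumers of kernel function BTF look arguments up by
 * name. btf_func_check() below rejects any typed-but-unnamed parameter.
 */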

static int btf_func_check(struct btf_verifier_env *env,
			  const struct btf_type *t)
{
	const struct btf_type *proto_type;
	const struct btf_param *args;
	const struct btf *btf;
	u16 nr_args, i;

	btf = env->btf;
	proto_type = btf_type_by_id(btf, t->type);

	if (!proto_type || !btf_type_is_func_proto(proto_type)) {
		btf_verifier_log_type(env, t, "Invalid type_id");
		return -EINVAL;
	}

	args = (const struct btf_param *)(proto_type + 1);
	nr_args = btf_type_vlen(proto_type);
	for (i = 0; i < nr_args; i++) {
		if (!args[i].name_off && args[i].type) {
			btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1);
			return -EINVAL;
		}
	}

	return 0;
}

static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
	[BTF_KIND_INT] = &int_ops,
	[BTF_KIND_PTR] = &ptr_ops,
	[BTF_KIND_ARRAY] = &array_ops,
	[BTF_KIND_STRUCT] = &struct_ops,
	[BTF_KIND_UNION] = &struct_ops,
	[BTF_KIND_ENUM] = &enum_ops,
	[BTF_KIND_FWD] = &fwd_ops,
	[BTF_KIND_TYPEDEF] = &modifier_ops,
	[BTF_KIND_VOLATILE] = &modifier_ops,
	[BTF_KIND_CONST] = &modifier_ops,
	[BTF_KIND_RESTRICT] = &modifier_ops,
	[BTF_KIND_FUNC] = &func_ops,
	[BTF_KIND_FUNC_PROTO] = &func_proto_ops,
	[BTF_KIND_VAR] = &var_ops,
	[BTF_KIND_DATASEC] = &datasec_ops,
	[BTF_KIND_FLOAT] = &float_ops,
	[BTF_KIND_DECL_TAG] = &decl_tag_ops,
	[BTF_KIND_TYPE_TAG] = &modifier_ops,
	[BTF_KIND_ENUM64] = &enum64_ops,
};
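
/* Illustrative usage sketch, not part of the original file: the table above
 * is reached through btf_type_ops(), so kind-specific handling is one
 * indirect call, e.g.:
 *
 *	meta_size = btf_type_ops(t)->check_meta(env, t, meta_left);
 */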

static s32 btf_check_meta(struct btf_verifier_env *env,
			  const struct btf_type *t,
			  u32 meta_left)
{
	u32 saved_meta_left = meta_left;
	s32 var_meta_size;

	if (meta_left < sizeof(*t)) {
		btf_verifier_log(env, "[%u] meta_left:%u meta_needed:%zu",
				 env->log_type_id, meta_left, sizeof(*t));
		return -EINVAL;
	}
	meta_left -= sizeof(*t);

	if (t->info & ~BTF_INFO_MASK) {
		btf_verifier_log(env, "[%u] Invalid btf_info:%x",
				 env->log_type_id, t->info);
		return -EINVAL;
	}

	if (BTF_INFO_KIND(t->info) > BTF_KIND_MAX ||
	    BTF_INFO_KIND(t->info) == BTF_KIND_UNKN) {
		btf_verifier_log(env, "[%u] Invalid kind:%u",
				 env->log_type_id, BTF_INFO_KIND(t->info));
		return -EINVAL;
	}

	if (!btf_name_offset_valid(env->btf, t->name_off)) {
		btf_verifier_log(env, "[%u] Invalid name_offset:%u",
				 env->log_type_id, t->name_off);
		return -EINVAL;
	}

	var_meta_size = btf_type_ops(t)->check_meta(env, t, meta_left);
	if (var_meta_size < 0)
		return var_meta_size;

	meta_left -= var_meta_size;

	return saved_meta_left - meta_left;
}

static int btf_check_all_metas(struct btf_verifier_env *env)
{
	struct btf *btf = env->btf;
	struct btf_header *hdr;
	void *cur, *end;

	hdr = &btf->hdr;
	cur = btf->nohdr_data + hdr->type_off;
	end = cur + hdr->type_len;

	env->log_type_id = btf->base_btf ? btf->start_id : 1;
	while (cur < end) {
		struct btf_type *t = cur;
		s32 meta_size;

		meta_size = btf_check_meta(env, t, end - cur);
		if (meta_size < 0)
			return meta_size;

		btf_add_type(env, t);
		cur += meta_size;
		env->log_type_id++;
	}

	return 0;
}
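
/* Illustrative note, not part of the original file: type_ids are implicit in
 * section order. The walk above numbers base (e.g. vmlinux) BTF from 1,
 * since id 0 is reserved for void, while split/module BTF starts at
 * btf->start_id so its ids continue after the base kernel's.
 */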

static bool btf_resolve_valid(struct btf_verifier_env *env,
			      const struct btf_type *t,
			      u32 type_id)
{
	struct btf *btf = env->btf;

	if (!env_type_is_resolved(env, type_id))
		return false;

	if (btf_type_is_struct(t) || btf_type_is_datasec(t))
		return !btf_resolved_type_id(btf, type_id) &&
		       !btf_resolved_type_size(btf, type_id);

	if (btf_type_is_decl_tag(t) || btf_type_is_func(t))
		return btf_resolved_type_id(btf, type_id) &&
		       !btf_resolved_type_size(btf, type_id);

	if (btf_type_is_modifier(t) || btf_type_is_ptr(t) ||
	    btf_type_is_var(t)) {
		t = btf_type_id_resolve(btf, &type_id);
		return t &&
		       !btf_type_is_modifier(t) &&
		       !btf_type_is_var(t) &&
		       !btf_type_is_datasec(t);
	}

	if (btf_type_is_array(t)) {
		const struct btf_array *array = btf_type_array(t);
		const struct btf_type *elem_type;
		u32 elem_type_id = array->type;
		u32 elem_size;

		elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size);
		return elem_type && !btf_type_is_modifier(elem_type) &&
		       (array->nelems * elem_size ==
			btf_resolved_type_size(btf, type_id));
	}

	return false;
}

static int btf_resolve(struct btf_verifier_env *env,
		       const struct btf_type *t, u32 type_id)
{
	u32 save_log_type_id = env->log_type_id;
	const struct resolve_vertex *v;
	int err = 0;

	env->resolve_mode = RESOLVE_TBD;
	env_stack_push(env, t, type_id);
	while (!err && (v = env_stack_peak(env))) {
		env->log_type_id = v->type_id;
		err = btf_type_ops(v->t)->resolve(env, v);
	}

	env->log_type_id = type_id;
	if (err == -E2BIG) {
		btf_verifier_log_type(env, t,
				      "Exceeded max resolving depth:%u",
				      MAX_RESOLVE_DEPTH);
	} else if (err == -EEXIST) {
		btf_verifier_log_type(env, t, "Loop detected");
	}

	/* Final sanity check */
	if (!err && !btf_resolve_valid(env, t, type_id)) {
		btf_verifier_log_type(env, t, "Invalid resolve state");
		err = -EINVAL;
	}

	env->log_type_id = save_log_type_id;
	return err;
}
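
/* Illustrative example, an assumption rather than original text: resolving a
 * "const int *" walks PTR -> CONST -> INT on the explicit stack above, while
 * a reference cycle among types pops -EEXIST from env_stack_push() and is
 * reported as "Loop detected" rather than recursing forever.
 */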

static int btf_check_all_types(struct btf_verifier_env *env)
{
	struct btf *btf = env->btf;
	const struct btf_type *t;
	u32 type_id, i;
	int err;

	err = env_resolve_init(env);
	if (err)
		return err;

	env->phase++;
	for (i = btf->base_btf ? 0 : 1; i < btf->nr_types; i++) {
		type_id = btf->start_id + i;
		t = btf_type_by_id(btf, type_id);

		env->log_type_id = type_id;
		if (btf_type_needs_resolve(t) &&
		    !env_type_is_resolved(env, type_id)) {
			err = btf_resolve(env, t, type_id);
			if (err)
				return err;
		}

		if (btf_type_is_func_proto(t)) {
			err = btf_func_proto_check(env, t);
			if (err)
				return err;
		}
	}

	return 0;
}

static int btf_parse_type_sec(struct btf_verifier_env *env)
{
	const struct btf_header *hdr = &env->btf->hdr;
	int err;

	/* Type section must align to 4 bytes */
	if (hdr->type_off & (sizeof(u32) - 1)) {
		btf_verifier_log(env, "Unaligned type_off");
		return -EINVAL;
	}

	if (!env->btf->base_btf && !hdr->type_len) {
		btf_verifier_log(env, "No type found");
		return -EINVAL;
	}

	err = btf_check_all_metas(env);
	if (err)
		return err;

	return btf_check_all_types(env);
}

static int btf_parse_str_sec(struct btf_verifier_env *env)
{
	const struct btf_header *hdr;
	struct btf *btf = env->btf;
	const char *start, *end;

	hdr = &btf->hdr;
	start = btf->nohdr_data + hdr->str_off;
	end = start + hdr->str_len;

	if (end != btf->data + btf->data_size) {
		btf_verifier_log(env, "String section is not at the end");
		return -EINVAL;
	}

	btf->strings = start;

	if (btf->base_btf && !hdr->str_len)
		return 0;
	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || end[-1]) {
		btf_verifier_log(env, "Invalid string section");
		return -EINVAL;
	}
	if (!btf->base_btf && start[0]) {
		btf_verifier_log(env, "Invalid string section");
		return -EINVAL;
	}

	return 0;
}
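
/* Illustrative layout, not part of the original file: a minimal valid string
 * section is "\0foo\0bar\0". For base BTF it must begin with a NUL byte and
 * every section must end with one, which is exactly what the start[0] and
 * end[-1] checks above enforce.
 */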

static const size_t btf_sec_info_offset[] = {
	offsetof(struct btf_header, type_off),
	offsetof(struct btf_header, str_off),
};

static int btf_sec_info_cmp(const void *a, const void *b)
{
	const struct btf_sec_info *x = a;
	const struct btf_sec_info *y = b;

	return (int)(x->off - y->off) ? : (int)(x->len - y->len);
}

static int btf_check_sec_info(struct btf_verifier_env *env,
			      u32 btf_data_size)
{
	struct btf_sec_info secs[ARRAY_SIZE(btf_sec_info_offset)];
	u32 total, expected_total, i;
	const struct btf_header *hdr;
	const struct btf *btf;

	btf = env->btf;
	hdr = &btf->hdr;

	/* Populate the secs from hdr */
	for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++)
		secs[i] = *(struct btf_sec_info *)((void *)hdr +
						   btf_sec_info_offset[i]);

	sort(secs, ARRAY_SIZE(btf_sec_info_offset),
	     sizeof(struct btf_sec_info), btf_sec_info_cmp, NULL);

	/* Check for gaps and overlap among sections */
	total = 0;
	expected_total = btf_data_size - hdr->hdr_len;
	for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++) {
		if (expected_total < secs[i].off) {
			btf_verifier_log(env, "Invalid section offset");
			return -EINVAL;
		}
		if (total < secs[i].off) {
			/* gap */
			btf_verifier_log(env, "Unsupported section found");
			return -EINVAL;
		}
		if (total > secs[i].off) {
			btf_verifier_log(env, "Section overlap found");
			return -EINVAL;
		}
		if (expected_total - total < secs[i].len) {
			btf_verifier_log(env,
					 "Total section length too long");
			return -EINVAL;
		}
		total += secs[i].len;
	}

	/* There is data other than hdr and known sections */
	if (expected_total != total) {
		btf_verifier_log(env, "Unsupported section found");
		return -EINVAL;
	}

	return 0;
}
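
/* Illustrative layout, an assumption rather than original text: a well-formed
 * blob tiles everything after the header with the known sections, e.g. with
 * hdr_len == 24, type_off == 0, type_len == 0x100, str_off == 0x100 and
 * str_len == 0x40, data_size must be exactly 24 + 0x140 for the gap/overlap
 * checks above to pass.
 */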

static int btf_parse_hdr(struct btf_verifier_env *env)
{
	u32 hdr_len, hdr_copy, btf_data_size;
	const struct btf_header *hdr;
	struct btf *btf;

	btf = env->btf;
	btf_data_size = btf->data_size;

	if (btf_data_size < offsetofend(struct btf_header, hdr_len)) {
		btf_verifier_log(env, "hdr_len not found");
		return -EINVAL;
	}

	hdr = btf->data;
	hdr_len = hdr->hdr_len;
	if (btf_data_size < hdr_len) {
		btf_verifier_log(env, "btf_header not found");
		return -EINVAL;
	}

	/* Ensure the unsupported header fields are zero */
	if (hdr_len > sizeof(btf->hdr)) {
		u8 *expected_zero = btf->data + sizeof(btf->hdr);
		u8 *end = btf->data + hdr_len;

		for (; expected_zero < end; expected_zero++) {
			if (*expected_zero) {
				btf_verifier_log(env, "Unsupported btf_header");
				return -E2BIG;
			}
		}
	}

	hdr_copy = min_t(u32, hdr_len, sizeof(btf->hdr));
	memcpy(&btf->hdr, btf->data, hdr_copy);

	hdr = &btf->hdr;

	btf_verifier_log_hdr(env, btf_data_size);

	if (hdr->magic != BTF_MAGIC) {
		btf_verifier_log(env, "Invalid magic");
		return -EINVAL;
	}

	if (hdr->version != BTF_VERSION) {
		btf_verifier_log(env, "Unsupported version");
		return -ENOTSUPP;
	}

	if (hdr->flags) {
		btf_verifier_log(env, "Unsupported flags");
		return -ENOTSUPP;
	}

	if (!btf->base_btf && btf_data_size == hdr->hdr_len) {
		btf_verifier_log(env, "No data");
		return -EINVAL;
	}

	return btf_check_sec_info(env, btf_data_size);
}
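
/* Illustrative header, an assumption rather than original text: a minimal
 * header that passes the checks above would be
 *
 *	struct btf_header hdr = {
 *		.magic = BTF_MAGIC,	// 0xeB9F
 *		.version = BTF_VERSION,	// 1
 *		.hdr_len = sizeof(hdr),
 *	};
 *
 * with type_off/type_len/str_off/str_len describing the two sections that
 * btf_check_sec_info() validates.
 */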

static const char *alloc_obj_fields[] = {
	"bpf_spin_lock",
	"bpf_list_head",
	"bpf_list_node",
	"bpf_rb_root",
	"bpf_rb_node",
	"bpf_refcount",
};
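
/* Illustrative example, not part of the original file: a program-BTF struct
 * using the special field types listed above, e.g.
 *
 *	struct elem {
 *		struct bpf_list_node node;
 *		struct bpf_refcount ref;
 *	};
 *
 * gets a btf_struct_meta entry below so the verifier can locate and protect
 * these fields.
 */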
|
|
|
|
|
|
|
|
static struct btf_struct_metas *
|
|
|
|
btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
|
|
|
|
{
|
|
|
|
struct btf_struct_metas *tab = NULL;
|
bpf: Search for kptrs in prog BTF structs
Currently btf_parse_fields is used in two places to create struct
btf_record's for structs: when looking at mapval type, and when looking
at any struct in program BTF. The former looks for kptr fields while the
latter does not. This patch modifies the btf_parse_fields call made when
looking at prog BTF struct types to search for kptrs as well.
Before this series there was no reason to search for kptrs in non-mapval
types: a referenced kptr needs some owner to guarantee resource cleanup,
and map values were the only owner that supported this. If a struct with
a kptr field were to have some non-kptr-aware owner, the kptr field
might not be properly cleaned up and result in resources leaking. Only
searching for kptr fields in mapval was a simple way to avoid this
problem.
In practice, though, searching for BPF_KPTR when populating
struct_meta_tab does not expose us to this risk, as struct_meta_tab is
only accessed through btf_find_struct_meta helper, and that helper is
only called in contexts where recognizing the kptr field is safe:
* PTR_TO_BTF_ID reg w/ MEM_ALLOC flag
* Such a reg is a local kptr and must be free'd via bpf_obj_drop,
which will correctly handle kptr field
* When handling specific kfuncs which either expect MEM_ALLOC input or
return MEM_ALLOC output (obj_{new,drop}, percpu_obj_{new,drop},
list+rbtree funcs, refcount_acquire)
* Will correctly handle kptr field for same reasons as above
* When looking at kptr pointee type
* Called by functions which implement "correct kptr resource
handling"
* In btf_check_and_fixup_fields
* Helper that ensures no ownership loops for lists and rbtrees,
doesn't care about kptr field existence
So we should be able to find BPF_KPTR fields in all prog BTF structs
without leaking resources.
Further patches in the series will build on this change to support
kptr_xchg into non-mapval local kptr. Without this change there would be
no kptr field found in such a type.
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Acked-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Signed-off-by: Amery Hung <amery.hung@bytedance.com>
Link: https://lore.kernel.org/r/20240813212424.2871455-3-amery.hung@bytedance.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2024-08-14 05:24:21 +08:00
|
|
|
struct btf_id_set *aof;
|
bpf: Recognize lock and list fields in allocated objects
Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in a
allocated object.
Also update btf_struct_access to reject direct access to these special
fields.
A bpf_list_head allows implementing map-in-map style use cases, where an
allocated object with bpf_list_head is linked into a list in a map
value. This would require embedding a bpf_list_node, support for which
is also included. The bpf_spin_lock is used to protect the bpf_list_head
and other data.
While we strictly don't require to hold a bpf_spin_lock while touching
the bpf_list_head in such objects, as when have access to it, we have
complete ownership of the object, the locking constraint is still kept
and may be conditionally lifted in the future.
Note that the specification of such types can be done just like map
values, e.g.:
struct bar {
struct bpf_list_node node;
};
struct foo {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(bar, node);
struct bpf_list_node node;
};
struct map_value {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(foo, node);
};
To recognize such types in user BTF, we build a btf_struct_metas array
of metadata items corresponding to each BTF ID. This is done once during
the btf_parse stage to avoid having to do it each time during the
verification process's requirement to inspect the metadata.
Moreover, the computed metadata needs to be passed to some helpers in
future patches which requires allocating them and storing them in the
BTF that is pinned by the program itself, so that valid access can be
assumed to such data during program runtime.
A key thing to note is that once a btf_struct_meta is available for a
type, both the btf_record and btf_field_offs should be available. It is
critical that btf_field_offs is available in case special fields are
present, as we extensively rely on special fields being zeroed out in
map values and allocated objects in later patches. The code ensures that
by bailing out in case of errors and ensuring both are available
together. If the record is not available, the special fields won't be
recognized, so not having both is also fine (in terms of being a
verification error and not a runtime bug).
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 09:55:56 +08:00
|
|
|
int i, n, id, ret;
|
|
|
|
|
|
|
|
BUILD_BUG_ON(offsetof(struct btf_id_set, cnt) != 0);
|
|
|
|
BUILD_BUG_ON(sizeof(struct btf_id_set) != sizeof(u32));
|
|
|
|
|
bpf: Search for kptrs in prog BTF structs
Currently btf_parse_fields is used in two places to create struct
btf_record's for structs: when looking at mapval type, and when looking
at any struct in program BTF. The former looks for kptr fields while the
latter does not. This patch modifies the btf_parse_fields call made when
looking at prog BTF struct types to search for kptrs as well.
Before this series there was no reason to search for kptrs in non-mapval
types: a referenced kptr needs some owner to guarantee resource cleanup,
and map values were the only owner that supported this. If a struct with
a kptr field were to have some non-kptr-aware owner, the kptr field
might not be properly cleaned up and result in resources leaking. Only
searching for kptr fields in mapval was a simple way to avoid this
problem.
In practice, though, searching for BPF_KPTR when populating
struct_meta_tab does not expose us to this risk, as struct_meta_tab is
only accessed through btf_find_struct_meta helper, and that helper is
only called in contexts where recognizing the kptr field is safe:
* PTR_TO_BTF_ID reg w/ MEM_ALLOC flag
* Such a reg is a local kptr and must be free'd via bpf_obj_drop,
which will correctly handle kptr field
* When handling specific kfuncs which either expect MEM_ALLOC input or
return MEM_ALLOC output (obj_{new,drop}, percpu_obj_{new,drop},
list+rbtree funcs, refcount_acquire)
* Will correctly handle kptr field for same reasons as above
* When looking at kptr pointee type
* Called by functions which implement "correct kptr resource
handling"
* In btf_check_and_fixup_fields
* Helper that ensures no ownership loops for lists and rbtrees,
doesn't care about kptr field existence
So we should be able to find BPF_KPTR fields in all prog BTF structs
without leaking resources.
Further patches in the series will build on this change to support
kptr_xchg into non-mapval local kptr. Without this change there would be
no kptr field found in such a type.
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Acked-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Signed-off-by: Amery Hung <amery.hung@bytedance.com>
Link: https://lore.kernel.org/r/20240813212424.2871455-3-amery.hung@bytedance.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2024-08-14 05:24:21 +08:00
|
|
|
aof = kmalloc(sizeof(*aof), GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!aof)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
aof->cnt = 0;
|
|
|
|
|
bpf: Recognize lock and list fields in allocated objects
Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in a
allocated object.
Also update btf_struct_access to reject direct access to these special
fields.
A bpf_list_head allows implementing map-in-map style use cases, where an
allocated object with bpf_list_head is linked into a list in a map
value. This would require embedding a bpf_list_node, support for which
is also included. The bpf_spin_lock is used to protect the bpf_list_head
and other data.
While we strictly don't require to hold a bpf_spin_lock while touching
the bpf_list_head in such objects, as when have access to it, we have
complete ownership of the object, the locking constraint is still kept
and may be conditionally lifted in the future.
Note that the specification of such types can be done just like map
values, e.g.:
struct bar {
struct bpf_list_node node;
};
struct foo {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(bar, node);
struct bpf_list_node node;
};
struct map_value {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(foo, node);
};
To recognize such types in user BTF, we build a btf_struct_metas array
of metadata items corresponding to each BTF ID. This is done once during
the btf_parse stage to avoid having to do it each time during the
verification process's requirement to inspect the metadata.
Moreover, the computed metadata needs to be passed to some helpers in
future patches which requires allocating them and storing them in the
BTF that is pinned by the program itself, so that valid access can be
assumed to such data during program runtime.
A key thing to note is that once a btf_struct_meta is available for a
type, both the btf_record and btf_field_offs should be available. It is
critical that btf_field_offs is available in case special fields are
present, as we extensively rely on special fields being zeroed out in
map values and allocated objects in later patches. The code ensures that
by bailing out in case of errors and ensuring both are available
together. If the record is not available, the special fields won't be
recognized, so not having both is also fine (in terms of being a
verification error and not a runtime bug).
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 09:55:56 +08:00
|
|
|
for (i = 0; i < ARRAY_SIZE(alloc_obj_fields); i++) {
|
|
|
|
/* Try to find whether this special type exists in user BTF, and
|
|
|
|
* if so remember its ID so we can easily find it among members
|
|
|
|
* of structs that we iterate in the next loop.
|
|
|
|
*/
|
bpf: Search for kptrs in prog BTF structs
Currently btf_parse_fields is used in two places to create struct
btf_record's for structs: when looking at mapval type, and when looking
at any struct in program BTF. The former looks for kptr fields while the
latter does not. This patch modifies the btf_parse_fields call made when
looking at prog BTF struct types to search for kptrs as well.
Before this series there was no reason to search for kptrs in non-mapval
types: a referenced kptr needs some owner to guarantee resource cleanup,
and map values were the only owner that supported this. If a struct with
a kptr field were to have some non-kptr-aware owner, the kptr field
might not be properly cleaned up and result in resources leaking. Only
searching for kptr fields in mapval was a simple way to avoid this
problem.
In practice, though, searching for BPF_KPTR when populating
struct_meta_tab does not expose us to this risk, as struct_meta_tab is
only accessed through btf_find_struct_meta helper, and that helper is
only called in contexts where recognizing the kptr field is safe:
* PTR_TO_BTF_ID reg w/ MEM_ALLOC flag
* Such a reg is a local kptr and must be free'd via bpf_obj_drop,
which will correctly handle kptr field
* When handling specific kfuncs which either expect MEM_ALLOC input or
return MEM_ALLOC output (obj_{new,drop}, percpu_obj_{new,drop},
list+rbtree funcs, refcount_acquire)
* Will correctly handle kptr field for same reasons as above
* When looking at kptr pointee type
* Called by functions which implement "correct kptr resource
handling"
* In btf_check_and_fixup_fields
* Helper that ensures no ownership loops for lists and rbtrees,
doesn't care about kptr field existence
So we should be able to find BPF_KPTR fields in all prog BTF structs
without leaking resources.
Further patches in the series will build on this change to support
kptr_xchg into non-mapval local kptr. Without this change there would be
no kptr field found in such a type.
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Acked-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Signed-off-by: Amery Hung <amery.hung@bytedance.com>
Link: https://lore.kernel.org/r/20240813212424.2871455-3-amery.hung@bytedance.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2024-08-14 05:24:21 +08:00
|
|
|
struct btf_id_set *new_aof;
|
|
|
|
|
bpf: Recognize lock and list fields in allocated objects
Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in a
allocated object.
Also update btf_struct_access to reject direct access to these special
fields.
A bpf_list_head allows implementing map-in-map style use cases, where an
allocated object with bpf_list_head is linked into a list in a map
value. This would require embedding a bpf_list_node, support for which
is also included. The bpf_spin_lock is used to protect the bpf_list_head
and other data.
While we strictly don't require to hold a bpf_spin_lock while touching
the bpf_list_head in such objects, as when have access to it, we have
complete ownership of the object, the locking constraint is still kept
and may be conditionally lifted in the future.
Note that the specification of such types can be done just like map
values, e.g.:
struct bar {
struct bpf_list_node node;
};
struct foo {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(bar, node);
struct bpf_list_node node;
};
struct map_value {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(foo, node);
};
To recognize such types in user BTF, we build a btf_struct_metas array
of metadata items corresponding to each BTF ID. This is done once during
the btf_parse stage to avoid having to do it each time during the
verification process's requirement to inspect the metadata.
Moreover, the computed metadata needs to be passed to some helpers in
future patches which requires allocating them and storing them in the
BTF that is pinned by the program itself, so that valid access can be
assumed to such data during program runtime.
A key thing to note is that once a btf_struct_meta is available for a
type, both the btf_record and btf_field_offs should be available. It is
critical that btf_field_offs is available in case special fields are
present, as we extensively rely on special fields being zeroed out in
map values and allocated objects in later patches. The code ensures that
by bailing out in case of errors and ensuring both are available
together. If the record is not available, the special fields won't be
recognized, so not having both is also fine (in terms of being a
verification error and not a runtime bug).
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 09:55:56 +08:00
|
|
|
id = btf_find_by_name_kind(btf, alloc_obj_fields[i], BTF_KIND_STRUCT);
|
|
|
|
if (id < 0)
|
|
|
|
continue;
|
bpf: Search for kptrs in prog BTF structs
Currently btf_parse_fields is used in two places to create struct
btf_record's for structs: when looking at mapval type, and when looking
at any struct in program BTF. The former looks for kptr fields while the
latter does not. This patch modifies the btf_parse_fields call made when
looking at prog BTF struct types to search for kptrs as well.
Before this series there was no reason to search for kptrs in non-mapval
types: a referenced kptr needs some owner to guarantee resource cleanup,
and map values were the only owner that supported this. If a struct with
a kptr field were to have some non-kptr-aware owner, the kptr field
might not be properly cleaned up and result in resources leaking. Only
searching for kptr fields in mapval was a simple way to avoid this
problem.
In practice, though, searching for BPF_KPTR when populating
struct_meta_tab does not expose us to this risk, as struct_meta_tab is
only accessed through btf_find_struct_meta helper, and that helper is
only called in contexts where recognizing the kptr field is safe:
* PTR_TO_BTF_ID reg w/ MEM_ALLOC flag
* Such a reg is a local kptr and must be free'd via bpf_obj_drop,
which will correctly handle kptr field
* When handling specific kfuncs which either expect MEM_ALLOC input or
return MEM_ALLOC output (obj_{new,drop}, percpu_obj_{new,drop},
list+rbtree funcs, refcount_acquire)
* Will correctly handle kptr field for same reasons as above
* When looking at kptr pointee type
* Called by functions which implement "correct kptr resource
handling"
* In btf_check_and_fixup_fields
* Helper that ensures no ownership loops for lists and rbtrees,
doesn't care about kptr field existence
So we should be able to find BPF_KPTR fields in all prog BTF structs
without leaking resources.
Further patches in the series will build on this change to support
kptr_xchg into non-mapval local kptr. Without this change there would be
no kptr field found in such a type.
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Acked-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Signed-off-by: Amery Hung <amery.hung@bytedance.com>
Link: https://lore.kernel.org/r/20240813212424.2871455-3-amery.hung@bytedance.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2024-08-14 05:24:21 +08:00
|
|
|
|
|
|
|
new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]),
|
|
|
|
GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!new_aof) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto free_aof;
|
|
|
|
}
|
|
|
|
aof = new_aof;
|
|
|
|
aof->ids[aof->cnt++] = id;
|
bpf: Recognize lock and list fields in allocated objects
Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in a
allocated object.
Also update btf_struct_access to reject direct access to these special
fields.
A bpf_list_head allows implementing map-in-map style use cases, where an
allocated object with bpf_list_head is linked into a list in a map
value. This would require embedding a bpf_list_node, support for which
is also included. The bpf_spin_lock is used to protect the bpf_list_head
and other data.
While we don't strictly require holding a bpf_spin_lock while touching
the bpf_list_head in such objects, since when we have access to the
object we have complete ownership of it, the locking constraint is still
kept and may be conditionally lifted in the future.
Note that the specification of such types can be done just like map
values, e.g.:
struct bar {
struct bpf_list_node node;
};
struct foo {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(bar, node);
struct bpf_list_node node;
};
struct map_value {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(foo, node);
};
To recognize such types in user BTF, we build a btf_struct_metas array
of metadata items corresponding to each BTF ID. This is done once during
the btf_parse stage to avoid having to recompute it each time the
verification process needs to inspect the metadata.
Moreover, the computed metadata needs to be passed to some helpers in
future patches, which requires allocating the items and storing them in
the BTF that is pinned by the program itself, so that valid access to
such data can be assumed during program runtime.
A key thing to note is that once a btf_struct_meta is available for a
type, both the btf_record and btf_field_offs should be available. It is
critical that btf_field_offs is available in case special fields are
present, as we extensively rely on special fields being zeroed out in
map values and allocated objects in later patches. The code ensures that
by bailing out in case of errors and ensuring both are available
together. If the record is not available, the special fields won't be
recognized, so not having both is also fine (in terms of being a
verification error and not a runtime bug).
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 09:55:56 +08:00
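To make the flow concrete, a hedged usage sketch against the map_value
declared above, assuming the bpf_obj_new and bpf_list_push_back kfuncs
added later in the same series; the map name and error handling are
illustrative, and the verifier enforces that the protecting lock is held:

struct map_value *v;
struct foo *f;

v = bpf_map_lookup_elem(&array_map, &key);	/* illustrative map */
if (!v)
	return 0;
f = bpf_obj_new(typeof(*f));
if (!f)
	return 0;
bpf_spin_lock(&v->lock);
bpf_list_push_back(&v->head, &f->node);	/* head __contains(foo, node) */
bpf_spin_unlock(&v->lock);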
|
|
|
}
|
|
|
|
|
|
|
|
n = btf_nr_types(btf);
|
|
|
|
for (i = 1; i < n; i++) {
|
|
|
|
/* Try to find if there are kptrs in user BTF and remember their IDs */
|
|
|
|
struct btf_id_set *new_aof;
|
|
|
|
struct btf_field_info tmp;
|
|
|
|
const struct btf_type *t;
|
|
|
|
|
|
|
|
t = btf_type_by_id(btf, i);
|
|
|
|
if (!t) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto free_aof;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = btf_find_kptr(btf, t, 0, 0, &tmp);
|
|
|
|
if (ret != BTF_FIELD_FOUND)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]),
|
|
|
|
GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!new_aof) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto free_aof;
|
|
|
|
}
|
|
|
|
aof = new_aof;
|
|
|
|
aof->ids[aof->cnt++] = i;
|
|
|
|
}
|
|
|
|
|
2024-09-12 09:28:45 +08:00
|
|
|
if (!aof->cnt) {
|
|
|
|
kfree(aof);
|
|
|
|
return NULL;
|
2024-09-12 09:28:45 +08:00
|
|
|
}
|
|
|
|
sort(&aof->ids, aof->cnt, sizeof(aof->ids[0]), btf_id_cmp_func, NULL);
|
|
|
|
|
|
|
|
for (i = 1; i < n; i++) {
|
|
|
|
struct btf_struct_metas *new_tab;
|
|
|
|
const struct btf_member *member;
|
|
|
|
struct btf_struct_meta *type;
|
|
|
|
struct btf_record *record;
|
|
|
|
const struct btf_type *t;
|
|
|
|
int j, tab_cnt;
|
|
|
|
|
|
|
|
t = btf_type_by_id(btf, i);
|
|
|
|
if (!__btf_type_is_struct(t))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
cond_resched();
|
|
|
|
|
|
|
|
for_each_member(j, t, member) {
|
|
|
|
if (btf_id_set_contains(aof, member->type))
|
|
|
|
goto parse;
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
parse:
|
|
|
|
tab_cnt = tab ? tab->cnt : 0;
|
|
|
|
new_tab = krealloc(tab, offsetof(struct btf_struct_metas, types[tab_cnt + 1]),
|
|
|
|
GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!new_tab) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto free;
|
|
|
|
}
|
|
|
|
if (!tab)
|
|
|
|
new_tab->cnt = 0;
|
|
|
|
tab = new_tab;
|
|
|
|
|
|
|
|
type = &tab->types[tab->cnt];
|
|
|
|
type->btf_id = i;
|
bpf: Add basic bpf_rb_{root,node} support
This patch adds special BPF_RB_{ROOT,NODE} btf_field_types similar to
BPF_LIST_{HEAD,NODE}, adds the necessary plumbing to detect the new
types, and adds bpf_rb_root_free function for freeing bpf_rb_root in
map_values.
structs bpf_rb_root and bpf_rb_node are opaque types meant to
obscure structs rb_root_cached and rb_node, respectively.
btf_struct_access will prevent BPF programs from touching these special
fields automatically now that they're recognized.
btf_check_and_fixup_fields now groups list_head and rb_root together as
"graph root" fields and {list,rb}_node as "graph node", and does same
ownership cycle checking as before. Note that this function does _not_
prevent ownership type mixups (e.g. rb_root owning list_node) - that's
handled by btf_parse_graph_root.
After this patch, a bpf program can have a struct bpf_rb_root in a
map_value, but not add anything to nor do anything useful with it.
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230214004017.2534011-2-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-02-14 08:40:10 +08:00
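A hedged declaration sketch mirroring the list example in the earlier
commit; after this patch such fields are recognized and shielded from
direct access, though not yet usable beyond declaration:

struct node_data {
	long key;
	struct bpf_rb_node node;
};

struct map_value {
	struct bpf_spin_lock lock;
	struct bpf_rb_root root __contains(node_data, node);
};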
|
|
|
record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
|
|
|
|
BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT |
|
|
|
|
BPF_KPTR, t->size);
|
|
|
|
/* The record cannot be unset, treat it as an error if so */
|
|
|
|
if (IS_ERR_OR_NULL(record)) {
|
|
|
|
ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;
|
|
|
|
goto free;
|
|
|
|
}
|
|
|
|
type->record = record;
|
|
|
|
tab->cnt++;
|
|
|
|
}
|
|
|
|
kfree(aof);
|
|
|
|
return tab;
|
|
|
|
free:
|
|
|
|
btf_struct_metas_free(tab);
|
|
|
|
free_aof:
|
|
|
|
kfree(aof);
|
|
|
|
return ERR_PTR(ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id)
|
|
|
|
{
|
|
|
|
struct btf_struct_metas *tab;
|
|
|
|
|
|
|
|
BUILD_BUG_ON(offsetof(struct btf_struct_meta, btf_id) != 0);
|
|
|
|
tab = btf->struct_meta_tab;
|
|
|
|
if (!tab)
|
|
|
|
return NULL;
|
|
|
|
return bsearch(&btf_id, tab->types, tab->cnt, sizeof(tab->types[0]), btf_id_cmp_func);
|
|
|
|
}
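The bsearch above works because btf_id is the first member of struct
btf_struct_meta (the BUILD_BUG_ON enforces this), so the address of a
bare u32 can stand in as the search key. A self-contained sketch of the
same idiom, with illustrative names:

struct rec {
	u32 id;		/* key field must stay at offset 0 */
	void *payload;
};

static int cmp_id(const void *a, const void *b)
{
	u32 x = *(const u32 *)a, y = *(const u32 *)b;

	/* explicit comparison sidesteps unsigned subtraction wrap-around */
	return x < y ? -1 : x > y ? 1 : 0;
}

static struct rec *find_rec(struct rec *tab, unsigned int cnt, u32 id)
{
	/* &id is a valid key: it lines up with rec.id at offset 0 */
	return bsearch(&id, tab, cnt, sizeof(tab[0]), cmp_id);
}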
|
|
|
|
|
bpf: Ensure type tags precede modifiers in BTF
It is guaranteed that for modifiers, clang always places type tags
before other modifiers, and then the base type. We would like to rely on
this guarantee inside the kernel to make it simple to parse type tags
from BTF.
However, a user is allowed to construct BTF without such guarantees.
Hence, add a pass to check that in modifier chains, type tags only occur
at the head of the chain and never later in it.
A chain may begin with one or more type tags, after which other
modifiers may follow; once a non-tag modifier is seen, no further type
tag may appear in the chain.
Instead of having to walk chains we verified previously, we can remember
the last good modifier type ID which headed a good chain. At that point,
we must have verified all other chains headed by type IDs less than it.
This makes the verification process less costly, and it becomes a simple
O(n) pass.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220419164608.1990559-2-memxor@gmail.com
2022-04-20 00:46:07 +08:00
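For example, with clang's btf_type_tag attribute (a hedged sketch; the
tag name is illustrative):

#define __tagged __attribute__((btf_type_tag("tag1")))

const int __tagged *p;

/* Pointee modifier chain emitted by clang:
 *   type_tag("tag1") -> const -> int	(accepted: tags head the chain)
 * A hand-crafted chain such as
 *   const -> type_tag("tag1") -> int	(rejected with -EINVAL)
 */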
|
|
|
static int btf_check_type_tags(struct btf_verifier_env *env,
|
|
|
|
struct btf *btf, int start_id)
|
|
|
|
{
|
|
|
|
int i, n, good_id = start_id - 1;
|
|
|
|
bool in_tags;
|
|
|
|
|
|
|
|
n = btf_nr_types(btf);
|
|
|
|
for (i = start_id; i < n; i++) {
|
|
|
|
const struct btf_type *t;
|
2022-06-15 12:21:51 +08:00
|
|
|
int chain_limit = 32;
|
|
|
|
u32 cur_id = i;
|
|
|
|
|
|
|
|
t = btf_type_by_id(btf, i);
|
|
|
|
if (!t)
|
|
|
|
return -EINVAL;
|
|
|
|
if (!btf_type_is_modifier(t))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
cond_resched();
|
|
|
|
|
|
|
|
in_tags = btf_type_is_type_tag(t);
|
|
|
|
while (btf_type_is_modifier(t)) {
|
2022-06-15 12:21:51 +08:00
|
|
|
if (!chain_limit--) {
|
|
|
|
btf_verifier_log(env, "Max chain length or cycle detected");
|
|
|
|
return -ELOOP;
|
|
|
|
}
|
|
|
|
if (btf_type_is_type_tag(t)) {
|
|
|
|
if (!in_tags) {
|
|
|
|
btf_verifier_log(env, "Type tags don't precede modifiers");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
} else if (in_tags) {
|
|
|
|
in_tags = false;
|
|
|
|
}
|
|
|
|
if (cur_id <= good_id)
|
|
|
|
break;
|
|
|
|
/* Move to next type */
|
|
|
|
cur_id = t->type;
|
|
|
|
t = btf_type_by_id(btf, cur_id);
|
|
|
|
if (!t)
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
good_id = i;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-04-07 07:41:59 +08:00
|
|
|
static int finalize_log(struct bpf_verifier_log *log, bpfptr_t uattr, u32 uattr_size)
|
2018-04-19 06:55:57 +08:00
|
|
|
{
|
2023-04-07 07:41:59 +08:00
|
|
|
u32 log_true_size;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
err = bpf_vlog_finalize(log, &log_true_size);
|
|
|
|
|
|
|
|
if (uattr_size >= offsetofend(union bpf_attr, btf_log_true_size) &&
|
|
|
|
copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, btf_log_true_size),
|
|
|
|
&log_true_size, sizeof(log_true_size)))
|
|
|
|
err = -EFAULT;
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
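From user space the contract looks roughly like the following raw-syscall
sketch (hedged; libbpf wraps this, and btf_log_true_size is only written
back on kernels new enough to know the field):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int btf_load_with_log(const void *data, __u32 size,
			     char *log, __u32 log_size)
{
	union bpf_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.btf = (__u64)(unsigned long)data;
	attr.btf_size = size;
	attr.btf_log_buf = (__u64)(unsigned long)log;
	attr.btf_log_size = log_size;
	attr.btf_log_level = 1;

	fd = syscall(__NR_bpf, BPF_BTF_LOAD, &attr, sizeof(attr));
	/* Whether the load succeeded or failed with -ENOSPC, the kernel
	 * wrote attr.btf_log_true_size: the buffer size that would have
	 * held the whole log. Callers can resize and retry on -ENOSPC.
	 */
	return fd;
}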
|
|
|
|
|
2023-04-07 07:41:58 +08:00
|
|
|
static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
|
2018-04-19 06:55:57 +08:00
|
|
|
{
|
2023-04-07 07:41:58 +08:00
|
|
|
bpfptr_t btf_data = make_bpfptr(attr->btf, uattr.is_kernel);
|
|
|
|
char __user *log_ubuf = u64_to_user_ptr(attr->btf_log_buf);
|
|
|
|
struct btf_struct_metas *struct_meta_tab;
|
2018-04-19 06:55:57 +08:00
|
|
|
struct btf_verifier_env *env = NULL;
|
|
|
|
struct btf *btf = NULL;
|
|
|
|
u8 *data;
|
2023-04-07 07:41:59 +08:00
|
|
|
int err, ret;
|
2018-04-19 06:55:57 +08:00
|
|
|
|
2023-04-07 07:41:58 +08:00
|
|
|
if (attr->btf_size > BTF_MAX_SIZE)
|
2018-04-19 06:55:57 +08:00
|
|
|
return ERR_PTR(-E2BIG);
|
|
|
|
|
|
|
|
env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!env)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
2023-04-07 07:41:59 +08:00
|
|
|
/* user could have requested verbose verifier output
|
|
|
|
* and supplied buffer to store the verification trace
|
|
|
|
*/
|
|
|
|
err = bpf_vlog_init(&env->log, attr->btf_log_level,
|
|
|
|
log_ubuf, attr->btf_log_size);
|
|
|
|
if (err)
|
|
|
|
goto errout_free;
|
2018-04-19 06:55:57 +08:00
|
|
|
|
|
|
|
btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!btf) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto errout;
|
|
|
|
}
|
2018-05-23 05:57:18 +08:00
|
|
|
env->btf = btf;
|
|
|
|
|
2023-04-07 07:41:58 +08:00
|
|
|
data = kvmalloc(attr->btf_size, GFP_KERNEL | __GFP_NOWARN);
|
2018-04-19 06:55:57 +08:00
|
|
|
if (!data) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto errout;
|
|
|
|
}
|
|
|
|
|
|
|
|
btf->data = data;
|
2023-04-07 07:41:58 +08:00
|
|
|
btf->data_size = attr->btf_size;
|
2018-04-19 06:55:57 +08:00
|
|
|
|
2023-04-07 07:41:58 +08:00
|
|
|
if (copy_from_bpfptr(data, btf_data, attr->btf_size)) {
|
2018-04-19 06:55:57 +08:00
|
|
|
err = -EFAULT;
|
|
|
|
goto errout;
|
|
|
|
}
|
|
|
|
|
bpf, btf: fix a missing check bug in btf_parse
Wenwen Wang reported:
In btf_parse(), the header of the user-space btf data 'btf_data'
is first parsed and verified through btf_parse_hdr().
In btf_parse_hdr(), the header is copied from user-space 'btf_data'
to kernel-space 'btf->hdr' and then verified. If no error happens
during the verification process, the whole data of 'btf_data',
including the header, is then copied to 'data' in btf_parse(). It
is obvious that the header is copied twice here. More importantly,
no check is enforced after the second copy to make sure the headers
obtained in these two copies are the same. Given that 'btf_data' resides
in the user space, a malicious user can race to modify the header
between these two copies. By doing so, the user can inject
inconsistent data, which can cause undefined behavior of the
kernel and introduce a potential security risk.
This issue is similar to the one fixed in commit 8af03d1ae2e1 ("bpf:
btf: Fix a missing check bug"). To fix it, this patch copies the user
'btf_data' *before* parsing / verifying the BTF header.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Co-developed-by: Wenwen Wang <wang6495@umn.edu>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-10-25 04:42:25 +08:00
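A minimal sketch of the copy-once pattern the fix adopts, with
illustrative names; the point is that every read, including header
validation, goes through a single kernel-resident snapshot of the
untrusted buffer:

static int parse_untrusted(const void __user *ubuf, u32 size)
{
	void *data;
	int err;

	data = kvmalloc(size, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	/* One copy, taken before any validation: a concurrent user-space
	 * writer can no longer change what was already verified.
	 */
	if (copy_from_user(data, ubuf, size)) {
		kvfree(data);
		return -EFAULT;
	}

	err = validate_header(data, size);	/* illustrative helper */
	if (err) {
		kvfree(data);
		return err;
	}
	/* ... parse the rest from 'data' ... */
	kvfree(data);
	return 0;
}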
|
|
|
err = btf_parse_hdr(env);
|
|
|
|
if (err)
|
|
|
|
goto errout;
|
|
|
|
|
|
|
|
btf->nohdr_data = btf->data + btf->hdr.hdr_len;
|
|
|
|
|
2018-04-19 06:55:57 +08:00
|
|
|
err = btf_parse_str_sec(env);
|
|
|
|
if (err)
|
|
|
|
goto errout;
|
|
|
|
|
|
|
|
err = btf_parse_type_sec(env);
|
|
|
|
if (err)
|
|
|
|
goto errout;
|
|
|
|
|
|
|
|
err = btf_check_type_tags(env, btf, 1);
|
|
|
|
if (err)
|
|
|
|
goto errout;
|
|
|
|
|
2023-04-07 07:41:59 +08:00
|
|
|
struct_meta_tab = btf_parse_struct_metas(&env->log, btf);
|
|
|
|
if (IS_ERR(struct_meta_tab)) {
|
|
|
|
err = PTR_ERR(struct_meta_tab);
|
|
|
|
goto errout;
|
|
|
|
}
|
|
|
|
btf->struct_meta_tab = struct_meta_tab;
|
|
|
|
|
2022-11-18 09:55:57 +08:00
|
|
|
if (struct_meta_tab) {
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < struct_meta_tab->cnt; i++) {
|
|
|
|
err = btf_check_and_fixup_fields(btf, struct_meta_tab->types[i].record);
|
|
|
|
if (err < 0)
|
|
|
|
goto errout_meta;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-07 07:41:59 +08:00
|
|
|
err = finalize_log(&env->log, uattr, uattr_size);
|
|
|
|
if (err)
|
|
|
|
goto errout_free;
|
2018-04-19 06:55:57 +08:00
|
|
|
|
2018-05-23 05:57:18 +08:00
|
|
|
btf_verifier_env_free(env);
|
|
|
|
refcount_set(&btf->refcnt, 1);
|
|
|
|
return btf;
|
2018-04-19 06:55:57 +08:00
|
|
|
|
|
|
|
errout_meta:
|
|
|
|
btf_free_struct_meta_tab(btf);
|
2018-04-19 06:55:57 +08:00
|
|
|
errout:
|
2023-04-07 07:41:59 +08:00
|
|
|
/* overwrite err with -ENOSPC or -EFAULT */
|
|
|
|
ret = finalize_log(&env->log, uattr, uattr_size);
|
|
|
|
if (ret)
|
|
|
|
err = ret;
|
|
|
|
errout_free:
|
2018-04-19 06:55:57 +08:00
|
|
|
btf_verifier_env_free(env);
|
|
|
|
if (btf)
|
|
|
|
btf_free(btf);
|
|
|
|
return ERR_PTR(err);
|
|
|
|
}
|
2018-04-19 06:56:00 +08:00
|
|
|
|
2024-04-16 00:20:45 +08:00
|
|
|
extern char __start_BTF[];
|
|
|
|
extern char __stop_BTF[];
|
2019-11-15 02:57:15 +08:00
|
|
|
extern struct btf *btf_vmlinux;
|
|
|
|
|
|
|
|
#define BPF_MAP_TYPE(_id, _ops)
|
2020-04-29 08:16:08 +08:00
|
|
|
#define BPF_LINK_TYPE(_id, _name)
|
2019-11-15 02:57:15 +08:00
|
|
|
static union {
|
|
|
|
struct bpf_ctx_convert {
|
|
|
|
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
|
|
|
|
prog_ctx_type _id##_prog; \
|
|
|
|
kern_ctx_type _id##_kern;
|
|
|
|
#include <linux/bpf_types.h>
|
|
|
|
#undef BPF_PROG_TYPE
|
|
|
|
} *__t;
|
|
|
|
/* 't' is written once under lock. Read many times. */
|
|
|
|
const struct btf_type *t;
|
|
|
|
} bpf_ctx_convert;
|
|
|
|
enum {
|
|
|
|
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
|
|
|
|
__ctx_convert##_id,
|
|
|
|
#include <linux/bpf_types.h>
|
|
|
|
#undef BPF_PROG_TYPE
|
2019-11-28 12:35:08 +08:00
|
|
|
__ctx_convert_unused, /* to avoid empty enum in extreme .config */
|
2019-11-15 02:57:15 +08:00
|
|
|
};
|
|
|
|
static u8 bpf_ctx_convert_map[] = {
|
|
|
|
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
|
|
|
|
[_id] = __ctx_convert##_id,
|
|
|
|
#include <linux/bpf_types.h>
|
|
|
|
#undef BPF_PROG_TYPE
|
2019-12-11 04:35:46 +08:00
|
|
|
0, /* avoid empty array */
|
2019-11-15 02:57:15 +08:00
|
|
|
};
|
|
|
|
#undef BPF_MAP_TYPE
|
2020-04-29 08:16:08 +08:00
|
|
|
#undef BPF_LINK_TYPE
|
2019-11-15 02:57:15 +08:00
|
|
|
|
2024-01-18 11:31:40 +08:00
|
|
|
static const struct btf_type *find_canonical_prog_ctx_type(enum bpf_prog_type prog_type)
|
2019-11-15 02:57:15 +08:00
|
|
|
{
|
|
|
|
const struct btf_type *conv_struct;
|
|
|
|
const struct btf_member *ctx_type;
|
|
|
|
|
|
|
|
conv_struct = bpf_ctx_convert.t;
|
2024-01-18 11:31:40 +08:00
|
|
|
if (!conv_struct)
|
2019-11-15 02:57:15 +08:00
|
|
|
return NULL;
|
2024-01-18 11:31:40 +08:00
|
|
|
/* prog_type is valid bpf program type. No need for bounds check. */
|
|
|
|
ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2;
|
|
|
|
/* ctx_type is a pointer to prog_ctx_type in vmlinux.
|
|
|
|
* Like 'struct __sk_buff'
|
|
|
|
*/
|
|
|
|
return btf_type_by_id(btf_vmlinux, ctx_type->type);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int find_kern_ctx_type_id(enum bpf_prog_type prog_type)
|
|
|
|
{
|
|
|
|
const struct btf_type *conv_struct;
|
|
|
|
const struct btf_member *ctx_type;
|
|
|
|
|
|
|
|
conv_struct = bpf_ctx_convert.t;
|
|
|
|
if (!conv_struct)
|
|
|
|
return -EFAULT;
|
|
|
|
/* prog_type is valid bpf program type. No need for bounds check. */
|
|
|
|
ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2 + 1;
|
|
|
|
/* ctx_type is a pointer to prog_ctx_type in vmlinux.
|
|
|
|
* Like 'struct sk_buff'
|
|
|
|
*/
|
|
|
|
return ctx_type->type;
|
|
|
|
}
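/* Editor's note, illustrating the layout used above: bpf_ctx_convert
 * declares two members per program type, e.g. for the SCHED_CLS entry
 * in bpf_types.h:
 *
 *	struct __sk_buff BPF_PROG_TYPE_SCHED_CLS_prog;	// member 2 * i
 *	struct sk_buff BPF_PROG_TYPE_SCHED_CLS_kern;	// member 2 * i + 1
 *
 * with i == bpf_ctx_convert_map[BPF_PROG_TYPE_SCHED_CLS]; this is why the
 * lookups above index btf_type_member(conv_struct) with
 * bpf_ctx_convert_map[prog_type] * 2 and * 2 + 1.
 */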
|
|
|
|
|
2024-06-12 23:58:32 +08:00
|
|
|
bool btf_is_projection_of(const char *pname, const char *tname)
|
|
|
|
{
|
|
|
|
if (strcmp(pname, "__sk_buff") == 0 && strcmp(tname, "sk_buff") == 0)
|
|
|
|
return true;
|
|
|
|
if (strcmp(pname, "xdp_md") == 0 && strcmp(tname, "xdp_buff") == 0)
|
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2024-02-13 07:32:18 +08:00
|
|
|
bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
|
|
|
|
const struct btf_type *t, enum bpf_prog_type prog_type,
|
|
|
|
int arg)
|
2024-01-18 11:31:40 +08:00
|
|
|
{
|
|
|
|
const struct btf_type *ctx_type;
|
|
|
|
const char *tname, *ctx_tname;
|
|
|
|
|
2019-11-15 02:57:15 +08:00
|
|
|
t = btf_type_by_id(btf, t->type);
|
2024-02-13 07:32:19 +08:00
|
|
|
|
|
|
|
/* KPROBE programs allow bpf_user_pt_regs_t typedef, which we need to
|
|
|
|
* check before we skip all the typedefs below.
|
|
|
|
*/
|
|
|
|
if (prog_type == BPF_PROG_TYPE_KPROBE) {
|
|
|
|
while (btf_type_is_modifier(t) && !btf_type_is_typedef(t))
|
|
|
|
t = btf_type_by_id(btf, t->type);
|
|
|
|
|
|
|
|
if (btf_type_is_typedef(t)) {
|
|
|
|
tname = btf_name_by_offset(btf, t->name_off);
|
|
|
|
if (tname && strcmp(tname, "bpf_user_pt_regs_t") == 0)
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-11-15 02:57:15 +08:00
|
|
|
while (btf_type_is_modifier(t))
|
|
|
|
t = btf_type_by_id(btf, t->type);
|
|
|
|
if (!btf_type_is_struct(t)) {
|
|
|
|
/* Only pointer to struct is supported for now.
|
|
|
|
* That means that BPF_PROG_TYPE_TRACEPOINT with BTF
|
|
|
|
* is not supported yet.
|
|
|
|
* BPF_PROG_TYPE_RAW_TRACEPOINT is fine.
|
|
|
|
*/
|
2024-02-13 07:32:18 +08:00
|
|
|
return false;
|
2019-11-15 02:57:15 +08:00
|
|
|
}
|
|
|
|
tname = btf_name_by_offset(btf, t->name_off);
|
|
|
|
if (!tname) {
|
2020-01-10 14:41:20 +08:00
|
|
|
bpf_log(log, "arg#%d struct doesn't have a name\n", arg);
|
2024-02-13 07:32:18 +08:00
|
|
|
return false;
|
2019-11-15 02:57:15 +08:00
|
|
|
}
|
2024-01-18 11:31:40 +08:00
|
|
|
|
|
|
|
ctx_type = find_canonical_prog_ctx_type(prog_type);
|
|
|
|
if (!ctx_type) {
|
|
|
|
bpf_log(log, "btf_vmlinux is malformed\n");
|
2019-11-15 02:57:15 +08:00
|
|
|
/* should not happen */
|
2024-02-13 07:32:18 +08:00
|
|
|
return false;
|
2024-01-18 11:31:40 +08:00
|
|
|
}
|
2023-02-16 12:59:52 +08:00
|
|
|
again:
|
2024-01-18 11:31:40 +08:00
|
|
|
ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_type->name_off);
|
2019-11-15 02:57:15 +08:00
|
|
|
if (!ctx_tname) {
|
|
|
|
/* should not happen */
|
|
|
|
bpf_log(log, "Please fix kernel include/linux/bpf_types.h\n");
|
2024-02-13 07:32:18 +08:00
|
|
|
return false;
|
2019-11-15 02:57:15 +08:00
|
|
|
}
|
2024-02-13 07:32:20 +08:00
|
|
|
/* program types without named context types work only with arg:ctx tag */
|
|
|
|
if (ctx_tname[0] == '\0')
|
|
|
|
return false;
|
2019-11-15 02:57:15 +08:00
|
|
|
/* only compare that prog's ctx type name is the same as
|
|
|
|
* kernel expects. No need to compare field by field.
|
|
|
|
* It's ok for bpf prog to do:
|
|
|
|
* struct __sk_buff {};
|
|
|
|
* int socket_filter_bpf_prog(struct __sk_buff *skb)
|
|
|
|
* { // no fields of skb are ever used }
|
|
|
|
*/
|
2024-06-12 23:58:32 +08:00
|
|
|
if (btf_is_projection_of(ctx_tname, tname))
|
2024-02-13 07:32:18 +08:00
|
|
|
return true;
|
2023-02-16 12:59:52 +08:00
|
|
|
if (strcmp(ctx_tname, tname)) {
|
|
|
|
/* bpf_user_pt_regs_t is a typedef, so resolve it to
|
|
|
|
* underlying struct and check name again
|
|
|
|
*/
|
2024-01-18 11:31:40 +08:00
|
|
|
if (!btf_type_is_modifier(ctx_type))
|
2024-02-13 07:32:18 +08:00
|
|
|
return false;
|
2024-01-18 11:31:40 +08:00
|
|
|
while (btf_type_is_modifier(ctx_type))
|
|
|
|
ctx_type = btf_type_by_id(btf_vmlinux, ctx_type->type);
|
2023-02-16 12:59:52 +08:00
|
|
|
goto again;
|
|
|
|
}
|
2024-02-13 07:32:18 +08:00
|
|
|
return true;
|
2019-11-15 02:57:15 +08:00
|
|
|
}
|
2019-10-16 11:24:57 +08:00
|
|
|
|
bpf: enforce types for __arg_ctx-tagged arguments in global subprogs
Add enforcement of expected types for context arguments tagged with
arg:ctx (__arg_ctx) tag.
First, any program type will accept generic `void *` context type when
combined with __arg_ctx tag.
Besides accepting "canonical" struct names and `void *`, for a number of
program types whose context is actually a named struct, we allow a few
pragmatic exceptions to match real-world and expected usage:
- for both kprobes and perf_event we allow `bpf_user_pt_regs_t *` as
canonical context argument type, where `bpf_user_pt_regs_t` is a
*typedef*, not a struct;
- for kprobes, we also always accept `struct pt_regs *`, as that's what
is actually passed as the context to any kprobe program;
- for perf_event, we resolve typedefs (unless it's `bpf_user_pt_regs_t`)
down to actual struct type and accept `struct pt_regs *`, or
`struct user_pt_regs *`, or `struct user_regs_struct *`, depending
on the actual struct type kernel architecture points `bpf_user_pt_regs_t`
typedef to; otherwise, canonical `struct bpf_perf_event_data *` is
expected;
- for raw_tp/raw_tp.w programs, `u64/long *` are accepted, as that's
what's expected with BPF_PROG() usage; otherwise, canonical
`struct bpf_raw_tracepoint_args *` is expected;
- tp_btf supports both `struct bpf_raw_tracepoint_args *` and `u64 *`
formats; both are coded as exceptions since tp_btf is actually a TRACING
program type, which has no canonical context type;
- iterator programs accept `struct bpf_iter__xxx *` structs, currently
with no further iterator-type specific enforcement;
- fentry/fexit/fmod_ret/lsm/struct_ops all accept `u64 *`;
- classic tracepoint programs, as well as syscall and freplace
programs allow any user-provided type.
In all other cases kernel will enforce exact match of struct name to
expected canonical type. And if user-provided type doesn't match that
expectation, verifier will emit helpful message with expected type name.
Note the somewhat unnatural way the check is done after processing all the
arguments. This is done to avoid conflict between bpf and bpf-next
trees. Once trees converge, a small follow up patch will place a simple
btf_validate_prog_ctx_type() check into a proper ARG_PTR_TO_CTX branch
(which bpf-next tree patch refactored already), removing duplicated
arg:ctx detection logic.
Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20240118033143.3384355-4-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
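For illustration (editor's sketch, not from the patch), assuming libbpf's
__arg_ctx macro from bpf_helpers.h (a btf_decl_tag("arg:ctx") attribute):

	/* the verifier now checks that an __arg_ctx-tagged argument's type
	 * matches the program type's canonical context (here struct __sk_buff
	 * for tc programs), or is `void *`
	 */
	__noinline int subprog(struct __sk_buff *ctx __arg_ctx)
	{
		return ctx->len;
	}

	SEC("tc")
	int prog(struct __sk_buff *skb)
	{
		return subprog(skb);
	}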
2024-01-18 11:31:41 +08:00
|
|
|
/* forward declarations for arch-specific underlying types of
|
|
|
|
* bpf_user_pt_regs_t; this avoids the need for arch-specific #ifdef
|
|
|
|
* compilation guards below for BPF_PROG_TYPE_PERF_EVENT checks, but still
|
|
|
|
* works correctly with __builtin_types_compatible_p() on respective
|
|
|
|
* architectures
|
|
|
|
*/
|
|
|
|
struct user_regs_struct;
|
|
|
|
struct user_pt_regs;
|
|
|
|
|
|
|
|
static int btf_validate_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
|
|
|
|
const struct btf_type *t, int arg,
|
|
|
|
enum bpf_prog_type prog_type,
|
|
|
|
enum bpf_attach_type attach_type)
|
|
|
|
{
|
|
|
|
const struct btf_type *ctx_type;
|
|
|
|
const char *tname, *ctx_tname;
|
|
|
|
|
|
|
|
if (!btf_is_ptr(t)) {
|
|
|
|
bpf_log(log, "arg#%d type isn't a pointer\n", arg);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
t = btf_type_by_id(btf, t->type);
|
|
|
|
|
|
|
|
/* KPROBE and PERF_EVENT programs allow bpf_user_pt_regs_t typedef */
|
|
|
|
if (prog_type == BPF_PROG_TYPE_KPROBE || prog_type == BPF_PROG_TYPE_PERF_EVENT) {
|
|
|
|
while (btf_type_is_modifier(t) && !btf_type_is_typedef(t))
|
|
|
|
t = btf_type_by_id(btf, t->type);
|
|
|
|
|
|
|
|
if (btf_type_is_typedef(t)) {
|
|
|
|
tname = btf_name_by_offset(btf, t->name_off);
|
|
|
|
if (tname && strcmp(tname, "bpf_user_pt_regs_t") == 0)
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* all other program types don't use typedefs for context type */
|
|
|
|
while (btf_type_is_modifier(t))
|
|
|
|
t = btf_type_by_id(btf, t->type);
|
|
|
|
|
|
|
|
/* `void *ctx __arg_ctx` is always valid */
|
|
|
|
if (btf_type_is_void(t))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
tname = btf_name_by_offset(btf, t->name_off);
|
|
|
|
if (str_is_empty(tname)) {
|
|
|
|
bpf_log(log, "arg#%d type doesn't have a name\n", arg);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* special cases */
|
|
|
|
switch (prog_type) {
|
|
|
|
case BPF_PROG_TYPE_KPROBE:
|
|
|
|
if (__btf_type_is_struct(t) && strcmp(tname, "pt_regs") == 0)
|
|
|
|
return 0;
|
|
|
|
break;
|
|
|
|
case BPF_PROG_TYPE_PERF_EVENT:
|
|
|
|
if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) &&
|
|
|
|
__btf_type_is_struct(t) && strcmp(tname, "pt_regs") == 0)
|
|
|
|
return 0;
|
|
|
|
if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) &&
|
|
|
|
__btf_type_is_struct(t) && strcmp(tname, "user_pt_regs") == 0)
|
|
|
|
return 0;
|
|
|
|
if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) &&
|
|
|
|
__btf_type_is_struct(t) && strcmp(tname, "user_regs_struct") == 0)
|
|
|
|
return 0;
|
|
|
|
break;
|
|
|
|
case BPF_PROG_TYPE_RAW_TRACEPOINT:
|
|
|
|
case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
|
|
|
|
/* allow u64* as ctx */
|
|
|
|
if (btf_is_int(t) && t->size == 8)
|
|
|
|
return 0;
|
|
|
|
break;
|
|
|
|
case BPF_PROG_TYPE_TRACING:
|
|
|
|
switch (attach_type) {
|
|
|
|
case BPF_TRACE_RAW_TP:
|
|
|
|
/* tp_btf program is TRACING, so need special case here */
|
|
|
|
if (__btf_type_is_struct(t) &&
|
|
|
|
strcmp(tname, "bpf_raw_tracepoint_args") == 0)
|
|
|
|
return 0;
|
|
|
|
/* allow u64* as ctx */
|
|
|
|
if (btf_is_int(t) && t->size == 8)
|
|
|
|
return 0;
|
|
|
|
break;
|
|
|
|
case BPF_TRACE_ITER:
|
|
|
|
/* allow struct bpf_iter__xxx types only */
|
|
|
|
if (__btf_type_is_struct(t) &&
|
|
|
|
strncmp(tname, "bpf_iter__", sizeof("bpf_iter__") - 1) == 0)
|
|
|
|
return 0;
|
|
|
|
break;
|
|
|
|
case BPF_TRACE_FENTRY:
|
|
|
|
case BPF_TRACE_FEXIT:
|
|
|
|
case BPF_MODIFY_RETURN:
|
|
|
|
/* allow u64* as ctx */
|
|
|
|
if (btf_is_int(t) && t->size == 8)
|
|
|
|
return 0;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case BPF_PROG_TYPE_LSM:
|
|
|
|
case BPF_PROG_TYPE_STRUCT_OPS:
|
|
|
|
/* allow u64* as ctx */
|
|
|
|
if (btf_is_int(t) && t->size == 8)
|
|
|
|
return 0;
|
|
|
|
break;
|
|
|
|
case BPF_PROG_TYPE_TRACEPOINT:
|
|
|
|
case BPF_PROG_TYPE_SYSCALL:
|
|
|
|
case BPF_PROG_TYPE_EXT:
|
|
|
|
return 0; /* anything goes */
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx_type = find_canonical_prog_ctx_type(prog_type);
|
|
|
|
if (!ctx_type) {
|
|
|
|
/* should not happen */
|
|
|
|
bpf_log(log, "btf_vmlinux is malformed\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* resolve typedefs and check that underlying structs are matching as well */
|
|
|
|
while (btf_type_is_modifier(ctx_type))
|
|
|
|
ctx_type = btf_type_by_id(btf_vmlinux, ctx_type->type);
|
|
|
|
|
|
|
|
/* if program type doesn't have distinctly named struct type for
|
|
|
|
* context, then __arg_ctx argument can only be `void *`, which we
|
|
|
|
* already checked above
|
|
|
|
*/
|
|
|
|
if (!__btf_type_is_struct(ctx_type)) {
|
|
|
|
bpf_log(log, "arg#%d should be void pointer\n", arg);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_type->name_off);
|
|
|
|
if (!__btf_type_is_struct(t) || strcmp(ctx_tname, tname) != 0) {
|
|
|
|
bpf_log(log, "arg#%d should be `struct %s *`\n", arg, ctx_tname);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-11-15 02:57:17 +08:00
|
|
|
static int btf_translate_to_vmlinux(struct bpf_verifier_log *log,
|
|
|
|
struct btf *btf,
|
|
|
|
const struct btf_type *t,
|
2020-01-10 14:41:20 +08:00
|
|
|
enum bpf_prog_type prog_type,
|
|
|
|
int arg)
|
2019-11-15 02:57:17 +08:00
|
|
|
{
|
2024-02-13 07:32:18 +08:00
|
|
|
if (!btf_is_prog_ctx_type(log, btf, t, prog_type, arg))
|
2019-11-15 02:57:17 +08:00
|
|
|
return -ENOENT;
|
2024-01-18 11:31:40 +08:00
|
|
|
return find_kern_ctx_type_id(prog_type);
|
2019-11-15 02:57:17 +08:00
|
|
|
}
|
|
|
|
|
2022-11-21 03:54:32 +08:00
|
|
|
int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type)
|
|
|
|
{
|
|
|
|
const struct btf_member *kctx_member;
|
|
|
|
const struct btf_type *conv_struct;
|
|
|
|
const struct btf_type *kctx_type;
|
|
|
|
u32 kctx_type_id;
|
|
|
|
|
|
|
|
conv_struct = bpf_ctx_convert.t;
|
|
|
|
/* get member for kernel ctx type */
|
|
|
|
kctx_member = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2 + 1;
|
|
|
|
kctx_type_id = kctx_member->type;
|
|
|
|
kctx_type = btf_type_by_id(btf_vmlinux, kctx_type_id);
|
|
|
|
if (!btf_type_is_struct(kctx_type)) {
|
|
|
|
bpf_log(log, "kern ctx type id %u is not a struct\n", kctx_type_id);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return kctx_type_id;
|
|
|
|
}
|
|
|
|
|
2020-07-12 05:53:26 +08:00
|
|
|
BTF_ID_LIST(bpf_ctx_convert_btf_id)
|
|
|
|
BTF_ID(struct, bpf_ctx_convert)
|
|
|
|
|
2024-06-20 17:17:31 +08:00
|
|
|
static struct btf *btf_parse_base(struct btf_verifier_env *env, const char *name,
|
|
|
|
void *data, unsigned int data_size)
|
2019-10-16 11:24:57 +08:00
|
|
|
{
|
|
|
|
struct btf *btf = NULL;
|
2020-07-12 05:53:26 +08:00
|
|
|
int err;
|
2019-10-16 11:24:57 +08:00
|
|
|
|
2024-04-16 00:20:45 +08:00
|
|
|
if (!IS_ENABLED(CONFIG_DEBUG_INFO_BTF))
|
|
|
|
return ERR_PTR(-ENOENT);
|
|
|
|
|
2019-10-16 11:24:57 +08:00
|
|
|
btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!btf) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto errout;
|
|
|
|
}
|
|
|
|
env->btf = btf;
|
|
|
|
|
2024-06-20 17:17:31 +08:00
|
|
|
btf->data = data;
|
|
|
|
btf->data_size = data_size;
|
2020-11-10 09:19:29 +08:00
|
|
|
btf->kernel_btf = true;
|
2024-06-20 17:17:31 +08:00
|
|
|
snprintf(btf->name, sizeof(btf->name), "%s", name);
|
2019-10-16 11:24:57 +08:00
|
|
|
|
|
|
|
err = btf_parse_hdr(env);
|
|
|
|
if (err)
|
|
|
|
goto errout;
|
|
|
|
|
|
|
|
btf->nohdr_data = btf->data + btf->hdr.hdr_len;
|
|
|
|
|
|
|
|
err = btf_parse_str_sec(env);
|
|
|
|
if (err)
|
|
|
|
goto errout;
|
|
|
|
|
|
|
|
err = btf_check_all_metas(env);
|
|
|
|
if (err)
|
|
|
|
goto errout;
|
|
|
|
|
bpf: Ensure type tags precede modifiers in BTF
It is guaranteed that for modifiers, clang always places type tags
before other modifiers, and then the base type. We would like to rely on
this guarantee inside the kernel to make it simple to parse type tags
from BTF.
However, a user would be allowed to construct a BTF without such
guarantees. Hence, add a pass to check that in modifier chains, type
tags only occur at the head of the chain, and then don't occur later in
the chain.
If we see a type tag, one or more type tags may precede the other
modifiers, after which no type tag may appear again. If we see any other
modifier first, then no modifier following it may be a type tag.
Instead of having to walk chains we verified previously, we can remember
the last good modifier type ID which headed a good chain. At that point,
we must have verified all other chains headed by type IDs less than it.
This makes the verification process less costly, and it becomes a simple
O(n) pass.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220419164608.1990559-2-memxor@gmail.com
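For illustration (editor's note): a modifier chain such as
type_tag -> type_tag -> const -> volatile -> int keeps all type tags at the
head and is accepted, while const -> type_tag -> int places a type tag after
another modifier and is rejected by this pass.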
2022-04-20 00:46:07 +08:00
|
|
|
err = btf_check_type_tags(env, btf, 1);
|
|
|
|
if (err)
|
|
|
|
goto errout;
|
|
|
|
|
2019-10-16 11:24:57 +08:00
|
|
|
refcount_set(&btf->refcnt, 1);
|
2020-11-10 09:19:29 +08:00
|
|
|
|
2019-10-16 11:24:57 +08:00
|
|
|
return btf;
|
|
|
|
|
|
|
|
errout:
|
|
|
|
if (btf) {
|
|
|
|
kvfree(btf->types);
|
|
|
|
kfree(btf);
|
|
|
|
}
|
|
|
|
return ERR_PTR(err);
|
|
|
|
}
|
|
|
|
|
2024-06-20 17:17:31 +08:00
|
|
|
struct btf *btf_parse_vmlinux(void)
|
|
|
|
{
|
|
|
|
struct btf_verifier_env *env = NULL;
|
|
|
|
struct bpf_verifier_log *log;
|
|
|
|
struct btf *btf;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!env)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
|
|
|
log = &env->log;
|
|
|
|
log->level = BPF_LOG_KERNEL;
|
|
|
|
btf = btf_parse_base(env, "vmlinux", __start_BTF, __stop_BTF - __start_BTF);
|
|
|
|
if (IS_ERR(btf))
|
|
|
|
goto err_out;
|
|
|
|
|
|
|
|
/* btf_parse_vmlinux() runs under bpf_verifier_lock */
|
|
|
|
bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]);
|
|
|
|
err = btf_alloc_id(btf);
|
|
|
|
if (err) {
|
|
|
|
btf_free(btf);
|
|
|
|
btf = ERR_PTR(err);
|
|
|
|
}
|
|
|
|
err_out:
|
|
|
|
btf_verifier_env_free(env);
|
|
|
|
return btf;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If .BTF_ids section was created with distilled base BTF, both base and
|
|
|
|
* split BTF ids will need to be mapped to actual base/split ids for
|
|
|
|
* BTF now that it has been relocated.
|
|
|
|
*/
|
|
|
|
static __u32 btf_relocate_id(const struct btf *btf, __u32 id)
|
|
|
|
{
|
|
|
|
if (!btf->base_btf || !btf->base_id_map)
|
|
|
|
return id;
|
|
|
|
return btf->base_id_map[id];
|
|
|
|
}
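/* Editor's sketch with hypothetical ids: if the distilled .BTF.base section
 * records "task_struct" as id 3, but the running vmlinux BTF holds it at
 * id 1042, btf_relocate() fills base_id_map so that
 * btf_relocate_id(btf, 3) == 1042.
 */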
|
|
|
|
|
2024-06-23 21:52:24 +08:00
|
|
|
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
|
|
|
|
|
2024-06-20 17:17:31 +08:00
|
|
|
static struct btf *btf_parse_module(const char *module_name, const void *data,
|
|
|
|
unsigned int data_size, void *base_data,
|
|
|
|
unsigned int base_data_size)
|
2020-11-10 09:19:31 +08:00
|
|
|
{
|
2024-06-20 17:17:31 +08:00
|
|
|
struct btf *btf = NULL, *vmlinux_btf, *base_btf = NULL;
|
2020-11-10 09:19:31 +08:00
|
|
|
struct btf_verifier_env *env = NULL;
|
|
|
|
struct bpf_verifier_log *log;
|
2024-06-20 17:17:31 +08:00
|
|
|
int err = 0;
|
2020-11-10 09:19:31 +08:00
|
|
|
|
2024-06-20 17:17:31 +08:00
|
|
|
vmlinux_btf = bpf_get_btf_vmlinux();
|
|
|
|
if (IS_ERR(vmlinux_btf))
|
|
|
|
return vmlinux_btf;
|
|
|
|
if (!vmlinux_btf)
|
2020-11-10 09:19:31 +08:00
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
|
|
|
|
env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!env)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
|
|
|
log = &env->log;
|
|
|
|
log->level = BPF_LOG_KERNEL;
|
|
|
|
|
2024-06-20 17:17:31 +08:00
|
|
|
if (base_data) {
|
|
|
|
base_btf = btf_parse_base(env, ".BTF.base", base_data, base_data_size);
|
|
|
|
if (IS_ERR(base_btf)) {
|
|
|
|
err = PTR_ERR(base_btf);
|
|
|
|
goto errout;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
base_btf = vmlinux_btf;
|
|
|
|
}
|
|
|
|
|
2020-11-10 09:19:31 +08:00
|
|
|
btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!btf) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto errout;
|
|
|
|
}
|
|
|
|
env->btf = btf;
|
|
|
|
|
|
|
|
btf->base_btf = base_btf;
|
|
|
|
btf->start_id = base_btf->nr_types;
|
|
|
|
btf->start_str_off = base_btf->hdr.str_len;
|
|
|
|
btf->kernel_btf = true;
|
|
|
|
snprintf(btf->name, sizeof(btf->name), "%s", module_name);
|
|
|
|
|
2024-08-28 14:21:28 +08:00
|
|
|
btf->data = kvmemdup(data, data_size, GFP_KERNEL | __GFP_NOWARN);
|
2020-11-10 09:19:31 +08:00
|
|
|
if (!btf->data) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto errout;
|
|
|
|
}
|
|
|
|
btf->data_size = data_size;
|
|
|
|
|
|
|
|
err = btf_parse_hdr(env);
|
|
|
|
if (err)
|
|
|
|
goto errout;
|
|
|
|
|
|
|
|
btf->nohdr_data = btf->data + btf->hdr.hdr_len;
|
|
|
|
|
|
|
|
err = btf_parse_str_sec(env);
|
|
|
|
if (err)
|
|
|
|
goto errout;
|
|
|
|
|
|
|
|
err = btf_check_all_metas(env);
|
|
|
|
if (err)
|
|
|
|
goto errout;
|
|
|
|
|
bpf: Ensure type tags precede modifiers in BTF
It is guaranteed that for modifiers, clang always places type tags
before other modifiers, and then the base type. We would like to rely on
this guarantee inside the kernel to make it simple to parse type tags
from BTF.
However, a user would be allowed to construct a BTF without such
guarantees. Hence, add a pass to check that in modifier chains, type
tags only occur at the head of the chain, and then don't occur later in
the chain.
If we see a type tag, one or more type tags may precede the other
modifiers, after which no type tag may appear again. If we see any other
modifier first, then no modifier following it may be a type tag.
Instead of having to walk chains we verified previously, we can remember
the last good modifier type ID which headed a good chain. At that point,
we must have verified all other chains headed by type IDs less than it.
This makes the verification process less costly, and it becomes a simple
O(n) pass.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220419164608.1990559-2-memxor@gmail.com
2022-04-20 00:46:07 +08:00
|
|
|
err = btf_check_type_tags(env, btf, btf_nr_types(base_btf));
|
|
|
|
if (err)
|
|
|
|
goto errout;
|
|
|
|
|
2024-06-20 17:17:31 +08:00
|
|
|
if (base_btf != vmlinux_btf) {
|
|
|
|
err = btf_relocate(btf, vmlinux_btf, &btf->base_id_map);
|
|
|
|
if (err)
|
|
|
|
goto errout;
|
|
|
|
btf_free(base_btf);
|
|
|
|
base_btf = vmlinux_btf;
|
|
|
|
}
|
|
|
|
|
2020-11-10 09:19:31 +08:00
|
|
|
btf_verifier_env_free(env);
|
|
|
|
refcount_set(&btf->refcnt, 1);
|
|
|
|
return btf;
|
|
|
|
|
|
|
|
errout:
|
|
|
|
btf_verifier_env_free(env);
|
2024-08-30 09:22:14 +08:00
|
|
|
if (!IS_ERR(base_btf) && base_btf != vmlinux_btf)
|
2024-06-20 17:17:31 +08:00
|
|
|
btf_free(base_btf);
|
2020-11-10 09:19:31 +08:00
|
|
|
if (btf) {
|
|
|
|
kvfree(btf->data);
|
|
|
|
kvfree(btf->types);
|
|
|
|
kfree(btf);
|
|
|
|
}
|
|
|
|
return ERR_PTR(err);
|
|
|
|
}
|
|
|
|
|
2020-11-11 12:06:45 +08:00
|
|
|
#endif /* CONFIG_DEBUG_INFO_BTF_MODULES */
|
|
|
|
|
2019-11-15 02:57:17 +08:00
|
|
|
struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
|
|
|
|
{
|
2020-09-29 20:45:50 +08:00
|
|
|
struct bpf_prog *tgt_prog = prog->aux->dst_prog;
|
2019-11-15 02:57:17 +08:00
|
|
|
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption of vmlinux BTF throughout the BPF verifier.
Instead, wherever BTF type IDs are involved, also track the instance of
struct btf that goes along with the type ID. This allows gradually adding
support for kernel module BTFs and using/tracking module types across BPF
helper calls and registers.
This patch also renames the btf_id() function to btf_obj_id() to minimize
naming clash with using btf_id to denote a BTF *type* ID, rather than a BTF
*object*'s ID.
Also, although btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs do, so apply BTF refcounting universally when a BPF
program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler cleanup code.
Now that a BTF type ID is not enough to uniquely identify a BTF type, extend
the BPF trampoline key to include the BTF object ID. To differentiate that
from a target program's BPF ID, set the 31st bit of the type ID. BTF type IDs
(at least currently) are not allowed to take the full 32 bits, so there is no
danger of confusing that bit with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-04 04:46:29 +08:00
|
|
|
if (tgt_prog)
|
2019-11-15 02:57:17 +08:00
|
|
|
return tgt_prog->aux->btf;
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption of vmlinux BTF throughout the BPF verifier.
Instead, wherever BTF type IDs are involved, also track the instance of
struct btf that goes along with the type ID. This allows gradually adding
support for kernel module BTFs and using/tracking module types across BPF
helper calls and registers.
This patch also renames the btf_id() function to btf_obj_id() to minimize
naming clash with using btf_id to denote a BTF *type* ID, rather than a BTF
*object*'s ID.
Also, although btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs do, so apply BTF refcounting universally when a BPF
program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler cleanup code.
Now that a BTF type ID is not enough to uniquely identify a BTF type, extend
the BPF trampoline key to include the BTF object ID. To differentiate that
from a target program's BPF ID, set the 31st bit of the type ID. BTF type IDs
(at least currently) are not allowed to take the full 32 bits, so there is no
danger of confusing that bit with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-04 04:46:29 +08:00
|
|
|
else
|
|
|
|
return prog->aux->attach_btf;
|
2019-11-15 02:57:17 +08:00
|
|
|
}
|
|
|
|
|
2021-12-09 03:32:41 +08:00
|
|
|
static bool is_int_ptr(struct btf *btf, const struct btf_type *t)
|
2020-01-24 00:15:06 +08:00
|
|
|
{
|
2023-04-10 16:59:07 +08:00
|
|
|
/* skip modifiers */
|
|
|
|
t = btf_type_skip_modifiers(btf, t->type, NULL);
|
2020-01-24 00:15:06 +08:00
|
|
|
|
2021-12-09 03:32:41 +08:00
|
|
|
return btf_type_is_int(t);
|
2020-01-24 00:15:06 +08:00
|
|
|
}
|
|
|
|
|
2022-08-31 23:26:46 +08:00
|
|
|
static u32 get_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto,
|
|
|
|
int off)
|
|
|
|
{
|
|
|
|
const struct btf_param *args;
|
|
|
|
const struct btf_type *t;
|
|
|
|
u32 offset = 0, nr_args;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (!func_proto)
|
|
|
|
return off / 8;
|
|
|
|
|
|
|
|
nr_args = btf_type_vlen(func_proto);
|
|
|
|
args = (const struct btf_param *)(func_proto + 1);
|
|
|
|
for (i = 0; i < nr_args; i++) {
|
|
|
|
t = btf_type_skip_modifiers(btf, args[i].type, NULL);
|
|
|
|
offset += btf_type_is_ptr(t) ? 8 : roundup(t->size, 8);
|
|
|
|
if (off < offset)
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
|
|
|
|
t = btf_type_skip_modifiers(btf, func_proto->type, NULL);
|
|
|
|
offset += btf_type_is_ptr(t) ? 8 : roundup(t->size, 8);
|
|
|
|
if (off < offset)
|
|
|
|
return nr_args;
|
|
|
|
|
|
|
|
return nr_args + 1;
|
|
|
|
}
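/* Example (editor's note, hypothetical prototype):
 *
 *	int f(int a, struct foo *b, u64 c)
 *
 * occupies one 8-byte slot per argument (the 4-byte int is rounded up),
 * i.e. a -> [0, 8), b -> [8, 16), c -> [16, 24), so an access at
 * off == 12 resolves to argument index 1.
 */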
|
|
|
|
|
2022-11-25 05:53:14 +08:00
|
|
|
static bool prog_args_trusted(const struct bpf_prog *prog)
|
bpf: Allow trusted pointers to be passed to KF_TRUSTED_ARGS kfuncs
Kfuncs currently support specifying the KF_TRUSTED_ARGS flag to signal
to the verifier that it should enforce that a BPF program passes it a
"safe", trusted pointer. Currently, "safe" means that the pointer is
either PTR_TO_CTX, or is refcounted. There may be cases, however, where
the kernel passes a BPF program a safe / trusted pointer to an object
that the BPF program wishes to use as a kptr, but because the object
does not yet have a ref_obj_id from the perspective of the verifier, the
program would be unable to pass it to a KF_ACQUIRE | KF_TRUSTED_ARGS
kfunc.
The solution is to expand the set of pointers that are considered
trusted according to KF_TRUSTED_ARGS, so that programs can invoke kfuncs
with these pointers without getting rejected by the verifier.
There is already a PTR_UNTRUSTED flag that is set in some scenarios,
such as when a BPF program reads a kptr directly from a map
without performing a bpf_kptr_xchg() call. These pointers of course can
and should be rejected by the verifier. Unfortunately, however,
PTR_UNTRUSTED does not cover all the cases for safety that need to
be addressed to adequately protect kfuncs. Specifically, pointers
obtained by a BPF program "walking" a struct are _not_ considered
PTR_UNTRUSTED according to BPF. For example, say that we were to add a
kfunc called bpf_task_acquire(), with KF_ACQUIRE | KF_TRUSTED_ARGS, to
acquire a struct task_struct *. If we only used PTR_UNTRUSTED to signal
that a task was unsafe to pass to a kfunc, the verifier would mistakenly
allow the following unsafe BPF program to be loaded:
SEC("tp_btf/task_newtask")
int BPF_PROG(unsafe_acquire_task,
struct task_struct *task,
u64 clone_flags)
{
struct task_struct *acquired, *nested;
nested = task->last_wakee;
/* Would not be rejected by the verifier. */
acquired = bpf_task_acquire(nested);
if (!acquired)
return 0;
bpf_task_release(acquired);
return 0;
}
To address this, this patch defines a new type flag called PTR_TRUSTED
which tracks whether a PTR_TO_BTF_ID pointer is safe to pass to a
KF_TRUSTED_ARGS kfunc or a BPF helper function. PTR_TRUSTED pointers are
passed directly from the kernel as a tracepoint or struct_ops callback
argument. Any nested pointer that is obtained from walking a PTR_TRUSTED
pointer is no longer PTR_TRUSTED. From the example above, the struct
task_struct *task argument is PTR_TRUSTED, but the 'nested' pointer
obtained from 'task->last_wakee' is not PTR_TRUSTED.
A subsequent patch will add kfuncs for storing a task kfunc as a kptr,
and then another patch will add selftests to validate.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20221120051004.3605026-3-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-20 13:10:02 +08:00
|
|
|
{
|
2022-11-25 05:53:14 +08:00
|
|
|
enum bpf_attach_type atype = prog->expected_attach_type;
|
|
|
|
|
|
|
|
switch (prog->type) {
|
|
|
|
case BPF_PROG_TYPE_TRACING:
|
|
|
|
return atype == BPF_TRACE_RAW_TP || atype == BPF_TRACE_ITER;
|
|
|
|
case BPF_PROG_TYPE_LSM:
|
2022-12-04 04:49:54 +08:00
|
|
|
return bpf_lsm_is_trusted(prog);
|
2022-11-25 05:53:14 +08:00
|
|
|
case BPF_PROG_TYPE_STRUCT_OPS:
|
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
bpf: Allow trusted pointers to be passed to KF_TRUSTED_ARGS kfuncs
Kfuncs currently support specifying the KF_TRUSTED_ARGS flag to signal
to the verifier that it should enforce that a BPF program passes it a
"safe", trusted pointer. Currently, "safe" means that the pointer is
either PTR_TO_CTX, or is refcounted. There may be cases, however, where
the kernel passes a BPF program a safe / trusted pointer to an object
that the BPF program wishes to use as a kptr, but because the object
does not yet have a ref_obj_id from the perspective of the verifier, the
program would be unable to pass it to a KF_ACQUIRE | KF_TRUSTED_ARGS
kfunc.
The solution is to expand the set of pointers that are considered
trusted according to KF_TRUSTED_ARGS, so that programs can invoke kfuncs
with these pointers without getting rejected by the verifier.
There is already a PTR_UNTRUSTED flag that is set in some scenarios,
such as when a BPF program reads a kptr directly from a map
without performing a bpf_kptr_xchg() call. These pointers of course can
and should be rejected by the verifier. Unfortunately, however,
PTR_UNTRUSTED does not cover all the cases for safety that need to
be addressed to adequately protect kfuncs. Specifically, pointers
obtained by a BPF program "walking" a struct are _not_ considered
PTR_UNTRUSTED according to BPF. For example, say that we were to add a
kfunc called bpf_task_acquire(), with KF_ACQUIRE | KF_TRUSTED_ARGS, to
acquire a struct task_struct *. If we only used PTR_UNTRUSTED to signal
that a task was unsafe to pass to a kfunc, the verifier would mistakenly
allow the following unsafe BPF program to be loaded:
SEC("tp_btf/task_newtask")
int BPF_PROG(unsafe_acquire_task,
struct task_struct *task,
u64 clone_flags)
{
struct task_struct *acquired, *nested;
nested = task->last_wakee;
/* Would not be rejected by the verifier. */
acquired = bpf_task_acquire(nested);
if (!acquired)
return 0;
bpf_task_release(acquired);
return 0;
}
To address this, this patch defines a new type flag called PTR_TRUSTED
which tracks whether a PTR_TO_BTF_ID pointer is safe to pass to a
KF_TRUSTED_ARGS kfunc or a BPF helper function. PTR_TRUSTED pointers are
passed directly from the kernel as a tracepoint or struct_ops callback
argument. Any nested pointer that is obtained from walking a PTR_TRUSTED
pointer is no longer PTR_TRUSTED. From the example above, the struct
task_struct *task argument is PTR_TRUSTED, but the 'nested' pointer
obtained from 'task->last_wakee' is not PTR_TRUSTED.
A subsequent patch will add kfuncs for storing a task kfunc as a kptr,
and then another patch will add selftests to validate.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20221120051004.3605026-3-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-20 13:10:02 +08:00
|
|
|
}
|
|
|
|
|
2024-02-09 10:37:49 +08:00
|
|
|
int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto,
|
|
|
|
u32 arg_no)
|
|
|
|
{
|
|
|
|
const struct btf_param *args;
|
|
|
|
const struct btf_type *t;
|
|
|
|
int off = 0, i;
|
|
|
|
u32 sz;
|
|
|
|
|
|
|
|
args = btf_params(func_proto);
|
|
|
|
for (i = 0; i < arg_no; i++) {
|
|
|
|
t = btf_type_by_id(btf, args[i].type);
|
|
|
|
t = btf_resolve_size(btf, t, &sz);
|
|
|
|
if (IS_ERR(t))
|
|
|
|
return PTR_ERR(t);
|
|
|
|
off += roundup(sz, 8);
|
|
|
|
}
|
|
|
|
|
|
|
|
return off;
|
|
|
|
}
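/* Example (editor's note, hypothetical prototype): for
 *
 *	int f(int a, struct foo *b)
 *
 * btf_ctx_arg_offset(btf, func_proto, 1) walks only arg 0 and returns 8,
 * the 4-byte int having been rounded up to one 8-byte slot.
 */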
|
|
|
|
|
2019-10-16 11:25:00 +08:00
|
|
|
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
|
|
|
|
const struct bpf_prog *prog,
|
|
|
|
struct bpf_insn_access_aux *info)
|
|
|
|
{
|
2019-10-25 08:18:11 +08:00
|
|
|
const struct btf_type *t = prog->aux->attach_func_proto;
|
2020-09-29 20:45:50 +08:00
|
|
|
struct bpf_prog *tgt_prog = prog->aux->dst_prog;
|
2019-11-15 02:57:17 +08:00
|
|
|
struct btf *btf = bpf_prog_get_target_btf(prog);
|
2019-10-25 08:18:11 +08:00
|
|
|
const char *tname = prog->aux->attach_func_name;
|
2019-10-16 11:25:00 +08:00
|
|
|
struct bpf_verifier_log *log = info->log;
|
|
|
|
const struct btf_param *args;
|
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, the bpf verifier will allow the user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that the bpf developer should use the bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
the current verifier will assume that a->b is a kernel memory access
and this may generate an incorrect result.
Now that BTF contains __user information, the verifier can check whether
the pointer points to user memory or not. If it does, the verifier
can reject the program and force users to use the bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 23:46:06 +08:00
|
|
|
const char *tag_value;
|
2019-10-16 11:25:00 +08:00
|
|
|
u32 nr_args, arg;
|
2020-05-14 02:02:21 +08:00
|
|
|
int i, ret;
|
2019-10-16 11:25:00 +08:00
|
|
|
|
|
|
|
if (off % 8) {
|
2019-10-25 08:18:11 +08:00
|
|
|
bpf_log(log, "func '%s' offset %d is not multiple of 8\n",
|
2019-10-16 11:25:00 +08:00
|
|
|
tname, off);
|
|
|
|
return false;
|
|
|
|
}
|
2022-08-31 23:26:46 +08:00
|
|
|
arg = get_ctx_arg_idx(btf, t, off);
|
2019-10-16 11:25:00 +08:00
|
|
|
args = (const struct btf_param *)(t + 1);
|
2021-02-26 04:26:29 +08:00
|
|
|
/* if (t == NULL) Fall back to default BPF prog with
|
|
|
|
* MAX_BPF_FUNC_REG_ARGS u64 arguments.
|
|
|
|
*/
|
|
|
|
nr_args = t ? btf_type_vlen(t) : MAX_BPF_FUNC_REG_ARGS;
|
2019-10-25 08:18:11 +08:00
|
|
|
if (prog->aux->attach_btf_trace) {
|
|
|
|
/* skip first 'void *__data' argument in btf_trace_##name typedef */
|
|
|
|
args++;
|
|
|
|
nr_args--;
|
|
|
|
}
|
2019-11-15 02:57:04 +08:00
|
|
|
|
2020-03-30 22:42:46 +08:00
|
|
|
if (arg > nr_args) {
|
|
|
|
bpf_log(log, "func '%s' doesn't have %d-th argument\n",
|
|
|
|
tname, arg + 1);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-03-05 03:18:50 +08:00
|
|
|
if (arg == nr_args) {
|
2020-03-30 22:42:46 +08:00
|
|
|
switch (prog->expected_attach_type) {
|
|
|
|
case BPF_LSM_MAC:
|
2024-07-19 19:00:52 +08:00
|
|
|
/* mark we are accessing the return value */
|
|
|
|
info->is_retval = true;
|
|
|
|
fallthrough;
|
|
|
|
case BPF_LSM_CGROUP:
|
2020-03-30 22:42:46 +08:00
|
|
|
case BPF_TRACE_FEXIT:
|
2020-03-29 08:43:52 +08:00
|
|
|
/* When LSM programs are attached to void LSM hooks
|
|
|
|
* they use FEXIT trampolines and when attached to
|
|
|
|
* int LSM hooks, they use MODIFY_RETURN trampolines.
|
|
|
|
*
|
|
|
|
* While the LSM programs are BPF_MODIFY_RETURN-like
|
|
|
|
* the check:
|
|
|
|
*
|
|
|
|
* if (ret_type != 'int')
|
|
|
|
* return -EINVAL;
|
|
|
|
*
|
|
|
|
* is _not_ done here. This is still safe as LSM hooks
|
|
|
|
* have only void and int return types.
|
|
|
|
*/
|
2020-03-05 03:18:50 +08:00
|
|
|
if (!t)
|
|
|
|
return true;
|
|
|
|
t = btf_type_by_id(btf, t->type);
|
2020-03-30 22:42:46 +08:00
|
|
|
break;
|
|
|
|
case BPF_MODIFY_RETURN:
|
2020-03-05 03:18:50 +08:00
|
|
|
/* For now the BPF_MODIFY_RETURN can only be attached to
|
|
|
|
* functions that return an int.
|
|
|
|
*/
|
|
|
|
if (!t)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
t = btf_type_skip_modifiers(btf, t->type, NULL);
|
2020-06-25 06:20:39 +08:00
|
|
|
if (!btf_type_is_small_int(t)) {
|
2020-03-05 03:18:50 +08:00
|
|
|
bpf_log(log,
|
|
|
|
"ret type %s not allowed for fmod_ret\n",
|
2022-09-17 04:28:00 +08:00
|
|
|
btf_type_str(t));
|
2020-03-05 03:18:50 +08:00
|
|
|
return false;
|
|
|
|
}
|
2020-03-30 22:42:46 +08:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
bpf_log(log, "func '%s' doesn't have %d-th argument\n",
|
|
|
|
tname, arg + 1);
|
|
|
|
return false;
|
2020-03-05 03:18:50 +08:00
|
|
|
}
|
2019-11-15 02:57:04 +08:00
|
|
|
} else {
|
2019-11-15 02:57:17 +08:00
|
|
|
if (!t)
|
2021-02-26 04:26:29 +08:00
|
|
|
/* Default prog with MAX_BPF_FUNC_REG_ARGS args */
|
2019-11-15 02:57:17 +08:00
|
|
|
return true;
|
|
|
|
t = btf_type_by_id(btf, args[arg].type);
|
2019-10-16 11:25:00 +08:00
|
|
|
}
|
2020-03-30 22:42:46 +08:00
|
|
|
|
2019-10-16 11:25:00 +08:00
|
|
|
/* skip modifiers */
|
|
|
|
while (btf_type_is_modifier(t))
|
2019-11-15 02:57:17 +08:00
|
|
|
t = btf_type_by_id(btf, t->type);
|
2022-08-31 23:26:46 +08:00
|
|
|
if (btf_type_is_small_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t))
|
2019-10-16 11:25:00 +08:00
|
|
|
/* accessing a scalar */
|
|
|
|
return true;
|
|
|
|
if (!btf_type_is_ptr(t)) {
|
|
|
|
bpf_log(log,
|
2019-10-25 08:18:11 +08:00
|
|
|
"func '%s' arg%d '%s' has type %s. Only pointer access is allowed\n",
|
2019-10-16 11:25:00 +08:00
|
|
|
tname, arg,
|
2019-11-15 02:57:17 +08:00
|
|
|
__btf_name_by_offset(btf, t->name_off),
|
2022-09-17 04:28:00 +08:00
|
|
|
btf_type_str(t));
|
2019-10-16 11:25:00 +08:00
|
|
|
return false;
|
|
|
|
}
|
2020-07-24 02:41:11 +08:00
|
|
|
|
|
|
|
/* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */
|
|
|
|
for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
|
|
|
|
const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
|
2021-12-17 08:31:47 +08:00
|
|
|
u32 type, flag;
|
2020-07-24 02:41:11 +08:00
|
|
|
|
2021-12-17 08:31:47 +08:00
|
|
|
type = base_type(ctx_arg_info->reg_type);
|
|
|
|
flag = type_flag(ctx_arg_info->reg_type);
|
2021-12-17 08:31:48 +08:00
|
|
|
if (ctx_arg_info->offset == off && type == PTR_TO_BUF &&
|
2021-12-17 08:31:47 +08:00
|
|
|
(flag & PTR_MAYBE_NULL)) {
|
2020-07-24 02:41:11 +08:00
|
|
|
info->reg_type = ctx_arg_info->reg_type;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-16 11:25:00 +08:00
|
|
|
if (t->type == 0)
|
|
|
|
/* This is a pointer to void.
|
|
|
|
* It is the same as scalar from the verifier safety pov.
|
|
|
|
* No further pointer walking is allowed.
|
|
|
|
*/
|
|
|
|
return true;
|
|
|
|
|
2021-12-09 03:32:41 +08:00
|
|
|
if (is_int_ptr(btf, t))
|
2020-01-24 00:15:06 +08:00
|
|
|
return true;
|
|
|
|
|
2019-10-16 11:25:00 +08:00
|
|
|
/* this is a pointer to another type */
|
2020-05-14 02:02:21 +08:00
|
|
|
for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
|
|
|
|
const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
|
|
|
|
|
|
|
|
if (ctx_arg_info->offset == off) {
|
bpf: Emit better log message if bpf_iter ctx arg btf_id == 0
To avoid kernel build failures due to some functions/types referenced
from .BTF_ids sections being missing, the patch ([1]) fills in btf_id 0
for these types.
In bpf verifier, for percpu variable and helper returning btf_id cases,
verifier already emitted proper warning with something like
verbose(env, "Helper has invalid btf_id in R%d\n", regno);
verbose(env, "invalid return type %d of func %s#%d\n",
fn->ret_type, func_id_name(func_id), func_id);
But this is not the case for bpf_iter context arguments.
I hacked resolve_btfids to encode btf_id 0 for struct task_struct.
With `./test_progs -n 7/5`, I got,
0: (79) r2 = *(u64 *)(r1 +0)
func 'bpf_iter_task' arg0 has btf_id 29739 type STRUCT 'bpf_iter_meta'
; struct seq_file *seq = ctx->meta->seq;
1: (79) r6 = *(u64 *)(r2 +0)
; struct task_struct *task = ctx->task;
2: (79) r7 = *(u64 *)(r1 +8)
; if (task == (void *)0) {
3: (55) if r7 != 0x0 goto pc+11
...
; BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
26: (61) r1 = *(u32 *)(r7 +1372)
Type '(anon)' is not a struct
Basically, verifier will return btf_id 0 for task_struct.
Later on, when the code tries to access task->tgid, the
verifier correctly complains the type is '(anon)' and it is
not a struct. Users still need to backtrace to find out
what is going on.
Let us catch the invalid btf_id 0 earlier
and provide a better message indicating that the btf_id is wrong.
The new error message looks like below:
R1 type=ctx expected=fp
; struct seq_file *seq = ctx->meta->seq;
0: (79) r2 = *(u64 *)(r1 +0)
func 'bpf_iter_task' arg0 has btf_id 29739 type STRUCT 'bpf_iter_meta'
; struct seq_file *seq = ctx->meta->seq;
1: (79) r6 = *(u64 *)(r2 +0)
; struct task_struct *task = ctx->task;
2: (79) r7 = *(u64 *)(r1 +8)
invalid btf_id for context argument offset 8
invalid bpf_context access off=8 size=8
[1] https://lore.kernel.org/bpf/20210727132532.2473636-1-hengqi.chen@gmail.com/
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210728183025.1461750-1-yhs@fb.com
2021-07-29 02:30:25 +08:00
|
|
|
if (!ctx_arg_info->btf_id) {
|
|
|
|
bpf_log(log,"invalid btf_id for context argument offset %u\n", off);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-05-14 02:02:21 +08:00
|
|
|
info->reg_type = ctx_arg_info->reg_type;
|
2024-02-09 10:37:47 +08:00
|
|
|
info->btf = ctx_arg_info->btf ? : btf_vmlinux;
|
2020-07-21 00:34:03 +08:00
|
|
|
info->btf_id = ctx_arg_info->btf_id;
|
|
|
|
return true;
|
2020-05-14 02:02:21 +08:00
|
|
|
}
|
|
|
|
}
|
2019-10-16 11:25:00 +08:00
|
|
|
|
2020-07-21 00:34:03 +08:00
|
|
|
info->reg_type = PTR_TO_BTF_ID;
|
2022-11-25 05:53:14 +08:00
|
|
|
if (prog_args_trusted(prog))
|
bpf: Allow trusted pointers to be passed to KF_TRUSTED_ARGS kfuncs
Kfuncs currently support specifying the KF_TRUSTED_ARGS flag to signal
to the verifier that it should enforce that a BPF program passes it a
"safe", trusted pointer. Currently, "safe" means that the pointer is
either PTR_TO_CTX, or is refcounted. There may be cases, however, where
the kernel passes a BPF program a safe / trusted pointer to an object
that the BPF program wishes to use as a kptr, but because the object
does not yet have a ref_obj_id from the perspective of the verifier, the
program would be unable to pass it to a KF_ACQUIRE | KF_TRUSTED_ARGS
kfunc.
The solution is to expand the set of pointers that are considered
trusted according to KF_TRUSTED_ARGS, so that programs can invoke kfuncs
with these pointers without getting rejected by the verifier.
There is already a PTR_UNTRUSTED flag that is set in some scenarios,
such as when a BPF program reads a kptr directly from a map
without performing a bpf_kptr_xchg() call. These pointers of course can
and should be rejected by the verifier. Unfortunately, however,
PTR_UNTRUSTED does not cover all the cases for safety that need to
be addressed to adequately protect kfuncs. Specifically, pointers
obtained by a BPF program "walking" a struct are _not_ considered
PTR_UNTRUSTED according to BPF. For example, say that we were to add a
kfunc called bpf_task_acquire(), with KF_ACQUIRE | KF_TRUSTED_ARGS, to
acquire a struct task_struct *. If we only used PTR_UNTRUSTED to signal
that a task was unsafe to pass to a kfunc, the verifier would mistakenly
allow the following unsafe BPF program to be loaded:
SEC("tp_btf/task_newtask")
int BPF_PROG(unsafe_acquire_task,
struct task_struct *task,
u64 clone_flags)
{
struct task_struct *acquired, *nested;
nested = task->last_wakee;
/* Would not be rejected by the verifier. */
acquired = bpf_task_acquire(nested);
if (!acquired)
return 0;
bpf_task_release(acquired);
return 0;
}
To address this, this patch defines a new type flag called PTR_TRUSTED
which tracks whether a PTR_TO_BTF_ID pointer is safe to pass to a
KF_TRUSTED_ARGS kfunc or a BPF helper function. PTR_TRUSTED pointers are
passed directly from the kernel as a tracepoint or struct_ops callback
argument. Any nested pointer that is obtained from walking a PTR_TRUSTED
pointer is no longer PTR_TRUSTED. From the example above, the struct
task_struct *task argument is PTR_TRUSTED, but the 'nested' pointer
obtained from 'task->last_wakee' is not PTR_TRUSTED.
A subsequent patch will add kfuncs for storing a task kfunc as a kptr,
and then another patch will add selftests to validate.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20221120051004.3605026-3-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-20 13:10:02 +08:00
|
|
|
info->reg_type |= PTR_TRUSTED;
|
|
|
|
|
2024-09-11 11:37:15 +08:00
|
|
|
if (btf_param_match_suffix(btf, &args[arg], "__nullable"))
|
|
|
|
info->reg_type |= PTR_MAYBE_NULL;
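/* Editor's illustration of the "__nullable" suffix handled above: a tp_btf
 * program can declare, e.g.,
 *
 *	SEC("tp_btf/task_newtask")
 *	int BPF_PROG(handle, struct task_struct *task__nullable, u64 clone_flags)
 *
 * and the verifier will then demand a NULL check on the argument before
 * it may be dereferenced.
 */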
|
|
|
|
|
2019-11-15 02:57:17 +08:00
|
|
|
if (tgt_prog) {
|
2020-09-29 20:45:52 +08:00
|
|
|
enum bpf_prog_type tgt_type;
|
|
|
|
|
|
|
|
if (tgt_prog->type == BPF_PROG_TYPE_EXT)
|
|
|
|
tgt_type = tgt_prog->aux->saved_dst_prog_type;
|
|
|
|
else
|
|
|
|
tgt_type = tgt_prog->type;
|
|
|
|
|
|
|
|
ret = btf_translate_to_vmlinux(log, btf, t, tgt_type, arg);
|
2019-11-15 02:57:17 +08:00
|
|
|
if (ret > 0) {
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption of vmlinux BTF throughout the BPF verifier.
Instead, wherever BTF type IDs are involved, also track the instance of
struct btf that goes along with the type ID. This allows gradually adding
support for kernel module BTFs and using/tracking module types across BPF
helper calls and registers.
This patch also renames the btf_id() function to btf_obj_id() to minimize
naming clash with using btf_id to denote a BTF *type* ID, rather than a BTF
*object*'s ID.
Also, although btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs do, so apply BTF refcounting universally when a BPF
program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler cleanup code.
Now that a BTF type ID is not enough to uniquely identify a BTF type, extend
the BPF trampoline key to include the BTF object ID. To differentiate that
from a target program's BPF ID, set the 31st bit of the type ID. BTF type IDs
(at least currently) are not allowed to take the full 32 bits, so there is no
danger of confusing that bit with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-04 04:46:29 +08:00
|
|
|
info->btf = btf_vmlinux;
|
2019-11-15 02:57:17 +08:00
|
|
|
info->btf_id = ret;
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
2020-01-09 08:34:56 +08:00
|
|
|
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption of vmlinux BTF throughout the BPF verifier.
Instead, wherever BTF type IDs are involved, also track the instance of
struct btf that goes along with the type ID. This allows gradually adding
support for kernel module BTFs and using/tracking module types across BPF
helper calls and registers.
This patch also renames the btf_id() function to btf_obj_id() to minimize
naming clash with using btf_id to denote a BTF *type* ID, rather than a BTF
*object*'s ID.
Also, although btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs do, so apply BTF refcounting universally when a BPF
program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler cleanup code.
Now that a BTF type ID is not enough to uniquely identify a BTF type, extend
the BPF trampoline key to include the BTF object ID. To differentiate that
from a target program's BPF ID, set the 31st bit of the type ID. BTF type IDs
(at least currently) are not allowed to take the full 32 bits, so there is no
danger of confusing that bit with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-04 04:46:29 +08:00
|
|
|
info->btf = btf;
|
2020-01-09 08:34:56 +08:00
|
|
|
info->btf_id = t->type;
|
2019-11-15 02:57:17 +08:00
|
|
|
t = btf_type_by_id(btf, t->type);
|
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, the bpf verifier will allow the user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that the bpf developer should use the bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
the current verifier will assume that a->b is a kernel memory access
and this may generate an incorrect result.
Now that BTF contains __user information, the verifier can check whether
the pointer points to user memory or not. If it does, the verifier
can reject the program and force users to use the bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 23:46:06 +08:00
|
|
|
|
|
|
|
if (btf_type_is_type_tag(t)) {
|
|
|
|
tag_value = __btf_name_by_offset(btf, t->name_off);
|
|
|
|
if (strcmp(tag_value, "user") == 0)
|
|
|
|
info->reg_type |= MEM_USER;
|
2022-03-05 03:16:56 +08:00
|
|
|
if (strcmp(tag_value, "percpu") == 0)
|
|
|
|
info->reg_type |= MEM_PERCPU;
|
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, the bpf verifier will allow the user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that the bpf developer should use the bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
the current verifier will assume that a->b is a kernel memory access
and this may generate an incorrect result.
Now that BTF contains __user information, the verifier can check whether
the pointer points to user memory or not. If it does, the verifier
can reject the program and force users to use the bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 23:46:06 +08:00
|
|
|
}
|
|
|
|
|
2019-10-16 11:25:00 +08:00
|
|
|
/* skip modifiers */
|
2020-01-09 08:34:56 +08:00
|
|
|
while (btf_type_is_modifier(t)) {
|
|
|
|
info->btf_id = t->type;
|
2019-11-15 02:57:17 +08:00
|
|
|
t = btf_type_by_id(btf, t->type);
|
2020-01-09 08:34:56 +08:00
|
|
|
}
|
2019-10-16 11:25:00 +08:00
|
|
|
if (!btf_type_is_struct(t)) {
|
|
|
|
bpf_log(log,
|
2019-10-25 08:18:11 +08:00
|
|
|
"func '%s' arg%d type %s is not a struct\n",
|
2022-09-17 04:28:00 +08:00
|
|
|
tname, arg, btf_type_str(t));
|
2019-10-16 11:25:00 +08:00
|
|
|
return false;
|
|
|
|
}
|
2019-10-25 08:18:11 +08:00
|
|
|
bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n",
|
2022-09-17 04:28:00 +08:00
|
|
|
tname, arg, info->btf_id, btf_type_str(t),
|
2019-11-15 02:57:17 +08:00
|
|
|
__btf_name_by_offset(btf, t->name_off));
|
2019-10-16 11:25:00 +08:00
|
|
|
return true;
|
|
|
|
}
|
2024-01-20 06:50:04 +08:00
|
|
|
EXPORT_SYMBOL_GPL(btf_ctx_access);
|
2019-10-16 11:25:00 +08:00
|
|
|
|
2020-08-26 03:21:17 +08:00
|
|
|
enum bpf_struct_walk_result {
|
|
|
|
/* < 0 error */
|
|
|
|
WALK_SCALAR = 0,
|
|
|
|
WALK_PTR,
|
|
|
|
WALK_STRUCT,
|
|
|
|
};
|
|
|
|
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption of vmlinux BTF throughout the BPF verifier.
Instead, wherever BTF type IDs are involved, also track the instance of
struct btf that goes along with the type ID. This allows gradually adding
support for kernel module BTFs and using/tracking module types across BPF
helper calls and registers.
This patch also renames the btf_id() function to btf_obj_id() to minimize
naming clash with using btf_id to denote a BTF *type* ID, rather than a BTF
*object*'s ID.
Also, although btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs do, so apply BTF refcounting universally when a BPF
program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler cleanup code.
Now that a BTF type ID is not enough to uniquely identify a BTF type, extend
the BPF trampoline key to include the BTF object ID. To differentiate that
from a target program's BPF ID, set the 31st bit of the type ID. BTF type IDs
(at least currently) are not allowed to take the full 32 bits, so there is no
danger of confusing that bit with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-04 04:46:29 +08:00
|
|
|
static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
|
2020-08-26 03:21:17 +08:00
|
|
|
const struct btf_type *t, int off, int size,
|
2023-04-04 12:50:24 +08:00
|
|
|
u32 *next_btf_id, enum bpf_type_flag *flag,
|
|
|
|
const char **field_name)
|
2019-10-16 11:25:00 +08:00
|
|
|
{
|
2019-11-08 02:09:03 +08:00
|
|
|
u32 i, moff, mtrue_end, msize = 0, total_nelems = 0;
|
|
|
|
const struct btf_type *mtype, *elem_type = NULL;
|
2019-10-16 11:25:00 +08:00
|
|
|
const struct btf_member *member;
|
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, the bpf verifier will allow the user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that the bpf developer should use the bpf_probe_read_user() helper to
get the value a->b. Without utilizing the BTF __user tagging information,
the current verifier will assume that a->b is a kernel memory access
and this may generate an incorrect result.
Now that BTF contains the __user information, it can check whether the
pointer points to user memory or not. If it does, the verifier
can reject the program and force users to use the bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 23:46:06 +08:00
|
|
|
const char *tname, *mname, *tag_value;
|
2020-08-26 03:21:17 +08:00
|
|
|
u32 vlen, elem_id, mid;
|
2019-10-16 11:25:00 +08:00
|
|
|
|
|
|
|
again:
|
2023-06-27 05:25:21 +08:00
|
|
|
if (btf_type_is_modifier(t))
|
|
|
|
t = btf_type_skip_modifiers(btf, t->type, NULL);
|
2020-12-04 04:46:29 +08:00
|
|
|
tname = __btf_name_by_offset(btf, t->name_off);
|
2019-10-16 11:25:00 +08:00
|
|
|
if (!btf_type_is_struct(t)) {
|
2020-01-09 08:34:56 +08:00
|
|
|
bpf_log(log, "Type '%s' is not a struct\n", tname);
|
2019-10-16 11:25:00 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2020-05-10 01:59:16 +08:00
|
|
|
vlen = btf_type_vlen(t);
|
2023-07-13 10:56:39 +08:00
|
|
|
if (BTF_INFO_KIND(t->info) == BTF_KIND_UNION && vlen != 1 && !(*flag & PTR_UNTRUSTED))
|
|
|
|
/*
|
|
|
|
* walking unions yields untrusted pointers
|
|
|
|
* with exception of __bpf_md_ptr and other
|
|
|
|
* unions with a single member
|
|
|
|
*/
|
|
|
|
*flag |= PTR_UNTRUSTED;
|
|
|
|
|
2020-01-09 08:35:01 +08:00
|
|
|
if (off + size > t->size) {
|
2020-05-10 01:59:16 +08:00
|
|
|
/* If the last element is a variable size array, we may
|
|
|
|
* need to relax the rule.
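* E.g. (illustrative) for
*   struct foo { u32 hdr; u32 data[]; };
* an access with off + size > t->size can still be
* valid if it lands inside the flexible array data[].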
|
|
|
|
*/
|
|
|
|
struct btf_array *array_elem;
|
|
|
|
|
|
|
|
if (vlen == 0)
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
member = btf_type_member(t) + vlen - 1;
|
2020-12-04 04:46:29 +08:00
|
|
|
mtype = btf_type_skip_modifiers(btf, member->type,
|
2020-05-10 01:59:16 +08:00
|
|
|
NULL);
|
|
|
|
if (!btf_type_is_array(mtype))
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
array_elem = (struct btf_array *)(mtype + 1);
|
|
|
|
if (array_elem->nelems != 0)
|
|
|
|
goto error;
|
|
|
|
|
2021-12-02 02:10:25 +08:00
|
|
|
moff = __btf_member_bit_offset(t, member) / 8;
|
2020-05-10 01:59:16 +08:00
|
|
|
if (off < moff)
|
|
|
|
goto error;
|
|
|
|
|
2023-04-20 11:27:34 +08:00
|
|
|
/* allow structure and integer */
|
2020-12-04 04:46:29 +08:00
|
|
|
t = btf_type_skip_modifiers(btf, array_elem->type,
|
2020-08-26 03:21:16 +08:00
|
|
|
NULL);
|
2023-04-20 11:27:34 +08:00
|
|
|
|
|
|
|
if (btf_type_is_int(t))
|
|
|
|
return WALK_SCALAR;
|
|
|
|
|
2020-08-26 03:21:16 +08:00
|
|
|
if (!btf_type_is_struct(t))
|
2020-05-10 01:59:16 +08:00
|
|
|
goto error;
|
|
|
|
|
2020-08-26 03:21:16 +08:00
|
|
|
off = (off - moff) % t->size;
|
|
|
|
goto again;
|
2020-05-10 01:59:16 +08:00
|
|
|
|
|
|
|
error:
|
2020-01-09 08:35:01 +08:00
|
|
|
bpf_log(log, "access beyond struct %s at off %u size %u\n",
|
|
|
|
tname, off, size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
2019-10-16 11:25:00 +08:00
|
|
|
|
2020-01-09 08:35:01 +08:00
|
|
|
for_each_member(i, t, member) {
|
2019-11-08 02:09:03 +08:00
|
|
|
/* offset of the field in bytes */
|
2021-12-02 02:10:25 +08:00
|
|
|
moff = __btf_member_bit_offset(t, member) / 8;
|
2019-11-08 02:09:03 +08:00
|
|
|
if (off + size <= moff)
|
2019-10-16 11:25:00 +08:00
|
|
|
/* won't find anything, field is already too far */
|
|
|
|
break;
|
2020-01-09 08:35:01 +08:00
|
|
|
|
2021-12-02 02:10:25 +08:00
|
|
|
if (__btf_member_bitfield_size(t, member)) {
|
|
|
|
u32 end_bit = __btf_member_bit_offset(t, member) +
|
|
|
|
__btf_member_bitfield_size(t, member);
|
2020-01-09 08:35:01 +08:00
|
|
|
|
|
|
|
/* off <= moff instead of off == moff because clang
|
|
|
|
* does not generate a BTF member for anonymous
|
|
|
|
* bitfield like the ":16" here:
|
|
|
|
* struct {
|
|
|
|
* int :16;
|
|
|
|
* int x:8;
|
|
|
|
* };
|
|
|
|
*/
|
|
|
|
if (off <= moff &&
|
|
|
|
BITS_ROUNDUP_BYTES(end_bit) <= off + size)
|
2020-08-26 03:21:17 +08:00
|
|
|
return WALK_SCALAR;
|
2020-01-09 08:35:01 +08:00
|
|
|
|
|
|
|
/* off may be accessing a following member
|
|
|
|
*
|
|
|
|
* or
|
|
|
|
*
|
|
|
|
* Doing partial access at either end of this
|
|
|
|
* bitfield. Continue in this case as well to
|
|
|
|
* treat it as not accessing this bitfield
|
|
|
|
* and eventually error out as field not
|
|
|
|
* found to keep it simple.
|
|
|
|
* It could be relaxed if there was a legit
|
|
|
|
* partial access case later.
|
|
|
|
*/
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2019-11-08 02:09:03 +08:00
|
|
|
/* In case of "off" is pointing to holes of a struct */
|
|
|
|
if (off < moff)
|
2020-01-09 08:35:01 +08:00
|
|
|
break;
|
2019-10-16 11:25:00 +08:00
|
|
|
|
|
|
|
/* type of the field */
|
2020-08-26 03:21:17 +08:00
|
|
|
mid = member->type;
|
2020-12-04 04:46:29 +08:00
|
|
|
mtype = btf_type_by_id(btf, member->type);
|
|
|
|
mname = __btf_name_by_offset(btf, member->name_off);
|
2019-10-16 11:25:00 +08:00
|
|
|
|
2020-12-04 04:46:29 +08:00
|
|
|
mtype = __btf_resolve_size(btf, mtype, &msize,
|
2020-08-26 03:21:17 +08:00
|
|
|
&elem_type, &elem_id, &total_nelems,
|
|
|
|
&mid);
|
2019-11-08 02:09:03 +08:00
|
|
|
if (IS_ERR(mtype)) {
|
2019-10-16 11:25:00 +08:00
|
|
|
bpf_log(log, "field %s doesn't have size\n", mname);
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
2019-11-08 02:09:03 +08:00
|
|
|
|
|
|
|
mtrue_end = moff + msize;
|
|
|
|
if (off >= mtrue_end)
|
2019-10-16 11:25:00 +08:00
|
|
|
/* no overlap with member, keep iterating */
|
|
|
|
continue;
|
2019-11-08 02:09:03 +08:00
|
|
|
|
|
|
|
if (btf_type_is_array(mtype)) {
|
|
|
|
u32 elem_idx;
|
|
|
|
|
2020-08-26 03:21:13 +08:00
|
|
|
/* __btf_resolve_size() above helps to
|
2019-11-08 02:09:03 +08:00
|
|
|
* linearize a multi-dimensional array.
|
|
|
|
*
|
|
|
|
* The logic here treats an array
|
|
|
|
* in a struct in the following way:
|
|
|
|
*
|
|
|
|
* struct outer {
|
|
|
|
* struct inner array[2][2];
|
|
|
|
* };
|
|
|
|
*
|
|
|
|
* looks like:
|
|
|
|
*
|
|
|
|
* struct outer {
|
|
|
|
* struct inner array_elem0;
|
|
|
|
* struct inner array_elem1;
|
|
|
|
* struct inner array_elem2;
|
|
|
|
* struct inner array_elem3;
|
|
|
|
* };
|
|
|
|
*
|
|
|
|
* When accessing outer->array[1][0], it moves
|
|
|
|
* moff to "array_elem2", set mtype to
|
|
|
|
* "struct inner", and msize also becomes
|
|
|
|
* sizeof(struct inner). Then most of the
|
|
|
|
* remaining logic will fall through without
|
|
|
|
* caring whether the current member is an array or
|
|
|
|
* not.
|
|
|
|
*
|
|
|
|
* Unlike mtype/msize/moff, mtrue_end does not
|
|
|
|
* change. The naming difference ("_true") tells
|
|
|
|
* that it does not always correspond to
|
|
|
|
* the current mtype/msize/moff.
|
|
|
|
* It is the true end of the current
|
|
|
|
* member (i.e. array in this case). That
|
|
|
|
* will allow an int array to be accessed like
|
|
|
|
* a scratch space,
|
|
|
|
* i.e. allow access beyond the size of
|
|
|
|
* the array's element as long as it is
|
|
|
|
* within the mtrue_end boundary.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* skip empty array */
|
|
|
|
if (moff == mtrue_end)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
msize /= total_nelems;
|
|
|
|
elem_idx = (off - moff) / msize;
|
|
|
|
moff += elem_idx * msize;
|
|
|
|
mtype = elem_type;
|
2020-08-26 03:21:17 +08:00
|
|
|
mid = elem_id;
|
2019-11-08 02:09:03 +08:00
|
|
|
}
|
|
|
|
|
2019-10-16 11:25:00 +08:00
|
|
|
/* the 'off' we're looking for is either equal to start
|
|
|
|
* of this field or inside of this struct
|
|
|
|
*/
|
|
|
|
if (btf_type_is_struct(mtype)) {
|
|
|
|
/* our field must be inside that union or struct */
|
|
|
|
t = mtype;
|
|
|
|
|
2020-08-26 03:21:17 +08:00
|
|
|
/* return if the offset matches the member offset */
|
|
|
|
if (off == moff) {
|
|
|
|
*next_btf_id = mid;
|
|
|
|
return WALK_STRUCT;
|
|
|
|
}
|
|
|
|
|
2019-10-16 11:25:00 +08:00
|
|
|
/* adjust offset we're looking for */
|
2019-11-08 02:09:03 +08:00
|
|
|
off -= moff;
|
2019-10-16 11:25:00 +08:00
|
|
|
goto again;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (btf_type_is_ptr(mtype)) {
|
2022-01-27 23:46:06 +08:00
|
|
|
const struct btf_type *stype, *t;
|
|
|
|
enum bpf_type_flag tmp_flag = 0;
|
2020-02-01 08:03:14 +08:00
|
|
|
u32 id;
|
2019-10-16 11:25:00 +08:00
|
|
|
|
2019-11-08 02:09:03 +08:00
|
|
|
if (msize != size || off != moff) {
|
|
|
|
bpf_log(log,
|
|
|
|
"cannot access ptr member %s with moff %u in struct %s with off %u size %u\n",
|
|
|
|
mname, moff, tname, off, size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
2022-01-27 23:46:06 +08:00
|
|
|
|
2022-03-05 03:16:56 +08:00
|
|
|
/* check type tag */
|
2022-01-27 23:46:06 +08:00
|
|
|
t = btf_type_by_id(btf, mtype->type);
|
|
|
|
if (btf_type_is_type_tag(t)) {
|
|
|
|
tag_value = __btf_name_by_offset(btf, t->name_off);
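/* Illustrative note: the tag string is the argument of the
 * btf_type_tag attribute; e.g. with compiler support the kernel
 * defines __user as __attribute__((btf_type_tag("user"))), so
 * the string seen here is "user".
 */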
|
2022-03-05 03:16:56 +08:00
|
|
|
/* check __user tag */
|
2022-01-27 23:46:06 +08:00
|
|
|
if (strcmp(tag_value, "user") == 0)
|
|
|
|
tmp_flag = MEM_USER;
|
2022-03-05 03:16:56 +08:00
|
|
|
/* check __percpu tag */
|
|
|
|
if (strcmp(tag_value, "percpu") == 0)
|
|
|
|
tmp_flag = MEM_PERCPU;
|
2022-11-24 13:32:17 +08:00
|
|
|
/* check __rcu tag */
|
|
|
|
if (strcmp(tag_value, "rcu") == 0)
|
|
|
|
tmp_flag = MEM_RCU;
|
2022-01-27 23:46:06 +08:00
|
|
|
}
|
|
|
|
|
2020-12-04 04:46:29 +08:00
|
|
|
stype = btf_type_skip_modifiers(btf, mtype->type, &id);
|
2019-10-16 11:25:00 +08:00
|
|
|
if (btf_type_is_struct(stype)) {
|
2020-02-01 08:03:14 +08:00
|
|
|
*next_btf_id = id;
|
2023-03-03 12:14:46 +08:00
|
|
|
*flag |= tmp_flag;
|
2023-04-04 12:50:24 +08:00
|
|
|
if (field_name)
|
|
|
|
*field_name = mname;
|
2020-08-26 03:21:17 +08:00
|
|
|
return WALK_PTR;
|
2019-10-16 11:25:00 +08:00
|
|
|
}
|
|
|
|
}
|
2019-11-08 02:09:03 +08:00
|
|
|
|
|
|
|
/* Allow more flexible access within an int as long as
|
|
|
|
* it is within mtrue_end.
|
|
|
|
* Since mtrue_end could be the end of an array,
|
|
|
|
* that also allows using an array of int as a scratch
|
|
|
|
* space. e.g. skb->cb[].
|
|
|
|
*/
|
bpf: Fix an error in verifying a field in a union
We are utilizing BPF LSM to monitor BPF operations within our container
environment. When we added support for raw_tracepoint, it hit the error
below.
; (const void *)attr->raw_tracepoint.name);
27: (79) r3 = *(u64 *)(r2 +0)
access beyond the end of member map_type (mend:4) in struct (anon) with off 0 size 8
It can be reproduced with the BPF prog below.
SEC("lsm/bpf")
int BPF_PROG(bpf_audit, int cmd, union bpf_attr *attr, unsigned int size)
{
switch (cmd) {
case BPF_RAW_TRACEPOINT_OPEN:
bpf_printk("raw_tracepoint is %s", attr->raw_tracepoint.name);
break;
default:
break;
}
return 0;
}
The reason is that when accessing a field in a union, such as bpf_attr,
if the field is located within a nested struct that is not the first
member of the union, it can result in incorrect field verification.
union bpf_attr {
struct {
__u32 map_type; <<<< Actually it will find that field.
__u32 key_size;
__u32 value_size;
...
};
...
struct {
__u64 name; <<<< We want to verify this field.
__u32 prog_fd;
} raw_tracepoint;
};
Considering the potentially deep nesting levels, finding a perfect
solution to address this issue has proven challenging. Therefore, I
propose a solution where we simply skip the verification process if the
field in question is located within a union.
Fixes: 7e3617a72df3 ("bpf: Add array support to btf_struct_access")
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Link: https://lore.kernel.org/r/20230713025642.27477-4-laoar.shao@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-07-13 10:56:41 +08:00
|
|
|
if (off + size > mtrue_end && !(*flag & PTR_UNTRUSTED)) {
|
2019-11-08 02:09:03 +08:00
|
|
|
bpf_log(log,
|
|
|
|
"access beyond the end of member %s (mend:%u) in struct %s with off %u size %u\n",
|
|
|
|
mname, mtrue_end, tname, off, size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
2020-08-26 03:21:17 +08:00
|
|
|
return WALK_SCALAR;
|
2019-10-16 11:25:00 +08:00
|
|
|
}
|
|
|
|
bpf_log(log, "struct %s doesn't have field at offset %d\n", tname, off);
|
|
|
|
return -EINVAL;
|
|
|
|
}
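A minimal hypothetical sketch (not part of this file; it assumes the usual
vmlinux.h/bpf_helpers.h/bpf_tracing.h setup) of the rule the walk above
enforces: a pointer member tagged __user yields MEM_USER, so a tracing
program must go through the probe-read helpers rather than dereference it
directly.

/* do_sys_openat2()'s filename argument is a __user pointer */
SEC("fentry/do_sys_openat2")
int BPF_PROG(trace_open, int dfd, const char *filename)
{
	char buf[64];

	/* a direct filename[0] load would be rejected by the verifier;
	 * bpf_probe_read_user_str() is the sanctioned path
	 */
	bpf_probe_read_user_str(buf, sizeof(buf), filename);
	return 0;
}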
|
|
|
|
|
2022-11-15 03:15:28 +08:00
|
|
|
int btf_struct_access(struct bpf_verifier_log *log,
|
|
|
|
const struct bpf_reg_state *reg,
|
|
|
|
int off, int size, enum bpf_access_type atype __maybe_unused,
|
2023-04-04 12:50:24 +08:00
|
|
|
u32 *next_btf_id, enum bpf_type_flag *flag,
|
|
|
|
const char **field_name)
|
2020-08-26 03:21:17 +08:00
|
|
|
{
|
2022-11-15 03:15:28 +08:00
|
|
|
const struct btf *btf = reg->btf;
|
2022-01-27 23:46:06 +08:00
|
|
|
enum bpf_type_flag tmp_flag = 0;
|
2022-11-15 03:15:28 +08:00
|
|
|
const struct btf_type *t;
|
|
|
|
u32 id = reg->btf_id;
|
2020-08-26 03:21:17 +08:00
|
|
|
int err;
|
|
|
|
|
bpf: Recognize lock and list fields in allocated objects
Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in an
allocated object.
Also update btf_struct_access to reject direct access to these special
fields.
A bpf_list_head allows implementing map-in-map style use cases, where an
allocated object with bpf_list_head is linked into a list in a map
value. This would require embedding a bpf_list_node, support for which
is also included. The bpf_spin_lock is used to protect the bpf_list_head
and other data.
While we don't strictly require holding a bpf_spin_lock while touching
the bpf_list_head in such objects, as when we have access to it, we have
complete ownership of the object, the locking constraint is still kept
and may be conditionally lifted in the future.
Note that the specification of such types can be done just like map
values, e.g.:
struct bar {
struct bpf_list_node node;
};
struct foo {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(bar, node);
struct bpf_list_node node;
};
struct map_value {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(foo, node);
};
To recognize such types in user BTF, we build a btf_struct_metas array
of metadata items corresponding to each BTF ID. This is done once during
the btf_parse stage to avoid having to do it each time the verification
process needs to inspect the metadata.
Moreover, the computed metadata needs to be passed to some helpers in
future patches, which requires allocating them and storing them in the
BTF that is pinned by the program itself, so that valid access can be
assumed for such data during program runtime.
A key thing to note is that once a btf_struct_meta is available for a
type, both the btf_record and btf_field_offs should be available. It is
critical that btf_field_offs is available in case special fields are
present, as we extensively rely on special fields being zeroed out in
map values and allocated objects in later patches. The code ensures that
by bailing out in case of errors and ensuring both are available
together. If the record is not available, the special fields won't be
recognized, so not having both is also fine (in terms of being a
verification error and not a runtime bug).
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 09:55:56 +08:00
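/* Illustrative: for an allocated object such as
 *   struct node { struct bpf_list_node link; long data; };
 * a load of n->data is fine, while any load/store overlapping
 * n->link is rejected by the loop below.
 */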
|
|
|
while (type_is_alloc(reg->type)) {
|
|
|
|
struct btf_struct_meta *meta;
|
|
|
|
struct btf_record *rec;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
meta = btf_find_struct_meta(btf, id);
|
|
|
|
if (!meta)
|
|
|
|
break;
|
|
|
|
rec = meta->record;
|
|
|
|
for (i = 0; i < rec->cnt; i++) {
|
|
|
|
struct btf_field *field = &rec->fields[i];
|
|
|
|
u32 offset = field->offset;
|
2024-05-24 01:41:55 +08:00
|
|
|
if (off < offset + field->size && offset < off + size) {
|
2022-11-18 09:55:56 +08:00
|
|
|
bpf_log(log,
|
|
|
|
"direct access to %s is disallowed\n",
|
|
|
|
btf_field_type_name(field->type));
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2022-11-15 03:15:28 +08:00
|
|
|
t = btf_type_by_id(btf, id);
|
2020-08-26 03:21:17 +08:00
|
|
|
do {
|
2023-04-04 12:50:24 +08:00
|
|
|
err = btf_struct_walk(log, btf, t, off, size, &id, &tmp_flag, field_name);
|
2020-08-26 03:21:17 +08:00
|
|
|
|
|
|
|
switch (err) {
|
|
|
|
case WALK_PTR:
|
2022-11-18 09:55:55 +08:00
|
|
|
/* For local types, the destination register cannot
|
|
|
|
* become a pointer again.
|
|
|
|
*/
|
|
|
|
if (type_is_alloc(reg->type))
|
|
|
|
return SCALAR_VALUE;
|
2020-08-26 03:21:17 +08:00
|
|
|
/* If we found the pointer or scalar on t+off,
|
|
|
|
* we're done.
|
|
|
|
*/
|
|
|
|
*next_btf_id = id;
|
2022-01-27 23:46:06 +08:00
|
|
|
*flag = tmp_flag;
|
2020-08-26 03:21:17 +08:00
|
|
|
return PTR_TO_BTF_ID;
|
|
|
|
case WALK_SCALAR:
|
|
|
|
return SCALAR_VALUE;
|
|
|
|
case WALK_STRUCT:
|
|
|
|
/* We found nested struct, so continue the search
|
|
|
|
* by diving in it. At this point the offset is
|
|
|
|
* aligned with the new type, so set it to 0.
|
|
|
|
*/
|
2020-12-04 04:46:29 +08:00
|
|
|
t = btf_type_by_id(btf, id);
|
2020-08-26 03:21:17 +08:00
|
|
|
off = 0;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
/* It's either an error or an unknown return value;
|
|
|
|
* scream and leave.
|
|
|
|
*/
|
|
|
|
if (WARN_ONCE(err > 0, "unknown btf_struct_walk return value"))
|
|
|
|
return -EINVAL;
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
} while (t);
|
|
|
|
|
|
|
|
return -EINVAL;
|
|
|
|
}
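As an illustrative note (hypothetical access, not from this file): the
verifier calls btf_struct_access() once per dereference, so a pointer chain
is resolved step by step.

/* Sketch: each "->" below is one btf_struct_access() call.
 *   skb->dev     : WALK_PTR    -> PTR_TO_BTF_ID of struct net_device
 *   dev->ifindex : WALK_SCALAR -> SCALAR_VALUE
 */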
|
|
|
|
|
2020-12-04 04:46:29 +08:00
|
|
|
/* Check that two BTF types, each specified as a BTF object + id, are exactly
|
|
|
|
* the same. Trivial ID check is not enough due to module BTFs, because we can
|
|
|
|
* end up with two different module BTFs, but IDs point to the common type in
|
|
|
|
* vmlinux BTF.
|
|
|
|
*/
|
2022-11-18 09:56:01 +08:00
|
|
|
bool btf_types_are_same(const struct btf *btf1, u32 id1,
|
|
|
|
const struct btf *btf2, u32 id2)
|
2020-12-04 04:46:29 +08:00
|
|
|
{
|
|
|
|
if (id1 != id2)
|
|
|
|
return false;
|
|
|
|
if (btf1 == btf2)
|
|
|
|
return true;
|
|
|
|
return btf_type_by_id(btf1, id1) == btf_type_by_id(btf2, id2);
|
|
|
|
}
|
|
|
|
|
2020-08-26 03:21:18 +08:00
|
|
|
bool btf_struct_ids_match(struct bpf_verifier_log *log,
|
2020-12-04 04:46:29 +08:00
|
|
|
const struct btf *btf, u32 id, int off,
|
2022-04-25 05:48:57 +08:00
|
|
|
const struct btf *need_btf, u32 need_type_id,
|
|
|
|
bool strict)
|
2020-08-26 03:21:18 +08:00
|
|
|
{
|
|
|
|
const struct btf_type *type;
|
2023-07-13 10:56:39 +08:00
|
|
|
enum bpf_type_flag flag = 0;
|
2020-08-26 03:21:18 +08:00
|
|
|
int err;
|
|
|
|
|
|
|
|
/* Are we already done? */
|
2020-12-04 04:46:29 +08:00
|
|
|
if (off == 0 && btf_types_are_same(btf, id, need_btf, need_type_id))
|
2020-08-26 03:21:18 +08:00
|
|
|
return true;
|
2022-04-25 05:48:57 +08:00
|
|
|
/* In case of strict type match, we do not walk the struct; the top-level
|
|
|
|
* type match must succeed. When strict is true, off should have already
|
|
|
|
* been 0.
|
|
|
|
*/
|
|
|
|
if (strict)
|
|
|
|
return false;
|
2020-08-26 03:21:18 +08:00
|
|
|
again:
|
2020-12-04 04:46:29 +08:00
|
|
|
type = btf_type_by_id(btf, id);
|
2020-08-26 03:21:18 +08:00
|
|
|
if (!type)
|
|
|
|
return false;
|
2023-04-04 12:50:24 +08:00
|
|
|
err = btf_struct_walk(log, btf, type, off, 1, &id, &flag, NULL);
|
2020-08-26 03:21:18 +08:00
|
|
|
if (err != WALK_STRUCT)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* We found nested struct object. If it matches
|
|
|
|
* the requested ID, we're done. Otherwise let's
|
|
|
|
* continue the search with offset 0 in the new
|
|
|
|
* type.
|
|
|
|
*/
|
2020-12-04 04:46:29 +08:00
|
|
|
if (!btf_types_are_same(btf, id, need_btf, need_type_id)) {
|
2020-08-26 03:21:18 +08:00
|
|
|
off = 0;
|
|
|
|
goto again;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
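Illustrative example of the walk above, using the kernel's sock hierarchy:
a helper that needs a struct sock pointer also accepts a pointer to a type
that embeds one at offset 0.

/* struct tcp_sock             { struct inet_connection_sock inet_conn; ... };
 * struct inet_connection_sock { struct inet_sock icsk_inet; ... };
 * struct inet_sock            { struct sock sk; ... };
 * So (btf of tcp_sock, off 0) matches need_type_id == id of struct sock.
 */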
|
|
|
|
|
2019-11-15 02:57:04 +08:00
|
|
|
static int __get_type_size(struct btf *btf, u32 btf_id,
|
2022-08-08 01:51:16 +08:00
|
|
|
const struct btf_type **ret_type)
|
2019-11-15 02:57:04 +08:00
|
|
|
{
|
|
|
|
const struct btf_type *t;
|
|
|
|
|
2022-08-08 01:51:16 +08:00
|
|
|
*ret_type = btf_type_by_id(btf, 0);
|
2019-11-15 02:57:04 +08:00
|
|
|
if (!btf_id)
|
|
|
|
/* void */
|
|
|
|
return 0;
|
|
|
|
t = btf_type_by_id(btf, btf_id);
|
|
|
|
while (t && btf_type_is_modifier(t))
|
|
|
|
t = btf_type_by_id(btf, t->type);
|
2022-08-08 01:51:16 +08:00
|
|
|
if (!t)
|
2019-11-15 02:57:04 +08:00
|
|
|
return -EINVAL;
|
2022-08-08 01:51:16 +08:00
|
|
|
*ret_type = t;
|
2019-11-15 02:57:04 +08:00
|
|
|
if (btf_type_is_ptr(t))
|
|
|
|
/* kernel size of pointer. Not BPF's size of pointer */
|
|
|
|
return sizeof(void *);
|
2022-08-31 23:26:46 +08:00
|
|
|
if (btf_type_is_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t))
|
2019-11-15 02:57:04 +08:00
|
|
|
return t->size;
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2023-01-28 08:06:44 +08:00
|
|
|
static u8 __get_type_fmodel_flags(const struct btf_type *t)
|
|
|
|
{
|
|
|
|
u8 flags = 0;
|
|
|
|
|
|
|
|
if (__btf_type_is_struct(t))
|
|
|
|
flags |= BTF_FMODEL_STRUCT_ARG;
|
|
|
|
if (btf_type_is_signed_int(t))
|
|
|
|
flags |= BTF_FMODEL_SIGNED_ARG;
|
|
|
|
|
|
|
|
return flags;
|
|
|
|
}
|
|
|
|
|
2019-11-15 02:57:04 +08:00
|
|
|
int btf_distill_func_proto(struct bpf_verifier_log *log,
|
|
|
|
struct btf *btf,
|
|
|
|
const struct btf_type *func,
|
|
|
|
const char *tname,
|
|
|
|
struct btf_func_model *m)
|
|
|
|
{
|
|
|
|
const struct btf_param *args;
|
|
|
|
const struct btf_type *t;
|
|
|
|
u32 i, nargs;
|
|
|
|
int ret;
|
|
|
|
|
2019-11-15 02:57:17 +08:00
|
|
|
if (!func) {
|
|
|
|
/* BTF function prototype doesn't match the verifier types.
|
2021-02-26 04:26:29 +08:00
|
|
|
* Fall back to MAX_BPF_FUNC_REG_ARGS u64 args.
|
2019-11-15 02:57:17 +08:00
|
|
|
*/
|
2022-08-31 23:26:46 +08:00
|
|
|
for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
|
2019-11-15 02:57:17 +08:00
|
|
|
m->arg_size[i] = 8;
|
2022-08-31 23:26:46 +08:00
|
|
|
m->arg_flags[i] = 0;
|
|
|
|
}
|
2019-11-15 02:57:17 +08:00
|
|
|
m->ret_size = 8;
|
2023-01-28 08:06:44 +08:00
|
|
|
m->ret_flags = 0;
|
2021-02-26 04:26:29 +08:00
|
|
|
m->nr_args = MAX_BPF_FUNC_REG_ARGS;
|
2019-11-15 02:57:17 +08:00
|
|
|
return 0;
|
|
|
|
}
|
2019-11-15 02:57:04 +08:00
|
|
|
args = (const struct btf_param *)(func + 1);
|
|
|
|
nargs = btf_type_vlen(func);
|
2022-03-25 00:42:38 +08:00
|
|
|
if (nargs > MAX_BPF_FUNC_ARGS) {
|
2019-11-15 02:57:04 +08:00
|
|
|
bpf_log(log,
|
|
|
|
"The function %s has %d arguments. Too many.\n",
|
|
|
|
tname, nargs);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
ret = __get_type_size(btf, func->type, &t);
|
2022-08-31 23:26:46 +08:00
|
|
|
if (ret < 0 || __btf_type_is_struct(t)) {
|
2019-11-15 02:57:04 +08:00
|
|
|
bpf_log(log,
|
|
|
|
"The function %s return type %s is unsupported.\n",
|
2022-09-17 04:28:00 +08:00
|
|
|
tname, btf_type_str(t));
|
2019-11-15 02:57:04 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
m->ret_size = ret;
|
2023-01-28 08:06:44 +08:00
|
|
|
m->ret_flags = __get_type_fmodel_flags(t);
|
2019-11-15 02:57:04 +08:00
|
|
|
|
|
|
|
for (i = 0; i < nargs; i++) {
|
2021-05-05 21:25:29 +08:00
|
|
|
if (i == nargs - 1 && args[i].type == 0) {
|
|
|
|
bpf_log(log,
|
|
|
|
"The function %s with variable args is unsupported.\n",
|
|
|
|
tname);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2019-11-15 02:57:04 +08:00
|
|
|
ret = __get_type_size(btf, args[i].type, &t);
|
2022-08-31 23:26:46 +08:00
|
|
|
|
|
|
|
/* No support of struct argument size greater than 16 bytes */
|
|
|
|
if (ret < 0 || ret > 16) {
|
2019-11-15 02:57:04 +08:00
|
|
|
bpf_log(log,
|
|
|
|
"The function %s arg%d type %s is unsupported.\n",
|
2022-09-17 04:28:00 +08:00
|
|
|
tname, i, btf_type_str(t));
|
2019-11-15 02:57:04 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2021-05-05 21:25:29 +08:00
|
|
|
if (ret == 0) {
|
|
|
|
bpf_log(log,
|
|
|
|
"The function %s has malformed void argument.\n",
|
|
|
|
tname);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2019-11-15 02:57:04 +08:00
|
|
|
m->arg_size[i] = ret;
|
2023-01-28 08:06:44 +08:00
|
|
|
m->arg_flags[i] = __get_type_fmodel_flags(t);
|
2019-11-15 02:57:04 +08:00
|
|
|
}
|
|
|
|
m->nr_args = nargs;
|
|
|
|
return 0;
|
|
|
|
}
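A small worked example (hypothetical kernel function) of the distilled
model, under the flag semantics of __get_type_fmodel_flags() above:

/* For: int kfunc(struct sk_buff *skb, u64 flags)
 *   m->nr_args   = 2
 *   m->arg_size  = { 8, 8 }              (pointer and u64)
 *   m->arg_flags = { 0, 0 }
 *   m->ret_size  = 4                     (int)
 *   m->ret_flags = BTF_FMODEL_SIGNED_ARG (int is a signed int)
 */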
|
|
|
|
|
2020-01-21 08:53:46 +08:00
|
|
|
/* Compare BTFs of two functions assuming only scalars and pointers to context.
|
|
|
|
* t1 points to BTF_KIND_FUNC in btf1
|
|
|
|
* t2 points to BTF_KIND_FUNC in btf2
|
|
|
|
* Returns:
|
|
|
|
* EINVAL - function prototype mismatch
|
|
|
|
* EFAULT - verifier bug
|
|
|
|
* 0 - 99% match. The last 1% is validated by the verifier.
|
|
|
|
*/
|
2020-02-10 09:14:41 +08:00
|
|
|
static int btf_check_func_type_match(struct bpf_verifier_log *log,
|
|
|
|
struct btf *btf1, const struct btf_type *t1,
|
|
|
|
struct btf *btf2, const struct btf_type *t2)
|
2020-01-21 08:53:46 +08:00
|
|
|
{
|
|
|
|
const struct btf_param *args1, *args2;
|
|
|
|
const char *fn1, *fn2, *s1, *s2;
|
|
|
|
u32 nargs1, nargs2, i;
|
|
|
|
|
|
|
|
fn1 = btf_name_by_offset(btf1, t1->name_off);
|
|
|
|
fn2 = btf_name_by_offset(btf2, t2->name_off);
|
|
|
|
|
|
|
|
if (btf_func_linkage(t1) != BTF_FUNC_GLOBAL) {
|
|
|
|
bpf_log(log, "%s() is not a global function\n", fn1);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (btf_func_linkage(t2) != BTF_FUNC_GLOBAL) {
|
|
|
|
bpf_log(log, "%s() is not a global function\n", fn2);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
t1 = btf_type_by_id(btf1, t1->type);
|
|
|
|
if (!t1 || !btf_type_is_func_proto(t1))
|
|
|
|
return -EFAULT;
|
|
|
|
t2 = btf_type_by_id(btf2, t2->type);
|
|
|
|
if (!t2 || !btf_type_is_func_proto(t2))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
args1 = (const struct btf_param *)(t1 + 1);
|
|
|
|
nargs1 = btf_type_vlen(t1);
|
|
|
|
args2 = (const struct btf_param *)(t2 + 1);
|
|
|
|
nargs2 = btf_type_vlen(t2);
|
|
|
|
|
|
|
|
if (nargs1 != nargs2) {
|
|
|
|
bpf_log(log, "%s() has %d args while %s() has %d args\n",
|
|
|
|
fn1, nargs1, fn2, nargs2);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
t1 = btf_type_skip_modifiers(btf1, t1->type, NULL);
|
|
|
|
t2 = btf_type_skip_modifiers(btf2, t2->type, NULL);
|
|
|
|
if (t1->info != t2->info) {
|
|
|
|
bpf_log(log,
|
|
|
|
"Return type %s of %s() doesn't match type %s of %s()\n",
|
|
|
|
btf_type_str(t1), fn1,
|
|
|
|
btf_type_str(t2), fn2);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < nargs1; i++) {
|
|
|
|
t1 = btf_type_skip_modifiers(btf1, args1[i].type, NULL);
|
|
|
|
t2 = btf_type_skip_modifiers(btf2, args2[i].type, NULL);
|
|
|
|
|
|
|
|
if (t1->info != t2->info) {
|
|
|
|
bpf_log(log, "arg%d in %s() is %s while %s() has %s\n",
|
|
|
|
i, fn1, btf_type_str(t1),
|
|
|
|
fn2, btf_type_str(t2));
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (btf_type_has_size(t1) && t1->size != t2->size) {
|
|
|
|
bpf_log(log,
|
|
|
|
"arg%d in %s() has size %d while %s() has %d\n",
|
|
|
|
i, fn1, t1->size,
|
|
|
|
fn2, t2->size);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* global functions are validated with scalars and pointers
|
|
|
|
* to context only. And only global functions can be replaced.
|
|
|
|
* Hence type check only those types.
|
|
|
|
*/
|
bpf: Add btf enum64 support
Currently, BTF only supports up to 32-bit enum values with BTF_KIND_ENUM.
But in the kernel, some enums indeed have 64-bit values, e.g.,
in uapi bpf.h, we have
enum {
BPF_F_INDEX_MASK = 0xffffffffULL,
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};
In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK
as 0, which certainly is incorrect.
This patch adds a new btf kind, BTF_KIND_ENUM64, which permits
64-bit values to cover the above use case. BTF_KIND_ENUM64 has
the following three fields following the common type:
struct btf_enum64 {
__u32 name_off;
__u32 val_lo32;
__u32 val_hi32;
};
Currently, the btf type section has an alignment of 4, as all element types
are u32. Representing the value with __u64 would introduce padding
for btf_enum64 and may also introduce misalignment for the 64-bit value.
Hence, the two members val_hi32 and val_lo32 are chosen to avoid these issues.
The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64
to indicate whether the value is signed or unsigned. The kflag intends
to provide consistent output of the BTF C format with the original
source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff.
The C format has two choices, printing out 0xffffffff or -1, and current libbpf
prints it out as an unsigned value. But if the signedness is preserved in BTF,
the value can be printed the same as in the original source code.
The kflag value 0 means unsigned values, which is consistent with the default
in libbpf and should also cover most cases.
The new BTF_KIND_ENUM64 is intended to support enum values represented as
64-bit values. But it can represent all BTF_KIND_ENUM values as well.
The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has
to be represented with 64 bits.
In addition, a static inline function btf_kind_core_compat() is introduced, which
will be used later when the libbpf relo_core.c is changed. Here the kernel shares the
same relo_core.c with libbpf.
[1] https://reviews.llvm.org/D124641
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-06-07 14:26:00 +08:00
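		/* Illustrative sketch (not part of this file), assuming the
		 * btf_enum64 layout above: reconstructing the 64-bit value
		 * from its two 32-bit halves, as libbpf's btf_enum64_value()
		 * accessor does:
		 *
		 *	static inline u64 enum64_value(const struct btf_enum64 *e)
		 *	{
		 *		return ((u64)e->val_hi32 << 32) | e->val_lo32;
		 *	}
		 */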
		if (btf_type_is_int(t1) || btf_is_any_enum(t1))
			continue;
		if (!btf_type_is_ptr(t1)) {
			bpf_log(log,
				"arg%d in %s() has unrecognized type\n",
				i, fn1);
			return -EINVAL;
		}
		t1 = btf_type_skip_modifiers(btf1, t1->type, NULL);
		t2 = btf_type_skip_modifiers(btf2, t2->type, NULL);
		if (!btf_type_is_struct(t1)) {
			bpf_log(log,
				"arg%d in %s() is not a pointer to context\n",
				i, fn1);
			return -EINVAL;
		}
		if (!btf_type_is_struct(t2)) {
			bpf_log(log,
				"arg%d in %s() is not a pointer to context\n",
				i, fn2);
			return -EINVAL;
		}
		/* This is an optional check to make program writing easier.
		 * Compare names of structs and report an error to the user.
		 * btf_prepare_func_args() already checked that t2 struct
		 * is a context type. btf_prepare_func_args() will check
		 * later that t1 struct is a context type as well.
		 */
		s1 = btf_name_by_offset(btf1, t1->name_off);
		s2 = btf_name_by_offset(btf2, t2->name_off);
		if (strcmp(s1, s2)) {
			bpf_log(log,
				"arg%d %s(struct %s *) doesn't match %s(struct %s *)\n",
				i, fn1, s1, fn2, s2);
			return -EINVAL;
		}
	}
	return 0;
}
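
/* Illustrative sketch (not part of this file): a signature pair that passes
 * the matching above for program extensions. Both functions are global, with
 * identical arg counts, scalar widths, and context struct names; the names
 * and SEC() usage are hypothetical BPF-side code, not kernel code.
 *
 *	// in the target program:
 *	__noinline int do_check(struct xdp_md *ctx, int len)
 *	{
 *		return len > 0;
 *	}
 *
 *	// in the BPF_PROG_TYPE_EXT program replacing it:
 *	SEC("freplace/do_check")
 *	int new_do_check(struct xdp_md *ctx, int len)
 *	{
 *		return len > 4;
 *	}
 */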

/* Compare BTFs of given program with BTF of target program */
int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
			 struct btf *btf2, const struct btf_type *t2)
{
	struct btf *btf1 = prog->aux->btf;
	const struct btf_type *t1;
	u32 btf_id = 0;

	if (!prog->aux->func_info) {
		bpf_log(log, "Program extension requires BTF\n");
		return -EINVAL;
	}

	btf_id = prog->aux->func_info[0].type_id;
	if (!btf_id)
		return -EFAULT;

	t1 = btf_type_by_id(btf1, btf_id);
	if (!t1 || !btf_type_is_func(t1))
		return -EFAULT;

	return btf_check_func_type_match(log, btf1, t1, btf2, t2);
}

/* bpf: add support for passing dynptr pointer to global subprog
 *
 * Add the ability to pass a pointer to a dynptr into global functions,
 * allowing global subprogs to accept and work with generic dynptrs created
 * by the caller. A dynptr argument is detected based on the name of the
 * struct type: if it is "bpf_dynptr", it is assumed to be a proper dynptr
 * pointer. Both an actual struct and a forward struct declaration are
 * supported.
 *
 * This is conceptually exactly the same semantics as
 * bpf_user_ringbuf_drain()'s use of a dynptr to pass a variable-sized
 * pointer to a ringbuf record, so we heavily rely on the
 * CONST_PTR_TO_DYNPTR bits of already existing logic in the verifier.
 *
 * During global subprog validation, such a CONST_PTR_TO_DYNPTR is marked as
 * having LOCAL type, as that is the most unassuming type of dynptr and it
 * doesn't have any special helpers that can try to free or acquire extra
 * references (unlike skb, xdp, or ringbuf dynptrs), which makes it a safe
 * choice from a correctness standpoint. It is still possible to pass any
 * type of dynptr to such a subprog, though, because the generic dynptr
 * helpers (data/slice pointer getters, read/write memory copying routines,
 * dynptr adjustment and getter routines) all work correctly with any type
 * of dynptr.
 *
 * Acked-by: Eduard Zingerman <eddyz87@gmail.com>
 * Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
 * Link: https://lore.kernel.org/r/20231215011334.2307144-8-andrii@kernel.org
 * Signed-off-by: Alexei Starovoitov <ast@kernel.org>
 */
static bool btf_is_dynptr_ptr(const struct btf *btf, const struct btf_type *t)
{
	const char *name;

	t = btf_type_by_id(btf, t->type); /* skip PTR */

	while (btf_type_is_modifier(t))
		t = btf_type_by_id(btf, t->type);

	/* allow either struct or struct forward declaration */
	if (btf_type_is_struct(t) ||
	    (btf_type_is_fwd(t) && btf_type_kflag(t) == 0)) {
		name = btf_str_by_offset(btf, t->name_off);
		return name && strcmp(name, "bpf_dynptr") == 0;
	}

	return false;
}
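
/* Illustrative sketch (not part of this file): a global subprog taking the
 * dynptr pointer detected above. bpf_dynptr_read() is the BPF-side helper;
 * the subprog name is hypothetical.
 *
 *	__noinline int read_header(struct bpf_dynptr *ptr)
 *	{
 *		__u8 buf[4];
 *
 *		// works with any dynptr flavor (local, ringbuf, skb, ...)
 *		return bpf_dynptr_read(buf, sizeof(buf), ptr, 0, 0);
 *	}
 */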

struct bpf_cand_cache {
	const char *name;
	u32 name_len;
	u16 kind;
	u16 cnt;
	struct {
		const struct btf *btf;
		u32 id;
	} cands[];
};

static DEFINE_MUTEX(cand_cache_mutex);

static struct bpf_cand_cache *
bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id);

static int btf_get_ptr_to_btf_id(struct bpf_verifier_log *log, int arg_idx,
				 const struct btf *btf, const struct btf_type *t)
{
	struct bpf_cand_cache *cc;
	struct bpf_core_ctx ctx = {
		.btf = btf,
		.log = log,
	};
	u32 kern_type_id, type_id;
	int err = 0;

	/* skip PTR and modifiers */
	type_id = t->type;
	t = btf_type_by_id(btf, t->type);
	while (btf_type_is_modifier(t)) {
		type_id = t->type;
		t = btf_type_by_id(btf, t->type);
	}

	mutex_lock(&cand_cache_mutex);
	cc = bpf_core_find_cands(&ctx, type_id);
	if (IS_ERR(cc)) {
		err = PTR_ERR(cc);
		bpf_log(log, "arg#%d reference type('%s %s') candidate matching error: %d\n",
			arg_idx, btf_type_str(t), __btf_name_by_offset(btf, t->name_off),
			err);
		goto cand_cache_unlock;
	}
	if (cc->cnt != 1) {
		bpf_log(log, "arg#%d reference type('%s %s') %s\n",
			arg_idx, btf_type_str(t), __btf_name_by_offset(btf, t->name_off),
			cc->cnt == 0 ? "has no matches" : "is ambiguous");
		err = cc->cnt == 0 ? -ENOENT : -ESRCH;
		goto cand_cache_unlock;
	}
	if (btf_is_module(cc->cands[0].btf)) {
		bpf_log(log, "arg#%d reference type('%s %s') points to kernel module type (unsupported)\n",
			arg_idx, btf_type_str(t), __btf_name_by_offset(btf, t->name_off));
		err = -EOPNOTSUPP;
		goto cand_cache_unlock;
	}
	kern_type_id = cc->cands[0].id;

cand_cache_unlock:
	mutex_unlock(&cand_cache_mutex);
	if (err)
		return err;

	return kern_type_id;
}
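
/* Illustrative sketch (not part of this file): btf_get_ptr_to_btf_id() backs
 * the "arg:trusted" tag by resolving the program-BTF struct to a single
 * matching vmlinux type. On the BPF side, with libbpf's __arg_trusted macro
 * (the subprog itself is hypothetical):
 *
 *	__noinline int task_tgid(struct task_struct *task __arg_trusted)
 *	{
 *		return task->tgid;	// direct BTF-based field load is OK
 *	}
 */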

enum btf_arg_tag {
	ARG_TAG_CTX = BIT_ULL(0),
	ARG_TAG_NONNULL = BIT_ULL(1),
	ARG_TAG_TRUSTED = BIT_ULL(2),
	ARG_TAG_NULLABLE = BIT_ULL(3),
	ARG_TAG_ARENA = BIT_ULL(4),
};

/* bpf: abstract away global subprog arg preparation logic from reg state setup
 *
 * btf_prepare_func_args() is used to understand expectations and
 * restrictions on global subprog arguments. But the earlier implementation
 * was hard to extend, as it intermixed BTF-based func prototype parsing and
 * interpretation logic with setting up register state at subprog entry.
 * Worse still, those registers were not completely set up inside
 * btf_prepare_func_args(), requiring more logic later in do_check_common(),
 * like calling mark_reg_unknown() and similar initialization operations.
 *
 * This intermixing of BTF interpretation and register state setup was
 * problematic. It caused duplication of BTF parsing logic between global
 * subprog verification (setting up the initial state of a global subprog)
 * and global subprog call site analysis (checking that whatever is passed
 * into a global subprog matches expectations), performed in
 * btf_check_subprog_call(). Given global func arguments are extended with
 * tags, this duplication is problematic.
 *
 * So btf_prepare_func_args() was refactored to do only BTF-based func proto
 * and args parsing, returning high-level argument "expectations" with no
 * regard to specifics of register state. I.e., for a context argument,
 * instead of setting register state to PTR_TO_CTX, the ARG_PTR_TO_CTX enum
 * is returned for that argument as "an argument specification" for further
 * processing inside do_check_common(); similarly for SCALAR arguments,
 * PTR_TO_MEM, etc. This allows btf_prepare_func_args() to be reused at
 * global subprog call site analysis time, and keeps register setup code
 * consistently in one place, do_check_common().
 *
 * Besides all this, the argument specs are cached inside env->subprog_info,
 * eliminating the need to redo these potentially expensive BTF traversals,
 * especially if the BPF program's BTF is big and/or there are lots of
 * global subprog calls.
 *
 * Acked-by: Eduard Zingerman <eddyz87@gmail.com>
 * Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
 * Link: https://lore.kernel.org/r/20231215011334.2307144-2-andrii@kernel.org
 * Signed-off-by: Alexei Starovoitov <ast@kernel.org>
 */

/* bpf: support 'arg:xxx' btf_decl_tag-based hints for global subprog args
 *
 * Global BPF subprog arguments can be annotated to provide more information
 * about the expected semantics of an argument. Previously, the verifier
 * relied purely on the argument's BTF type information and supported three
 * general use cases: scalar, pointer-to-context, and
 * pointer-to-fixed-size-memory.
 *
 * Scalar and pointer-to-fixed-mem work well in practice and are quite
 * natural to use. But pointer-to-context is a bit problematic, as typical
 * BPF users don't realize that they need to use a special type name to
 * signal to the verifier that the argument is not just some pointer, but
 * actually a PTR_TO_CTX. Further, even if users do know which type to use,
 * it is limiting in situations where the same BPF program logic is used
 * across a few different program types. A common case is kprobe,
 * tracepoint, and perf_event programs sharing a helper that sends data over
 * the BPF perf buffer: bpf_perf_event_output() requires a `ctx` argument,
 * so it is quite cumbersome to share such a global subprog across BPF
 * programs of different types, necessitating an extra static subprog that
 * is context type-agnostic.
 *
 * Long story short, there is a need to go beyond types and allow users to
 * add hints to global subprog arguments to define expectations. The two
 * initial special tags were:
 *  - pointer to context;
 *  - non-null qualifier for generic pointer arguments.
 * Both came up in practice and are generally useful additions. The non-null
 * qualifier is an often-requested feature, which previously had to be
 * worked around with unnecessary NULL checks inside subprogs even when an
 * argument is known to never be NULL.
 *
 * As for the implementation, the btf_decl_tag attribute is used with an
 * "arg:xxx" convention to specify argument hints. As such:
 *  - btf_decl_tag("arg:ctx") is a PTR_TO_CTX hint;
 *  - btf_decl_tag("arg:nonnull") marks a pointer argument as not allowed to
 *    be NULL, making a NULL check inside the global subprog unnecessary.
 *
 * Acked-by: Eduard Zingerman <eddyz87@gmail.com>
 * Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
 * Link: https://lore.kernel.org/r/20231215011334.2307144-7-andrii@kernel.org
 * Signed-off-by: Alexei Starovoitov <ast@kernel.org>
 */
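
/* Illustrative sketch (not part of this file): the "arg:xxx" convention as
 * seen from the BPF program side. libbpf ships __arg_ctx/__arg_nonnull
 * wrappers for these tags; the subprog below is hypothetical.
 *
 *	#define __arg_ctx	__attribute__((btf_decl_tag("arg:ctx")))
 *	#define __arg_nonnull	__attribute__((btf_decl_tag("arg:nonnull")))
 *
 *	__noinline int add_one(void *ctx __arg_ctx, int *value __arg_nonnull)
 *	{
 *		// no NULL check needed: the verifier enforces it at call sites
 *		return *value + 1;
 *	}
 */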

/* Process BTF of a function to produce high-level expectation of function
 * arguments (like ARG_PTR_TO_CTX, or ARG_PTR_TO_MEM, etc). This information
 * is cached in subprog info for reuse.
 * Returns:
 * EFAULT - there is a verifier bug. Abort verification.
 * EINVAL - cannot convert BTF.
 * 0 - Successfully processed BTF and constructed argument expectations.
 */
int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
{
	bool is_global = subprog_aux(env, subprog)->linkage == BTF_FUNC_GLOBAL;
	struct bpf_subprog_info *sub = subprog_info(env, subprog);
	struct bpf_verifier_log *log = &env->log;
	struct bpf_prog *prog = env->prog;
	enum bpf_prog_type prog_type = prog->type;
	struct btf *btf = prog->aux->btf;
	const struct btf_param *args;
	const struct btf_type *t, *ref_t, *fn_t;
	u32 i, nargs, btf_id;
	const char *tname;

	if (sub->args_cached)
		return 0;

	if (!prog->aux->func_info) {
		bpf_log(log, "Verifier bug\n");
		return -EFAULT;
	}

	btf_id = prog->aux->func_info[subprog].type_id;
	if (!btf_id) {
		if (!is_global) /* not fatal for static funcs */
			return -EINVAL;
		bpf_log(log, "Global functions need valid BTF\n");
		return -EFAULT;
	}

	fn_t = btf_type_by_id(btf, btf_id);
	if (!fn_t || !btf_type_is_func(fn_t)) {
		/* These checks were already done by the verifier while loading
		 * struct bpf_func_info
		 */
		bpf_log(log, "BTF of func#%d doesn't point to KIND_FUNC\n",
			subprog);
		return -EFAULT;
	}
	tname = btf_name_by_offset(btf, fn_t->name_off);

	if (prog->aux->func_info_aux[subprog].unreliable) {
		bpf_log(log, "Verifier bug in function %s()\n", tname);
		return -EFAULT;
	}
	if (prog_type == BPF_PROG_TYPE_EXT)
		prog_type = prog->aux->dst_prog->type;

	t = btf_type_by_id(btf, fn_t->type);
	if (!t || !btf_type_is_func_proto(t)) {
		bpf_log(log, "Invalid type of function %s()\n", tname);
		return -EFAULT;
	}
	args = (const struct btf_param *)(t + 1);
	nargs = btf_type_vlen(t);
	if (nargs > MAX_BPF_FUNC_REG_ARGS) {
		if (!is_global)
			return -EINVAL;
		bpf_log(log, "Global function %s() with %d > %d args. Buggy compiler.\n",
			tname, nargs, MAX_BPF_FUNC_REG_ARGS);
		return -EINVAL;
	}
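	/* Illustrative sketch (not part of this file): MAX_BPF_FUNC_REG_ARGS
	 * is 5, matching the R1-R5 argument registers of the BPF calling
	 * convention, so a hypothetical global subprog like this one is
	 * rejected by the check above:
	 *
	 *	__noinline int too_many(int a, int b, int c, int d, int e, int f)
	 *	{
	 *		return a + b + c + d + e + f;
	 *	}
	 */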
	/* bpf: Add support for custom exception callbacks
	 *
	 * By default, the subprog generated by the verifier to handle a
	 * thrown exception hardcodes a return value of 0. To allow
	 * user-defined logic and modification of the return value when an
	 * exception is thrown, the 'exception_callback:' declaration tag
	 * marks a callback as the default exception handler for the program.
	 *
	 * The format of the declaration tag is 'exception_callback:<value>',
	 * where <value> is the name of the exception callback. Each main
	 * program can be tagged using this BTF declaration tag to associate
	 * it with an exception callback. In case the tag is absent, the
	 * default callback is used. As such, the exception callback cannot
	 * be modified at runtime, only set during verification.
	 *
	 * Allowing modification of the callback for the current program
	 * execution at runtime leads to issues when programs begin to nest,
	 * as any per-CPU state maintaining this information would have to be
	 * saved and restored. We don't want it to stay in bpf_prog_aux, as
	 * that takes global effect for all programs. An alternative solution
	 * is spilling the callback pointer at a known location on the
	 * program stack on entry, and then passing this location to
	 * bpf_throw as a parameter. However, since exceptions are geared
	 * more towards a use case where they are ideally never invoked,
	 * optimizing for it and adding to the complexity has diminishing
	 * returns.
	 *
	 * Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
	 * Link: https://lore.kernel.org/r/20230912233214.1518551-7-memxor@gmail.com
	 * Signed-off-by: Alexei Starovoitov <ast@kernel.org>
	 */
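	/* Illustrative sketch (not part of this file): wiring up an exception
	 * callback from the BPF side. BPF selftests wrap the tag in an
	 * __exception_cb() macro; names below are hypothetical.
	 *
	 *	#define __exception_cb(name) \
	 *		__attribute__((btf_decl_tag("exception_callback:" #name)))
	 *
	 *	static __noinline int my_exception_cb(u64 cookie)
	 *	{
	 *		return -1;	// overrides the default return value of 0
	 *	}
	 *
	 *	SEC("tc")
	 *	__exception_cb(my_exception_cb)
	 *	int prog_main(struct __sk_buff *ctx)
	 *	{
	 *		if (ctx->len == 0)
	 *			bpf_throw(0);
	 *		return 0;
	 *	}
	 */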
	/* check that function returns int, exception cb also requires this */
	t = btf_type_by_id(btf, t->type);
	while (btf_type_is_modifier(t))
		t = btf_type_by_id(btf, t->type);
	if (!btf_type_is_int(t) && !btf_is_any_enum(t)) {
		if (!is_global)
			return -EINVAL;
		bpf_log(log,
			"Global function %s() doesn't return scalar. Only those are supported.\n",
			tname);
		return -EINVAL;
	}
	/* Convert BTF function arguments into verifier types.
	 * Only PTR_TO_CTX and SCALAR are supported atm.
	 */
	for (i = 0; i < nargs; i++) {
		u32 tags = 0;
		int id = 0;

		/* 'arg:<tag>' decl_tag takes precedence over derivation of
		 * register type from BTF type itself
		 */
		while ((id = btf_find_next_decl_tag(btf, fn_t, i, "arg:", id)) > 0) {
			const struct btf_type *tag_t = btf_type_by_id(btf, id);
			const char *tag = __btf_name_by_offset(btf, tag_t->name_off) + 4;

			/* disallow arg tags in static subprogs */
			if (!is_global) {
				bpf_log(log, "arg#%d type tag is not supported in static functions\n", i);
				return -EOPNOTSUPP;
			}

			if (strcmp(tag, "ctx") == 0) {
				tags |= ARG_TAG_CTX;
			} else if (strcmp(tag, "trusted") == 0) {
				tags |= ARG_TAG_TRUSTED;
			} else if (strcmp(tag, "nonnull") == 0) {
				tags |= ARG_TAG_NONNULL;
			} else if (strcmp(tag, "nullable") == 0) {
				tags |= ARG_TAG_NULLABLE;
			} else if (strcmp(tag, "arena") == 0) {
				tags |= ARG_TAG_ARENA;
			} else {
				bpf_log(log, "arg#%d has unsupported set of tags\n", i);
				return -EOPNOTSUPP;
			}
		}
		if (id != -ENOENT) {
			bpf_log(log, "arg#%d type tag fetching failure: %d\n", i, id);
			return id;
		}

		t = btf_type_by_id(btf, args[i].type);
		while (btf_type_is_modifier(t))
			t = btf_type_by_id(btf, t->type);
		if (!btf_type_is_ptr(t))
			goto skip_pointer;

		if ((tags & ARG_TAG_CTX) || btf_is_prog_ctx_type(log, btf, t, prog_type, i)) {
			if (tags & ~ARG_TAG_CTX) {
				bpf_log(log, "arg#%d has invalid combination of tags\n", i);
				return -EINVAL;
			}
			if ((tags & ARG_TAG_CTX) &&
			    btf_validate_prog_ctx_type(log, btf, t, i, prog_type,
						       prog->expected_attach_type))
				return -EINVAL;
			sub->args[i].arg_type = ARG_PTR_TO_CTX;
			continue;
		}
		if (btf_is_dynptr_ptr(btf, t)) {
			if (tags) {
				bpf_log(log, "arg#%d has invalid combination of tags\n", i);
				return -EINVAL;
			}
			sub->args[i].arg_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY;
			continue;
		}
		if (tags & ARG_TAG_TRUSTED) {
			int kern_type_id;

			if (tags & ARG_TAG_NONNULL) {
				bpf_log(log, "arg#%d has invalid combination of tags\n", i);
				return -EINVAL;
			}

			kern_type_id = btf_get_ptr_to_btf_id(log, i, btf, t);
			if (kern_type_id < 0)
				return kern_type_id;

			sub->args[i].arg_type = ARG_PTR_TO_BTF_ID | PTR_TRUSTED;
			if (tags & ARG_TAG_NULLABLE)
				sub->args[i].arg_type |= PTR_MAYBE_NULL;
			sub->args[i].btf_id = kern_type_id;
			continue;
		}
		if (tags & ARG_TAG_ARENA) {
			if (tags & ~ARG_TAG_ARENA) {
				bpf_log(log, "arg#%d arena cannot be combined with any other tags\n", i);
				return -EINVAL;
			}
			sub->args[i].arg_type = ARG_PTR_TO_ARENA;
			continue;
		}
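		/* Illustrative sketch (not part of this file): an arena
		 * pointer argument. BPF selftests spell the tag as
		 * __arg_arena (btf_decl_tag("arg:arena")); the subprog is
		 * hypothetical.
		 *
		 *	__noinline int arena_sum(int __arg_arena *arr, int n)
		 *	{
		 *		int i, sum = 0;
		 *
		 *		for (i = 0; i < n; i++)
		 *			sum += arr[i];
		 *		return sum;
		 *	}
		 */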
|
2024-01-05 08:09:04 +08:00
|
|
|
if (is_global) { /* generic user data pointer */
|
bpf: abstract away global subprog arg preparation logic from reg state setup
btf_prepare_func_args() is used to understand expectations and
restrictions on global subprog arguments. But current implementation is
hard to extend, as it intermixes BTF-based func prototype parsing and
interpretation logic with setting up register state at subprog entry.
Worse still, those registers are not completely set up inside
btf_prepare_func_args(), requiring some more logic later in
do_check_common(). Like calling mark_reg_unknown() and similar
initialization operations.
This intermixing of BTF interpretation and register state setup is
problematic. First, it causes duplication of BTF parsing logic for global
subprog verification (to set up initial state of global subprog) and
global subprog call sites analysis (when we need to check that whatever
is being passed into global subprog matches expectations), performed in
btf_check_subprog_call().
Given we want to extend global func argument with tags later, this
duplication is problematic. So refactor btf_prepare_func_args() to do
only BTF-based func proto and args parsing, returning high-level
argument "expectations" only, with no regard to specifics of register
state. I.e., if it's a context argument, instead of setting register
state to PTR_TO_CTX, we return ARG_PTR_TO_CTX enum for that argument as
"an argument specification" for further processing inside
do_check_common(). Similarly for SCALAR arguments, PTR_TO_MEM, etc.
This allows to reuse btf_prepare_func_args() in following patches at
global subprog call site analysis time. It also keeps register setup
code consistently in one place, do_check_common().
Besides all this, we cache this argument specs information inside
env->subprog_info, eliminating the need to redo these potentially
expensive BTF traversals, especially if BPF program's BTF is big and/or
there are lots of global subprog calls.
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20231215011334.2307144-2-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-12-15 09:13:25 +08:00
|
|
|
u32 mem_size;
|
|
|
|
|
2024-01-30 08:06:46 +08:00
|
|
|
if (tags & ARG_TAG_NULLABLE) {
|
|
|
|
bpf_log(log, "arg#%d has invalid combination of tags\n", i);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2021-02-13 04:56:41 +08:00
|
|
|
t = btf_type_skip_modifiers(btf, t->type, NULL);
|
bpf: abstract away global subprog arg preparation logic from reg state setup
btf_prepare_func_args() is used to understand expectations and
restrictions on global subprog arguments. But current implementation is
hard to extend, as it intermixes BTF-based func prototype parsing and
interpretation logic with setting up register state at subprog entry.
Worse still, those registers are not completely set up inside
btf_prepare_func_args(), requiring some more logic later in
do_check_common(). Like calling mark_reg_unknown() and similar
initialization operations.
This intermixing of BTF interpretation and register state setup is
problematic. First, it causes duplication of BTF parsing logic for global
subprog verification (to set up initial state of global subprog) and
global subprog call sites analysis (when we need to check that whatever
is being passed into global subprog matches expectations), performed in
btf_check_subprog_call().
Given we want to extend global func argument with tags later, this
duplication is problematic. So refactor btf_prepare_func_args() to do
only BTF-based func proto and args parsing, returning high-level
argument "expectations" only, with no regard to specifics of register
state. I.e., if it's a context argument, instead of setting register
state to PTR_TO_CTX, we return ARG_PTR_TO_CTX enum for that argument as
"an argument specification" for further processing inside
do_check_common(). Similarly for SCALAR arguments, PTR_TO_MEM, etc.
This allows to reuse btf_prepare_func_args() in following patches at
global subprog call site analysis time. It also keeps register setup
code consistently in one place, do_check_common().
Besides all this, we cache this argument specs information inside
env->subprog_info, eliminating the need to redo these potentially
expensive BTF traversals, especially if BPF program's BTF is big and/or
there are lots of global subprog calls.
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20231215011334.2307144-2-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-12-15 09:13:25 +08:00
|
|
|
ref_t = btf_resolve_size(btf, t, &mem_size);
|
2021-02-13 04:56:41 +08:00
|
|
|
if (IS_ERR(ref_t)) {
|
2024-01-05 08:09:04 +08:00
|
|
|
bpf_log(log, "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
|
|
|
|
i, btf_type_str(t), btf_name_by_offset(btf, t->name_off),
|
2021-02-13 04:56:41 +08:00
|
|
|
PTR_ERR(ref_t));
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2024-01-05 08:09:04 +08:00
|
|
|
sub->args[i].arg_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL;
|
|
|
|
if (tags & ARG_TAG_NONNULL)
|
|
|
|
sub->args[i].arg_type &= ~PTR_MAYBE_NULL;
|
bpf: abstract away global subprog arg preparation logic from reg state setup
btf_prepare_func_args() is used to understand expectations and
restrictions on global subprog arguments. But the current implementation
is hard to extend, as it intermixes BTF-based func prototype parsing and
interpretation logic with setting up register state at subprog entry.
Worse still, those registers are not completely set up inside
btf_prepare_func_args(), requiring some more logic later in
do_check_common(), like calling mark_reg_unknown() and similar
initialization operations.
This intermixing of BTF interpretation and register state setup is
problematic. First, it causes duplication of BTF parsing logic between
global subprog verification (to set up the initial state of a global
subprog) and global subprog call site analysis (when we need to check
that whatever is being passed into a global subprog matches
expectations), performed in btf_check_subprog_call().
Given we want to extend global func arguments with tags later, this
duplication is problematic. So refactor btf_prepare_func_args() to do
only BTF-based func proto and args parsing, returning high-level
argument "expectations" only, with no regard to specifics of register
state. I.e., if it's a context argument, instead of setting register
state to PTR_TO_CTX, we return the ARG_PTR_TO_CTX enum for that argument
as "an argument specification" for further processing inside
do_check_common(). Similarly for SCALAR arguments, PTR_TO_MEM, etc.
This allows btf_prepare_func_args() to be reused in following patches at
global subprog call site analysis time. It also keeps register setup
code consistently in one place, do_check_common().
Besides all this, we cache the argument spec information inside
env->subprog_info, eliminating the need to redo these potentially
expensive BTF traversals, especially if a BPF program's BTF is big and/or
there are lots of global subprog calls.
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20231215011334.2307144-2-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-12-15 09:13:25 +08:00
|
|
|
sub->args[i].mem_size = mem_size;
|
2020-01-10 14:41:20 +08:00
|
|
|
continue;
|
|
|
|
}
|
2024-01-05 08:09:04 +08:00
|
|
|
|
|
|
|
skip_pointer:
|
|
|
|
if (tags) {
|
|
|
|
bpf_log(log, "arg#%d has pointer tag, but is not a pointer type\n", i);
|
bpf: support 'arg:xxx' btf_decl_tag-based hints for global subprog args
Add support for annotating global BPF subprog arguments to provide more
information about expected semantics of the argument. Currently,
verifier relies purely on argument's BTF type information, and supports
three general use cases: scalar, pointer-to-context, and
pointer-to-fixed-size-memory.
Scalar and pointer-to-fixed-mem work well in practice and are quite
natural to use. But pointer-to-context is a bit problematic, as typical
BPF users don't realize that they need to use a special type name to
signal to the verifier that the argument is not just some pointer, but
actually a PTR_TO_CTX. Further, even if users do know which type to use,
it is limiting in situations where the same BPF program logic is used
across a few different program types. A common case is kprobes,
tracepoints, and perf_event programs having a helper to send some data
over the BPF perf buffer. bpf_perf_event_output() requires a `ctx`
argument, and so it's quite cumbersome to share such a global subprog
across a few BPF programs of different types, necessitating an extra
static subprog that is context type-agnostic.
Long story short, there is a need to go beyond types and allow users to
add hints to global subprog arguments to define expectations.
This patch adds such support for two initial special tags:
- pointer to context;
- non-null qualifier for generic pointer arguments.
All of the above came up in practice already and seem like generally
useful additions. The non-null qualifier is an often-requested feature,
which currently has to be worked around by having unnecessary NULL
checks inside subprogs even if we know that arguments are never NULL.
Pointer to context was discussed earlier.
As for implementation, we utilize btf_decl_tag attribute and set up an
"arg:xxx" convention to specify argument hint. As such:
- btf_decl_tag("arg:ctx") is a PTR_TO_CTX hint;
- btf_decl_tag("arg:nonnull") marks pointer argument as not allowed to
be NULL, making NULL check inside global subprog unnecessary.
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20231215011334.2307144-7-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-12-15 09:13:30 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2024-01-05 08:09:03 +08:00
|
|
|
if (btf_type_is_int(t) || btf_is_any_enum(t)) {
|
|
|
|
sub->args[i].arg_type = ARG_ANYTHING;
|
|
|
|
continue;
|
|
|
|
}
|
2024-02-03 03:05:29 +08:00
|
|
|
if (!is_global)
|
|
|
|
return -EINVAL;
|
2020-01-10 14:41:20 +08:00
|
|
|
bpf_log(log, "Arg#%d type %s in %s() is not supported yet.\n",
|
2022-09-17 04:28:00 +08:00
|
|
|
i, btf_type_str(t), tname);
|
2020-01-10 14:41:20 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
bpf: abstract away global subprog arg preparation logic from reg state setup
btf_prepare_func_args() is used to understand expectations and
restrictions on global subprog arguments. But the current implementation
is hard to extend, as it intermixes BTF-based func prototype parsing and
interpretation logic with setting up register state at subprog entry.
Worse still, those registers are not completely set up inside
btf_prepare_func_args(), requiring some more logic later in
do_check_common(), like calling mark_reg_unknown() and similar
initialization operations.
This intermixing of BTF interpretation and register state setup is
problematic. First, it causes duplication of BTF parsing logic between
global subprog verification (to set up the initial state of a global
subprog) and global subprog call site analysis (when we need to check
that whatever is being passed into a global subprog matches
expectations), performed in btf_check_subprog_call().
Given we want to extend global func arguments with tags later, this
duplication is problematic. So refactor btf_prepare_func_args() to do
only BTF-based func proto and args parsing, returning high-level
argument "expectations" only, with no regard to specifics of register
state. I.e., if it's a context argument, instead of setting register
state to PTR_TO_CTX, we return the ARG_PTR_TO_CTX enum for that argument
as "an argument specification" for further processing inside
do_check_common(). Similarly for SCALAR arguments, PTR_TO_MEM, etc.
This allows btf_prepare_func_args() to be reused in following patches at
global subprog call site analysis time. It also keeps register setup
code consistently in one place, do_check_common().
Besides all this, we cache the argument spec information inside
env->subprog_info, eliminating the need to redo these potentially
expensive BTF traversals, especially if a BPF program's BTF is big and/or
there are lots of global subprog calls.
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20231215011334.2307144-2-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-12-15 09:13:25 +08:00
|
|
|
|
|
|
|
sub->arg_cnt = nargs;
|
|
|
|
sub->args_cached = true;
|
|
|
|
|
2019-11-15 02:57:16 +08:00
|
|
|
return 0;
|
|
|
|
}
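As a concrete illustration of the "arg:xxx" decl-tag convention handled
above, a global subprog on the BPF program side could be annotated
roughly as follows. This is a minimal sketch: the __arg_ctx/__arg_nonnull
wrapper macros and the 'events' map are assumptions, mirroring what
libbpf's bpf_helpers.h provides.

	#define __arg_ctx	__attribute__((btf_decl_tag("arg:ctx")))
	#define __arg_nonnull	__attribute__((btf_decl_tag("arg:nonnull")))

	/* Shareable across kprobe/tracepoint/perf_event programs: the
	 * arg:ctx tag marks ctx as PTR_TO_CTX regardless of its declared
	 * type name, and arg:nonnull lets the body skip a NULL check.
	 */
	__noinline int emit_event(void *ctx __arg_ctx, int *val __arg_nonnull)
	{
		return bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
					     val, sizeof(*val));
	}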
|
|
|
|
|
2020-09-28 19:31:04 +08:00
|
|
|
static void btf_type_show(const struct btf *btf, u32 type_id, void *obj,
|
|
|
|
struct btf_show *show)
|
|
|
|
{
|
|
|
|
const struct btf_type *t = btf_type_by_id(btf, type_id);
|
|
|
|
|
|
|
|
show->btf = btf;
|
|
|
|
memset(&show->state, 0, sizeof(show->state));
|
|
|
|
memset(&show->obj, 0, sizeof(show->obj));
|
|
|
|
|
|
|
|
btf_type_ops(t)->show(btf, t, type_id, obj, 0, show);
|
|
|
|
}
|
|
|
|
|
2024-07-12 02:23:21 +08:00
|
|
|
__printf(2, 0) static void btf_seq_show(struct btf_show *show, const char *fmt,
|
|
|
|
va_list args)
|
2020-09-28 19:31:04 +08:00
|
|
|
{
|
|
|
|
seq_vprintf((struct seq_file *)show->target, fmt, args);
|
|
|
|
}
|
|
|
|
|
2020-09-28 19:31:09 +08:00
|
|
|
int btf_type_seq_show_flags(const struct btf *btf, u32 type_id,
|
|
|
|
void *obj, struct seq_file *m, u64 flags)
|
2020-09-28 19:31:04 +08:00
|
|
|
{
|
|
|
|
struct btf_show sseq;
|
|
|
|
|
|
|
|
sseq.target = m;
|
|
|
|
sseq.showfn = btf_seq_show;
|
|
|
|
sseq.flags = flags;
|
|
|
|
|
|
|
|
btf_type_show(btf, type_id, obj, &sseq);
|
|
|
|
|
|
|
|
return sseq.state.status;
|
|
|
|
}
|
|
|
|
|
2018-04-19 06:56:00 +08:00
|
|
|
void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
|
|
|
|
struct seq_file *m)
|
|
|
|
{
|
2020-09-28 19:31:04 +08:00
|
|
|
(void) btf_type_seq_show_flags(btf, type_id, obj, m,
|
|
|
|
BTF_SHOW_NONAME | BTF_SHOW_COMPACT |
|
|
|
|
BTF_SHOW_ZERO | BTF_SHOW_UNSAFE);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct btf_show_snprintf {
|
|
|
|
struct btf_show show;
|
|
|
|
int len_left; /* space left in string */
|
|
|
|
int len; /* length we would have written */
|
|
|
|
};
|
|
|
|
|
2024-07-12 02:23:21 +08:00
|
|
|
__printf(2, 0) static void btf_snprintf_show(struct btf_show *show, const char *fmt,
|
|
|
|
va_list args)
|
2020-09-28 19:31:04 +08:00
|
|
|
{
|
|
|
|
struct btf_show_snprintf *ssnprintf = (struct btf_show_snprintf *)show;
|
|
|
|
int len;
|
|
|
|
|
|
|
|
len = vsnprintf(show->target, ssnprintf->len_left, fmt, args);
|
|
|
|
|
|
|
|
if (len < 0) {
|
|
|
|
ssnprintf->len_left = 0;
|
|
|
|
ssnprintf->len = len;
|
2022-07-12 05:13:17 +08:00
|
|
|
} else if (len >= ssnprintf->len_left) {
|
2020-09-28 19:31:04 +08:00
|
|
|
/* no space, drive on to get length we would have written */
|
|
|
|
ssnprintf->len_left = 0;
|
|
|
|
ssnprintf->len += len;
|
|
|
|
} else {
|
|
|
|
ssnprintf->len_left -= len;
|
|
|
|
ssnprintf->len += len;
|
|
|
|
show->target += len;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj,
|
|
|
|
char *buf, int len, u64 flags)
|
|
|
|
{
|
|
|
|
struct btf_show_snprintf ssnprintf;
|
|
|
|
|
|
|
|
ssnprintf.show.target = buf;
|
|
|
|
ssnprintf.show.flags = flags;
|
|
|
|
ssnprintf.show.showfn = btf_snprintf_show;
|
|
|
|
ssnprintf.len_left = len;
|
|
|
|
ssnprintf.len = 0;
|
|
|
|
|
|
|
|
btf_type_show(btf, type_id, obj, (struct btf_show *)&ssnprintf);
|
|
|
|
|
2022-02-21 02:40:55 +08:00
|
|
|
/* If we encountered an error, return it. */
|
2020-09-28 19:31:04 +08:00
|
|
|
if (ssnprintf.show.state.status)
|
|
|
|
return ssnprintf.show.state.status;
|
2018-04-19 06:56:00 +08:00
|
|
|
|
2020-09-28 19:31:04 +08:00
|
|
|
/* Otherwise return length we would have written */
|
|
|
|
return ssnprintf.len;
|
2018-04-19 06:56:00 +08:00
|
|
|
}
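A hedged sketch of how a caller might drive btf_type_snprintf_show(); as
the function above shows, it follows snprintf() semantics, returning
either a negative status from the show machinery or the length that
would have been written (btf, type_id and obj are assumed to describe
some object):

	char buf[256];
	int len;

	len = btf_type_snprintf_show(btf, type_id, obj, buf, sizeof(buf),
				     BTF_SHOW_COMPACT);
	if (len < 0)
		return len;		/* show-machinery error */
	if (len >= sizeof(buf))
		return -ENOSPC;		/* truncated; retry with bigger buf */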
|
2018-04-19 06:56:01 +08:00
|
|
|
|
2019-08-20 21:53:46 +08:00
|
|
|
#ifdef CONFIG_PROC_FS
|
|
|
|
static void bpf_btf_show_fdinfo(struct seq_file *m, struct file *filp)
|
|
|
|
{
|
|
|
|
const struct btf *btf = filp->private_data;
|
|
|
|
|
|
|
|
seq_printf(m, "btf_id:\t%u\n", btf->id);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2018-04-19 06:56:01 +08:00
|
|
|
static int btf_release(struct inode *inode, struct file *filp)
|
|
|
|
{
|
|
|
|
btf_put(filp->private_data);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-04-19 06:56:02 +08:00
|
|
|
const struct file_operations btf_fops = {
|
2019-08-20 21:53:46 +08:00
|
|
|
#ifdef CONFIG_PROC_FS
|
|
|
|
.show_fdinfo = bpf_btf_show_fdinfo,
|
|
|
|
#endif
|
2018-04-19 06:56:01 +08:00
|
|
|
.release = btf_release,
|
|
|
|
};
|
|
|
|
|
2018-05-05 05:49:51 +08:00
|
|
|
static int __btf_new_fd(struct btf *btf)
|
|
|
|
{
|
|
|
|
return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC);
|
|
|
|
}
|
|
|
|
|
2023-04-07 07:41:58 +08:00
|
|
|
int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
|
2018-04-19 06:56:01 +08:00
|
|
|
{
|
|
|
|
struct btf *btf;
|
2018-05-05 05:49:51 +08:00
|
|
|
int ret;
|
2018-04-19 06:56:01 +08:00
|
|
|
|
2023-04-07 07:41:58 +08:00
|
|
|
btf = btf_parse(attr, uattr, uattr_size);
|
2018-04-19 06:56:01 +08:00
|
|
|
if (IS_ERR(btf))
|
|
|
|
return PTR_ERR(btf);
|
|
|
|
|
2018-05-05 05:49:51 +08:00
|
|
|
ret = btf_alloc_id(btf);
|
|
|
|
if (ret) {
|
|
|
|
btf_free(btf);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The BTF ID is published to userspace.
|
|
|
|
* All BTF free must go through call_rcu() from
|
|
|
|
* now on (i.e. free by calling btf_put()).
|
|
|
|
*/
|
|
|
|
|
|
|
|
ret = __btf_new_fd(btf);
|
|
|
|
if (ret < 0)
|
2018-04-19 06:56:01 +08:00
|
|
|
btf_put(btf);
|
|
|
|
|
2018-05-05 05:49:51 +08:00
|
|
|
return ret;
|
2018-04-19 06:56:01 +08:00
|
|
|
}
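From userspace, the path into btf_new_fd() is the BPF_BTF_LOAD command.
A minimal sketch, with error handling elided and btf_data/btf_data_size
assumed to hold a raw ".BTF" blob:

	union bpf_attr attr = {};
	int btf_fd;

	attr.btf = (__u64)(unsigned long)btf_data;
	attr.btf_size = btf_data_size;
	btf_fd = syscall(__NR_bpf, BPF_BTF_LOAD, &attr, sizeof(attr));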
|
|
|
|
|
|
|
|
struct btf *btf_get_by_fd(int fd)
|
|
|
|
{
|
|
|
|
struct btf *btf;
|
2024-08-14 05:34:10 +08:00
|
|
|
CLASS(fd, f)(fd);
|
2018-04-19 06:56:01 +08:00
|
|
|
|
2024-08-14 05:34:10 +08:00
|
|
|
if (fd_empty(f))
|
2018-04-19 06:56:01 +08:00
|
|
|
return ERR_PTR(-EBADF);
|
|
|
|
|
2024-08-14 05:34:10 +08:00
|
|
|
if (fd_file(f)->f_op != &btf_fops)
|
2018-04-19 06:56:01 +08:00
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
|
2024-06-01 02:12:01 +08:00
|
|
|
btf = fd_file(f)->private_data;
|
2018-05-05 05:49:51 +08:00
|
|
|
refcount_inc(&btf->refcnt);
|
2018-04-19 06:56:01 +08:00
|
|
|
|
|
|
|
return btf;
|
|
|
|
}
|
2018-04-19 06:56:02 +08:00
|
|
|
|
|
|
|
int btf_get_info_by_fd(const struct btf *btf,
|
|
|
|
const union bpf_attr *attr,
|
|
|
|
union bpf_attr __user *uattr)
|
|
|
|
{
|
2018-05-05 05:49:52 +08:00
|
|
|
struct bpf_btf_info __user *uinfo;
|
2020-03-21 00:22:58 +08:00
|
|
|
struct bpf_btf_info info;
|
2018-05-05 05:49:52 +08:00
|
|
|
u32 info_copy, btf_copy;
|
|
|
|
void __user *ubtf;
|
2020-11-10 09:19:29 +08:00
|
|
|
char __user *uname;
|
|
|
|
u32 uinfo_len, uname_len, name_len;
|
|
|
|
int ret = 0;
|
2018-04-19 06:56:02 +08:00
|
|
|
|
2018-05-05 05:49:52 +08:00
|
|
|
uinfo = u64_to_user_ptr(attr->info.info);
|
|
|
|
uinfo_len = attr->info.info_len;
|
|
|
|
|
|
|
|
info_copy = min_t(u32, uinfo_len, sizeof(info));
|
2020-03-21 00:22:58 +08:00
|
|
|
memset(&info, 0, sizeof(info));
|
2018-05-05 05:49:52 +08:00
|
|
|
if (copy_from_user(&info, uinfo, info_copy))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
info.id = btf->id;
|
|
|
|
ubtf = u64_to_user_ptr(info.btf);
|
|
|
|
btf_copy = min_t(u32, btf->data_size, info.btf_size);
|
|
|
|
if (copy_to_user(ubtf, btf->data, btf_copy))
|
|
|
|
return -EFAULT;
|
|
|
|
info.btf_size = btf->data_size;
|
|
|
|
|
2020-11-10 09:19:29 +08:00
|
|
|
info.kernel_btf = btf->kernel_btf;
|
|
|
|
|
|
|
|
uname = u64_to_user_ptr(info.name);
|
|
|
|
uname_len = info.name_len;
|
|
|
|
if (!uname ^ !uname_len)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
name_len = strlen(btf->name);
|
|
|
|
info.name_len = name_len;
|
|
|
|
|
|
|
|
if (uname) {
|
|
|
|
if (uname_len >= name_len + 1) {
|
|
|
|
if (copy_to_user(uname, btf->name, name_len + 1))
|
|
|
|
return -EFAULT;
|
|
|
|
} else {
|
|
|
|
char zero = '\0';
|
|
|
|
|
|
|
|
if (copy_to_user(uname, btf->name, uname_len - 1))
|
|
|
|
return -EFAULT;
|
|
|
|
if (put_user(zero, uname + uname_len - 1))
|
|
|
|
return -EFAULT;
|
|
|
|
/* let user-space know about too short buffer */
|
|
|
|
ret = -ENOSPC;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-05-05 05:49:52 +08:00
|
|
|
if (copy_to_user(uinfo, &info, info_copy) ||
|
|
|
|
put_user(info_copy, &uattr->info.info_len))
|
2018-04-19 06:56:02 +08:00
|
|
|
return -EFAULT;
|
|
|
|
|
2020-11-10 09:19:29 +08:00
|
|
|
return ret;
|
2018-04-19 06:56:02 +08:00
|
|
|
}
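The userspace counterpart is BPF_OBJ_GET_INFO_BY_FD with a struct
bpf_btf_info. A hedged sketch of the common two-step pattern (query
sizes first, fetch data second):

	struct bpf_btf_info info = {};
	union bpf_attr attr = {};
	char name[64];

	info.name = (__u64)(unsigned long)name;
	info.name_len = sizeof(name);
	attr.info.bpf_fd = btf_fd;	/* fd from BPF_BTF_LOAD etc. */
	attr.info.info = (__u64)(unsigned long)&info;
	attr.info.info_len = sizeof(info);

	if (syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)) == 0) {
		/* info.btf_size now says how big a buffer to hang off
		 * info.btf for a second call that fetches the raw BTF.
		 * A too-short name buffer comes back NUL-terminated with
		 * errno set to ENOSPC, matching the code above. */
	}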
|
2018-05-05 05:49:51 +08:00
|
|
|
|
|
|
|
int btf_get_fd_by_id(u32 id)
|
|
|
|
{
|
|
|
|
struct btf *btf;
|
|
|
|
int fd;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
btf = idr_find(&btf_idr, id);
|
|
|
|
if (!btf || !refcount_inc_not_zero(&btf->refcnt))
|
|
|
|
btf = ERR_PTR(-ENOENT);
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
if (IS_ERR(btf))
|
|
|
|
return PTR_ERR(btf);
|
|
|
|
|
|
|
|
fd = __btf_new_fd(btf);
|
|
|
|
if (fd < 0)
|
|
|
|
btf_put(btf);
|
|
|
|
|
|
|
|
return fd;
|
|
|
|
}
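Putting btf_get_fd_by_id() together with the ID iterator, a userspace
walk over every BTF object in the system might look like this (a sketch;
BPF_BTF_GET_NEXT_ID and BPF_BTF_GET_FD_BY_ID are the relevant syscall
commands):

	union bpf_attr attr;
	__u32 id = 0;
	int btf_fd;

	for (;;) {
		memset(&attr, 0, sizeof(attr));
		attr.start_id = id;
		if (syscall(__NR_bpf, BPF_BTF_GET_NEXT_ID, &attr, sizeof(attr)))
			break;			/* ENOENT: no more IDs */
		id = attr.next_id;

		memset(&attr, 0, sizeof(attr));
		attr.btf_id = id;
		btf_fd = syscall(__NR_bpf, BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
		if (btf_fd < 0)
			continue;		/* raced with BTF unload */
		/* inspect via BPF_OBJ_GET_INFO_BY_FD, then release */
		close(btf_fd);
	}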
|
|
|
|
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption of vmlinux BTF throughout the BPF verifier.
Instead, wherever BTF type IDs are involved, also track the instance of
struct btf that goes along with the type ID. This allows gradually adding
support for kernel module BTFs and using/tracking module types across BPF
helper calls and registers.
This patch also renames the btf_id() function to btf_obj_id() to minimize
the naming clash with using btf_id to denote a BTF *type* ID, rather than
a BTF *object*'s ID.
Also, although btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally
when a BPF program is using BTF-powered attachment (tp_btf, fentry/fexit,
etc). This makes for simpler cleanup code.
Now that a BTF type ID is not enough to uniquely identify a BTF type,
extend the BPF trampoline key to include the BTF object ID. To
differentiate that from a target program's BPF ID, set the 31st bit of
the type ID. BTF type IDs (at least currently) are not allowed to take
the full 32 bits, so there is no danger of confusing that bit with a
valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-04 04:46:29 +08:00
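The trampoline key layout described in the message above amounts to
something like the following (a sketch modeled on
bpf_trampoline_compute_key() in bpf_verifier.h):

	/* 64-bit trampoline key: BTF object ID in the upper half, BTF type
	 * ID in the lower half, with bit 31 set to tell a (btf, btf_id)
	 * target apart from a target-program key of (prog_id << 32 | btf_id).
	 */
	u64 key = ((u64)btf_obj_id(btf) << 32) | 0x80000000 | btf_id;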
|
|
|
u32 btf_obj_id(const struct btf *btf)
|
2018-05-05 05:49:51 +08:00
|
|
|
{
|
|
|
|
return btf->id;
|
|
|
|
}
|
2020-08-26 03:21:19 +08:00
|
|
|
|
2020-12-04 04:46:30 +08:00
|
|
|
bool btf_is_kernel(const struct btf *btf)
|
|
|
|
{
|
|
|
|
return btf->kernel_btf;
|
|
|
|
}
|
|
|
|
|
2021-01-12 15:55:18 +08:00
|
|
|
bool btf_is_module(const struct btf *btf)
|
|
|
|
{
|
|
|
|
return btf->kernel_btf && strcmp(btf->name, "vmlinux") != 0;
|
|
|
|
}
|
|
|
|
|
bpf: Fix UAF due to race between btf_try_get_module and load_module
While working on code to populate kfunc BTF ID sets for module BTF from
its initcall, I noticed that by the time the initcall is invoked, the
module BTF can already be seen by userspace (and the BPF verifier). The
existing btf_try_get_module calls try_module_get which only fails if
mod->state == MODULE_STATE_GOING, i.e. it can increment the module
reference while the module initcall is happening in parallel.
Currently, BTF parsing happens from MODULE_STATE_COMING notifier
callback. At this point, the module initcalls have not been invoked.
The notifier callback parses and prepares the module BTF, allocates an
ID, which publishes it to userspace, and then adds it to the btf_modules
list allowing the kernel to invoke btf_try_get_module for the BTF.
However, at this point, the module has not been fully initialized (i.e.
its initcalls have not finished). The code in module.c can still fail
and free the module, without caring for other users. However, nothing
stops btf_try_get_module from succeeding between the state transition
from MODULE_STATE_COMING to MODULE_STATE_LIVE.
This leads to a use-after-free issue when BPF program loads
successfully in the state transition, load_module's do_init_module call
fails and frees the module, and BPF program fd on close calls module_put
for the freed module. A later patch adds a test case to verify we don't
regress in this area in the future.
There are multiple points after prepare_coming_module (in load_module)
where failure can occur and module loading can return error. We
illustrate and test for the race using the last point where it can
practically occur (in module __init function).
An illustration of the race:
CPU 0 CPU 1
load_module
notifier_call(MODULE_STATE_COMING)
btf_parse_module
btf_alloc_id // Published to userspace
list_add(&btf_mod->list, btf_modules)
mod->init(...)
... ^
bpf_check |
check_pseudo_btf_id |
btf_try_get_module |
returns true | ...
... | module __init in progress
return prog_fd | ...
... V
if (ret < 0)
free_module(mod)
...
close(prog_fd)
...
bpf_prog_free_deferred
module_put(used_btf.mod) // use-after-free
We fix this issue by setting a flag BTF_MODULE_F_LIVE, from the notifier
callback when MODULE_STATE_LIVE state is reached for the module, so that
we return NULL from btf_try_get_module for modules that are not fully
formed. Since try_module_get already checks that module is not in
MODULE_STATE_GOING state, and that is the only transition a live module
can make before being removed from btf_modules list, this is enough to
close the race and prevent the bug.
A later selftest patch crafts the race condition artificially to verify
that it has been fixed, and that the verifier fails to load the program
(with ENXIO).
Lastly, a couple of comments:
1. Even if this race didn't exist, it seems more appropriate to only
access resources (ksyms and kfuncs) of a fully formed module which
has been initialized completely.
2. This patch was born out of need for synchronization against module
initcall for the next patch, so it is needed for correctness even
without the aforementioned race condition. The BTF resources
initialized by module initcall are set up once and then only looked
up, so just waiting until the initcall has finished ensures correct
behavior.
Fixes: 541c3bad8dc5 ("bpf: Support BPF ksym variables in kernel modules")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-15 00:39:44 +08:00
|
|
|
enum {
|
|
|
|
BTF_MODULE_F_LIVE = (1 << 0),
|
|
|
|
};
|
|
|
|
|
2020-11-10 09:19:31 +08:00
|
|
|
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
|
|
|
|
struct btf_module {
|
|
|
|
struct list_head list;
|
|
|
|
struct module *module;
|
|
|
|
struct btf *btf;
|
|
|
|
struct bin_attribute *sysfs_attr;
|
bpf: Fix UAF due to race between btf_try_get_module and load_module
While working on code to populate kfunc BTF ID sets for module BTF from
its initcall, I noticed that by the time the initcall is invoked, the
module BTF can already be seen by userspace (and the BPF verifier). The
existing btf_try_get_module calls try_module_get which only fails if
mod->state == MODULE_STATE_GOING, i.e. it can increment the module
reference while the module initcall is happening in parallel.
Currently, BTF parsing happens from MODULE_STATE_COMING notifier
callback. At this point, the module initcalls have not been invoked.
The notifier callback parses and prepares the module BTF, allocates an
ID, which publishes it to userspace, and then adds it to the btf_modules
list allowing the kernel to invoke btf_try_get_module for the BTF.
However, at this point, the module has not been fully initialized (i.e.
its initcalls have not finished). The code in module.c can still fail
and free the module, without caring for other users. However, nothing
stops btf_try_get_module from succeeding between the state transition
from MODULE_STATE_COMING to MODULE_STATE_LIVE.
This leads to a use-after-free issue when BPF program loads
successfully in the state transition, load_module's do_init_module call
fails and frees the module, and BPF program fd on close calls module_put
for the freed module. A later patch adds a test case to verify we don't
regress in this area in the future.
There are multiple points after prepare_coming_module (in load_module)
where failure can occur and module loading can return error. We
illustrate and test for the race using the last point where it can
practically occur (in module __init function).
An illustration of the race:
CPU 0 CPU 1
load_module
notifier_call(MODULE_STATE_COMING)
btf_parse_module
btf_alloc_id // Published to userspace
list_add(&btf_mod->list, btf_modules)
mod->init(...)
... ^
bpf_check |
check_pseudo_btf_id |
btf_try_get_module |
returns true | ...
... | module __init in progress
return prog_fd | ...
... V
if (ret < 0)
free_module(mod)
...
close(prog_fd)
...
bpf_prog_free_deferred
module_put(used_btf.mod) // use-after-free
We fix this issue by setting a flag BTF_MODULE_F_LIVE, from the notifier
callback when MODULE_STATE_LIVE state is reached for the module, so that
we return NULL from btf_try_get_module for modules that are not fully
formed. Since try_module_get already checks that module is not in
MODULE_STATE_GOING state, and that is the only transition a live module
can make before being removed from btf_modules list, this is enough to
close the race and prevent the bug.
A later selftest patch crafts the race condition artificially to verify
that it has been fixed, and that the verifier fails to load the program
(with ENXIO).
Lastly, a couple of comments:
1. Even if this race didn't exist, it seems more appropriate to only
access resources (ksyms and kfuncs) of a fully formed module which
has been initialized completely.
2. This patch was born out of need for synchronization against module
initcall for the next patch, so it is needed for correctness even
without the aforementioned race condition. The BTF resources
initialized by module initcall are set up once and then only looked
up, so just waiting until the initcall has finished ensures correct
behavior.
Fixes: 541c3bad8dc5 ("bpf: Support BPF ksym variables in kernel modules")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-15 00:39:44 +08:00
|
|
|
int flags;
|
2020-11-10 09:19:31 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static LIST_HEAD(btf_modules);
|
|
|
|
static DEFINE_MUTEX(btf_module_mutex);
|
|
|
|
|
|
|
|
static ssize_t
|
|
|
|
btf_module_read(struct file *file, struct kobject *kobj,
|
|
|
|
struct bin_attribute *bin_attr,
|
|
|
|
char *buf, loff_t off, size_t len)
|
|
|
|
{
|
|
|
|
const struct btf *btf = bin_attr->private;
|
|
|
|
|
|
|
|
memcpy(buf, btf->data + off, len);
|
|
|
|
return len;
|
|
|
|
}
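Once published by the notifier below, each module's raw BTF is visible
as a read-only binary attribute under /sys/kernel/btf/. A hedged
userspace sketch (module name is just an example):

	char buf[4096];
	ssize_t n;
	int fd;

	fd = open("/sys/kernel/btf/nf_conntrack", O_RDONLY);
	if (fd < 0)
		return -errno;
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		;	/* feed buf to a BTF parser, e.g. libbpf's btf__new() */
	close(fd);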
|
|
|
|
|
2021-12-02 02:10:31 +08:00
|
|
|
static void purge_cand_cache(struct btf *btf);
|
|
|
|
|
2020-11-10 09:19:31 +08:00
|
|
|
static int btf_module_notify(struct notifier_block *nb, unsigned long op,
|
|
|
|
void *module)
|
|
|
|
{
|
|
|
|
struct btf_module *btf_mod, *tmp;
|
|
|
|
struct module *mod = module;
|
|
|
|
struct btf *btf;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
if (mod->btf_data_size == 0 ||
|
bpf: Fix UAF due to race between btf_try_get_module and load_module
While working on code to populate kfunc BTF ID sets for module BTF from
its initcall, I noticed that by the time the initcall is invoked, the
module BTF can already be seen by userspace (and the BPF verifier). The
existing btf_try_get_module calls try_module_get which only fails if
mod->state == MODULE_STATE_GOING, i.e. it can increment the module
reference while the module initcall is happening in parallel.
Currently, BTF parsing happens from MODULE_STATE_COMING notifier
callback. At this point, the module initcalls have not been invoked.
The notifier callback parses and prepares the module BTF, allocates an
ID, which publishes it to userspace, and then adds it to the btf_modules
list allowing the kernel to invoke btf_try_get_module for the BTF.
However, at this point, the module has not been fully initialized (i.e.
its initcalls have not finished). The code in module.c can still fail
and free the module, without caring for other users. However, nothing
stops btf_try_get_module from succeeding between the state transition
from MODULE_STATE_COMING to MODULE_STATE_LIVE.
This leads to a use-after-free issue when BPF program loads
successfully in the state transition, load_module's do_init_module call
fails and frees the module, and BPF program fd on close calls module_put
for the freed module. A later patch adds a test case to verify we don't
regress in this area in the future.
There are multiple points after prepare_coming_module (in load_module)
where failure can occur and module loading can return error. We
illustrate and test for the race using the last point where it can
practically occur (in module __init function).
An illustration of the race:
CPU 0 CPU 1
load_module
notifier_call(MODULE_STATE_COMING)
btf_parse_module
btf_alloc_id // Published to userspace
list_add(&btf_mod->list, btf_modules)
mod->init(...)
... ^
bpf_check |
check_pseudo_btf_id |
btf_try_get_module |
returns true | ...
... | module __init in progress
return prog_fd | ...
... V
if (ret < 0)
free_module(mod)
...
close(prog_fd)
...
bpf_prog_free_deferred
module_put(used_btf.mod) // use-after-free
We fix this issue by setting a flag BTF_MODULE_F_LIVE, from the notifier
callback when MODULE_STATE_LIVE state is reached for the module, so that
we return NULL from btf_try_get_module for modules that are not fully
formed. Since try_module_get already checks that module is not in
MODULE_STATE_GOING state, and that is the only transition a live module
can make before being removed from btf_modules list, this is enough to
close the race and prevent the bug.
A later selftest patch crafts the race condition artificially to verify
that it has been fixed, and that the verifier fails to load the program
(with ENXIO).
Lastly, a couple of comments:
1. Even if this race didn't exist, it seems more appropriate to only
access resources (ksyms and kfuncs) of a fully formed module which
has been initialized completely.
2. This patch was born out of need for synchronization against module
initcall for the next patch, so it is needed for correctness even
without the aforementioned race condition. The BTF resources
initialized by module initcall are set up once and then only looked
up, so just waiting until the initcall has finished ensures correct
behavior.
Fixes: 541c3bad8dc5 ("bpf: Support BPF ksym variables in kernel modules")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-15 00:39:44 +08:00
|
|
|
(op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE &&
|
|
|
|
op != MODULE_STATE_GOING))
|
2020-11-10 09:19:31 +08:00
|
|
|
goto out;
|
|
|
|
|
|
|
|
switch (op) {
|
|
|
|
case MODULE_STATE_COMING:
|
|
|
|
btf_mod = kzalloc(sizeof(*btf_mod), GFP_KERNEL);
|
|
|
|
if (!btf_mod) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
2024-06-20 17:17:31 +08:00
|
|
|
btf = btf_parse_module(mod->name, mod->btf_data, mod->btf_data_size,
|
|
|
|
mod->btf_base_data, mod->btf_base_data_size);
|
2020-11-10 09:19:31 +08:00
|
|
|
if (IS_ERR(btf)) {
|
|
|
|
kfree(btf_mod);
|
2023-01-07 10:53:31 +08:00
|
|
|
if (!IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH)) {
|
|
|
|
pr_warn("failed to validate module [%s] BTF: %ld\n",
|
|
|
|
mod->name, PTR_ERR(btf));
|
2022-02-23 09:28:14 +08:00
|
|
|
err = PTR_ERR(btf);
|
2023-01-07 10:53:31 +08:00
|
|
|
} else {
|
|
|
|
pr_warn_once("Kernel module BTF mismatch detected, BTF debug info may be unavailable for some modules\n");
|
|
|
|
}
|
2020-11-10 09:19:31 +08:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
err = btf_alloc_id(btf);
|
|
|
|
if (err) {
|
|
|
|
btf_free(btf);
|
|
|
|
kfree(btf_mod);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2021-12-02 02:10:31 +08:00
|
|
|
purge_cand_cache(NULL);
|
2020-11-10 09:19:31 +08:00
|
|
|
mutex_lock(&btf_module_mutex);
|
|
|
|
btf_mod->module = module;
|
|
|
|
btf_mod->btf = btf;
|
|
|
|
list_add(&btf_mod->list, &btf_modules);
|
|
|
|
mutex_unlock(&btf_module_mutex);
|
|
|
|
|
|
|
|
if (IS_ENABLED(CONFIG_SYSFS)) {
|
|
|
|
struct bin_attribute *attr;
|
|
|
|
|
|
|
|
attr = kzalloc(sizeof(*attr), GFP_KERNEL);
|
|
|
|
if (!attr)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
sysfs_bin_attr_init(attr);
|
|
|
|
attr->attr.name = btf->name;
|
|
|
|
attr->attr.mode = 0444;
|
|
|
|
attr->size = btf->data_size;
|
|
|
|
attr->private = btf;
|
|
|
|
attr->read = btf_module_read;
|
|
|
|
|
|
|
|
err = sysfs_create_bin_file(btf_kobj, attr);
|
|
|
|
if (err) {
|
|
|
|
pr_warn("failed to register module [%s] BTF in sysfs: %d\n",
|
|
|
|
mod->name, err);
|
|
|
|
kfree(attr);
|
|
|
|
err = 0;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
btf_mod->sysfs_attr = attr;
|
|
|
|
}
|
|
|
|
|
bpf: Fix UAF due to race between btf_try_get_module and load_module
While working on code to populate kfunc BTF ID sets for module BTF from
its initcall, I noticed that by the time the initcall is invoked, the
module BTF can already be seen by userspace (and the BPF verifier). The
existing btf_try_get_module calls try_module_get which only fails if
mod->state == MODULE_STATE_GOING, i.e. it can increment the module
reference while the module initcall is happening in parallel.
Currently, BTF parsing happens from MODULE_STATE_COMING notifier
callback. At this point, the module initcalls have not been invoked.
The notifier callback parses and prepares the module BTF, allocates an
ID, which publishes it to userspace, and then adds it to the btf_modules
list allowing the kernel to invoke btf_try_get_module for the BTF.
However, at this point, the module has not been fully initialized (i.e.
its initcalls have not finished). The code in module.c can still fail
and free the module, without caring for other users. However, nothing
stops btf_try_get_module from succeeding between the state transition
from MODULE_STATE_COMING to MODULE_STATE_LIVE.
This leads to a use-after-free issue when BPF program loads
successfully in the state transition, load_module's do_init_module call
fails and frees the module, and BPF program fd on close calls module_put
for the freed module. A later patch adds a test case to verify we don't
regress in this area in the future.
There are multiple points after prepare_coming_module (in load_module)
where failure can occur and module loading can return error. We
illustrate and test for the race using the last point where it can
practically occur (in module __init function).
An illustration of the race:
CPU 0 CPU 1
load_module
notifier_call(MODULE_STATE_COMING)
btf_parse_module
btf_alloc_id // Published to userspace
list_add(&btf_mod->list, btf_modules)
mod->init(...)
... ^
bpf_check |
check_pseudo_btf_id |
btf_try_get_module |
returns true | ...
... | module __init in progress
return prog_fd | ...
... V
if (ret < 0)
free_module(mod)
...
close(prog_fd)
...
bpf_prog_free_deferred
module_put(used_btf.mod) // use-after-free
We fix this issue by setting a flag BTF_MODULE_F_LIVE, from the notifier
callback when MODULE_STATE_LIVE state is reached for the module, so that
we return NULL from btf_try_get_module for modules that are not fully
formed. Since try_module_get already checks that module is not in
MODULE_STATE_GOING state, and that is the only transition a live module
can make before being removed from btf_modules list, this is enough to
close the race and prevent the bug.
A later selftest patch crafts the race condition artificially to verify
that it has been fixed, and that the verifier fails to load the program
(with ENXIO).
Lastly, a couple of comments:
1. Even if this race didn't exist, it seems more appropriate to only
access resources (ksyms and kfuncs) of a fully formed module which
has been initialized completely.
2. This patch was born out of need for synchronization against module
initcall for the next patch, so it is needed for correctness even
without the aforementioned race condition. The BTF resources
initialized by module initcall are set up once and then only looked
up, so just waiting until the initcall has finished ensures correct
behavior.
Fixes: 541c3bad8dc5 ("bpf: Support BPF ksym variables in kernel modules")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-15 00:39:44 +08:00
|
|
|
break;
|
|
|
|
case MODULE_STATE_LIVE:
|
|
|
|
mutex_lock(&btf_module_mutex);
|
|
|
|
list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
|
|
|
|
if (btf_mod->module != module)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
btf_mod->flags |= BTF_MODULE_F_LIVE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
mutex_unlock(&btf_module_mutex);
|
2020-11-10 09:19:31 +08:00
|
|
|
break;
|
|
|
|
case MODULE_STATE_GOING:
|
|
|
|
mutex_lock(&btf_module_mutex);
|
|
|
|
list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
|
|
|
|
if (btf_mod->module != module)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
list_del(&btf_mod->list);
|
|
|
|
if (btf_mod->sysfs_attr)
|
|
|
|
sysfs_remove_bin_file(btf_kobj, btf_mod->sysfs_attr);
|
2021-12-02 02:10:31 +08:00
|
|
|
purge_cand_cache(btf_mod->btf);
|
2020-11-10 09:19:31 +08:00
|
|
|
btf_put(btf_mod->btf);
|
|
|
|
kfree(btf_mod->sysfs_attr);
|
|
|
|
kfree(btf_mod);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
mutex_unlock(&btf_module_mutex);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
return notifier_from_errno(err);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct notifier_block btf_module_nb = {
|
|
|
|
.notifier_call = btf_module_notify,
|
|
|
|
};
|
|
|
|
|
|
|
|
static int __init btf_module_init(void)
|
|
|
|
{
|
|
|
|
register_module_notifier(&btf_module_nb);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
fs_initcall(btf_module_init);
|
|
|
|
#endif /* CONFIG_DEBUG_INFO_BTF_MODULES */
|
2021-01-12 15:55:18 +08:00
|
|
|
|
|
|
|
struct module *btf_try_get_module(const struct btf *btf)
|
|
|
|
{
|
|
|
|
struct module *res = NULL;
|
|
|
|
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
|
|
|
|
struct btf_module *btf_mod, *tmp;
|
|
|
|
|
|
|
|
mutex_lock(&btf_module_mutex);
|
|
|
|
list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
|
|
|
|
if (btf_mod->btf != btf)
|
|
|
|
continue;
|
|
|
|
|
bpf: Fix UAF due to race between btf_try_get_module and load_module
While working on code to populate kfunc BTF ID sets for module BTF from
its initcall, I noticed that by the time the initcall is invoked, the
module BTF can already be seen by userspace (and the BPF verifier). The
existing btf_try_get_module calls try_module_get which only fails if
mod->state == MODULE_STATE_GOING, i.e. it can increment the module
reference while the module initcall is happening in parallel.
Currently, BTF parsing happens from MODULE_STATE_COMING notifier
callback. At this point, the module initcalls have not been invoked.
The notifier callback parses and prepares the module BTF, allocates an
ID, which publishes it to userspace, and then adds it to the btf_modules
list allowing the kernel to invoke btf_try_get_module for the BTF.
However, at this point, the module has not been fully initialized (i.e.
its initcalls have not finished). The code in module.c can still fail
and free the module, without caring for other users. However, nothing
stops btf_try_get_module from succeeding between the state transition
from MODULE_STATE_COMING to MODULE_STATE_LIVE.
This leads to a use-after-free issue when BPF program loads
successfully in the state transition, load_module's do_init_module call
fails and frees the module, and BPF program fd on close calls module_put
for the freed module. A later patch adds a test case to verify we don't
regress in this area in the future.
There are multiple points after prepare_coming_module (in load_module)
where failure can occur and module loading can return error. We
illustrate and test for the race using the last point where it can
practically occur (in module __init function).
An illustration of the race:
CPU 0 CPU 1
load_module
notifier_call(MODULE_STATE_COMING)
btf_parse_module
btf_alloc_id // Published to userspace
list_add(&btf_mod->list, btf_modules)
mod->init(...)
... ^
bpf_check |
check_pseudo_btf_id |
btf_try_get_module |
returns true | ...
... | module __init in progress
return prog_fd | ...
... V
if (ret < 0)
free_module(mod)
...
close(prog_fd)
...
bpf_prog_free_deferred
module_put(used_btf.mod) // use-after-free
We fix this issue by setting a flag BTF_MODULE_F_LIVE, from the notifier
callback when MODULE_STATE_LIVE state is reached for the module, so that
we return NULL from btf_try_get_module for modules that are not fully
formed. Since try_module_get already checks that module is not in
MODULE_STATE_GOING state, and that is the only transition a live module
can make before being removed from btf_modules list, this is enough to
close the race and prevent the bug.
A later selftest patch crafts the race condition artificially to verify
that it has been fixed, and that the verifier fails to load the program
(with ENXIO).
Lastly, a couple of comments:
1. Even if this race didn't exist, it seems more appropriate to only
access resources (ksyms and kfuncs) of a fully formed module which
has been initialized completely.
2. This patch was born out of need for synchronization against module
initcall for the next patch, so it is needed for correctness even
without the aforementioned race condition. The BTF resources
initialized by module initcall are set up once and then only looked
up, so just waiting until the initcall has finished ensures correct
behavior.
Fixes: 541c3bad8dc5 ("bpf: Support BPF ksym variables in kernel modules")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-15 00:39:44 +08:00
|
|
|
/* We must only consider a module whose __init routine has
|
|
|
|
* finished, hence we must check for BTF_MODULE_F_LIVE flag,
|
|
|
|
* which is set from the notifier callback for
|
|
|
|
* MODULE_STATE_LIVE.
|
|
|
|
*/
|
|
|
|
if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module))
|
2021-01-12 15:55:18 +08:00
|
|
|
res = btf_mod->module;
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
mutex_unlock(&btf_module_mutex);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
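A caller-side sketch of the contract btf_try_get_module() enforces
(hypothetical caller, mirroring how kfunc/ksym users pin the owning
module):

	struct module *mod;

	mod = btf_try_get_module(btf);
	if (!mod)
		return -ENXIO;	/* vmlinux BTF, module still in __init,
				 * or module already going away */
	/* ... safely use kfuncs/ksyms backed by this module's BTF ... */
	module_put(mod);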
|
2021-05-14 08:36:11 +08:00
|
|
|
|
2022-03-17 19:59:51 +08:00
|
|
|
/* Returns struct btf corresponding to the struct module.
|
|
|
|
* This function can return NULL or ERR_PTR.
|
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of a certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to the
verifier using the bpf() syscall (e.g. for a kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or wait for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to the list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_F_LIVE flag for the btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-15 00:39:45 +08:00
|
|
|
*/
|
|
|
|
static struct btf *btf_get_module_btf(const struct module *module)
|
|
|
|
{
|
|
|
|
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
|
|
|
|
struct btf_module *btf_mod, *tmp;
|
|
|
|
#endif
|
2022-03-17 19:59:51 +08:00
|
|
|
struct btf *btf = NULL;
|
|
|
|
|
|
|
|
if (!module) {
|
|
|
|
btf = bpf_get_btf_vmlinux();
|
2022-03-20 22:30:03 +08:00
|
|
|
if (!IS_ERR_OR_NULL(btf))
|
2022-03-17 19:59:51 +08:00
|
|
|
btf_get(btf);
|
|
|
|
return btf;
|
|
|
|
}
|
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of a certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to the
verifier using the bpf() syscall (e.g. for a kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or wait for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to the list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_F_LIVE flag for the btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-15 00:39:45 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
|
|
|
|
mutex_lock(&btf_module_mutex);
|
|
|
|
list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
|
|
|
|
if (btf_mod->module != module)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
btf_get(btf_mod->btf);
|
|
|
|
btf = btf_mod->btf;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
mutex_unlock(&btf_module_mutex);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return btf;
|
|
|
|
}
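The registration flow that the "Populate kfunc BTF ID sets" message
above describes looks roughly like this from a subsystem module's init
path. A sketch against the API as introduced; the kfunc name and the
.check_set field layout are assumptions of that era:

	BTF_SET_START(nf_ct_check_ids)
	BTF_ID(func, bpf_xdp_ct_lookup)
	BTF_SET_END(nf_ct_check_ids)

	static const struct btf_kfunc_id_set nf_ct_kfunc_set = {
		.owner     = THIS_MODULE,
		.check_set = &nf_ct_check_ids,
	};

	static int __init nf_ct_bpf_init(void)
	{
		/* populates this module BTF's kfunc_set_tab; callers are
		 * kept out until MODULE_STATE_LIVE via btf_try_get_module()
		 * (see the BTF_MODULE_F_LIVE discussion above). */
		return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP,
						 &nf_ct_kfunc_set);
	}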
|
|
|
|
|
2024-02-08 14:24:22 +08:00
|
|
|
static int check_btf_kconfigs(const struct module *module, const char *feature)
|
|
|
|
{
|
|
|
|
if (!module && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
|
|
|
|
pr_err("missing vmlinux BTF, cannot register %s\n", feature);
|
|
|
|
return -ENOENT;
|
|
|
|
}
|
|
|
|
if (module && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
|
|
|
|
pr_warn("missing module BTF, cannot register %s\n", feature);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-05-14 08:36:11 +08:00
|
|
|
BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags)
|
|
|
|
{
|
2022-03-17 19:59:43 +08:00
|
|
|
struct btf *btf = NULL;
|
|
|
|
int btf_obj_fd = 0;
|
2021-05-14 08:36:11 +08:00
|
|
|
long ret;
|
|
|
|
|
|
|
|
if (flags)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (name_sz <= 1 || name[name_sz - 1])
|
|
|
|
return -EINVAL;
|
|
|
|
|
2022-03-17 19:59:43 +08:00
|
|
|
ret = bpf_find_btf_id(name, kind, &btf);
|
|
|
|
if (ret > 0 && btf_is_module(btf)) {
|
|
|
|
btf_obj_fd = __btf_new_fd(btf);
|
|
|
|
if (btf_obj_fd < 0) {
|
|
|
|
btf_put(btf);
|
|
|
|
return btf_obj_fd;
|
2021-05-14 08:36:11 +08:00
|
|
|
}
|
2022-03-17 19:59:43 +08:00
|
|
|
return ret | (((u64)btf_obj_fd) << 32);
|
2021-05-14 08:36:11 +08:00
|
|
|
}
|
2022-03-17 19:59:43 +08:00
|
|
|
if (ret > 0)
|
|
|
|
btf_put(btf);
|
2021-05-14 08:36:11 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = {
|
|
|
|
.func = bpf_btf_find_by_name_kind,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
2021-12-17 08:31:51 +08:00
|
|
|
.arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
|
2021-05-14 08:36:11 +08:00
|
|
|
.arg2_type = ARG_CONST_SIZE,
|
|
|
|
.arg3_type = ARG_ANYTHING,
|
|
|
|
.arg4_type = ARG_ANYTHING,
|
|
|
|
};
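A hedged BPF-side sketch of consuming the helper's packed return value
(usable e.g. from BPF_PROG_TYPE_SYSCALL programs; the fd/type-ID split
follows bpf_btf_find_by_name_kind() above):

	u32 btf_id, btf_obj_fd;
	long ret;

	ret = bpf_btf_find_by_name_kind("task_struct", sizeof("task_struct"),
					BTF_KIND_STRUCT, 0);
	if (ret < 0)
		return ret;
	btf_id = (u32)ret;	/* lower 32 bits: BTF type ID */
	btf_obj_fd = ret >> 32;	/* upper 32 bits: module BTF fd, 0 for vmlinux */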
|
2021-08-26 09:48:31 +08:00
|
|
|
|
2021-11-12 23:02:43 +08:00
|
|
|
BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE)
|
|
|
|
#define BTF_TRACING_TYPE(name, type) BTF_ID(struct, type)
|
|
|
|
BTF_TRACING_TYPE_xxx
|
|
|
|
#undef BTF_TRACING_TYPE
|
2021-10-02 09:17:51 +08:00
|
|
|
|
2024-08-09 07:22:28 +08:00
|
|
|
/* Validate well-formedness of iter argument type.
|
|
|
|
* On success, return positive BTF ID of iter state's STRUCT type.
|
|
|
|
* On error, a negative error code is returned.
|
|
|
|
*/
|
|
|
|
int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx)
|
|
|
|
{
|
|
|
|
const struct btf_param *arg;
|
|
|
|
const struct btf_type *t;
|
|
|
|
const char *name;
|
|
|
|
int btf_id;
|
|
|
|
|
|
|
|
if (btf_type_vlen(func) <= arg_idx)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
arg = &btf_params(func)[arg_idx];
|
|
|
|
t = btf_type_skip_modifiers(btf, arg->type, NULL);
|
|
|
|
if (!t || !btf_type_is_ptr(t))
|
|
|
|
return -EINVAL;
|
|
|
|
t = btf_type_skip_modifiers(btf, t->type, &btf_id);
|
|
|
|
if (!t || !__btf_type_is_struct(t))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
name = btf_name_by_offset(btf, t->name_off);
|
|
|
|
if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
return btf_id;
|
|
|
|
}
|
|
|
|
|
bpf: add iterator kfuncs registration and validation logic
Add ability to register kfuncs that implement BPF open-coded iterator
contract and enforce naming and function proto convention. Enforcement
happens at the time of kfunc registration and significantly simplifies
the rest of iterators logic in the verifier.
More details follow in subsequent patches, but we enforce the following
conditions.
All kfuncs (constructor, next, destructor) have to be named consistently
as bpf_iter_<type>_{new,next,destroy}(), respectively. <type> represents
iterator type, and iterator state should be represented as a matching
`struct bpf_iter_<type>` state type. Also, all iter kfuncs should have
a pointer to this `struct bpf_iter_<type>` as the very first argument.
Additionally:
- Constructor, i.e., bpf_iter_<type>_new(), can have arbitrary extra
number of arguments. Return type is not enforced either.
- Next method, i.e., bpf_iter_<type>_next(), has to return a pointer
type and should have exactly one argument: `struct bpf_iter_<type> *`
(const/volatile/restrict and typedefs are ignored).
- Destructor, i.e., bpf_iter_<type>_destroy(), should return void and
should have exactly one argument, similar to the next method.
- struct bpf_iter_<type> size is enforced to be positive and
a multiple of 8 bytes (to fit stack slots correctly).
Such strictness and consistency allows building generic helpers
abstracting important, but boilerplate, details to be able to use
open-coded iterators effectively and ergonomically (see bpf_for_each()
in subsequent patches). It also simplifies the verifier logic in some
places. At the same time, this doesn't hurt generality of possible
iterator implementations. Win-win.
Constructor kfunc is marked with a new KF_ITER_NEW flag, next method is
marked with KF_ITER_NEXT (and should also have KF_RET_NULL, of course),
while destructor kfunc is marked as KF_ITER_DESTROY.
Additionally, we add a trivial kfunc name validation: it should be
a valid non-NULL and non-empty string.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20230308184121.1165081-3-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-09 02:41:15 +08:00
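The naming contract spelled out above, instantiated for a hypothetical
"num" iterator (a sketch; the kernel's real bpf_iter_num kfuncs follow
this shape):

	/* iterator state: positive size, multiple of 8 bytes */
	struct bpf_iter_num {
		__u64 __opaque[1];
	} __attribute__((aligned(8)));

	/* constructor: extra args and return type are free-form */
	__bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it,
					 int start, int end);
	/* next: exactly one arg, must return a pointer (and be KF_RET_NULL) */
	__bpf_kfunc int *bpf_iter_num_next(struct bpf_iter_num *it);
	/* destructor: exactly one arg, returns void */
	__bpf_kfunc void bpf_iter_num_destroy(struct bpf_iter_num *it);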
static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name,
				 const struct btf_type *func, u32 func_flags)
{
	u32 flags = func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
	const char *sfx, *iter_name;
	const struct btf_type *t;
	char exp_name[128];
	u32 nr_args;
	int btf_id;

	/* exactly one of KF_ITER_{NEW,NEXT,DESTROY} can be set */
	if (!flags || (flags & (flags - 1)))
		return -EINVAL;

	/* any BPF iter kfunc should have `struct bpf_iter_<type> *` first arg */
	nr_args = btf_type_vlen(func);
	if (nr_args < 1)
		return -EINVAL;

	btf_id = btf_check_iter_arg(btf, func, 0);
	if (btf_id < 0)
		return btf_id;

	/* sizeof(struct bpf_iter_<type>) should be a multiple of 8 to
	 * fit nicely in stack slots
	 */
	t = btf_type_by_id(btf, btf_id);
	if (t->size == 0 || (t->size % 8))
		return -EINVAL;

	/* validate bpf_iter_<type>_{new,next,destroy}(struct bpf_iter_<type> *)
	 * naming pattern
	 */
	iter_name = btf_name_by_offset(btf, t->name_off) + sizeof(ITER_PREFIX) - 1;
	if (flags & KF_ITER_NEW)
		sfx = "new";
	else if (flags & KF_ITER_NEXT)
		sfx = "next";
	else /* (flags & KF_ITER_DESTROY) */
		sfx = "destroy";

	snprintf(exp_name, sizeof(exp_name), "bpf_iter_%s_%s", iter_name, sfx);
	if (strcmp(func_name, exp_name))
		return -EINVAL;

	/* only iter constructor should have extra arguments */
	if (!(flags & KF_ITER_NEW) && nr_args != 1)
		return -EINVAL;

	if (flags & KF_ITER_NEXT) {
		/* bpf_iter_<type>_next() should return pointer */
		t = btf_type_skip_modifiers(btf, func->type, NULL);
		if (!t || !btf_type_is_ptr(t))
			return -EINVAL;
	}

	if (flags & KF_ITER_DESTROY) {
		/* bpf_iter_<type>_destroy() should return void */
		t = btf_type_by_id(btf, func->type);
		if (!t || !btf_type_is_void(t))
			return -EINVAL;
	}

	return 0;
}
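On the BPF program side, a trio that passes btf_check_iter_kfuncs()
above is driven in constructor/next/destructor order. A minimal,
hand-rolled use of the hypothetical bpf_iter_foo sketch (without the
bpf_for_each() convenience macro mentioned in the commit message) could
look like this:

/* --- illustrative sketch, BPF program side --- */
extern int bpf_iter_foo_new(struct bpf_iter_foo *it, int start, int end) __ksym;
extern int *bpf_iter_foo_next(struct bpf_iter_foo *it) __ksym;
extern void bpf_iter_foo_destroy(struct bpf_iter_foo *it) __ksym;

static int sum_foo(void)
{
	struct bpf_iter_foo it;
	int *v, sum = 0;

	if (bpf_iter_foo_new(&it, 0, 10) == 0) {
		while ((v = bpf_iter_foo_next(&it)))
			sum += *v;
	}
	/* the verifier requires destroy even when the constructor failed */
	bpf_iter_foo_destroy(&it);
	return sum;
}
/* --- end of sketch --- */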
static int btf_check_kfunc_protos(struct btf *btf, u32 func_id, u32 func_flags)
{
	const struct btf_type *func;
	const char *func_name;
	int err;

	/* any kfunc should be FUNC -> FUNC_PROTO */
	func = btf_type_by_id(btf, func_id);
	if (!func || !btf_type_is_func(func))
		return -EINVAL;

	/* sanity check kfunc name */
	func_name = btf_name_by_offset(btf, func->name_off);
	if (!func_name || !func_name[0])
		return -EINVAL;

	func = btf_type_by_id(btf, func->type);
	if (!func || !btf_type_is_func_proto(func))
		return -EINVAL;

	if (func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY)) {
		err = btf_check_iter_kfuncs(btf, func_name, func, func_flags);
		if (err)
			return err;
	}

	return 0;
}
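Concretely, the FUNC -> FUNC_PROTO walk above traverses a BTF chain
shaped roughly as follows (the type ids and names here are made up for
illustration):

/* [100] FUNC 'bpf_iter_foo_next' type_id=101
 * [101] FUNC_PROTO ret_type_id=102, arg0 type_id=103 ('it')
 * [102] PTR -> [105] INT 'int'               (pointer return for next)
 * [103] PTR -> [104] STRUCT 'bpf_iter_foo'   (iter state as first arg)
 */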
bpf: Populate kfunc BTF ID sets in struct btf

This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set calls in their initcalls (for built-in
code and modules).

The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and the 'types' are check (allowed or not), acquire,
release, and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).

A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in
a set for a given hook and type for vmlinux sets, since they are
allocated on demand, and otherwise set as NULL. Module sets can only be
registered once per hook and type, hence they are directly assigned.

A new btf_kfunc_id_set_contains function is exposed for use in the
verifier; this new method is faster than the existing list-searching
method, and is also automatic. It also lets other code not care whether
the set is unallocated or not.

Note that module code can only make a single register_btf_kfunc_id_set
call per hook. This is why sorting is only done for in-kernel vmlinux
sets, because there might be multiple sets for the same hook and type
that must be concatenated, hence sorting them is required to ensure
bsearch in btf_id_set_contains continues to work correctly.

The next commit will update the kernel users to make use of this
infrastructure.

Finally, add a __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings at
build time.

The previous patch is also needed to provide synchronization against
initialization for the module BTF's kfunc_set_tab introduced here, as
described below:

The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).

For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes
are present. This also eliminates the possibility of bpf_check being
called at that point, thus relieving us of ensuring any synchronization
between the registration and lookup function
(btf_kfunc_id_set_contains).

However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for the module
BTF. At this point, the BTF may be available to userspace while its
corresponding module is still initializing. A BTF fd can then be passed
to the verifier using the bpf syscall (e.g. for a kfunc call insn).

Hence, there is a race window where the verifier may concurrently try
to look up the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or wait for the __init functions to
complete.

In the earlier registration API, this race was alleviated as the
verifier's bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until
it was added by the registration function (usually called at the end of
the module __init function, after all module resources have been
initialized). If the verifier made the check_kfunc_call before the
kfunc BTF ID was added to the list, it would fail verification (saying
the call isn't allowed). Access to the list was protected by a mutex.

Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of the MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for the btf_module instance
will not be set, so the btf_try_get_module call will fail.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-15 00:39:45 +08:00
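As a sketch of how a subsystem drives this API (the bpf_foo_* kfunc
names are hypothetical; register_btf_kfunc_id_set(), struct
btf_kfunc_id_set, and the BTF_SET8_*/BTF_ID_FLAGS macros are the
interface this code implements):

/* --- illustrative sketch, subsystem side --- */
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/module.h>

BTF_SET8_START(bpf_foo_kfunc_ids)
BTF_ID_FLAGS(func, bpf_foo_acquire, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_foo_release, KF_RELEASE)
BTF_SET8_END(bpf_foo_kfunc_ids)

static const struct btf_kfunc_id_set bpf_foo_kfunc_set = {
	.owner = THIS_MODULE,
	.set   = &bpf_foo_kfunc_ids,
};

/* Runs from an initcall (built-in) or a module __init, as described above. */
static int __init bpf_foo_kfuncs_init(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_foo_kfunc_set);
}
late_initcall(bpf_foo_kfuncs_init);
/* --- end of sketch --- */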
|
|
|
/* Kernel Function (kfunc) BTF ID set registration API */
|
2021-10-02 09:17:51 +08:00
|
|
|
|
2022-07-21 21:42:35 +08:00
|
|
|
static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
|
2023-05-20 06:51:54 +08:00
|
|
|
const struct btf_kfunc_id_set *kset)
|
2021-10-02 09:17:51 +08:00
|
|
|
{
|
2023-05-20 06:51:54 +08:00
|
|
|
struct btf_kfunc_hook_filter *hook_filter;
|
|
|
|
struct btf_id_set8 *add_set = kset->set;
|
2022-07-21 21:42:35 +08:00
|
|
|
bool vmlinux_set = !btf_is_module(btf);
|
2023-05-20 06:51:54 +08:00
|
|
|
bool add_filter = !!kset->filter;
|
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-15 00:39:45 +08:00
|
|
|
struct btf_kfunc_set_tab *tab;
|
2022-07-21 21:42:35 +08:00
|
|
|
struct btf_id_set8 *set;
|
2024-06-20 17:17:31 +08:00
|
|
|
u32 set_cnt, i;
|
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-15 00:39:45 +08:00
|
|
|
int ret;
|
|
|
|
|
2022-07-21 21:42:35 +08:00
|
|
|
if (hook >= BTF_KFUNC_HOOK_MAX) {
|
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-15 00:39:45 +08:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!add_set->cnt)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
tab = btf->kfunc_set_tab;
|
2023-05-20 06:51:54 +08:00
|
|
|
|
|
|
|
if (tab && add_filter) {
|
|
|
|
u32 i;
|
|
|
|
|
|
|
|
hook_filter = &tab->hook_filters[hook];
|
|
|
|
for (i = 0; i < hook_filter->nr_filters; i++) {
|
|
|
|
if (hook_filter->filters[i] == kset->filter) {
|
|
|
|
add_filter = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (add_filter && hook_filter->nr_filters == BTF_KFUNC_FILTER_MAX_CNT) {
|
|
|
|
ret = -E2BIG;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-15 00:39:45 +08:00
|
|
|
if (!tab) {
|
|
|
|
tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!tab)
|
|
|
|
return -ENOMEM;
|
|
|
|
btf->kfunc_set_tab = tab;
|
|
|
|
}
|
|
|
|
|
2022-07-21 21:42:35 +08:00
|
|
|
set = tab->sets[hook];
|
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-15 00:39:45 +08:00
|
|
|
/* Warn when register_btf_kfunc_id_set is called twice for the same hook
|
|
|
|
* for module sets.
|
|
|
|
*/
|
|
|
|
if (WARN_ON_ONCE(set && !vmlinux_set)) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* In case of vmlinux sets, there may be more than one set being
|
|
|
|
* registered per hook. To create a unified set, we allocate a new set
|
|
|
|
* and concatenate all individual sets being registered. While each set
|
|
|
|
* is individually sorted, they may become unsorted when concatenated,
|
|
|
|
* hence re-sorting the final set again is required to make binary
|
2022-07-21 21:42:35 +08:00
|
|
|
* searching the set using btf_id_set8_contains function work.
|
2024-06-20 17:17:31 +08:00
|
|
|
*
|
|
|
|
* For module sets, we need to allocate as we may need to relocate
|
|
|
|
* BTF ids.
|
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-15 00:39:45 +08:00
|
|
|
*/
|
|
|
|
set_cnt = set ? set->cnt : 0;
|
|
|
|
|
|
|
|
if (set_cnt > U32_MAX - add_set->cnt) {
|
|
|
|
ret = -EOVERFLOW;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) {
|
|
|
|
ret = -E2BIG;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Grow set */
|
2022-07-21 21:42:35 +08:00
|
|
|
set = krealloc(tab->sets[hook],
|
|
|
|
offsetof(struct btf_id_set8, pairs[set_cnt + add_set->cnt]),
|
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-15 00:39:45 +08:00
|
|
|
GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!set) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* For newly allocated set, initialize set->cnt to 0 */
|
2022-07-21 21:42:35 +08:00
|
|
|
if (!tab->sets[hook])
|
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-15 00:39:45 +08:00
|
|
|
set->cnt = 0;
|
2022-07-21 21:42:35 +08:00
|
|
|
tab->sets[hook] = set;
|

	/* Concatenate the two sets */
	memcpy(set->pairs + set->cnt, add_set->pairs, add_set->cnt * sizeof(set->pairs[0]));
	/* Now that the set is copied, update with relocated BTF ids */
	for (i = set->cnt; i < set->cnt + add_set->cnt; i++)
		set->pairs[i].id = btf_relocate_id(btf, set->pairs[i].id);

	set->cnt += add_set->cnt;

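	/* btf_id_set8_contains() looks up ids via bsearch, so re-establish
	 * the invariant that the pairs are sorted by id after the
	 * concatenation above.
	 */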
	sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL);
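
	/* A registration may also carry a filter callback. It is stashed
	 * per hook and consulted on every lookup, where a nonzero return
	 * hides the kfunc from the given program (see
	 * __btf_kfunc_id_set_contains() below).
	 */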
	if (add_filter) {
		hook_filter = &tab->hook_filters[hook];
		hook_filter->filters[hook_filter->nr_filters++] = kset->filter;
	}
	return 0;
end:
	btf_free_kfunc_set_tab(btf);
	return ret;
}
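
/*
 * Usage sketch (illustrative, not part of this file): a subsystem typically
 * feeds the table populated above from an initcall. The names
 * example_kfunc_ids, example_kfunc_set and bpf_example_kfunc are made up for
 * the example; the macros and registration helper come from
 * <linux/btf_ids.h> and <linux/btf.h>:
 *
 *	BTF_KFUNCS_START(example_kfunc_ids)
 *	BTF_ID_FLAGS(func, bpf_example_kfunc, KF_ACQUIRE | KF_RET_NULL)
 *	BTF_KFUNCS_END(example_kfunc_ids)
 *
 *	static const struct btf_kfunc_id_set example_kfunc_set = {
 *		.owner = THIS_MODULE,
 *		.set   = &example_kfunc_ids,
 *	};
 *
 *	static int __init example_kfunc_init(void)
 *	{
 *		return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP,
 *						 &example_kfunc_set);
 *	}
 *	late_initcall(example_kfunc_init);
 */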

static u32 *__btf_kfunc_id_set_contains(const struct btf *btf,
					enum btf_kfunc_hook hook,
					u32 kfunc_btf_id,
					const struct bpf_prog *prog)
{
	struct btf_kfunc_hook_filter *hook_filter;
	struct btf_id_set8 *set;
	u32 *id, i;

	if (hook >= BTF_KFUNC_HOOK_MAX)
		return NULL;
	if (!btf->kfunc_set_tab)
		return NULL;
	hook_filter = &btf->kfunc_set_tab->hook_filters[hook];
	for (i = 0; i < hook_filter->nr_filters; i++) {
		if (hook_filter->filters[i](prog, kfunc_btf_id))
			return NULL;
	}
	set = btf->kfunc_set_tab->sets[hook];
	if (!set)
		return NULL;
	id = btf_id_set8_contains(set, kfunc_btf_id);
	if (!id)
		return NULL;
	/* The flags for BTF ID are located next to it */
	return id + 1;
}
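
/*
 * Layout sketch (illustrative): struct btf_id_set8 stores sorted (id, flags)
 * pairs, so a successful btf_id_set8_contains() lookup returns a pointer to
 * the id, and the flags word sits immediately behind it. A caller would use
 * the result roughly as follows:
 *
 *	u32 *kfunc_flags = __btf_kfunc_id_set_contains(btf, hook, func_id, prog);
 *	if (kfunc_flags && (*kfunc_flags & KF_ACQUIRE))
 *		... the kfunc acquires a reference ...
 */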

static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
{
	switch (prog_type) {
	case BPF_PROG_TYPE_UNSPEC:
		return BTF_KFUNC_HOOK_COMMON;
	case BPF_PROG_TYPE_XDP:
		return BTF_KFUNC_HOOK_XDP;
	case BPF_PROG_TYPE_SCHED_CLS:
		return BTF_KFUNC_HOOK_TC;
	case BPF_PROG_TYPE_STRUCT_OPS:
		return BTF_KFUNC_HOOK_STRUCT_OPS;
	case BPF_PROG_TYPE_TRACING:
	case BPF_PROG_TYPE_TRACEPOINT:
	case BPF_PROG_TYPE_PERF_EVENT:
	case BPF_PROG_TYPE_LSM:
		return BTF_KFUNC_HOOK_TRACING;
	case BPF_PROG_TYPE_SYSCALL:
		return BTF_KFUNC_HOOK_SYSCALL;
	case BPF_PROG_TYPE_CGROUP_SKB:
	case BPF_PROG_TYPE_CGROUP_SOCK:
	case BPF_PROG_TYPE_CGROUP_DEVICE:
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
	case BPF_PROG_TYPE_CGROUP_SYSCTL:
		return BTF_KFUNC_HOOK_CGROUP;
	case BPF_PROG_TYPE_SCHED_ACT:
		return BTF_KFUNC_HOOK_SCHED_ACT;
	case BPF_PROG_TYPE_SK_SKB:
		return BTF_KFUNC_HOOK_SK_SKB;
	case BPF_PROG_TYPE_SOCKET_FILTER:
		return BTF_KFUNC_HOOK_SOCKET_FILTER;
	case BPF_PROG_TYPE_LWT_OUT:
	case BPF_PROG_TYPE_LWT_IN:
	case BPF_PROG_TYPE_LWT_XMIT:
	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
		return BTF_KFUNC_HOOK_LWT;
	case BPF_PROG_TYPE_NETFILTER:
		return BTF_KFUNC_HOOK_NETFILTER;
	case BPF_PROG_TYPE_KPROBE:
		return BTF_KFUNC_HOOK_KPROBE;
	default:
		return BTF_KFUNC_HOOK_MAX;
	}
}
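
/*
 * Program types without a dedicated hook above fall through to
 * BTF_KFUNC_HOOK_MAX, which __btf_kfunc_id_set_contains() rejects up front.
 * For example (illustrative), bpf_prog_type_to_kfunc_hook(BPF_PROG_TYPE_SK_MSG)
 * yields BTF_KFUNC_HOOK_MAX, so the program-type-specific lookup returns NULL
 * and only BTF_KFUNC_HOOK_COMMON kfuncs remain visible to such programs.
 */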

/* Caution:
 * Reference to the module (obtained using btf_try_get_module) corresponding to
 * the struct btf *MUST* be held when calling this function from verifier
 * context. This is usually true as we stash references in prog's kfunc_btf_tab;
 * keeping the reference for the duration of the call provides the necessary
 * protection for looking up a well-formed btf->kfunc_set_tab.
 */
u32 *btf_kfunc_id_set_contains(const struct btf *btf,
			       u32 kfunc_btf_id,
			       const struct bpf_prog *prog)
{
	enum bpf_prog_type prog_type = resolve_prog_type(prog);
	enum btf_kfunc_hook hook;
	u32 *kfunc_flags;

	kfunc_flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id, prog);
	if (kfunc_flags)
		return kfunc_flags;
|
|
|
hook = bpf_prog_type_to_kfunc_hook(prog_type);
|
2023-05-20 06:51:54 +08:00
|
|
|
return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id, prog);
|
2022-01-15 00:39:45 +08:00
|
|
|
}
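For context, a sketch of how a verifier-side caller might consume this lookup (simplified and hypothetical; the real consumer lives in kernel/bpf/verifier.c):
/* Hypothetical consumer of btf_kfunc_id_set_contains() (a sketch): */
static int example_check_kfunc(struct bpf_verifier_env *env,
			       const struct btf *btf, u32 func_id)
{
	u32 *kfunc_flags;

	kfunc_flags = btf_kfunc_id_set_contains(btf, func_id, env->prog);
	if (!kfunc_flags)
		return -EACCES;	/* kfunc not registered for this prog type */
	if (*kfunc_flags & KF_ACQUIRE) {
		/* returned pointer carries a reference the prog must release */
	}
	return 0;
}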
|
2021-11-22 22:47:40 +08:00
|
|
|
|
2023-05-20 06:51:54 +08:00
|
|
|
u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id,
|
|
|
|
const struct bpf_prog *prog)
|
2022-12-06 22:59:32 +08:00
|
|
|
{
|
2023-05-20 06:51:54 +08:00
|
|
|
return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id, prog);
|
2022-12-06 22:59:32 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook,
|
|
|
|
const struct btf_kfunc_id_set *kset)
|
2022-01-15 00:39:45 +08:00
|
|
|
{
|
|
|
|
struct btf *btf;
|
bpf: add iterator kfuncs registration and validation logic
Add ability to register kfuncs that implement BPF open-coded iterator
contract and enforce naming and function proto convention. Enforcement
happens at the time of kfunc registration and significantly simplifies
the rest of iterators logic in the verifier.
More details follow in subsequent patches, but we enforce the following
conditions.
All kfuncs (constructor, next, destructor) have to be named consistently
as bpf_iter_<type>_{new,next,destroy}(), respectively. <type> represents
iterator type, and iterator state should be represented as a matching
`struct bpf_iter_<type>` state type. Also, all iter kfuncs should have
a pointer to this `struct bpf_iter_<type>` as the very first argument.
Additionally:
- Constructor, i.e., bpf_iter_<type>_new(), can have an arbitrary
number of extra arguments. Its return type is not enforced either.
- Next method, i.e., bpf_iter_<type>_next(), has to return a pointer
type and should have exactly one argument: `struct bpf_iter_<type> *`
(const/volatile/restrict and typedefs are ignored).
- Destructor, i.e., bpf_iter_<type>_destroy(), should return void and
should have exactly one argument, similar to the next method.
- struct bpf_iter_<type> size is enforced to be positive and
a multiple of 8 bytes (to fit stack slots correctly).
Such strictness and consistency allow building generic helpers that
abstract away important but boilerplate details, so that open-coded
iterators can be used effectively and ergonomically (see bpf_for_each()
in subsequent patches). It also simplifies the verifier logic in some
places. At the same time, this doesn't hurt the generality of possible
iterator implementations. Win-win.
The constructor kfunc is marked with a new KF_ITER_NEW flag, the next
method is marked with KF_ITER_NEXT (and should also have KF_RET_NULL, of
course), while the destructor kfunc is marked as KF_ITER_DESTROY.
Additionally, we add a trivial kfunc name validation: it should be
a valid non-NULL and non-empty string.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20230308184121.1165081-3-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-09 02:41:15 +08:00
|
|
|
int ret, i;
|
2022-01-15 00:39:45 +08:00
|
|
|
|
|
|
|
btf = btf_get_module_btf(kset->owner);
|
2024-02-08 14:24:22 +08:00
|
|
|
if (!btf)
|
|
|
|
return check_btf_kconfigs(kset->owner, "kfunc");
|
2022-01-26 08:13:40 +08:00
|
|
|
if (IS_ERR(btf))
|
|
|
|
return PTR_ERR(btf);
|
2022-01-15 00:39:45 +08:00
|
|
|
|
2023-03-09 02:41:15 +08:00
|
|
|
for (i = 0; i < kset->set->cnt; i++) {
|
2024-06-20 17:17:31 +08:00
|
|
|
ret = btf_check_kfunc_protos(btf, btf_relocate_id(btf, kset->set->pairs[i].id),
|
2023-03-09 02:41:15 +08:00
|
|
|
kset->set->pairs[i].flags);
|
|
|
|
if (ret)
|
|
|
|
goto err_out;
|
|
|
|
}
|
|
|
|
|
2023-05-20 06:51:54 +08:00
|
|
|
ret = btf_populate_kfunc_set(btf, hook, kset);
|
|
|
|
|
2023-03-09 02:41:15 +08:00
|
|
|
err_out:
|
2022-03-17 19:59:51 +08:00
|
|
|
btf_put(btf);
|
2022-01-15 00:39:45 +08:00
|
|
|
return ret;
|
|
|
|
}
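To make the iterator contract enforced by btf_check_kfunc_protos() above concrete, a conforming kfunc triple would look like the following sketch (type and function names are hypothetical):
/* struct bpf_iter_<type> must have a positive size that is a multiple of 8 */
struct bpf_iter_example {
	__u64 __opaque[2];
} __attribute__((aligned(8)));

/* registered with KF_ITER_NEW */
__bpf_kfunc int bpf_iter_example_new(struct bpf_iter_example *it, int start, int end);
/* registered with KF_ITER_NEXT | KF_RET_NULL; must return a pointer type */
__bpf_kfunc int *bpf_iter_example_next(struct bpf_iter_example *it);
/* registered with KF_ITER_DESTROY; must return void */
__bpf_kfunc void bpf_iter_example_destroy(struct bpf_iter_example *it);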
|
2022-12-06 22:59:32 +08:00
|
|
|
|
|
|
|
/* This function must be invoked only from initcalls/module init functions */
|
|
|
|
int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
|
|
|
|
const struct btf_kfunc_id_set *kset)
|
|
|
|
{
|
|
|
|
enum btf_kfunc_hook hook;
|
|
|
|
|
2024-01-29 09:24:08 +08:00
|
|
|
/* All kfuncs need to be tagged as such in BTF.
|
|
|
|
* WARN() for initcall registrations that do not check errors.
|
|
|
|
*/
|
|
|
|
if (!(kset->set->flags & BTF_SET8_KFUNCS)) {
|
|
|
|
WARN_ON(!kset->owner);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2022-12-06 22:59:32 +08:00
|
|
|
hook = bpf_prog_type_to_kfunc_hook(prog_type);
|
|
|
|
return __register_btf_kfunc_id_set(hook, kset);
|
|
|
|
}
|
2022-01-15 00:39:45 +08:00
|
|
|
EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set);
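A minimal module-side registration sketch (kfunc and set names are hypothetical); BTF_KFUNCS_START/BTF_KFUNCS_END tag the set with the BTF_SET8_KFUNCS flag that register_btf_kfunc_id_set() checks above:
__bpf_kfunc int bpf_example_kfunc(int x)
{
	return x + 1;
}

BTF_KFUNCS_START(example_kfunc_ids)
BTF_ID_FLAGS(func, bpf_example_kfunc)
BTF_KFUNCS_END(example_kfunc_ids)

static const struct btf_kfunc_id_set example_kfunc_set = {
	.owner = THIS_MODULE,
	.set   = &example_kfunc_ids,
};

static int __init example_module_init(void)
{
	/* must be called from an initcall/module init, as noted above */
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &example_kfunc_set);
}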
|
Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Andrii Nakryiko says:
====================
bpf-next 2021-12-10 v2
We've added 115 non-merge commits during the last 26 day(s) which contain
a total of 182 files changed, 5747 insertions(+), 2564 deletions(-).
The main changes are:
1) Various samples fixes, from Alexander Lobakin.
2) BPF CO-RE support in kernel and light skeleton, from Alexei Starovoitov.
3) A batch of new unified APIs for libbpf, logging improvements, version
querying, etc. Also a batch of old deprecations for old APIs and various
bug fixes, in preparation for libbpf 1.0, from Andrii Nakryiko.
4) BPF documentation reorganization and improvements, from Christoph Hellwig
and Dave Tucker.
5) Support for declarative initialization of BPF_MAP_TYPE_PROG_ARRAY in
libbpf, from Hengqi Chen.
6) Verifier log fixes, from Hou Tao.
7) Runtime-bounded loops support with bpf_loop() helper, from Joanne Koong.
8) Extend branch record capturing to all platforms that support it,
from Kajol Jain.
9) Light skeleton codegen improvements, from Kumar Kartikeya Dwivedi.
10) bpftool doc-generating script improvements, from Quentin Monnet.
11) Two libbpf v0.6 bug fixes, from Shuyi Cheng and Vincent Minet.
12) Deprecation warning fix for perf/bpf_counter, from Song Liu.
13) MAX_TAIL_CALL_CNT unification and MIPS build fix for libbpf,
from Tiezhu Yang.
14) BTF_KIND_TYPE_TAG follow-up fixes, from Yonghong Song.
15) Selftests fixes and improvements, from Ilya Leoshkevich, Jean-Philippe
Brucker, Jiri Olsa, Maxim Mikityanskiy, Tirthendu Sarkar, Yucong Sun,
and others.
* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (115 commits)
libbpf: Add "bool skipped" to struct bpf_map
libbpf: Fix typo in btf__dedup@LIBBPF_0.0.2 definition
bpftool: Switch bpf_object__load_xattr() to bpf_object__load()
selftests/bpf: Remove the only use of deprecated bpf_object__load_xattr()
selftests/bpf: Add test for libbpf's custom log_buf behavior
selftests/bpf: Replace all uses of bpf_load_btf() with bpf_btf_load()
libbpf: Deprecate bpf_object__load_xattr()
libbpf: Add per-program log buffer setter and getter
libbpf: Preserve kernel error code and remove kprobe prog type guessing
libbpf: Improve logging around BPF program loading
libbpf: Allow passing user log setting through bpf_object_open_opts
libbpf: Allow passing preallocated log_buf when loading BTF into kernel
libbpf: Add OPTS-based bpf_btf_load() API
libbpf: Fix bpf_prog_load() log_buf logic for log_level 0
samples/bpf: Remove unneeded variable
bpf: Remove redundant assignment to pointer t
selftests/bpf: Fix a compilation warning
perf/bpf_counter: Use bpf_map_create instead of bpf_create_map
samples: bpf: Fix 'unknown warning group' build warning on Clang
samples: bpf: Fix xdp_sample_user.o linking with Clang
...
====================
Link: https://lore.kernel.org/r/20211210234746.2100561-1-andrii@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-12-11 07:56:10 +08:00
|
|
|
|
2022-12-06 22:59:32 +08:00
|
|
|
/* This function must be invoked only from initcalls/module init functions */
|
|
|
|
int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset)
|
|
|
|
{
|
|
|
|
return __register_btf_kfunc_id_set(BTF_KFUNC_HOOK_FMODRET, kset);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(register_btf_fmodret_id_set);
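The fmodret variant follows the same registration pattern, just without a program type; a sketch with hypothetical names:
BTF_KFUNCS_START(example_fmodret_ids)
BTF_ID_FLAGS(func, bpf_example_test_modify_return)
BTF_KFUNCS_END(example_fmodret_ids)

static const struct btf_kfunc_id_set example_fmodret_set = {
	.owner = THIS_MODULE,
	.set   = &example_fmodret_ids,
};

/* from an initcall: */
err = register_btf_fmodret_id_set(&example_fmodret_set);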
|
|
|
|
|
2022-04-25 05:48:54 +08:00
|
|
|
s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id)
|
|
|
|
{
|
|
|
|
struct btf_id_dtor_kfunc_tab *tab = btf->dtor_kfunc_tab;
|
|
|
|
struct btf_id_dtor_kfunc *dtor;
|
|
|
|
|
|
|
|
if (!tab)
|
|
|
|
return -ENOENT;
|
|
|
|
/* Even though the size of tab->dtors[0] is > sizeof(u32), we only need
|
|
|
|
* to compare the first u32 with btf_id, so we can reuse btf_id_cmp_func.
|
|
|
|
*/
|
|
|
|
BUILD_BUG_ON(offsetof(struct btf_id_dtor_kfunc, btf_id) != 0);
|
|
|
|
dtor = bsearch(&btf_id, tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func);
|
|
|
|
if (!dtor)
|
|
|
|
return -ENOENT;
|
|
|
|
return dtor->kfunc_btf_id;
|
|
|
|
}
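btf_find_dtor_kfunc() returns the destructor's kfunc BTF ID on success or a negative errno; a sketch of a hypothetical caller:
s32 dtor_id = btf_find_dtor_kfunc(btf, obj_btf_id);
if (dtor_id < 0)
	return dtor_id;	/* -ENOENT: no destructor registered for obj_btf_id */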
|
|
|
|
|
bpf: Wire up freeing of referenced kptr
A destructor kfunc can be defined as void func(type *), where type may
be void or any other pointer type as per convenience.
In this patch, we ensure that the type is sane and capture the function
pointer into off_desc of ptr_off_tab for the specific pointer offset,
with the invariant that the dtor pointer is always set when 'kptr_ref'
tag is applied to the pointer's pointee type, which is indicated by the
flag BPF_MAP_VALUE_OFF_F_REF.
Note that only BTF IDs whose destructor kfunc is registered become the
allowed BTF IDs for embedding as a referenced kptr. Hence, this serves
both to find the dtor kfunc BTF ID and to act as a check against the
whitelist of BTF IDs allowed for this purpose.
Finally, wire up the actual freeing of the referenced pointer, if any, at
all available offsets, so that no references are leaked after the BPF map
goes away when a BPF program previously moved ownership of a referenced
pointer into it.
The behavior is similar to BPF timers, where bpf_map_{update,delete}_elem
will free any existing referenced kptr. The same applies to the LRU map's
bpf_lru_push_free/htab_lru_push_free functions, which are extended to
reset unreferenced and free referenced kptrs.
Note that unlike BPF timers, kptr is not reset or freed when map uref
drops to zero.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-8-memxor@gmail.com
2022-04-25 05:48:55 +08:00
|
|
|
static int btf_check_dtor_kfuncs(struct btf *btf, const struct btf_id_dtor_kfunc *dtors, u32 cnt)
|
|
|
|
{
|
|
|
|
const struct btf_type *dtor_func, *dtor_func_proto, *t;
|
|
|
|
const struct btf_param *args;
|
|
|
|
s32 dtor_btf_id;
|
|
|
|
u32 nr_args, i;
|
|
|
|
|
|
|
|
for (i = 0; i < cnt; i++) {
|
2024-06-20 17:17:31 +08:00
|
|
|
dtor_btf_id = btf_relocate_id(btf, dtors[i].kfunc_btf_id);
|
2022-04-25 05:48:55 +08:00
|
|
|
|
|
|
|
dtor_func = btf_type_by_id(btf, dtor_btf_id);
|
|
|
|
if (!dtor_func || !btf_type_is_func(dtor_func))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
dtor_func_proto = btf_type_by_id(btf, dtor_func->type);
|
|
|
|
if (!dtor_func_proto || !btf_type_is_func_proto(dtor_func_proto))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* Make sure the prototype of the destructor kfunc is 'void func(type *)' */
|
|
|
|
t = btf_type_by_id(btf, dtor_func_proto->type);
|
|
|
|
if (!t || !btf_type_is_void(t))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
nr_args = btf_type_vlen(dtor_func_proto);
|
|
|
|
if (nr_args != 1)
|
|
|
|
return -EINVAL;
|
|
|
|
args = btf_params(dtor_func_proto);
|
|
|
|
t = btf_type_by_id(btf, args[0].type);
|
|
|
|
/* Allow any pointer type, as the width on targets Linux supports
|
|
|
|
* will be the same for all pointer types (i.e. sizeof(void *))
|
|
|
|
*/
|
|
|
|
if (!t || !btf_type_is_ptr(t))
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-04-25 05:48:54 +08:00
|
|
|
/* This function must be invoked only from initcalls/module init functions */
|
|
|
|
int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt,
|
|
|
|
struct module *owner)
|
|
|
|
{
|
|
|
|
struct btf_id_dtor_kfunc_tab *tab;
|
|
|
|
struct btf *btf;
|
2024-06-20 17:17:31 +08:00
|
|
|
u32 tab_cnt, i;
|
2022-04-25 05:48:54 +08:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
btf = btf_get_module_btf(owner);
|
2024-02-08 14:24:22 +08:00
|
|
|
if (!btf)
|
|
|
|
return check_btf_kconfigs(owner, "dtor kfuncs");
|
2022-04-25 05:48:54 +08:00
|
|
|
if (IS_ERR(btf))
|
|
|
|
return PTR_ERR(btf);
|
|
|
|
|
|
|
|
if (add_cnt >= BTF_DTOR_KFUNC_MAX_CNT) {
|
|
|
|
pr_err("cannot register more than %d kfunc destructors\n", BTF_DTOR_KFUNC_MAX_CNT);
|
|
|
|
ret = -E2BIG;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
2022-04-25 05:48:55 +08:00
|
|
|
/* Ensure that the prototype of dtor kfuncs being registered is sane */
|
|
|
|
ret = btf_check_dtor_kfuncs(btf, dtors, add_cnt);
|
|
|
|
if (ret < 0)
|
|
|
|
goto end;
|
|
|
|
|
2022-04-25 05:48:54 +08:00
|
|
|
tab = btf->dtor_kfunc_tab;
|
|
|
|
/* Only one call allowed for modules */
|
|
|
|
if (WARN_ON_ONCE(tab && btf_is_module(btf))) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
|
|
|
tab_cnt = tab ? tab->cnt : 0;
|
|
|
|
if (tab_cnt > U32_MAX - add_cnt) {
|
|
|
|
ret = -EOVERFLOW;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
if (tab_cnt + add_cnt >= BTF_DTOR_KFUNC_MAX_CNT) {
|
|
|
|
pr_err("cannot register more than %d kfunc destructors\n", BTF_DTOR_KFUNC_MAX_CNT);
|
|
|
|
ret = -E2BIG;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
|
|
|
tab = krealloc(btf->dtor_kfunc_tab,
|
|
|
|
offsetof(struct btf_id_dtor_kfunc_tab, dtors[tab_cnt + add_cnt]),
|
|
|
|
GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!tab) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!btf->dtor_kfunc_tab)
|
|
|
|
tab->cnt = 0;
|
|
|
|
btf->dtor_kfunc_tab = tab;
|
|
|
|
|
|
|
|
memcpy(tab->dtors + tab->cnt, dtors, add_cnt * sizeof(tab->dtors[0]));
|
2024-06-20 17:17:31 +08:00
|
|
|
|
|
|
|
/* remap BTF ids based on BTF relocation (if any) */
|
|
|
|
for (i = tab_cnt; i < tab_cnt + add_cnt; i++) {
|
|
|
|
tab->dtors[i].btf_id = btf_relocate_id(btf, tab->dtors[i].btf_id);
|
|
|
|
tab->dtors[i].kfunc_btf_id = btf_relocate_id(btf, tab->dtors[i].kfunc_btf_id);
|
|
|
|
}
|
|
|
|
|
2022-04-25 05:48:54 +08:00
|
|
|
tab->cnt += add_cnt;
|
|
|
|
|
|
|
|
sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL);
|
|
|
|
|
|
|
|
end:
|
2023-01-20 20:21:48 +08:00
|
|
|
if (ret)
|
|
|
|
btf_free_dtor_kfunc_tab(btf);
|
2022-04-25 05:48:54 +08:00
|
|
|
btf_put(btf);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(register_btf_id_dtor_kfuncs);
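A registration sketch for a destructor kfunc (type and function names are hypothetical); note the prototype must be 'void func(type *)', as enforced by btf_check_dtor_kfuncs() above:
__bpf_kfunc void bpf_example_obj_release(struct example_obj *p)
{
	kfree(p);
}

BTF_ID_LIST(example_dtor_ids)
BTF_ID(struct, example_obj)
BTF_ID(func, bpf_example_obj_release)

static const struct btf_id_dtor_kfunc example_dtors[] = {
	{
		.btf_id       = example_dtor_ids[0],
		.kfunc_btf_id = example_dtor_ids[1],
	},
};

/* from an initcall/module init: */
err = register_btf_id_dtor_kfuncs(example_dtors, ARRAY_SIZE(example_dtors),
				  THIS_MODULE);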
|
|
|
|
|
2022-02-04 08:55:18 +08:00
|
|
|
#define MAX_TYPES_ARE_COMPAT_DEPTH 2
|
|
|
|
|
|
|
|
/* Check local and target types for compatibility. This check is used for
|
|
|
|
* type-based CO-RE relocations and follow slightly different rules than
|
|
|
|
* field-based relocations. This function assumes that root types were already
|
|
|
|
* checked for name match. Beyond that initial root-level name check, names
|
|
|
|
* are completely ignored. Compatibility rules are as follows:
|
bpf: Add btf enum64 support
Currently, BTF only supports up to 32-bit enum values with BTF_KIND_ENUM.
But in the kernel, some enums indeed have 64-bit values, e.g.,
in uapi bpf.h, we have
enum {
BPF_F_INDEX_MASK = 0xffffffffULL,
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};
In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK
as 0, which certainly is incorrect.
This patch adds a new btf kind, BTF_KIND_ENUM64, which permits
64-bit values to cover the above use case. BTF_KIND_ENUM64 carries
the following three fields after the common type:
struct btf_enum64 {
__u32 name_off;
__u32 val_lo32;
__u32 val_hi32;
};
For example, BPF_F_CTXLEN_MASK (0xfffffULL << 32) is encoded with
val_hi32 = 0x000fffff and val_lo32 = 0x0.
Currently, the BTF type section has an alignment of 4, as all element
types are u32. Representing the value with a __u64 would introduce padding
for btf_enum64 and may also introduce misalignment for the 64-bit value.
Hence, the two members val_hi32 and val_lo32 are chosen to avoid these issues.
The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64
to indicate whether the value is signed or unsigned. The kflag is intended
to keep the BTF C-format output consistent with the original
source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff.
The C format has two choices, printing 0xffffffff or -1, and current libbpf
prints it as an unsigned value. But if the signedness is preserved in BTF,
the value can be printed the same as in the original source code.
The kflag value 0 means unsigned values, which is consistent with libbpf's
default and should cover most cases.
The new BTF_KIND_ENUM64 is intended to support enum values that must be
represented with 64 bits, but it can represent all BTF_KIND_ENUM values as well.
The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has
to be represented with 64 bits.
In addition, a static inline function btf_kind_core_compat() is introduced,
which will be used later when libbpf's relo_core.c changes. Here the kernel
shares the same relo_core.c with libbpf.
[1] https://reviews.llvm.org/D124641
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-06-07 14:26:00 +08:00
|
|
|
* - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs/ENUM64s are considered compatible, but
|
2022-02-04 08:55:18 +08:00
|
|
|
* kind should match for local and target types (i.e., STRUCT is not
|
|
|
|
* compatible with UNION);
|
2022-06-07 14:26:00 +08:00
|
|
|
* - for ENUMs/ENUM64s, the size is ignored;
|
2022-02-04 08:55:18 +08:00
|
|
|
* - for INT, size and signedness are ignored;
|
|
|
|
* - for ARRAY, dimensionality is ignored, element types are checked for
|
|
|
|
* compatibility recursively;
|
|
|
|
* - CONST/VOLATILE/RESTRICT modifiers are ignored;
|
|
|
|
* - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
|
|
|
|
* - FUNC_PROTOs are compatible if they have compatible signature: same
|
|
|
|
* number of input args and compatible return and argument types.
|
|
|
|
* These rules are not set in stone and probably will be adjusted as we get
|
|
|
|
* more experience with using BPF CO-RE relocations.
|
|
|
|
*/
|
2021-12-02 02:10:26 +08:00
|
|
|
int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
|
|
|
|
const struct btf *targ_btf, __u32 targ_id)
|
|
|
|
{
|
2022-06-24 02:29:34 +08:00
|
|
|
return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id,
|
2022-02-04 08:55:18 +08:00
|
|
|
MAX_TYPES_ARE_COMPAT_DEPTH);
|
2021-12-02 02:10:26 +08:00
|
|
|
}
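A few illustrative local/target pairs under the rules above (a sketch, not kernel code):
/*
 *   struct s { int a; }  vs  struct s { long x, y; }  -> compatible (same kind;
 *                                                        members are not compared)
 *   int                  vs  unsigned long            -> compatible (INT size and
 *                                                        signedness ignored)
 *   int [4]              vs  int [8]                  -> compatible (ARRAY
 *                                                        dimensionality ignored)
 *   struct s             vs  union s                  -> not compatible (kind
 *                                                        mismatch)
 */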
|
|
|
|
|
2022-06-29 00:01:21 +08:00
|
|
|
#define MAX_TYPES_MATCH_DEPTH 2
|
|
|
|
|
|
|
|
int bpf_core_types_match(const struct btf *local_btf, u32 local_id,
|
|
|
|
const struct btf *targ_btf, u32 targ_id)
|
|
|
|
{
|
|
|
|
return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false,
|
|
|
|
MAX_TYPES_MATCH_DEPTH);
|
|
|
|
}
|
|
|
|
|
2021-12-02 02:10:26 +08:00
|
|
|
static bool bpf_core_is_flavor_sep(const char *s)
|
|
|
|
{
|
|
|
|
/* check X___Y name pattern, where X and Y are not underscores */
|
|
|
|
return s[0] != '_' && /* X */
|
|
|
|
s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */
|
|
|
|
s[4] != '_'; /* Y */
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t bpf_core_essential_name_len(const char *name)
|
|
|
|
{
|
|
|
|
size_t n = strlen(name);
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = n - 5; i >= 0; i--) {
|
|
|
|
if (bpf_core_is_flavor_sep(name + i))
|
|
|
|
return i + 1;
|
|
|
|
}
|
|
|
|
return n;
|
|
|
|
}
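/* Editor's example: for a flavored name like "task_struct___v2" the
 * essential part is "task_struct", so
 *	bpf_core_essential_name_len("task_struct___v2") == 11
 *	bpf_core_essential_name_len("task_struct") == 11
 * and a flavored local type can match an unflavored target by comparing
 * only the first name_len bytes.
 */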
|
2021-12-02 02:10:28 +08:00
|
|
|
|
2021-12-02 02:10:31 +08:00
|
|
|
static void bpf_free_cands(struct bpf_cand_cache *cands)
|
|
|
|
{
|
|
|
|
if (!cands->cnt)
|
|
|
|
/* empty candidate array was allocated on stack */
|
|
|
|
return;
|
|
|
|
kfree(cands);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void bpf_free_cands_from_cache(struct bpf_cand_cache *cands)
|
|
|
|
{
|
|
|
|
kfree(cands->name);
|
|
|
|
kfree(cands);
|
|
|
|
}
|
|
|
|
|
|
|
|
#define VMLINUX_CAND_CACHE_SIZE 31
|
|
|
|
static struct bpf_cand_cache *vmlinux_cand_cache[VMLINUX_CAND_CACHE_SIZE];
|
|
|
|
|
|
|
|
#define MODULE_CAND_CACHE_SIZE 31
|
|
|
|
static struct bpf_cand_cache *module_cand_cache[MODULE_CAND_CACHE_SIZE];
|
|
|
|
|
|
|
|
static void __print_cand_cache(struct bpf_verifier_log *log,
|
|
|
|
struct bpf_cand_cache **cache,
|
|
|
|
int cache_size)
|
|
|
|
{
|
|
|
|
struct bpf_cand_cache *cc;
|
|
|
|
int i, j;
|
|
|
|
|
|
|
|
for (i = 0; i < cache_size; i++) {
|
|
|
|
cc = cache[i];
|
|
|
|
if (!cc)
|
|
|
|
continue;
|
|
|
|
bpf_log(log, "[%d]%s(", i, cc->name);
|
|
|
|
for (j = 0; j < cc->cnt; j++) {
|
|
|
|
bpf_log(log, "%d", cc->cands[j].id);
|
|
|
|
if (j < cc->cnt - 1)
|
|
|
|
bpf_log(log, " ");
|
|
|
|
}
|
|
|
|
bpf_log(log, "), ");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void print_cand_cache(struct bpf_verifier_log *log)
|
|
|
|
{
|
|
|
|
mutex_lock(&cand_cache_mutex);
|
|
|
|
bpf_log(log, "vmlinux_cand_cache:");
|
|
|
|
__print_cand_cache(log, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
|
|
|
|
bpf_log(log, "\nmodule_cand_cache:");
|
|
|
|
__print_cand_cache(log, module_cand_cache, MODULE_CAND_CACHE_SIZE);
|
|
|
|
bpf_log(log, "\n");
|
|
|
|
mutex_unlock(&cand_cache_mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
static u32 hash_cands(struct bpf_cand_cache *cands)
|
|
|
|
{
|
|
|
|
return jhash(cands->name, cands->name_len, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct bpf_cand_cache *check_cand_cache(struct bpf_cand_cache *cands,
|
|
|
|
struct bpf_cand_cache **cache,
|
|
|
|
int cache_size)
|
|
|
|
{
|
|
|
|
struct bpf_cand_cache *cc = cache[hash_cands(cands) % cache_size];
|
|
|
|
|
|
|
|
if (cc && cc->name_len == cands->name_len &&
|
|
|
|
!strncmp(cc->name, cands->name, cands->name_len))
|
|
|
|
return cc;
|
|
|
|
return NULL;
|
|
|
|
}
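/* Editor's note: both caches are direct-mapped with one slot per bucket,
 * so a hash collision is simply a miss here; populate_cand_cache() later
 * evicts whatever occupied the bucket.
 */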
|
|
|
|
|
|
|
|
static size_t sizeof_cands(int cnt)
|
|
|
|
{
|
|
|
|
return offsetof(struct bpf_cand_cache, cands[cnt]);
|
|
|
|
}
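/* Editor's note: sizeof_cands(n) sizes the header plus n entries of the
 * flexible cands[] array, e.g. sizeof_cands(cands->cnt + 1) below when
 * growing the array by one candidate.
 */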
|
|
|
|
|
|
|
|
static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands,
|
|
|
|
struct bpf_cand_cache **cache,
|
|
|
|
int cache_size)
|
|
|
|
{
|
|
|
|
struct bpf_cand_cache **cc = &cache[hash_cands(cands) % cache_size], *new_cands;
|
|
|
|
|
|
|
|
if (*cc) {
|
|
|
|
bpf_free_cands_from_cache(*cc);
|
|
|
|
*cc = NULL;
|
|
|
|
}
|
2021-12-09 14:21:22 +08:00
|
|
|
new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL);
|
2021-12-02 02:10:31 +08:00
|
|
|
if (!new_cands) {
|
|
|
|
bpf_free_cands(cands);
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
}
|
|
|
|
/* strdup the name, since it will stay in cache.
|
|
|
|
* cands->name points to strings in the prog's BTF and the prog can be unloaded.
|
|
|
|
*/
|
|
|
|
new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL);
|
|
|
|
bpf_free_cands(cands);
|
|
|
|
if (!new_cands->name) {
|
|
|
|
kfree(new_cands);
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
}
|
|
|
|
*cc = new_cands;
|
|
|
|
return new_cands;
|
|
|
|
}
|
|
|
|
|
2021-12-07 09:48:39 +08:00
|
|
|
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
|
2021-12-02 02:10:31 +08:00
|
|
|
static void __purge_cand_cache(struct btf *btf, struct bpf_cand_cache **cache,
|
|
|
|
int cache_size)
|
|
|
|
{
|
|
|
|
struct bpf_cand_cache *cc;
|
|
|
|
int i, j;
|
|
|
|
|
|
|
|
for (i = 0; i < cache_size; i++) {
|
|
|
|
cc = cache[i];
|
|
|
|
if (!cc)
|
|
|
|
continue;
|
|
|
|
if (!btf) {
|
|
|
|
/* when a new module is loaded, purge all of module_cand_cache,
|
|
|
|
* since the new module might have candidates with a name
|
|
|
|
* that matches cached cands.
|
|
|
|
*/
|
|
|
|
bpf_free_cands_from_cache(cc);
|
|
|
|
cache[i] = NULL;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
/* when a module is unloaded, purge cache entries
|
|
|
|
* that match the module's btf
|
|
|
|
*/
|
|
|
|
for (j = 0; j < cc->cnt; j++)
|
|
|
|
if (cc->cands[j].btf == btf) {
|
|
|
|
bpf_free_cands_from_cache(cc);
|
|
|
|
cache[i] = NULL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
static void purge_cand_cache(struct btf *btf)
|
|
|
|
{
|
|
|
|
mutex_lock(&cand_cache_mutex);
|
|
|
|
__purge_cand_cache(btf, module_cand_cache, MODULE_CAND_CACHE_SIZE);
|
|
|
|
mutex_unlock(&cand_cache_mutex);
|
|
|
|
}
|
2021-12-07 09:48:39 +08:00
|
|
|
#endif
|
2021-12-02 02:10:31 +08:00
|
|
|
|
|
|
|
static struct bpf_cand_cache *
|
|
|
|
bpf_core_add_cands(struct bpf_cand_cache *cands, const struct btf *targ_btf,
|
|
|
|
int targ_start_id)
|
|
|
|
{
|
|
|
|
struct bpf_cand_cache *new_cands;
|
|
|
|
const struct btf_type *t;
|
|
|
|
const char *targ_name;
|
|
|
|
size_t targ_essent_len;
|
|
|
|
int n, i;
|
|
|
|
|
|
|
|
n = btf_nr_types(targ_btf);
|
|
|
|
for (i = targ_start_id; i < n; i++) {
|
|
|
|
t = btf_type_by_id(targ_btf, i);
|
|
|
|
if (btf_kind(t) != cands->kind)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
targ_name = btf_name_by_offset(targ_btf, t->name_off);
|
|
|
|
if (!targ_name)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* the resched point is before strncmp to make sure that a search
|
|
|
|
* for a non-existing name will have a chance to schedule().
|
|
|
|
*/
|
|
|
|
cond_resched();
|
|
|
|
|
|
|
|
if (strncmp(cands->name, targ_name, cands->name_len) != 0)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
targ_essent_len = bpf_core_essential_name_len(targ_name);
|
|
|
|
if (targ_essent_len != cands->name_len)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* most of the time there is only one candidate for a given kind+name pair */
|
|
|
|
new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL);
|
|
|
|
if (!new_cands) {
|
|
|
|
bpf_free_cands(cands);
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
}
|
|
|
|
|
|
|
|
memcpy(new_cands, cands, sizeof_cands(cands->cnt));
|
|
|
|
bpf_free_cands(cands);
|
|
|
|
cands = new_cands;
|
|
|
|
cands->cands[cands->cnt].btf = targ_btf;
|
|
|
|
cands->cands[cands->cnt].id = i;
|
|
|
|
cands->cnt++;
|
|
|
|
}
|
|
|
|
return cands;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct bpf_cand_cache *
|
|
|
|
bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id)
|
|
|
|
{
|
|
|
|
struct bpf_cand_cache *cands, *cc, local_cand = {};
|
|
|
|
const struct btf *local_btf = ctx->btf;
|
|
|
|
const struct btf_type *local_type;
|
|
|
|
const struct btf *main_btf;
|
|
|
|
size_t local_essent_len;
|
|
|
|
struct btf *mod_btf;
|
|
|
|
const char *name;
|
|
|
|
int id;
|
|
|
|
|
|
|
|
main_btf = bpf_get_btf_vmlinux();
|
|
|
|
if (IS_ERR(main_btf))
|
2021-12-12 10:08:19 +08:00
|
|
|
return ERR_CAST(main_btf);
|
2022-03-20 22:30:03 +08:00
|
|
|
if (!main_btf)
|
|
|
|
return ERR_PTR(-EINVAL);
|
2021-12-02 02:10:31 +08:00
|
|
|
|
|
|
|
local_type = btf_type_by_id(local_btf, local_type_id);
|
|
|
|
if (!local_type)
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
|
|
|
|
name = btf_name_by_offset(local_btf, local_type->name_off);
|
|
|
|
if (str_is_empty(name))
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
local_essent_len = bpf_core_essential_name_len(name);
|
|
|
|
|
|
|
|
cands = &local_cand;
|
|
|
|
cands->name = name;
|
|
|
|
cands->kind = btf_kind(local_type);
|
|
|
|
cands->name_len = local_essent_len;
|
|
|
|
|
|
|
|
cc = check_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
|
|
|
|
/* cands is a pointer to stack here */
|
|
|
|
if (cc) {
|
|
|
|
if (cc->cnt)
|
|
|
|
return cc;
|
|
|
|
goto check_modules;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Attempt to find target candidates in vmlinux BTF first */
|
|
|
|
cands = bpf_core_add_cands(cands, main_btf, 1);
|
|
|
|
if (IS_ERR(cands))
|
2021-12-12 10:08:19 +08:00
|
|
|
return ERR_CAST(cands);
|
2021-12-02 02:10:31 +08:00
|
|
|
|
|
|
|
/* cands is a pointer to kmalloced memory here if cands->cnt > 0 */
|
|
|
|
|
|
|
|
/* populate cache even when cands->cnt == 0 */
|
|
|
|
cc = populate_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
|
|
|
|
if (IS_ERR(cc))
|
2021-12-12 10:08:19 +08:00
|
|
|
return ERR_CAST(cc);
|
2021-12-02 02:10:31 +08:00
|
|
|
|
|
|
|
/* if vmlinux BTF has any candidate, don't go for module BTFs */
|
|
|
|
if (cc->cnt)
|
|
|
|
return cc;
|
|
|
|
|
|
|
|
check_modules:
|
|
|
|
/* cands is a pointer to stack here and cands->cnt == 0 */
|
|
|
|
cc = check_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE);
|
|
|
|
if (cc)
|
|
|
|
/* if the cache has it, return it even if cc->cnt == 0 */
|
|
|
|
return cc;
|
|
|
|
|
|
|
|
/* If candidate is not found in vmlinux's BTF then search in module's BTFs */
|
|
|
|
spin_lock_bh(&btf_idr_lock);
|
|
|
|
idr_for_each_entry(&btf_idr, mod_btf, id) {
|
|
|
|
if (!btf_is_module(mod_btf))
|
|
|
|
continue;
|
|
|
|
/* linear search could be slow hence unlock/lock
|
|
|
|
* the IDR to avoid holding it for too long
|
|
|
|
*/
|
|
|
|
btf_get(mod_btf);
|
|
|
|
spin_unlock_bh(&btf_idr_lock);
|
|
|
|
cands = bpf_core_add_cands(cands, mod_btf, btf_nr_types(main_btf));
|
2023-04-21 09:49:01 +08:00
|
|
|
btf_put(mod_btf);
|
|
|
|
if (IS_ERR(cands))
|
2021-12-12 10:08:19 +08:00
|
|
|
return ERR_CAST(cands);
|
2021-12-02 02:10:31 +08:00
|
|
|
spin_lock_bh(&btf_idr_lock);
|
|
|
|
}
|
|
|
|
spin_unlock_bh(&btf_idr_lock);
|
|
|
|
/* cands is a pointer to kmalloced memory here if cands->cnt > 0
|
|
|
|
* or a pointer to the stack if cands->cnt == 0.
|
|
|
|
* Copy it into the cache even when cands->cnt == 0 and
|
|
|
|
* return the result.
|
|
|
|
*/
|
|
|
|
return populate_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE);
|
|
|
|
}
|
|
|
|
|
2021-12-02 02:10:28 +08:00
|
|
|
int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
|
|
|
|
int relo_idx, void *insn)
|
|
|
|
{
|
2021-12-02 02:10:31 +08:00
|
|
|
bool need_cands = relo->kind != BPF_CORE_TYPE_ID_LOCAL;
|
|
|
|
struct bpf_core_cand_list cands = {};
|
2022-02-16 06:58:50 +08:00
|
|
|
struct bpf_core_relo_res targ_res;
|
2021-12-04 02:28:36 +08:00
|
|
|
struct bpf_core_spec *specs;
|
2024-08-22 16:01:23 +08:00
|
|
|
const struct btf_type *type;
|
2021-12-02 02:10:31 +08:00
|
|
|
int err;
|
|
|
|
|
2021-12-04 02:28:36 +08:00
|
|
|
/* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5"
|
|
|
|
* into arrays of btf_ids of struct fields and array indices.
|
|
|
|
*/
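/* Editor's illustration: a spec string such as "0:1:0:5" walks base[0],
 * then member #1, then member #0 of that struct, then array element [5].
 */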
|
|
|
|
specs = kcalloc(3, sizeof(*specs), GFP_KERNEL);
|
|
|
|
if (!specs)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2024-08-22 16:01:23 +08:00
|
|
|
type = btf_type_by_id(ctx->btf, relo->type_id);
|
|
|
|
if (!type) {
|
|
|
|
bpf_log(ctx->log, "relo #%u: bad type id %u\n",
|
|
|
|
relo_idx, relo->type_id);
|
2024-10-08 00:09:58 +08:00
|
|
|
kfree(specs);
|
2024-08-22 16:01:23 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2021-12-02 02:10:31 +08:00
|
|
|
if (need_cands) {
|
|
|
|
struct bpf_cand_cache *cc;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
mutex_lock(&cand_cache_mutex);
|
|
|
|
cc = bpf_core_find_cands(ctx, relo->type_id);
|
|
|
|
if (IS_ERR(cc)) {
|
|
|
|
bpf_log(ctx->log, "target candidate search failed for %d\n",
|
|
|
|
relo->type_id);
|
|
|
|
err = PTR_ERR(cc);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (cc->cnt) {
|
|
|
|
cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL);
|
|
|
|
if (!cands.cands) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (i = 0; i < cc->cnt; i++) {
|
|
|
|
bpf_log(ctx->log,
|
|
|
|
"CO-RE relocating %s %s: found target candidate [%d]\n",
|
|
|
|
btf_kind_str[cc->kind], cc->name, cc->cands[i].id);
|
|
|
|
cands.cands[i].btf = cc->cands[i].btf;
|
|
|
|
cands.cands[i].id = cc->cands[i].id;
|
|
|
|
}
|
|
|
|
cands.len = cc->cnt;
|
|
|
|
/* cand_cache_mutex needs to span the cache lookup and
|
|
|
|
* copy of btf pointer into bpf_core_cand_list,
|
2022-02-16 06:58:50 +08:00
|
|
|
* since a module can be unloaded while bpf_core_calc_relo_insn
|
2021-12-02 02:10:31 +08:00
|
|
|
* is working with module's btf.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
2022-02-16 06:58:50 +08:00
|
|
|
err = bpf_core_calc_relo_insn((void *)ctx->log, relo, relo_idx, ctx->btf, &cands, specs,
|
|
|
|
&targ_res);
|
|
|
|
if (err)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
err = bpf_core_patch_insn((void *)ctx->log, insn, relo->insn_off / 8, relo, relo_idx,
|
|
|
|
&targ_res);
|
|
|
|
|
2021-12-02 02:10:31 +08:00
|
|
|
out:
|
2021-12-04 02:28:36 +08:00
|
|
|
kfree(specs);
|
2021-12-02 02:10:31 +08:00
|
|
|
if (need_cands) {
|
|
|
|
kfree(cands.cands);
|
|
|
|
mutex_unlock(&cand_cache_mutex);
|
|
|
|
if (ctx->log->level & BPF_LOG_LEVEL2)
|
|
|
|
print_cand_cache(ctx->log);
|
|
|
|
}
|
|
|
|
return err;
|
2021-12-02 02:10:28 +08:00
|
|
|
}
|
2023-01-21 03:25:15 +08:00
|
|
|
|
|
|
|
bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
|
|
|
|
const struct bpf_reg_state *reg,
|
2023-04-04 12:50:24 +08:00
|
|
|
const char *field_name, u32 btf_id, const char *suffix)
|
2023-01-21 03:25:15 +08:00
|
|
|
{
|
|
|
|
struct btf *btf = reg->btf;
|
|
|
|
const struct btf_type *walk_type, *safe_type;
|
|
|
|
const char *tname;
|
|
|
|
char safe_tname[64];
|
|
|
|
long ret, safe_id;
|
2023-04-04 12:50:24 +08:00
|
|
|
const struct btf_member *member;
|
2023-01-21 03:25:15 +08:00
|
|
|
u32 i;
|
|
|
|
|
|
|
|
walk_type = btf_type_by_id(btf, reg->btf_id);
|
|
|
|
if (!walk_type)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
tname = btf_name_by_offset(btf, walk_type->name_off);
|
|
|
|
|
2023-03-03 12:14:46 +08:00
|
|
|
ret = snprintf(safe_tname, sizeof(safe_tname), "%s%s", tname, suffix);
|
2023-09-09 00:33:35 +08:00
|
|
|
if (ret >= sizeof(safe_tname))
|
2023-01-21 03:25:15 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
safe_id = btf_find_by_name_kind(btf, safe_tname, BTF_INFO_KIND(walk_type->info));
|
|
|
|
if (safe_id < 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
safe_type = btf_type_by_id(btf, safe_id);
|
|
|
|
if (!safe_type)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
for_each_member(i, safe_type, member) {
|
|
|
|
const char *m_name = __btf_name_by_offset(btf, member->name_off);
|
2023-04-04 12:50:24 +08:00
|
|
|
const struct btf_type *mtype = btf_type_by_id(btf, member->type);
|
|
|
|
u32 id;
|
|
|
|
|
|
|
|
if (!btf_type_is_ptr(mtype))
|
|
|
|
continue;
|
2023-01-21 03:25:15 +08:00
|
|
|
|
2023-04-04 12:50:24 +08:00
|
|
|
btf_type_skip_modifiers(btf, mtype->type, &id);
|
2023-01-21 03:25:15 +08:00
|
|
|
/* If we match on both type and name, the field is considered trusted. */
|
2023-04-04 12:50:24 +08:00
|
|
|
if (btf_id == id && !strcmp(field_name, m_name))
|
2023-01-21 03:25:15 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
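/* Editor's usage sketch (names hypothetical): the caller declares an
 * allowlist struct whose name is the walked type plus the suffix, e.g.
 * for suffix "__safe_trusted":
 *
 *	struct task_struct__safe_trusted {
 *		const cpumask_t *cpus_ptr;
 *	};
 *
 * A load of task_struct::cpus_ptr then matches the member above by both
 * BTF type id and name, and the field is treated as trusted.
 */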
|
bpf: Allow trusted args to walk struct when checking BTF IDs
When validating BTF types for KF_TRUSTED_ARGS kfuncs, the verifier
currently enforces that the top-level type must match when calling
the kfunc. In other words, the verifier does not allow the BPF program
to pass a bitwise equivalent struct, despite it being allowed according
to the C standard.
For example, if you have the following type:
struct nf_conn___init {
struct nf_conn ct;
};
The C standard stipulates that it would be safe to pass a struct
nf_conn___init to a kfunc expecting a struct nf_conn. The verifier
currently disallows this, however, as semantically kfuncs may want to
enforce that structs that have equivalent types according to the C
standard, but have different BTF IDs, are not able to be passed to
kfuncs expecting one or the other. For example, struct nf_conn___init
may not be queried / looked up, as it is allocated but may not yet be
fully initialized.
On the other hand, being able to pass types that are equivalent
according to the C standard will be useful for other types of kfunc /
kptrs enabled by BPF. For example, in a follow-on patch, a series of
kfuncs will be added which allow programs to do bitwise queries on
cpumasks that are either allocated by the program (in which case they'll
be a 'struct bpf_cpumask' type that wraps a cpumask_t as its first
element), or a cpumask that was allocated by the main kernel (in which
case it will just be a straight cpumask_t, as in task->cpus_ptr).
Having the two types of cpumasks allows us to distinguish between the
two for when a cpumask is read-only vs. mutable. A struct bpf_cpumask
can be mutated by e.g. bpf_cpumask_clear(), whereas a regular cpumask_t
cannot be. On the other hand, a struct bpf_cpumask can of course be
queried in the exact same manner as a cpumask_t, with e.g.
bpf_cpumask_test_cpu().
If we were to enforce that top level types match, then a user that's
passing a struct bpf_cpumask to a read-only cpumask_t argument would
have to cast with something like bpf_cast_to_kern_ctx() (which itself
would need to be updated to expect the alias, and currently it only
accommodates a single alias per prog type). Additionally, not specifying
KF_TRUSTED_ARGS is not an option, as some kfuncs take one argument as a
struct bpf_cpumask *, and another as a struct cpumask *
(i.e. cpumask_t).
In order to enable this, this patch relaxes the constraint that a
KF_TRUSTED_ARGS kfunc must have strict type matching, and instead only
enforces strict type matching if a type is observed to be a "no-cast
alias" (i.e., that the type names are equivalent, but one is suffixed
with ___init).
Additionally, in order to try and be conservative and match existing
behavior / expectations, this patch also enforces strict type checking
for acquire kfuncs. We were already enforcing it for release kfuncs, so
this should also improve the consistency of the semantics for kfuncs.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230120192523.3650503-3-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-01-21 03:25:16 +08:00
|
|
|
|
|
|
|
bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log,
|
|
|
|
const struct btf *reg_btf, u32 reg_id,
|
|
|
|
const struct btf *arg_btf, u32 arg_id)
|
|
|
|
{
|
|
|
|
const char *reg_name, *arg_name, *search_needle;
|
|
|
|
const struct btf_type *reg_type, *arg_type;
|
|
|
|
int reg_len, arg_len, cmp_len;
|
|
|
|
size_t pattern_len = sizeof(NOCAST_ALIAS_SUFFIX) - sizeof(char);
|
|
|
|
|
|
|
|
reg_type = btf_type_by_id(reg_btf, reg_id);
|
|
|
|
if (!reg_type)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
arg_type = btf_type_by_id(arg_btf, arg_id);
|
|
|
|
if (!arg_type)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
reg_name = btf_name_by_offset(reg_btf, reg_type->name_off);
|
|
|
|
arg_name = btf_name_by_offset(arg_btf, arg_type->name_off);
|
|
|
|
|
|
|
|
reg_len = strlen(reg_name);
|
|
|
|
arg_len = strlen(arg_name);
|
|
|
|
|
|
|
|
/* Exactly one of the two type names may be suffixed with ___init, so
|
|
|
|
* if the strings are the same size, they can't possibly be no-cast
|
|
|
|
* aliases of one another. If you have two of the same type names, e.g.
|
|
|
|
* they're both nf_conn___init, it would be improper to return true
|
|
|
|
* because they are _not_ no-cast aliases, they are the same type.
|
|
|
|
*/
|
|
|
|
if (reg_len == arg_len)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* One of the two names must be the other name suffixed with ___init. */
|
|
|
|
if ((reg_len != arg_len + pattern_len) &&
|
|
|
|
(arg_len != reg_len + pattern_len))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (reg_len < arg_len) {
|
|
|
|
search_needle = strstr(arg_name, NOCAST_ALIAS_SUFFIX);
|
|
|
|
cmp_len = reg_len;
|
|
|
|
} else {
|
|
|
|
search_needle = strstr(reg_name, NOCAST_ALIAS_SUFFIX);
|
|
|
|
cmp_len = arg_len;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!search_needle)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* ___init suffix must come at the end of the name */
|
|
|
|
if (*(search_needle + pattern_len) != '\0')
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return !strncmp(reg_name, arg_name, cmp_len);
|
|
|
|
}
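/* Editor's example: with NOCAST_ALIAS_SUFFIX being "___init",
 * ("nf_conn", "nf_conn___init") are no-cast aliases: the lengths differ
 * by exactly the suffix length, "___init" ends the longer name, and the
 * first cmp_len bytes compare equal. Two identical "nf_conn___init"
 * names are rejected up front by the equal-length check.
 */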
|
2024-01-20 06:49:55 +08:00
|
|
|
|
2024-01-20 06:50:02 +08:00
|
|
|
#ifdef CONFIG_BPF_JIT
|
2024-01-20 06:49:55 +08:00
|
|
|
static int
|
2024-01-20 06:50:02 +08:00
|
|
|
btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops,
|
|
|
|
struct bpf_verifier_log *log)
|
2024-01-20 06:49:55 +08:00
|
|
|
{
|
|
|
|
struct btf_struct_ops_tab *tab, *new_tab;
|
2024-01-20 06:50:02 +08:00
|
|
|
int i, err;
|
2024-01-20 06:49:55 +08:00
|
|
|
|
|
|
|
tab = btf->struct_ops_tab;
|
|
|
|
if (!tab) {
|
|
|
|
tab = kzalloc(offsetof(struct btf_struct_ops_tab, ops[4]),
|
|
|
|
GFP_KERNEL);
|
|
|
|
if (!tab)
|
|
|
|
return -ENOMEM;
|
|
|
|
tab->capacity = 4;
|
|
|
|
btf->struct_ops_tab = tab;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < tab->cnt; i++)
|
|
|
|
if (tab->ops[i].st_ops == st_ops)
|
|
|
|
return -EEXIST;
|
|
|
|
|
|
|
|
if (tab->cnt == tab->capacity) {
|
|
|
|
new_tab = krealloc(tab,
|
|
|
|
offsetof(struct btf_struct_ops_tab,
|
|
|
|
ops[tab->capacity * 2]),
|
|
|
|
GFP_KERNEL);
|
|
|
|
if (!new_tab)
|
|
|
|
return -ENOMEM;
|
|
|
|
tab = new_tab;
|
|
|
|
tab->capacity *= 2;
|
|
|
|
btf->struct_ops_tab = tab;
|
|
|
|
}
|
|
|
|
|
|
|
|
tab->ops[btf->struct_ops_tab->cnt].st_ops = st_ops;
|
|
|
|
|
2024-01-20 06:50:02 +08:00
|
|
|
err = bpf_struct_ops_desc_init(&tab->ops[btf->struct_ops_tab->cnt], btf, log);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2024-01-20 06:49:55 +08:00
|
|
|
btf->struct_ops_tab->cnt++;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
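/* Editor's note: the table starts at capacity 4 and doubles via
 * krealloc() on demand, so repeated registrations grow it in
 * amortized O(1) allocations.
 */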
|
2024-01-20 06:50:02 +08:00
|
|
|
|
|
|
|
const struct bpf_struct_ops_desc *
|
|
|
|
bpf_struct_ops_find_value(struct btf *btf, u32 value_id)
|
|
|
|
{
|
|
|
|
const struct bpf_struct_ops_desc *st_ops_list;
|
|
|
|
unsigned int i;
|
|
|
|
u32 cnt;
|
|
|
|
|
|
|
|
if (!value_id)
|
|
|
|
return NULL;
|
|
|
|
if (!btf->struct_ops_tab)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
cnt = btf->struct_ops_tab->cnt;
|
|
|
|
st_ops_list = btf->struct_ops_tab->ops;
|
|
|
|
for (i = 0; i < cnt; i++) {
|
|
|
|
if (st_ops_list[i].value_id == value_id)
|
|
|
|
return &st_ops_list[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
const struct bpf_struct_ops_desc *
|
|
|
|
bpf_struct_ops_find(struct btf *btf, u32 type_id)
|
|
|
|
{
|
|
|
|
const struct bpf_struct_ops_desc *st_ops_list;
|
|
|
|
unsigned int i;
|
|
|
|
u32 cnt;
|
|
|
|
|
|
|
|
if (!type_id)
|
|
|
|
return NULL;
|
|
|
|
if (!btf->struct_ops_tab)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
cnt = btf->struct_ops_tab->cnt;
|
|
|
|
st_ops_list = btf->struct_ops_tab->ops;
|
|
|
|
for (i = 0; i < cnt; i++) {
|
|
|
|
if (st_ops_list[i].type_id == type_id)
|
|
|
|
return &st_ops_list[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
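/* Editor's note: both lookups above are linear scans; the per-BTF table
 * is expected to hold only a handful of struct_ops, so no index is kept.
 */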
|
|
|
|
|
|
|
|
int __register_bpf_struct_ops(struct bpf_struct_ops *st_ops)
|
|
|
|
{
|
|
|
|
struct bpf_verifier_log *log;
|
|
|
|
struct btf *btf;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
btf = btf_get_module_btf(st_ops->owner);
|
|
|
|
if (!btf)
|
2024-02-08 14:24:23 +08:00
|
|
|
return check_btf_kconfigs(st_ops->owner, "struct_ops");
|
|
|
|
if (IS_ERR(btf))
|
|
|
|
return PTR_ERR(btf);
|
2024-01-20 06:50:02 +08:00
|
|
|
|
|
|
|
log = kzalloc(sizeof(*log), GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!log) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto errout;
|
|
|
|
}
|
|
|
|
|
|
|
|
log->level = BPF_LOG_KERNEL;
|
|
|
|
|
|
|
|
err = btf_add_struct_ops(btf, st_ops, log);
|
|
|
|
|
|
|
|
errout:
|
|
|
|
kfree(log);
|
|
|
|
btf_put(btf);
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(__register_bpf_struct_ops);
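/* Editor's registration sketch from a module (ops and names are
 * hypothetical; only fields known to exist in struct bpf_struct_ops are
 * shown):
 *
 *	static struct bpf_struct_ops bpf_my_ops = {
 *		.verifier_ops	= &bpf_my_verifier_ops,
 *		.name		= "my_ops",
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init my_mod_init(void)
 *	{
 *		return __register_bpf_struct_ops(&bpf_my_ops);
 *	}
 */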
|
|
|
|
#endif
|
2024-02-09 10:37:48 +08:00
|
|
|
|
|
|
|
bool btf_param_match_suffix(const struct btf *btf,
|
|
|
|
const struct btf_param *arg,
|
|
|
|
const char *suffix)
|
|
|
|
{
|
|
|
|
int suffix_len = strlen(suffix), len;
|
|
|
|
const char *param_name;
|
|
|
|
|
|
|
|
/* In the future, this can be ported to use BTF tagging */
|
|
|
|
param_name = btf_name_by_offset(btf, arg->name_off);
|
|
|
|
if (str_is_empty(param_name))
|
|
|
|
return false;
|
|
|
|
len = strlen(param_name);
|
|
|
|
if (len <= suffix_len)
|
|
|
|
return false;
|
|
|
|
param_name += len - suffix_len;
|
|
|
|
return !strncmp(param_name, suffix, suffix_len);
|
|
|
|
}
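/* Editor's example: a kfunc argument named "buf__sz" matches suffix
 * "__sz" (len 7 > suffix_len 4 and the final four bytes compare equal),
 * while a bare "sz" is rejected by the len <= suffix_len check.
 */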
|