2019-05-29 22:18:09 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2014-11-14 09:36:49 +08:00
|
|
|
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
|
|
|
|
*/
|
|
|
|
#include <linux/bpf.h>
|
|
|
|
#include <linux/rcupdate.h>
|
2015-03-14 09:27:16 +08:00
|
|
|
#include <linux/random.h>
|
2015-03-14 09:27:17 +08:00
|
|
|
#include <linux/smp.h>
|
2016-10-21 18:46:33 +08:00
|
|
|
#include <linux/topology.h>
|
2015-05-30 05:23:06 +08:00
|
|
|
#include <linux/ktime.h>
|
2015-06-13 10:39:12 +08:00
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/uidgid.h>
|
bpf: add BPF_CALL_x macros for declaring helpers
This work adds BPF_CALL_<n>() macros and converts all the eBPF helper functions
to use them, in a similar fashion like we do with SYSCALL_DEFINE<n>() macros
that are used today. Motivation for this is to hide all the register handling
and all necessary casts from the user, so that it is done automatically in the
background when adding a BPF_CALL_<n>() call.
This makes current helpers easier to review, eases to write future helpers,
avoids getting the casting mess wrong, and allows for extending all helpers at
once (f.e. build time checks, etc). It also helps detecting more easily in
code reviews that unused registers are not instrumented in the code by accident,
breaking compatibility with existing programs.
BPF_CALL_<n>() internals are quite similar to SYSCALL_DEFINE<n>() ones with some
fundamental differences, for example, for generating the actual helper function
that carries all u64 regs, we need to fill unused regs, so that we always end up
with 5 u64 regs as an argument.
I reviewed several 0-5 generated BPF_CALL_<n>() variants of the .i results and
they look all as expected. No sparse issue spotted. We let this also sit for a
few days with Fengguang's kbuild test robot, and there were no issues seen. On
s390, it barked on the "uses dynamic stack allocation" notice, which is an old
one from bpf_perf_event_output{,_tp}() reappearing here due to the conversion
to the call wrapper, just telling that the perf raw record/frag sits on stack
(gcc with s390's -mwarn-dynamicstack), but that's all. Did various runtime tests
and they were fine as well. All eBPF helpers are now converted to use these
macros, getting rid of a good chunk of all the raw castings.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-09 08:45:31 +08:00
|
|
|
#include <linux/filter.h>
|
2019-03-19 08:55:26 +08:00
|
|
|
#include <linux/ctype.h>
|
2020-01-23 07:36:46 +08:00
|
|
|
#include <linux/jiffies.h>
|
2020-03-05 04:41:56 +08:00
|
|
|
#include <linux/pid_namespace.h>
|
|
|
|
#include <linux/proc_ns.h>
|
2019-03-19 08:55:26 +08:00
|
|
|
|
|
|
|
#include "../../lib/kstrtox.h"
|
2014-11-14 09:36:49 +08:00
|
|
|
|
|
|
|
/* If a kernel subsystem allows eBPF programs to call this function, then
 * inside its own verifier_ops->get_func_proto() callback it should return
 * bpf_map_lookup_elem_proto, so that the verifier can properly check the
 * arguments.
 *
 * Different map implementations rely on RCU in their map methods
 * lookup/update/delete; therefore eBPF programs must run under the RCU
 * read lock if they are allowed to access maps, so rcu_read_lock_held()
 * is checked in all three functions.
 */
|
bpf: add BPF_CALL_x macros for declaring helpers
This work adds BPF_CALL_<n>() macros and converts all the eBPF helper functions
to use them, in a similar fashion like we do with SYSCALL_DEFINE<n>() macros
that are used today. Motivation for this is to hide all the register handling
and all necessary casts from the user, so that it is done automatically in the
background when adding a BPF_CALL_<n>() call.
This makes current helpers easier to review, eases to write future helpers,
avoids getting the casting mess wrong, and allows for extending all helpers at
once (f.e. build time checks, etc). It also helps detecting more easily in
code reviews that unused registers are not instrumented in the code by accident,
breaking compatibility with existing programs.
BPF_CALL_<n>() internals are quite similar to SYSCALL_DEFINE<n>() ones with some
fundamental differences, for example, for generating the actual helper function
that carries all u64 regs, we need to fill unused regs, so that we always end up
with 5 u64 regs as an argument.
I reviewed several 0-5 generated BPF_CALL_<n>() variants of the .i results and
they look all as expected. No sparse issue spotted. We let this also sit for a
few days with Fengguang's kbuild test robot, and there were no issues seen. On
s390, it barked on the "uses dynamic stack allocation" notice, which is an old
one from bpf_perf_event_output{,_tp}() reappearing here due to the conversion
to the call wrapper, just telling that the perf raw record/frag sits on stack
(gcc with s390's -mwarn-dynamicstack), but that's all. Did various runtime tests
and they were fine as well. All eBPF helpers are now converted to use these
macros, getting rid of a good chunk of all the raw castings.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-09 08:45:31 +08:00
|
|
|
/*
 * Look up @key in @map.
 *
 * Warns once if the caller does not hold the RCU read lock, then forwards
 * the request to the map's map_lookup_elem callback. The callback's result
 * is handed back cast to unsigned long.
 */
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
	const struct bpf_map_ops *ops;

	WARN_ON_ONCE(!rcu_read_lock_held());

	ops = map->ops;
	return (unsigned long) ops->map_lookup_elem(map, key);
}
|
|
|
|
|
2015-03-01 19:31:42 +08:00
|
|
|
const struct bpf_func_proto bpf_map_lookup_elem_proto = {
|
2015-05-30 05:23:07 +08:00
|
|
|
.func = bpf_map_lookup_elem,
|
|
|
|
.gpl_only = false,
|
bpf: direct packet write and access for helpers for clsact progs
This work implements direct packet access for helpers and direct packet
write in a similar fashion as already available for XDP types via commits
4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and
6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a
complementary feature to the already available direct packet read for tc
(cls/act) programs.
For enabling this, we need to introduce two helpers, bpf_skb_pull_data()
and bpf_csum_update(). The first is generally needed for both, read and
write, because they would otherwise only be limited to the current linear
skb head. Usually, when the data_end test fails, programs just bail out,
or, in the direct read case, use bpf_skb_load_bytes() as an alternative
to overcome this limitation. If such data sits in non-linear parts, we
can just pull them in once with the new helper, retest and eventually
access them.
At the same time, this also makes sure the skb is uncloned, which is, of
course, a necessary condition for direct write. As this needs to be an
invariant for the write part only, the verifier detects writes and adds
a prologue that is calling bpf_skb_pull_data() to effectively unclone the
skb from the very beginning in case it is indeed cloned. The heuristic
makes use of a similar trick that was done in 233577a22089 ("net: filter:
constify detection of pkt_type_offset"). This comes at zero cost for other
programs that do not use the direct write feature. Should a program use
this feature only sparsely and has read access for the most parts with,
for example, drop return codes, then such write action can be delegated
to a tail called program for mitigating this cost of potential uncloning
to a late point in time where it would have been paid similarly with the
bpf_skb_store_bytes() as well. Advantage of direct write is that the
writes are inlined whereas the helper cannot make any length assumptions
and thus needs to generate a call to memcpy() also for small sizes, as well
as cost of helper call itself with sanity checks are avoided. Plus, when
direct read is already used, we don't need to cache or perform rechecks
on the data boundaries (due to verifier invalidating previous checks for
helpers that change skb->data), so more complex programs using rewrites
can benefit from switching to direct read plus write.
For direct packet access to helpers, we save the otherwise needed copy into
a temp struct sitting on stack memory when use-case allows. Both facilities
are enabled via may_access_direct_pkt_data() in verifier. For now, we limit
this to map helpers and csum_diff, and can successively enable other helpers
where we find it makes sense. Helpers that definitely cannot be allowed for
this are those part of bpf_helper_changes_skb_data() since they can change
underlying data, and those that write into memory as this could happen for
packet typed args when still cloned. bpf_csum_update() helper accommodates
for the fact that we need to fixup checksum_complete when using direct write
instead of bpf_skb_store_bytes(), meaning the programs can use available
helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(),
csum_block_add(), csum_block_sub() equivalents in eBPF together with the
new helper. A usage example will be provided for iproute2's examples/bpf/
directory.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-20 06:26:13 +08:00
|
|
|
.pkt_access = true,
|
2015-05-30 05:23:07 +08:00
|
|
|
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
|
|
|
|
.arg1_type = ARG_CONST_MAP_PTR,
|
|
|
|
.arg2_type = ARG_PTR_TO_MAP_KEY,
|
2014-11-14 09:36:49 +08:00
|
|
|
};
|
|
|
|
|
bpf: add BPF_CALL_x macros for declaring helpers
This work adds BPF_CALL_<n>() macros and converts all the eBPF helper functions
to use them, in a similar fashion like we do with SYSCALL_DEFINE<n>() macros
that are used today. Motivation for this is to hide all the register handling
and all necessary casts from the user, so that it is done automatically in the
background when adding a BPF_CALL_<n>() call.
This makes current helpers easier to review, eases to write future helpers,
avoids getting the casting mess wrong, and allows for extending all helpers at
once (f.e. build time checks, etc). It also helps detecting more easily in
code reviews that unused registers are not instrumented in the code by accident,
breaking compatibility with existing programs.
BPF_CALL_<n>() internals are quite similar to SYSCALL_DEFINE<n>() ones with some
fundamental differences, for example, for generating the actual helper function
that carries all u64 regs, we need to fill unused regs, so that we always end up
with 5 u64 regs as an argument.
I reviewed several 0-5 generated BPF_CALL_<n>() variants of the .i results and
they look all as expected. No sparse issue spotted. We let this also sit for a
few days with Fengguang's kbuild test robot, and there were no issues seen. On
s390, it barked on the "uses dynamic stack allocation" notice, which is an old
one from bpf_perf_event_output{,_tp}() reappearing here due to the conversion
to the call wrapper, just telling that the perf raw record/frag sits on stack
(gcc with s390's -mwarn-dynamicstack), but that's all. Did various runtime tests
and they were fine as well. All eBPF helpers are now converted to use these
macros, getting rid of a good chunk of all the raw castings.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-09 08:45:31 +08:00
|
|
|
/*
 * Store @value under @key in @map, passing @flags through unchanged.
 *
 * Warns once if the caller does not hold the RCU read lock, then returns
 * whatever the map's map_update_elem callback returns.
 */
BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
	   void *, value, u64, flags)
{
	const struct bpf_map_ops *ops;

	WARN_ON_ONCE(!rcu_read_lock_held());

	ops = map->ops;
	return ops->map_update_elem(map, key, value, flags);
}
|
|
|
|
|
2015-03-01 19:31:42 +08:00
|
|
|
const struct bpf_func_proto bpf_map_update_elem_proto = {
|
2015-05-30 05:23:07 +08:00
|
|
|
.func = bpf_map_update_elem,
|
|
|
|
.gpl_only = false,
|
bpf: direct packet write and access for helpers for clsact progs
This work implements direct packet access for helpers and direct packet
write in a similar fashion as already available for XDP types via commits
4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and
6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a
complementary feature to the already available direct packet read for tc
(cls/act) programs.
For enabling this, we need to introduce two helpers, bpf_skb_pull_data()
and bpf_csum_update(). The first is generally needed for both, read and
write, because they would otherwise only be limited to the current linear
skb head. Usually, when the data_end test fails, programs just bail out,
or, in the direct read case, use bpf_skb_load_bytes() as an alternative
to overcome this limitation. If such data sits in non-linear parts, we
can just pull them in once with the new helper, retest and eventually
access them.
At the same time, this also makes sure the skb is uncloned, which is, of
course, a necessary condition for direct write. As this needs to be an
invariant for the write part only, the verifier detects writes and adds
a prologue that is calling bpf_skb_pull_data() to effectively unclone the
skb from the very beginning in case it is indeed cloned. The heuristic
makes use of a similar trick that was done in 233577a22089 ("net: filter:
constify detection of pkt_type_offset"). This comes at zero cost for other
programs that do not use the direct write feature. Should a program use
this feature only sparsely and has read access for the most parts with,
for example, drop return codes, then such write action can be delegated
to a tail called program for mitigating this cost of potential uncloning
to a late point in time where it would have been paid similarly with the
bpf_skb_store_bytes() as well. Advantage of direct write is that the
writes are inlined whereas the helper cannot make any length assumptions
and thus needs to generate a call to memcpy() also for small sizes, as well
as cost of helper call itself with sanity checks are avoided. Plus, when
direct read is already used, we don't need to cache or perform rechecks
on the data boundaries (due to verifier invalidating previous checks for
helpers that change skb->data), so more complex programs using rewrites
can benefit from switching to direct read plus write.
For direct packet access to helpers, we save the otherwise needed copy into
a temp struct sitting on stack memory when use-case allows. Both facilities
are enabled via may_access_direct_pkt_data() in verifier. For now, we limit
this to map helpers and csum_diff, and can successively enable other helpers
where we find it makes sense. Helpers that definitely cannot be allowed for
this are those part of bpf_helper_changes_skb_data() since they can change
underlying data, and those that write into memory as this could happen for
packet typed args when still cloned. bpf_csum_update() helper accommodates
for the fact that we need to fixup checksum_complete when using direct write
instead of bpf_skb_store_bytes(), meaning the programs can use available
helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(),
csum_block_add(), csum_block_sub() equivalents in eBPF together with the
new helper. A usage example will be provided for iproute2's examples/bpf/
directory.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-20 06:26:13 +08:00
|
|
|
.pkt_access = true,
|
2015-05-30 05:23:07 +08:00
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
.arg1_type = ARG_CONST_MAP_PTR,
|
|
|
|
.arg2_type = ARG_PTR_TO_MAP_KEY,
|
|
|
|
.arg3_type = ARG_PTR_TO_MAP_VALUE,
|
|
|
|
.arg4_type = ARG_ANYTHING,
|
2014-11-14 09:36:49 +08:00
|
|
|
};
|
|
|
|
|
bpf: add BPF_CALL_x macros for declaring helpers
This work adds BPF_CALL_<n>() macros and converts all the eBPF helper functions
to use them, in a similar fashion like we do with SYSCALL_DEFINE<n>() macros
that are used today. Motivation for this is to hide all the register handling
and all necessary casts from the user, so that it is done automatically in the
background when adding a BPF_CALL_<n>() call.
This makes current helpers easier to review, eases to write future helpers,
avoids getting the casting mess wrong, and allows for extending all helpers at
once (f.e. build time checks, etc). It also helps detecting more easily in
code reviews that unused registers are not instrumented in the code by accident,
breaking compatibility with existing programs.
BPF_CALL_<n>() internals are quite similar to SYSCALL_DEFINE<n>() ones with some
fundamental differences, for example, for generating the actual helper function
that carries all u64 regs, we need to fill unused regs, so that we always end up
with 5 u64 regs as an argument.
I reviewed several 0-5 generated BPF_CALL_<n>() variants of the .i results and
they look all as expected. No sparse issue spotted. We let this also sit for a
few days with Fengguang's kbuild test robot, and there were no issues seen. On
s390, it barked on the "uses dynamic stack allocation" notice, which is an old
one from bpf_perf_event_output{,_tp}() reappearing here due to the conversion
to the call wrapper, just telling that the perf raw record/frag sits on stack
(gcc with s390's -mwarn-dynamicstack), but that's all. Did various runtime tests
and they were fine as well. All eBPF helpers are now converted to use these
macros, getting rid of a good chunk of all the raw castings.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-09 08:45:31 +08:00
|
|
|
/*
 * Remove the entry for @key from @map.
 *
 * Warns once if the caller does not hold the RCU read lock, then returns
 * whatever the map's map_delete_elem callback returns.
 */
BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
{
	const struct bpf_map_ops *ops;

	WARN_ON_ONCE(!rcu_read_lock_held());

	ops = map->ops;
	return ops->map_delete_elem(map, key);
}
|
|
|
|
|
2015-03-01 19:31:42 +08:00
|
|
|
const struct bpf_func_proto bpf_map_delete_elem_proto = {
|
2015-05-30 05:23:07 +08:00
|
|
|
.func = bpf_map_delete_elem,
|
|
|
|
.gpl_only = false,
|
bpf: direct packet write and access for helpers for clsact progs
This work implements direct packet access for helpers and direct packet
write in a similar fashion as already available for XDP types via commits
4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and
6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a
complementary feature to the already available direct packet read for tc
(cls/act) programs.
For enabling this, we need to introduce two helpers, bpf_skb_pull_data()
and bpf_csum_update(). The first is generally needed for both, read and
write, because they would otherwise only be limited to the current linear
skb head. Usually, when the data_end test fails, programs just bail out,
or, in the direct read case, use bpf_skb_load_bytes() as an alternative
to overcome this limitation. If such data sits in non-linear parts, we
can just pull them in once with the new helper, retest and eventually
access them.
At the same time, this also makes sure the skb is uncloned, which is, of
course, a necessary condition for direct write. As this needs to be an
invariant for the write part only, the verifier detects writes and adds
a prologue that is calling bpf_skb_pull_data() to effectively unclone the
skb from the very beginning in case it is indeed cloned. The heuristic
makes use of a similar trick that was done in 233577a22089 ("net: filter:
constify detection of pkt_type_offset"). This comes at zero cost for other
programs that do not use the direct write feature. Should a program use
this feature only sparsely and has read access for the most parts with,
for example, drop return codes, then such write action can be delegated
to a tail called program for mitigating this cost of potential uncloning
to a late point in time where it would have been paid similarly with the
bpf_skb_store_bytes() as well. Advantage of direct write is that the
writes are inlined whereas the helper cannot make any length assumptions
and thus needs to generate a call to memcpy() also for small sizes, as well
as cost of helper call itself with sanity checks are avoided. Plus, when
direct read is already used, we don't need to cache or perform rechecks
on the data boundaries (due to verifier invalidating previous checks for
helpers that change skb->data), so more complex programs using rewrites
can benefit from switching to direct read plus write.
For direct packet access to helpers, we save the otherwise needed copy into
a temp struct sitting on stack memory when use-case allows. Both facilities
are enabled via may_access_direct_pkt_data() in verifier. For now, we limit
this to map helpers and csum_diff, and can successively enable other helpers
where we find it makes sense. Helpers that definitely cannot be allowed for
this are those part of bpf_helper_changes_skb_data() since they can change
underlying data, and those that write into memory as this could happen for
packet typed args when still cloned. bpf_csum_update() helper accommodates
for the fact that we need to fixup checksum_complete when using direct write
instead of bpf_skb_store_bytes(), meaning the programs can use available
helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(),
csum_block_add(), csum_block_sub() equivalents in eBPF together with the
new helper. A usage example will be provided for iproute2's examples/bpf/
directory.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-20 06:26:13 +08:00
|
|
|
.pkt_access = true,
|
2015-05-30 05:23:07 +08:00
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
.arg1_type = ARG_CONST_MAP_PTR,
|
|
|
|
.arg2_type = ARG_PTR_TO_MAP_KEY,
|
2014-11-14 09:36:49 +08:00
|
|
|
};
|
2015-03-14 09:27:16 +08:00
|
|
|
|
2018-10-18 21:16:25 +08:00
|
|
|
/*
 * Push @value onto @map, passing @flags through unchanged.
 *
 * Simply forwards to the map's map_push_elem callback and returns its
 * result.
 */
BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
{
	const struct bpf_map_ops *ops = map->ops;

	return ops->map_push_elem(map, value, flags);
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_map_push_elem_proto = {
|
|
|
|
.func = bpf_map_push_elem,
|
|
|
|
.gpl_only = false,
|
|
|
|
.pkt_access = true,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
.arg1_type = ARG_CONST_MAP_PTR,
|
|
|
|
.arg2_type = ARG_PTR_TO_MAP_VALUE,
|
|
|
|
.arg3_type = ARG_ANYTHING,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Pop an element from @map into the buffer at @value.
 *
 * Simply forwards to the map's map_pop_elem callback and returns its
 * result.
 */
BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
{
	const struct bpf_map_ops *ops = map->ops;

	return ops->map_pop_elem(map, value);
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_map_pop_elem_proto = {
|
|
|
|
.func = bpf_map_pop_elem,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
.arg1_type = ARG_CONST_MAP_PTR,
|
|
|
|
.arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Peek at an element of @map, copying it into the buffer at @value.
 *
 * Simply forwards to the map's map_peek_elem callback and returns its
 * result.
 */
BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
{
	const struct bpf_map_ops *ops = map->ops;

	return ops->map_peek_elem(map, value);
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_map_peek_elem_proto = {
|
|
|
|
.func = bpf_map_pop_elem,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
.arg1_type = ARG_CONST_MAP_PTR,
|
|
|
|
.arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE,
|
|
|
|
};
|
|
|
|
|
2015-03-14 09:27:16 +08:00
|
|
|
const struct bpf_func_proto bpf_get_prandom_u32_proto = {
|
bpf: split state from prandom_u32() and consolidate {c, e}BPF prngs
While recently arguing on a seccomp discussion that raw prandom_u32()
access shouldn't be exposed to unpriviledged user space, I forgot the
fact that SKF_AD_RANDOM extension actually already does it for some time
in cBPF via commit 4cd3675ebf74 ("filter: added BPF random opcode").
Since prandom_u32() is being used in a lot of critical networking code,
lets be more conservative and split their states. Furthermore, consolidate
eBPF and cBPF prandom handlers to use the new internal PRNG. For eBPF,
bpf_get_prandom_u32() was only accessible for priviledged users, but
should that change one day, we also don't want to leak raw sequences
through things like eBPF maps.
One thought was also to have own per bpf_prog states, but due to ABI
reasons this is not easily possible, i.e. the program code currently
cannot access bpf_prog itself, and copying the rnd_state to/from the
stack scratch space whenever a program uses the prng seems not really
worth the trouble and seems too hacky. If needed, taus113 could in such
cases be implemented within eBPF using a map entry to keep the state
space, or get_random_bytes() could become a second helper in cases where
performance would not be critical.
Both sides can trigger a one-time late init via prandom_init_once() on
the shared state. Performance-wise, there should even be a tiny gain
as bpf_user_rnd_u32() saves one function call. The PRNG needs to live
inside the BPF core since kernels could have a NET-less config as well.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Cc: Chema Gonzalez <chema@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 07:20:39 +08:00
|
|
|
.func = bpf_user_rnd_u32,
|
2015-03-14 09:27:16 +08:00
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
};
|
2015-03-14 09:27:17 +08:00
|
|
|
|
bpf: add BPF_CALL_x macros for declaring helpers
This work adds BPF_CALL_<n>() macros and converts all the eBPF helper functions
to use them, in a similar fashion like we do with SYSCALL_DEFINE<n>() macros
that are used today. Motivation for this is to hide all the register handling
and all necessary casts from the user, so that it is done automatically in the
background when adding a BPF_CALL_<n>() call.
This makes current helpers easier to review, eases to write future helpers,
avoids getting the casting mess wrong, and allows for extending all helpers at
once (f.e. build time checks, etc). It also helps detecting more easily in
code reviews that unused registers are not instrumented in the code by accident,
breaking compatibility with existing programs.
BPF_CALL_<n>() internals are quite similar to SYSCALL_DEFINE<n>() ones with some
fundamental differences, for example, for generating the actual helper function
that carries all u64 regs, we need to fill unused regs, so that we always end up
with 5 u64 regs as an argument.
I reviewed several 0-5 generated BPF_CALL_<n>() variants of the .i results and
they look all as expected. No sparse issue spotted. We let this also sit for a
few days with Fengguang's kbuild test robot, and there were no issues seen. On
s390, it barked on the "uses dynamic stack allocation" notice, which is an old
one from bpf_perf_event_output{,_tp}() reappearing here due to the conversion
to the call wrapper, just telling that the perf raw record/frag sits on stack
(gcc with s390's -mwarn-dynamicstack), but that's all. Did various runtime tests
and they were fine as well. All eBPF helpers are now converted to use these
macros, getting rid of a good chunk of all the raw castings.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-09 08:45:31 +08:00
|
|
|
/*
 * Argument-less helper that returns the value of smp_processor_id() for
 * the context the program is running in.
 */
BPF_CALL_0(bpf_get_smp_processor_id)
{
	return smp_processor_id();
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
|
|
|
|
.func = bpf_get_smp_processor_id,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
};
|
2015-05-30 05:23:06 +08:00
|
|
|
|
2016-10-21 18:46:33 +08:00
|
|
|
/*
 * Argument-less helper that returns the value of numa_node_id() for the
 * context the program is running in.
 */
BPF_CALL_0(bpf_get_numa_node_id)
{
	return numa_node_id();
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_get_numa_node_id_proto = {
|
|
|
|
.func = bpf_get_numa_node_id,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
};
|
|
|
|
|
bpf: add BPF_CALL_x macros for declaring helpers
This work adds BPF_CALL_<n>() macros and converts all the eBPF helper functions
to use them, in a similar fashion like we do with SYSCALL_DEFINE<n>() macros
that are used today. Motivation for this is to hide all the register handling
and all necessary casts from the user, so that it is done automatically in the
background when adding a BPF_CALL_<n>() call.
This makes current helpers easier to review, eases to write future helpers,
avoids getting the casting mess wrong, and allows for extending all helpers at
once (f.e. build time checks, etc). It also helps detecting more easily in
code reviews that unused registers are not instrumented in the code by accident,
breaking compatibility with existing programs.
BPF_CALL_<n>() internals are quite similar to SYSCALL_DEFINE<n>() ones with some
fundamental differences, for example, for generating the actual helper function
that carries all u64 regs, we need to fill unused regs, so that we always end up
with 5 u64 regs as an argument.
I reviewed several 0-5 generated BPF_CALL_<n>() variants of the .i results and
they look all as expected. No sparse issue spotted. We let this also sit for a
few days with Fengguang's kbuild test robot, and there were no issues seen. On
s390, it barked on the "uses dynamic stack allocation" notice, which is an old
one from bpf_perf_event_output{,_tp}() reappearing here due to the conversion
to the call wrapper, just telling that the perf raw record/frag sits on stack
(gcc with s390's -mwarn-dynamicstack), but that's all. Did various runtime tests
and they were fine as well. All eBPF helpers are now converted to use these
macros, getting rid of a good chunk of all the raw castings.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-09 08:45:31 +08:00
|
|
|
/* Helper: return the value of ktime_get_mono_fast_ns(). */
BPF_CALL_0(bpf_ktime_get_ns)
{
	/* NMI safe access to clock monotonic */
	return ktime_get_mono_fast_ns();
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_ktime_get_ns_proto = {
|
|
|
|
.func = bpf_ktime_get_ns,
|
2020-04-21 02:47:50 +08:00
|
|
|
.gpl_only = false,
|
2015-05-30 05:23:06 +08:00
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
};
|
2015-06-13 10:39:12 +08:00
|
|
|
|
2020-04-27 00:15:25 +08:00
|
|
|
/* Helper: return the value of ktime_get_boot_fast_ns(). */
BPF_CALL_0(bpf_ktime_get_boot_ns)
{
	/* NMI safe access to clock boottime */
	return ktime_get_boot_fast_ns();
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
|
|
|
|
.func = bpf_ktime_get_boot_ns,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
};
|
|
|
|
|
bpf: add BPF_CALL_x macros for declaring helpers
This work adds BPF_CALL_<n>() macros and converts all the eBPF helper functions
to use them, in a similar fashion like we do with SYSCALL_DEFINE<n>() macros
that are used today. Motivation for this is to hide all the register handling
and all necessary casts from the user, so that it is done automatically in the
background when adding a BPF_CALL_<n>() call.
This makes current helpers easier to review, eases to write future helpers,
avoids getting the casting mess wrong, and allows for extending all helpers at
once (f.e. build time checks, etc). It also helps detecting more easily in
code reviews that unused registers are not instrumented in the code by accident,
breaking compatibility with existing programs.
BPF_CALL_<n>() internals are quite similar to SYSCALL_DEFINE<n>() ones with some
fundamental differences, for example, for generating the actual helper function
that carries all u64 regs, we need to fill unused regs, so that we always end up
with 5 u64 regs as an argument.
I reviewed several 0-5 generated BPF_CALL_<n>() variants of the .i results and
they look all as expected. No sparse issue spotted. We let this also sit for a
few days with Fengguang's kbuild test robot, and there were no issues seen. On
s390, it barked on the "uses dynamic stack allocation" notice, which is an old
one from bpf_perf_event_output{,_tp}() reappearing here due to the conversion
to the call wrapper, just telling that the perf raw record/frag sits on stack
(gcc with s390's -mwarn-dynamicstack), but that's all. Did various runtime tests
and they were fine as well. All eBPF helpers are now converted to use these
macros, getting rid of a good chunk of all the raw castings.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-09 08:45:31 +08:00
|
|
|
BPF_CALL_0(bpf_get_current_pid_tgid)
|
2015-06-13 10:39:12 +08:00
|
|
|
{
|
|
|
|
struct task_struct *task = current;
|
|
|
|
|
2016-09-09 08:45:28 +08:00
|
|
|
if (unlikely(!task))
|
2015-06-13 10:39:12 +08:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
return (u64) task->tgid << 32 | task->pid;
|
|
|
|
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
|
|
|
|
.func = bpf_get_current_pid_tgid,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
};
|
|
|
|
|
bpf: add BPF_CALL_x macros for declaring helpers
This work adds BPF_CALL_<n>() macros and converts all the eBPF helper functions
to use them, in a similar fashion like we do with SYSCALL_DEFINE<n>() macros
that are used today. Motivation for this is to hide all the register handling
and all necessary casts from the user, so that it is done automatically in the
background when adding a BPF_CALL_<n>() call.
This makes current helpers easier to review, eases to write future helpers,
avoids getting the casting mess wrong, and allows for extending all helpers at
once (f.e. build time checks, etc). It also helps detecting more easily in
code reviews that unused registers are not instrumented in the code by accident,
breaking compatibility with existing programs.
BPF_CALL_<n>() internals are quite similar to SYSCALL_DEFINE<n>() ones with some
fundamental differences, for example, for generating the actual helper function
that carries all u64 regs, we need to fill unused regs, so that we always end up
with 5 u64 regs as an argument.
I reviewed several 0-5 generated BPF_CALL_<n>() variants of the .i results and
they look all as expected. No sparse issue spotted. We let this also sit for a
few days with Fengguang's kbuild test robot, and there were no issues seen. On
s390, it barked on the "uses dynamic stack allocation" notice, which is an old
one from bpf_perf_event_output{,_tp}() reappearing here due to the conversion
to the call wrapper, just telling that the perf raw record/frag sits on stack
(gcc with s390's -mwarn-dynamicstack), but that's all. Did various runtime tests
and they were fine as well. All eBPF helpers are now converted to use these
macros, getting rid of a good chunk of all the raw castings.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-09 08:45:31 +08:00
|
|
|
BPF_CALL_0(bpf_get_current_uid_gid)
|
2015-06-13 10:39:12 +08:00
|
|
|
{
|
|
|
|
struct task_struct *task = current;
|
|
|
|
kuid_t uid;
|
|
|
|
kgid_t gid;
|
|
|
|
|
2016-09-09 08:45:28 +08:00
|
|
|
if (unlikely(!task))
|
2015-06-13 10:39:12 +08:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
current_uid_gid(&uid, &gid);
|
|
|
|
return (u64) from_kgid(&init_user_ns, gid) << 32 |
|
2016-09-09 08:45:28 +08:00
|
|
|
from_kuid(&init_user_ns, uid);
|
2015-06-13 10:39:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
|
|
|
|
.func = bpf_get_current_uid_gid,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
};
|
|
|
|
|
bpf: add BPF_CALL_x macros for declaring helpers
This work adds BPF_CALL_<n>() macros and converts all the eBPF helper functions
to use them, in a similar fashion like we do with SYSCALL_DEFINE<n>() macros
that are used today. Motivation for this is to hide all the register handling
and all necessary casts from the user, so that it is done automatically in the
background when adding a BPF_CALL_<n>() call.
This makes current helpers easier to review, eases to write future helpers,
avoids getting the casting mess wrong, and allows for extending all helpers at
once (f.e. build time checks, etc). It also helps detecting more easily in
code reviews that unused registers are not instrumented in the code by accident,
breaking compatibility with existing programs.
BPF_CALL_<n>() internals are quite similar to SYSCALL_DEFINE<n>() ones with some
fundamental differences, for example, for generating the actual helper function
that carries all u64 regs, we need to fill unused regs, so that we always end up
with 5 u64 regs as an argument.
I reviewed several 0-5 generated BPF_CALL_<n>() variants of the .i results and
they look all as expected. No sparse issue spotted. We let this also sit for a
few days with Fengguang's kbuild test robot, and there were no issues seen. On
s390, it barked on the "uses dynamic stack allocation" notice, which is an old
one from bpf_perf_event_output{,_tp}() reappearing here due to the conversion
to the call wrapper, just telling that the perf raw record/frag sits on stack
(gcc with s390's -mwarn-dynamicstack), but that's all. Did various runtime tests
and they were fine as well. All eBPF helpers are now converted to use these
macros, getting rid of a good chunk of all the raw castings.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-09 08:45:31 +08:00
|
|
|
/*
 * Helper: copy the current task's comm string into @buf, which is @size
 * bytes long.
 *
 * Returns 0 on success. When there is no current task, @buf is zeroed in
 * full and -EINVAL is returned.
 */
BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
{
	struct task_struct *task = current;

	if (unlikely(!task))
		goto err_clear;

	/* Verifier guarantees that size > 0. strscpy_pad() truncates
	 * task->comm to fit, always %NUL-terminates buf and zero-fills
	 * whatever is left of it, so no separate terminator store is
	 * needed.
	 */
	strscpy_pad(buf, task->comm, size);
	return 0;
err_clear:
	memset(buf, 0, size);
	return -EINVAL;
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_get_current_comm_proto = {
|
|
|
|
.func = bpf_get_current_comm,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
2017-01-10 02:19:50 +08:00
|
|
|
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
|
|
|
.arg2_type = ARG_CONST_SIZE,
|
2015-06-13 10:39:12 +08:00
|
|
|
};
|
2018-06-04 06:59:41 +08:00
|
|
|
|
2019-02-01 07:40:04 +08:00
|
|
|
#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
|
|
|
|
|
|
|
|
static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
|
|
|
|
{
|
|
|
|
arch_spinlock_t *l = (void *)lock;
|
|
|
|
union {
|
|
|
|
__u32 val;
|
|
|
|
arch_spinlock_t lock;
|
|
|
|
} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };
|
|
|
|
|
|
|
|
compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
|
|
|
|
BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
|
|
|
|
BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
|
|
|
|
arch_spin_lock(l);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
|
|
|
|
{
|
|
|
|
arch_spinlock_t *l = (void *)lock;
|
|
|
|
|
|
|
|
arch_spin_unlock(l);
|
|
|
|
}
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
|
|
|
|
{
|
|
|
|
atomic_t *l = (void *)lock;
|
|
|
|
|
|
|
|
BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
|
|
|
|
do {
|
|
|
|
atomic_cond_read_relaxed(l, !VAL);
|
|
|
|
} while (atomic_xchg(l, 1));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
|
|
|
|
{
|
|
|
|
atomic_t *l = (void *)lock;
|
|
|
|
|
|
|
|
atomic_set_release(l, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Per-CPU slot: bpf_spin_lock() stores the flags obtained from
 * local_irq_save() here, and bpf_spin_unlock() reads them back to pass to
 * local_irq_restore().
 */
static DEFINE_PER_CPU(unsigned long, irqsave_flags);
|
|
|
|
|
|
|
|
/*
 * Helper: disable local interrupts, take @lock, then stash the saved
 * interrupt flags in the per-CPU irqsave_flags slot. Always returns 0.
 */
notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
{
	unsigned long saved;

	local_irq_save(saved);
	__bpf_spin_lock(lock);
	/* written only once the lock is held */
	__this_cpu_write(irqsave_flags, saved);

	return 0;
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_spin_lock_proto = {
|
|
|
|
.func = bpf_spin_lock,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_VOID,
|
|
|
|
.arg1_type = ARG_PTR_TO_SPIN_LOCK,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Helper: fetch the interrupt flags from the per-CPU irqsave_flags slot,
 * release @lock, then restore those flags. Always returns 0.
 */
notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
{
	unsigned long saved;

	/* read while the lock is still held */
	saved = __this_cpu_read(irqsave_flags);
	__bpf_spin_unlock(lock);
	local_irq_restore(saved);

	return 0;
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_spin_unlock_proto = {
|
|
|
|
.func = bpf_spin_unlock,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_VOID,
|
|
|
|
.arg1_type = ARG_PTR_TO_SPIN_LOCK,
|
|
|
|
};
|
|
|
|
|
2019-02-01 07:40:09 +08:00
|
|
|
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
|
|
|
|
bool lock_src)
|
|
|
|
{
|
|
|
|
struct bpf_spin_lock *lock;
|
|
|
|
|
|
|
|
if (lock_src)
|
|
|
|
lock = src + map->spin_lock_off;
|
|
|
|
else
|
|
|
|
lock = dst + map->spin_lock_off;
|
|
|
|
preempt_disable();
|
|
|
|
____bpf_spin_lock(lock);
|
|
|
|
copy_map_value(map, dst, src);
|
|
|
|
____bpf_spin_unlock(lock);
|
|
|
|
preempt_enable();
|
|
|
|
}
|
|
|
|
|
2020-01-23 07:36:46 +08:00
|
|
|
/* Helper: return the value of get_jiffies_64(). */
BPF_CALL_0(bpf_jiffies64)
{
	return get_jiffies_64();
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_jiffies64_proto = {
|
|
|
|
.func = bpf_jiffies64,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
};
|
|
|
|
|
2018-06-04 06:59:41 +08:00
|
|
|
#ifdef CONFIG_CGROUPS
|
|
|
|
BPF_CALL_0(bpf_get_current_cgroup_id)
|
|
|
|
{
|
|
|
|
struct cgroup *cgrp = task_dfl_cgroup(current);
|
|
|
|
|
2019-11-05 07:54:30 +08:00
|
|
|
return cgroup_id(cgrp);
|
2018-06-04 06:59:41 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
|
|
|
|
.func = bpf_get_current_cgroup_id,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
};
|
2018-08-03 05:27:24 +08:00
|
|
|
|
2020-03-27 23:58:54 +08:00
|
|
|
/*
 * Helper: look up the ancestor of the current task's cgroup at
 * @ancestor_level via cgroup_ancestor() and return its cgroup_id(), or 0
 * when cgroup_ancestor() finds nothing.
 */
BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
{
	struct cgroup *anc;

	anc = cgroup_ancestor(task_dfl_cgroup(current), ancestor_level);

	return anc ? cgroup_id(anc) : 0;
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
|
|
|
|
.func = bpf_get_current_ancestor_cgroup_id,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
.arg1_type = ARG_ANYTHING,
|
|
|
|
};
|
|
|
|
|
2018-09-28 22:45:36 +08:00
|
|
|
#ifdef CONFIG_CGROUP_BPF
|
2018-09-28 22:45:40 +08:00
|
|
|
DECLARE_PER_CPU(struct bpf_cgroup_storage*,
|
|
|
|
bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
|
2018-08-03 05:27:24 +08:00
|
|
|
|
|
|
|
/*
 * Helper: return the address of the cgroup storage buffer that belongs to
 * @map on this CPU.
 *
 * The storage descriptor comes from the per-CPU bpf_cgroup_storage slot
 * for the map's storage type. Shared storage yields the start of the
 * buffer's data[]; any other type yields this CPU's instance of percpu_buf.
 */
BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
{
	/* flags argument is not used now,
	 * but provides an ability to extend the API.
	 * verifier checks that its value is correct.
	 */
	enum bpf_cgroup_storage_type kind = cgroup_storage_type(map);
	struct bpf_cgroup_storage *cur;
	void *addr;

	cur = this_cpu_read(bpf_cgroup_storage[kind]);

	if (kind != BPF_CGROUP_STORAGE_SHARED)
		addr = this_cpu_ptr(cur->percpu_buf);
	else
		addr = &READ_ONCE(cur->buf)->data[0];

	return (unsigned long)addr;
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_get_local_storage_proto = {
|
|
|
|
.func = bpf_get_local_storage,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_PTR_TO_MAP_VALUE,
|
|
|
|
.arg1_type = ARG_CONST_MAP_PTR,
|
|
|
|
.arg2_type = ARG_ANYTHING,
|
|
|
|
};
|
2018-06-04 06:59:41 +08:00
|
|
|
#endif
|
2019-03-19 08:55:26 +08:00
|
|
|
|
|
|
|
/* Bits of the strtol/strtoul flags argument that carry the numeric base;
 * __bpf_strtoull() rejects flags with any bit set outside this mask.
 */
#define BPF_STRTOX_BASE_MASK 0x1F
|
|
|
|
|
|
|
|
/*
 * Parse an unsigned integer from the first @buf_len bytes of @buf.
 *
 * @flags carries the base (0, 8, 10 or 16) in BPF_STRTOX_BASE_MASK; any
 * other bit is rejected. Leading whitespace is skipped and one optional
 * '-' is consumed and reported through @is_negative. At most 63 bytes
 * after that prefix are examined, via a NUL-terminated copy on the stack.
 *
 * Returns the number of bytes of @buf consumed, including skipped
 * whitespace, the sign and any radix prefix, with the magnitude stored in
 * @res. Returns -EINVAL for bad arguments or when no digits are found, and
 * -ERANGE when the parser reports overflow.
 */
static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
			  unsigned long long *res, bool *is_negative)
{
	unsigned int base = flags & BPF_STRTOX_BASE_MASK;
	unsigned int used;
	const char *end;
	const char *p;
	size_t remaining;
	size_t digits;
	char tmp[64];

	if (!buf || !buf_len || !res || !is_negative)
		return -EINVAL;

	switch (base) {
	case 0:
	case 8:
	case 10:
	case 16:
		break;
	default:
		return -EINVAL;
	}

	if (flags & ~BPF_STRTOX_BASE_MASK)
		return -EINVAL;

	end = buf + buf_len;

	for (p = buf; p < end && isspace(*p); ++p)
		;

	*is_negative = (p < end && *p == '-');
	if (*is_negative)
		++p;

	used = p - buf;
	remaining = buf_len - used;
	if (!remaining)
		return -EINVAL;

	/* Parse from a bounded, NUL-terminated private copy. */
	remaining = min(remaining, sizeof(tmp) - 1);
	memcpy(tmp, p, remaining);
	tmp[remaining] = '\0';

	p = _parse_integer_fixup_radix(tmp, &base);
	digits = _parse_integer(p, base, res);

	if (digits & KSTRTOX_OVERFLOW)
		return -ERANGE;

	if (digits == 0)
		return -EINVAL;

	/* Account for the radix prefix plus the digits taken from the copy. */
	p += digits;
	used += p - tmp;

	return used;
}
|
|
|
|
|
|
|
|
/*
 * Signed wrapper around __bpf_strtoull().
 *
 * Applies the parsed sign to the magnitude and stores it in @res. Returns
 * -ERANGE when the signed result ends up with the wrong sign, i.e. the
 * magnitude does not fit in a long long. Otherwise the return value of
 * __bpf_strtoull() is passed through unchanged.
 */
static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
			 long long *res)
{
	unsigned long long magnitude;
	long long value;
	bool negative;
	int rc;

	rc = __bpf_strtoull(buf, buf_len, flags, &magnitude, &negative);
	if (rc < 0)
		return rc;

	value = negative ? (long long)-magnitude : (long long)magnitude;

	/* A negated value that is positive, or a plain one that is
	 * negative, means the magnitude overflowed the signed range.
	 */
	if (negative ? value > 0 : value < 0)
		return -ERANGE;

	*res = value;
	return rc;
}
|
|
|
|
|
|
|
|
/*
 * Helper: parse a signed long from @buf (@buf_len bytes), with the base
 * given in @flags.
 *
 * On success the value is stored in @res and the return value of
 * __bpf_strtoll() is passed through. Errors from __bpf_strtoll() are
 * returned as they are; -ERANGE is returned when the parsed value does not
 * survive a round trip through long.
 */
BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
	   long *, res)
{
	long long wide;
	long narrow;
	int rc;

	rc = __bpf_strtoll(buf, buf_len, flags, &wide);
	if (rc < 0)
		return rc;

	narrow = (long)wide;
	if (narrow != wide)
		return -ERANGE;

	*res = narrow;
	return rc;
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_strtol_proto = {
|
|
|
|
.func = bpf_strtol,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
.arg1_type = ARG_PTR_TO_MEM,
|
|
|
|
.arg2_type = ARG_CONST_SIZE,
|
|
|
|
.arg3_type = ARG_ANYTHING,
|
|
|
|
.arg4_type = ARG_PTR_TO_LONG,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Helper: parse an unsigned long from @buf (@buf_len bytes), with the base
 * given in @flags.
 *
 * On success the value is stored in @res and the return value of
 * __bpf_strtoull() is passed through. Errors from __bpf_strtoull() are
 * returned as they are; a leading '-' yields -EINVAL; -ERANGE is returned
 * when the parsed value does not survive a round trip through
 * unsigned long.
 */
BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
	   unsigned long *, res)
{
	unsigned long long wide;
	unsigned long narrow;
	bool negative;
	int rc;

	rc = __bpf_strtoull(buf, buf_len, flags, &wide, &negative);
	if (rc < 0)
		return rc;

	if (negative)
		return -EINVAL;

	narrow = (unsigned long)wide;
	if (narrow != wide)
		return -ERANGE;

	*res = narrow;
	return rc;
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_strtoul_proto = {
|
|
|
|
.func = bpf_strtoul,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
.arg1_type = ARG_PTR_TO_MEM,
|
|
|
|
.arg2_type = ARG_CONST_SIZE,
|
|
|
|
.arg3_type = ARG_ANYTHING,
|
|
|
|
.arg4_type = ARG_PTR_TO_LONG,
|
|
|
|
};
|
2018-09-28 22:45:36 +08:00
|
|
|
#endif
|
2020-03-05 04:41:56 +08:00
|
|
|
|
|
|
|
BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
|
|
|
|
struct bpf_pidns_info *, nsdata, u32, size)
|
|
|
|
{
|
|
|
|
struct task_struct *task = current;
|
|
|
|
struct pid_namespace *pidns;
|
|
|
|
int err = -EINVAL;
|
|
|
|
|
|
|
|
if (unlikely(size != sizeof(struct bpf_pidns_info)))
|
|
|
|
goto clear;
|
|
|
|
|
|
|
|
if (unlikely((u64)(dev_t)dev != dev))
|
|
|
|
goto clear;
|
|
|
|
|
|
|
|
if (unlikely(!task))
|
|
|
|
goto clear;
|
|
|
|
|
|
|
|
pidns = task_active_pid_ns(task);
|
|
|
|
if (unlikely(!pidns)) {
|
|
|
|
err = -ENOENT;
|
|
|
|
goto clear;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!ns_match(&pidns->ns, (dev_t)dev, ino))
|
|
|
|
goto clear;
|
|
|
|
|
|
|
|
nsdata->pid = task_pid_nr_ns(task, pidns);
|
|
|
|
nsdata->tgid = task_tgid_nr_ns(task, pidns);
|
|
|
|
return 0;
|
|
|
|
clear:
|
|
|
|
memset((void *)nsdata, 0, (size_t) size);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
|
|
|
|
.func = bpf_get_ns_current_pid_tgid,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
.arg1_type = ARG_ANYTHING,
|
|
|
|
.arg2_type = ARG_ANYTHING,
|
|
|
|
.arg3_type = ARG_PTR_TO_UNINIT_MEM,
|
|
|
|
.arg4_type = ARG_CONST_SIZE,
|
|
|
|
};
|
2020-04-25 07:59:41 +08:00
|
|
|
|
|
|
|
static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
|
|
|
|
.func = bpf_get_raw_cpu_id,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Helper: forward @data (@size bytes) to bpf_event_output() for @map.
 *
 * Only bits inside BPF_F_INDEX_MASK may be set in @flags; anything else
 * yields -EINVAL. @ctx is not used by the body.
 */
BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	if (likely(!(flags & ~(BPF_F_INDEX_MASK))))
		return bpf_event_output(map, flags, data, size, NULL, 0, NULL);

	return -EINVAL;
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_event_output_data_proto = {
|
|
|
|
.func = bpf_event_output_data,
|
|
|
|
.gpl_only = true,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
.arg1_type = ARG_PTR_TO_CTX,
|
|
|
|
.arg2_type = ARG_CONST_MAP_PTR,
|
|
|
|
.arg3_type = ARG_ANYTHING,
|
|
|
|
.arg4_type = ARG_PTR_TO_MEM,
|
|
|
|
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
|
|
|
|
};
|
|
|
|
|
2020-05-25 00:50:55 +08:00
|
|
|
/* Weak definitions of the protos that bpf_base_func_proto() hands out
 * behind the perfmon_capable() check.
 * NOTE(review): presumably overridden by strong definitions elsewhere when
 * the corresponding helpers are built in - confirm.
 */
const struct bpf_func_proto bpf_get_current_task_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
|
|
|
|
|
2020-04-25 07:59:41 +08:00
|
|
|
const struct bpf_func_proto *
|
|
|
|
bpf_base_func_proto(enum bpf_func_id func_id)
|
|
|
|
{
|
|
|
|
switch (func_id) {
|
|
|
|
case BPF_FUNC_map_lookup_elem:
|
|
|
|
return &bpf_map_lookup_elem_proto;
|
|
|
|
case BPF_FUNC_map_update_elem:
|
|
|
|
return &bpf_map_update_elem_proto;
|
|
|
|
case BPF_FUNC_map_delete_elem:
|
|
|
|
return &bpf_map_delete_elem_proto;
|
|
|
|
case BPF_FUNC_map_push_elem:
|
|
|
|
return &bpf_map_push_elem_proto;
|
|
|
|
case BPF_FUNC_map_pop_elem:
|
|
|
|
return &bpf_map_pop_elem_proto;
|
|
|
|
case BPF_FUNC_map_peek_elem:
|
|
|
|
return &bpf_map_peek_elem_proto;
|
|
|
|
case BPF_FUNC_get_prandom_u32:
|
|
|
|
return &bpf_get_prandom_u32_proto;
|
|
|
|
case BPF_FUNC_get_smp_processor_id:
|
|
|
|
return &bpf_get_raw_smp_processor_id_proto;
|
|
|
|
case BPF_FUNC_get_numa_node_id:
|
|
|
|
return &bpf_get_numa_node_id_proto;
|
|
|
|
case BPF_FUNC_tail_call:
|
|
|
|
return &bpf_tail_call_proto;
|
|
|
|
case BPF_FUNC_ktime_get_ns:
|
|
|
|
return &bpf_ktime_get_ns_proto;
|
2020-04-27 00:15:25 +08:00
|
|
|
case BPF_FUNC_ktime_get_boot_ns:
|
|
|
|
return &bpf_ktime_get_boot_ns_proto;
|
2020-04-25 07:59:41 +08:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2020-05-14 07:03:54 +08:00
|
|
|
if (!bpf_capable())
|
2020-04-25 07:59:41 +08:00
|
|
|
return NULL;
|
|
|
|
|
|
|
|
switch (func_id) {
|
|
|
|
case BPF_FUNC_spin_lock:
|
|
|
|
return &bpf_spin_lock_proto;
|
|
|
|
case BPF_FUNC_spin_unlock:
|
|
|
|
return &bpf_spin_unlock_proto;
|
|
|
|
case BPF_FUNC_trace_printk:
|
2020-05-14 07:03:54 +08:00
|
|
|
if (!perfmon_capable())
|
|
|
|
return NULL;
|
2020-04-25 07:59:41 +08:00
|
|
|
return bpf_get_trace_printk_proto();
|
|
|
|
case BPF_FUNC_jiffies64:
|
|
|
|
return &bpf_jiffies64_proto;
|
2020-05-25 00:50:55 +08:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!perfmon_capable())
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
switch (func_id) {
|
|
|
|
case BPF_FUNC_get_current_task:
|
|
|
|
return &bpf_get_current_task_proto;
|
|
|
|
case BPF_FUNC_probe_read_user:
|
|
|
|
return &bpf_probe_read_user_proto;
|
|
|
|
case BPF_FUNC_probe_read_kernel:
|
|
|
|
return &bpf_probe_read_kernel_proto;
|
|
|
|
case BPF_FUNC_probe_read_user_str:
|
|
|
|
return &bpf_probe_read_user_str_proto;
|
|
|
|
case BPF_FUNC_probe_read_kernel_str:
|
|
|
|
return &bpf_probe_read_kernel_str_proto;
|
2020-04-25 07:59:41 +08:00
|
|
|
default:
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|