
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2019-05-31

The following pull-request contains BPF updates for your *net-next* tree.

Lots of exciting new features in the first PR of this development cycle!
The main changes are:

1) misc verifier improvements, from Alexei.

2) bpftool can now convert btf to valid C, from Andrii.

3) verifier can insert explicit ZEXT insn when requested by 32-bit JITs.
   This feature greatly improves BPF speed on 32-bit architectures. From Jiong.

4) cgroups will now auto-detach bpf programs. This fixes the issue of thousands
   of bpf programs getting stuck in dying cgroups. From Roman.

5) new bpf_send_signal() helper, from Yonghong.

6) cgroup inet skb programs can signal CN to the stack, from Lawrence.

7) miscellaneous cleanups, from many developers.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 2019-05-31 21:21:18 -07:00
commit 0462eaacee
122 changed files with 6433 additions and 1016 deletions


@ -172,11 +172,31 @@ registers which makes BPF inefficient virtual machine for 32-bit
CPU architectures and 32-bit HW accelerators. Can true 32-bit registers
be added to BPF in the future?
A: NO. The first thing to improve performance on 32-bit archs is to teach
LLVM to generate code that uses 32-bit subregisters. Then second step
is to teach verifier to mark operations where zero-ing upper bits
is unnecessary. Then JITs can take advantage of those markings and
drastically reduce size of generated code and improve performance.
A: NO.
But some optimizations on zero-ing the upper 32 bits for BPF registers are
available, and can be leveraged to improve the performance of JITed BPF
programs for 32-bit architectures.
Starting with version 7, LLVM is able to generate instructions that operate
on 32-bit subregisters, provided the option -mattr=+alu32 is passed for
compiling a program. Furthermore, the verifier can now mark the
instructions for which zero-ing the upper bits of the destination register
is required, and insert an explicit zero-extension (zext) instruction
(a mov32 variant). This means that for architectures without zext hardware
support, the JIT back-ends do not need to clear the upper bits for
subregisters written by alu32 instructions or narrow loads. Instead, the
back-ends simply need to support code generation for that mov32 variant,
and to override bpf_jit_needs_zext() to make it return "true" (in order to
enable zext insertion in the verifier).
Note that it is possible for a JIT back-end to have partial hardware
support for zext. In that case, if verifier zext insertion is enabled,
it could lead to the insertion of unnecessary zext instructions. Such
instructions could be removed by creating a simple peephole inside the JIT
back-end: if one instruction has hardware support for zext and if the next
instruction is an explicit zext, then the latter can be skipped when doing
the code generation.
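
To make the peephole described above concrete, here is a minimal sketch; insn_is_zext() is the helper this series adds to include/linux/filter.h, while the function name and the return-1-to-skip convention are only illustrative (they mirror what the sparc64 and riscv back-ends below do).

#include <linux/filter.h>

/* Called right after emitting an instruction that already zero-extends
 * its 32-bit destination in hardware. If the next BPF instruction is a
 * verifier-inserted explicit zext (the special mov32), tell the JIT
 * loop to skip it instead of emitting a redundant clear. */
static int maybe_skip_redundant_zext(const struct bpf_insn *insn)
{
	if (insn_is_zext(&insn[1]))
		return 1;	/* caller advances past the explicit zext */
	return 0;
}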
Q: Does BPF have a stable ABI?
------------------------------


@ -736,7 +736,8 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
/* ALU operation */
emit_alu_r(rd[1], rs, true, false, op, ctx);
emit_a32_mov_i(rd[0], 0, ctx);
if (!ctx->prog->aux->verifier_zext)
emit_a32_mov_i(rd[0], 0, ctx);
}
arm_bpf_put_reg64(dst, rd, ctx);
@ -758,8 +759,9 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
struct jit_ctx *ctx) {
if (!is64) {
emit_a32_mov_r(dst_lo, src_lo, ctx);
/* Zero out high 4 bytes */
emit_a32_mov_i(dst_hi, 0, ctx);
if (!ctx->prog->aux->verifier_zext)
/* Zero out high 4 bytes */
emit_a32_mov_i(dst_hi, 0, ctx);
} else if (__LINUX_ARM_ARCH__ < 6 &&
ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
/* complete 8 byte move */
@ -1060,17 +1062,20 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src,
case BPF_B:
/* Load a Byte */
emit(ARM_LDRB_I(rd[1], rm, off), ctx);
emit_a32_mov_i(rd[0], 0, ctx);
if (!ctx->prog->aux->verifier_zext)
emit_a32_mov_i(rd[0], 0, ctx);
break;
case BPF_H:
/* Load a HalfWord */
emit(ARM_LDRH_I(rd[1], rm, off), ctx);
emit_a32_mov_i(rd[0], 0, ctx);
if (!ctx->prog->aux->verifier_zext)
emit_a32_mov_i(rd[0], 0, ctx);
break;
case BPF_W:
/* Load a Word */
emit(ARM_LDR_I(rd[1], rm, off), ctx);
emit_a32_mov_i(rd[0], 0, ctx);
if (!ctx->prog->aux->verifier_zext)
emit_a32_mov_i(rd[0], 0, ctx);
break;
case BPF_DW:
/* Load a Double Word */
@ -1359,6 +1364,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_ALU64 | BPF_MOV | BPF_X:
switch (BPF_SRC(code)) {
case BPF_X:
if (imm == 1) {
/* Special mov32 for zext */
emit_a32_mov_i(dst_hi, 0, ctx);
break;
}
emit_a32_mov_r64(is64, dst, src, ctx);
break;
case BPF_K:
@ -1438,7 +1448,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
}
emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code));
arm_bpf_put_reg32(dst_lo, rd_lo, ctx);
emit_a32_mov_i(dst_hi, 0, ctx);
if (!ctx->prog->aux->verifier_zext)
emit_a32_mov_i(dst_hi, 0, ctx);
break;
case BPF_ALU64 | BPF_DIV | BPF_K:
case BPF_ALU64 | BPF_DIV | BPF_X:
@ -1453,7 +1464,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
return -EINVAL;
if (imm)
emit_a32_alu_i(dst_lo, imm, ctx, BPF_OP(code));
emit_a32_mov_i(dst_hi, 0, ctx);
if (!ctx->prog->aux->verifier_zext)
emit_a32_mov_i(dst_hi, 0, ctx);
break;
/* dst = dst << imm */
case BPF_ALU64 | BPF_LSH | BPF_K:
@ -1488,7 +1500,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
/* dst = ~dst */
case BPF_ALU | BPF_NEG:
emit_a32_alu_i(dst_lo, 0, ctx, BPF_OP(code));
emit_a32_mov_i(dst_hi, 0, ctx);
if (!ctx->prog->aux->verifier_zext)
emit_a32_mov_i(dst_hi, 0, ctx);
break;
/* dst = ~dst (64 bit) */
case BPF_ALU64 | BPF_NEG:
@ -1544,11 +1557,13 @@ emit_bswap_uxt:
#else /* ARMv6+ */
emit(ARM_UXTH(rd[1], rd[1]), ctx);
#endif
emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
if (!ctx->prog->aux->verifier_zext)
emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
break;
case 32:
/* zero-extend 32 bits into 64 bits */
emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
if (!ctx->prog->aux->verifier_zext)
emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
break;
case 64:
/* nop */
@ -1838,6 +1853,11 @@ void bpf_jit_compile(struct bpf_prog *prog)
/* Nothing to do here. We support Internal BPF. */
}
bool bpf_jit_needs_zext(void)
{
return true;
}
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
struct bpf_prog *tmp, *orig_prog = prog;


@ -504,6 +504,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
/* slw clears top 32 bits */
PPC_SLW(dst_reg, dst_reg, src_reg);
/* skip zero extension move, but set address map. */
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
PPC_SLD(dst_reg, dst_reg, src_reg);
@ -511,6 +514,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */
/* with imm 0, we still need to clear top 32 bits */
PPC_SLWI(dst_reg, dst_reg, imm);
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */
if (imm != 0)
@ -518,12 +523,16 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
break;
case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
PPC_SRW(dst_reg, dst_reg, src_reg);
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
PPC_SRD(dst_reg, dst_reg, src_reg);
break;
case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
PPC_SRWI(dst_reg, dst_reg, imm);
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
if (imm != 0)
@ -548,6 +557,11 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
*/
case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
if (imm == 1) {
/* special mov32 for zext */
PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
break;
}
PPC_MR(dst_reg, src_reg);
goto bpf_alu32_trunc;
case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
@ -555,11 +569,13 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
PPC_LI32(dst_reg, imm);
if (imm < 0)
goto bpf_alu32_trunc;
else if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
bpf_alu32_trunc:
/* Truncate to 32-bits */
if (BPF_CLASS(code) == BPF_ALU)
if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext)
PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
break;
@ -618,10 +634,13 @@ emit_clear:
case 16:
/* zero-extend 16 bits into 64 bits */
PPC_RLDICL(dst_reg, dst_reg, 0, 48);
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
case 32:
/* zero-extend 32 bits into 64 bits */
PPC_RLDICL(dst_reg, dst_reg, 0, 32);
if (!fp->aux->verifier_zext)
/* zero-extend 32 bits into 64 bits */
PPC_RLDICL(dst_reg, dst_reg, 0, 32);
break;
case 64:
/* nop */
@ -698,14 +717,20 @@ emit_clear:
/* dst = *(u8 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_B:
PPC_LBZ(dst_reg, src_reg, off);
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
/* dst = *(u16 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_H:
PPC_LHZ(dst_reg, src_reg, off);
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
/* dst = *(u32 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_W:
PPC_LWZ(dst_reg, src_reg, off);
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
/* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_DW:
@ -1046,6 +1071,11 @@ struct powerpc64_jit_data {
struct codegen_context ctx;
};
bool bpf_jit_needs_zext(void)
{
return true;
}
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
u32 proglen;


@ -731,6 +731,7 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
{
bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
BPF_CLASS(insn->code) == BPF_JMP;
struct bpf_prog_aux *aux = ctx->prog->aux;
int rvoff, i = insn - ctx->prog->insnsi;
u8 rd = -1, rs = -1, code = insn->code;
s16 off = insn->off;
@ -742,8 +743,13 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
/* dst = src */
case BPF_ALU | BPF_MOV | BPF_X:
case BPF_ALU64 | BPF_MOV | BPF_X:
if (imm == 1) {
/* Special mov32 for zext */
emit_zext_32(rd, ctx);
break;
}
emit(is64 ? rv_addi(rd, rs, 0) : rv_addiw(rd, rs, 0), ctx);
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
@ -771,19 +777,19 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_ALU | BPF_MUL | BPF_X:
case BPF_ALU64 | BPF_MUL | BPF_X:
emit(is64 ? rv_mul(rd, rd, rs) : rv_mulw(rd, rd, rs), ctx);
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_DIV | BPF_X:
case BPF_ALU64 | BPF_DIV | BPF_X:
emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx);
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_MOD | BPF_X:
case BPF_ALU64 | BPF_MOD | BPF_X:
emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx);
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_LSH | BPF_X:
@ -867,7 +873,7 @@ out_be:
case BPF_ALU | BPF_MOV | BPF_K:
case BPF_ALU64 | BPF_MOV | BPF_K:
emit_imm(rd, imm, ctx);
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
@ -882,7 +888,7 @@ out_be:
emit(is64 ? rv_add(rd, rd, RV_REG_T1) :
rv_addw(rd, rd, RV_REG_T1), ctx);
}
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_SUB | BPF_K:
@ -895,7 +901,7 @@ out_be:
emit(is64 ? rv_sub(rd, rd, RV_REG_T1) :
rv_subw(rd, rd, RV_REG_T1), ctx);
}
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_AND | BPF_K:
@ -906,7 +912,7 @@ out_be:
emit_imm(RV_REG_T1, imm, ctx);
emit(rv_and(rd, rd, RV_REG_T1), ctx);
}
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_OR | BPF_K:
@ -917,7 +923,7 @@ out_be:
emit_imm(RV_REG_T1, imm, ctx);
emit(rv_or(rd, rd, RV_REG_T1), ctx);
}
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_XOR | BPF_K:
@ -928,7 +934,7 @@ out_be:
emit_imm(RV_REG_T1, imm, ctx);
emit(rv_xor(rd, rd, RV_REG_T1), ctx);
}
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_MUL | BPF_K:
@ -936,7 +942,7 @@ out_be:
emit_imm(RV_REG_T1, imm, ctx);
emit(is64 ? rv_mul(rd, rd, RV_REG_T1) :
rv_mulw(rd, rd, RV_REG_T1), ctx);
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_DIV | BPF_K:
@ -944,7 +950,7 @@ out_be:
emit_imm(RV_REG_T1, imm, ctx);
emit(is64 ? rv_divu(rd, rd, RV_REG_T1) :
rv_divuw(rd, rd, RV_REG_T1), ctx);
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_MOD | BPF_K:
@ -952,7 +958,7 @@ out_be:
emit_imm(RV_REG_T1, imm, ctx);
emit(is64 ? rv_remu(rd, rd, RV_REG_T1) :
rv_remuw(rd, rd, RV_REG_T1), ctx);
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_LSH | BPF_K:
@ -1239,6 +1245,8 @@ out_be:
emit_imm(RV_REG_T1, off, ctx);
emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
if (insn_is_zext(&insn[1]))
return 1;
break;
case BPF_LDX | BPF_MEM | BPF_H:
if (is_12b_int(off)) {
@ -1249,6 +1257,8 @@ out_be:
emit_imm(RV_REG_T1, off, ctx);
emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
if (insn_is_zext(&insn[1]))
return 1;
break;
case BPF_LDX | BPF_MEM | BPF_W:
if (is_12b_int(off)) {
@ -1259,6 +1269,8 @@ out_be:
emit_imm(RV_REG_T1, off, ctx);
emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
if (insn_is_zext(&insn[1]))
return 1;
break;
case BPF_LDX | BPF_MEM | BPF_DW:
if (is_12b_int(off)) {
@ -1503,6 +1515,11 @@ static void bpf_flush_icache(void *start, void *end)
flush_icache_range((unsigned long)start, (unsigned long)end);
}
bool bpf_jit_needs_zext(void)
{
return true;
}
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
bool tmp_blinded = false, extra_pass = false;


@ -299,9 +299,11 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT_ZERO(b1) \
({ \
/* llgfr %dst,%dst (zero extend to 64 bit) */ \
EMIT4(0xb9160000, b1, b1); \
REG_SET_SEEN(b1); \
if (!fp->aux->verifier_zext) { \
/* llgfr %dst,%dst (zero extend to 64 bit) */ \
EMIT4(0xb9160000, b1, b1); \
REG_SET_SEEN(b1); \
} \
})
/*
@ -520,6 +522,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */
/* llgfr %dst,%src */
EMIT4(0xb9160000, dst_reg, src_reg);
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
/* lgr %dst,%src */
@ -528,6 +532,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
/* llilf %dst,imm */
EMIT6_IMM(0xc00f0000, dst_reg, imm);
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */
/* lgfi %dst,imm */
@ -639,6 +645,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
EMIT4(0xb9970000, REG_W0, src_reg);
/* llgfr %dst,%rc */
EMIT4(0xb9160000, dst_reg, rc_reg);
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
}
case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
@ -676,6 +684,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
EMIT_CONST_U32(imm));
/* llgfr %dst,%rc */
EMIT4(0xb9160000, dst_reg, rc_reg);
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
}
case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
@ -864,10 +874,13 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
case 16: /* dst = (u16) cpu_to_be16(dst) */
/* llghr %dst,%dst */
EMIT4(0xb9850000, dst_reg, dst_reg);
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
case 32: /* dst = (u32) cpu_to_be32(dst) */
/* llgfr %dst,%dst */
EMIT4(0xb9160000, dst_reg, dst_reg);
if (!fp->aux->verifier_zext)
/* llgfr %dst,%dst */
EMIT4(0xb9160000, dst_reg, dst_reg);
break;
case 64: /* dst = (u64) cpu_to_be64(dst) */
break;
@ -882,12 +895,15 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
EMIT4_DISP(0x88000000, dst_reg, REG_0, 16);
/* llghr %dst,%dst */
EMIT4(0xb9850000, dst_reg, dst_reg);
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
case 32: /* dst = (u32) cpu_to_le32(dst) */
/* lrvr %dst,%dst */
EMIT4(0xb91f0000, dst_reg, dst_reg);
/* llgfr %dst,%dst */
EMIT4(0xb9160000, dst_reg, dst_reg);
if (!fp->aux->verifier_zext)
/* llgfr %dst,%dst */
EMIT4(0xb9160000, dst_reg, dst_reg);
break;
case 64: /* dst = (u64) cpu_to_le64(dst) */
/* lrvgr %dst,%dst */
@ -968,16 +984,22 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
/* llgc %dst,0(off,%src) */
EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
jit->seen |= SEEN_MEM;
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
/* llgh %dst,0(off,%src) */
EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
jit->seen |= SEEN_MEM;
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
/* llgf %dst,off(%src) */
jit->seen |= SEEN_MEM;
EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
/* lg %dst,0(off,%src) */
@ -1282,6 +1304,11 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
return 0;
}
bool bpf_jit_needs_zext(void)
{
return true;
}
/*
* Compile eBPF program "fp"
*/


@ -908,6 +908,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
/* dst = src */
case BPF_ALU | BPF_MOV | BPF_X:
emit_alu3_K(SRL, src, 0, dst, ctx);
if (insn_is_zext(&insn[1]))
return 1;
break;
case BPF_ALU64 | BPF_MOV | BPF_X:
emit_reg_move(src, dst, ctx);
@ -942,6 +944,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_ALU | BPF_DIV | BPF_X:
emit_write_y(G0, ctx);
emit_alu(DIV, src, dst, ctx);
if (insn_is_zext(&insn[1]))
return 1;
break;
case BPF_ALU64 | BPF_DIV | BPF_X:
emit_alu(UDIVX, src, dst, ctx);
@ -975,6 +979,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break;
case BPF_ALU | BPF_RSH | BPF_X:
emit_alu(SRL, src, dst, ctx);
if (insn_is_zext(&insn[1]))
return 1;
break;
case BPF_ALU64 | BPF_RSH | BPF_X:
emit_alu(SRLX, src, dst, ctx);
@ -997,9 +1003,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case 16:
emit_alu_K(SLL, dst, 16, ctx);
emit_alu_K(SRL, dst, 16, ctx);
if (insn_is_zext(&insn[1]))
return 1;
break;
case 32:
emit_alu_K(SRL, dst, 0, ctx);
if (!ctx->prog->aux->verifier_zext)
emit_alu_K(SRL, dst, 0, ctx);
break;
case 64:
/* nop */
@ -1021,6 +1030,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit_alu3_K(AND, dst, 0xff, dst, ctx);
emit_alu3_K(SLL, tmp, 8, tmp, ctx);
emit_alu(OR, tmp, dst, ctx);
if (insn_is_zext(&insn[1]))
return 1;
break;
case 32:
@ -1037,6 +1048,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit_alu3_K(AND, dst, 0xff, dst, ctx); /* dst = dst & 0xff */
emit_alu3_K(SLL, dst, 24, dst, ctx); /* dst = dst << 24 */
emit_alu(OR, tmp, dst, ctx); /* dst = dst | tmp */
if (insn_is_zext(&insn[1]))
return 1;
break;
case 64:
@ -1050,6 +1063,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
/* dst = imm */
case BPF_ALU | BPF_MOV | BPF_K:
emit_loadimm32(imm, dst, ctx);
if (insn_is_zext(&insn[1]))
return 1;
break;
case BPF_ALU64 | BPF_MOV | BPF_K:
emit_loadimm_sext(imm, dst, ctx);
@ -1132,6 +1147,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break;
case BPF_ALU | BPF_RSH | BPF_K:
emit_alu_K(SRL, dst, imm, ctx);
if (insn_is_zext(&insn[1]))
return 1;
break;
case BPF_ALU64 | BPF_RSH | BPF_K:
emit_alu_K(SRLX, dst, imm, ctx);
@ -1144,7 +1161,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break;
do_alu32_trunc:
if (BPF_CLASS(code) == BPF_ALU)
if (BPF_CLASS(code) == BPF_ALU &&
!ctx->prog->aux->verifier_zext)
emit_alu_K(SRL, dst, 0, ctx);
break;
@ -1265,6 +1283,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
rs2 = RS2(tmp);
}
emit(opcode | RS1(src) | rs2 | RD(dst), ctx);
if (opcode != LD64 && insn_is_zext(&insn[1]))
return 1;
break;
}
/* ST: *(size *)(dst + off) = imm */
@ -1432,6 +1452,11 @@ static void jit_fill_hole(void *area, unsigned int size)
*ptr++ = 0x91d02005; /* ta 5 */
}
bool bpf_jit_needs_zext(void)
{
return true;
}
struct sparc64_jit_data {
struct bpf_binary_header *header;
u8 *image;


@ -253,13 +253,14 @@ static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk,
/* dst = src */
static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[],
const u8 src[], bool dstk,
bool sstk, u8 **pprog)
bool sstk, u8 **pprog,
const struct bpf_prog_aux *aux)
{
emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog);
if (is64)
/* complete 8 byte move */
emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog);
else
else if (!aux->verifier_zext)
/* zero out high 4 bytes */
emit_ia32_mov_i(dst_hi, 0, dstk, pprog);
}
@ -313,7 +314,8 @@ static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
}
static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
bool dstk, u8 **pprog)
bool dstk, u8 **pprog,
const struct bpf_prog_aux *aux)
{
u8 *prog = *pprog;
int cnt = 0;
@ -334,12 +336,14 @@ static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
*/
EMIT2(0x0F, 0xB7);
EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
/* xor dreg_hi,dreg_hi */
EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
if (!aux->verifier_zext)
/* xor dreg_hi,dreg_hi */
EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
break;
case 32:
/* xor dreg_hi,dreg_hi */
EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
if (!aux->verifier_zext)
/* xor dreg_hi,dreg_hi */
EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
break;
case 64:
/* nop */
@ -358,7 +362,8 @@ static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
}
static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
bool dstk, u8 **pprog)
bool dstk, u8 **pprog,
const struct bpf_prog_aux *aux)
{
u8 *prog = *pprog;
int cnt = 0;
@ -380,16 +385,18 @@ static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
EMIT2(0x0F, 0xB7);
EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
/* xor dreg_hi,dreg_hi */
EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
if (!aux->verifier_zext)
/* xor dreg_hi,dreg_hi */
EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
break;
case 32:
/* Emit 'bswap eax' to swap lower 4 bytes */
EMIT1(0x0F);
EMIT1(add_1reg(0xC8, dreg_lo));
/* xor dreg_hi,dreg_hi */
EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
if (!aux->verifier_zext)
/* xor dreg_hi,dreg_hi */
EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
break;
case 64:
/* Emit 'bswap eax' to swap lower 4 bytes */
@ -569,7 +576,7 @@ static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
const u8 dst[], const u8 src[],
bool dstk, bool sstk,
u8 **pprog)
u8 **pprog, const struct bpf_prog_aux *aux)
{
u8 *prog = *pprog;
@ -577,7 +584,7 @@ static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
if (is64)
emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk,
&prog);
else
else if (!aux->verifier_zext)
emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
*pprog = prog;
}
@ -668,7 +675,8 @@ static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
/* ALU operation (64 bit) */
static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
const u8 dst[], const u32 val,
bool dstk, u8 **pprog)
bool dstk, u8 **pprog,
const struct bpf_prog_aux *aux)
{
u8 *prog = *pprog;
u32 hi = 0;
@ -679,7 +687,7 @@ static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog);
if (is64)
emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog);
else
else if (!aux->verifier_zext)
emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
*pprog = prog;
@ -1713,8 +1721,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU64 | BPF_MOV | BPF_X:
switch (BPF_SRC(code)) {
case BPF_X:
emit_ia32_mov_r64(is64, dst, src, dstk,
sstk, &prog);
if (imm32 == 1) {
/* Special mov32 for zext. */
emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
break;
}
emit_ia32_mov_r64(is64, dst, src, dstk, sstk,
&prog, bpf_prog->aux);
break;
case BPF_K:
/* Sign-extend immediate value to dst reg */
@ -1754,11 +1767,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
switch (BPF_SRC(code)) {
case BPF_X:
emit_ia32_alu_r64(is64, BPF_OP(code), dst,
src, dstk, sstk, &prog);
src, dstk, sstk, &prog,
bpf_prog->aux);
break;
case BPF_K:
emit_ia32_alu_i64(is64, BPF_OP(code), dst,
imm32, dstk, &prog);
imm32, dstk, &prog,
bpf_prog->aux);
break;
}
break;
@ -1777,7 +1792,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
false, &prog);
break;
}
emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
if (!bpf_prog->aux->verifier_zext)
emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
break;
case BPF_ALU | BPF_LSH | BPF_X:
case BPF_ALU | BPF_RSH | BPF_X:
@ -1797,7 +1813,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
&prog);
break;
}
emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
if (!bpf_prog->aux->verifier_zext)
emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
break;
/* dst = dst / src(imm) */
/* dst = dst % src(imm) */
@ -1819,7 +1836,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
&prog);
break;
}
emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
if (!bpf_prog->aux->verifier_zext)
emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
break;
case BPF_ALU64 | BPF_DIV | BPF_K:
case BPF_ALU64 | BPF_DIV | BPF_X:
@ -1836,7 +1854,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk,
false, &prog);
emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
if (!bpf_prog->aux->verifier_zext)
emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
break;
/* dst = dst << imm */
case BPF_ALU64 | BPF_LSH | BPF_K:
@ -1872,7 +1891,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU | BPF_NEG:
emit_ia32_alu_i(is64, false, BPF_OP(code),
dst_lo, 0, dstk, &prog);
emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
if (!bpf_prog->aux->verifier_zext)
emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
break;
/* dst = ~dst (64 bit) */
case BPF_ALU64 | BPF_NEG:
@ -1892,11 +1912,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
break;
/* dst = htole(dst) */
case BPF_ALU | BPF_END | BPF_FROM_LE:
emit_ia32_to_le_r64(dst, imm32, dstk, &prog);
emit_ia32_to_le_r64(dst, imm32, dstk, &prog,
bpf_prog->aux);
break;
/* dst = htobe(dst) */
case BPF_ALU | BPF_END | BPF_FROM_BE:
emit_ia32_to_be_r64(dst, imm32, dstk, &prog);
emit_ia32_to_be_r64(dst, imm32, dstk, &prog,
bpf_prog->aux);
break;
/* dst = imm64 */
case BPF_LD | BPF_IMM | BPF_DW: {
@ -2051,6 +2073,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_B:
case BPF_H:
case BPF_W:
if (!bpf_prog->aux->verifier_zext)
break;
if (dstk) {
EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
STACK_VAR(dst_hi));
@ -2475,6 +2499,11 @@ notyet:
return proglen;
}
bool bpf_jit_needs_zext(void)
{
return true;
}
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
struct bpf_binary_header *header = NULL;


@ -8,6 +8,9 @@
#include <linux/bpf_lirc.h>
#include "rc-core-priv.h"
#define lirc_rcu_dereference(p) \
rcu_dereference_protected(p, lockdep_is_held(&ir_raw_handler_lock))
/*
* BPF interface for raw IR
*/
@ -136,7 +139,7 @@ const struct bpf_verifier_ops lirc_mode2_verifier_ops = {
static int lirc_bpf_attach(struct rc_dev *rcdev, struct bpf_prog *prog)
{
struct bpf_prog_array __rcu *old_array;
struct bpf_prog_array *old_array;
struct bpf_prog_array *new_array;
struct ir_raw_event_ctrl *raw;
int ret;
@ -154,12 +157,12 @@ static int lirc_bpf_attach(struct rc_dev *rcdev, struct bpf_prog *prog)
goto unlock;
}
if (raw->progs && bpf_prog_array_length(raw->progs) >= BPF_MAX_PROGS) {
old_array = lirc_rcu_dereference(raw->progs);
if (old_array && bpf_prog_array_length(old_array) >= BPF_MAX_PROGS) {
ret = -E2BIG;
goto unlock;
}
old_array = raw->progs;
ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
if (ret < 0)
goto unlock;
@ -174,7 +177,7 @@ unlock:
static int lirc_bpf_detach(struct rc_dev *rcdev, struct bpf_prog *prog)
{
struct bpf_prog_array __rcu *old_array;
struct bpf_prog_array *old_array;
struct bpf_prog_array *new_array;
struct ir_raw_event_ctrl *raw;
int ret;
@ -192,7 +195,7 @@ static int lirc_bpf_detach(struct rc_dev *rcdev, struct bpf_prog *prog)
goto unlock;
}
old_array = raw->progs;
old_array = lirc_rcu_dereference(raw->progs);
ret = bpf_prog_array_copy(old_array, prog, NULL, &new_array);
/*
* Do not use bpf_prog_array_delete_safe() as we would end up
@ -223,21 +226,22 @@ void lirc_bpf_run(struct rc_dev *rcdev, u32 sample)
/*
* This should be called once the rc thread has been stopped, so there can be
* no concurrent bpf execution.
*
* Should be called with the ir_raw_handler_lock held.
*/
void lirc_bpf_free(struct rc_dev *rcdev)
{
struct bpf_prog_array_item *item;
struct bpf_prog_array *array;
if (!rcdev->raw->progs)
array = lirc_rcu_dereference(rcdev->raw->progs);
if (!array)
return;
item = rcu_dereference(rcdev->raw->progs)->items;
while (item->prog) {
for (item = array->items; item->prog; item++)
bpf_prog_put(item->prog);
item++;
}
bpf_prog_array_free(rcdev->raw->progs);
bpf_prog_array_free(array);
}
int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
@ -290,7 +294,7 @@ int lirc_prog_detach(const union bpf_attr *attr)
int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
{
__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
struct bpf_prog_array __rcu *progs;
struct bpf_prog_array *progs;
struct rc_dev *rcdev;
u32 cnt, flags = 0;
int ret;
@ -311,7 +315,7 @@ int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
if (ret)
goto put;
progs = rcdev->raw->progs;
progs = lirc_rcu_dereference(rcdev->raw->progs);
cnt = progs ? bpf_prog_array_length(progs) : 0;
if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt))) {


@ -622,6 +622,13 @@ static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
}
}
static void
wrp_zext(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst)
{
if (meta->flags & FLAG_INSN_DO_ZEXT)
wrp_immed(nfp_prog, reg_both(dst + 1), 0);
}
static void
wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm,
enum nfp_relo_type relo)
@ -858,7 +865,8 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
}
static int
data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, swreg offset,
u8 dst_gpr, int size)
{
unsigned int i;
u16 shift, sz;
@ -881,14 +889,15 @@ data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
if (i < 2)
wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
wrp_zext(nfp_prog, meta, dst_gpr);
return 0;
}
static int
data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
swreg lreg, swreg rreg, int size, enum cmd_mode mode)
data_ld_host_order(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
u8 dst_gpr, swreg lreg, swreg rreg, int size,
enum cmd_mode mode)
{
unsigned int i;
u8 mask, sz;
@ -911,33 +920,34 @@ data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
if (i < 2)
wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
wrp_zext(nfp_prog, meta, dst_gpr);
return 0;
}
static int
data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
u8 dst_gpr, u8 size)
data_ld_host_order_addr32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
u8 src_gpr, swreg offset, u8 dst_gpr, u8 size)
{
return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset,
size, CMD_MODE_32b);
return data_ld_host_order(nfp_prog, meta, dst_gpr, reg_a(src_gpr),
offset, size, CMD_MODE_32b);
}
static int
data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
u8 dst_gpr, u8 size)
data_ld_host_order_addr40(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
u8 src_gpr, swreg offset, u8 dst_gpr, u8 size)
{
swreg rega, regb;
addr40_offset(nfp_prog, src_gpr, offset, &rega, &regb);
return data_ld_host_order(nfp_prog, dst_gpr, rega, regb,
return data_ld_host_order(nfp_prog, meta, dst_gpr, rega, regb,
size, CMD_MODE_40b_BA);
}
static int
construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
construct_data_ind_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
u16 offset, u16 src, u8 size)
{
swreg tmp_reg;
@ -953,10 +963,12 @@ construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);
/* Load data */
return data_ld(nfp_prog, imm_b(nfp_prog), 0, size);
return data_ld(nfp_prog, meta, imm_b(nfp_prog), 0, size);
}
static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
static int
construct_data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
u16 offset, u8 size)
{
swreg tmp_reg;
@ -967,7 +979,7 @@ static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
/* Load data */
tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
return data_ld(nfp_prog, tmp_reg, 0, size);
return data_ld(nfp_prog, meta, tmp_reg, 0, size);
}
static int
@ -1204,7 +1216,7 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
}
if (clr_gpr && size < 8)
wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
wrp_zext(nfp_prog, meta, gpr);
while (size) {
u32 slice_end;
@ -1305,9 +1317,10 @@ wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
enum alu_op alu_op)
{
const struct bpf_insn *insn = &meta->insn;
u8 dst = insn->dst_reg * 2;
wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
wrp_alu_imm(nfp_prog, dst, alu_op, insn->imm);
wrp_zext(nfp_prog, meta, dst);
return 0;
}
@ -1319,7 +1332,7 @@ wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
wrp_zext(nfp_prog, meta, dst);
return 0;
}
@ -2396,12 +2409,14 @@ static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
u8 dst = meta->insn.dst_reg * 2;
emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst));
wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
wrp_zext(nfp_prog, meta, dst);
return 0;
}
static int __ashr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
static int
__ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst,
u8 shift_amt)
{
if (shift_amt) {
/* Set signedness bit (MSB of result). */
@ -2410,7 +2425,7 @@ static int __ashr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
reg_b(dst), SHF_SC_R_SHF, shift_amt);
}
wrp_immed(nfp_prog, reg_both(dst + 1), 0);
wrp_zext(nfp_prog, meta, dst);
return 0;
}
@ -2425,7 +2440,7 @@ static int ashr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
umin = meta->umin_src;
umax = meta->umax_src;
if (umin == umax)
return __ashr_imm(nfp_prog, dst, umin);
return __ashr_imm(nfp_prog, meta, dst, umin);
src = insn->src_reg * 2;
/* NOTE: the first insn will set both indirect shift amount (source A)
@ -2434,7 +2449,7 @@ static int ashr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst));
emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
reg_b(dst), SHF_SC_R_SHF);
wrp_immed(nfp_prog, reg_both(dst + 1), 0);
wrp_zext(nfp_prog, meta, dst);
return 0;
}
@ -2444,15 +2459,17 @@ static int ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
const struct bpf_insn *insn = &meta->insn;
u8 dst = insn->dst_reg * 2;
return __ashr_imm(nfp_prog, dst, insn->imm);
return __ashr_imm(nfp_prog, meta, dst, insn->imm);
}
static int __shr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
static int
__shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst,
u8 shift_amt)
{
if (shift_amt)
emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
reg_b(dst), SHF_SC_R_SHF, shift_amt);
wrp_immed(nfp_prog, reg_both(dst + 1), 0);
wrp_zext(nfp_prog, meta, dst);
return 0;
}
@ -2461,7 +2478,7 @@ static int shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
const struct bpf_insn *insn = &meta->insn;
u8 dst = insn->dst_reg * 2;
return __shr_imm(nfp_prog, dst, insn->imm);
return __shr_imm(nfp_prog, meta, dst, insn->imm);
}
static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@ -2474,22 +2491,24 @@ static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
umin = meta->umin_src;
umax = meta->umax_src;
if (umin == umax)
return __shr_imm(nfp_prog, dst, umin);
return __shr_imm(nfp_prog, meta, dst, umin);
src = insn->src_reg * 2;
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
reg_b(dst), SHF_SC_R_SHF);
wrp_immed(nfp_prog, reg_both(dst + 1), 0);
wrp_zext(nfp_prog, meta, dst);
return 0;
}
static int __shl_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
static int
__shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst,
u8 shift_amt)
{
if (shift_amt)
emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
reg_b(dst), SHF_SC_L_SHF, shift_amt);
wrp_immed(nfp_prog, reg_both(dst + 1), 0);
wrp_zext(nfp_prog, meta, dst);
return 0;
}
@ -2498,7 +2517,7 @@ static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
const struct bpf_insn *insn = &meta->insn;
u8 dst = insn->dst_reg * 2;
return __shl_imm(nfp_prog, dst, insn->imm);
return __shl_imm(nfp_prog, meta, dst, insn->imm);
}
static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@ -2511,11 +2530,11 @@ static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
umin = meta->umin_src;
umax = meta->umax_src;
if (umin == umax)
return __shl_imm(nfp_prog, dst, umin);
return __shl_imm(nfp_prog, meta, dst, umin);
src = insn->src_reg * 2;
shl_reg64_lt32_low(nfp_prog, dst, src);
wrp_immed(nfp_prog, reg_both(dst + 1), 0);
wrp_zext(nfp_prog, meta, dst);
return 0;
}
@ -2577,34 +2596,34 @@ static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return construct_data_ld(nfp_prog, meta->insn.imm, 1);
return construct_data_ld(nfp_prog, meta, meta->insn.imm, 1);
}
static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return construct_data_ld(nfp_prog, meta->insn.imm, 2);
return construct_data_ld(nfp_prog, meta, meta->insn.imm, 2);
}
static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return construct_data_ld(nfp_prog, meta->insn.imm, 4);
return construct_data_ld(nfp_prog, meta, meta->insn.imm, 4);
}
static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return construct_data_ind_ld(nfp_prog, meta->insn.imm,
return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm,
meta->insn.src_reg * 2, 1);
}
static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return construct_data_ind_ld(nfp_prog, meta->insn.imm,
return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm,
meta->insn.src_reg * 2, 2);
}
static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return construct_data_ind_ld(nfp_prog, meta->insn.imm,
return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm,
meta->insn.src_reg * 2, 4);
}
@ -2682,7 +2701,7 @@ mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2,
return data_ld_host_order_addr32(nfp_prog, meta, meta->insn.src_reg * 2,
tmp_reg, meta->insn.dst_reg * 2, size);
}
@ -2694,7 +2713,7 @@ mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2,
return data_ld_host_order_addr40(nfp_prog, meta, meta->insn.src_reg * 2,
tmp_reg, meta->insn.dst_reg * 2, size);
}
@ -2755,7 +2774,7 @@ mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog,
wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off);
if (!len_mid) {
wrp_immed(nfp_prog, dst_hi, 0);
wrp_zext(nfp_prog, meta, dst_gpr);
return 0;
}
@ -2763,7 +2782,7 @@ mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog,
if (size <= REG_WIDTH) {
wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo);
wrp_immed(nfp_prog, dst_hi, 0);
wrp_zext(nfp_prog, meta, dst_gpr);
} else {
swreg src_hi = reg_xfer(idx + 2);
@ -2794,10 +2813,10 @@ mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog,
if (size < REG_WIDTH) {
wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0);
wrp_immed(nfp_prog, dst_hi, 0);
wrp_zext(nfp_prog, meta, dst_gpr);
} else if (size == REG_WIDTH) {
wrp_mov(nfp_prog, dst_lo, src_lo);
wrp_immed(nfp_prog, dst_hi, 0);
wrp_zext(nfp_prog, meta, dst_gpr);
} else {
swreg src_hi = reg_xfer(idx + 1);


@ -238,6 +238,8 @@ struct nfp_bpf_reg_state {
#define FLAG_INSN_SKIP_PREC_DEPENDENT BIT(4)
/* Instruction is optimized by the verifier */
#define FLAG_INSN_SKIP_VERIFIER_OPT BIT(5)
/* Instruction needs to zero extend to high 32-bit */
#define FLAG_INSN_DO_ZEXT BIT(6)
#define FLAG_INSN_SKIP_MASK (FLAG_INSN_SKIP_NOOP | \
FLAG_INSN_SKIP_PREC_DEPENDENT | \


@ -744,6 +744,17 @@ continue_subprog:
goto continue_subprog;
}
static void nfp_bpf_insn_flag_zext(struct nfp_prog *nfp_prog,
struct bpf_insn_aux_data *aux)
{
struct nfp_insn_meta *meta;
list_for_each_entry(meta, &nfp_prog->insns, l) {
if (aux[meta->n].zext_dst)
meta->flags |= FLAG_INSN_DO_ZEXT;
}
}
int nfp_bpf_finalize(struct bpf_verifier_env *env)
{
struct bpf_subprog_info *info;
@ -784,6 +795,7 @@ int nfp_bpf_finalize(struct bpf_verifier_env *env)
return -EOPNOTSUPP;
}
nfp_bpf_insn_flag_zext(nfp_prog, env->insn_aux_data);
return 0;
}


@ -6,6 +6,7 @@
#include <linux/errno.h>
#include <linux/jump_label.h>
#include <linux/percpu.h>
#include <linux/percpu-refcount.h>
#include <linux/rbtree.h>
#include <uapi/linux/bpf.h>
@ -71,11 +72,17 @@ struct cgroup_bpf {
u32 flags[MAX_BPF_ATTACH_TYPE];
/* temp storage for effective prog array used by prog_attach/detach */
struct bpf_prog_array __rcu *inactive;
struct bpf_prog_array *inactive;
/* reference counter used to detach bpf programs after cgroup removal */
struct percpu_ref refcnt;
/* cgroup_bpf is released using a work queue */
struct work_struct release_work;
};
void cgroup_bpf_put(struct cgroup *cgrp);
int cgroup_bpf_inherit(struct cgroup *cgrp);
void cgroup_bpf_offline(struct cgroup *cgrp);
int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
enum bpf_attach_type type, u32 flags);
@ -283,8 +290,8 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
struct bpf_prog;
struct cgroup_bpf {};
static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
enum bpf_prog_type ptype,


@ -66,6 +66,11 @@ struct bpf_map_ops {
u64 imm, u32 *off);
};
struct bpf_map_memory {
u32 pages;
struct user_struct *user;
};
struct bpf_map {
/* The first two cachelines with read-mostly members of which some
* are also accessed in fast-path (e.g. ops, max_entries).
@ -86,7 +91,7 @@ struct bpf_map {
u32 btf_key_type_id;
u32 btf_value_type_id;
struct btf *btf;
u32 pages;
struct bpf_map_memory memory;
bool unpriv_array;
bool frozen; /* write-once */
/* 48 bytes hole */
@ -94,8 +99,7 @@ struct bpf_map {
/* The 3rd and 4th cacheline with misc members to avoid false sharing
* particularly with refcounting.
*/
struct user_struct *user ____cacheline_aligned;
atomic_t refcnt;
atomic_t refcnt ____cacheline_aligned;
atomic_t usercnt;
struct work_struct work;
char name[BPF_OBJ_NAME_LEN];
@ -370,6 +374,7 @@ struct bpf_prog_aux {
u32 id;
u32 func_cnt; /* used by non-func prog as the number of func progs */
u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
bool verifier_zext; /* Zero extensions has been inserted by verifier. */
bool offload_requested;
struct bpf_prog **func;
void *jit_data; /* JIT specific data. arch dependent */
@ -513,17 +518,17 @@ struct bpf_prog_array {
};
struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
void bpf_prog_array_free(struct bpf_prog_array __rcu *progs);
int bpf_prog_array_length(struct bpf_prog_array __rcu *progs);
int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
void bpf_prog_array_free(struct bpf_prog_array *progs);
int bpf_prog_array_length(struct bpf_prog_array *progs);
int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs,
__u32 __user *prog_ids, u32 cnt);
void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
void bpf_prog_array_delete_safe(struct bpf_prog_array *progs,
struct bpf_prog *old_prog);
int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
int bpf_prog_array_copy_info(struct bpf_prog_array *array,
u32 *prog_ids, u32 request_cnt,
u32 *prog_cnt);
int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *exclude_prog,
struct bpf_prog *include_prog,
struct bpf_prog_array **new_array);
@ -551,6 +556,56 @@ _out: \
_ret; \
})
/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
* so BPF programs can request cwr for TCP packets.
*
* Current cgroup skb programs can only return 0 or 1 (0 to drop the
* packet, 1 to keep it). This macro changes the behavior so the low order bit
* indicates whether the packet should be dropped (0) or not (1)
* and the next bit is a congestion notification bit. This could be
* used by TCP to call tcp_enter_cwr()
*
* Hence, new allowed return values of CGROUP EGRESS BPF programs are:
* 0: drop packet
* 1: keep packet
* 2: drop packet and cn
* 3: keep packet and cn
*
* This macro then converts it to one of the NET_XMIT codes or an error
* code that is then interpreted as drop packet (and no cn):
* 0: NET_XMIT_SUCCESS skb should be transmitted
* 1: NET_XMIT_DROP skb should be dropped and cn
* 2: NET_XMIT_CN skb should be transmitted and cn
* 3: -EPERM skb should be dropped
*/
#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \
({ \
struct bpf_prog_array_item *_item; \
struct bpf_prog *_prog; \
struct bpf_prog_array *_array; \
u32 ret; \
u32 _ret = 1; \
u32 _cn = 0; \
preempt_disable(); \
rcu_read_lock(); \
_array = rcu_dereference(array); \
_item = &_array->items[0]; \
while ((_prog = READ_ONCE(_item->prog))) { \
bpf_cgroup_storage_set(_item->cgroup_storage); \
ret = func(_prog, ctx); \
_ret &= (ret & 1); \
_cn |= (ret & 2); \
_item++; \
} \
rcu_read_unlock(); \
preempt_enable(); \
if (_ret) \
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
else \
_ret = (_cn ? NET_XMIT_DROP : -EPERM); \
_ret; \
})
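
A hedged illustration of the new return-value convention, not taken from this patch: a cgroup skb egress program can keep a packet while asking the stack for congestion notification (the section name and length threshold are made up for the example).

#include <linux/bpf.h>

#define SEC(name) __attribute__((section(name), used))

SEC("cgroup_skb/egress")
int egress_request_cn(struct __sk_buff *skb)
{
	/* bit 0: keep (1) / drop (0); bit 1: congestion notification */
	if (skb->len > 1400)
		return 3;	/* keep the packet and signal CN -> NET_XMIT_CN */
	return 1;		/* keep the packet, no CN -> NET_XMIT_SUCCESS */
}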
#define BPF_PROG_RUN_ARRAY(array, ctx, func) \
__BPF_PROG_RUN_ARRAY(array, ctx, func, false)
@ -595,9 +650,12 @@ struct bpf_map *__bpf_map_get(struct fd f);
struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
void bpf_map_put_with_uref(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
int bpf_map_precharge_memlock(u32 pages);
int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size);
void bpf_map_charge_finish(struct bpf_map_memory *mem);
void bpf_map_charge_move(struct bpf_map_memory *dst,
struct bpf_map_memory *src);
void *bpf_map_area_alloc(size_t size, int numa_node);
void bpf_map_area_free(void *base);
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);


@ -36,9 +36,11 @@
*/
enum bpf_reg_liveness {
REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */
REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */
REG_LIVE_WRITTEN, /* reg was written first, screening off later reads */
REG_LIVE_DONE = 4, /* liveness won't be updating this register anymore */
REG_LIVE_READ32 = 0x1, /* reg was read, so we're sensitive to initial value */
REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */
REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64,
REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */
REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */
};
struct bpf_reg_state {
@ -131,6 +133,11 @@ struct bpf_reg_state {
* pointing to bpf_func_state.
*/
u32 frameno;
/* Tracks subreg definition. The stored value is the insn_idx of the
* writing insn. This is safe because subreg_def is used before any insn
* patching which only happens after main verification finished.
*/
s32 subreg_def;
enum bpf_reg_liveness live;
};
@ -187,6 +194,7 @@ struct bpf_func_state {
struct bpf_verifier_state {
/* call stack tracking */
struct bpf_func_state *frame[MAX_CALL_FRAMES];
u32 insn_idx;
u32 curframe;
u32 active_spin_lock;
bool speculative;
@ -232,7 +240,9 @@ struct bpf_insn_aux_data {
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
int sanitize_stack_off; /* stack slot to be cleared */
bool seen; /* this insn was processed by the verifier */
bool zext_dst; /* this insn zero extends dst reg */
u8 alu_state; /* used in combination with alu_limit */
bool prune_point;
unsigned int orig_idx; /* original instruction index */
};
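
A hypothetical instruction sequence showing what the new liveness bits, subreg_def and zext_dst are tracking (only the standard macros from include/linux/filter.h are used; the array name is made up).

#include <linux/filter.h>

/* The 32-bit move defines only the low half of r0 (its subreg_def is
 * recorded); the 64-bit add then reads all 64 bits (REG_LIVE_READ64),
 * so the verifier sets zext_dst on the move and, for JITs that return
 * true from bpf_jit_needs_zext(), inserts BPF_ZEXT_REG(BPF_REG_0)
 * right after it. */
static const struct bpf_insn zext_needed_example[] = {
	BPF_MOV32_IMM(BPF_REG_0, 1),		/* 32-bit def of r0 */
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),	/* 64-bit read of r0 */
	BPF_EXIT_INSN(),			/* return r0 */
};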


@ -924,4 +924,22 @@ static inline bool cgroup_task_frozen(struct task_struct *task)
#endif /* !CONFIG_CGROUPS */
#ifdef CONFIG_CGROUP_BPF
static inline void cgroup_bpf_get(struct cgroup *cgrp)
{
percpu_ref_get(&cgrp->bpf.refcnt);
}
static inline void cgroup_bpf_put(struct cgroup *cgrp)
{
percpu_ref_put(&cgrp->bpf.refcnt);
}
#else /* CONFIG_CGROUP_BPF */
static inline void cgroup_bpf_get(struct cgroup *cgrp) {}
static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
#endif /* CONFIG_CGROUP_BPF */
#endif /* _LINUX_CGROUP_H */


@ -160,6 +160,20 @@ struct ctl_table_header;
.off = 0, \
.imm = IMM })
/* Special form of mov32, used for doing explicit zero extension on dst. */
#define BPF_ZEXT_REG(DST) \
((struct bpf_insn) { \
.code = BPF_ALU | BPF_MOV | BPF_X, \
.dst_reg = DST, \
.src_reg = DST, \
.off = 0, \
.imm = 1 })
static inline bool insn_is_zext(const struct bpf_insn *insn)
{
return insn->code == (BPF_ALU | BPF_MOV | BPF_X) && insn->imm == 1;
}
/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
#define BPF_LD_IMM64(DST, IMM) \
BPF_LD_IMM64_RAW(DST, 0, IMM)
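
An illustrative check, not part of the patch, tying the two new helpers together.

#include <linux/filter.h>

/* BPF_ZEXT_REG() encodes the special mov32 (dst == src, imm == 1) that
 * the verifier inserts; insn_is_zext() is how JIT back-ends spot it. */
static bool zext_encoding_roundtrip(void)
{
	const struct bpf_insn insn = BPF_ZEXT_REG(BPF_REG_0);

	return insn_is_zext(&insn);	/* true by construction */
}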
@ -512,7 +526,8 @@ struct bpf_prog {
blinded:1, /* Was blinded */
is_func:1, /* program is a bpf function */
kprobe_override:1, /* Do we override a kprobe? */
has_callchain_buf:1; /* callchain buffer allocated? */
has_callchain_buf:1, /* callchain buffer allocated? */
enforce_expected_attach_type:1; /* Enforce expected_attach_type checking at attach time */
enum bpf_prog_type type; /* Type of BPF program */
enum bpf_attach_type expected_attach_type; /* For some prog types */
u32 len; /* Number of filter blocks */
@ -811,6 +826,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
void bpf_jit_compile(struct bpf_prog *prog);
bool bpf_jit_needs_zext(void);
bool bpf_helper_changes_pkt_data(void *func);
static inline bool bpf_dump_raw_ok(void)


@ -260,6 +260,24 @@ enum bpf_attach_type {
*/
#define BPF_F_ANY_ALIGNMENT (1U << 1)
/* BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purpose.
* Verifier does sub-register def/use analysis and identifies instructions whose
* def only matters for low 32-bit, high 32-bit is never referenced later
* through implicit zero extension. Therefore verifier notifies JIT back-ends
* that it is safe to ignore clearing high 32-bit for these instructions. This
* saves some back-ends a lot of code-gen. However such optimization is not
* necessary on some arches, for example x86_64, arm64 etc, whose JIT back-ends
* hence hasn't used verifier's analysis result. But, we really want to have a
* way to be able to verify the correctness of the described optimization on
* x86_64 on which testsuites are frequently exercised.
*
* So, this flag is introduced. Once it is set, the verifier will randomize the
* high 32 bits for those instructions identified as safe to leave un-extended.
* Then, if the verifier's analysis is incorrect, the randomization will make
* tests regress and expose the bug.
*/
#define BPF_F_TEST_RND_HI32 (1U << 2)
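
A minimal userspace sketch, assumed rather than taken from this series, of passing the new flag through the raw bpf(2) syscall (the function name is illustrative).

#include <linux/bpf.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Load a program with BPF_F_TEST_RND_HI32 so the verifier randomizes the
 * high 32 bits of destinations it decided were safe to leave unzeroed;
 * a bug in that analysis then shows up as test failures. */
static int load_prog_rnd_hi32(const struct bpf_insn *insns, unsigned int insn_cnt)
{
	union bpf_attr attr = {};

	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
	attr.insns = (unsigned long)insns;
	attr.insn_cnt = insn_cnt;
	attr.license = (unsigned long)"GPL";
	attr.prog_flags = BPF_F_TEST_RND_HI32;

	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}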
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* two extensions:
*
@ -2672,6 +2690,20 @@ union bpf_attr {
* 0 on success.
*
* **-ENOENT** if the bpf-local-storage cannot be found.
*
* int bpf_send_signal(u32 sig)
* Description
* Send signal *sig* to the current task.
* Return
* 0 on success or successfully queued.
*
* **-EBUSY** if work queue under nmi is full.
*
* **-EINVAL** if *sig* is invalid.
*
* **-EPERM** if no permission to send the *sig*.
*
* **-EAGAIN** if bpf program can try again.
*/
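
For illustration only, a tracing program could call the new helper as follows; the stub follows the selftests' bpf_helpers.h convention, BPF_FUNC_send_signal is generated from the FN() list below, and the program name and tracepoint are made up.

#include <linux/bpf.h>

#define SEC(name) __attribute__((section(name), used))

/* Helper stub in the style of tools/testing/selftests/bpf/bpf_helpers.h. */
static int (*bpf_send_signal)(__u32 sig) = (void *)BPF_FUNC_send_signal;

SEC("tracepoint/syscalls/sys_enter_nanosleep")
int send_usr1_on_nanosleep(void *ctx)
{
	/* Deliver SIGUSR1 (10 on x86) to the task that entered the syscall. */
	bpf_send_signal(10);
	return 0;
}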
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -2782,7 +2814,8 @@ union bpf_attr {
FN(strtol), \
FN(strtoul), \
FN(sk_storage_get), \
FN(sk_storage_delete),
FN(sk_storage_delete), \
FN(send_signal),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call


@ -83,6 +83,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
u32 elem_size, index_mask, max_entries;
bool unpriv = !capable(CAP_SYS_ADMIN);
u64 cost, array_size, mask64;
struct bpf_map_memory mem;
struct bpf_array *array;
elem_size = round_up(attr->value_size, 8);
@ -116,32 +117,29 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
/* make sure there is no u32 overflow later in round_up() */
cost = array_size;
if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-ENOMEM);
if (percpu) {
if (percpu)
cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-ENOMEM);
}
cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
ret = bpf_map_precharge_memlock(cost);
ret = bpf_map_charge_init(&mem, cost);
if (ret < 0)
return ERR_PTR(ret);
/* allocate all map elements and zero-initialize them */
array = bpf_map_area_alloc(array_size, numa_node);
if (!array)
if (!array) {
bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
}
array->index_mask = index_mask;
array->map.unpriv_array = unpriv;
/* copy mandatory map attributes */
bpf_map_init_from_attr(&array->map, attr);
array->map.pages = cost;
bpf_map_charge_move(&array->map.memory, &mem);
array->elem_size = elem_size;
if (percpu && bpf_array_alloc_percpu(array)) {
bpf_map_charge_finish(&array->map.memory);
bpf_map_area_free(array);
return ERR_PTR(-ENOMEM);
}


@ -22,13 +22,23 @@
DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);
/**
* cgroup_bpf_put() - put references of all bpf programs
* @cgrp: the cgroup to modify
*/
void cgroup_bpf_put(struct cgroup *cgrp)
void cgroup_bpf_offline(struct cgroup *cgrp)
{
cgroup_get(cgrp);
percpu_ref_kill(&cgrp->bpf.refcnt);
}
/**
* cgroup_bpf_release() - put references of all bpf programs and
* release all cgroup bpf data
* @work: work structure embedded into the cgroup to modify
*/
static void cgroup_bpf_release(struct work_struct *work)
{
struct cgroup *cgrp = container_of(work, struct cgroup,
bpf.release_work);
enum bpf_cgroup_storage_type stype;
struct bpf_prog_array *old_array;
unsigned int type;
for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
@ -45,8 +55,27 @@ void cgroup_bpf_put(struct cgroup *cgrp)
kfree(pl);
static_branch_dec(&cgroup_bpf_enabled_key);
}
bpf_prog_array_free(cgrp->bpf.effective[type]);
old_array = rcu_dereference_protected(
cgrp->bpf.effective[type],
percpu_ref_is_dying(&cgrp->bpf.refcnt));
bpf_prog_array_free(old_array);
}
percpu_ref_exit(&cgrp->bpf.refcnt);
cgroup_put(cgrp);
}
/**
* cgroup_bpf_release_fn() - callback used to schedule releasing
* of bpf cgroup data
* @ref: percpu ref counter structure
*/
static void cgroup_bpf_release_fn(struct percpu_ref *ref)
{
struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
queue_work(system_wq, &cgrp->bpf.release_work);
}
/* count number of elements in the list.
@ -101,7 +130,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
*/
static int compute_effective_progs(struct cgroup *cgrp,
enum bpf_attach_type type,
struct bpf_prog_array __rcu **array)
struct bpf_prog_array **array)
{
enum bpf_cgroup_storage_type stype;
struct bpf_prog_array *progs;
@ -139,17 +168,16 @@ static int compute_effective_progs(struct cgroup *cgrp,
}
} while ((p = cgroup_parent(p)));
rcu_assign_pointer(*array, progs);
*array = progs;
return 0;
}
static void activate_effective_progs(struct cgroup *cgrp,
enum bpf_attach_type type,
struct bpf_prog_array __rcu *array)
struct bpf_prog_array *old_array)
{
struct bpf_prog_array __rcu *old_array;
old_array = xchg(&cgrp->bpf.effective[type], array);
rcu_swap_protected(cgrp->bpf.effective[type], old_array,
lockdep_is_held(&cgroup_mutex));
/* free prog array after grace period, since __cgroup_bpf_run_*()
* might be still walking the array
*/
@ -166,8 +194,13 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
* that array below is variable length
*/
#define NR ARRAY_SIZE(cgrp->bpf.effective)
struct bpf_prog_array __rcu *arrays[NR] = {};
int i;
struct bpf_prog_array *arrays[NR] = {};
int ret, i;
ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
GFP_KERNEL);
if (ret)
return ret;
for (i = 0; i < NR; i++)
INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
@ -183,6 +216,9 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
cleanup:
for (i = 0; i < NR; i++)
bpf_prog_array_free(arrays[i]);
percpu_ref_exit(&cgrp->bpf.refcnt);
return -ENOMEM;
}
@ -444,10 +480,14 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
enum bpf_attach_type type = attr->query.attach_type;
struct list_head *progs = &cgrp->bpf.progs[type];
u32 flags = cgrp->bpf.flags[type];
struct bpf_prog_array *effective;
int cnt, ret = 0, i;
effective = rcu_dereference_protected(cgrp->bpf.effective[type],
lockdep_is_held(&cgroup_mutex));
if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);
cnt = bpf_prog_array_length(effective);
else
cnt = prog_list_length(progs);
@ -464,8 +504,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
}
if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],
prog_ids, cnt);
return bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
} else {
struct bpf_prog_list *pl;
u32 id;
@ -548,8 +587,16 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
* The program type passed in via @type must be suitable for network
* filtering. No further check is performed to assert that.
*
* This function will return %-EPERM if an attached program was found
* and if it returned != 1 during execution. In all other cases, 0 is returned.
* For egress packets, this function can return:
* NET_XMIT_SUCCESS (0) - continue with packet output
* NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr
* NET_XMIT_CN (2) - continue with packet output and notify TCP
* to call cwr
* -EPERM - drop packet
*
* For ingress packets, this function will return -EPERM if any
* attached program was found and if it returned != 1 during execution.
* Otherwise 0 is returned.
*/
int __cgroup_bpf_run_filter_skb(struct sock *sk,
struct sk_buff *skb,
@ -575,12 +622,19 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
/* compute pointers for the bpf prog */
bpf_compute_and_save_data_end(skb, &saved_data_end);
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
__bpf_prog_run_save_cb);
if (type == BPF_CGROUP_INET_EGRESS) {
ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb);
} else {
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
__bpf_prog_run_save_cb);
ret = (ret == 1 ? 0 : -EPERM);
}
bpf_restore_data_end(skb, saved_data_end);
__skb_pull(skb, offset);
skb->sk = save_sk;
return ret == 1 ? 0 : -EPERM;
return ret;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
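On the BPF program side, an egress cgroup skb program can return the extended values described above. A minimal sketch; the congestion condition is a stand-in, and the loader is assumed to set expected_attach_type to BPF_CGROUP_INET_EGRESS so the verifier accepts return values 2 and 3:

#include <linux/bpf.h>

#define SEC(name) __attribute__((section(name), used))

/* Egress return values: 0 = drop, 1 = allow,
 * 2 = drop and notify TCP to call cwr, 3 = allow and notify TCP (NET_XMIT_CN).
 */
SEC("cgroup_skb/egress")
int egress_cn_example(struct __sk_buff *skb)
{
	const __u32 soft_limit = 80 * 1024;	/* arbitrary stand-in threshold */

	if (skb->len > soft_limit)
		return 3;	/* let the packet out, but ask TCP to back off */

	return 1;		/* plain allow */
}

char _license[] SEC("license") = "GPL";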

View File

@ -1795,38 +1795,33 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
return &empty_prog_array.hdr;
}
void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
void bpf_prog_array_free(struct bpf_prog_array *progs)
{
if (!progs ||
progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr)
if (!progs || progs == &empty_prog_array.hdr)
return;
kfree_rcu(progs, rcu);
}
int bpf_prog_array_length(struct bpf_prog_array __rcu *array)
int bpf_prog_array_length(struct bpf_prog_array *array)
{
struct bpf_prog_array_item *item;
u32 cnt = 0;
rcu_read_lock();
item = rcu_dereference(array)->items;
for (; item->prog; item++)
for (item = array->items; item->prog; item++)
if (item->prog != &dummy_bpf_prog.prog)
cnt++;
rcu_read_unlock();
return cnt;
}
static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array,
static bool bpf_prog_array_copy_core(struct bpf_prog_array *array,
u32 *prog_ids,
u32 request_cnt)
{
struct bpf_prog_array_item *item;
int i = 0;
item = rcu_dereference_check(array, 1)->items;
for (; item->prog; item++) {
for (item = array->items; item->prog; item++) {
if (item->prog == &dummy_bpf_prog.prog)
continue;
prog_ids[i] = item->prog->aux->id;
@ -1839,7 +1834,7 @@ static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array,
return !!(item->prog);
}
int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array,
int bpf_prog_array_copy_to_user(struct bpf_prog_array *array,
__u32 __user *prog_ids, u32 cnt)
{
unsigned long err = 0;
@ -1850,18 +1845,12 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array,
* cnt = bpf_prog_array_length();
* if (cnt > 0)
* bpf_prog_array_copy_to_user(..., cnt);
* so below kcalloc doesn't need extra cnt > 0 check, but
* bpf_prog_array_length() releases rcu lock and
* prog array could have been swapped with empty or larger array,
* so always copy 'cnt' prog_ids to the user.
* In a rare race the user will see zero prog_ids
* so below kcalloc doesn't need extra cnt > 0 check.
*/
ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN);
if (!ids)
return -ENOMEM;
rcu_read_lock();
nospc = bpf_prog_array_copy_core(array, ids, cnt);
rcu_read_unlock();
err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
kfree(ids);
if (err)
@ -1871,19 +1860,19 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array,
return 0;
}
void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *array,
void bpf_prog_array_delete_safe(struct bpf_prog_array *array,
struct bpf_prog *old_prog)
{
struct bpf_prog_array_item *item = array->items;
struct bpf_prog_array_item *item;
for (; item->prog; item++)
for (item = array->items; item->prog; item++)
if (item->prog == old_prog) {
WRITE_ONCE(item->prog, &dummy_bpf_prog.prog);
break;
}
}
int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *exclude_prog,
struct bpf_prog *include_prog,
struct bpf_prog_array **new_array)
@ -1947,7 +1936,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
return 0;
}
int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
int bpf_prog_array_copy_info(struct bpf_prog_array *array,
u32 *prog_ids, u32 request_cnt,
u32 *prog_cnt)
{
@ -2090,6 +2079,15 @@ bool __weak bpf_helper_changes_pkt_data(void *func)
return false;
}
/* Return TRUE if the JIT back-end wants the verifier to enable sub-register
* usage analysis and to insert explicit zero extensions. Otherwise, return
* FALSE.
*/
bool __weak bpf_jit_needs_zext(void)
{
return false;
}
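A 32-bit JIT back-end that wants these zero extensions made explicit overrides the weak definition in its own bpf_jit_comp.c, along the lines of:

/* Tell the verifier to do sub-register analysis and emit explicit
 * zero-extension (mov32) instructions for this JIT.
 */
bool bpf_jit_needs_zext(void)
{
	return true;
}

The verifier then sets prog->aux->verifier_zext after a successful zext pass (see the bpf_check() hunk later in this patch), so the back-end can stop clearing upper halves itself and simply code-generate the inserted mov32 zero-extensions.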
/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
* skb_copy_bits(), so provide a weak definition of it for NET-less config.
*/

View File

@ -106,12 +106,9 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
/* make sure page count doesn't overflow */
cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *);
cost += cpu_map_bitmap_size(attr) * num_possible_cpus();
if (cost >= U32_MAX - PAGE_SIZE)
goto free_cmap;
cmap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
/* Notice: returns -EPERM if map size is larger than memlock limit */
ret = bpf_map_precharge_memlock(cmap->map.pages);
ret = bpf_map_charge_init(&cmap->map.memory, cost);
if (ret) {
err = ret;
goto free_cmap;
@ -121,7 +118,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr),
__alignof__(unsigned long));
if (!cmap->flush_needed)
goto free_cmap;
goto free_charge;
/* Alloc array for possible remote "destination" CPUs */
cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
@ -133,6 +130,8 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
return &cmap->map;
free_percpu:
free_percpu(cmap->flush_needed);
free_charge:
bpf_map_charge_finish(&cmap->map.memory);
free_cmap:
kfree(cmap);
return ERR_PTR(err);

View File

@ -108,13 +108,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
/* make sure page count doesn't overflow */
cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
cost += dev_map_bitmap_size(attr) * num_possible_cpus();
if (cost >= U32_MAX - PAGE_SIZE)
goto free_dtab;
dtab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
/* if map size is larger than memlock limit, reject it early */
err = bpf_map_precharge_memlock(dtab->map.pages);
/* if map size is larger than memlock limit, reject it */
err = bpf_map_charge_init(&dtab->map.memory, cost);
if (err)
goto free_dtab;
@ -125,19 +121,21 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
__alignof__(unsigned long),
GFP_KERNEL | __GFP_NOWARN);
if (!dtab->flush_needed)
goto free_dtab;
goto free_charge;
dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
sizeof(struct bpf_dtab_netdev *),
dtab->map.numa_node);
if (!dtab->netdev_map)
goto free_dtab;
goto free_charge;
spin_lock(&dev_map_lock);
list_add_tail_rcu(&dtab->list, &dev_map_list);
spin_unlock(&dev_map_lock);
return &dtab->map;
free_charge:
bpf_map_charge_finish(&dtab->map.memory);
free_dtab:
free_percpu(dtab->flush_needed);
kfree(dtab);

View File

@ -360,14 +360,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
else
cost += (u64) htab->elem_size * num_possible_cpus();
if (cost >= U32_MAX - PAGE_SIZE)
/* make sure page count doesn't overflow */
goto free_htab;
htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
/* if map size is larger than memlock limit, reject it early */
err = bpf_map_precharge_memlock(htab->map.pages);
/* if map size is larger than memlock limit, reject it */
err = bpf_map_charge_init(&htab->map.memory, cost);
if (err)
goto free_htab;
@ -376,7 +370,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
sizeof(struct bucket),
htab->map.numa_node);
if (!htab->buckets)
goto free_htab;
goto free_charge;
if (htab->map.map_flags & BPF_F_ZERO_SEED)
htab->hashrnd = 0;
@ -409,6 +403,8 @@ free_prealloc:
prealloc_destroy(htab);
free_buckets:
bpf_map_area_free(htab->buckets);
free_charge:
bpf_map_charge_finish(&htab->map.memory);
free_htab:
kfree(htab);
return ERR_PTR(err);

View File

@ -272,6 +272,8 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
int numa_node = bpf_map_attr_numa_node(attr);
struct bpf_cgroup_storage_map *map;
struct bpf_map_memory mem;
int ret;
if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
return ERR_PTR(-EINVAL);
@ -290,13 +292,18 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
/* max_entries is not used and enforced to be 0 */
return ERR_PTR(-EINVAL);
ret = bpf_map_charge_init(&mem, sizeof(struct bpf_cgroup_storage_map));
if (ret < 0)
return ERR_PTR(ret);
map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
__GFP_ZERO | GFP_USER, numa_node);
if (!map)
if (!map) {
bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
}
map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
PAGE_SIZE) >> PAGE_SHIFT;
bpf_map_charge_move(&map->map.memory, &mem);
/* copy mandatory map attributes */
bpf_map_init_from_attr(&map->map, attr);

View File

@ -573,14 +573,8 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
cost_per_node = sizeof(struct lpm_trie_node) +
attr->value_size + trie->data_size;
cost += (u64) attr->max_entries * cost_per_node;
if (cost >= U32_MAX - PAGE_SIZE) {
ret = -E2BIG;
goto out_err;
}
trie->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
ret = bpf_map_precharge_memlock(trie->map.pages);
ret = bpf_map_charge_init(&trie->map.memory, cost);
if (ret)
goto out_err;

View File

@ -67,29 +67,28 @@ static int queue_stack_map_alloc_check(union bpf_attr *attr)
static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
{
int ret, numa_node = bpf_map_attr_numa_node(attr);
struct bpf_map_memory mem = {0};
struct bpf_queue_stack *qs;
u64 size, queue_size, cost;
size = (u64) attr->max_entries + 1;
cost = queue_size = sizeof(*qs) + size * attr->value_size;
if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-E2BIG);
cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
ret = bpf_map_precharge_memlock(cost);
ret = bpf_map_charge_init(&mem, cost);
if (ret < 0)
return ERR_PTR(ret);
qs = bpf_map_area_alloc(queue_size, numa_node);
if (!qs)
if (!qs) {
bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
}
memset(qs, 0, sizeof(*qs));
bpf_map_init_from_attr(&qs->map, attr);
qs->map.pages = cost;
bpf_map_charge_move(&qs->map.memory, &mem);
qs->size = size;
raw_spin_lock_init(&qs->lock);

View File

@ -151,7 +151,8 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
{
int err, numa_node = bpf_map_attr_numa_node(attr);
struct reuseport_array *array;
u64 cost, array_size;
struct bpf_map_memory mem;
u64 array_size;
if (!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
@ -159,24 +160,20 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
array_size = sizeof(*array);
array_size += (u64)attr->max_entries * sizeof(struct sock *);
/* make sure there is no u32 overflow later in round_up() */
cost = array_size;
if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-ENOMEM);
cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
err = bpf_map_precharge_memlock(cost);
err = bpf_map_charge_init(&mem, array_size);
if (err)
return ERR_PTR(err);
/* allocate all map elements and zero-initialize them */
array = bpf_map_area_alloc(array_size, numa_node);
if (!array)
if (!array) {
bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
}
/* copy mandatory map attributes */
bpf_map_init_from_attr(&array->map, attr);
array->map.pages = cost;
bpf_map_charge_move(&array->map.memory, &mem);
return &array->map;
}

View File

@ -89,6 +89,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
{
u32 value_size = attr->value_size;
struct bpf_stack_map *smap;
struct bpf_map_memory mem;
u64 cost, n_buckets;
int err;
@ -116,40 +117,37 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
n_buckets = roundup_pow_of_two(attr->max_entries);
cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-E2BIG);
cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
err = bpf_map_charge_init(&mem, cost);
if (err)
return ERR_PTR(err);
smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
if (!smap)
if (!smap) {
bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
err = -E2BIG;
cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
if (cost >= U32_MAX - PAGE_SIZE)
goto free_smap;
}
bpf_map_init_from_attr(&smap->map, attr);
smap->map.value_size = value_size;
smap->n_buckets = n_buckets;
smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
err = bpf_map_precharge_memlock(smap->map.pages);
if (err)
goto free_smap;
err = get_callchain_buffers(sysctl_perf_event_max_stack);
if (err)
goto free_smap;
goto free_charge;
err = prealloc_elems_and_freelist(smap);
if (err)
goto put_buffers;
bpf_map_charge_move(&smap->map.memory, &mem);
return &smap->map;
put_buffers:
put_callchain_buffers();
free_smap:
free_charge:
bpf_map_charge_finish(&mem);
bpf_map_area_free(smap);
return ERR_PTR(err);
}

View File

@ -188,19 +188,6 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
map->numa_node = bpf_map_attr_numa_node(attr);
}
int bpf_map_precharge_memlock(u32 pages)
{
struct user_struct *user = get_current_user();
unsigned long memlock_limit, cur;
memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
cur = atomic_long_read(&user->locked_vm);
free_uid(user);
if (cur + pages > memlock_limit)
return -EPERM;
return 0;
}
static int bpf_charge_memlock(struct user_struct *user, u32 pages)
{
unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@ -214,45 +201,62 @@ static int bpf_charge_memlock(struct user_struct *user, u32 pages)
static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
{
atomic_long_sub(pages, &user->locked_vm);
if (user)
atomic_long_sub(pages, &user->locked_vm);
}
static int bpf_map_init_memlock(struct bpf_map *map)
int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size)
{
struct user_struct *user = get_current_user();
u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
struct user_struct *user;
int ret;
ret = bpf_charge_memlock(user, map->pages);
if (size >= U32_MAX - PAGE_SIZE)
return -E2BIG;
user = get_current_user();
ret = bpf_charge_memlock(user, pages);
if (ret) {
free_uid(user);
return ret;
}
map->user = user;
return ret;
mem->pages = pages;
mem->user = user;
return 0;
}
static void bpf_map_release_memlock(struct bpf_map *map)
void bpf_map_charge_finish(struct bpf_map_memory *mem)
{
struct user_struct *user = map->user;
bpf_uncharge_memlock(user, map->pages);
free_uid(user);
bpf_uncharge_memlock(mem->user, mem->pages);
free_uid(mem->user);
}
void bpf_map_charge_move(struct bpf_map_memory *dst,
struct bpf_map_memory *src)
{
*dst = *src;
/* Make sure src will not be used for the redundant uncharging. */
memset(src, 0, sizeof(struct bpf_map_memory));
}
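The map allocation paths converted throughout this patch follow the same charge/alloc/move pattern built from these helpers. A condensed sketch with a hypothetical map type and placeholder cost computation:

/* Hypothetical map type; the core only sees the embedded struct bpf_map. */
struct example_map {
	struct bpf_map map;
	/* type-specific fields ... */
};

static struct bpf_map *example_map_alloc(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_map_memory mem;
	struct example_map *emap;
	u64 cost;
	int err;

	cost = sizeof(*emap) + (u64)attr->max_entries * attr->value_size;

	/* charge the full cost against RLIMIT_MEMLOCK up front */
	err = bpf_map_charge_init(&mem, cost);
	if (err)
		return ERR_PTR(err);

	emap = bpf_map_area_alloc(sizeof(*emap), numa_node);
	if (!emap) {
		/* allocation failed: return the charge */
		bpf_map_charge_finish(&mem);
		return ERR_PTR(-ENOMEM);
	}

	bpf_map_init_from_attr(&emap->map, attr);
	/* success: the map now owns the charge until map_free time */
	bpf_map_charge_move(&emap->map.memory, &mem);
	return &emap->map;
}

On teardown, bpf_map_free_deferred() moves the charge out of the map before map_free() and finishes it afterwards, as shown later in this file.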
int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
{
int ret;
ret = bpf_charge_memlock(map->user, pages);
ret = bpf_charge_memlock(map->memory.user, pages);
if (ret)
return ret;
map->pages += pages;
map->memory.pages += pages;
return ret;
}
void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
{
bpf_uncharge_memlock(map->user, pages);
map->pages -= pages;
bpf_uncharge_memlock(map->memory.user, pages);
map->memory.pages -= pages;
}
static int bpf_map_alloc_id(struct bpf_map *map)
@ -303,11 +307,13 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
static void bpf_map_free_deferred(struct work_struct *work)
{
struct bpf_map *map = container_of(work, struct bpf_map, work);
struct bpf_map_memory mem;
bpf_map_release_memlock(map);
bpf_map_charge_move(&mem, &map->memory);
security_bpf_map_free(map);
/* implementation dependent freeing */
map->ops->map_free(map);
bpf_map_charge_finish(&mem);
}
static void bpf_map_put_uref(struct bpf_map *map)
@ -395,7 +401,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
map->value_size,
map->max_entries,
map->map_flags,
map->pages * 1ULL << PAGE_SHIFT,
map->memory.pages * 1ULL << PAGE_SHIFT,
map->id,
READ_ONCE(map->frozen));
@ -549,6 +555,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
static int map_create(union bpf_attr *attr)
{
int numa_node = bpf_map_attr_numa_node(attr);
struct bpf_map_memory mem;
struct bpf_map *map;
int f_flags;
int err;
@ -573,7 +580,7 @@ static int map_create(union bpf_attr *attr)
err = bpf_obj_name_cpy(map->name, attr->map_name);
if (err)
goto free_map_nouncharge;
goto free_map;
atomic_set(&map->refcnt, 1);
atomic_set(&map->usercnt, 1);
@ -583,20 +590,20 @@ static int map_create(union bpf_attr *attr)
if (!attr->btf_value_type_id) {
err = -EINVAL;
goto free_map_nouncharge;
goto free_map;
}
btf = btf_get_by_fd(attr->btf_fd);
if (IS_ERR(btf)) {
err = PTR_ERR(btf);
goto free_map_nouncharge;
goto free_map;
}
err = map_check_btf(map, btf, attr->btf_key_type_id,
attr->btf_value_type_id);
if (err) {
btf_put(btf);
goto free_map_nouncharge;
goto free_map;
}
map->btf = btf;
@ -608,15 +615,11 @@ static int map_create(union bpf_attr *attr)
err = security_bpf_map_alloc(map);
if (err)
goto free_map_nouncharge;
err = bpf_map_init_memlock(map);
if (err)
goto free_map_sec;
goto free_map;
err = bpf_map_alloc_id(map);
if (err)
goto free_map;
goto free_map_sec;
err = bpf_map_new_fd(map, f_flags);
if (err < 0) {
@ -632,13 +635,13 @@ static int map_create(union bpf_attr *attr)
return err;
free_map:
bpf_map_release_memlock(map);
free_map_sec:
security_bpf_map_free(map);
free_map_nouncharge:
free_map:
btf_put(map->btf);
bpf_map_charge_move(&mem, &map->memory);
map->ops->map_free(map);
bpf_map_charge_finish(&mem);
return err;
}
@ -1585,6 +1588,14 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
default:
return -EINVAL;
}
case BPF_PROG_TYPE_CGROUP_SKB:
switch (expected_attach_type) {
case BPF_CGROUP_INET_INGRESS:
case BPF_CGROUP_INET_EGRESS:
return 0;
default:
return -EINVAL;
}
default:
return 0;
}
@ -1604,7 +1615,9 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
if (CHECK_ATTR(BPF_PROG_LOAD))
return -EINVAL;
if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | BPF_F_ANY_ALIGNMENT))
if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
BPF_F_ANY_ALIGNMENT |
BPF_F_TEST_RND_HI32))
return -EINVAL;
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
@ -1834,6 +1847,10 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
case BPF_PROG_TYPE_CGROUP_SKB:
return prog->enforce_expected_attach_type &&
prog->expected_attach_type != attach_type ?
-EINVAL : 0;
default:
return 0;
}

View File

@ -176,7 +176,7 @@ struct bpf_verifier_stack_elem {
struct bpf_verifier_stack_elem *next;
};
#define BPF_COMPLEXITY_LIMIT_STACK 1024
#define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192
#define BPF_COMPLEXITY_LIMIT_STATES 64
#define BPF_MAP_PTR_UNPRIV 1UL
@ -782,8 +782,9 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
if (err)
goto err;
elem->st.speculative |= speculative;
if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
verbose(env, "BPF program is too complex\n");
if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
verbose(env, "The sequence of %d jumps is too complex.\n",
env->stack_size);
goto err;
}
return &elem->st;
@ -981,6 +982,7 @@ static void mark_reg_not_init(struct bpf_verifier_env *env,
__mark_reg_not_init(regs + regno);
}
#define DEF_NOT_SUBREG (0)
static void init_reg_state(struct bpf_verifier_env *env,
struct bpf_func_state *state)
{
@ -991,6 +993,7 @@ static void init_reg_state(struct bpf_verifier_env *env,
mark_reg_not_init(env, regs, i);
regs[i].live = REG_LIVE_NONE;
regs[i].parent = NULL;
regs[i].subreg_def = DEF_NOT_SUBREG;
}
/* frame pointer */
@ -1136,7 +1139,7 @@ next:
*/
static int mark_reg_read(struct bpf_verifier_env *env,
const struct bpf_reg_state *state,
struct bpf_reg_state *parent)
struct bpf_reg_state *parent, u8 flag)
{
bool writes = parent == state->parent; /* Observe write marks */
int cnt = 0;
@ -1151,17 +1154,26 @@ static int mark_reg_read(struct bpf_verifier_env *env,
parent->var_off.value, parent->off);
return -EFAULT;
}
if (parent->live & REG_LIVE_READ)
/* The first condition is more likely to be true than the
* second, so check it first.
*/
if ((parent->live & REG_LIVE_READ) == flag ||
parent->live & REG_LIVE_READ64)
/* The parentage chain never changes and
* this parent was already marked as LIVE_READ.
* There is no need to keep walking the chain again and
* keep re-marking all parents as LIVE_READ.
* This case happens when the same register is read
* multiple times without writes into it in-between.
* Also, if parent has the stronger REG_LIVE_READ64 set,
* then no need to set the weak REG_LIVE_READ32.
*/
break;
/* ... then we depend on parent's value */
parent->live |= REG_LIVE_READ;
parent->live |= flag;
/* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
if (flag == REG_LIVE_READ64)
parent->live &= ~REG_LIVE_READ32;
state = parent;
parent = state->parent;
writes = true;
@ -1173,12 +1185,129 @@ static int mark_reg_read(struct bpf_verifier_env *env,
return 0;
}
/* This function is supposed to be used by the following 32-bit optimization
* code only. It returns TRUE if the source or destination register operates
* on 64-bit, otherwise return FALSE.
*/
static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
{
u8 code, class, op;
code = insn->code;
class = BPF_CLASS(code);
op = BPF_OP(code);
if (class == BPF_JMP) {
/* BPF_EXIT for "main" will reach here. Return TRUE
* conservatively.
*/
if (op == BPF_EXIT)
return true;
if (op == BPF_CALL) {
/* BPF to BPF call will reach here because of marking
* caller saved clobber with DST_OP_NO_MARK for which we
* don't care the register def because they are anyway
* marked as NOT_INIT already.
*/
if (insn->src_reg == BPF_PSEUDO_CALL)
return false;
/* Helper call will reach here because of arg type
* check, conservatively return TRUE.
*/
if (t == SRC_OP)
return true;
return false;
}
}
if (class == BPF_ALU64 || class == BPF_JMP ||
/* BPF_END always uses the BPF_ALU class. */
(class == BPF_ALU && op == BPF_END && insn->imm == 64))
return true;
if (class == BPF_ALU || class == BPF_JMP32)
return false;
if (class == BPF_LDX) {
if (t != SRC_OP)
return BPF_SIZE(code) == BPF_DW;
/* LDX source must be ptr. */
return true;
}
if (class == BPF_STX) {
if (reg->type != SCALAR_VALUE)
return true;
return BPF_SIZE(code) == BPF_DW;
}
if (class == BPF_LD) {
u8 mode = BPF_MODE(code);
/* LD_IMM64 */
if (mode == BPF_IMM)
return true;
/* Both LD_IND and LD_ABS return 32-bit data. */
if (t != SRC_OP)
return false;
/* Implicit ctx ptr. */
if (regno == BPF_REG_6)
return true;
/* Explicit source could be any width. */
return true;
}
if (class == BPF_ST)
/* The only source register for BPF_ST is a ptr. */
return true;
/* Conservatively return true by default. */
return true;
}
/* Return TRUE if INSN doesn't have an explicit value definition. */
static bool insn_no_def(struct bpf_insn *insn)
{
u8 class = BPF_CLASS(insn->code);
return (class == BPF_JMP || class == BPF_JMP32 ||
class == BPF_STX || class == BPF_ST);
}
/* Return TRUE if INSN has defined any 32-bit value explicitly. */
static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
if (insn_no_def(insn))
return false;
return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP);
}
static void mark_insn_zext(struct bpf_verifier_env *env,
struct bpf_reg_state *reg)
{
s32 def_idx = reg->subreg_def;
if (def_idx == DEF_NOT_SUBREG)
return;
env->insn_aux_data[def_idx - 1].zext_dst = true;
/* The dst will be zero extended, so won't be sub-register anymore. */
reg->subreg_def = DEF_NOT_SUBREG;
}
static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
enum reg_arg_type t)
{
struct bpf_verifier_state *vstate = env->cur_state;
struct bpf_func_state *state = vstate->frame[vstate->curframe];
struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
struct bpf_reg_state *reg, *regs = state->regs;
bool rw64;
if (regno >= MAX_BPF_REG) {
verbose(env, "R%d is invalid\n", regno);
@ -1186,6 +1315,7 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
}
reg = &regs[regno];
rw64 = is_reg64(env, insn, regno, reg, t);
if (t == SRC_OP) {
/* check whether register used as source operand can be read */
if (reg->type == NOT_INIT) {
@ -1196,7 +1326,11 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
if (regno == BPF_REG_FP)
return 0;
return mark_reg_read(env, reg, reg->parent);
if (rw64)
mark_insn_zext(env, reg);
return mark_reg_read(env, reg, reg->parent,
rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
} else {
/* check whether register used as dest operand can be written to */
if (regno == BPF_REG_FP) {
@ -1204,6 +1338,7 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
return -EACCES;
}
reg->live |= REG_LIVE_WRITTEN;
reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
if (t == DST_OP)
mark_reg_unknown(env, regs, regno);
}
@ -1383,7 +1518,8 @@ static int check_stack_read(struct bpf_verifier_env *env,
state->regs[value_regno].live |= REG_LIVE_WRITTEN;
}
mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
reg_state->stack[spi].spilled_ptr.parent);
reg_state->stack[spi].spilled_ptr.parent,
REG_LIVE_READ64);
return 0;
} else {
int zeros = 0;
@ -1400,7 +1536,8 @@ static int check_stack_read(struct bpf_verifier_env *env,
return -EACCES;
}
mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
reg_state->stack[spi].spilled_ptr.parent);
reg_state->stack[spi].spilled_ptr.parent,
REG_LIVE_READ64);
if (value_regno >= 0) {
if (zeros == size) {
/* any size read into register is zero extended,
@ -2109,6 +2246,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
value_regno);
if (reg_type_may_be_null(reg_type))
regs[value_regno].id = ++env->id_gen;
/* A load of a ctx field could have an actual load size
* different from the one encoded in the insn. When the
* dst is a PTR, it is for sure not a sub-register.
*/
regs[value_regno].subreg_def = DEF_NOT_SUBREG;
}
regs[value_regno].type = reg_type;
}
@ -2368,7 +2511,8 @@ mark:
* the whole slot to be marked as 'read'
*/
mark_reg_read(env, &state->stack[spi].spilled_ptr,
state->stack[spi].spilled_ptr.parent);
state->stack[spi].spilled_ptr.parent,
REG_LIVE_READ64);
}
return update_stack_depth(env, state, min_off);
}
@ -3332,6 +3476,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
}
/* helper call returns 64-bit value. */
regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
/* update return register (already marked as written above) */
if (fn->ret_type == RET_INTEGER) {
/* sets type to SCALAR_VALUE */
@ -4263,6 +4410,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
*/
*dst_reg = *src_reg;
dst_reg->live |= REG_LIVE_WRITTEN;
dst_reg->subreg_def = DEF_NOT_SUBREG;
} else {
/* R1 = (u32) R2 */
if (is_pointer_value(env, insn->src_reg)) {
@ -4273,6 +4421,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
} else if (src_reg->type == SCALAR_VALUE) {
*dst_reg = *src_reg;
dst_reg->live |= REG_LIVE_WRITTEN;
dst_reg->subreg_def = env->insn_idx + 1;
} else {
mark_reg_unknown(env, regs,
insn->dst_reg);
@ -5352,16 +5501,23 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
* Already marked as written above.
*/
mark_reg_unknown(env, regs, BPF_REG_0);
/* ld_abs load up to 32-bit skb data. */
regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
return 0;
}
static int check_return_code(struct bpf_verifier_env *env)
{
struct tnum enforce_attach_type_range = tnum_unknown;
struct bpf_reg_state *reg;
struct tnum range = tnum_range(0, 1);
switch (env->prog->type) {
case BPF_PROG_TYPE_CGROUP_SKB:
if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
range = tnum_range(0, 3);
enforce_attach_type_range = tnum_range(2, 3);
}
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_SOCK_OPS:
@ -5380,18 +5536,23 @@ static int check_return_code(struct bpf_verifier_env *env)
}
if (!tnum_in(range, reg->var_off)) {
char tn_buf[48];
verbose(env, "At program exit the register R0 ");
if (!tnum_is_unknown(reg->var_off)) {
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
verbose(env, "has value %s", tn_buf);
} else {
verbose(env, "has unknown scalar value");
}
verbose(env, " should have been 0 or 1\n");
tnum_strn(tn_buf, sizeof(tn_buf), range);
verbose(env, " should have been %s\n", tn_buf);
return -EINVAL;
}
if (!tnum_is_unknown(enforce_attach_type_range) &&
tnum_in(enforce_attach_type_range, reg->var_off))
env->prog->enforce_expected_attach_type = 1;
return 0;
}
@ -5435,7 +5596,25 @@ enum {
BRANCH = 2,
};
#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
static u32 state_htab_size(struct bpf_verifier_env *env)
{
return env->prog->len;
}
static struct bpf_verifier_state_list **explored_state(
struct bpf_verifier_env *env,
int idx)
{
struct bpf_verifier_state *cur = env->cur_state;
struct bpf_func_state *state = cur->frame[cur->curframe];
return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
}
static void init_explored_state(struct bpf_verifier_env *env, int idx)
{
env->insn_aux_data[idx].prune_point = true;
}
/* t, w, e - match pseudo-code above:
* t - index of current instruction
@ -5461,7 +5640,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
if (e == BRANCH)
/* mark branch target for state pruning */
env->explored_states[w] = STATE_LIST_MARK;
init_explored_state(env, w);
if (insn_state[w] == 0) {
/* tree-edge */
@ -5529,9 +5708,9 @@ peek_stack:
else if (ret < 0)
goto err_free;
if (t + 1 < insn_cnt)
env->explored_states[t + 1] = STATE_LIST_MARK;
init_explored_state(env, t + 1);
if (insns[t].src_reg == BPF_PSEUDO_CALL) {
env->explored_states[t] = STATE_LIST_MARK;
init_explored_state(env, t);
ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
if (ret == 1)
goto peek_stack;
@ -5554,10 +5733,10 @@ peek_stack:
* after every call and jump
*/
if (t + 1 < insn_cnt)
env->explored_states[t + 1] = STATE_LIST_MARK;
init_explored_state(env, t + 1);
} else {
/* conditional jump with two edges */
env->explored_states[t] = STATE_LIST_MARK;
init_explored_state(env, t);
ret = push_insn(t, t + 1, FALLTHROUGH, env);
if (ret == 1)
goto peek_stack;
@ -6005,12 +6184,10 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
struct bpf_verifier_state_list *sl;
int i;
sl = env->explored_states[insn];
if (!sl)
return;
while (sl != STATE_LIST_MARK) {
if (sl->state.curframe != cur->curframe)
sl = *explored_state(env, insn);
while (sl) {
if (sl->state.insn_idx != insn ||
sl->state.curframe != cur->curframe)
goto next;
for (i = 0; i <= cur->curframe; i++)
if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
@ -6292,20 +6469,33 @@ static bool states_equal(struct bpf_verifier_env *env,
return true;
}
/* Return 0 if no propagation happened. Return a negative error code if an
* error happened. Otherwise, return the propagated bit.
*/
static int propagate_liveness_reg(struct bpf_verifier_env *env,
struct bpf_reg_state *reg,
struct bpf_reg_state *parent_reg)
{
u8 parent_flag = parent_reg->live & REG_LIVE_READ;
u8 flag = reg->live & REG_LIVE_READ;
int err;
if (parent_reg->live & REG_LIVE_READ || !(reg->live & REG_LIVE_READ))
/* When we get here, the read flags of PARENT_REG or REG could be any of
* REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
* for propagation if PARENT_REG already has the strongest flag,
* REG_LIVE_READ64.
*/
if (parent_flag == REG_LIVE_READ64 ||
/* Or if there is no read flag from REG. */
!flag ||
/* Or if the read flag from REG is the same as PARENT_REG. */
parent_flag == flag)
return 0;
err = mark_reg_read(env, reg, parent_reg);
err = mark_reg_read(env, reg, parent_reg, flag);
if (err)
return err;
return 0;
return flag;
}
/* A write screens off any subsequent reads; but write marks come from the
@ -6339,8 +6529,10 @@ static int propagate_liveness(struct bpf_verifier_env *env,
for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
err = propagate_liveness_reg(env, &state_reg[i],
&parent_reg[i]);
if (err)
if (err < 0)
return err;
if (err == REG_LIVE_READ64)
mark_insn_zext(env, &parent_reg[i]);
}
/* Propagate stack slots. */
@ -6350,11 +6542,11 @@ static int propagate_liveness(struct bpf_verifier_env *env,
state_reg = &state->stack[i].spilled_ptr;
err = propagate_liveness_reg(env, state_reg,
parent_reg);
if (err)
if (err < 0)
return err;
}
}
return err;
return 0;
}
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
@ -6364,18 +6556,21 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
struct bpf_verifier_state *cur = env->cur_state, *new;
int i, j, err, states_cnt = 0;
pprev = &env->explored_states[insn_idx];
sl = *pprev;
if (!sl)
if (!env->insn_aux_data[insn_idx].prune_point)
/* this 'insn_idx' instruction wasn't marked, so we will not
* be doing state search here
*/
return 0;
pprev = explored_state(env, insn_idx);
sl = *pprev;
clean_live_states(env, insn_idx, cur);
while (sl != STATE_LIST_MARK) {
while (sl) {
states_cnt++;
if (sl->state.insn_idx != insn_idx)
goto next;
if (states_equal(env, &sl->state, cur)) {
sl->hit_cnt++;
/* reached equivalent register/stack state,
@ -6393,7 +6588,6 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
return err;
return 1;
}
states_cnt++;
sl->miss_cnt++;
/* heuristic to determine whether this state is beneficial
* to keep checking from state equivalence point of view.
@ -6420,6 +6614,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
sl = *pprev;
continue;
}
next:
pprev = &sl->next;
sl = *pprev;
}
@ -6451,8 +6646,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
kfree(new_sl);
return err;
}
new_sl->next = env->explored_states[insn_idx];
env->explored_states[insn_idx] = new_sl;
new->insn_idx = insn_idx;
new_sl->next = *explored_state(env, insn_idx);
*explored_state(env, insn_idx) = new_sl;
/* connect new state to parentage chain. Current frame needs all
* registers connected. Only r6 - r9 of the callers are alive (pushed
* to the stack implicitly by JITs) so in callers' frames connect just
@ -7130,14 +7326,23 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
* insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
* [0, off) and [off, end) to new locations, so the patched range stays zero
*/
static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
u32 off, u32 cnt)
static int adjust_insn_aux_data(struct bpf_verifier_env *env,
struct bpf_prog *new_prog, u32 off, u32 cnt)
{
struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
struct bpf_insn *insn = new_prog->insnsi;
u32 prog_len;
int i;
/* aux info at OFF always needs adjustment, no matter whether the fast path
* (cnt == 1) is taken or not. There is no guarantee that the INSN at OFF is
* the original insn of the old prog.
*/
old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
if (cnt == 1)
return 0;
prog_len = new_prog->len;
new_data = vzalloc(array_size(prog_len,
sizeof(struct bpf_insn_aux_data)));
if (!new_data)
@ -7145,8 +7350,10 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
memcpy(new_data + off + cnt - 1, old_data + off,
sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
for (i = off; i < off + cnt - 1; i++)
for (i = off; i < off + cnt - 1; i++) {
new_data[i].seen = true;
new_data[i].zext_dst = insn_has_def32(env, insn + i);
}
env->insn_aux_data = new_data;
vfree(old_data);
return 0;
@ -7179,7 +7386,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
env->insn_aux_data[off].orig_idx);
return NULL;
}
if (adjust_insn_aux_data(env, new_prog->len, off, len))
if (adjust_insn_aux_data(env, new_prog, off, len))
return NULL;
adjust_subprog_starts(env, off, len);
return new_prog;
@ -7443,6 +7650,84 @@ static int opt_remove_nops(struct bpf_verifier_env *env)
return 0;
}
static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
const union bpf_attr *attr)
{
struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
struct bpf_insn_aux_data *aux = env->insn_aux_data;
int i, patch_len, delta = 0, len = env->prog->len;
struct bpf_insn *insns = env->prog->insnsi;
struct bpf_prog *new_prog;
bool rnd_hi32;
rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
zext_patch[1] = BPF_ZEXT_REG(0);
rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
for (i = 0; i < len; i++) {
int adj_idx = i + delta;
struct bpf_insn insn;
insn = insns[adj_idx];
if (!aux[adj_idx].zext_dst) {
u8 code, class;
u32 imm_rnd;
if (!rnd_hi32)
continue;
code = insn.code;
class = BPF_CLASS(code);
if (insn_no_def(&insn))
continue;
/* NOTE: arg "reg" (the fourth one) is only used for
* BPF_STX, which has been ruled out by the check
* above, so it is safe to pass NULL here.
*/
if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) {
if (class == BPF_LD &&
BPF_MODE(code) == BPF_IMM)
i++;
continue;
}
/* ctx load could be transformed into wider load. */
if (class == BPF_LDX &&
aux[adj_idx].ptr_type == PTR_TO_CTX)
continue;
imm_rnd = get_random_int();
rnd_hi32_patch[0] = insn;
rnd_hi32_patch[1].imm = imm_rnd;
rnd_hi32_patch[3].dst_reg = insn.dst_reg;
patch = rnd_hi32_patch;
patch_len = 4;
goto apply_patch_buffer;
}
if (!bpf_jit_needs_zext())
continue;
zext_patch[0] = insn;
zext_patch[1].dst_reg = insn.dst_reg;
zext_patch[1].src_reg = insn.dst_reg;
patch = zext_patch;
patch_len = 2;
apply_patch_buffer:
new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
if (!new_prog)
return -ENOMEM;
env->prog = new_prog;
insns = new_prog->insnsi;
aux = env->insn_aux_data;
delta += patch_len - 1;
}
return 0;
}
/* convert load instructions that access fields of a context type into a
* sequence of instructions that access fields of the underlying structure:
* struct __sk_buff -> struct sk_buff
@ -8130,16 +8415,15 @@ static void free_states(struct bpf_verifier_env *env)
if (!env->explored_states)
return;
for (i = 0; i < env->prog->len; i++) {
for (i = 0; i < state_htab_size(env); i++) {
sl = env->explored_states[i];
if (sl)
while (sl != STATE_LIST_MARK) {
sln = sl->next;
free_verifier_state(&sl->state, false);
kfree(sl);
sl = sln;
}
while (sl) {
sln = sl->next;
free_verifier_state(&sl->state, false);
kfree(sl);
sl = sln;
}
}
kvfree(env->explored_states);
@ -8239,7 +8523,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
goto skip_full_check;
}
env->explored_states = kvcalloc(env->prog->len,
env->explored_states = kvcalloc(state_htab_size(env),
sizeof(struct bpf_verifier_state_list *),
GFP_USER);
ret = -ENOMEM;
@ -8294,6 +8578,15 @@ skip_full_check:
if (ret == 0)
ret = fixup_bpf_calls(env);
/* do the 32-bit optimization after insn patching is done so that the
* patched insns can be handled correctly.
*/
if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
: false;
}
if (ret == 0)
ret = fixup_call_args(env);

View File

@ -37,13 +37,9 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
cost += sizeof(struct list_head) * num_possible_cpus();
if (cost >= U32_MAX - PAGE_SIZE)
goto free_m;
m->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
/* Notice: returns -EPERM if map size is larger than memlock limit */
err = bpf_map_precharge_memlock(m->map.pages);
err = bpf_map_charge_init(&m->map.memory, cost);
if (err)
goto free_m;
@ -51,7 +47,7 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
m->flush_list = alloc_percpu(struct list_head);
if (!m->flush_list)
goto free_m;
goto free_charge;
for_each_possible_cpu(cpu)
INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
@ -65,6 +61,8 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
free_percpu:
free_percpu(m->flush_list);
free_charge:
bpf_map_charge_finish(&m->map.memory);
free_m:
kfree(m);
return ERR_PTR(err);

View File

@ -4955,8 +4955,6 @@ static void css_release_work_fn(struct work_struct *work)
if (cgrp->kn)
RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
NULL);
cgroup_bpf_put(cgrp);
}
mutex_unlock(&cgroup_mutex);
@ -5482,6 +5480,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
cgroup1_check_for_release(parent);
cgroup_bpf_offline(cgrp);
/* put the base reference */
percpu_ref_kill(&cgrp->self.refcnt);
@ -6221,6 +6221,7 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
* Don't use cgroup_get_live().
*/
cgroup_get(sock_cgroup_ptr(skcd));
cgroup_bpf_get(sock_cgroup_ptr(skcd));
return;
}
@ -6232,6 +6233,7 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
cset = task_css_set(current);
if (likely(cgroup_tryget(cset->dfl_cgrp))) {
skcd->val = (unsigned long)cset->dfl_cgrp;
cgroup_bpf_get(cset->dfl_cgrp);
break;
}
cpu_relax();
@ -6242,7 +6244,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
void cgroup_sk_free(struct sock_cgroup_data *skcd)
{
cgroup_put(sock_cgroup_ptr(skcd));
struct cgroup *cgrp = sock_cgroup_ptr(skcd);
cgroup_bpf_put(cgrp);
cgroup_put(cgrp);
}
#endif /* CONFIG_SOCK_CGROUP_DATA */

View File

@ -19,6 +19,9 @@
#include "trace_probe.h"
#include "trace.h"
#define bpf_event_rcu_dereference(p) \
rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))
#ifdef CONFIG_MODULES
struct bpf_trace_module {
struct module *module;
@ -567,6 +570,69 @@ static const struct bpf_func_proto bpf_probe_read_str_proto = {
.arg3_type = ARG_ANYTHING,
};
struct send_signal_irq_work {
struct irq_work irq_work;
struct task_struct *task;
u32 sig;
};
static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
static void do_bpf_send_signal(struct irq_work *entry)
{
struct send_signal_irq_work *work;
work = container_of(entry, struct send_signal_irq_work, irq_work);
group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, PIDTYPE_TGID);
}
BPF_CALL_1(bpf_send_signal, u32, sig)
{
struct send_signal_irq_work *work = NULL;
/* Similar to bpf_probe_write_user, the task needs to be
* in a sound condition and kernel memory access must be
* permitted in order to send a signal to the current
* task.
*/
if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
return -EPERM;
if (unlikely(uaccess_kernel()))
return -EPERM;
if (unlikely(!nmi_uaccess_okay()))
return -EPERM;
if (in_nmi()) {
/* Do an early check on signal validity. Otherwise,
* the error is lost in deferred irq_work.
*/
if (unlikely(!valid_signal(sig)))
return -EINVAL;
work = this_cpu_ptr(&send_signal_work);
if (work->irq_work.flags & IRQ_WORK_BUSY)
return -EBUSY;
/* Add the current task, which is the target of the signal,
* to the irq_work. The current task may have changed by the
* time the queued irq work gets executed.
*/
work->task = current;
work->sig = sig;
irq_work_queue(&work->irq_work);
return 0;
}
return group_send_sig_info(sig, SEND_SIG_PRIV, current, PIDTYPE_TGID);
}
static const struct bpf_func_proto bpf_send_signal_proto = {
.func = bpf_send_signal,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *
tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@ -617,6 +683,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
#endif
case BPF_FUNC_send_signal:
return &bpf_send_signal_proto;
default:
return NULL;
}
@ -1034,7 +1102,7 @@ static DEFINE_MUTEX(bpf_event_mutex);
int perf_event_attach_bpf_prog(struct perf_event *event,
struct bpf_prog *prog)
{
struct bpf_prog_array __rcu *old_array;
struct bpf_prog_array *old_array;
struct bpf_prog_array *new_array;
int ret = -EEXIST;
@ -1052,7 +1120,7 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
if (event->prog)
goto unlock;
old_array = event->tp_event->prog_array;
old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
if (old_array &&
bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
ret = -E2BIG;
@ -1075,7 +1143,7 @@ unlock:
void perf_event_detach_bpf_prog(struct perf_event *event)
{
struct bpf_prog_array __rcu *old_array;
struct bpf_prog_array *old_array;
struct bpf_prog_array *new_array;
int ret;
@ -1084,7 +1152,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
if (!event->prog)
goto unlock;
old_array = event->tp_event->prog_array;
old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
if (ret == -ENOENT)
goto unlock;
@ -1106,6 +1174,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
{
struct perf_event_query_bpf __user *uquery = info;
struct perf_event_query_bpf query = {};
struct bpf_prog_array *progs;
u32 *ids, prog_cnt, ids_len;
int ret;
@ -1130,10 +1199,8 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
*/
mutex_lock(&bpf_event_mutex);
ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
ids,
ids_len,
&prog_cnt);
progs = bpf_event_rcu_dereference(event->tp_event->prog_array);
ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt);
mutex_unlock(&bpf_event_mutex);
if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
@ -1343,5 +1410,18 @@ static int __init bpf_event_init(void)
return 0;
}
static int __init send_signal_irq_work_init(void)
{
int cpu;
struct send_signal_irq_work *work;
for_each_possible_cpu(cpu) {
work = per_cpu_ptr(&send_signal_work, cpu);
init_irq_work(&work->irq_work, do_bpf_send_signal);
}
return 0;
}
fs_initcall(bpf_event_init);
subsys_initcall(send_signal_irq_work_init);
#endif /* CONFIG_MODULES */

View File

@ -627,6 +627,7 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
unsigned int i;
u32 nbuckets;
u64 cost;
int ret;
smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN);
if (!smap)
@ -635,13 +636,21 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
smap->bucket_log = ilog2(roundup_pow_of_two(num_possible_cpus()));
nbuckets = 1U << smap->bucket_log;
cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
ret = bpf_map_charge_init(&smap->map.memory, cost);
if (ret < 0) {
kfree(smap);
return ERR_PTR(ret);
}
smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
GFP_USER | __GFP_NOWARN);
if (!smap->buckets) {
bpf_map_charge_finish(&smap->map.memory);
kfree(smap);
return ERR_PTR(-ENOMEM);
}
cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
for (i = 0; i < nbuckets; i++) {
INIT_HLIST_HEAD(&smap->buckets[i].list);
@ -651,7 +660,6 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size;
smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) %
BPF_SK_STORAGE_CACHE_SIZE;
smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
return &smap->map;
}

View File

@ -44,13 +44,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
/* Make sure page count doesn't overflow. */
cost = (u64) stab->map.max_entries * sizeof(struct sock *);
if (cost >= U32_MAX - PAGE_SIZE) {
err = -EINVAL;
goto free_stab;
}
stab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
err = bpf_map_precharge_memlock(stab->map.pages);
err = bpf_map_charge_init(&stab->map.memory, cost);
if (err)
goto free_stab;
@ -60,6 +54,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
if (stab->sks)
return &stab->map;
err = -ENOMEM;
bpf_map_charge_finish(&stab->map.memory);
free_stab:
kfree(stab);
return ERR_PTR(err);

View File

@ -287,16 +287,9 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
return ret;
}
static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
static int __ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
unsigned int mtu;
int ret;
ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
if (ret) {
kfree_skb(skb);
return ret;
}
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
@ -315,18 +308,37 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
return ip_finish_output2(net, sk, skb);
}
static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
int ret;
ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
switch (ret) {
case NET_XMIT_SUCCESS:
return __ip_finish_output(net, sk, skb);
case NET_XMIT_CN:
return __ip_finish_output(net, sk, skb) ? : ret;
default:
kfree_skb(skb);
return ret;
}
}
static int ip_mc_finish_output(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
int ret;
ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
if (ret) {
switch (ret) {
case NET_XMIT_SUCCESS:
return dev_loopback_xmit(net, sk, skb);
case NET_XMIT_CN:
return dev_loopback_xmit(net, sk, skb) ? : ret;
default:
kfree_skb(skb);
return ret;
}
return dev_loopback_xmit(net, sk, skb);
}
int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)

View File

@ -128,16 +128,8 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
return -EINVAL;
}
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
int ret;
ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
if (ret) {
kfree_skb(skb);
return ret;
}
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
if (skb_dst(skb)->xfrm) {
@ -154,6 +146,22 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
return ip6_finish_output2(net, sk, skb);
}
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
int ret;
ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
switch (ret) {
case NET_XMIT_SUCCESS:
return __ip6_finish_output(net, sk, skb);
case NET_XMIT_CN:
return __ip6_finish_output(net, sk, skb) ? : ret;
default:
kfree_skb(skb);
return ret;
}
}
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;

View File

@ -1,6 +1,7 @@
cpustat
fds_example
hbm
ibumad
lathist
lwt_len_hist
map_perf_test

View File

@ -26,7 +26,6 @@ hostprogs-y += map_perf_test
hostprogs-y += test_overhead
hostprogs-y += test_cgrp2_array_pin
hostprogs-y += test_cgrp2_attach
hostprogs-y += test_cgrp2_attach2
hostprogs-y += test_cgrp2_sock
hostprogs-y += test_cgrp2_sock2
hostprogs-y += xdp1
@ -81,7 +80,6 @@ map_perf_test-objs := bpf_load.o map_perf_test_user.o
test_overhead-objs := bpf_load.o test_overhead_user.o
test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o
test_cgrp2_attach-objs := test_cgrp2_attach.o
test_cgrp2_attach2-objs := test_cgrp2_attach2.o $(CGROUP_HELPERS)
test_cgrp2_sock-objs := test_cgrp2_sock.o
test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o
xdp1-objs := xdp1_user.o

View File

@ -40,7 +40,7 @@ int prog_cnt;
int prog_array_fd = -1;
struct bpf_map_data map_data[MAX_MAPS];
int map_data_count = 0;
int map_data_count;
static int populate_prog_array(const char *event, int prog_fd)
{
@ -65,7 +65,7 @@ static int write_kprobe_events(const char *val)
else
flags = O_WRONLY | O_APPEND;
fd = open("/sys/kernel/debug/tracing/kprobe_events", flags);
fd = open(DEBUGFS "kprobe_events", flags);
ret = write(fd, val, strlen(val));
close(fd);
@ -490,8 +490,8 @@ static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
/* Verify no newer features were requested */
if (validate_zero) {
addr = (unsigned char*) def + map_sz_copy;
end = (unsigned char*) def + map_sz_elf;
addr = (unsigned char *) def + map_sz_copy;
end = (unsigned char *) def + map_sz_elf;
for (; addr < end; addr++) {
if (*addr != 0) {
free(sym);

View File

@ -13,10 +13,10 @@ Usage() {
echo "egress or ingress bandwidht. It then uses iperf3 or netperf to create"
echo "loads. The output is the goodput in Mbps (unless -D was used)."
echo ""
echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>] [-D]"
echo " [-d=<delay>|--delay=<delay>] [--debug] [-E]"
echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]"
echo " [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E]"
echo " [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id >]"
echo " [-l] [-N] [-p=<port>|--port=<port>] [-P]"
echo " [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]"
echo " [-q=<qdisc>] [-R] [-s=<server>|--server=<server]"
echo " [-S|--stats] -t=<time>|--time=<time>] [-w] [cubic|dctcp]"
echo " Where:"
@ -33,6 +33,7 @@ Usage() {
echo " -f or --flows number of concurrent flows (default=1)"
echo " -i or --id cgroup id (an integer, default is 1)"
echo " -N use netperf instead of iperf3"
echo " --no_cn Do not return CN notifications"
echo " -l do not limit flows using loopback"
echo " -h Help"
echo " -p or --port iperf3 port (default is 5201)"
@ -115,6 +116,9 @@ processArgs () {
-c=*|--cc=*)
cc="${i#*=}"
;;
--no_cn)
flags="$flags --no_cn"
;;
--debug)
flags="$flags -d"
debug_flag=1

View File

@ -16,6 +16,7 @@
* -l Also limit flows doing loopback
* -n <#> To create cgroup \"/hbm#\" and attach prog
* Default is /hbm1
* --no_cn Do not return cn notifications
* -r <rate> Rate limit in Mbps
* -s Get HBM stats (marked, dropped, etc.)
* -t <time> Exit after specified seconds (default is 0)
@ -42,6 +43,7 @@
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include <getopt.h>
#include "bpf_load.h"
#include "bpf_rlimit.h"
@ -59,6 +61,7 @@ bool stats_flag;
bool loopback_flag;
bool debugFlag;
bool work_conserving_flag;
bool no_cn_flag;
static void Usage(void);
static void read_trace_pipe2(void);
@ -185,6 +188,7 @@ static int run_bpf_prog(char *prog, int cg_id)
qstats.rate = rate;
qstats.stats = stats_flag ? 1 : 0;
qstats.loopback = loopback_flag ? 1 : 0;
qstats.no_cn = no_cn_flag ? 1 : 0;
if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY)) {
printf("ERROR: Could not update map element\n");
goto err;
@ -312,6 +316,14 @@ static int run_bpf_prog(char *prog, int cg_id)
double percent_pkts, percent_bytes;
char fname[100];
FILE *fout;
int k;
static const char *returnValNames[] = {
"DROP_PKT",
"ALLOW_PKT",
"DROP_PKT_CWR",
"ALLOW_PKT_CWR"
};
#define RET_VAL_COUNT 4
// Future support of ingress
// if (!outFlag)
@ -346,6 +358,31 @@ static int run_bpf_prog(char *prog, int cg_id)
(qstats.bytes_total + 1);
fprintf(fout, "pkts_dropped_percent:%6.2f\n", percent_pkts);
fprintf(fout, "bytes_dropped_percent:%6.2f\n", percent_bytes);
// ECN CE markings
percent_pkts = (qstats.pkts_ecn_ce * 100.0) /
(qstats.pkts_total + 1);
fprintf(fout, "pkts_ecn_ce:%6.2f (%d)\n", percent_pkts,
(int)qstats.pkts_ecn_ce);
// Average cwnd
fprintf(fout, "avg cwnd:%d\n",
(int)(qstats.sum_cwnd / (qstats.sum_cwnd_cnt + 1)));
// Average rtt
fprintf(fout, "avg rtt:%d\n",
(int)(qstats.sum_rtt / (qstats.pkts_total + 1)));
// Average credit
fprintf(fout, "avg credit:%d\n",
(int)(qstats.sum_credit /
(1500 * ((int)qstats.pkts_total) + 1)));
// Return values stats
for (k = 0; k < RET_VAL_COUNT; k++) {
percent_pkts = (qstats.returnValCount[k] * 100.0) /
(qstats.pkts_total + 1);
fprintf(fout, "%s:%6.2f (%d)\n", returnValNames[k],
percent_pkts, (int)qstats.returnValCount[k]);
}
fclose(fout);
}
@ -366,14 +403,15 @@ static void Usage(void)
{
printf("This program loads a cgroup skb BPF program to enforce\n"
"cgroup output (egress) bandwidth limits.\n\n"
"USAGE: hbm [-o] [-d] [-l] [-n <id>] [-r <rate>] [-s]\n"
" [-t <secs>] [-w] [-h] [prog]\n"
"USAGE: hbm [-o] [-d] [-l] [-n <id>] [--no_cn] [-r <rate>]\n"
" [-s] [-t <secs>] [-w] [-h] [prog]\n"
" Where:\n"
" -o indicates egress direction (default)\n"
" -d print BPF trace debug buffer\n"
" -l also limit flows using loopback\n"
" -n <#> to create cgroup \"/hbm#\" and attach prog\n"
" Default is /hbm1\n"
" --no_cn disable CN notifcations\n"
" -r <rate> Rate in Mbps\n"
" -s Update HBM stats\n"
" -t <time> Exit after specified seconds (default is 0)\n"
@ -393,9 +431,16 @@ int main(int argc, char **argv)
int k;
int cg_id = 1;
char *optstring = "iodln:r:st:wh";
struct option loptions[] = {
{"no_cn", 0, NULL, 1},
{NULL, 0, NULL, 0}
};
while ((k = getopt(argc, argv, optstring)) != -1) {
while ((k = getopt_long(argc, argv, optstring, loptions, NULL)) != -1) {
switch (k) {
case 1:
no_cn_flag = true;
break;
case 'o':
break;
case 'd':

View File

@ -19,7 +19,8 @@ struct hbm_vqueue {
struct hbm_queue_stats {
unsigned long rate; /* in Mbps*/
unsigned long stats:1, /* get HBM stats (marked, dropped,..) */
loopback:1; /* also limit flows using loopback */
loopback:1, /* also limit flows using loopback */
no_cn:1; /* do not use cn flags */
unsigned long long pkts_marked;
unsigned long long bytes_marked;
unsigned long long pkts_dropped;
@ -28,4 +29,10 @@ struct hbm_queue_stats {
unsigned long long bytes_total;
unsigned long long firstPacketTime;
unsigned long long lastPacketTime;
unsigned long long pkts_ecn_ce;
unsigned long long returnValCount[4];
unsigned long long sum_cwnd;
unsigned long long sum_rtt;
unsigned long long sum_cwnd_cnt;
long long sum_credit;
};

View File

@ -30,15 +30,8 @@
#define ALLOW_PKT 1
#define TCP_ECN_OK 1
#define HBM_DEBUG 0 // Set to 1 to enable debugging
#if HBM_DEBUG
#define bpf_printk(fmt, ...) \
({ \
char ____fmt[] = fmt; \
bpf_trace_printk(____fmt, sizeof(____fmt), \
##__VA_ARGS__); \
})
#else
#ifndef HBM_DEBUG // Define HBM_DEBUG to enable debugging
#undef bpf_printk
#define bpf_printk(fmt, ...)
#endif
@ -72,17 +65,43 @@ struct bpf_map_def SEC("maps") queue_stats = {
BPF_ANNOTATE_KV_PAIR(queue_stats, int, struct hbm_queue_stats);
struct hbm_pkt_info {
int cwnd;
int rtt;
bool is_ip;
bool is_tcp;
short ecn;
};
static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti)
{
struct bpf_sock *sk;
struct bpf_tcp_sock *tp;
sk = skb->sk;
if (sk) {
sk = bpf_sk_fullsock(sk);
if (sk) {
if (sk->protocol == IPPROTO_TCP) {
tp = bpf_tcp_sock(sk);
if (tp) {
pkti->cwnd = tp->snd_cwnd;
pkti->rtt = tp->srtt_us >> 3;
return 0;
}
}
}
}
return 1;
}
static __always_inline void hbm_get_pkt_info(struct __sk_buff *skb,
struct hbm_pkt_info *pkti)
{
struct iphdr iph;
struct ipv6hdr *ip6h;
pkti->cwnd = 0;
pkti->rtt = 0;
bpf_skb_load_bytes(skb, 0, &iph, 12);
if (iph.version == 6) {
ip6h = (struct ipv6hdr *)&iph;
@ -98,6 +117,8 @@ static __always_inline void hbm_get_pkt_info(struct __sk_buff *skb,
pkti->is_tcp = false;
pkti->ecn = 0;
}
if (pkti->is_tcp)
get_tcp_info(skb, pkti);
}
static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate)
@ -112,8 +133,14 @@ static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
int len,
unsigned long long curtime,
bool congestion_flag,
bool drop_flag)
bool drop_flag,
bool cwr_flag,
bool ecn_ce_flag,
struct hbm_pkt_info *pkti,
int credit)
{
int rv = ALLOW_PKT;
if (qsp != NULL) {
// Following is needed for work conserving
__sync_add_and_fetch(&(qsp->bytes_total), len);
@ -123,7 +150,7 @@ static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
qsp->firstPacketTime = curtime;
qsp->lastPacketTime = curtime;
__sync_add_and_fetch(&(qsp->pkts_total), 1);
if (congestion_flag || drop_flag) {
if (congestion_flag) {
__sync_add_and_fetch(&(qsp->pkts_marked), 1);
__sync_add_and_fetch(&(qsp->bytes_marked), len);
}
@ -132,6 +159,34 @@ static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
__sync_add_and_fetch(&(qsp->bytes_dropped),
len);
}
if (ecn_ce_flag)
__sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1);
if (pkti->cwnd) {
__sync_add_and_fetch(&(qsp->sum_cwnd),
pkti->cwnd);
__sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1);
}
if (pkti->rtt)
__sync_add_and_fetch(&(qsp->sum_rtt),
pkti->rtt);
__sync_add_and_fetch(&(qsp->sum_credit), credit);
if (drop_flag)
rv = DROP_PKT;
if (cwr_flag)
rv |= 2;
if (rv == DROP_PKT)
__sync_add_and_fetch(&(qsp->returnValCount[0]),
1);
else if (rv == ALLOW_PKT)
__sync_add_and_fetch(&(qsp->returnValCount[1]),
1);
else if (rv == 2)
__sync_add_and_fetch(&(qsp->returnValCount[2]),
1);
else if (rv == 3)
__sync_add_and_fetch(&(qsp->returnValCount[3]),
1);
}
}
}

View File

@ -62,11 +62,12 @@ int _hbm_out_cg(struct __sk_buff *skb)
unsigned int queue_index = 0;
unsigned long long curtime;
int credit;
signed long long delta = 0, zero = 0;
signed long long delta = 0, new_credit;
int max_credit = MAX_CREDIT;
bool congestion_flag = false;
bool drop_flag = false;
bool cwr_flag = false;
bool ecn_ce_flag = false;
struct hbm_vqueue *qdp;
struct hbm_queue_stats *qsp = NULL;
int rv = ALLOW_PKT;
@ -99,9 +100,11 @@ int _hbm_out_cg(struct __sk_buff *skb)
*/
if (delta > 0) {
qdp->lasttime = curtime;
credit += CREDIT_PER_NS(delta, qdp->rate);
if (credit > MAX_CREDIT)
new_credit = credit + CREDIT_PER_NS(delta, qdp->rate);
if (new_credit > MAX_CREDIT)
credit = MAX_CREDIT;
else
credit = new_credit;
}
credit -= len;
qdp->credit = credit;
@ -119,13 +122,16 @@ int _hbm_out_cg(struct __sk_buff *skb)
// Set flags (drop, congestion, cwr)
// Dropping => we are congested, so ignore congestion flag
if (credit < -DROP_THRESH ||
(len > LARGE_PKT_THRESH &&
credit < -LARGE_PKT_DROP_THRESH)) {
// Very congested, set drop flag
(len > LARGE_PKT_THRESH && credit < -LARGE_PKT_DROP_THRESH)) {
// Very congested, set drop packet
drop_flag = true;
if (pkti.ecn)
congestion_flag = true;
else if (pkti.is_tcp)
cwr_flag = true;
} else if (credit < 0) {
// Congested, set congestion flag
if (pkti.ecn) {
if (pkti.ecn || pkti.is_tcp) {
if (credit < -MARK_THRESH)
congestion_flag = true;
else
@ -136,22 +142,38 @@ int _hbm_out_cg(struct __sk_buff *skb)
}
if (congestion_flag) {
if (!bpf_skb_ecn_set_ce(skb)) {
if (len > LARGE_PKT_THRESH) {
if (bpf_skb_ecn_set_ce(skb)) {
ecn_ce_flag = true;
} else {
if (pkti.is_tcp) {
unsigned int rand = bpf_get_prandom_u32();
if (-credit >= MARK_THRESH +
(rand % MARK_REGION_SIZE)) {
// Do congestion control
cwr_flag = true;
}
} else if (len > LARGE_PKT_THRESH) {
// Problem if too many small packets?
drop_flag = true;
}
}
}
if (drop_flag)
rv = DROP_PKT;
if (qsp != NULL)
if (qsp->no_cn)
cwr_flag = false;
hbm_update_stats(qsp, len, curtime, congestion_flag, drop_flag);
hbm_update_stats(qsp, len, curtime, congestion_flag, drop_flag,
cwr_flag, ecn_ce_flag, &pkti, credit);
if (rv == DROP_PKT)
if (drop_flag) {
__sync_add_and_fetch(&(qdp->credit), len);
rv = DROP_PKT;
}
if (cwr_flag)
rv |= 2;
return rv;
}
char _license[] SEC("license") = "GPL";

View File

@ -21,13 +21,6 @@
#define DEBUG 1
#define bpf_printk(fmt, ...) \
({ \
char ____fmt[] = fmt; \
bpf_trace_printk(____fmt, sizeof(____fmt), \
##__VA_ARGS__); \
})
SEC("sockops")
int bpf_basertt(struct bpf_sock_ops *skops)
{

View File

@ -22,13 +22,6 @@
#define DEBUG 1
#define bpf_printk(fmt, ...) \
({ \
char ____fmt[] = fmt; \
bpf_trace_printk(____fmt, sizeof(____fmt), \
##__VA_ARGS__); \
})
SEC("sockops")
int bpf_bufs(struct bpf_sock_ops *skops)
{

View File

@ -22,13 +22,6 @@
#define DEBUG 1
#define bpf_printk(fmt, ...) \
({ \
char ____fmt[] = fmt; \
bpf_trace_printk(____fmt, sizeof(____fmt), \
##__VA_ARGS__); \
})
SEC("sockops")
int bpf_clamp(struct bpf_sock_ops *skops)
{

View File

@ -21,13 +21,6 @@
#define DEBUG 1
#define bpf_printk(fmt, ...) \
({ \
char ____fmt[] = fmt; \
bpf_trace_printk(____fmt, sizeof(____fmt), \
##__VA_ARGS__); \
})
SEC("sockops")
int bpf_cong(struct bpf_sock_ops *skops)
{

View File

@ -22,13 +22,6 @@
#define DEBUG 1
#define bpf_printk(fmt, ...) \
({ \
char ____fmt[] = fmt; \
bpf_trace_printk(____fmt, sizeof(____fmt), \
##__VA_ARGS__); \
})
SEC("sockops")
int bpf_iw(struct bpf_sock_ops *skops)
{

View File

@ -21,13 +21,6 @@
#define DEBUG 1
#define bpf_printk(fmt, ...) \
({ \
char ____fmt[] = fmt; \
bpf_trace_printk(____fmt, sizeof(____fmt), \
##__VA_ARGS__); \
})
SEC("sockops")
int bpf_rwnd(struct bpf_sock_ops *skops)
{

View File

@ -21,13 +21,6 @@
#define DEBUG 1
#define bpf_printk(fmt, ...) \
({ \
char ____fmt[] = fmt; \
bpf_trace_printk(____fmt, sizeof(____fmt), \
##__VA_ARGS__); \
})
SEC("sockops")
int bpf_synrto(struct bpf_sock_ops *skops)
{

View File

@ -20,13 +20,6 @@
#define DEBUG 1
#define bpf_printk(fmt, ...) \
({ \
char ____fmt[] = fmt; \
bpf_trace_printk(____fmt, sizeof(____fmt), \
##__VA_ARGS__); \
})
SEC("sockops")
int bpf_basertt(struct bpf_sock_ops *skops)
{

View File

@ -7,13 +7,6 @@
#define SAMPLE_SIZE 64ul
#define MAX_CPUS 128
#define bpf_printk(fmt, ...) \
({ \
char ____fmt[] = fmt; \
bpf_trace_printk(____fmt, sizeof(____fmt), \
##__VA_ARGS__); \
})
struct bpf_map_def SEC("maps") my_map = {
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
.key_size = sizeof(int),

View File

@ -19,10 +19,11 @@ SYNOPSIS
BTF COMMANDS
=============
| **bpftool** **btf dump** *BTF_SRC*
| **bpftool** **btf dump** *BTF_SRC* [**format** *FORMAT*]
| **bpftool** **btf help**
|
| *BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* }
| *FORMAT* := { **raw** | **c** }
| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
@ -31,23 +32,27 @@ DESCRIPTION
**bpftool btf dump** *BTF_SRC*
Dump BTF entries from a given *BTF_SRC*.
When **id** is specified, BTF object with that ID will be
loaded and all its BTF types emitted.
When **map** is provided, it's expected that map has
associated BTF object with BTF types describing key and
value. It's possible to select whether to dump only BTF
type(s) associated with key (**key**), value (**value**),
both key and value (**kv**), or all BTF types present in
associated BTF object (**all**). If not specified, **kv**
is assumed.
When **prog** is provided, it's expected that program has
associated BTF object with BTF types.
When specifying *FILE*, an ELF file is expected, containing
.BTF section with well-defined BTF binary format data,
typically produced by clang or pahole.
**format** option can be used to override default (raw)
output format. Raw (**raw**) or C-syntax (**c**) output
formats are supported.
**bpftool btf help**
Print short help message.
@ -67,6 +72,10 @@ OPTIONS
-p, --pretty
Generate human-readable JSON output. Implies **-j**.
-d, --debug
Print all logs available from libbpf, including debug-level
information.
EXAMPLES
========
**# bpftool btf dump id 1226**

View File

@ -113,6 +113,10 @@ OPTIONS
-f, --bpffs
Show file names of pinned programs.
-d, --debug
Print all logs available from libbpf, including debug-level
information.
EXAMPLES
========
|

View File

@ -73,6 +73,10 @@ OPTIONS
-p, --pretty
Generate human-readable JSON output. Implies **-j**.
-d, --debug
Print all logs available from libbpf, including debug-level
information.
SEE ALSO
========
**bpf**\ (2),

View File

@ -152,6 +152,10 @@ OPTIONS
Do not automatically attempt to mount any virtual file system
(such as tracefs or BPF virtual file system) when necessary.
-d, --debug
Print all logs available from libbpf, including debug-level
information.
EXAMPLES
========
**# bpftool map show**

View File

@ -65,6 +65,10 @@ OPTIONS
-p, --pretty
Generate human-readable JSON output. Implies **-j**.
-d, --debug
Print all logs available from libbpf, including debug-level
information.
EXAMPLES
========

View File

@ -53,6 +53,10 @@ OPTIONS
-p, --pretty
Generate human-readable JSON output. Implies **-j**.
-d, --debug
Print all logs available from libbpf, including debug-level
information.
EXAMPLES
========

View File

@ -174,6 +174,11 @@ OPTIONS
Do not automatically attempt to mount any virtual file system
(such as tracefs or BPF virtual file system) when necessary.
-d, --debug
Print all logs available, even debug-level information. This
includes logs from libbpf as well as from the verifier, when
attempting to load programs.
EXAMPLES
========
**# bpftool prog show**

View File

@ -66,6 +66,10 @@ OPTIONS
Do not automatically attempt to mount any virtual file system
(such as tracefs or BPF virtual file system) when necessary.
-d, --debug
Print all logs available, even debug-level information. This
includes logs from libbpf as well as from the verifier, when
attempting to load programs.
SEE ALSO
========

View File

@ -71,6 +71,12 @@ _bpftool_get_prog_tags()
command sed -n 's/.*"tag": "\(.*\)",$/\1/p' )" -- "$cur" ) )
}
_bpftool_get_btf_ids()
{
COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \
command sed -n 's/.*"btf_id": \(.*\),\?$/\1/p' )" -- "$cur" ) )
}
_bpftool_get_obj_map_names()
{
local obj
@ -181,7 +187,7 @@ _bpftool()
# Deal with options
if [[ ${words[cword]} == -* ]]; then
local c='--version --json --pretty --bpffs --mapcompat'
local c='--version --json --pretty --bpffs --mapcompat --debug'
COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
return 0
fi
@ -635,14 +641,30 @@ _bpftool()
map)
_bpftool_get_map_ids
;;
dump)
_bpftool_get_btf_ids
;;
esac
return 0
;;
format)
COMPREPLY=( $( compgen -W "c raw" -- "$cur" ) )
;;
*)
if [[ $cword == 6 ]] && [[ ${words[3]} == "map" ]]; then
COMPREPLY+=( $( compgen -W 'key value kv all' -- \
"$cur" ) )
fi
# emit extra options
case ${words[3]} in
id|file)
_bpftool_once_attr 'format'
;;
map|prog)
if [[ ${words[3]} == "map" ]] && [[ $cword == 6 ]]; then
COMPREPLY+=( $( compgen -W "key value kv all" -- "$cur" ) )
fi
_bpftool_once_attr 'format'
;;
*)
;;
esac
return 0
;;
esac

View File

@ -8,8 +8,8 @@
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <gelf.h>
#include <bpf.h>
#include <libbpf.h>
#include <linux/btf.h>
#include "btf.h"
@ -340,109 +340,40 @@ static int dump_btf_raw(const struct btf *btf,
return 0;
}
static bool check_btf_endianness(GElf_Ehdr *ehdr)
static void __printf(2, 0) btf_dump_printf(void *ctx,
const char *fmt, va_list args)
{
static unsigned int const endian = 1;
switch (ehdr->e_ident[EI_DATA]) {
case ELFDATA2LSB:
return *(unsigned char const *)&endian == 1;
case ELFDATA2MSB:
return *(unsigned char const *)&endian == 0;
default:
return 0;
}
vfprintf(stdout, fmt, args);
}
static int btf_load_from_elf(const char *path, struct btf **btf)
static int dump_btf_c(const struct btf *btf,
__u32 *root_type_ids, int root_type_cnt)
{
int err = -1, fd = -1, idx = 0;
Elf_Data *btf_data = NULL;
Elf_Scn *scn = NULL;
Elf *elf = NULL;
GElf_Ehdr ehdr;
struct btf_dump *d;
int err = 0, i;
if (elf_version(EV_CURRENT) == EV_NONE) {
p_err("failed to init libelf for %s", path);
return -1;
}
d = btf_dump__new(btf, NULL, NULL, btf_dump_printf);
if (IS_ERR(d))
return PTR_ERR(d);
fd = open(path, O_RDONLY);
if (fd < 0) {
p_err("failed to open %s: %s", path, strerror(errno));
return -1;
}
elf = elf_begin(fd, ELF_C_READ, NULL);
if (!elf) {
p_err("failed to open %s as ELF file", path);
goto done;
}
if (!gelf_getehdr(elf, &ehdr)) {
p_err("failed to get EHDR from %s", path);
goto done;
}
if (!check_btf_endianness(&ehdr)) {
p_err("non-native ELF endianness is not supported");
goto done;
}
if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
p_err("failed to get e_shstrndx from %s\n", path);
goto done;
}
while ((scn = elf_nextscn(elf, scn)) != NULL) {
GElf_Shdr sh;
char *name;
idx++;
if (gelf_getshdr(scn, &sh) != &sh) {
p_err("failed to get section(%d) header from %s",
idx, path);
goto done;
}
name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
if (!name) {
p_err("failed to get section(%d) name from %s",
idx, path);
goto done;
}
if (strcmp(name, BTF_ELF_SEC) == 0) {
btf_data = elf_getdata(scn, 0);
if (!btf_data) {
p_err("failed to get section(%d, %s) data from %s",
idx, name, path);
if (root_type_cnt) {
for (i = 0; i < root_type_cnt; i++) {
err = btf_dump__dump_type(d, root_type_ids[i]);
if (err)
goto done;
}
} else {
int cnt = btf__get_nr_types(btf);
for (i = 1; i <= cnt; i++) {
err = btf_dump__dump_type(d, i);
if (err)
goto done;
}
break;
}
}
if (!btf_data) {
p_err("%s ELF section not found in %s", BTF_ELF_SEC, path);
goto done;
}
*btf = btf__new(btf_data->d_buf, btf_data->d_size);
if (IS_ERR(*btf)) {
err = PTR_ERR(*btf);
*btf = NULL;
p_err("failed to load BTF data from %s: %s",
path, strerror(err));
goto done;
}
err = 0;
done:
if (err) {
if (*btf) {
btf__free(*btf);
*btf = NULL;
}
}
if (elf)
elf_end(elf);
close(fd);
btf_dump__free(d);
return err;
}
@ -451,6 +382,7 @@ static int do_dump(int argc, char **argv)
struct btf *btf = NULL;
__u32 root_type_ids[2];
int root_type_cnt = 0;
bool dump_c = false;
__u32 btf_id = -1;
const char *src;
int fd = -1;
@ -522,9 +454,14 @@ static int do_dump(int argc, char **argv)
}
NEXT_ARG();
} else if (is_prefix(src, "file")) {
err = btf_load_from_elf(*argv, &btf);
if (err)
btf = btf__parse_elf(*argv, NULL);
if (IS_ERR(btf)) {
err = PTR_ERR(btf);
btf = NULL;
p_err("failed to load BTF from %s: %s",
*argv, strerror(err));
goto done;
}
NEXT_ARG();
} else {
err = -1;
@ -532,6 +469,29 @@ static int do_dump(int argc, char **argv)
goto done;
}
while (argc) {
if (is_prefix(*argv, "format")) {
NEXT_ARG();
if (argc < 1) {
p_err("expecting value for 'format' option\n");
goto done;
}
if (strcmp(*argv, "c") == 0) {
dump_c = true;
} else if (strcmp(*argv, "raw") == 0) {
dump_c = false;
} else {
p_err("unrecognized format specifier: '%s', possible values: raw, c",
*argv);
goto done;
}
NEXT_ARG();
} else {
p_err("unrecognized option: '%s'", *argv);
goto done;
}
}
if (!btf) {
err = btf__get_from_id(btf_id, &btf);
if (err) {
@ -545,7 +505,16 @@ static int do_dump(int argc, char **argv)
}
}
dump_btf_raw(btf, root_type_ids, root_type_cnt);
if (dump_c) {
if (json_output) {
p_err("JSON output for C-syntax dump is not supported");
err = -ENOTSUP;
goto done;
}
err = dump_btf_c(btf, root_type_ids, root_type_cnt);
} else {
err = dump_btf_raw(btf, root_type_ids, root_type_cnt);
}
done:
close(fd);
@ -561,10 +530,11 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
"Usage: %s btf dump BTF_SRC\n"
"Usage: %s btf dump BTF_SRC [format FORMAT]\n"
" %s btf help\n"
"\n"
" BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n"
" FORMAT := { raw | c }\n"
" " HELP_SPEC_MAP "\n"
" " HELP_SPEC_PROGRAM "\n"
" " HELP_SPEC_OPTIONS "\n"

View File

@ -10,6 +10,7 @@
#include <string.h>
#include <bpf.h>
#include <libbpf.h>
#include "main.h"
@ -25,6 +26,7 @@ bool pretty_output;
bool json_output;
bool show_pinned;
bool block_mount;
bool verifier_logs;
int bpf_flags;
struct pinned_obj_table prog_table;
struct pinned_obj_table map_table;
@ -77,6 +79,13 @@ static int do_version(int argc, char **argv)
return 0;
}
static int __printf(2, 0)
print_all_levels(__maybe_unused enum libbpf_print_level level,
const char *format, va_list args)
{
return vfprintf(stderr, format, args);
}
int cmd_select(const struct cmd *cmds, int argc, char **argv,
int (*help)(int argc, char **argv))
{
@ -317,6 +326,7 @@ int main(int argc, char **argv)
{ "bpffs", no_argument, NULL, 'f' },
{ "mapcompat", no_argument, NULL, 'm' },
{ "nomount", no_argument, NULL, 'n' },
{ "debug", no_argument, NULL, 'd' },
{ 0 }
};
int opt, ret;
@ -332,7 +342,7 @@ int main(int argc, char **argv)
hash_init(map_table.table);
opterr = 0;
while ((opt = getopt_long(argc, argv, "Vhpjfmn",
while ((opt = getopt_long(argc, argv, "Vhpjfmnd",
options, NULL)) >= 0) {
switch (opt) {
case 'V':
@ -362,6 +372,10 @@ int main(int argc, char **argv)
case 'n':
block_mount = true;
break;
case 'd':
libbpf_set_print(print_all_levels);
verifier_logs = true;
break;
default:
p_err("unrecognized option '%s'", argv[optind - 1]);
if (json_output)

View File

@ -91,6 +91,7 @@ extern json_writer_t *json_wtr;
extern bool json_output;
extern bool show_pinned;
extern bool block_mount;
extern bool verifier_logs;
extern int bpf_flags;
extern struct pinned_obj_table prog_table;
extern struct pinned_obj_table map_table;

View File

@ -750,10 +750,11 @@ static int do_detach(int argc, char **argv)
static int load_with_options(int argc, char **argv, bool first_prog_only)
{
enum bpf_attach_type expected_attach_type;
struct bpf_object_open_attr attr = {
.prog_type = BPF_PROG_TYPE_UNSPEC,
struct bpf_object_load_attr load_attr = { 0 };
struct bpf_object_open_attr open_attr = {
.prog_type = BPF_PROG_TYPE_UNSPEC,
};
enum bpf_attach_type expected_attach_type;
struct map_replace *map_replace = NULL;
struct bpf_program *prog = NULL, *pos;
unsigned int old_map_fds = 0;
@ -767,7 +768,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
if (!REQ_ARGS(2))
return -1;
attr.file = GET_ARG();
open_attr.file = GET_ARG();
pinfile = GET_ARG();
while (argc) {
@ -776,7 +777,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
NEXT_ARG();
if (attr.prog_type != BPF_PROG_TYPE_UNSPEC) {
if (open_attr.prog_type != BPF_PROG_TYPE_UNSPEC) {
p_err("program type already specified");
goto err_free_reuse_maps;
}
@ -793,7 +794,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
strcat(type, *argv);
strcat(type, "/");
err = libbpf_prog_type_by_name(type, &attr.prog_type,
err = libbpf_prog_type_by_name(type,
&open_attr.prog_type,
&expected_attach_type);
free(type);
if (err < 0)
@ -881,16 +883,16 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
set_max_rlimit();
obj = __bpf_object__open_xattr(&attr, bpf_flags);
obj = __bpf_object__open_xattr(&open_attr, bpf_flags);
if (IS_ERR_OR_NULL(obj)) {
p_err("failed to open object file");
goto err_free_reuse_maps;
}
bpf_object__for_each_program(pos, obj) {
enum bpf_prog_type prog_type = attr.prog_type;
enum bpf_prog_type prog_type = open_attr.prog_type;
if (attr.prog_type == BPF_PROG_TYPE_UNSPEC) {
if (open_attr.prog_type == BPF_PROG_TYPE_UNSPEC) {
const char *sec_name = bpf_program__title(pos, false);
err = libbpf_prog_type_by_name(sec_name, &prog_type,
@ -960,7 +962,12 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
goto err_close_obj;
}
err = bpf_object__load(obj);
load_attr.obj = obj;
if (verifier_logs)
/* log_level1 + log_level2 + stats, but not stable UAPI */
load_attr.log_level = 1 + 2 + 4;
err = bpf_object__load_xattr(&load_attr);
if (err) {
p_err("failed to load object file");
goto err_close_obj;

View File

@ -31,9 +31,7 @@ void kernel_syms_load(struct dump_data *dd)
if (!fp)
return;
while (!feof(fp)) {
if (!fgets(buff, sizeof(buff), fp))
break;
while (fgets(buff, sizeof(buff), fp)) {
tmp = reallocarray(dd->sym_mapping, dd->sym_count + 1,
sizeof(*dd->sym_mapping));
if (!tmp) {

View File

@ -260,6 +260,24 @@ enum bpf_attach_type {
*/
#define BPF_F_ANY_ALIGNMENT (1U << 1)
/* BPF_F_TEST_RND_HI32 is used in the BPF_PROG_LOAD command for testing purposes.
* The verifier does sub-register def/use analysis and identifies instructions whose
* def only matters for the low 32 bits; the high 32 bits are never referenced later
* through implicit zero extension. Therefore the verifier notifies JIT back-ends
* that it is safe to skip clearing the high 32 bits for these instructions. This
* saves some back-ends a lot of code-gen. However, such optimization is not
* necessary on some arches, for example x86_64 and arm64, whose JIT back-ends
* hence have not used the verifier's analysis result. But we really want to have a
* way to be able to verify the correctness of the described optimization on
* x86_64 on which testsuites are frequently exercised.
*
* So, this flag is introduced. Once it is set, the verifier will randomize the
* high 32 bits of those instructions that have been identified as safe to ignore.
* Then, if the verifier's analysis is incorrect, such randomization will make
* tests regress and expose the bug.
*/
#define BPF_F_TEST_RND_HI32 (1U << 2)
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* two extensions:
*
@ -2672,6 +2690,20 @@ union bpf_attr {
* 0 on success.
*
* **-ENOENT** if the bpf-local-storage cannot be found.
*
* int bpf_send_signal(u32 sig)
* Description
* Send signal *sig* to the current task.
* Return
* 0 on success or if the signal was successfully queued.
*
* **-EBUSY** if the work queue under NMI is full.
*
* **-EINVAL** if *sig* is invalid.
*
* **-EPERM** if there is no permission to send *sig*.
*
* **-EAGAIN** if the BPF program can try again.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -2782,7 +2814,8 @@ union bpf_attr {
FN(strtol), \
FN(strtoul), \
FN(sk_storage_get), \
FN(sk_storage_delete),
FN(sk_storage_delete), \
FN(send_signal),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
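As a rough illustration of the new helper, a tracing program could queue a signal for the current task as sketched below. It assumes bpf_send_signal() is exposed through the usual helper macros (e.g. in bpf_helpers.h), which is not part of this hunk; the attach point and signal number are arbitrary:

// Sketch only: send SIGUSR1 to the current task from a kprobe program.
// Assumes bpf_send_signal() is declared via the BPF_FUNC_* helper macros
// in bpf_helpers.h (declaration not shown in this diff).
#include <linux/bpf.h>
#include "bpf_helpers.h"

#define SIGUSR1 10

SEC("kprobe/__x64_sys_nanosleep")
int send_sig_on_nanosleep(void *ctx)
{
	/* Only queues the signal; delivery happens when the task returns
	 * to user space, as described for the helper above.
	 */
	bpf_send_signal(SIGUSR1);
	return 0;
}

char _license[] SEC("license") = "GPL";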

View File

@ -0,0 +1,114 @@
/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/*
* Universal TUN/TAP device driver.
* Copyright (C) 1999-2000 Maxim Krasnyansky <max_mk@yahoo.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#ifndef _UAPI__IF_TUN_H
#define _UAPI__IF_TUN_H
#include <linux/types.h>
#include <linux/if_ether.h>
#include <linux/filter.h>
/* Read queue size */
#define TUN_READQ_SIZE 500
/* TUN device type flags: deprecated. Use IFF_TUN/IFF_TAP instead. */
#define TUN_TUN_DEV IFF_TUN
#define TUN_TAP_DEV IFF_TAP
#define TUN_TYPE_MASK 0x000f
/* Ioctl defines */
#define TUNSETNOCSUM _IOW('T', 200, int)
#define TUNSETDEBUG _IOW('T', 201, int)
#define TUNSETIFF _IOW('T', 202, int)
#define TUNSETPERSIST _IOW('T', 203, int)
#define TUNSETOWNER _IOW('T', 204, int)
#define TUNSETLINK _IOW('T', 205, int)
#define TUNSETGROUP _IOW('T', 206, int)
#define TUNGETFEATURES _IOR('T', 207, unsigned int)
#define TUNSETOFFLOAD _IOW('T', 208, unsigned int)
#define TUNSETTXFILTER _IOW('T', 209, unsigned int)
#define TUNGETIFF _IOR('T', 210, unsigned int)
#define TUNGETSNDBUF _IOR('T', 211, int)
#define TUNSETSNDBUF _IOW('T', 212, int)
#define TUNATTACHFILTER _IOW('T', 213, struct sock_fprog)
#define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog)
#define TUNGETVNETHDRSZ _IOR('T', 215, int)
#define TUNSETVNETHDRSZ _IOW('T', 216, int)
#define TUNSETQUEUE _IOW('T', 217, int)
#define TUNSETIFINDEX _IOW('T', 218, unsigned int)
#define TUNGETFILTER _IOR('T', 219, struct sock_fprog)
#define TUNSETVNETLE _IOW('T', 220, int)
#define TUNGETVNETLE _IOR('T', 221, int)
/* The TUNSETVNETBE and TUNGETVNETBE ioctls are for cross-endian support on
* little-endian hosts. Not all kernel configurations support them, but all
* configurations that support SET also support GET.
*/
#define TUNSETVNETBE _IOW('T', 222, int)
#define TUNGETVNETBE _IOR('T', 223, int)
#define TUNSETSTEERINGEBPF _IOR('T', 224, int)
#define TUNSETFILTEREBPF _IOR('T', 225, int)
#define TUNSETCARRIER _IOW('T', 226, int)
#define TUNGETDEVNETNS _IO('T', 227)
/* TUNSETIFF ifr flags */
#define IFF_TUN 0x0001
#define IFF_TAP 0x0002
#define IFF_NAPI 0x0010
#define IFF_NAPI_FRAGS 0x0020
#define IFF_NO_PI 0x1000
/* This flag has no real effect */
#define IFF_ONE_QUEUE 0x2000
#define IFF_VNET_HDR 0x4000
#define IFF_TUN_EXCL 0x8000
#define IFF_MULTI_QUEUE 0x0100
#define IFF_ATTACH_QUEUE 0x0200
#define IFF_DETACH_QUEUE 0x0400
/* read-only flag */
#define IFF_PERSIST 0x0800
#define IFF_NOFILTER 0x1000
/* Socket options */
#define TUN_TX_TIMESTAMP 1
/* Features for GSO (TUNSETOFFLOAD). */
#define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. */
#define TUN_F_TSO4 0x02 /* I can handle TSO for IPv4 packets */
#define TUN_F_TSO6 0x04 /* I can handle TSO for IPv6 packets */
#define TUN_F_TSO_ECN 0x08 /* I can handle TSO with ECN bits. */
#define TUN_F_UFO 0x10 /* I can handle UFO packets */
/* Protocol info prepended to the packets (when IFF_NO_PI is not set) */
#define TUN_PKT_STRIP 0x0001
struct tun_pi {
__u16 flags;
__be16 proto;
};
/*
* Filter spec (used for SETXXFILTER ioctls)
* This stuff is applicable only to the TAP (Ethernet) devices.
* If the count is zero the filter is disabled and the driver accepts
* all packets (promisc mode).
* If the filter is enabled in order to accept broadcast packets
* broadcast addr must be explicitly included in the addr list.
*/
#define TUN_FLT_ALLMULTI 0x0001 /* Accept all multicast packets */
struct tun_filter {
__u16 flags; /* TUN_FLT_ flags see above */
__u16 count; /* Number of addresses */
__u8 addr[0][ETH_ALEN];
};
#endif /* _UAPI__IF_TUN_H */

View File

@ -1 +1,3 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
btf_dump.o

View File

@ -3,7 +3,7 @@
BPF_VERSION = 0
BPF_PATCHLEVEL = 0
BPF_EXTRAVERSION = 3
BPF_EXTRAVERSION = 4
MAKEFLAGS += --no-print-directory
@ -204,6 +204,16 @@ check_abi: $(OUTPUT)libbpf.so
"versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \
"Please make sure all LIBBPF_API symbols are" \
"versioned in $(VERSION_SCRIPT)." >&2; \
readelf -s --wide $(OUTPUT)libbpf-in.o | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}'| \
sort -u > $(OUTPUT)libbpf_global_syms.tmp; \
readelf -s --wide $(OUTPUT)libbpf.so | \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \
sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \
diff -u $(OUTPUT)libbpf_global_syms.tmp \
$(OUTPUT)libbpf_versioned_syms.tmp; \
rm $(OUTPUT)libbpf_global_syms.tmp \
$(OUTPUT)libbpf_versioned_syms.tmp; \
exit 1; \
fi

View File

@ -256,6 +256,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
if (load_attr->name)
memcpy(attr.prog_name, load_attr->name,
min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
attr.prog_flags = load_attr->prog_flags;
fd = sys_bpf_prog_load(&attr, sizeof(attr));
if (fd >= 0)

View File

@ -87,6 +87,7 @@ struct bpf_load_program_attr {
const void *line_info;
__u32 line_info_cnt;
__u32 log_level;
__u32 prog_flags;
};
/* Flags to direct loading requirements */
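Taken together with the BPF_F_TEST_RND_HI32 flag added to the UAPI header above, the new prog_flags field could be used roughly as follows; the two-instruction program and the function name are purely illustrative:

// Sketch: load a trivial program with high-32-bit randomization requested
// through the new prog_flags field. Not a test from this series.
#include <string.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

static int load_with_rnd_hi32(void)
{
	struct bpf_insn insns[] = {
		/* mov32 r0, 0 - a sub-register write, i.e. a candidate for
		 * the verifier's zero-extension analysis
		 */
		{ .code = BPF_ALU | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0 },
		/* exit */
		{ .code = BPF_JMP | BPF_EXIT },
	};
	struct bpf_load_program_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
	attr.insns = insns;
	attr.insns_cnt = 2;
	attr.license = "GPL";
	attr.prog_flags = BPF_F_TEST_RND_HI32;	/* new field + new flag */

	return bpf_load_program_xattr(&attr, NULL, 0);
}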

View File

@ -4,14 +4,17 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <linux/err.h>
#include <linux/btf.h>
#include <gelf.h>
#include "btf.h"
#include "bpf.h"
#include "libbpf.h"
#include "libbpf_internal.h"
#include "hashmap.h"
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min(a, b) ((a) < (b) ? (a) : (b))
@ -417,6 +420,132 @@ done:
return btf;
}
static bool btf_check_endianness(const GElf_Ehdr *ehdr)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return ehdr->e_ident[EI_DATA] == ELFDATA2LSB;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return ehdr->e_ident[EI_DATA] == ELFDATA2MSB;
#else
# error "Unrecognized __BYTE_ORDER__"
#endif
}
struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
{
Elf_Data *btf_data = NULL, *btf_ext_data = NULL;
int err = 0, fd = -1, idx = 0;
struct btf *btf = NULL;
Elf_Scn *scn = NULL;
Elf *elf = NULL;
GElf_Ehdr ehdr;
if (elf_version(EV_CURRENT) == EV_NONE) {
pr_warning("failed to init libelf for %s\n", path);
return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
}
fd = open(path, O_RDONLY);
if (fd < 0) {
err = -errno;
pr_warning("failed to open %s: %s\n", path, strerror(errno));
return ERR_PTR(err);
}
err = -LIBBPF_ERRNO__FORMAT;
elf = elf_begin(fd, ELF_C_READ, NULL);
if (!elf) {
pr_warning("failed to open %s as ELF file\n", path);
goto done;
}
if (!gelf_getehdr(elf, &ehdr)) {
pr_warning("failed to get EHDR from %s\n", path);
goto done;
}
if (!btf_check_endianness(&ehdr)) {
pr_warning("non-native ELF endianness is not supported\n");
goto done;
}
if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
pr_warning("failed to get e_shstrndx from %s\n", path);
goto done;
}
while ((scn = elf_nextscn(elf, scn)) != NULL) {
GElf_Shdr sh;
char *name;
idx++;
if (gelf_getshdr(scn, &sh) != &sh) {
pr_warning("failed to get section(%d) header from %s\n",
idx, path);
goto done;
}
name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
if (!name) {
pr_warning("failed to get section(%d) name from %s\n",
idx, path);
goto done;
}
if (strcmp(name, BTF_ELF_SEC) == 0) {
btf_data = elf_getdata(scn, 0);
if (!btf_data) {
pr_warning("failed to get section(%d, %s) data from %s\n",
idx, name, path);
goto done;
}
continue;
} else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) {
btf_ext_data = elf_getdata(scn, 0);
if (!btf_ext_data) {
pr_warning("failed to get section(%d, %s) data from %s\n",
idx, name, path);
goto done;
}
continue;
}
}
err = 0;
if (!btf_data) {
err = -ENOENT;
goto done;
}
btf = btf__new(btf_data->d_buf, btf_data->d_size);
if (IS_ERR(btf))
goto done;
if (btf_ext && btf_ext_data) {
*btf_ext = btf_ext__new(btf_ext_data->d_buf,
btf_ext_data->d_size);
if (IS_ERR(*btf_ext))
goto done;
} else if (btf_ext) {
*btf_ext = NULL;
}
done:
if (elf)
elf_end(elf);
close(fd);
if (err)
return ERR_PTR(err);
/*
* btf is always parsed before btf_ext, so no need to clean up
* btf_ext, if btf loading failed
*/
if (IS_ERR(btf))
return btf;
if (btf_ext && IS_ERR(*btf_ext)) {
btf__free(btf);
err = PTR_ERR(*btf_ext);
return ERR_PTR(err);
}
return btf;
}
static int compare_vsi_off(const void *_a, const void *_b)
{
const struct btf_var_secinfo *a = _a;
@ -1165,16 +1294,9 @@ done:
return err;
}
#define BTF_DEDUP_TABLE_DEFAULT_SIZE (1 << 14)
#define BTF_DEDUP_TABLE_MAX_SIZE_LOG 31
#define BTF_UNPROCESSED_ID ((__u32)-1)
#define BTF_IN_PROGRESS_ID ((__u32)-2)
struct btf_dedup_node {
struct btf_dedup_node *next;
__u32 type_id;
};
struct btf_dedup {
/* .BTF section to be deduped in-place */
struct btf *btf;
@ -1190,7 +1312,7 @@ struct btf_dedup {
* candidates, which is fine because we rely on subsequent
* btf_xxx_equal() checks to authoritatively verify type equality.
*/
struct btf_dedup_node **dedup_table;
struct hashmap *dedup_table;
/* Canonical types map */
__u32 *map;
/* Hypothetical mapping, used during type graph equivalence checks */
@ -1215,30 +1337,18 @@ struct btf_str_ptrs {
__u32 cap;
};
static inline __u32 hash_combine(__u32 h, __u32 value)
static long hash_combine(long h, long value)
{
/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
#define GOLDEN_RATIO_PRIME 0x9e370001UL
return h * 37 + value * GOLDEN_RATIO_PRIME;
#undef GOLDEN_RATIO_PRIME
return h * 31 + value;
}
#define for_each_dedup_cand(d, hash, node) \
for (node = d->dedup_table[hash & (d->opts.dedup_table_size - 1)]; \
node; \
node = node->next)
#define for_each_dedup_cand(d, node, hash) \
hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash)
static int btf_dedup_table_add(struct btf_dedup *d, __u32 hash, __u32 type_id)
static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id)
{
struct btf_dedup_node *node = malloc(sizeof(struct btf_dedup_node));
int bucket = hash & (d->opts.dedup_table_size - 1);
if (!node)
return -ENOMEM;
node->type_id = type_id;
node->next = d->dedup_table[bucket];
d->dedup_table[bucket] = node;
return 0;
return hashmap__append(d->dedup_table,
(void *)hash, (void *)(long)type_id);
}
static int btf_dedup_hypot_map_add(struct btf_dedup *d,
@ -1267,36 +1377,10 @@ static void btf_dedup_clear_hypot_map(struct btf_dedup *d)
d->hypot_cnt = 0;
}
static void btf_dedup_table_free(struct btf_dedup *d)
{
struct btf_dedup_node *head, *tmp;
int i;
if (!d->dedup_table)
return;
for (i = 0; i < d->opts.dedup_table_size; i++) {
while (d->dedup_table[i]) {
tmp = d->dedup_table[i];
d->dedup_table[i] = tmp->next;
free(tmp);
}
head = d->dedup_table[i];
while (head) {
tmp = head;
head = head->next;
free(tmp);
}
}
free(d->dedup_table);
d->dedup_table = NULL;
}
static void btf_dedup_free(struct btf_dedup *d)
{
btf_dedup_table_free(d);
hashmap__free(d->dedup_table);
d->dedup_table = NULL;
free(d->map);
d->map = NULL;
@ -1310,40 +1394,43 @@ static void btf_dedup_free(struct btf_dedup *d)
free(d);
}
/* Find closest power of two >= to size, capped at 2^max_size_log */
static __u32 roundup_pow2_max(__u32 size, int max_size_log)
static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx)
{
int i;
for (i = 0; i < max_size_log && (1U << i) < size; i++)
;
return 1U << i;
return (size_t)key;
}
static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx)
{
return 0;
}
static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx)
{
return k1 == k2;
}
static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
const struct btf_dedup_opts *opts)
{
struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn;
int i, err = 0;
__u32 sz;
if (!d)
return ERR_PTR(-ENOMEM);
d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
sz = opts && opts->dedup_table_size ? opts->dedup_table_size
: BTF_DEDUP_TABLE_DEFAULT_SIZE;
sz = roundup_pow2_max(sz, BTF_DEDUP_TABLE_MAX_SIZE_LOG);
d->opts.dedup_table_size = sz;
/* dedup_table_size is now used only to force collisions in tests */
if (opts && opts->dedup_table_size == 1)
hash_fn = btf_dedup_collision_hash_fn;
d->btf = btf;
d->btf_ext = btf_ext;
d->dedup_table = calloc(d->opts.dedup_table_size,
sizeof(struct btf_dedup_node *));
if (!d->dedup_table) {
err = -ENOMEM;
d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL);
if (IS_ERR(d->dedup_table)) {
err = PTR_ERR(d->dedup_table);
d->dedup_table = NULL;
goto done;
}
@ -1662,9 +1749,9 @@ done:
return err;
}
static __u32 btf_hash_common(struct btf_type *t)
static long btf_hash_common(struct btf_type *t)
{
__u32 h;
long h;
h = hash_combine(0, t->name_off);
h = hash_combine(h, t->info);
@ -1680,10 +1767,10 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
}
/* Calculate type signature hash of INT. */
static __u32 btf_hash_int(struct btf_type *t)
static long btf_hash_int(struct btf_type *t)
{
__u32 info = *(__u32 *)(t + 1);
__u32 h;
long h;
h = btf_hash_common(t);
h = hash_combine(h, info);
@ -1703,9 +1790,9 @@ static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
}
/* Calculate type signature hash of ENUM. */
static __u32 btf_hash_enum(struct btf_type *t)
static long btf_hash_enum(struct btf_type *t)
{
__u32 h;
long h;
/* don't hash vlen and enum members to support enum fwd resolving */
h = hash_combine(0, t->name_off);
@ -1757,11 +1844,11 @@ static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
* as referenced type IDs equivalence is established separately during type
* graph equivalence check algorithm.
*/
static __u32 btf_hash_struct(struct btf_type *t)
static long btf_hash_struct(struct btf_type *t)
{
struct btf_member *member = (struct btf_member *)(t + 1);
__u32 vlen = BTF_INFO_VLEN(t->info);
__u32 h = btf_hash_common(t);
long h = btf_hash_common(t);
int i;
for (i = 0; i < vlen; i++) {
@ -1804,10 +1891,10 @@ static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
* under assumption that they were already resolved to canonical type IDs and
* are not going to change.
*/
static __u32 btf_hash_array(struct btf_type *t)
static long btf_hash_array(struct btf_type *t)
{
struct btf_array *info = (struct btf_array *)(t + 1);
__u32 h = btf_hash_common(t);
long h = btf_hash_common(t);
h = hash_combine(h, info->type);
h = hash_combine(h, info->index_type);
@ -1858,11 +1945,11 @@ static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2)
* under assumption that they were already resolved to canonical type IDs and
* are not going to change.
*/
static inline __u32 btf_hash_fnproto(struct btf_type *t)
static long btf_hash_fnproto(struct btf_type *t)
{
struct btf_param *member = (struct btf_param *)(t + 1);
__u16 vlen = BTF_INFO_VLEN(t->info);
__u32 h = btf_hash_common(t);
long h = btf_hash_common(t);
int i;
for (i = 0; i < vlen; i++) {
@ -1880,7 +1967,7 @@ static inline __u32 btf_hash_fnproto(struct btf_type *t)
* This function is called during reference types deduplication to compare
* FUNC_PROTO to potential canonical representative.
*/
static inline bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
static bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
{
struct btf_param *m1, *m2;
__u16 vlen;
@ -1906,7 +1993,7 @@ static inline bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
* IDs. This check is performed during type graph equivalence check and
* referenced types equivalence is checked separately.
*/
static inline bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
{
struct btf_param *m1, *m2;
__u16 vlen;
@ -1937,11 +2024,12 @@ static inline bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
{
struct btf_type *t = d->btf->types[type_id];
struct hashmap_entry *hash_entry;
struct btf_type *cand;
struct btf_dedup_node *cand_node;
/* if we don't find equivalent type, then we are canonical */
__u32 new_id = type_id;
__u32 h;
__u32 cand_id;
long h;
switch (BTF_INFO_KIND(t->info)) {
case BTF_KIND_CONST:
@ -1960,10 +2048,11 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
case BTF_KIND_INT:
h = btf_hash_int(t);
for_each_dedup_cand(d, h, cand_node) {
cand = d->btf->types[cand_node->type_id];
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
cand = d->btf->types[cand_id];
if (btf_equal_int(t, cand)) {
new_id = cand_node->type_id;
new_id = cand_id;
break;
}
}
@ -1971,10 +2060,11 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
case BTF_KIND_ENUM:
h = btf_hash_enum(t);
for_each_dedup_cand(d, h, cand_node) {
cand = d->btf->types[cand_node->type_id];
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
cand = d->btf->types[cand_id];
if (btf_equal_enum(t, cand)) {
new_id = cand_node->type_id;
new_id = cand_id;
break;
}
if (d->opts.dont_resolve_fwds)
@ -1982,21 +2072,22 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
if (btf_compat_enum(t, cand)) {
if (btf_is_enum_fwd(t)) {
/* resolve fwd to full enum */
new_id = cand_node->type_id;
new_id = cand_id;
break;
}
/* resolve canonical enum fwd to full enum */
d->map[cand_node->type_id] = type_id;
d->map[cand_id] = type_id;
}
}
break;
case BTF_KIND_FWD:
h = btf_hash_common(t);
for_each_dedup_cand(d, h, cand_node) {
cand = d->btf->types[cand_node->type_id];
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
cand = d->btf->types[cand_id];
if (btf_equal_common(t, cand)) {
new_id = cand_node->type_id;
new_id = cand_id;
break;
}
}
@ -2397,12 +2488,12 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
*/
static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
{
struct btf_dedup_node *cand_node;
struct btf_type *cand_type, *t;
struct hashmap_entry *hash_entry;
/* if we don't find equivalent type, then we are canonical */
__u32 new_id = type_id;
__u16 kind;
__u32 h;
long h;
/* already deduped or is in process of deduping (loop detected) */
if (d->map[type_id] <= BTF_MAX_NR_TYPES)
@ -2415,7 +2506,8 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
return 0;
h = btf_hash_struct(t);
for_each_dedup_cand(d, h, cand_node) {
for_each_dedup_cand(d, hash_entry, h) {
__u32 cand_id = (__u32)(long)hash_entry->value;
int eq;
/*
@ -2428,17 +2520,17 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
* creating a loop (FWD -> STRUCT and STRUCT -> FWD), because
* FWD and compatible STRUCT/UNION are considered equivalent.
*/
cand_type = d->btf->types[cand_node->type_id];
cand_type = d->btf->types[cand_id];
if (!btf_shallow_equal_struct(t, cand_type))
continue;
btf_dedup_clear_hypot_map(d);
eq = btf_dedup_is_equiv(d, type_id, cand_node->type_id);
eq = btf_dedup_is_equiv(d, type_id, cand_id);
if (eq < 0)
return eq;
if (!eq)
continue;
new_id = cand_node->type_id;
new_id = cand_id;
btf_dedup_merge_hypot_map(d);
break;
}
@ -2488,12 +2580,12 @@ static int btf_dedup_struct_types(struct btf_dedup *d)
*/
static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
{
struct btf_dedup_node *cand_node;
struct hashmap_entry *hash_entry;
__u32 new_id = type_id, cand_id;
struct btf_type *t, *cand;
/* if we don't find equivalent type, then we are representative type */
__u32 new_id = type_id;
int ref_type_id;
__u32 h;
long h;
if (d->map[type_id] == BTF_IN_PROGRESS_ID)
return -ELOOP;
@ -2516,10 +2608,11 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
t->type = ref_type_id;
h = btf_hash_common(t);
for_each_dedup_cand(d, h, cand_node) {
cand = d->btf->types[cand_node->type_id];
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
cand = d->btf->types[cand_id];
if (btf_equal_common(t, cand)) {
new_id = cand_node->type_id;
new_id = cand_id;
break;
}
}
@ -2539,10 +2632,11 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
info->index_type = ref_type_id;
h = btf_hash_array(t);
for_each_dedup_cand(d, h, cand_node) {
cand = d->btf->types[cand_node->type_id];
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
cand = d->btf->types[cand_id];
if (btf_equal_array(t, cand)) {
new_id = cand_node->type_id;
new_id = cand_id;
break;
}
}
@ -2570,10 +2664,11 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
}
h = btf_hash_fnproto(t);
for_each_dedup_cand(d, h, cand_node) {
cand = d->btf->types[cand_node->type_id];
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
cand = d->btf->types[cand_id];
if (btf_equal_fnproto(t, cand)) {
new_id = cand_node->type_id;
new_id = cand_id;
break;
}
}
@ -2600,7 +2695,9 @@ static int btf_dedup_ref_types(struct btf_dedup *d)
if (err < 0)
return err;
}
btf_dedup_table_free(d);
/* we won't need d->dedup_table anymore */
hashmap__free(d->dedup_table);
d->dedup_table = NULL;
return 0;
}

View File

@ -4,6 +4,7 @@
#ifndef __LIBBPF_BTF_H
#define __LIBBPF_BTF_H
#include <stdarg.h>
#include <linux/types.h>
#ifdef __cplusplus
@ -59,6 +60,8 @@ struct btf_ext_header {
LIBBPF_API void btf__free(struct btf *btf);
LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size);
LIBBPF_API struct btf *btf__parse_elf(const char *path,
struct btf_ext **btf_ext);
LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
LIBBPF_API int btf__load(struct btf *btf);
LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
@ -100,6 +103,22 @@ struct btf_dedup_opts {
LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
const struct btf_dedup_opts *opts);
struct btf_dump;
struct btf_dump_opts {
void *ctx;
};
typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);
LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf,
const struct btf_ext *btf_ext,
const struct btf_dump_opts *opts,
btf_dump_printf_fn_t printf_fn);
LIBBPF_API void btf_dump__free(struct btf_dump *d);
LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id);
#ifdef __cplusplus
} /* extern "C" */
#endif
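Outside of bpftool, the new btf_dump API can be driven much like dump_btf_c() earlier in this series. A minimal sketch, assuming the IS_ERR()/PTR_ERR() helpers from tools/include and the installed <bpf/btf.h> include path, with most error handling trimmed:

// Sketch: emit every type of a BTF object as C using the new btf_dump API.
#include <stdio.h>
#include <stdarg.h>
#include <linux/err.h>
#include <bpf/btf.h>

static void print_to_stdout(void *ctx, const char *fmt, va_list args)
{
	vfprintf(stdout, fmt, args);
}

static int dump_all_types_as_c(const struct btf *btf)
{
	struct btf_dump *d;
	__u32 id, nr = btf__get_nr_types(btf);
	int err = 0;

	d = btf_dump__new(btf, NULL, NULL, print_to_stdout);
	if (IS_ERR(d))
		return PTR_ERR(d);

	/* type IDs are 1-based; duplicates are skipped by the dumper */
	for (id = 1; id <= nr; id++) {
		err = btf_dump__dump_type(d, id);
		if (err)
			break;
	}

	btf_dump__free(d);
	return err;
}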

1336
tools/lib/bpf/btf_dump.c Normal file

File diff suppressed because it is too large.

229
tools/lib/bpf/hashmap.c Normal file
View File

@ -0,0 +1,229 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/*
* Generic non-thread safe hash map implementation.
*
* Copyright (c) 2019 Facebook
*/
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <linux/err.h>
#include "hashmap.h"
/* start with 4 buckets */
#define HASHMAP_MIN_CAP_BITS 2
static void hashmap_add_entry(struct hashmap_entry **pprev,
struct hashmap_entry *entry)
{
entry->next = *pprev;
*pprev = entry;
}
static void hashmap_del_entry(struct hashmap_entry **pprev,
struct hashmap_entry *entry)
{
*pprev = entry->next;
entry->next = NULL;
}
void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
hashmap_equal_fn equal_fn, void *ctx)
{
map->hash_fn = hash_fn;
map->equal_fn = equal_fn;
map->ctx = ctx;
map->buckets = NULL;
map->cap = 0;
map->cap_bits = 0;
map->sz = 0;
}
struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
hashmap_equal_fn equal_fn,
void *ctx)
{
struct hashmap *map = malloc(sizeof(struct hashmap));
if (!map)
return ERR_PTR(-ENOMEM);
hashmap__init(map, hash_fn, equal_fn, ctx);
return map;
}
void hashmap__clear(struct hashmap *map)
{
free(map->buckets);
map->cap = map->cap_bits = map->sz = 0;
}
void hashmap__free(struct hashmap *map)
{
if (!map)
return;
hashmap__clear(map);
free(map);
}
size_t hashmap__size(const struct hashmap *map)
{
return map->sz;
}
size_t hashmap__capacity(const struct hashmap *map)
{
return map->cap;
}
static bool hashmap_needs_to_grow(struct hashmap *map)
{
/* grow if empty or more than 75% filled */
return (map->cap == 0) || ((map->sz + 1) * 4 / 3 > map->cap);
}
static int hashmap_grow(struct hashmap *map)
{
struct hashmap_entry **new_buckets;
struct hashmap_entry *cur, *tmp;
size_t new_cap_bits, new_cap;
size_t h;
int bkt;
new_cap_bits = map->cap_bits + 1;
if (new_cap_bits < HASHMAP_MIN_CAP_BITS)
new_cap_bits = HASHMAP_MIN_CAP_BITS;
new_cap = 1UL << new_cap_bits;
new_buckets = calloc(new_cap, sizeof(new_buckets[0]));
if (!new_buckets)
return -ENOMEM;
hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
h = hash_bits(map->hash_fn(cur->key, map->ctx), new_cap_bits);
hashmap_add_entry(&new_buckets[h], cur);
}
map->cap = new_cap;
map->cap_bits = new_cap_bits;
free(map->buckets);
map->buckets = new_buckets;
return 0;
}
static bool hashmap_find_entry(const struct hashmap *map,
const void *key, size_t hash,
struct hashmap_entry ***pprev,
struct hashmap_entry **entry)
{
struct hashmap_entry *cur, **prev_ptr;
if (!map->buckets)
return false;
for (prev_ptr = &map->buckets[hash], cur = *prev_ptr;
cur;
prev_ptr = &cur->next, cur = cur->next) {
if (map->equal_fn(cur->key, key, map->ctx)) {
if (pprev)
*pprev = prev_ptr;
*entry = cur;
return true;
}
}
return false;
}
int hashmap__insert(struct hashmap *map, const void *key, void *value,
enum hashmap_insert_strategy strategy,
const void **old_key, void **old_value)
{
struct hashmap_entry *entry;
size_t h;
int err;
if (old_key)
*old_key = NULL;
if (old_value)
*old_value = NULL;
h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
if (strategy != HASHMAP_APPEND &&
hashmap_find_entry(map, key, h, NULL, &entry)) {
if (old_key)
*old_key = entry->key;
if (old_value)
*old_value = entry->value;
if (strategy == HASHMAP_SET || strategy == HASHMAP_UPDATE) {
entry->key = key;
entry->value = value;
return 0;
} else if (strategy == HASHMAP_ADD) {
return -EEXIST;
}
}
if (strategy == HASHMAP_UPDATE)
return -ENOENT;
if (hashmap_needs_to_grow(map)) {
err = hashmap_grow(map);
if (err)
return err;
h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
}
entry = malloc(sizeof(struct hashmap_entry));
if (!entry)
return -ENOMEM;
entry->key = key;
entry->value = value;
hashmap_add_entry(&map->buckets[h], entry);
map->sz++;
return 0;
}
bool hashmap__find(const struct hashmap *map, const void *key, void **value)
{
struct hashmap_entry *entry;
size_t h;
h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
if (!hashmap_find_entry(map, key, h, NULL, &entry))
return false;
if (value)
*value = entry->value;
return true;
}
bool hashmap__delete(struct hashmap *map, const void *key,
const void **old_key, void **old_value)
{
struct hashmap_entry **pprev, *entry;
size_t h;
h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
if (!hashmap_find_entry(map, key, h, &pprev, &entry))
return false;
if (old_key)
*old_key = entry->key;
if (old_value)
*old_value = entry->value;
hashmap_del_entry(pprev, entry);
free(entry);
map->sz--;
return true;
}

173
tools/lib/bpf/hashmap.h Normal file
View File

@ -0,0 +1,173 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/*
* Generic non-thread safe hash map implementation.
*
* Copyright (c) 2019 Facebook
*/
#ifndef __LIBBPF_HASHMAP_H
#define __LIBBPF_HASHMAP_H
#include <stdbool.h>
#include <stddef.h>
#include "libbpf_internal.h"
static inline size_t hash_bits(size_t h, int bits)
{
/* shuffle bits and return requested number of upper bits */
return (h * 11400714819323198485llu) >> (__WORDSIZE - bits);
}
typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
struct hashmap_entry {
const void *key;
void *value;
struct hashmap_entry *next;
};
struct hashmap {
hashmap_hash_fn hash_fn;
hashmap_equal_fn equal_fn;
void *ctx;
struct hashmap_entry **buckets;
size_t cap;
size_t cap_bits;
size_t sz;
};
#define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \
.hash_fn = (hash_fn), \
.equal_fn = (equal_fn), \
.ctx = (ctx), \
.buckets = NULL, \
.cap = 0, \
.cap_bits = 0, \
.sz = 0, \
}
void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
hashmap_equal_fn equal_fn, void *ctx);
struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
hashmap_equal_fn equal_fn,
void *ctx);
void hashmap__clear(struct hashmap *map);
void hashmap__free(struct hashmap *map);
size_t hashmap__size(const struct hashmap *map);
size_t hashmap__capacity(const struct hashmap *map);
/*
* Hashmap insertion strategy:
* - HASHMAP_ADD - only add key/value if key doesn't exist yet;
* - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise,
* update value;
* - HASHMAP_UPDATE - update value, if key already exists; otherwise, do
* nothing and return -ENOENT;
* - HASHMAP_APPEND - always add key/value pair, even if key already exists.
* This turns hashmap into a multimap by allowing multiple values to be
* associated with the same key. The most useful read API for such a hashmap
* is hashmap__for_each_key_entry() iteration. If hashmap__find() is still
* used, it will return the last inserted key/value entry (the first one in
* a bucket chain).
*/
enum hashmap_insert_strategy {
HASHMAP_ADD,
HASHMAP_SET,
HASHMAP_UPDATE,
HASHMAP_APPEND,
};
/*
* hashmap__insert() adds a key/value entry with various semantics, depending
* on the provided strategy value. If a given key/value pair replaces an
* already existing key/value pair, both the old key and the old value will
* be returned through old_key and old_value, so that the calling code can
* do proper memory management.
*/
int hashmap__insert(struct hashmap *map, const void *key, void *value,
enum hashmap_insert_strategy strategy,
const void **old_key, void **old_value);
static inline int hashmap__add(struct hashmap *map,
const void *key, void *value)
{
return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL);
}
static inline int hashmap__set(struct hashmap *map,
const void *key, void *value,
const void **old_key, void **old_value)
{
return hashmap__insert(map, key, value, HASHMAP_SET,
old_key, old_value);
}
static inline int hashmap__update(struct hashmap *map,
const void *key, void *value,
const void **old_key, void **old_value)
{
return hashmap__insert(map, key, value, HASHMAP_UPDATE,
old_key, old_value);
}
static inline int hashmap__append(struct hashmap *map,
const void *key, void *value)
{
return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL);
}
bool hashmap__delete(struct hashmap *map, const void *key,
const void **old_key, void **old_value);
bool hashmap__find(const struct hashmap *map, const void *key, void **value);
/*
* hashmap__for_each_entry - iterate over all entries in hashmap
* @map: hashmap to iterate
* @cur: struct hashmap_entry * used as a loop cursor
* @bkt: integer used as a bucket loop cursor
*/
#define hashmap__for_each_entry(map, cur, bkt) \
for (bkt = 0; bkt < map->cap; bkt++) \
for (cur = map->buckets[bkt]; cur; cur = cur->next)
/*
* hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe
* against removals
* @map: hashmap to iterate
* @cur: struct hashmap_entry * used as a loop cursor
* @tmp: struct hashmap_entry * used as a temporary next cursor storage
* @bkt: integer used as a bucket loop cursor
*/
#define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \
for (bkt = 0; bkt < map->cap; bkt++) \
for (cur = map->buckets[bkt]; \
cur && ({tmp = cur->next; true; }); \
cur = tmp)
/*
* hashmap__for_each_key_entry - iterate over entries associated with given key
* @map: hashmap to iterate
* @cur: struct hashmap_entry * used as a loop cursor
* @key: key to iterate entries for
*/
#define hashmap__for_each_key_entry(map, cur, _key) \
for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
map->cap_bits); \
map->buckets ? map->buckets[bkt] : NULL; }); \
cur; \
cur = cur->next) \
if (map->equal_fn(cur->key, (_key), map->ctx))
#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \
for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
map->cap_bits); \
cur = map->buckets ? map->buckets[bkt] : NULL; }); \
cur && ({ tmp = cur->next; true; }); \
cur = tmp) \
if (map->equal_fn(cur->key, (_key), map->ctx))
#endif /* __LIBBPF_HASHMAP_H */
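/*
 * Illustrative usage sketch (not from the patch) for the hashmap API declared
 * above. ident_hash(), ident_equal() and the cast integer keys/values are
 * made up for the example; it assumes compilation inside tools/lib/bpf/ so
 * that "hashmap.h" and libbpf_get_error() resolve.
 */
#include <stdio.h>
#include "hashmap.h"

static size_t ident_hash(const void *key, void *ctx)
{
	return (size_t)key;
}

static bool ident_equal(const void *k1, const void *k2, void *ctx)
{
	return k1 == k2;
}

int main(void)
{
	struct hashmap *map = hashmap__new(ident_hash, ident_equal, NULL);
	struct hashmap_entry *cur;
	size_t bkt;
	void *value;
	long i;

	if (libbpf_get_error(map))
		return 1;

	for (i = 1; i <= 4; i++)
		hashmap__add(map, (void *)i, (void *)(i * i));

	if (hashmap__find(map, (void *)3, &value))
		printf("3 -> %ld\n", (long)value);	/* prints "3 -> 9" */

	/* HASHMAP_APPEND turns the map into a multimap for key 3 */
	hashmap__append(map, (void *)3, (void *)30);
	hashmap__for_each_key_entry(map, cur, (void *)3)
		printf("3 has value %ld\n", (long)cur->value);

	hashmap__for_each_entry(map, cur, bkt)
		printf("%ld -> %ld\n", (long)cur->key, (long)cur->value);

	/* entries are heap-allocated by hashmap__insert(), so drop them first */
	for (i = 1; i <= 4; i++)
		while (hashmap__delete(map, (void *)i, NULL, NULL))
			;
	hashmap__free(map);
	return 0;
}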

View File

@ -188,6 +188,7 @@ struct bpf_program {
void *line_info;
__u32 line_info_rec_size;
__u32 line_info_cnt;
__u32 prog_flags;
};
enum libbpf_map_type {
@ -348,8 +349,11 @@ static int
bpf_program__init(void *data, size_t size, char *section_name, int idx,
struct bpf_program *prog)
{
if (size < sizeof(struct bpf_insn)) {
pr_warning("corrupted section '%s'\n", section_name);
const size_t bpf_insn_sz = sizeof(struct bpf_insn);
if (size == 0 || size % bpf_insn_sz) {
pr_warning("corrupted section '%s', size: %zu\n",
section_name, size);
return -EINVAL;
}
@ -375,9 +379,8 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,
section_name);
goto errout;
}
prog->insns_cnt = size / sizeof(struct bpf_insn);
memcpy(prog->insns, data,
prog->insns_cnt * sizeof(struct bpf_insn));
prog->insns_cnt = size / bpf_insn_sz;
memcpy(prog->insns, data, size);
prog->idx = idx;
prog->instances.fds = NULL;
prog->instances.nr = -1;
@ -494,15 +497,14 @@ static struct bpf_object *bpf_object__new(const char *path,
strcpy(obj->path, path);
/* Using basename() GNU version which doesn't modify arg. */
strncpy(obj->name, basename((void *)path),
sizeof(obj->name) - 1);
strncpy(obj->name, basename((void *)path), sizeof(obj->name) - 1);
end = strchr(obj->name, '.');
if (end)
*end = 0;
obj->efile.fd = -1;
/*
* Caller of this function should also calls
* Caller of this function should also call
* bpf_object__elf_finish() after data collection to return
* obj_buf to user. If not, we should duplicate the buffer to
* avoid user freeing them before elf finish.
@ -562,38 +564,35 @@ static int bpf_object__elf_init(struct bpf_object *obj)
} else {
obj->efile.fd = open(obj->path, O_RDONLY);
if (obj->efile.fd < 0) {
char errmsg[STRERR_BUFSIZE];
char *cp = libbpf_strerror_r(errno, errmsg,
sizeof(errmsg));
char errmsg[STRERR_BUFSIZE], *cp;
err = -errno;
cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
pr_warning("failed to open %s: %s\n", obj->path, cp);
return -errno;
return err;
}
obj->efile.elf = elf_begin(obj->efile.fd,
LIBBPF_ELF_C_READ_MMAP,
NULL);
LIBBPF_ELF_C_READ_MMAP, NULL);
}
if (!obj->efile.elf) {
pr_warning("failed to open %s as ELF file\n",
obj->path);
pr_warning("failed to open %s as ELF file\n", obj->path);
err = -LIBBPF_ERRNO__LIBELF;
goto errout;
}
if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
pr_warning("failed to get EHDR from %s\n",
obj->path);
pr_warning("failed to get EHDR from %s\n", obj->path);
err = -LIBBPF_ERRNO__FORMAT;
goto errout;
}
ep = &obj->efile.ehdr;
/* Old LLVM set e_machine to EM_NONE */
if ((ep->e_type != ET_REL) || (ep->e_machine && (ep->e_machine != EM_BPF))) {
pr_warning("%s is not an eBPF object file\n",
obj->path);
if (ep->e_type != ET_REL ||
(ep->e_machine && ep->e_machine != EM_BPF)) {
pr_warning("%s is not an eBPF object file\n", obj->path);
err = -LIBBPF_ERRNO__FORMAT;
goto errout;
}
@ -604,47 +603,31 @@ errout:
return err;
}
static int
bpf_object__check_endianness(struct bpf_object *obj)
static int bpf_object__check_endianness(struct bpf_object *obj)
{
static unsigned int const endian = 1;
switch (obj->efile.ehdr.e_ident[EI_DATA]) {
case ELFDATA2LSB:
/* We are big endian, BPF obj is little endian. */
if (*(unsigned char const *)&endian != 1)
goto mismatch;
break;
case ELFDATA2MSB:
/* We are little endian, BPF obj is big endian. */
if (*(unsigned char const *)&endian != 0)
goto mismatch;
break;
default:
return -LIBBPF_ERRNO__ENDIAN;
}
return 0;
mismatch:
pr_warning("Error: endianness mismatch.\n");
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
return 0;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
return 0;
#else
# error "Unrecognized __BYTE_ORDER__"
#endif
pr_warning("endianness mismatch.\n");
return -LIBBPF_ERRNO__ENDIAN;
}
static int
bpf_object__init_license(struct bpf_object *obj,
void *data, size_t size)
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
{
memcpy(obj->license, data,
min(size, sizeof(obj->license) - 1));
memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
pr_debug("license of %s is %s\n", obj->path, obj->license);
return 0;
}
static int
bpf_object__init_kversion(struct bpf_object *obj,
void *data, size_t size)
bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
{
__u32 kver;
@ -654,8 +637,7 @@ bpf_object__init_kversion(struct bpf_object *obj,
}
memcpy(&kver, data, sizeof(kver));
obj->kern_version = kver;
pr_debug("kernel version of %s is %x\n", obj->path,
obj->kern_version);
pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
return 0;
}
@ -811,8 +793,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
def->key_size = sizeof(int);
def->value_size = data->d_size;
def->max_entries = 1;
def->map_flags = type == LIBBPF_MAP_RODATA ?
BPF_F_RDONLY_PROG : 0;
def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0;
if (data_buff) {
*data_buff = malloc(data->d_size);
if (!*data_buff) {
@ -827,8 +808,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
return 0;
}
static int
bpf_object__init_maps(struct bpf_object *obj, int flags)
static int bpf_object__init_maps(struct bpf_object *obj, int flags)
{
int i, map_idx, map_def_sz = 0, nr_syms, nr_maps = 0, nr_maps_glob = 0;
bool strict = !(flags & MAPS_RELAX_COMPAT);
@ -930,6 +910,11 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
map_name = elf_strptr(obj->efile.elf,
obj->efile.strtabidx,
sym.st_name);
if (!map_name) {
pr_warning("failed to get map #%d name sym string for obj %s\n",
map_idx, obj->path);
return -LIBBPF_ERRNO__FORMAT;
}
obj->maps[map_idx].libbpf_type = LIBBPF_MAP_UNSPEC;
obj->maps[map_idx].offset = sym.st_value;
@ -1104,8 +1089,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
/* Elf is corrupted/truncated, avoid calling elf_strptr. */
if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
pr_warning("failed to get e_shstrndx from %s\n",
obj->path);
pr_warning("failed to get e_shstrndx from %s\n", obj->path);
return -LIBBPF_ERRNO__FORMAT;
}
@ -1226,7 +1210,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) {
pr_warning("Corrupted ELF file: index of strtab invalid\n");
return LIBBPF_ERRNO__FORMAT;
return -LIBBPF_ERRNO__FORMAT;
}
if (btf_data) {
obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
@ -1346,8 +1330,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
size_t nr_maps = obj->nr_maps;
int i, nrels;
pr_debug("collecting relocating info for: '%s'\n",
prog->section_name);
pr_debug("collecting relocating info for: '%s'\n", prog->section_name);
nrels = shdr->sh_size / shdr->sh_entsize;
prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
@ -1372,9 +1355,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
return -LIBBPF_ERRNO__FORMAT;
}
if (!gelf_getsym(symbols,
GELF_R_SYM(rel.r_info),
&sym)) {
if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
pr_warning("relocation: symbol %"PRIx64" not found\n",
GELF_R_SYM(rel.r_info));
return -LIBBPF_ERRNO__FORMAT;
@ -1435,8 +1416,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
if (maps[map_idx].libbpf_type != type)
continue;
if (type != LIBBPF_MAP_UNSPEC ||
(type == LIBBPF_MAP_UNSPEC &&
maps[map_idx].offset == sym.st_value)) {
maps[map_idx].offset == sym.st_value) {
pr_debug("relocation: find map %zd (%s) for insn %u\n",
map_idx, maps[map_idx].name, insn_idx);
break;
@ -1444,7 +1424,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
}
if (map_idx >= nr_maps) {
pr_warning("bpf relocation: map_idx %d large than %d\n",
pr_warning("bpf relocation: map_idx %d larger than %d\n",
(int)map_idx, (int)nr_maps - 1);
return -LIBBPF_ERRNO__RELOC;
}
@ -1756,7 +1736,7 @@ bpf_object__create_maps(struct bpf_object *obj)
create_attr.key_size = def->key_size;
create_attr.value_size = def->value_size;
create_attr.max_entries = def->max_entries;
create_attr.btf_fd = 0;
create_attr.btf_fd = -1;
create_attr.btf_key_type_id = 0;
create_attr.btf_value_type_id = 0;
if (bpf_map_type__is_map_in_map(def->type) &&
@ -1770,11 +1750,11 @@ bpf_object__create_maps(struct bpf_object *obj)
}
*pfd = bpf_create_map_xattr(&create_attr);
if (*pfd < 0 && create_attr.btf_key_type_id) {
if (*pfd < 0 && create_attr.btf_fd >= 0) {
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
map->name, cp, errno);
create_attr.btf_fd = 0;
create_attr.btf_fd = -1;
create_attr.btf_key_type_id = 0;
create_attr.btf_value_type_id = 0;
map->btf_key_type_id = 0;
@ -1803,7 +1783,7 @@ err_out:
}
}
pr_debug("create map %s: fd=%d\n", map->name, *pfd);
pr_debug("created map %s: fd=%d\n", map->name, *pfd);
}
return 0;
@ -1824,18 +1804,14 @@ check_btf_ext_reloc_err(struct bpf_program *prog, int err,
if (btf_prog_info) {
/*
* Some info has already been found but has problem
* in the last btf_ext reloc. Must have to error
* out.
* in the last btf_ext reloc. Must have to error out.
*/
pr_warning("Error in relocating %s for sec %s.\n",
info_name, prog->section_name);
return err;
}
/*
* Have problem loading the very first info. Ignore
* the rest.
*/
/* Have problem loading the very first info. Ignore the rest. */
pr_warning("Cannot find %s for main program sec %s. Ignore all %s.\n",
info_name, prog->section_name, info_name);
return 0;
@ -2039,9 +2015,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
return -LIBBPF_ERRNO__RELOC;
}
err = bpf_program__collect_reloc(prog,
shdr, data,
obj);
err = bpf_program__collect_reloc(prog, shdr, data, obj);
if (err)
return err;
}
@ -2058,6 +2032,9 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
char *log_buf;
int ret;
if (!insns || !insns_cnt)
return -EINVAL;
memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
load_attr.prog_type = prog->type;
load_attr.expected_attach_type = prog->expected_attach_type;
@ -2068,7 +2045,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
load_attr.license = license;
load_attr.kern_version = kern_version;
load_attr.prog_ifindex = prog->prog_ifindex;
load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0;
load_attr.prog_btf_fd = prog->btf_fd;
load_attr.func_info = prog->func_info;
load_attr.func_info_rec_size = prog->func_info_rec_size;
load_attr.func_info_cnt = prog->func_info_cnt;
@ -2076,8 +2053,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
load_attr.line_info_rec_size = prog->line_info_rec_size;
load_attr.line_info_cnt = prog->line_info_cnt;
load_attr.log_level = prog->log_level;
if (!load_attr.insns || !load_attr.insns_cnt)
return -EINVAL;
load_attr.prog_flags = prog->prog_flags;
retry_load:
log_buf = malloc(log_buf_size);
@ -2222,7 +2198,7 @@ static bool bpf_program__is_function_storage(struct bpf_program *prog,
}
static int
bpf_object__load_progs(struct bpf_object *obj)
bpf_object__load_progs(struct bpf_object *obj, int log_level)
{
size_t i;
int err;
@ -2230,6 +2206,7 @@ bpf_object__load_progs(struct bpf_object *obj)
for (i = 0; i < obj->nr_programs; i++) {
if (bpf_program__is_function_storage(&obj->programs[i], obj))
continue;
obj->programs[i].log_level |= log_level;
err = bpf_program__load(&obj->programs[i],
obj->license,
obj->kern_version);
@ -2356,11 +2333,9 @@ struct bpf_object *bpf_object__open_buffer(void *obj_buf,
snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
(unsigned long)obj_buf,
(unsigned long)obj_buf_sz);
tmp_name[sizeof(tmp_name) - 1] = '\0';
name = tmp_name;
}
pr_debug("loading object '%s' from buffer\n",
name);
pr_debug("loading object '%s' from buffer\n", name);
return __bpf_object__open(name, obj_buf, obj_buf_sz, true, true);
}
@ -2381,10 +2356,14 @@ int bpf_object__unload(struct bpf_object *obj)
return 0;
}
int bpf_object__load(struct bpf_object *obj)
int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
{
struct bpf_object *obj;
int err;
if (!attr)
return -EINVAL;
obj = attr->obj;
if (!obj)
return -EINVAL;
@ -2397,7 +2376,7 @@ int bpf_object__load(struct bpf_object *obj)
CHECK_ERR(bpf_object__create_maps(obj), err, out);
CHECK_ERR(bpf_object__relocate(obj), err, out);
CHECK_ERR(bpf_object__load_progs(obj), err, out);
CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out);
return 0;
out:
@ -2406,6 +2385,15 @@ out:
return err;
}
int bpf_object__load(struct bpf_object *obj)
{
struct bpf_object_load_attr attr = {
.obj = obj,
};
return bpf_object__load_xattr(&attr);
}
static int check_path(const char *path)
{
char *cp, errmsg[STRERR_BUFSIZE];
@ -3458,9 +3446,7 @@ bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
long libbpf_get_error(const void *ptr)
{
if (IS_ERR(ptr))
return PTR_ERR(ptr);
return 0;
return PTR_ERR_OR_ZERO(ptr);
}
int bpf_prog_load(const char *file, enum bpf_prog_type type,
@ -3521,6 +3507,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
expected_attach_type);
prog->log_level = attr->log_level;
prog->prog_flags = attr->prog_flags;
if (!first_prog)
first_prog = prog;
}

View File

@ -89,8 +89,14 @@ LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj,
LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path);
LIBBPF_API void bpf_object__close(struct bpf_object *object);
struct bpf_object_load_attr {
struct bpf_object *obj;
int log_level;
};
/* Load/unload object into/from kernel */
LIBBPF_API int bpf_object__load(struct bpf_object *obj);
LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr);
LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
LIBBPF_API const char *bpf_object__name(struct bpf_object *obj);
LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj);
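/*
 * Hypothetical caller (illustration only, not part of the patch) of the new
 * bpf_object__load_xattr() API: load an already-opened object with a non-zero
 * verifier log level, which gets OR-ed into every program's log_level. The
 * load_verbose() name and the <bpf/libbpf.h> include path are assumptions.
 */
#include <bpf/libbpf.h>

static int load_verbose(struct bpf_object *obj)
{
	struct bpf_object_load_attr attr = {
		.obj = obj,
		.log_level = 1,
	};

	return bpf_object__load_xattr(&attr);
}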
@ -320,6 +326,7 @@ struct bpf_prog_load_attr {
enum bpf_attach_type expected_attach_type;
int ifindex;
int log_level;
int prog_flags;
};
LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,

View File

@ -164,3 +164,12 @@ LIBBPF_0.0.3 {
bpf_map_freeze;
btf__finalize_data;
} LIBBPF_0.0.2;
LIBBPF_0.0.4 {
global:
btf_dump__dump_type;
btf_dump__free;
btf_dump__new;
btf__parse_elf;
bpf_object__load_xattr;
} LIBBPF_0.0.3;

View File

@ -9,6 +9,8 @@
#ifndef __LIBBPF_LIBBPF_INTERNAL_H
#define __LIBBPF_LIBBPF_INTERNAL_H
#include "libbpf.h"
#define BTF_INFO_ENC(kind, kind_flag, vlen) \
((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)

View File

@ -22,6 +22,7 @@ test_lirc_mode2_user
get_cgroup_id_user
test_skb_cgroup_id_user
test_socket_cookie
test_cgroup_attach
test_cgroup_storage
test_select_reuseport
test_flow_dissector
@ -35,3 +36,6 @@ test_sysctl
alu32
libbpf.pc
libbpf.so.*
test_hashmap
test_btf_dump
xdping

View File

@ -15,7 +15,9 @@ LLC ?= llc
LLVM_OBJCOPY ?= llvm-objcopy
LLVM_READELF ?= llvm-readelf
BTF_PAHOLE ?= pahole
CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include \
-Dbpf_prog_load=bpf_prog_test_load \
-Dbpf_load_program=bpf_test_load_program
LDLIBS += -lcap -lelf -lrt -lpthread
# Order correspond to 'make run_tests' order
@ -23,7 +25,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
test_netcnt test_tcpnotify_user test_sock_fields test_sysctl
test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
test_btf_dump test_cgroup_attach xdping
BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
TEST_GEN_FILES = $(BPF_OBJ_FILES)
@ -54,7 +57,8 @@ TEST_PROGS := test_kmod.sh \
test_lwt_ip_encap.sh \
test_tcp_check_syncookie.sh \
test_tc_tunnel.sh \
test_tc_edt.sh
test_tc_edt.sh \
test_xdping.sh
TEST_PROGS_EXTENDED := with_addr.sh \
with_tunnels.sh \
@ -78,9 +82,9 @@ $(OUTPUT)/test_maps: map_tests/*.c
BPFOBJ := $(OUTPUT)/libbpf.a
$(TEST_GEN_PROGS): $(BPFOBJ)
$(TEST_GEN_PROGS): test_stub.o $(BPFOBJ)
$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
$(TEST_GEN_PROGS_EXTENDED): test_stub.o $(OUTPUT)/libbpf.a
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
@ -96,6 +100,7 @@ $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
$(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
$(OUTPUT)/test_sysctl: cgroup_helpers.c
$(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
.PHONY: force
@ -176,7 +181,7 @@ $(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(OUTPUT)/libbpf.a\
$(ALU32_BUILD_DIR)/urandom_read
$(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) \
-o $(ALU32_BUILD_DIR)/test_progs_32 \
test_progs.c trace_helpers.c prog_tests/*.c \
test_progs.c test_stub.c trace_helpers.c prog_tests/*.c \
$(OUTPUT)/libbpf.a $(LDLIBS)
$(ALU32_BUILD_DIR)/test_progs_32: $(PROG_TESTS_H)

View File

@ -8,6 +8,14 @@
*/
#define SEC(NAME) __attribute__((section(NAME), used))
/* helper macro to print out debug messages */
#define bpf_printk(fmt, ...) \
({ \
char ____fmt[] = fmt; \
bpf_trace_printk(____fmt, sizeof(____fmt), \
##__VA_ARGS__); \
})
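/*
 * Illustrative sketch (not from the patch) of the new bpf_printk() wrapper in
 * a minimal BPF program; the section name and program body are made up.
 * Output shows up in /sys/kernel/debug/tracing/trace_pipe.
 */
#include <linux/bpf.h>
#include "bpf_helpers.h"

SEC("tracepoint/syscalls/sys_enter_execve")
int trace_execve(void *ctx)
{
	char comm[16] = {};

	bpf_get_current_comm(&comm, sizeof(comm));
	bpf_printk("execve by %s\n", comm);
	return 0;
}

char _license[] SEC("license") = "GPL";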
/* helper functions called from eBPF programs written in C */
static void *(*bpf_map_lookup_elem)(void *map, const void *key) =
(void *) BPF_FUNC_map_lookup_elem;
@ -216,6 +224,7 @@ static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk,
(void *) BPF_FUNC_sk_storage_get;
static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) =
(void *)BPF_FUNC_sk_storage_delete;
static int (*bpf_send_signal)(unsigned sig) = (void *)BPF_FUNC_send_signal;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions

View File

@ -33,6 +33,60 @@
snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
CGROUP_WORK_DIR, path)
/**
* enable_all_controllers() - Enable all available cgroup v2 controllers
*
* Enable all available cgroup v2 controllers in order to increase
* the code coverage.
*
* If successful, 0 is returned.
*/
int enable_all_controllers(char *cgroup_path)
{
char path[PATH_MAX + 1];
char buf[PATH_MAX];
char *c, *c2;
int fd, cfd;
ssize_t len;
snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path);
fd = open(path, O_RDONLY);
if (fd < 0) {
log_err("Opening cgroup.controllers: %s", path);
return 1;
}
len = read(fd, buf, sizeof(buf) - 1);
if (len < 0) {
close(fd);
log_err("Reading cgroup.controllers: %s", path);
return 1;
}
buf[len] = 0;
close(fd);
/* No controllers available? We're probably on cgroup v1. */
if (len == 0)
return 0;
snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
cfd = open(path, O_RDWR);
if (cfd < 0) {
log_err("Opening cgroup.subtree_control: %s", path);
return 1;
}
for (c = strtok_r(buf, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) {
if (dprintf(cfd, "+%s\n", c) <= 0) {
log_err("Enabling controller %s: %s", c, path);
close(cfd);
return 1;
}
}
close(cfd);
return 0;
}
/**
* setup_cgroup_environment() - Setup the cgroup environment
*
@ -71,6 +125,9 @@ int setup_cgroup_environment(void)
return 1;
}
if (enable_all_controllers(cgroup_workdir))
return 1;
return 0;
}

View File

@ -12,7 +12,7 @@ static int libbpf_debug_print(enum libbpf_print_level level,
return vfprintf(stderr, "%s", args);
}
static int check_load(const char *file)
static int check_load(const char *file, enum bpf_prog_type type)
{
struct bpf_prog_load_attr attr;
struct bpf_object *obj = NULL;
@ -20,8 +20,9 @@ static int check_load(const char *file)
memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
attr.file = file;
attr.prog_type = BPF_PROG_TYPE_SCHED_CLS;
attr.prog_type = type;
attr.log_level = 4;
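/* BPF_F_TEST_RND_HI32 makes the verifier randomize the upper halves of
 * 32-bit subregisters, so a JIT that misses a required zero-extension fails
 * the test.
 */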
attr.prog_flags = BPF_F_TEST_RND_HI32;
err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
bpf_object__close(obj);
if (err)
@ -31,19 +32,24 @@ static int check_load(const char *file)
void test_bpf_verif_scale(void)
{
const char *file1 = "./test_verif_scale1.o";
const char *file2 = "./test_verif_scale2.o";
const char *file3 = "./test_verif_scale3.o";
int err;
const char *scale[] = {
"./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o"
};
const char *pyperf[] = {
"./pyperf50.o", "./pyperf100.o", "./pyperf180.o"
};
int err, i;
if (verifier_stats)
libbpf_set_print(libbpf_debug_print);
err = check_load(file1);
err |= check_load(file2);
err |= check_load(file3);
if (!err)
printf("test_verif_scale:OK\n");
else
printf("test_verif_scale:FAIL\n");
for (i = 0; i < ARRAY_SIZE(scale); i++) {
err = check_load(scale[i], BPF_PROG_TYPE_SCHED_CLS);
printf("test_scale:%s:%s\n", scale[i], err ? "FAIL" : "OK");
}
for (i = 0; i < ARRAY_SIZE(pyperf); i++) {
err = check_load(pyperf[i], BPF_PROG_TYPE_RAW_TRACEPOINT);
printf("test_scale:%s:%s\n", pyperf[i], err ? "FAIL" : "OK");
}
}

View File

@ -0,0 +1,198 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
static volatile int sigusr1_received = 0;
static void sigusr1_handler(int signum)
{
sigusr1_received++;
}
static int test_send_signal_common(struct perf_event_attr *attr,
int prog_type,
const char *test_name)
{
int err = -1, pmu_fd, prog_fd, info_map_fd, status_map_fd;
const char *file = "./test_send_signal_kern.o";
struct bpf_object *obj = NULL;
int pipe_c2p[2], pipe_p2c[2];
__u32 key = 0, duration = 0;
char buf[256];
pid_t pid;
__u64 val;
if (CHECK(pipe(pipe_c2p), test_name,
"pipe pipe_c2p error: %s\n", strerror(errno)))
goto no_fork_done;
if (CHECK(pipe(pipe_p2c), test_name,
"pipe pipe_p2c error: %s\n", strerror(errno))) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
goto no_fork_done;
}
pid = fork();
if (CHECK(pid < 0, test_name, "fork error: %s\n", strerror(errno))) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
close(pipe_p2c[0]);
close(pipe_p2c[1]);
goto no_fork_done;
}
if (pid == 0) {
/* install signal handler and notify parent */
signal(SIGUSR1, sigusr1_handler);
close(pipe_c2p[0]); /* close read */
close(pipe_p2c[1]); /* close write */
/* notify parent signal handler is installed */
write(pipe_c2p[1], buf, 1);
/* make sure parent enabled bpf program to send_signal */
read(pipe_p2c[0], buf, 1);
/* wait a little for signal handler */
sleep(1);
if (sigusr1_received)
write(pipe_c2p[1], "2", 1);
else
write(pipe_c2p[1], "0", 1);
/* wait for parent notification and exit */
read(pipe_p2c[0], buf, 1);
close(pipe_c2p[1]);
close(pipe_p2c[0]);
exit(0);
}
close(pipe_c2p[1]); /* close write */
close(pipe_p2c[0]); /* close read */
err = bpf_prog_load(file, prog_type, &obj, &prog_fd);
if (CHECK(err < 0, test_name, "bpf_prog_load error: %s\n",
strerror(errno)))
goto prog_load_failure;
pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1,
-1 /* group id */, 0 /* flags */);
if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n",
strerror(errno))) {
err = -1;
goto close_prog;
}
err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_enable error: %s\n",
strerror(errno)))
goto disable_pmu;
err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_set_bpf error: %s\n",
strerror(errno)))
goto disable_pmu;
err = -1;
info_map_fd = bpf_object__find_map_fd_by_name(obj, "info_map");
if (CHECK(info_map_fd < 0, test_name, "find map %s error\n", "info_map"))
goto disable_pmu;
status_map_fd = bpf_object__find_map_fd_by_name(obj, "status_map");
if (CHECK(status_map_fd < 0, test_name, "find map %s error\n", "status_map"))
goto disable_pmu;
/* wait until child signal handler installed */
read(pipe_c2p[0], buf, 1);
/* trigger the bpf send_signal */
key = 0;
val = (((__u64)(SIGUSR1)) << 32) | pid;
bpf_map_update_elem(info_map_fd, &key, &val, 0);
/* notify child that bpf program can send_signal now */
write(pipe_p2c[1], buf, 1);
/* wait for result */
err = read(pipe_c2p[0], buf, 1);
if (CHECK(err < 0, test_name, "reading pipe error: %s\n", strerror(errno)))
goto disable_pmu;
if (CHECK(err == 0, test_name, "reading pipe error: size 0\n")) {
err = -1;
goto disable_pmu;
}
err = CHECK(buf[0] != '2', test_name, "incorrect result\n");
/* notify child safe to exit */
write(pipe_p2c[1], buf, 1);
disable_pmu:
close(pmu_fd);
close_prog:
bpf_object__close(obj);
prog_load_failure:
close(pipe_c2p[0]);
close(pipe_p2c[1]);
wait(NULL);
no_fork_done:
return err;
}
static int test_send_signal_tracepoint(void)
{
const char *id_path = "/sys/kernel/debug/tracing/events/syscalls/sys_enter_nanosleep/id";
struct perf_event_attr attr = {
.type = PERF_TYPE_TRACEPOINT,
.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN,
.sample_period = 1,
.wakeup_events = 1,
};
__u32 duration = 0;
int bytes, efd;
char buf[256];
efd = open(id_path, O_RDONLY, 0);
if (CHECK(efd < 0, "tracepoint",
"open syscalls/sys_enter_nanosleep/id failure: %s\n",
strerror(errno)))
return -1;
bytes = read(efd, buf, sizeof(buf));
close(efd);
if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "tracepoint",
"read syscalls/sys_enter_nanosleep/id failure: %s\n",
strerror(errno)))
return -1;
attr.config = strtol(buf, NULL, 0);
return test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint");
}
static int test_send_signal_nmi(void)
{
struct perf_event_attr attr = {
.sample_freq = 50,
.freq = 1,
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
};
return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, "perf_event");
}
void test_send_signal(void)
{
int ret = 0;
ret |= test_send_signal_tracepoint();
ret |= test_send_signal_nmi();
if (!ret)
printf("test_send_signal:OK\n");
else
printf("test_send_signal:FAIL\n");
}
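/*
 * Hypothetical kernel-side counterpart (illustration only; NOT the actual
 * test_send_signal_kern.c used above): it consumes the (signal << 32 | pid)
 * value the user-space test writes into info_map and calls the new
 * bpf_send_signal() helper when the current task matches. Map layout and
 * section name are assumptions.
 */
#include <linux/bpf.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") info_map = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(__u32),
	.value_size = sizeof(__u64),
	.max_entries = 1,
};

SEC("tracepoint/syscalls/sys_enter_nanosleep")
int send_signal_tp(void *ctx)
{
	__u64 pid_tgid = bpf_get_current_pid_tgid();
	__u32 key = 0, pid = pid_tgid >> 32;
	__u64 *info = bpf_map_lookup_elem(&info_map, &key);

	if (info && (__u32)*info == pid)
		bpf_send_signal(*info >> 32);
	return 0;
}

char _license[] SEC("license") = "GPL";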

View File

@ -0,0 +1,92 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/*
* BTF-to-C dumper tests for bitfield.
*
* Copyright (c) 2019 Facebook
*/
#include <stdbool.h>
/* ----- START-EXPECTED-OUTPUT ----- */
/*
*struct bitfields_only_mixed_types {
* int a: 3;
* long int b: 2;
* _Bool c: 1;
* enum {
* A = 0,
* B = 1,
* } d: 1;
* short e: 5;
* int: 20;
* unsigned int f: 30;
*};
*
*/
/* ------ END-EXPECTED-OUTPUT ------ */
struct bitfields_only_mixed_types {
int a: 3;
long int b: 2;
bool c: 1; /* it's really a _Bool type */
enum {
A, /* A = 0, dumper is very explicit */
B, /* B = 1, same */
} d: 1;
short e: 5;
/* 20-bit padding here */
unsigned f: 30; /* this gets aligned on 4-byte boundary */
};
/* ----- START-EXPECTED-OUTPUT ----- */
/*
*struct bitfield_mixed_with_others {
* char: 4;
* int a: 4;
* short b;
* long int c;
* long int d: 8;
* int e;
* int f;
*};
*
*/
/* ------ END-EXPECTED-OUTPUT ------ */
struct bitfield_mixed_with_others {
long: 4; /* char is enough as a backing field */
int a: 4;
/* 8-bit implicit padding */
short b; /* combined with previous bitfield */
/* 4 more bytes of implicit padding */
long c;
long d: 8;
/* 24 bits implicit padding */
int e; /* combined with previous bitfield */
int f;
/* 4 bytes of padding */
};
/* ----- START-EXPECTED-OUTPUT ----- */
/*
*struct bitfield_flushed {
* int a: 4;
* long: 60;
* long int b: 16;
*};
*
*/
/* ------ END-EXPECTED-OUTPUT ------ */
struct bitfield_flushed {
int a: 4;
long: 0; /* flush until next natural alignment boundary */
long b: 16;
};
int f(struct {
struct bitfields_only_mixed_types _1;
struct bitfield_mixed_with_others _2;
struct bitfield_flushed _3;
} *_)
{
return 0;
}

View File

@ -0,0 +1,35 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/*
* BTF-to-C dumper test for multi-dimensional array output.
*
* Copyright (c) 2019 Facebook
*/
/* ----- START-EXPECTED-OUTPUT ----- */
typedef int arr_t[2];
typedef int multiarr_t[3][4][5];
typedef int *ptr_arr_t[6];
typedef int *ptr_multiarr_t[7][8][9][10];
typedef int * (*fn_ptr_arr_t[11])();
typedef int * (*fn_ptr_multiarr_t[12][13])();
struct root_struct {
arr_t _1;
multiarr_t _2;
ptr_arr_t _3;
ptr_multiarr_t _4;
fn_ptr_arr_t _5;
fn_ptr_multiarr_t _6;
};
/* ------ END-EXPECTED-OUTPUT ------ */
int f(struct root_struct *s)
{
return 0;
}

View File

@ -0,0 +1,73 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/*
* BTF-to-C dumper test validating no name versioning happens between
* independent C namespaces (struct/union/enum vs typedef/enum values).
*
* Copyright (c) 2019 Facebook
*/
/* ----- START-EXPECTED-OUTPUT ----- */
struct S {
int S;
int U;
};
typedef struct S S;
union U {
int S;
int U;
};
typedef union U U;
enum E {
V = 0,
};
typedef enum E E;
struct A {};
union B {};
enum C {
A = 1,
B = 2,
C = 3,
};
struct X {};
union Y {};
enum Z;
typedef int X;
typedef int Y;
typedef int Z;
/*------ END-EXPECTED-OUTPUT ------ */
int f(struct {
struct S _1;
S _2;
union U _3;
U _4;
enum E _5;
E _6;
struct A a;
union B b;
enum C c;
struct X x;
union Y y;
enum Z *z;
X xx;
Y yy;
Z zz;
} *_)
{
return 0;
}

View File

@ -0,0 +1,63 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/*
* BTF-to-C dumper test for topological sorting of dependent structs.
*
* Copyright (c) 2019 Facebook
*/
/* ----- START-EXPECTED-OUTPUT ----- */
struct s1 {};
struct s3;
struct s4;
struct s2 {
struct s2 *s2;
struct s3 *s3;
struct s4 *s4;
};
struct s3 {
struct s1 s1;
struct s2 s2;
};
struct s4 {
struct s1 s1;
struct s3 s3;
};
struct list_head {
struct list_head *next;
struct list_head *prev;
};
struct hlist_node {
struct hlist_node *next;
struct hlist_node **pprev;
};
struct hlist_head {
struct hlist_node *first;
};
struct callback_head {
struct callback_head *next;
void (*func)(struct callback_head *);
};
struct root_struct {
struct s4 s4;
struct list_head l;
struct hlist_node n;
struct hlist_head h;
struct callback_head cb;
};
/*------ END-EXPECTED-OUTPUT ------ */
int f(struct root_struct *root)
{
return 0;
}

View File

@ -0,0 +1,75 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/*
* BTF-to-C dumper tests for struct packing determination.
*
* Copyright (c) 2019 Facebook
*/
/* ----- START-EXPECTED-OUTPUT ----- */
struct packed_trailing_space {
int a;
short b;
} __attribute__((packed));
struct non_packed_trailing_space {
int a;
short b;
};
struct packed_fields {
short a;
int b;
} __attribute__((packed));
struct non_packed_fields {
short a;
int b;
};
struct nested_packed {
char: 4;
int a: 4;
long int b;
struct {
char c;
int d;
} __attribute__((packed)) e;
} __attribute__((packed));
union union_is_never_packed {
int a: 4;
char b;
char c: 1;
};
union union_does_not_need_packing {
struct {
long int a;
int b;
} __attribute__((packed));
int c;
};
union jump_code_union {
char code[5];
struct {
char jump;
int offset;
} __attribute__((packed));
};
/*------ END-EXPECTED-OUTPUT ------ */
int f(struct {
struct packed_trailing_space _1;
struct non_packed_trailing_space _2;
struct packed_fields _3;
struct non_packed_fields _4;
struct nested_packed _5;
union union_is_never_packed _6;
union union_does_not_need_packing _7;
union jump_code_union _8;
} *_)
{
return 0;
}

View File

@ -0,0 +1,111 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/*
* BTF-to-C dumper tests for implicit and explicit padding between fields and
* at the end of a struct.
*
* Copyright (c) 2019 Facebook
*/
/* ----- START-EXPECTED-OUTPUT ----- */
struct padded_implicitly {
int a;
long int b;
char c;
};
/* ------ END-EXPECTED-OUTPUT ------ */
/* ----- START-EXPECTED-OUTPUT ----- */
/*
*struct padded_explicitly {
* int a;
* int: 32;
* int b;
*};
*
*/
/* ------ END-EXPECTED-OUTPUT ------ */
struct padded_explicitly {
int a;
int: 1; /* algo will explicitly pad with full 32 bits here */
int b;
};
/* ----- START-EXPECTED-OUTPUT ----- */
/*
*struct padded_a_lot {
* int a;
* long: 32;
* long: 64;
* long: 64;
* int b;
*};
*
*/
/* ------ END-EXPECTED-OUTPUT ------ */
struct padded_a_lot {
int a;
/* 32 bit of implicit padding here, which algo will make explicit */
long: 64;
long: 64;
int b;
};
/* ----- START-EXPECTED-OUTPUT ----- */
/*
*struct padded_cache_line {
* int a;
* long: 32;
* long: 64;
* long: 64;
* long: 64;
* int b;
*};
*
*/
/* ------ END-EXPECTED-OUTPUT ------ */
struct padded_cache_line {
int a;
int b __attribute__((aligned(32)));
};
/* ----- START-EXPECTED-OUTPUT ----- */
/*
*struct zone_padding {
* char x[0];
*};
*
*struct zone {
* int a;
* short b;
* short: 16;
* struct zone_padding __pad__;
*};
*
*/
/* ------ END-EXPECTED-OUTPUT ------ */
struct zone_padding {
char x[0];
} __attribute__((__aligned__(8)));
struct zone {
int a;
short b;
short: 16;
struct zone_padding __pad__;
};
int f(struct {
struct padded_implicitly _1;
struct padded_explicitly _2;
struct padded_a_lot _3;
struct padded_cache_line _4;
struct zone _5;
} *_)
{
return 0;
}

View File

@ -0,0 +1,229 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/*
* BTF-to-C dumper test for majority of C syntax quirks.
*
* Copyright (c) 2019 Facebook
*/
/* ----- START-EXPECTED-OUTPUT ----- */
enum e1 {
A = 0,
B = 1,
};
enum e2 {
C = 100,
D = -100,
E = 0,
};
typedef enum e2 e2_t;
typedef enum {
F = 0,
G = 1,
H = 2,
} e3_t;
typedef int int_t;
typedef volatile const int * volatile const crazy_ptr_t;
typedef int *****we_need_to_go_deeper_ptr_t;
typedef volatile const we_need_to_go_deeper_ptr_t * restrict * volatile * const * restrict volatile * restrict const * volatile const * restrict volatile const how_about_this_ptr_t;
typedef int *ptr_arr_t[10];
typedef void (*fn_ptr1_t)(int);
typedef void (*printf_fn_t)(const char *, ...);
/* ------ END-EXPECTED-OUTPUT ------ */
/*
* While previous function pointers are pretty trivial (C-syntax-level
* trivial), the following are deciphered here for future generations:
*
* - `fn_ptr2_t`: function, taking anonymous struct as a first arg and pointer
* to a function, that takes int and returns int, as a second arg; returning
* a pointer to a const pointer to a char. Equivalent to:
* typedef struct { int a; } s_t;
* typedef int (*fn_t)(int);
* typedef char * const * (*fn_ptr2_t)(s_t, fn_t);
*
* - `fn_complex_t`: pointer to a function returning struct and accepting
* union and struct. All structs and enum are anonymous and defined inline.
*
* - `signal_t`: pointer to a function accepting a pointer to a function as an
* argument and returning pointer to a function as a result. Sane equivalent:
* typedef void (*signal_handler_t)(int);
* typedef signal_handler_t (*signal_ptr_t)(int, signal_handler_t);
*
* - fn_ptr_arr1_t: array of pointers to a function accepting pointer to
* a pointer to an int and returning pointer to a char. Easy.
*
* - fn_ptr_arr2_t: array of const pointers to a function taking no arguments
* and returning a const pointer to a function, that takes pointer to a
* `int -> char *` function and returns pointer to a char. Equivalent:
* typedef char * (*fn_input_t)(int);
* typedef char * (*fn_output_outer_t)(fn_input_t);
* typedef const fn_output_outer_t (* fn_output_inner_t)();
* typedef const fn_output_inner_t fn_ptr_arr2_t[5];
*/
/* ----- START-EXPECTED-OUTPUT ----- */
typedef char * const * (*fn_ptr2_t)(struct {
int a;
}, int (*)(int));
typedef struct {
int a;
void (*b)(int, struct {
int c;
}, union {
char d;
int e[5];
});
} (*fn_complex_t)(union {
void *f;
char g[16];
}, struct {
int h;
});
typedef void (* (*signal_t)(int, void (*)(int)))(int);
typedef char * (*fn_ptr_arr1_t[10])(int **);
typedef char * (* const (* const fn_ptr_arr2_t[5])())(char * (*)(int));
struct struct_w_typedefs {
int_t a;
crazy_ptr_t b;
we_need_to_go_deeper_ptr_t c;
how_about_this_ptr_t d;
ptr_arr_t e;
fn_ptr1_t f;
printf_fn_t g;
fn_ptr2_t h;
fn_complex_t i;
signal_t j;
fn_ptr_arr1_t k;
fn_ptr_arr2_t l;
};
typedef struct {
int x;
int y;
int z;
} anon_struct_t;
struct struct_fwd;
typedef struct struct_fwd struct_fwd_t;
typedef struct struct_fwd *struct_fwd_ptr_t;
union union_fwd;
typedef union union_fwd union_fwd_t;
typedef union union_fwd *union_fwd_ptr_t;
struct struct_empty {};
struct struct_simple {
int a;
char b;
const int_t *p;
struct struct_empty s;
enum e2 e;
enum {
ANON_VAL1 = 1,
ANON_VAL2 = 2,
} f;
int arr1[13];
enum e2 arr2[5];
};
union union_empty {};
union union_simple {
void *ptr;
int num;
int_t num2;
union union_empty u;
};
struct struct_in_struct {
struct struct_simple simple;
union union_simple also_simple;
struct {
int a;
} not_so_hard_as_well;
union {
int b;
int c;
} anon_union_is_good;
struct {
int d;
int e;
};
union {
int f;
int g;
};
};
struct struct_with_embedded_stuff {
int a;
struct {
int b;
struct {
struct struct_with_embedded_stuff *c;
const char *d;
} e;
union {
volatile long int f;
void * restrict g;
};
};
union {
const int_t *h;
void (*i)(char, int, void *);
} j;
enum {
K = 100,
L = 200,
} m;
char n[16];
struct {
char o;
int p;
void (*q)(int);
} r[5];
struct struct_in_struct s[10];
int t[11];
};
struct root_struct {
enum e1 _1;
enum e2 _2;
e2_t _2_1;
e3_t _2_2;
struct struct_w_typedefs _3;
anon_struct_t _7;
struct struct_fwd *_8;
struct_fwd_t *_9;
struct_fwd_ptr_t _10;
union union_fwd *_11;
union_fwd_t *_12;
union_fwd_ptr_t _13;
struct struct_with_embedded_stuff _14;
};
/* ------ END-EXPECTED-OUTPUT ------ */
int f(struct root_struct *s)
{
return 0;
}

View File

@ -0,0 +1,268 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <linux/sched.h>
#include <linux/ptrace.h>
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include <linux/bpf.h>
#include "bpf_helpers.h"
#define FUNCTION_NAME_LEN 64
#define FILE_NAME_LEN 128
#define TASK_COMM_LEN 16
typedef struct {
int PyThreadState_frame;
int PyThreadState_thread;
int PyFrameObject_back;
int PyFrameObject_code;
int PyFrameObject_lineno;
int PyCodeObject_filename;
int PyCodeObject_name;
int String_data;
int String_size;
} OffsetConfig;
typedef struct {
uintptr_t current_state_addr;
uintptr_t tls_key_addr;
OffsetConfig offsets;
bool use_tls;
} PidData;
typedef struct {
uint32_t success;
} Stats;
typedef struct {
char name[FUNCTION_NAME_LEN];
char file[FILE_NAME_LEN];
} Symbol;
typedef struct {
uint32_t pid;
uint32_t tid;
char comm[TASK_COMM_LEN];
int32_t kernel_stack_id;
int32_t user_stack_id;
bool thread_current;
bool pthread_match;
bool stack_complete;
int16_t stack_len;
int32_t stack[STACK_MAX_LEN];
int has_meta;
int metadata;
char dummy_safeguard;
} Event;
struct bpf_elf_map {
__u32 type;
__u32 size_key;
__u32 size_value;
__u32 max_elem;
__u32 flags;
};
typedef int pid_t;
typedef struct {
void* f_back; // PyFrameObject.f_back, previous frame
void* f_code; // PyFrameObject.f_code, pointer to PyCodeObject
void* co_filename; // PyCodeObject.co_filename
void* co_name; // PyCodeObject.co_name
} FrameData;
static inline __attribute__((__always_inline__)) void*
get_thread_state(void* tls_base, PidData* pidData)
{
void* thread_state;
int key;
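/* read the CPython TLS key, then fetch the per-thread PyThreadState pointer
 * from glibc's thread control block via hard-coded x86-64 offsets
 */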
bpf_probe_read(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
bpf_probe_read(&thread_state, sizeof(thread_state),
tls_base + 0x310 + key * 0x10 + 0x08);
return thread_state;
}
static inline __attribute__((__always_inline__)) bool
get_frame_data(void* frame_ptr, PidData* pidData, FrameData* frame, Symbol* symbol)
{
// read data from PyFrameObject
bpf_probe_read(&frame->f_back,
sizeof(frame->f_back),
frame_ptr + pidData->offsets.PyFrameObject_back);
bpf_probe_read(&frame->f_code,
sizeof(frame->f_code),
frame_ptr + pidData->offsets.PyFrameObject_code);
// read data from PyCodeObject
if (!frame->f_code)
return false;
bpf_probe_read(&frame->co_filename,
sizeof(frame->co_filename),
frame->f_code + pidData->offsets.PyCodeObject_filename);
bpf_probe_read(&frame->co_name,
sizeof(frame->co_name),
frame->f_code + pidData->offsets.PyCodeObject_name);
// read actual names into symbol
if (frame->co_filename)
bpf_probe_read_str(&symbol->file,
sizeof(symbol->file),
frame->co_filename + pidData->offsets.String_data);
if (frame->co_name)
bpf_probe_read_str(&symbol->name,
sizeof(symbol->name),
frame->co_name + pidData->offsets.String_data);
return true;
}
struct bpf_elf_map SEC("maps") pidmap = {
.type = BPF_MAP_TYPE_HASH,
.size_key = sizeof(int),
.size_value = sizeof(PidData),
.max_elem = 1,
};
struct bpf_elf_map SEC("maps") eventmap = {
.type = BPF_MAP_TYPE_HASH,
.size_key = sizeof(int),
.size_value = sizeof(Event),
.max_elem = 1,
};
struct bpf_elf_map SEC("maps") symbolmap = {
.type = BPF_MAP_TYPE_HASH,
.size_key = sizeof(Symbol),
.size_value = sizeof(int),
.max_elem = 1,
};
struct bpf_elf_map SEC("maps") statsmap = {
.type = BPF_MAP_TYPE_ARRAY,
.size_key = sizeof(Stats),
.size_value = sizeof(int),
.max_elem = 1,
};
struct bpf_elf_map SEC("maps") perfmap = {
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
.size_key = sizeof(int),
.size_value = sizeof(int),
.max_elem = 32,
};
struct bpf_elf_map SEC("maps") stackmap = {
.type = BPF_MAP_TYPE_STACK_TRACE,
.size_key = sizeof(int),
.size_value = sizeof(long long) * 127,
.max_elem = 1000,
};
static inline __attribute__((__always_inline__)) int __on_event(struct pt_regs *ctx)
{
uint64_t pid_tgid = bpf_get_current_pid_tgid();
pid_t pid = (pid_t)(pid_tgid >> 32);
PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
if (!pidData)
return 0;
int zero = 0;
Event* event = bpf_map_lookup_elem(&eventmap, &zero);
if (!event)
return 0;
event->pid = pid;
event->tid = (pid_t)pid_tgid;
bpf_get_current_comm(&event->comm, sizeof(event->comm));
event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);
void* thread_state_current = (void*)0;
bpf_probe_read(&thread_state_current,
sizeof(thread_state_current),
(void*)(long)pidData->current_state_addr);
struct task_struct* task = (struct task_struct*)bpf_get_current_task();
void* tls_base = (void*)task;
void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData)
: thread_state_current;
event->thread_current = thread_state == thread_state_current;
if (pidData->use_tls) {
uint64_t pthread_created;
uint64_t pthread_self;
bpf_probe_read(&pthread_self, sizeof(pthread_self), tls_base + 0x10);
bpf_probe_read(&pthread_created,
sizeof(pthread_created),
thread_state + pidData->offsets.PyThreadState_thread);
event->pthread_match = pthread_created == pthread_self;
} else {
event->pthread_match = 1;
}
if (event->pthread_match || !pidData->use_tls) {
void* frame_ptr;
FrameData frame;
Symbol sym = {};
int cur_cpu = bpf_get_smp_processor_id();
bpf_probe_read(&frame_ptr,
sizeof(frame_ptr),
thread_state + pidData->offsets.PyThreadState_frame);
int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
if (symbol_counter == NULL)
return 0;
#pragma unroll
/* Unwind python stack */
for (int i = 0; i < STACK_MAX_LEN; ++i) {
if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
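/* symbol ids are interned per CPU as counter * 64 + cpu, so up to 64 CPUs
 * can hand out new ids without clashing
 */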
int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
if (!symbol_id) {
bpf_map_update_elem(&symbolmap, &sym, &zero, 0);
symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
if (!symbol_id)
return 0;
}
if (*symbol_id == new_symbol_id)
(*symbol_counter)++;
event->stack[i] = *symbol_id;
event->stack_len = i + 1;
frame_ptr = frame.f_back;
}
}
event->stack_complete = frame_ptr == NULL;
} else {
event->stack_complete = 1;
}
Stats* stats = bpf_map_lookup_elem(&statsmap, &zero);
if (stats)
stats->success++;
event->has_meta = 0;
bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata));
return 0;
}
SEC("raw_tracepoint/kfree_skb")
int on_event(struct pt_regs* ctx)
{
int i, ret = 0;
ret |= __on_event(ctx);
ret |= __on_event(ctx);
ret |= __on_event(ctx);
ret |= __on_event(ctx);
ret |= __on_event(ctx);
return ret;
}
char _license[] SEC("license") = "GPL";

View File

@ -0,0 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#define STACK_MAX_LEN 100
#include "pyperf.h"

View File

@ -0,0 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#define STACK_MAX_LEN 180
#include "pyperf.h"

View File

@ -0,0 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#define STACK_MAX_LEN 50
#include "pyperf.h"

Some files were not shown because too many files have changed in this diff.