Merge tag 'net-5.15-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Pull networking fixes from Jakub Kicinski:
 "Networking fixes, including fixes from mac80211, netfilter and bpf.

  Current release - regressions:

   - bpf, cgroup: assign cgroup in cgroup_sk_alloc when called from interrupt
   - mdio: revert mechanical patches which broke handling of optional resources
   - dev_addr_list: prevent address duplication

  Previous releases - regressions:

   - sctp: break out if skb_header_pointer returns NULL in sctp_rcv_ootb (NULL deref)
   - Revert "mac80211: do not use low data rates for data frames with no ack flag", fixing broadcast transmissions
   - mac80211: fix use-after-free in CCMP/GCMP RX
   - netfilter: include zone id in tuple hash again, minimize collisions
   - netfilter: nf_tables: unlink table before deleting it (race -> UAF)
   - netfilter: log: work around missing softdep backend module
   - mptcp: don't return sockets in foreign netns
   - sched: flower: protect fl_walk() with rcu (race -> UAF)
   - ixgbe: fix NULL pointer dereference in ixgbe_xdp_setup
   - smsc95xx: fix stalled rx after link change
   - enetc: fix the incorrect clearing of IF_MODE bits
   - ipv4: fix rtnexthop len when RTA_FLOW is present
   - dsa: mv88e6xxx: 6161: use correct MAX MTU config method for this SKU
   - e100: fix length calculation & buffer overrun in ethtool::get_regs

  Previous releases - always broken:

   - mac80211: fix using stale frag_tail skb pointer in A-MSDU tx
   - mac80211: drop frames from invalid MAC address in ad-hoc mode
   - af_unix: fix races in sk_peer_pid and sk_peer_cred accesses (race -> UAF)
   - bpf, x86: fix bpf mapping of atomic fetch implementation
   - bpf: handle return value of BPF_PROG_TYPE_STRUCT_OPS prog
   - netfilter: ip6_tables: zero-initialize fragment offset
   - mhi: fix error path in mhi_net_newlink
   - af_unix: return errno instead of NULL in unix_create1() when over the fs.file-max limit

  Misc:

   - bpf: exempt CAP_BPF from checks against bpf_jit_limit
   - netfilter: conntrack: make max chain length random, prevent guessing buckets by attackers
   - netfilter: nf_nat_masquerade: make async masq_inet6_event handling generic, defer conntrack walk to work queue (prevent hogging RTNL lock)"

* tag 'net-5.15-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (77 commits)
  af_unix: fix races in sk_peer_pid and sk_peer_cred accesses
  net: stmmac: fix EEE init issue when paired with EEE capable PHYs
  net: dev_addr_list: handle first address in __hw_addr_add_ex
  net: sched: flower: protect fl_walk() with rcu
  net: introduce and use lock_sock_fast_nested()
  net: phy: bcm7xxx: Fixed indirect MMD operations
  net: hns3: disable firmware compatible features when uninstall PF
  net: hns3: fix always enable rx vlan filter problem after selftest
  net: hns3: PF enable promisc for VF when mac table is overflow
  net: hns3: fix show wrong state when add existing uc mac address
  net: hns3: fix mixed flag HCLGE_FLAG_MQPRIO_ENABLE and HCLGE_FLAG_DCB_ENABLE
  net: hns3: don't rollback when destroy mqprio fail
  net: hns3: remove tc enable checking
  net: hns3: do not allow call hns3_nic_net_open repeatedly
  ixgbe: Fix NULL pointer dereference in ixgbe_xdp_setup
  net: bridge: mcast: Associate the seqcount with its protecting lock.
  net: mdio-ipq4019: Fix the error for an optional regs resource
  net: hns3: fix hclge_dbg_dump_tm_pg() stack usage
  net: mdio: mscc-miim: Fix the mdio controller
  af_unix: Return errno instead of NULL in unix_create1().
  ...
commit 4de593fb96
@@ -3384,9 +3384,11 @@ F: Documentation/networking/filter.rst
 F:	Documentation/userspace-api/ebpf/
 F:	arch/*/net/*
+F:	include/linux/bpf*
+F:	include/linux/btf*
 F:	include/linux/filter.h
 F:	include/trace/events/xdp.h
 F:	include/uapi/linux/bpf*
+F:	include/uapi/linux/btf*
 F:	include/uapi/linux/filter.h
 F:	kernel/bpf/
 F:	kernel/trace/bpf_trace.c
@@ -662,6 +662,11 @@ static void build_epilogue(struct jit_ctx *ctx)
 		((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative : func) : \
 		 func##_positive)
 
+static bool is_bad_offset(int b_off)
+{
+	return b_off > 0x1ffff || b_off < -0x20000;
+}
+
 static int build_body(struct jit_ctx *ctx)
 {
 	const struct bpf_prog *prog = ctx->skf;
@@ -728,7 +733,10 @@ load_common:
 			/* Load return register on DS for failures */
 			emit_reg_move(r_ret, r_zero, ctx);
 			/* Return with error */
-			emit_b(b_imm(prog->len, ctx), ctx);
+			b_off = b_imm(prog->len, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_b(b_off, ctx);
 			emit_nop(ctx);
 			break;
 		case BPF_LD | BPF_W | BPF_IND:
@@ -775,8 +783,10 @@ load_ind:
 			emit_jalr(MIPS_R_RA, r_s0, ctx);
 			emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */
 			/* Check the error value */
-			emit_bcond(MIPS_COND_NE, r_ret, 0,
-				   b_imm(prog->len, ctx), ctx);
+			b_off = b_imm(prog->len, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_bcond(MIPS_COND_NE, r_ret, 0, b_off, ctx);
 			emit_reg_move(r_ret, r_zero, ctx);
 			/* We are good */
 			/* X <- P[1:K] & 0xf */
@@ -855,8 +865,10 @@ load_ind:
 			/* A /= X */
 			ctx->flags |= SEEN_X | SEEN_A;
 			/* Check if r_X is zero */
-			emit_bcond(MIPS_COND_EQ, r_X, r_zero,
-				   b_imm(prog->len, ctx), ctx);
+			b_off = b_imm(prog->len, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx);
 			emit_load_imm(r_ret, 0, ctx); /* delay slot */
 			emit_div(r_A, r_X, ctx);
 			break;
@@ -864,8 +876,10 @@ load_ind:
 			/* A %= X */
 			ctx->flags |= SEEN_X | SEEN_A;
 			/* Check if r_X is zero */
-			emit_bcond(MIPS_COND_EQ, r_X, r_zero,
-				   b_imm(prog->len, ctx), ctx);
+			b_off = b_imm(prog->len, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx);
 			emit_load_imm(r_ret, 0, ctx); /* delay slot */
 			emit_mod(r_A, r_X, ctx);
 			break;
@@ -926,7 +940,10 @@ load_ind:
 			break;
 		case BPF_JMP | BPF_JA:
 			/* pc += K */
-			emit_b(b_imm(i + k + 1, ctx), ctx);
+			b_off = b_imm(i + k + 1, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_b(b_off, ctx);
 			emit_nop(ctx);
 			break;
 		case BPF_JMP | BPF_JEQ | BPF_K:
@@ -1056,12 +1073,16 @@ jmp_cmp:
 			break;
 		case BPF_RET | BPF_A:
 			ctx->flags |= SEEN_A;
-			if (i != prog->len - 1)
+			if (i != prog->len - 1) {
 				/*
 				 * If this is not the last instruction
 				 * then jump to the epilogue
 				 */
-				emit_b(b_imm(prog->len, ctx), ctx);
+				b_off = b_imm(prog->len, ctx);
+				if (is_bad_offset(b_off))
+					return -E2BIG;
+				emit_b(b_off, ctx);
+			}
 			emit_reg_move(r_ret, r_A, ctx); /* delay slot */
 			break;
 		case BPF_RET | BPF_K:
@@ -1075,7 +1096,10 @@ jmp_cmp:
 				 * If this is not the last instruction
 				 * then jump to the epilogue
 				 */
-				emit_b(b_imm(prog->len, ctx), ctx);
+				b_off = b_imm(prog->len, ctx);
+				if (is_bad_offset(b_off))
+					return -E2BIG;
+				emit_b(b_off, ctx);
 				emit_nop(ctx);
 			}
 			break;
@@ -1133,8 +1157,10 @@ jmp_cmp:
 			/* Load *dev pointer */
 			emit_load_ptr(r_s0, r_skb, off, ctx);
 			/* error (0) in the delay slot */
-			emit_bcond(MIPS_COND_EQ, r_s0, r_zero,
-				   b_imm(prog->len, ctx), ctx);
+			b_off = b_imm(prog->len, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_bcond(MIPS_COND_EQ, r_s0, r_zero, b_off, ctx);
 			emit_reg_move(r_ret, r_zero, ctx);
 			if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
 				BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4);
@@ -1244,7 +1270,10 @@ void bpf_jit_compile(struct bpf_prog *fp)
 
 	/* Generate the actual JIT code */
 	build_prologue(&ctx);
-	build_body(&ctx);
+	if (build_body(&ctx)) {
+		module_memfree(ctx.target);
+		goto out;
+	}
 	build_epilogue(&ctx);
 
 	/* Update the icache */
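The bounds in is_bad_offset() above follow directly from the MIPS branch encoding; a minimal standalone sketch of the arithmetic (plain C, assuming only the I-type instruction format, not the kernel's emitter API):

/* A MIPS I-type branch stores a signed 16-bit immediate counted in
 * 32-bit words, so the byte displacement is imm16 << 2:
 *   minimum: -32768 * 4 = -0x20000
 *   maximum:  32767 * 4 =  0x1fffc (anything above 0x1ffff is out)
 * Offsets outside this window used to be silently truncated; the JIT
 * now returns -E2BIG and the kernel falls back to the interpreter.
 */
#include <stdbool.h>

static bool branch_offset_fits(int b_off)
{
	return b_off >= -0x20000 && b_off <= 0x1ffff;
}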
@@ -1341,9 +1341,10 @@ st:			if (is_imm8(insn->off))
 			if (insn->imm == (BPF_AND | BPF_FETCH) ||
 			    insn->imm == (BPF_OR | BPF_FETCH) ||
 			    insn->imm == (BPF_XOR | BPF_FETCH)) {
-				u8 *branch_target;
 				bool is64 = BPF_SIZE(insn->code) == BPF_DW;
 				u32 real_src_reg = src_reg;
+				u32 real_dst_reg = dst_reg;
+				u8 *branch_target;
 
 				/*
 				 * Can't be implemented with a single x86 insn.
@@ -1354,11 +1355,13 @@ st:			if (is_imm8(insn->off))
 				emit_mov_reg(&prog, true, BPF_REG_AX, BPF_REG_0);
 				if (src_reg == BPF_REG_0)
 					real_src_reg = BPF_REG_AX;
+				if (dst_reg == BPF_REG_0)
+					real_dst_reg = BPF_REG_AX;
 
 				branch_target = prog;
 				/* Load old value */
 				emit_ldx(&prog, BPF_SIZE(insn->code),
-					 BPF_REG_0, dst_reg, insn->off);
+					 BPF_REG_0, real_dst_reg, insn->off);
 				/*
 				 * Perform the (commutative) operation locally,
 				 * put the result in the AUX_REG.
@@ -1369,7 +1372,8 @@ st:			if (is_imm8(insn->off))
 					  add_2reg(0xC0, AUX_REG, real_src_reg));
 				/* Attempt to swap in new value */
 				err = emit_atomic(&prog, BPF_CMPXCHG,
-						  dst_reg, AUX_REG, insn->off,
+						  real_dst_reg, AUX_REG,
+						  insn->off,
 						  BPF_SIZE(insn->code));
 				if (WARN_ON(err))
 					return err;
@@ -1383,11 +1387,10 @@ st:			if (is_imm8(insn->off))
 				/* Restore R0 after clobbering RAX */
 				emit_mov_reg(&prog, true, BPF_REG_0, BPF_REG_AX);
 				break;
-
 			}
 
 			err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
-					  insn->off, BPF_SIZE(insn->code));
+					  insn->off, BPF_SIZE(insn->code));
 			if (err)
 				return err;
 			break;
@@ -1744,7 +1747,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
 }
 
 static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
-			   struct bpf_prog *p, int stack_size, bool mod_ret)
+			   struct bpf_prog *p, int stack_size, bool save_ret)
 {
 	u8 *prog = *pprog;
 	u8 *jmp_insn;
@@ -1777,11 +1780,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	if (emit_call(&prog, p->bpf_func, prog))
 		return -EINVAL;
 
-	/* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
+	/*
+	 * BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
 	 * of the previous call which is then passed on the stack to
 	 * the next BPF program.
+	 *
+	 * BPF_TRAMP_FENTRY trampoline may need to return the return
+	 * value of BPF_PROG_TYPE_STRUCT_OPS prog.
 	 */
-	if (mod_ret)
+	if (save_ret)
 		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
 
 	/* replace 2 nops with JE insn, since jmp target is known */
@@ -1828,13 +1835,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
 }
 
 static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
-		      struct bpf_tramp_progs *tp, int stack_size)
+		      struct bpf_tramp_progs *tp, int stack_size,
+		      bool save_ret)
 {
 	int i;
 	u8 *prog = *pprog;
 
 	for (i = 0; i < tp->nr_progs; i++) {
-		if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false))
+		if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size,
+				    save_ret))
 			return -EINVAL;
 	}
 	*pprog = prog;
@@ -1877,6 +1886,23 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
 	return 0;
 }
 
+static bool is_valid_bpf_tramp_flags(unsigned int flags)
+{
+	if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
+	    (flags & BPF_TRAMP_F_SKIP_FRAME))
+		return false;
+
+	/*
+	 * BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops,
+	 * and it must be used alone.
+	 */
+	if ((flags & BPF_TRAMP_F_RET_FENTRY_RET) &&
+	    (flags & ~BPF_TRAMP_F_RET_FENTRY_RET))
+		return false;
+
+	return true;
+}
+
 /* Example:
  * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
  * its 'struct btf_func_model' will be nr_args=2
@@ -1949,17 +1975,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN];
 	u8 **branches = NULL;
 	u8 *prog;
+	bool save_ret;
 
 	/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
 	if (nr_args > 6)
 		return -ENOTSUPP;
 
-	if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
-	    (flags & BPF_TRAMP_F_SKIP_FRAME))
+	if (!is_valid_bpf_tramp_flags(flags))
 		return -EINVAL;
 
-	if (flags & BPF_TRAMP_F_CALL_ORIG)
-		stack_size += 8; /* room for return value of orig_call */
+	/* room for return value of orig_call or fentry prog */
+	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
+	if (save_ret)
+		stack_size += 8;
 
 	if (flags & BPF_TRAMP_F_IP_ARG)
 		stack_size += 8; /* room for IP address argument */
@@ -2005,7 +2033,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}
 
 	if (fentry->nr_progs)
-		if (invoke_bpf(m, &prog, fentry, stack_size))
+		if (invoke_bpf(m, &prog, fentry, stack_size,
+			       flags & BPF_TRAMP_F_RET_FENTRY_RET))
 			return -EINVAL;
 
 	if (fmod_ret->nr_progs) {
@@ -2052,7 +2081,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}
 
 	if (fexit->nr_progs)
-		if (invoke_bpf(m, &prog, fexit, stack_size)) {
+		if (invoke_bpf(m, &prog, fexit, stack_size, false)) {
 			ret = -EINVAL;
 			goto cleanup;
 		}
@@ -2072,9 +2101,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 			ret = -EINVAL;
 			goto cleanup;
 		}
-		/* restore original return value back into RAX */
-		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
 	}
+	/* restore return value of orig_call or fentry prog back into RAX */
+	if (save_ret)
+		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
 
 	EMIT1(0x5B); /* pop rbx */
 	EMIT1(0xC9); /* leave */
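The cmpxchg loop being patched above is easier to follow in C. A hedged sketch of the semantics the JIT emits for BPF_OR | BPF_FETCH (the function name and the GCC builtin are illustrative, not the kernel's code):

/* BPF_{AND,OR,XOR} | BPF_FETCH cannot be a single x86 instruction, so
 * the JIT emits a compare-and-exchange retry loop. The fix makes both
 * the initial load and the CMPXCHG use the remapped pointer register
 * (real_dst_reg) whenever dst_reg was R0 and had to be moved into AX.
 */
static long atomic_fetch_or_sketch(long *ptr, long src)
{
	long old, new;

	do {
		old = *ptr;		/* emit_ldx() from real_dst_reg */
		new = old | src;	/* commutative op into AUX_REG */
	} while (!__sync_bool_compare_and_swap(ptr, old, new));

	return old;			/* BPF_FETCH returns the old value */
}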
@@ -2834,8 +2834,8 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
 	if (err)
 		return err;
 
-	/* Port Control 2: don't force a good FCS, set the maximum frame size to
-	 * 10240 bytes, disable 802.1q tags checking, don't discard tagged or
+	/* Port Control 2: don't force a good FCS, set the MTU size to
+	 * 10222 bytes, disable 802.1q tags checking, don't discard tagged or
 	 * untagged frames on this port, do a destination address lookup on all
 	 * received packets as usual, disable ARP mirroring and don't send a
 	 * copy of all transmitted/received frames on this port to the CPU.
@@ -2854,7 +2854,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
 		return err;
 
 	if (chip->info->ops->port_set_jumbo_size) {
-		err = chip->info->ops->port_set_jumbo_size(chip, port, 10240);
+		err = chip->info->ops->port_set_jumbo_size(chip, port, 10218);
 		if (err)
 			return err;
 	}
@@ -2944,10 +2944,10 @@ static int mv88e6xxx_get_max_mtu(struct dsa_switch *ds, int port)
 	struct mv88e6xxx_chip *chip = ds->priv;
 
 	if (chip->info->ops->port_set_jumbo_size)
-		return 10240;
+		return 10240 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN;
 	else if (chip->info->ops->set_max_frame_size)
-		return 1632;
-	return 1522;
+		return 1632 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN;
+	return 1522 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN;
 }
 
 static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
@@ -2955,6 +2955,9 @@ static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
 	struct mv88e6xxx_chip *chip = ds->priv;
 	int ret = 0;
 
+	if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
+		new_mtu += EDSA_HLEN;
+
 	mv88e6xxx_reg_lock(chip);
 	if (chip->info->ops->port_set_jumbo_size)
 		ret = chip->info->ops->port_set_jumbo_size(chip, port, new_mtu);
@@ -3725,7 +3728,6 @@ static const struct mv88e6xxx_ops mv88e6161_ops = {
 	.port_set_ucast_flood = mv88e6352_port_set_ucast_flood,
 	.port_set_mcast_flood = mv88e6352_port_set_mcast_flood,
 	.port_set_ether_type = mv88e6351_port_set_ether_type,
-	.port_set_jumbo_size = mv88e6165_port_set_jumbo_size,
 	.port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
 	.port_pause_limit = mv88e6097_port_pause_limit,
 	.port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
@@ -3750,6 +3752,7 @@ static const struct mv88e6xxx_ops mv88e6161_ops = {
 	.avb_ops = &mv88e6165_avb_ops,
 	.ptp_ops = &mv88e6165_ptp_ops,
 	.phylink_validate = mv88e6185_phylink_validate,
+	.set_max_frame_size = mv88e6185_g1_set_max_frame_size,
 };
 
 static const struct mv88e6xxx_ops mv88e6165_ops = {
@@ -18,6 +18,7 @@
 #include <linux/timecounter.h>
 #include <net/dsa.h>
 
+#define EDSA_HLEN		8
 #define MV88E6XXX_N_FID		4096
 
 /* PVT limits for 4-bit port and 5-bit switch */
@@ -232,6 +232,8 @@ int mv88e6185_g1_set_max_frame_size(struct mv88e6xxx_chip *chip, int mtu)
 	u16 val;
 	int err;
 
+	mtu += ETH_HLEN + ETH_FCS_LEN;
+
 	err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL1, &val);
 	if (err)
 		return err;
@@ -1277,6 +1277,8 @@ int mv88e6165_port_set_jumbo_size(struct mv88e6xxx_chip *chip, int port,
 	u16 reg;
 	int err;
 
+	size += VLAN_ETH_HLEN + ETH_FCS_LEN;
+
 	err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_CTL2, &reg);
 	if (err)
 		return err;
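The constants in the mv88e6xxx hunks above are mutually consistent; a small standalone sketch checking the arithmetic (the HLEN/FCS values are taken from the kernel headers and restated locally so this compiles on its own):

/* port_set_jumbo_size() now adds the VLAN Ethernet header and FCS to
 * the requested payload size, so the 10218 requested in
 * mv88e6xxx_setup_port() lands exactly on the 10240-byte frame limit,
 * and get_max_mtu() reports that limit minus all tagging overhead.
 */
enum { VLAN_ETH_HLEN = 18, ETH_FCS_LEN = 4, EDSA_HLEN = 8 };

_Static_assert(10218 + VLAN_ETH_HLEN + ETH_FCS_LEN == 10240,
	       "jumbo request + VLAN header + FCS == hardware frame limit");
_Static_assert(10240 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN == 10210,
	       "max MTU reported for jumbo-capable ports");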
@@ -192,6 +192,9 @@ static int bgmac_probe(struct platform_device *pdev)
 	bgmac->dma_dev = &pdev->dev;
 
 	ret = of_get_mac_address(np, bgmac->net_dev->dev_addr);
+	if (ret == -EPROBE_DEFER)
+		return ret;
+
 	if (ret)
 		dev_warn(&pdev->dev,
 			 "MAC address not present in device tree\n");
@@ -541,8 +541,7 @@ static void enetc_mac_config(struct enetc_hw *hw, phy_interface_t phy_mode)
 
 	if (phy_interface_mode_is_rgmii(phy_mode)) {
 		val = enetc_port_rd(hw, ENETC_PM0_IF_MODE);
-		val &= ~ENETC_PM0_IFM_EN_AUTO;
-		val &= ENETC_PM0_IFM_IFMODE_MASK;
+		val &= ~(ENETC_PM0_IFM_EN_AUTO | ENETC_PM0_IFM_IFMODE_MASK);
 		val |= ENETC_PM0_IFM_IFMODE_GMII | ENETC_PM0_IFM_RG;
 		enetc_port_wr(hw, ENETC_PM0_IF_MODE, val);
 	}
@@ -752,7 +752,6 @@ struct hnae3_tc_info {
 	u8 prio_tc[HNAE3_MAX_USER_PRIO]; /* TC indexed by prio */
 	u16 tqp_count[HNAE3_MAX_TC];
 	u16 tqp_offset[HNAE3_MAX_TC];
-	unsigned long tc_en; /* bitmap of TC enabled */
 	u8 num_tc; /* Total number of enabled TCs */
 	bool mqprio_active;
 };
@@ -623,13 +623,9 @@ static int hns3_nic_set_real_num_queue(struct net_device *netdev)
 			return ret;
 		}
 
-		for (i = 0; i < HNAE3_MAX_TC; i++) {
-			if (!test_bit(i, &tc_info->tc_en))
-				continue;
-
+		for (i = 0; i < tc_info->num_tc; i++)
 			netdev_set_tc_queue(netdev, i, tc_info->tqp_count[i],
 					    tc_info->tqp_offset[i]);
-		}
 	}
 
 	ret = netif_set_real_num_tx_queues(netdev, queue_size);
@@ -779,6 +775,11 @@ static int hns3_nic_net_open(struct net_device *netdev)
 	if (hns3_nic_resetting(netdev))
 		return -EBUSY;
 
+	if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state)) {
+		netdev_warn(netdev, "net open repeatedly!\n");
+		return 0;
+	}
+
 	netif_carrier_off(netdev);
 
 	ret = hns3_nic_set_real_num_queue(netdev);
@@ -4865,12 +4866,9 @@ static void hns3_init_tx_ring_tc(struct hns3_nic_priv *priv)
 	struct hnae3_tc_info *tc_info = &kinfo->tc_info;
 	int i;
 
-	for (i = 0; i < HNAE3_MAX_TC; i++) {
+	for (i = 0; i < tc_info->num_tc; i++) {
 		int j;
 
-		if (!test_bit(i, &tc_info->tc_en))
-			continue;
-
 		for (j = 0; j < tc_info->tqp_count[i]; j++) {
 			struct hnae3_queue *q;
 
@@ -334,7 +334,8 @@ static void hns3_selftest_prepare(struct net_device *ndev,
 
 #if IS_ENABLED(CONFIG_VLAN_8021Q)
 	/* Disable the vlan filter for selftest does not support it */
-	if (h->ae_algo->ops->enable_vlan_filter)
+	if (h->ae_algo->ops->enable_vlan_filter &&
+	    ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
 		h->ae_algo->ops->enable_vlan_filter(h, false);
 #endif
 
@@ -359,7 +360,8 @@ static void hns3_selftest_restore(struct net_device *ndev, bool if_running)
 		h->ae_algo->ops->halt_autoneg(h, false);
 
 #if IS_ENABLED(CONFIG_VLAN_8021Q)
-	if (h->ae_algo->ops->enable_vlan_filter)
+	if (h->ae_algo->ops->enable_vlan_filter &&
+	    ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
 		h->ae_algo->ops->enable_vlan_filter(h, true);
 #endif
 
@@ -467,7 +467,7 @@ err_csq:
 	return ret;
 }
 
-static int hclge_firmware_compat_config(struct hclge_dev *hdev)
+static int hclge_firmware_compat_config(struct hclge_dev *hdev, bool en)
 {
 	struct hclge_firmware_compat_cmd *req;
 	struct hclge_desc desc;
@@ -475,13 +475,16 @@ static int hclge_firmware_compat_config(struct hclge_dev *hdev)
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_IMP_COMPAT_CFG, false);
 
-	req = (struct hclge_firmware_compat_cmd *)desc.data;
+	if (en) {
+		req = (struct hclge_firmware_compat_cmd *)desc.data;
 
-	hnae3_set_bit(compat, HCLGE_LINK_EVENT_REPORT_EN_B, 1);
-	hnae3_set_bit(compat, HCLGE_NCSI_ERROR_REPORT_EN_B, 1);
-	if (hnae3_dev_phy_imp_supported(hdev))
-		hnae3_set_bit(compat, HCLGE_PHY_IMP_EN_B, 1);
-	req->compat = cpu_to_le32(compat);
+		hnae3_set_bit(compat, HCLGE_LINK_EVENT_REPORT_EN_B, 1);
+		hnae3_set_bit(compat, HCLGE_NCSI_ERROR_REPORT_EN_B, 1);
+		if (hnae3_dev_phy_imp_supported(hdev))
+			hnae3_set_bit(compat, HCLGE_PHY_IMP_EN_B, 1);
+
+		req->compat = cpu_to_le32(compat);
+	}
 
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
@@ -538,7 +541,7 @@ int hclge_cmd_init(struct hclge_dev *hdev)
 	/* ask the firmware to enable some features, driver can work without
 	 * it.
 	 */
-	ret = hclge_firmware_compat_config(hdev);
+	ret = hclge_firmware_compat_config(hdev, true);
 	if (ret)
 		dev_warn(&hdev->pdev->dev,
 			 "Firmware compatible features not enabled(%d).\n",
@@ -568,6 +571,8 @@ static void hclge_cmd_uninit_regs(struct hclge_hw *hw)
 
 void hclge_cmd_uninit(struct hclge_dev *hdev)
 {
+	hclge_firmware_compat_config(hdev, false);
+
 	set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
 	/* wait to ensure that the firmware completes the possible left
 	 * over commands.
@@ -247,6 +247,10 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
 	}
 
 	hclge_tm_schd_info_update(hdev, num_tc);
+	if (num_tc > 1)
+		hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
+	else
+		hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
 
 	ret = hclge_ieee_ets_to_tm_info(hdev, ets);
 	if (ret)
@@ -306,8 +310,7 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
 	u8 i, j, pfc_map, *prio_tc;
 	int ret;
 
-	if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
-	    hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
+	if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
 		return -EINVAL;
 
 	if (pfc->pfc_en == hdev->tm_info.pfc_en)
@@ -441,8 +444,6 @@ static int hclge_mqprio_qopt_check(struct hclge_dev *hdev,
 static void hclge_sync_mqprio_qopt(struct hnae3_tc_info *tc_info,
 				   struct tc_mqprio_qopt_offload *mqprio_qopt)
 {
-	int i;
-
 	memset(tc_info, 0, sizeof(*tc_info));
 	tc_info->num_tc = mqprio_qopt->qopt.num_tc;
 	memcpy(tc_info->prio_tc, mqprio_qopt->qopt.prio_tc_map,
@@ -451,9 +452,6 @@ static void hclge_sync_mqprio_qopt(struct hnae3_tc_info *tc_info,
 	       sizeof_field(struct hnae3_tc_info, tqp_count));
 	memcpy(tc_info->tqp_offset, mqprio_qopt->qopt.offset,
 	       sizeof_field(struct hnae3_tc_info, tqp_offset));
-
-	for (i = 0; i < HNAE3_MAX_USER_PRIO; i++)
-		set_bit(tc_info->prio_tc[i], &tc_info->tc_en);
 }
 
 static int hclge_config_tc(struct hclge_dev *hdev,
@@ -519,12 +517,17 @@ static int hclge_setup_tc(struct hnae3_handle *h,
 	return hclge_notify_init_up(hdev);
 
 err_out:
-	/* roll-back */
-	memcpy(&kinfo->tc_info, &old_tc_info, sizeof(old_tc_info));
-	if (hclge_config_tc(hdev, &kinfo->tc_info))
-		dev_err(&hdev->pdev->dev,
-			"failed to roll back tc configuration\n");
-
+	if (!tc) {
+		dev_warn(&hdev->pdev->dev,
+			 "failed to destroy mqprio, will active after reset, ret = %d\n",
+			 ret);
+	} else {
+		/* roll-back */
+		memcpy(&kinfo->tc_info, &old_tc_info, sizeof(old_tc_info));
+		if (hclge_config_tc(hdev, &kinfo->tc_info))
+			dev_err(&hdev->pdev->dev,
+				"failed to roll back tc configuration\n");
+	}
 	hclge_notify_init_up(hdev);
 
 	return ret;
@@ -719,9 +719,9 @@ static void hclge_dbg_fill_shaper_content(struct hclge_tm_shaper_para *para,
 	sprintf(result[(*index)++], "%6u", para->rate);
 }
 
-static int hclge_dbg_dump_tm_pg(struct hclge_dev *hdev, char *buf, int len)
+static int __hclge_dbg_dump_tm_pg(struct hclge_dev *hdev, char *data_str,
+				  char *buf, int len)
 {
-	char data_str[ARRAY_SIZE(tm_pg_items)][HCLGE_DBG_DATA_STR_LEN];
 	struct hclge_tm_shaper_para c_shaper_para, p_shaper_para;
 	char *result[ARRAY_SIZE(tm_pg_items)], *sch_mode_str;
 	u8 pg_id, sch_mode, weight, pri_bit_map, i, j;
@@ -729,8 +729,10 @@ static int hclge_dbg_dump_tm_pg(struct hclge_dev *hdev, char *buf, int len)
 	int pos = 0;
 	int ret;
 
-	for (i = 0; i < ARRAY_SIZE(tm_pg_items); i++)
-		result[i] = &data_str[i][0];
+	for (i = 0; i < ARRAY_SIZE(tm_pg_items); i++) {
+		result[i] = data_str;
+		data_str += HCLGE_DBG_DATA_STR_LEN;
+	}
 
 	hclge_dbg_fill_content(content, sizeof(content), tm_pg_items,
 			       NULL, ARRAY_SIZE(tm_pg_items));
@@ -781,6 +783,24 @@ static int hclge_dbg_dump_tm_pg(struct hclge_dev *hdev, char *buf, int len)
 	return 0;
 }
 
+static int hclge_dbg_dump_tm_pg(struct hclge_dev *hdev, char *buf, int len)
+{
+	char *data_str;
+	int ret;
+
+	data_str = kcalloc(ARRAY_SIZE(tm_pg_items),
+			   HCLGE_DBG_DATA_STR_LEN, GFP_KERNEL);
+
+	if (!data_str)
+		return -ENOMEM;
+
+	ret = __hclge_dbg_dump_tm_pg(hdev, data_str, buf, len);
+
+	kfree(data_str);
+
+	return ret;
+}
+
 static int hclge_dbg_dump_tm_port(struct hclge_dev *hdev, char *buf, int len)
 {
 	struct hclge_tm_shaper_para shaper_para;
@@ -8708,15 +8708,8 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
 	}
 
 	/* check if we just hit the duplicate */
-	if (!ret) {
-		dev_warn(&hdev->pdev->dev, "VF %u mac(%pM) exists\n",
-			 vport->vport_id, addr);
-		return 0;
-	}
-
-	dev_err(&hdev->pdev->dev,
-		"PF failed to add unicast entry(%pM) in the MAC table\n",
-		addr);
+	if (!ret)
+		return -EEXIST;
 
 	return ret;
 }
@@ -8868,7 +8861,13 @@ static void hclge_sync_vport_mac_list(struct hclge_vport *vport,
 		} else {
 			set_bit(HCLGE_VPORT_STATE_MAC_TBL_CHANGE,
 				&vport->state);
-			break;
+
+			/* If one unicast mac address is existing in hardware,
+			 * we need to try whether other unicast mac addresses
+			 * are new addresses that can be added.
+			 */
+			if (ret != -EEXIST)
+				break;
 		}
 	}
 }
@@ -12797,8 +12796,12 @@ static void hclge_sync_promisc_mode(struct hclge_dev *hdev)
 			continue;
 
 		if (vport->vf_info.trusted) {
-			uc_en = vport->vf_info.request_uc_en > 0;
-			mc_en = vport->vf_info.request_mc_en > 0;
+			uc_en = vport->vf_info.request_uc_en > 0 ||
+				vport->overflow_promisc_flags &
+				HNAE3_OVERFLOW_UPE;
+			mc_en = vport->vf_info.request_mc_en > 0 ||
+				vport->overflow_promisc_flags &
+				HNAE3_OVERFLOW_MPE;
 		}
 		bc_en = vport->vf_info.request_bc_en > 0;
 
@@ -687,12 +687,10 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
 
 	for (i = 0; i < HNAE3_MAX_TC; i++) {
 		if (hdev->hw_tc_map & BIT(i) && i < kinfo->tc_info.num_tc) {
-			set_bit(i, &kinfo->tc_info.tc_en);
 			kinfo->tc_info.tqp_offset[i] = i * kinfo->rss_size;
 			kinfo->tc_info.tqp_count[i] = kinfo->rss_size;
 		} else {
 			/* Set to default queue if TC is disable */
-			clear_bit(i, &kinfo->tc_info.tc_en);
 			kinfo->tc_info.tqp_offset[i] = 0;
 			kinfo->tc_info.tqp_count[i] = 1;
 		}
@@ -729,14 +727,6 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev)
 	for (i = 0; i < HNAE3_MAX_USER_PRIO; i++)
 		hdev->tm_info.prio_tc[i] =
 			(i >= hdev->tm_info.num_tc) ? 0 : i;
-
-	/* DCB is enabled if we have more than 1 TC or pfc_en is
-	 * non-zero.
-	 */
-	if (hdev->tm_info.num_tc > 1 || hdev->tm_info.pfc_en)
-		hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
-	else
-		hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
 }
 
 static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
@@ -767,10 +757,10 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
 
 static void hclge_update_fc_mode_by_dcb_flag(struct hclge_dev *hdev)
 {
-	if (!(hdev->flag & HCLGE_FLAG_DCB_ENABLE)) {
+	if (hdev->tm_info.num_tc == 1 && !hdev->tm_info.pfc_en) {
 		if (hdev->fc_mode_last_time == HCLGE_FC_PFC)
 			dev_warn(&hdev->pdev->dev,
-				 "DCB is disable, but last mode is FC_PFC\n");
+				 "Only 1 tc used, but last mode is FC_PFC\n");
 
 		hdev->tm_info.fc_mode = hdev->fc_mode_last_time;
 	} else if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) {
@@ -796,7 +786,7 @@ static void hclge_update_fc_mode(struct hclge_dev *hdev)
 	}
 }
 
-static void hclge_pfc_info_init(struct hclge_dev *hdev)
+void hclge_tm_pfc_info_update(struct hclge_dev *hdev)
 {
 	if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3)
 		hclge_update_fc_mode(hdev);
@@ -812,7 +802,7 @@ static void hclge_tm_schd_info_init(struct hclge_dev *hdev)
 
 	hclge_tm_vport_info_update(hdev);
 
-	hclge_pfc_info_init(hdev);
+	hclge_tm_pfc_info_update(hdev);
 }
 
 static int hclge_tm_pg_to_pri_map(struct hclge_dev *hdev)
@@ -1558,19 +1548,6 @@ void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc)
 	hclge_tm_schd_info_init(hdev);
 }
 
-void hclge_tm_pfc_info_update(struct hclge_dev *hdev)
-{
-	/* DCB is enabled if we have more than 1 TC or pfc_en is
-	 * non-zero.
-	 */
-	if (hdev->tm_info.num_tc > 1 || hdev->tm_info.pfc_en)
-		hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
-	else
-		hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
-
-	hclge_pfc_info_init(hdev);
-}
-
 int hclge_tm_init_hw(struct hclge_dev *hdev, bool init)
 {
 	int ret;
@@ -1616,7 +1593,7 @@ int hclge_tm_vport_map_update(struct hclge_dev *hdev)
 	if (ret)
 		return ret;
 
-	if (!(hdev->flag & HCLGE_FLAG_DCB_ENABLE))
+	if (hdev->tm_info.num_tc == 1 && !hdev->tm_info.pfc_en)
 		return 0;
 
 	return hclge_tm_bp_setup(hdev);
@@ -354,7 +354,7 @@ static int hns_mdio_reset(struct mii_bus *bus)
 
 	if (dev_of_node(bus->parent)) {
 		if (!mdio_dev->subctrl_vbase) {
-			dev_err(&bus->dev, "mdio sys ctl reg has not maped\n");
+			dev_err(&bus->dev, "mdio sys ctl reg has not mapped\n");
 			return -ENODEV;
 		}
 
@@ -4708,14 +4708,6 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
 		return 0;
 	}
 
-	if (adapter->failover_pending) {
-		adapter->init_done_rc = -EAGAIN;
-		netdev_dbg(netdev, "Failover pending, ignoring login response\n");
-		complete(&adapter->init_done);
-		/* login response buffer will be released on reset */
-		return 0;
-	}
-
 	netdev->mtu = adapter->req_mtu - ETH_HLEN;
 
 	netdev_dbg(adapter->netdev, "Login Response Buffer:\n");
@@ -2437,11 +2437,15 @@ static void e100_get_drvinfo(struct net_device *netdev,
 		sizeof(info->bus_info));
 }
 
-#define E100_PHY_REGS 0x1C
+#define E100_PHY_REGS 0x1D
 static int e100_get_regs_len(struct net_device *netdev)
 {
 	struct nic *nic = netdev_priv(netdev);
-	return 1 + E100_PHY_REGS + sizeof(nic->mem->dump_buf);
+
+	/* We know the number of registers, and the size of the dump buffer.
+	 * Calculate the total size in bytes.
+	 */
+	return (1 + E100_PHY_REGS) * sizeof(u32) + sizeof(nic->mem->dump_buf);
 }
 
 static void e100_get_regs(struct net_device *netdev,
@@ -2455,14 +2459,18 @@ static void e100_get_regs(struct net_device *netdev,
 	buff[0] = ioread8(&nic->csr->scb.cmd_hi) << 24 |
 		ioread8(&nic->csr->scb.cmd_lo) << 16 |
 		ioread16(&nic->csr->scb.status);
-	for (i = E100_PHY_REGS; i >= 0; i--)
-		buff[1 + E100_PHY_REGS - i] =
-			mdio_read(netdev, nic->mii.phy_id, i);
+	for (i = 0; i < E100_PHY_REGS; i++)
+		/* Note that we read the registers in reverse order. This
+		 * ordering is the ABI apparently used by ethtool and other
+		 * applications.
+		 */
+		buff[1 + i] = mdio_read(netdev, nic->mii.phy_id,
+					E100_PHY_REGS - 1 - i);
 	memset(nic->mem->dump_buf, 0, sizeof(nic->mem->dump_buf));
 	e100_exec_cb(nic, NULL, e100_dump);
 	msleep(10);
-	memcpy(&buff[2 + E100_PHY_REGS], nic->mem->dump_buf,
-	       sizeof(nic->mem->dump_buf));
+	memcpy(&buff[1 + E100_PHY_REGS], nic->mem->dump_buf,
+	       sizeof(nic->mem->dump_buf));
 }
 
 static void e100_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
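The e100 fix above corrects two related errors: the register count was off by one (MII registers run 0x00..0x1C inclusive, i.e. 0x1D of them), and the length was counted in u32 slots rather than bytes. A standalone sketch of the corrected math (the function name is illustrative):

#include <stddef.h>

#define E100_PHY_REGS	0x1D	/* registers 0x00..0x1C inclusive */

static size_t e100_regs_len_sketch(size_t dump_buf_size)
{
	/* one 32-bit slot for the SCB status/command word, one per PHY
	 * register, then the dump buffer that get_regs() copies to
	 * buff[1 + E100_PHY_REGS]
	 */
	return (1 + E100_PHY_REGS) * sizeof(unsigned int) + dump_buf_size;
}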
@@ -3208,7 +3208,7 @@ static unsigned int ixgbe_max_channels(struct ixgbe_adapter *adapter)
 		max_combined = ixgbe_max_rss_indices(adapter);
 	}
 
-	return max_combined;
+	return min_t(int, max_combined, num_online_cpus());
 }
 
 static void ixgbe_get_channels(struct net_device *dev,
@@ -10112,6 +10112,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	struct bpf_prog *old_prog;
 	bool need_reset;
+	int num_queues;
 
 	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
 		return -EINVAL;
@@ -10161,11 +10162,14 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
 	/* Kick start the NAPI context if there is an AF_XDP socket open
 	 * on that queue id. This so that receiving will start.
 	 */
-	if (need_reset && prog)
-		for (i = 0; i < adapter->num_rx_queues; i++)
+	if (need_reset && prog) {
+		num_queues = min_t(int, adapter->num_rx_queues,
+				   adapter->num_xdp_queues);
+		for (i = 0; i < num_queues; i++)
 			if (adapter->xdp_ring[i]->xsk_pool)
 				(void)ixgbe_xsk_wakeup(adapter->netdev, i,
 						       XDP_WAKEUP_RX);
+	}
 
 	return 0;
 }
@@ -4,8 +4,6 @@
 #
 
 obj-$(CONFIG_KS8842)	+= ks8842.o
-obj-$(CONFIG_KS8851)	+= ks8851.o
-ks8851-objs = ks8851_common.o ks8851_spi.o
-obj-$(CONFIG_KS8851_MLL)	+= ks8851_mll.o
-ks8851_mll-objs = ks8851_common.o ks8851_par.o
+obj-$(CONFIG_KS8851)	+= ks8851_common.o ks8851_spi.o
+obj-$(CONFIG_KS8851_MLL)	+= ks8851_common.o ks8851_par.o
 obj-$(CONFIG_KSZ884X_PCI)	+= ksz884x.o
@@ -1057,6 +1057,7 @@ int ks8851_suspend(struct device *dev)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ks8851_suspend);
 
 int ks8851_resume(struct device *dev)
 {
@@ -1070,6 +1071,7 @@ int ks8851_resume(struct device *dev)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ks8851_resume);
 #endif
 
 static int ks8851_register_mdiobus(struct ks8851_net *ks, struct device *dev)
@@ -1243,6 +1245,7 @@ err_reg:
 err_reg_io:
 	return ret;
 }
+EXPORT_SYMBOL_GPL(ks8851_probe_common);
 
 int ks8851_remove_common(struct device *dev)
 {
@@ -1261,3 +1264,8 @@ int ks8851_remove_common(struct device *dev)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ks8851_remove_common);
+
+MODULE_DESCRIPTION("KS8851 Network driver");
+MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>");
+MODULE_LICENSE("GPL");
|
||||
&ionic_dbg_intr_stats_desc[i]);
|
||||
(*buf)++;
|
||||
}
|
||||
for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) {
|
||||
**buf = IONIC_READ_STAT64(&txqcq->napi_stats,
|
||||
&ionic_dbg_napi_stats_desc[i]);
|
||||
(*buf)++;
|
||||
}
|
||||
for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) {
|
||||
**buf = txqcq->napi_stats.work_done_cntr[i];
|
||||
(*buf)++;
|
||||
}
|
||||
for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) {
|
||||
**buf = txstats->sg_cntr[i];
|
||||
(*buf)++;
|
||||
|
@@ -486,6 +486,10 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
 		timer_setup(&priv->eee_ctrl_timer, stmmac_eee_ctrl_timer, 0);
 		stmmac_set_eee_timer(priv, priv->hw, STMMAC_DEFAULT_LIT_LS,
 				     eee_tw_timer);
+		if (priv->hw->xpcs)
+			xpcs_config_eee(priv->hw->xpcs,
+					priv->plat->mult_fact_100ns,
+					true);
 	}
 
 	if (priv->plat->has_gmac4 && priv->tx_lpi_timer <= STMMAC_ET_MAX) {
@@ -73,6 +73,7 @@ config CASSINI
 config SUNVNET_COMMON
 	tristate "Common routines to support Sun Virtual Networking"
 	depends on SUN_LDOMS
+	depends on INET
 	default m
 
 config SUNVNET
@@ -48,6 +48,7 @@ config BPQETHER
 config DMASCC
 	tristate "High-speed (DMA) SCC driver for AX.25"
 	depends on ISA && AX25 && BROKEN_ON_SMP && ISA_DMA_API
+	depends on VIRT_TO_BUS
 	help
 	  This is a driver for high-speed SCC boards, i.e. those supporting
 	  DMA on one port. You usually use those boards to connect your
@@ -207,6 +207,7 @@ static int ipq4019_mdio_probe(struct platform_device *pdev)
 {
 	struct ipq4019_mdio_data *priv;
 	struct mii_bus *bus;
+	struct resource *res;
 	int ret;
 
 	bus = devm_mdiobus_alloc_size(&pdev->dev, sizeof(*priv));
@@ -224,7 +225,10 @@ static int ipq4019_mdio_probe(struct platform_device *pdev)
 		return PTR_ERR(priv->mdio_clk);
 
 	/* The platform resource is provided on the chipset IPQ5018 */
-	priv->eth_ldo_rdy = devm_platform_ioremap_resource(pdev, 1);
+	/* This resource is optional */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (res)
+		priv->eth_ldo_rdy = devm_ioremap_resource(&pdev->dev, res);
 
 	bus->name = "ipq4019_mdio";
 	bus->read = ipq4019_mdio_read;
@@ -134,8 +134,9 @@ static int mscc_miim_reset(struct mii_bus *bus)
 
 static int mscc_miim_probe(struct platform_device *pdev)
 {
-	struct mii_bus *bus;
 	struct mscc_miim_dev *dev;
+	struct resource *res;
+	struct mii_bus *bus;
 	int ret;
 
 	bus = devm_mdiobus_alloc_size(&pdev->dev, sizeof(*dev));
@@ -156,10 +157,14 @@ static int mscc_miim_probe(struct platform_device *pdev)
 		return PTR_ERR(dev->regs);
 	}
 
-	dev->phy_regs = devm_platform_ioremap_resource(pdev, 1);
-	if (IS_ERR(dev->phy_regs)) {
-		dev_err(&pdev->dev, "Unable to map internal phy registers\n");
-		return PTR_ERR(dev->phy_regs);
+	/* This resource is optional */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (res) {
+		dev->phy_regs = devm_ioremap_resource(&pdev->dev, res);
+		if (IS_ERR(dev->phy_regs)) {
+			dev_err(&pdev->dev, "Unable to map internal phy registers\n");
+			return PTR_ERR(dev->phy_regs);
+		}
 	}
 
 	ret = of_mdiobus_register(bus, pdev->dev.of_node);
@@ -321,7 +321,7 @@ static int mhi_net_newlink(struct mhi_device *mhi_dev, struct net_device *ndev)
 	/* Start MHI channels */
 	err = mhi_prepare_for_transfer(mhi_dev);
 	if (err)
-		goto out_err;
+		return err;
 
 	/* Number of transfer descriptors determines size of the queue */
 	mhi_netdev->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
@@ -331,10 +331,6 @@ static int mhi_net_newlink(struct mhi_device *mhi_dev, struct net_device *ndev)
 		return err;
 
 	return 0;
-
-out_err:
-	free_netdev(ndev);
-	return err;
 }
 
 static void mhi_net_dellink(struct mhi_device *mhi_dev, struct net_device *ndev)
@@ -27,7 +27,12 @@
 #define MII_BCM7XXX_SHD_2_ADDR_CTRL	0xe
 #define MII_BCM7XXX_SHD_2_CTRL_STAT	0xf
 #define MII_BCM7XXX_SHD_2_BIAS_TRIM	0x1a
+#define MII_BCM7XXX_SHD_3_PCS_CTRL	0x0
+#define MII_BCM7XXX_SHD_3_PCS_STATUS	0x1
+#define MII_BCM7XXX_SHD_3_EEE_CAP	0x2
 #define MII_BCM7XXX_SHD_3_AN_EEE_ADV	0x3
+#define MII_BCM7XXX_SHD_3_EEE_LP	0x4
+#define MII_BCM7XXX_SHD_3_EEE_WK_ERR	0x5
 #define MII_BCM7XXX_SHD_3_PCS_CTRL_2	0x6
 #define MII_BCM7XXX_PCS_CTRL_2_DEF	0x4400
 #define MII_BCM7XXX_SHD_3_AN_STAT	0xb
@@ -216,25 +221,37 @@ static int bcm7xxx_28nm_resume(struct phy_device *phydev)
 	return genphy_config_aneg(phydev);
 }
 
-static int phy_set_clr_bits(struct phy_device *dev, int location,
-			    int set_mask, int clr_mask)
+static int __phy_set_clr_bits(struct phy_device *dev, int location,
+			      int set_mask, int clr_mask)
 {
 	int v, ret;
 
-	v = phy_read(dev, location);
+	v = __phy_read(dev, location);
 	if (v < 0)
 		return v;
 
 	v &= ~clr_mask;
 	v |= set_mask;
 
-	ret = phy_write(dev, location, v);
+	ret = __phy_write(dev, location, v);
 	if (ret < 0)
 		return ret;
 
 	return v;
 }
 
+static int phy_set_clr_bits(struct phy_device *dev, int location,
+			    int set_mask, int clr_mask)
+{
+	int ret;
+
+	mutex_lock(&dev->mdio.bus->mdio_lock);
+	ret = __phy_set_clr_bits(dev, location, set_mask, clr_mask);
+	mutex_unlock(&dev->mdio.bus->mdio_lock);
+
+	return ret;
+}
+
 static int bcm7xxx_28nm_ephy_01_afe_config_init(struct phy_device *phydev)
 {
 	int ret;
@@ -398,6 +415,93 @@ static int bcm7xxx_28nm_ephy_config_init(struct phy_device *phydev)
 	return bcm7xxx_28nm_ephy_apd_enable(phydev);
 }
 
+#define MII_BCM7XXX_REG_INVALID	0xff
+
+static u8 bcm7xxx_28nm_ephy_regnum_to_shd(u16 regnum)
+{
+	switch (regnum) {
+	case MDIO_CTRL1:
+		return MII_BCM7XXX_SHD_3_PCS_CTRL;
+	case MDIO_STAT1:
+		return MII_BCM7XXX_SHD_3_PCS_STATUS;
+	case MDIO_PCS_EEE_ABLE:
+		return MII_BCM7XXX_SHD_3_EEE_CAP;
+	case MDIO_AN_EEE_ADV:
+		return MII_BCM7XXX_SHD_3_AN_EEE_ADV;
+	case MDIO_AN_EEE_LPABLE:
+		return MII_BCM7XXX_SHD_3_EEE_LP;
+	case MDIO_PCS_EEE_WK_ERR:
+		return MII_BCM7XXX_SHD_3_EEE_WK_ERR;
+	default:
+		return MII_BCM7XXX_REG_INVALID;
+	}
+}
+
+static bool bcm7xxx_28nm_ephy_dev_valid(int devnum)
+{
+	return devnum == MDIO_MMD_AN || devnum == MDIO_MMD_PCS;
+}
+
+static int bcm7xxx_28nm_ephy_read_mmd(struct phy_device *phydev,
+				      int devnum, u16 regnum)
+{
+	u8 shd = bcm7xxx_28nm_ephy_regnum_to_shd(regnum);
+	int ret;
+
+	if (!bcm7xxx_28nm_ephy_dev_valid(devnum) ||
+	    shd == MII_BCM7XXX_REG_INVALID)
+		return -EOPNOTSUPP;
+
+	/* set shadow mode 2 */
+	ret = __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST,
+				 MII_BCM7XXX_SHD_MODE_2, 0);
+	if (ret < 0)
+		return ret;
+
+	/* Access the desired shadow register address */
+	ret = __phy_write(phydev, MII_BCM7XXX_SHD_2_ADDR_CTRL, shd);
+	if (ret < 0)
+		goto reset_shadow_mode;
+
+	ret = __phy_read(phydev, MII_BCM7XXX_SHD_2_CTRL_STAT);
+
+reset_shadow_mode:
+	/* reset shadow mode 2 */
+	__phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, 0,
+			   MII_BCM7XXX_SHD_MODE_2);
+	return ret;
+}
+
+static int bcm7xxx_28nm_ephy_write_mmd(struct phy_device *phydev,
+				       int devnum, u16 regnum, u16 val)
+{
+	u8 shd = bcm7xxx_28nm_ephy_regnum_to_shd(regnum);
+	int ret;
+
+	if (!bcm7xxx_28nm_ephy_dev_valid(devnum) ||
+	    shd == MII_BCM7XXX_REG_INVALID)
+		return -EOPNOTSUPP;
+
+	/* set shadow mode 2 */
+	ret = __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST,
+				 MII_BCM7XXX_SHD_MODE_2, 0);
+	if (ret < 0)
+		return ret;
+
+	/* Access the desired shadow register address */
+	ret = __phy_write(phydev, MII_BCM7XXX_SHD_2_ADDR_CTRL, shd);
+	if (ret < 0)
+		goto reset_shadow_mode;
+
+	/* Write the desired value in the shadow register */
+	__phy_write(phydev, MII_BCM7XXX_SHD_2_CTRL_STAT, val);
+
+reset_shadow_mode:
+	/* reset shadow mode 2 */
+	return __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, 0,
+				  MII_BCM7XXX_SHD_MODE_2);
+}
+
 static int bcm7xxx_28nm_ephy_resume(struct phy_device *phydev)
 {
 	int ret;
@@ -595,6 +699,8 @@ static void bcm7xxx_28nm_remove(struct phy_device *phydev)
 	.get_stats	= bcm7xxx_28nm_get_phy_stats,	\
 	.probe		= bcm7xxx_28nm_probe,		\
 	.remove		= bcm7xxx_28nm_remove,		\
+	.read_mmd	= bcm7xxx_28nm_ephy_read_mmd,	\
+	.write_mmd	= bcm7xxx_28nm_ephy_write_mmd,	\
 }
 
 #define BCM7XXX_40NM_EPHY(_oui, _name)			\
@@ -537,6 +537,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner)
 	err = device_register(&bus->dev);
 	if (err) {
 		pr_err("mii_bus %s failed to register\n", bus->id);
+		put_device(&bus->dev);
 		return -EINVAL;
 	}
 
@@ -493,6 +493,25 @@ static int gpy_loopback(struct phy_device *phydev, bool enable)
 	return ret;
 }
 
+static int gpy115_loopback(struct phy_device *phydev, bool enable)
+{
+	int ret;
+	int fw_minor;
+
+	if (enable)
+		return gpy_loopback(phydev, enable);
+
+	ret = phy_read(phydev, PHY_FWV);
+	if (ret < 0)
+		return ret;
+
+	fw_minor = FIELD_GET(PHY_FWV_MINOR_MASK, ret);
+	if (fw_minor > 0x0076)
+		return gpy_loopback(phydev, 0);
+
+	return genphy_soft_reset(phydev);
+}
+
 static struct phy_driver gpy_drivers[] = {
 	{
 		PHY_ID_MATCH_MODEL(PHY_ID_GPY2xx),
@@ -527,7 +546,7 @@ static struct phy_driver gpy_drivers[] = {
 		.handle_interrupt = gpy_handle_interrupt,
 		.set_wol	= gpy_set_wol,
 		.get_wol	= gpy_get_wol,
-		.set_loopback	= gpy_loopback,
+		.set_loopback	= gpy115_loopback,
 	},
 	{
 		PHY_ID_MATCH_MODEL(PHY_ID_GPY115C),
@@ -544,7 +563,7 @@ static struct phy_driver gpy_drivers[] = {
 		.handle_interrupt = gpy_handle_interrupt,
 		.set_wol	= gpy_set_wol,
 		.get_wol	= gpy_get_wol,
-		.set_loopback	= gpy_loopback,
+		.set_loopback	= gpy115_loopback,
 	},
 	{
 		.phy_id		= PHY_ID_GPY211B,
@@ -1178,7 +1178,10 @@ static void smsc95xx_unbind(struct usbnet *dev, struct usb_interface *intf)
 
 static void smsc95xx_handle_link_change(struct net_device *net)
 {
+	struct usbnet *dev = netdev_priv(net);
+
 	phy_print_status(net->phydev);
+	usbnet_defer_kevent(dev, EVENT_LINK_CHANGE);
 }
 
 static int smsc95xx_start_phy(struct usbnet *dev)
@@ -1867,8 +1867,8 @@ mac80211_hwsim_beacon(struct hrtimer *timer)
 		bcn_int -= data->bcn_delta;
 		data->bcn_delta = 0;
 	}
-	hrtimer_forward(&data->beacon_timer, hrtimer_get_expires(timer),
-			ns_to_ktime(bcn_int * NSEC_PER_USEC));
+	hrtimer_forward_now(&data->beacon_timer,
+			    ns_to_ktime(bcn_int * NSEC_PER_USEC));
 	return HRTIMER_RESTART;
 }
 
@@ -578,11 +578,12 @@ struct btf_func_model {
  * programs only. Should not be used with normal calls and indirect calls.
  */
 #define BPF_TRAMP_F_SKIP_FRAME		BIT(2)
-
 /* Store IP address of the caller on the trampoline stack,
  * so it's available for trampoline's programs.
  */
 #define BPF_TRAMP_F_IP_ARG		BIT(3)
+/* Return the return value of fentry prog. Only used by bpf_struct_ops. */
+#define BPF_TRAMP_F_RET_FENTRY_RET	BIT(4)
 
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
  * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
@@ -597,5 +597,5 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
 int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh,
 		     u8 rt_family, unsigned char *flags, bool skip_oif);
 int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh,
-		    int nh_weight, u8 rt_family);
+		    int nh_weight, u8 rt_family, u32 nh_tclassid);
 #endif  /* _NET_FIB_H */
@@ -2818,13 +2818,13 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb);
  * Mac80211 drivers should set the @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0 flag
  * when they are able to replace in-use PTK keys according to the following
  * requirements:
- * 1) They do not hand over frames decrypted with the old key to
-      mac80211 once the call to set_key() with command %DISABLE_KEY has been
-      completed when also setting @IEEE80211_KEY_FLAG_GENERATE_IV for any key,
+ * 1) They do not hand over frames decrypted with the old key to mac80211
+      once the call to set_key() with command %DISABLE_KEY has been completed,
    2) either drop or continue to use the old key for any outgoing frames queued
       at the time of the key deletion (including re-transmits),
    3) never send out a frame queued prior to the set_key() %SET_KEY command
-      encrypted with the new key and
+      encrypted with the new key when also needing
+      @IEEE80211_KEY_FLAG_GENERATE_IV and
    4) never send out a frame unencrypted when it should be encrypted.
    Mac80211 will not queue any new frames for a deleted key to the driver.
  */
@@ -325,7 +325,7 @@ int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
 		struct fib_nh_common *nhc = &nhi->fib_nhc;
 		int weight = nhg->nh_entries[i].weight;
 
-		if (fib_add_nexthop(skb, nhc, weight, rt_family) < 0)
+		if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0)
 			return -EMSGSIZE;
 	}
 
@@ -11,6 +11,7 @@
 #include <uapi/linux/pkt_sched.h>
 
 #define DEFAULT_TX_QUEUE_LEN	1000
+#define STAB_SIZE_LOG_MAX	30
 
 struct qdisc_walker {
 	int stop;
@ -488,8 +488,10 @@ struct sock {
|
||||
u8 sk_prefer_busy_poll;
|
||||
u16 sk_busy_poll_budget;
|
||||
#endif
|
||||
spinlock_t sk_peer_lock;
|
||||
struct pid *sk_peer_pid;
|
||||
const struct cred *sk_peer_cred;
|
||||
|
||||
long sk_rcvtimeo;
|
||||
ktime_t sk_stamp;
|
||||
#if BITS_PER_LONG==32
|
||||
@@ -1623,7 +1625,36 @@ void release_sock(struct sock *sk);
				SINGLE_DEPTH_NESTING)
 #define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->sk_lock.slock))

-bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock);
+bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock);
+
+/**
+ * lock_sock_fast - fast version of lock_sock
+ * @sk: socket
+ *
+ * This version should be used for very small section, where process wont block
+ * return false if fast path is taken:
+ *
+ *   sk_lock.slock locked, owned = 0, BH disabled
+ *
+ * return true if slow path is taken:
+ *
+ *   sk_lock.slock unlocked, owned = 1, BH enabled
+ */
+static inline bool lock_sock_fast(struct sock *sk)
+{
+	/* The sk_lock has mutex_lock() semantics here. */
+	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+
+	return __lock_sock_fast(sk);
+}
+
+/* fast socket lock variant for caller already holding a [different] socket lock */
+static inline bool lock_sock_fast_nested(struct sock *sk)
+{
+	mutex_acquire(&sk->sk_lock.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
+
+	return __lock_sock_fast(sk);
+}

 /**
  * unlock_sock_fast - complement of lock_sock_fast
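Note: after this refactor the fast-lock API is unchanged for callers; only the lockdep annotation moved into the inline wrappers. A minimal caller sketch (the function below is hypothetical, the lock/unlock calls are the real API):

#include <net/sock.h>

/* Tiny, non-blocking critical section: lock_sock_fast() returns true
 * when the slow path was taken, and unlock_sock_fast() must be passed
 * that result so it releases the lock the matching way.
 */
static void example_touch_sock(struct sock *sk)
{
	bool slow = lock_sock_fast(sk);

	sk->sk_err = 0;

	unlock_sock_fast(sk, slow);
}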
@@ -368,6 +368,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
		const struct btf_type *mtype, *ptype;
		struct bpf_prog *prog;
		u32 moff;
+		u32 flags;

		moff = btf_member_bit_offset(t, member) / 8;
		ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL);
@@ -431,10 +432,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,

		tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
		tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
+		flags = st_ops->func_models[i].ret_size > 0 ?
+			BPF_TRAMP_F_RET_FENTRY_RET : 0;
		err = arch_prepare_bpf_trampoline(NULL, image,
						  st_map->image + PAGE_SIZE,
-						  &st_ops->func_models[i], 0,
-						  tprogs, NULL);
+						  &st_ops->func_models[i],
+						  flags, tprogs, NULL);
		if (err < 0)
			goto reset_unlock;

@@ -827,7 +827,7 @@ int bpf_jit_charge_modmem(u32 pages)
 {
	if (atomic_long_add_return(pages, &bpf_jit_current) >
	    (bpf_jit_limit >> PAGE_SHIFT)) {
-		if (!capable(CAP_SYS_ADMIN)) {
+		if (!bpf_capable()) {
			atomic_long_sub(pages, &bpf_jit_current);
			return -EPERM;
		}
@@ -6574,22 +6574,29 @@ int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v)

 void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
 {
-	/* Don't associate the sock with unrelated interrupted task's cgroup. */
-	if (in_interrupt())
-		return;
+	struct cgroup *cgroup;

	rcu_read_lock();
+	/* Don't associate the sock with unrelated interrupted task's cgroup. */
+	if (in_interrupt()) {
+		cgroup = &cgrp_dfl_root.cgrp;
+		cgroup_get(cgroup);
+		goto out;
+	}

	while (true) {
		struct css_set *cset;

		cset = task_css_set(current);
		if (likely(cgroup_tryget(cset->dfl_cgrp))) {
-			skcd->cgroup = cset->dfl_cgrp;
-			cgroup_bpf_get(cset->dfl_cgrp);
+			cgroup = cset->dfl_cgrp;
			break;
		}
		cpu_relax();
	}
-
+out:
+	skcd->cgroup = cgroup;
+	cgroup_bpf_get(cgroup);
	rcu_read_unlock();
 }

@@ -552,6 +552,12 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
	__skb->gso_segs = skb_shinfo(skb)->gso_segs;
 }

+static struct proto bpf_dummy_proto = {
+	.name   = "bpf_dummy",
+	.owner  = THIS_MODULE,
+	.obj_size = sizeof(struct sock),
+};
+
 int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
			  union bpf_attr __user *uattr)
 {
@@ -596,20 +602,19 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
		break;
	}

-	sk = kzalloc(sizeof(struct sock), GFP_USER);
+	sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1);
	if (!sk) {
		kfree(data);
		kfree(ctx);
		return -ENOMEM;
	}
-	sock_net_set(sk, net);
	sock_init_data(NULL, sk);

	skb = build_skb(data, 0);
	if (!skb) {
		kfree(data);
		kfree(ctx);
-		kfree(sk);
+		sk_free(sk);
		return -ENOMEM;
	}
	skb->sk = sk;
@@ -682,8 +687,7 @@ out:
	if (dev && dev != net->loopback_dev)
		dev_put(dev);
	kfree_skb(skb);
-	bpf_sk_storage_free(sk);
-	kfree(sk);
+	sk_free(sk);
	kfree(ctx);
	return ret;
 }
@@ -1677,8 +1677,6 @@ static void br_multicast_update_querier(struct net_bridge_mcast *brmctx,
					int ifindex,
					struct br_ip *saddr)
 {
-	lockdep_assert_held_once(&brmctx->br->multicast_lock);
-
	write_seqcount_begin(&querier->seq);
	querier->port_ifidx = ifindex;
	memcpy(&querier->addr, saddr, sizeof(*saddr));
@@ -3867,13 +3865,13 @@ void br_multicast_ctx_init(struct net_bridge *br,

	brmctx->ip4_other_query.delay_time = 0;
	brmctx->ip4_querier.port_ifidx = 0;
-	seqcount_init(&brmctx->ip4_querier.seq);
+	seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock);
	brmctx->multicast_igmp_version = 2;
 #if IS_ENABLED(CONFIG_IPV6)
	brmctx->multicast_mld_version = 1;
	brmctx->ip6_other_query.delay_time = 0;
	brmctx->ip6_querier.port_ifidx = 0;
-	seqcount_init(&brmctx->ip6_querier.seq);
+	seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock);
 #endif

	timer_setup(&brmctx->ip4_mc_router_timer,
@@ -82,7 +82,7 @@ struct bridge_mcast_other_query {
 struct bridge_mcast_querier {
	struct br_ip addr;
	int port_ifidx;
-	seqcount_t seq;
+	seqcount_spinlock_t seq;
 };

 /* IGMP/MLD statistics */
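Note: associating the seqcount with multicast_lock lets lockdep verify that every writer actually holds the spinlock. A stripped-down sketch of the same pattern outside the bridge code (all names hypothetical):

#include <linux/seqlock.h>
#include <linux/spinlock.h>

struct mcast_counter {
	spinlock_t lock;
	seqcount_spinlock_t seq;	/* tied to ->lock at init time */
	u64 queries;
};

static void mcast_counter_init(struct mcast_counter *c)
{
	spin_lock_init(&c->lock);
	seqcount_spinlock_init(&c->seq, &c->lock);
}

static void mcast_counter_bump(struct mcast_counter *c)
{
	spin_lock(&c->lock);		/* lockdep checks this is held */
	write_seqcount_begin(&c->seq);
	c->queries++;
	write_seqcount_end(&c->seq);
	spin_unlock(&c->lock);
}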
@@ -50,6 +50,11 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
	if (addr_len > MAX_ADDR_LEN)
		return -EINVAL;

+	ha = list_first_entry(&list->list, struct netdev_hw_addr, list);
+	if (ha && !memcmp(addr, ha->addr, addr_len) &&
+	    (!addr_type || addr_type == ha->type))
+		goto found_it;
+
	while (*ins_point) {
		int diff;

@@ -64,6 +69,7 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
		} else if (diff > 0) {
			ins_point = &parent->rb_right;
		} else {
+found_it:
			if (exclusive)
				return -EEXIST;
			if (global) {
@@ -1376,6 +1376,16 @@ set_sndbuf:
 }
 EXPORT_SYMBOL(sock_setsockopt);

+static const struct cred *sk_get_peer_cred(struct sock *sk)
+{
+	const struct cred *cred;
+
+	spin_lock(&sk->sk_peer_lock);
+	cred = get_cred(sk->sk_peer_cred);
+	spin_unlock(&sk->sk_peer_lock);
+
+	return cred;
+}
+
 static void cred_to_ucred(struct pid *pid, const struct cred *cred,
			  struct ucred *ucred)
@@ -1552,7 +1562,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
		struct ucred peercred;
		if (len > sizeof(peercred))
			len = sizeof(peercred);
+
+		spin_lock(&sk->sk_peer_lock);
		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+		spin_unlock(&sk->sk_peer_lock);
+
		if (copy_to_user(optval, &peercred, len))
			return -EFAULT;
		goto lenout;
@@ -1560,20 +1574,23 @@ int sock_getsockopt(struct socket *sock, int level, int optname,

	case SO_PEERGROUPS:
	{
+		const struct cred *cred;
		int ret, n;

-		if (!sk->sk_peer_cred)
+		cred = sk_get_peer_cred(sk);
+		if (!cred)
			return -ENODATA;

-		n = sk->sk_peer_cred->group_info->ngroups;
+		n = cred->group_info->ngroups;
		if (len < n * sizeof(gid_t)) {
			len = n * sizeof(gid_t);
+			put_cred(cred);
			return put_user(len, optlen) ? -EFAULT : -ERANGE;
		}
		len = n * sizeof(gid_t);

-		ret = groups_to_user((gid_t __user *)optval,
-				     sk->sk_peer_cred->group_info);
+		ret = groups_to_user((gid_t __user *)optval, cred->group_info);
+		put_cred(cred);
		if (ret)
			return ret;
		goto lenout;
@@ -1935,9 +1952,10 @@ static void __sk_destruct(struct rcu_head *head)
		sk->sk_frag.page = NULL;
	}

-	if (sk->sk_peer_cred)
-		put_cred(sk->sk_peer_cred);
+	/* We do not need to acquire sk->sk_peer_lock, we are the last user. */
+	put_cred(sk->sk_peer_cred);
	put_pid(sk->sk_peer_pid);
+
	if (likely(sk->sk_net_refcnt))
		put_net(sock_net(sk));
	sk_prot_free(sk->sk_prot_creator, sk);
@@ -3145,6 +3163,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)

	sk->sk_peer_pid		=	NULL;
	sk->sk_peer_cred	=	NULL;
+	spin_lock_init(&sk->sk_peer_lock);
+
	sk->sk_write_pending	=	0;
	sk->sk_rcvlowat		=	1;
	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
@@ -3210,24 +3230,8 @@ void release_sock(struct sock *sk)
 }
 EXPORT_SYMBOL(release_sock);

-/**
- * lock_sock_fast - fast version of lock_sock
- * @sk: socket
- *
- * This version should be used for very small section, where process wont block
- * return false if fast path is taken:
- *
- *   sk_lock.slock locked, owned = 0, BH disabled
- *
- * return true if slow path is taken:
- *
- *   sk_lock.slock unlocked, owned = 1, BH enabled
- */
-bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
+bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
 {
-	/* The sk_lock has mutex_lock() semantics here. */
-	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
-
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);

@@ -3256,7 +3260,7 @@ bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
	spin_unlock_bh(&sk->sk_lock.slock);
	return true;
 }
-EXPORT_SYMBOL(lock_sock_fast);
+EXPORT_SYMBOL(__lock_sock_fast);

 int sock_gettstamp(struct socket *sock, void __user *userstamp,
		    bool timeval, bool time32)
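Note: the SO_PEERGROUPS path is safe because the reference is taken while sk_peer_lock is held, so a concurrent connect() cannot free the cred between the NULL check and the use. The same check-and-get shape, reduced to a sketch (struct obj and struct holder are hypothetical):

#include <linux/spinlock.h>
#include <linux/refcount.h>

struct obj {
	refcount_t ref;
};

struct holder {
	spinlock_t lock;
	struct obj *cur;	/* may be replaced concurrently */
};

/* Take a stable reference under the lock; use it with the lock dropped. */
static struct obj *holder_get(struct holder *h)
{
	struct obj *o;

	spin_lock(&h->lock);
	o = h->cur;
	if (o)
		refcount_inc(&o->ref);
	spin_unlock(&h->lock);

	return o;	/* caller drops the reference when done */
}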
@@ -1661,7 +1661,7 @@ EXPORT_SYMBOL_GPL(fib_nexthop_info);

 #if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6)
 int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc,
-		    int nh_weight, u8 rt_family)
+		    int nh_weight, u8 rt_family, u32 nh_tclassid)
 {
	const struct net_device *dev = nhc->nhc_dev;
	struct rtnexthop *rtnh;
@@ -1679,6 +1679,9 @@ int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc,

	rtnh->rtnh_flags = flags;

+	if (nh_tclassid && nla_put_u32(skb, RTA_FLOW, nh_tclassid))
+		goto nla_put_failure;
+
	/* length of rtnetlink header + attributes */
	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;

@@ -1706,14 +1709,13 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
	}

	for_nexthops(fi) {
-		if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight,
-				    AF_INET) < 0)
-			goto nla_put_failure;
+		u32 nh_tclassid = 0;
 #ifdef CONFIG_IP_ROUTE_CLASSID
-		if (nh->nh_tclassid &&
-		    nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
-			goto nla_put_failure;
+		nh_tclassid = nh->nh_tclassid;
 #endif
+		if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight,
+				    AF_INET, nh_tclassid) < 0)
+			goto nla_put_failure;
	} endfor_nexthops(fi);

 mp_end:
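Note: the root cause was RTA_FLOW being appended after rtnh_len had already been computed, so the attribute was never counted. A hypothetical helper showing the invariant the fix restores (simplified, not the in-tree code):

#include <net/netlink.h>
#include <linux/rtnetlink.h>

/* rtnh_len must cover every attribute nested under the rtnexthop, so
 * all nla_put() calls have to happen before the length is measured.
 */
static int put_one_nexthop(struct sk_buff *skb, int ifindex, u32 tclassid)
{
	struct rtnexthop *rtnh;

	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
	if (!rtnh)
		return -EMSGSIZE;
	rtnh->rtnh_ifindex = ifindex;

	if (tclassid && nla_put_u32(skb, RTA_FLOW, tclassid))
		return -EMSGSIZE;

	/* measured last: header plus every nested attribute */
	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
	return 0;
}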
@@ -42,7 +42,7 @@ iptable_raw_hook(void *priv, struct sk_buff *skb,

 static struct nf_hook_ops *rawtable_ops __read_mostly;

-static int __net_init iptable_raw_table_init(struct net *net)
+static int iptable_raw_table_init(struct net *net)
 {
	struct ipt_replace *repl;
	const struct xt_table *table = &packet_raw;
@@ -1053,7 +1053,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
	__be16 dport;
	u8 tos;
	int err, is_udplite = IS_UDPLITE(sk);
-	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+	int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
	struct sk_buff *skb;
	struct ip_options_data opt_copy;
@@ -1361,7 +1361,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
	}

	up->len += size;
-	if (!(up->corkflag || (flags&MSG_MORE)))
+	if (!(READ_ONCE(up->corkflag) || (flags&MSG_MORE)))
		ret = udp_push_pending_frames(sk);
	if (!ret)
		ret = size;
@@ -2662,9 +2662,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
	switch (optname) {
	case UDP_CORK:
		if (val != 0) {
-			up->corkflag = 1;
+			WRITE_ONCE(up->corkflag, 1);
		} else {
-			up->corkflag = 0;
+			WRITE_ONCE(up->corkflag, 0);
			lock_sock(sk);
			push_pending_frames(sk);
			release_sock(sk);
@@ -2787,7 +2787,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,

	switch (optname) {
	case UDP_CORK:
-		val = up->corkflag;
+		val = READ_ONCE(up->corkflag);
		break;

	case UDP_ENCAP:
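Note: up->corkflag is written under the socket lock but read without it on the sendmsg path, so the accesses need the ONCE annotations to prevent load/store tearing and compiler refetches. A generic sketch of the annotation pattern (struct name hypothetical):

#include <linux/compiler.h>

struct cork_state {
	int corked;
};

static void cork_set(struct cork_state *s, int on)
{
	WRITE_ONCE(s->corked, on);		/* writer side */
}

static bool cork_active(const struct cork_state *s)
{
	return READ_ONCE(s->corked) != 0;	/* lockless reader */
}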
@@ -273,6 +273,7 @@ ip6t_do_table(struct sk_buff *skb,
	 * things we don't know, ie. tcp syn flag or ports).  If the
	 * rule is also a fragment-specific rule, non-fragments won't
	 * match it. */
+	acpar.fragoff = 0;
	acpar.hotdrop = false;
	acpar.state   = state;

@@ -5681,14 +5681,15 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
			goto nla_put_failure;

		if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
-				    rt->fib6_nh->fib_nh_weight, AF_INET6) < 0)
+				    rt->fib6_nh->fib_nh_weight, AF_INET6,
+				    0) < 0)
			goto nla_put_failure;

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->fib6_siblings, fib6_siblings) {
			if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
					    sibling->fib6_nh->fib_nh_weight,
-					    AF_INET6) < 0)
+					    AF_INET6, 0) < 0)
				goto nla_put_failure;
		}

@@ -1303,7 +1303,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
	int addr_len = msg->msg_namelen;
	bool connected = false;
	int ulen = len;
-	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+	int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
	int err;
	int is_udplite = IS_UDPLITE(sk);
	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
@@ -60,7 +60,10 @@ static struct mesh_table *mesh_table_alloc(void)
	atomic_set(&newtbl->entries,  0);
	spin_lock_init(&newtbl->gates_lock);
	spin_lock_init(&newtbl->walk_lock);
-	rhashtable_init(&newtbl->rhead, &mesh_rht_params);
+	if (rhashtable_init(&newtbl->rhead, &mesh_rht_params)) {
+		kfree(newtbl);
+		return NULL;
+	}

	return newtbl;
 }
@@ -2,6 +2,7 @@
 /*
  * Copyright 2012-2013, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de>
  * Copyright 2012-2013, cozybit Inc.
+ * Copyright (C) 2021 Intel Corporation
  */

 #include "mesh.h"
@@ -588,7 +589,7 @@ void ieee80211_mps_frame_release(struct sta_info *sta,

	/* only transmit to PS STA with announced, non-zero awake window */
	if (test_sta_flag(sta, WLAN_STA_PS_STA) &&
-	    (!elems->awake_window || !le16_to_cpu(*elems->awake_window)))
+	    (!elems->awake_window || !get_unaligned_le16(elems->awake_window)))
		return;

	if (!test_sta_flag(sta, WLAN_STA_MPSP_OWNER))
@@ -392,10 +392,6 @@ static bool rate_control_send_low(struct ieee80211_sta *pubsta,
	int mcast_rate;
	bool use_basicrate = false;

-	if (ieee80211_is_tx_data(txrc->skb) &&
-	    info->flags & IEEE80211_TX_CTL_NO_ACK)
-		return false;
-
	if (!pubsta || rc_no_data_or_no_ack_use_min(txrc)) {
		__rate_control_send_low(txrc->hw, sband, pubsta, info,
					txrc->rate_idx_mask);
@@ -4131,7 +4131,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
		if (!bssid)
			return false;
		if (ether_addr_equal(sdata->vif.addr, hdr->addr2) ||
-		    ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2))
+		    ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2) ||
+		    !is_valid_ether_addr(hdr->addr2))
			return false;
		if (ieee80211_is_beacon(hdr->frame_control))
			return true;
@@ -2209,7 +2209,11 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
			}

			vht_mcs = iterator.this_arg[4] >> 4;
+			if (vht_mcs > 11)
+				vht_mcs = 0;
			vht_nss = iterator.this_arg[4] & 0xF;
+			if (!vht_nss || vht_nss > 8)
+				vht_nss = 1;
			break;

		/*
@@ -3380,6 +3384,14 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
	if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head))
		goto out;

+	/* If n == 2, the "while (*frag_tail)" loop above didn't execute
+	 * and frag_tail should be &skb_shinfo(head)->frag_list.
+	 * However, ieee80211_amsdu_prepare_head() can reallocate it.
+	 * Reload frag_tail to have it pointing to the correct place.
+	 */
+	if (n == 2)
+		frag_tail = &skb_shinfo(head)->frag_list;
+
	/*
	 * Pad out the previous subframe to a multiple of 4 by adding the
	 * padding to the next one, that's being added. Note that head->len
@@ -520,6 +520,9 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
			return RX_DROP_UNUSABLE;
	}

+	/* reload hdr - skb might have been reallocated */
+	hdr = (void *)rx->skb->data;
+
	data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len;
	if (!rx->sta || data_len < 0)
		return RX_DROP_UNUSABLE;
@@ -749,6 +752,9 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
			return RX_DROP_UNUSABLE;
	}

+	/* reload hdr - skb might have been reallocated */
+	hdr = (void *)rx->skb->data;
+
	data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - mic_len;
	if (!rx->sta || data_len < 0)
		return RX_DROP_UNUSABLE;
@@ -36,7 +36,7 @@ static int mptcp_diag_dump_one(struct netlink_callback *cb,
	struct sock *sk;

	net = sock_net(in_skb->sk);
-	msk = mptcp_token_get_sock(req->id.idiag_cookie[0]);
+	msk = mptcp_token_get_sock(net, req->id.idiag_cookie[0]);
	if (!msk)
		goto out_nosk;

@@ -1718,9 +1718,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)

	list_for_each_entry(entry, &pernet->local_addr_list, list) {
		if (addresses_equal(&entry->addr, &addr.addr, true)) {
-			ret = mptcp_nl_addr_backup(net, &entry->addr, bkup);
-			if (ret)
-				return ret;
+			mptcp_nl_addr_backup(net, &entry->addr, bkup);

			if (bkup)
				entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
@@ -2735,7 +2735,7 @@ cleanup:
	inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
	mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-		bool slow = lock_sock_fast(ssk);
+		bool slow = lock_sock_fast_nested(ssk);

		sock_orphan(ssk);
		unlock_sock_fast(ssk, slow);
@@ -709,7 +709,7 @@ int mptcp_token_new_connect(struct sock *sk);
 void mptcp_token_accept(struct mptcp_subflow_request_sock *r,
			struct mptcp_sock *msk);
 bool mptcp_token_exists(u32 token);
-struct mptcp_sock *mptcp_token_get_sock(u32 token);
+struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token);
 struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot,
					 long *s_num);
 void mptcp_token_destroy(struct mptcp_sock *msk);
@@ -86,7 +86,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req)
	struct mptcp_sock *msk;
	int local_id;

-	msk = mptcp_token_get_sock(subflow_req->token);
+	msk = mptcp_token_get_sock(sock_net(req_to_sk(req)), subflow_req->token);
	if (!msk) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
		return NULL;
@@ -108,18 +108,12 @@ bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subfl

	e->valid = 0;

-	msk = mptcp_token_get_sock(e->token);
+	msk = mptcp_token_get_sock(net, e->token);
	if (!msk) {
		spin_unlock_bh(&join_entry_locks[i]);
		return false;
	}

-	/* If this fails, the token got re-used in the mean time by another
-	 * mptcp socket in a different netns, i.e. entry is outdated.
-	 */
-	if (!net_eq(sock_net((struct sock *)msk), net))
-		goto err_put;
-
	subflow_req->remote_nonce = e->remote_nonce;
	subflow_req->local_nonce = e->local_nonce;
	subflow_req->backup = e->backup;
@@ -128,11 +122,6 @@ bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subfl
	subflow_req->msk = msk;
	spin_unlock_bh(&join_entry_locks[i]);
	return true;
-
-err_put:
-	spin_unlock_bh(&join_entry_locks[i]);
-	sock_put((struct sock *)msk);
-	return false;
 }

 void __init mptcp_join_cookie_init(void)
@@ -231,6 +231,7 @@ found:

 /**
  * mptcp_token_get_sock - retrieve mptcp connection sock using its token
+ * @net: restrict to this namespace
  * @token: token of the mptcp connection to retrieve
  *
  * This function returns the mptcp connection structure with the given token.
@@ -238,7 +239,7 @@ found:
  *
  * returns NULL if no connection with the given token value exists.
  */
-struct mptcp_sock *mptcp_token_get_sock(u32 token)
+struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token)
 {
	struct hlist_nulls_node *pos;
	struct token_bucket *bucket;
@@ -251,11 +252,15 @@ struct mptcp_sock *mptcp_token_get_sock(u32 token)
 again:
	sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) {
		msk = mptcp_sk(sk);
-		if (READ_ONCE(msk->token) != token)
+
+		if (READ_ONCE(msk->token) != token ||
+		    !net_eq(sock_net(sk), net))
			continue;
+
		if (!refcount_inc_not_zero(&sk->sk_refcnt))
			goto not_found;
-		if (READ_ONCE(msk->token) != token) {
+
+		if (READ_ONCE(msk->token) != token ||
+		    !net_eq(sock_net(sk), net)) {
			sock_put(sk);
			goto again;
		}
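Note: the lookup must re-validate both token and netns after taking the reference, because an RCU reader can race with the socket being freed and recycled. A generic sketch of the check, ref, recheck pattern (using sk_hash as a stand-in key; the function itself is hypothetical):

#include <net/sock.h>

static struct sock *lookup_stable(struct hlist_nulls_head *head,
				  struct net *net, u32 key)
{
	struct hlist_nulls_node *pos;
	struct sock *sk;

	sk_nulls_for_each_rcu(sk, pos, head) {
		if (sk->sk_hash != key || !net_eq(sock_net(sk), net))
			continue;
		if (!refcount_inc_not_zero(&sk->sk_refcnt))
			continue;	/* about to be freed, skip */
		if (sk->sk_hash != key || !net_eq(sock_net(sk), net)) {
			sock_put(sk);	/* recycled under us */
			continue;
		}
		return sk;	/* caller owns a reference */
	}
	return NULL;
}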
@@ -11,6 +11,7 @@ static struct mptcp_subflow_request_sock *build_req_sock(struct kunit *test)
			    GFP_USER);
	KUNIT_EXPECT_NOT_ERR_OR_NULL(test, req);
	mptcp_token_init_request((struct request_sock *)req);
+	sock_net_set((struct sock *)req, &init_net);
	return req;
 }

@@ -22,7 +23,7 @@ static void mptcp_token_test_req_basic(struct kunit *test)
	KUNIT_ASSERT_EQ(test, 0,
			mptcp_token_new_request((struct request_sock *)req));
	KUNIT_EXPECT_NE(test, 0, (int)req->token);
-	KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(req->token));
+	KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, req->token));

	/* cleanup */
	mptcp_token_destroy_request((struct request_sock *)req);
@@ -55,6 +56,7 @@ static struct mptcp_sock *build_msk(struct kunit *test)
	msk = kunit_kzalloc(test, sizeof(struct mptcp_sock), GFP_USER);
	KUNIT_EXPECT_NOT_ERR_OR_NULL(test, msk);
	refcount_set(&((struct sock *)msk)->sk_refcnt, 1);
+	sock_net_set((struct sock *)msk, &init_net);
	return msk;
 }

@@ -74,11 +76,11 @@ static void mptcp_token_test_msk_basic(struct kunit *test)
			mptcp_token_new_connect((struct sock *)icsk));
	KUNIT_EXPECT_NE(test, 0, (int)ctx->token);
	KUNIT_EXPECT_EQ(test, ctx->token, msk->token);
-	KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(ctx->token));
+	KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, ctx->token));
	KUNIT_EXPECT_EQ(test, 2, (int)refcount_read(&sk->sk_refcnt));

	mptcp_token_destroy(msk);
-	KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(ctx->token));
+	KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, ctx->token));
 }

 static void mptcp_token_test_accept(struct kunit *test)
@@ -90,11 +92,11 @@ static void mptcp_token_test_accept(struct kunit *test)
			mptcp_token_new_request((struct request_sock *)req));
	msk->token = req->token;
	mptcp_token_accept(req, msk);
-	KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token));
+	KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, msk->token));

	/* this is now a no-op */
	mptcp_token_destroy_request((struct request_sock *)req);
-	KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token));
+	KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, msk->token));

	/* cleanup */
	mptcp_token_destroy(msk);
@@ -116,7 +118,7 @@ static void mptcp_token_test_destroyed(struct kunit *test)

	/* simulate race on removal */
	refcount_set(&sk->sk_refcnt, 0);
-	KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(msk->token));
+	KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, msk->token));

	/* cleanup */
	mptcp_token_destroy(msk);
@@ -130,11 +130,11 @@ htable_size(u8 hbits)
 {
	size_t hsize;

-	/* We must fit both into u32 in jhash and size_t */
+	/* We must fit both into u32 in jhash and INT_MAX in kvmalloc_node() */
	if (hbits > 31)
		return 0;
	hsize = jhash_size(hbits);
-	if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
+	if ((INT_MAX - sizeof(struct htable)) / sizeof(struct hbucket *)
	    < hsize)
		return 0;

@@ -1468,6 +1468,10 @@ int __init ip_vs_conn_init(void)
	int idx;

	/* Compute size and mask */
+	if (ip_vs_conn_tab_bits < 8 || ip_vs_conn_tab_bits > 20) {
+		pr_info("conn_tab_bits not in [8, 20]. Using default value\n");
+		ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+	}
	ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
	ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1;

@@ -74,10 +74,14 @@ static __read_mostly struct kmem_cache *nf_conntrack_cachep;
 static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
 static __read_mostly bool nf_conntrack_locks_all;

+/* serialize hash resizes and nf_ct_iterate_cleanup */
+static DEFINE_MUTEX(nf_conntrack_mutex);
+
 #define GC_SCAN_INTERVAL	(120u * HZ)
 #define GC_SCAN_MAX_DURATION	msecs_to_jiffies(10)

-#define MAX_CHAINLEN	64u
+#define MIN_CHAINLEN	8u
+#define MAX_CHAINLEN	(32u - MIN_CHAINLEN)

 static struct conntrack_gc_work conntrack_gc_work;

@@ -188,11 +192,13 @@ seqcount_spinlock_t nf_conntrack_generation __read_mostly;
 static siphash_key_t nf_conntrack_hash_rnd __read_mostly;

 static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
+			      unsigned int zoneid,
			      const struct net *net)
 {
	struct {
		struct nf_conntrack_man src;
		union nf_inet_addr dst_addr;
+		unsigned int zone;
		u32 net_mix;
		u16 dport;
		u16 proto;
@@ -205,6 +211,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
	/* The direction must be ignored, so handle usable members manually. */
	combined.src = tuple->src;
	combined.dst_addr = tuple->dst.u3;
+	combined.zone = zoneid;
	combined.net_mix = net_hash_mix(net);
	combined.dport = (__force __u16)tuple->dst.u.all;
	combined.proto = tuple->dst.protonum;
@@ -219,15 +226,17 @@ static u32 scale_hash(u32 hash)

 static u32 __hash_conntrack(const struct net *net,
			    const struct nf_conntrack_tuple *tuple,
+			    unsigned int zoneid,
			    unsigned int size)
 {
-	return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
+	return reciprocal_scale(hash_conntrack_raw(tuple, zoneid, net), size);
 }

 static u32 hash_conntrack(const struct net *net,
-			  const struct nf_conntrack_tuple *tuple)
+			  const struct nf_conntrack_tuple *tuple,
+			  unsigned int zoneid)
 {
-	return scale_hash(hash_conntrack_raw(tuple, net));
+	return scale_hash(hash_conntrack_raw(tuple, zoneid, net));
 }

 static bool nf_ct_get_tuple_ports(const struct sk_buff *skb,
@@ -650,9 +659,11 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
	do {
		sequence = read_seqcount_begin(&nf_conntrack_generation);
		hash = hash_conntrack(net,
-				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+				      nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
		reply_hash = hash_conntrack(net,
-					    &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+					    &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+					    nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));

	clean_from_lists(ct);
@@ -819,8 +830,20 @@ struct nf_conntrack_tuple_hash *
 nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
		      const struct nf_conntrack_tuple *tuple)
 {
-	return __nf_conntrack_find_get(net, zone, tuple,
-				       hash_conntrack_raw(tuple, net));
+	unsigned int rid, zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
+	struct nf_conntrack_tuple_hash *thash;
+
+	thash = __nf_conntrack_find_get(net, zone, tuple,
+					hash_conntrack_raw(tuple, zone_id, net));
+
+	if (thash)
+		return thash;
+
+	rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
+	if (rid != zone_id)
+		return __nf_conntrack_find_get(net, zone, tuple,
+					       hash_conntrack_raw(tuple, rid, net));
+	return thash;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_find_get);

@@ -842,6 +865,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
	unsigned int hash, reply_hash;
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_node *n;
+	unsigned int max_chainlen;
	unsigned int chainlen = 0;
	unsigned int sequence;
	int err = -EEXIST;
@@ -852,18 +876,22 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
	do {
		sequence = read_seqcount_begin(&nf_conntrack_generation);
		hash = hash_conntrack(net,
-				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+				      nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
		reply_hash = hash_conntrack(net,
-					    &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+					    &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+					    nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));

+	max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
+
	/* See if there's one in the list already, including reverse */
	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				    zone, net))
			goto out;

-		if (chainlen++ > MAX_CHAINLEN)
+		if (chainlen++ > max_chainlen)
			goto chaintoolong;
	}

@@ -873,7 +901,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				    zone, net))
			goto out;
-		if (chainlen++ > MAX_CHAINLEN)
+		if (chainlen++ > max_chainlen)
			goto chaintoolong;
	}

@@ -1103,8 +1131,8 @@ drop:
 int
 __nf_conntrack_confirm(struct sk_buff *skb)
 {
+	unsigned int chainlen = 0, sequence, max_chainlen;
	const struct nf_conntrack_zone *zone;
-	unsigned int chainlen = 0, sequence;
	unsigned int hash, reply_hash;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
@@ -1133,8 +1161,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
		hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
		hash = scale_hash(hash);
		reply_hash = hash_conntrack(net,
-					    &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
+					    &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+					    nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));

	/* We're not in hash table, and we refuse to set up related
@@ -1168,6 +1196,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
		goto dying;
	}

+	max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost race. */
@@ -1175,7 +1204,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				    zone, net))
			goto out;
-		if (chainlen++ > MAX_CHAINLEN)
+		if (chainlen++ > max_chainlen)
			goto chaintoolong;
	}

@@ -1184,7 +1213,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				    zone, net))
			goto out;
-		if (chainlen++ > MAX_CHAINLEN) {
+		if (chainlen++ > max_chainlen) {
 chaintoolong:
			nf_ct_add_to_dying_list(ct);
			NF_CT_STAT_INC(net, chaintoolong);
@@ -1246,7 +1275,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
	rcu_read_lock();
 begin:
	nf_conntrack_get_ht(&ct_hash, &hsize);
-	hash = __hash_conntrack(net, tuple, hsize);
+	hash = __hash_conntrack(net, tuple, nf_ct_zone_id(zone, IP_CT_DIR_REPLY), hsize);

	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
		ct = nf_ct_tuplehash_to_ctrack(h);
@@ -1687,8 +1716,8 @@ resolve_normal_ct(struct nf_conn *tmpl,
	struct nf_conntrack_tuple_hash *h;
	enum ip_conntrack_info ctinfo;
	struct nf_conntrack_zone tmp;
+	u32 hash, zone_id, rid;
	struct nf_conn *ct;
-	u32 hash;

	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
			     dataoff, state->pf, protonum, state->net,
@@ -1699,8 +1728,20 @@ resolve_normal_ct(struct nf_conn *tmpl,

	/* look for tuple match */
	zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
-	hash = hash_conntrack_raw(&tuple, state->net);
+
+	zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
+	hash = hash_conntrack_raw(&tuple, zone_id, state->net);
	h = __nf_conntrack_find_get(state->net, zone, &tuple, hash);
+
+	if (!h) {
+		rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
+		if (zone_id != rid) {
+			u32 tmp = hash_conntrack_raw(&tuple, rid, state->net);
+
+			h = __nf_conntrack_find_get(state->net, zone, &tuple, tmp);
+		}
+	}
+
	if (!h) {
		h = init_conntrack(state->net, tmpl, &tuple,
				   skb, dataoff, hash);
@@ -2225,28 +2266,31 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
	spinlock_t *lockp;

	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
+		struct hlist_nulls_head *hslot = &nf_conntrack_hash[*bucket];
+
+		if (hlist_nulls_empty(hslot))
+			continue;
+
		lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
		local_bh_disable();
		nf_conntrack_lock(lockp);
-		if (*bucket < nf_conntrack_htable_size) {
-			hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
-				if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
-					continue;
-				/* All nf_conn objects are added to hash table twice, one
-				 * for original direction tuple, once for the reply tuple.
-				 *
-				 * Exception: In the IPS_NAT_CLASH case, only the reply
-				 * tuple is added (the original tuple already existed for
-				 * a different object).
-				 *
-				 * We only need to call the iterator once for each
-				 * conntrack, so we just use the 'reply' direction
-				 * tuple while iterating.
-				 */
-				ct = nf_ct_tuplehash_to_ctrack(h);
-				if (iter(ct, data))
-					goto found;
-			}
+		hlist_nulls_for_each_entry(h, n, hslot, hnnode) {
+			if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
+				continue;
+			/* All nf_conn objects are added to hash table twice, one
+			 * for original direction tuple, once for the reply tuple.
+			 *
+			 * Exception: In the IPS_NAT_CLASH case, only the reply
+			 * tuple is added (the original tuple already existed for
+			 * a different object).
+			 *
+			 * We only need to call the iterator once for each
+			 * conntrack, so we just use the 'reply' direction
+			 * tuple while iterating.
+			 */
+			ct = nf_ct_tuplehash_to_ctrack(h);
+			if (iter(ct, data))
+				goto found;
		}
		spin_unlock(lockp);
		local_bh_enable();
@@ -2264,26 +2308,20 @@ found:
 static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
				  void *data, u32 portid, int report)
 {
-	unsigned int bucket = 0, sequence;
+	unsigned int bucket = 0;
	struct nf_conn *ct;

	might_sleep();

-	for (;;) {
-		sequence = read_seqcount_begin(&nf_conntrack_generation);
-
-		while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
-			/* Time to push up daises... */
-
-			nf_ct_delete(ct, portid, report);
-			nf_ct_put(ct);
-			cond_resched();
-		}
-
-		if (!read_seqcount_retry(&nf_conntrack_generation, sequence))
-			break;
-		bucket = 0;
+	mutex_lock(&nf_conntrack_mutex);
+	while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
+		/* Time to push up daises... */
+
+		nf_ct_delete(ct, portid, report);
+		nf_ct_put(ct);
+		cond_resched();
	}
+	mutex_unlock(&nf_conntrack_mutex);
 }

 struct iter_data {
@@ -2519,8 +2557,10 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
	if (!hash)
		return -ENOMEM;

+	mutex_lock(&nf_conntrack_mutex);
	old_size = nf_conntrack_htable_size;
	if (old_size == hashsize) {
+		mutex_unlock(&nf_conntrack_mutex);
		kvfree(hash);
		return 0;
	}
@@ -2537,12 +2577,16 @@ int nf_conntrack_hash_resize(unsigned int hashsize)

	for (i = 0; i < nf_conntrack_htable_size; i++) {
		while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
+			unsigned int zone_id;
+
			h = hlist_nulls_entry(nf_conntrack_hash[i].first,
					      struct nf_conntrack_tuple_hash, hnnode);
			ct = nf_ct_tuplehash_to_ctrack(h);
			hlist_nulls_del_rcu(&h->hnnode);
+
+			zone_id = nf_ct_zone_id(nf_ct_zone(ct), NF_CT_DIRECTION(h));
			bucket = __hash_conntrack(nf_ct_net(ct),
-						  &h->tuple, hashsize);
+						  &h->tuple, zone_id, hashsize);
			hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
		}
	}
@@ -2556,6 +2600,8 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
	nf_conntrack_all_unlock();
	local_bh_enable();

+	mutex_unlock(&nf_conntrack_mutex);
+
	synchronize_net();
	kvfree(old_hash);
	return 0;
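Note: with MIN_CHAINLEN 8 and MAX_CHAINLEN redefined as (32u - MIN_CHAINLEN), each insertion draws its overflow cutoff from [8, 31], so an attacker can no longer infer exact bucket occupancy from the point where inserts start failing. A sketch of the cutoff computation:

#include <linux/prandom.h>

#define MIN_CHAINLEN	8u
#define MAX_CHAINLEN	(32u - MIN_CHAINLEN)

/* Drawn once per insertion attempt, not per comparison. */
static unsigned int pick_max_chainlen(void)
{
	/* prandom_u32_max(n) returns a value in [0, n), so the
	 * result here lies in [MIN_CHAINLEN, MIN_CHAINLEN + 23].
	 */
	return MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
}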
@@ -150,13 +150,16 @@ static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)

 /* We keep an extra hash for each conntrack, for fast searching. */
 static unsigned int
-hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
+hash_by_src(const struct net *net,
+	    const struct nf_conntrack_zone *zone,
+	    const struct nf_conntrack_tuple *tuple)
 {
	unsigned int hash;
	struct {
		struct nf_conntrack_man src;
		u32 net_mix;
		u32 protonum;
+		u32 zone;
	} __aligned(SIPHASH_ALIGNMENT) combined;

	get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
@@ -165,9 +168,13 @@ hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)

	/* Original src, to ensure we map it consistently if poss. */
	combined.src = tuple->src;
-	combined.net_mix = net_hash_mix(n);
+	combined.net_mix = net_hash_mix(net);
	combined.protonum = tuple->dst.protonum;

+	/* Zone ID can be used provided its valid for both directions */
+	if (zone->dir == NF_CT_DEFAULT_ZONE_DIR)
+		combined.zone = zone->id;
+
	hash = siphash(&combined, sizeof(combined), &nf_nat_hash_rnd);

	return reciprocal_scale(hash, nf_nat_htable_size);
@@ -272,7 +279,7 @@ find_appropriate_src(struct net *net,
		     struct nf_conntrack_tuple *result,
		     const struct nf_nat_range2 *range)
 {
-	unsigned int h = hash_by_src(net, tuple);
+	unsigned int h = hash_by_src(net, zone, tuple);
	const struct nf_conn *ct;

	hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) {
@@ -619,7 +626,7 @@ nf_nat_setup_info(struct nf_conn *ct,
		unsigned int srchash;
		spinlock_t *lock;

-		srchash = hash_by_src(net,
+		srchash = hash_by_src(net, nf_ct_zone(ct),
				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
		lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS];
		spin_lock_bh(lock);
@@ -788,7 +795,7 @@ static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
 {
	unsigned int h;

-	h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	h = hash_by_src(nf_ct_net(ct), nf_ct_zone(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
	hlist_del_rcu(&ct->nat_bysource);
	spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
@@ -9,8 +9,19 @@

 #include <net/netfilter/nf_nat_masquerade.h>

+struct masq_dev_work {
+	struct work_struct work;
+	struct net *net;
+	union nf_inet_addr addr;
+	int ifindex;
+	int (*iter)(struct nf_conn *i, void *data);
+};
+
+#define MAX_MASQ_WORKER_COUNT	16
+
 static DEFINE_MUTEX(masq_mutex);
 static unsigned int masq_refcnt __read_mostly;
+static atomic_t masq_worker_count __read_mostly;

 unsigned int
 nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
@@ -63,13 +74,71 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 }
 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);

-static int device_cmp(struct nf_conn *i, void *ifindex)
+static void iterate_cleanup_work(struct work_struct *work)
+{
+	struct masq_dev_work *w;
+
+	w = container_of(work, struct masq_dev_work, work);
+
+	nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0);
+
+	put_net(w->net);
+	kfree(w);
+	atomic_dec(&masq_worker_count);
+	module_put(THIS_MODULE);
+}
+
+/* Iterate conntrack table in the background and remove conntrack entries
+ * that use the device/address being removed.
+ *
+ * In case too many work items have been queued already or memory allocation
+ * fails iteration is skipped, conntrack entries will time out eventually.
+ */
+static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
+				 int ifindex,
+				 int (*iter)(struct nf_conn *i, void *data),
+				 gfp_t gfp_flags)
+{
+	struct masq_dev_work *w;
+
+	if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT)
+		return;
+
+	net = maybe_get_net(net);
+	if (!net)
+		return;
+
+	if (!try_module_get(THIS_MODULE))
+		goto err_module;
+
+	w = kzalloc(sizeof(*w), gfp_flags);
+	if (w) {
+		/* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */
+		atomic_inc(&masq_worker_count);
+
+		INIT_WORK(&w->work, iterate_cleanup_work);
+		w->ifindex = ifindex;
+		w->net = net;
+		w->iter = iter;
+		if (addr)
+			w->addr = *addr;
+		schedule_work(&w->work);
+		return;
+	}
+
+	module_put(THIS_MODULE);
+err_module:
+	put_net(net);
+}
+
+static int device_cmp(struct nf_conn *i, void *arg)
 {
	const struct nf_conn_nat *nat = nfct_nat(i);
+	const struct masq_dev_work *w = arg;

	if (!nat)
		return 0;
-	return nat->masq_index == (int)(long)ifindex;
+	return nat->masq_index == w->ifindex;
 }

 static int masq_device_event(struct notifier_block *this,
@@ -85,8 +154,8 @@ static int masq_device_event(struct notifier_block *this,
		 * and forget them.
		 */

-		nf_ct_iterate_cleanup_net(net, device_cmp,
-					  (void *)(long)dev->ifindex, 0, 0);
+		nf_nat_masq_schedule(net, NULL, dev->ifindex,
+				     device_cmp, GFP_KERNEL);
	}

	return NOTIFY_DONE;
@@ -94,35 +163,45 @@ static int masq_device_event(struct notifier_block *this,

 static int inet_cmp(struct nf_conn *ct, void *ptr)
 {
-	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
-	struct net_device *dev = ifa->ifa_dev->dev;
	struct nf_conntrack_tuple *tuple;
+	struct masq_dev_work *w = ptr;

-	if (!device_cmp(ct, (void *)(long)dev->ifindex))
+	if (!device_cmp(ct, ptr))
		return 0;

	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

-	return ifa->ifa_address == tuple->dst.u3.ip;
+	return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3);
 }

 static int masq_inet_event(struct notifier_block *this,
			   unsigned long event,
			   void *ptr)
 {
-	struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
-	struct net *net = dev_net(idev->dev);
+	const struct in_ifaddr *ifa = ptr;
+	const struct in_device *idev;
+	const struct net_device *dev;
+	union nf_inet_addr addr;
+
+	if (event != NETDEV_DOWN)
+		return NOTIFY_DONE;

	/* The masq_dev_notifier will catch the case of the device going
	 * down.  So if the inetdev is dead and being destroyed we have
	 * no work to do.  Otherwise this is an individual address removal
	 * and we have to perform the flush.
	 */
+	idev = ifa->ifa_dev;
	if (idev->dead)
		return NOTIFY_DONE;

-	if (event == NETDEV_DOWN)
-		nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);
+	memset(&addr, 0, sizeof(addr));
+
+	addr.ip = ifa->ifa_address;
+
+	dev = idev->dev;
+	nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex,
+			     inet_cmp, GFP_KERNEL);

	return NOTIFY_DONE;
 }
@@ -136,8 +215,6 @@ static struct notifier_block masq_inet_notifier = {
 };

 #if IS_ENABLED(CONFIG_IPV6)
-static atomic_t v6_worker_count __read_mostly;
-
 static int
 nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
		       const struct in6_addr *daddr, unsigned int srcprefs,
@@ -187,40 +264,6 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
 }
 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);

-struct masq_dev_work {
-	struct work_struct work;
-	struct net *net;
-	struct in6_addr addr;
-	int ifindex;
-};
-
-static int inet6_cmp(struct nf_conn *ct, void *work)
-{
-	struct masq_dev_work *w = (struct masq_dev_work *)work;
-	struct nf_conntrack_tuple *tuple;
-
-	if (!device_cmp(ct, (void *)(long)w->ifindex))
-		return 0;
-
-	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-
-	return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
-}
-
-static void iterate_cleanup_work(struct work_struct *work)
-{
-	struct masq_dev_work *w;
-
-	w = container_of(work, struct masq_dev_work, work);
-
-	nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0);
-
-	put_net(w->net);
-	kfree(w);
-	atomic_dec(&v6_worker_count);
-	module_put(THIS_MODULE);
-}
-
 /* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
  *
  * Defer it to the system workqueue.
@@ -233,36 +276,19 @@ static int masq_inet6_event(struct notifier_block *this,
 {
	struct inet6_ifaddr *ifa = ptr;
	const struct net_device *dev;
-	struct masq_dev_work *w;
-	struct net *net;
+	union nf_inet_addr addr;

-	if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16)
+	if (event != NETDEV_DOWN)
		return NOTIFY_DONE;

	dev = ifa->idev->dev;
-	net = maybe_get_net(dev_net(dev));
-	if (!net)
-		return NOTIFY_DONE;

-	if (!try_module_get(THIS_MODULE))
-		goto err_module;
+	memset(&addr, 0, sizeof(addr));

-	w = kmalloc(sizeof(*w), GFP_ATOMIC);
-	if (w) {
-		atomic_inc(&v6_worker_count);
+	addr.in6 = ifa->addr;

-		INIT_WORK(&w->work, iterate_cleanup_work);
-		w->ifindex = dev->ifindex;
-		w->net = net;
-		w->addr = ifa->addr;
-		schedule_work(&w->work);
-
-		return NOTIFY_DONE;
-	}
-
-	module_put(THIS_MODULE);
-err_module:
-	put_net(net);
+	nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp,
+			     GFP_ATOMIC);
	return NOTIFY_DONE;
 }
|
||||
if (ops->privsize != NULL)
|
||||
size = ops->privsize(nla, &desc);
|
||||
alloc_size = sizeof(*set) + size + udlen;
|
||||
if (alloc_size < size)
|
||||
if (alloc_size < size || alloc_size > INT_MAX)
|
||||
return -ENOMEM;
|
||||
set = kvzalloc(alloc_size, GFP_KERNEL);
|
||||
if (!set)
|
||||
@ -9599,7 +9599,6 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
|
||||
table->use--;
|
||||
nf_tables_chain_destroy(&ctx);
|
||||
}
|
||||
list_del(&table->list);
|
||||
nf_tables_table_destroy(&ctx);
|
||||
}
|
||||
|
||||
@ -9612,6 +9611,8 @@ static void __nft_release_tables(struct net *net)
|
||||
if (nft_table_has_owner(table))
|
||||
continue;
|
||||
|
||||
list_del(&table->list);
|
||||
|
||||
__nft_release_table(net, table);
|
||||
}
|
||||
}
|
||||
@ -9619,31 +9620,38 @@ static void __nft_release_tables(struct net *net)
|
||||
static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
|
||||
void *ptr)
|
||||
{
|
||||
struct nft_table *table, *to_delete[8];
|
||||
struct nftables_pernet *nft_net;
|
||||
struct netlink_notify *n = ptr;
|
||||
struct nft_table *table, *nt;
|
||||
struct net *net = n->net;
|
||||
bool release = false;
|
||||
unsigned int deleted;
|
||||
bool restart = false;
|
||||
|
||||
if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
nft_net = nft_pernet(net);
|
||||
deleted = 0;
|
||||
mutex_lock(&nft_net->commit_mutex);
|
||||
again:
|
||||
list_for_each_entry(table, &nft_net->tables, list) {
|
||||
if (nft_table_has_owner(table) &&
|
||||
n->portid == table->nlpid) {
|
||||
__nft_release_hook(net, table);
|
||||
release = true;
|
||||
list_del_rcu(&table->list);
|
||||
to_delete[deleted++] = table;
|
||||
if (deleted >= ARRAY_SIZE(to_delete))
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (release) {
|
||||
if (deleted) {
|
||||
restart = deleted >= ARRAY_SIZE(to_delete);
|
||||
synchronize_rcu();
|
||||
list_for_each_entry_safe(table, nt, &nft_net->tables, list) {
|
||||
if (nft_table_has_owner(table) &&
|
||||
n->portid == table->nlpid)
|
||||
__nft_release_table(net, table);
|
||||
}
|
||||
while (deleted)
|
||||
__nft_release_table(net, to_delete[--deleted]);
|
||||
|
||||
if (restart)
|
||||
goto again;
|
||||
}
|
||||
mutex_unlock(&nft_net->commit_mutex);
|
||||
|
||||
|
@@ -19,6 +19,7 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_arp/arp_tables.h>
 #include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_log.h>

 /* Used for matches where *info is larger than X byte */
 #define NFT_MATCH_LARGE_THRESH 192
@@ -257,8 +258,22 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
	nft_compat_wait_for_destructors();

	ret = xt_check_target(&par, size, proto, inv);
-	if (ret < 0)
+	if (ret < 0) {
+		if (ret == -ENOENT) {
+			const char *modname = NULL;
+
+			if (strcmp(target->name, "LOG") == 0)
+				modname = "nf_log_syslog";
+			else if (strcmp(target->name, "NFLOG") == 0)
+				modname = "nfnetlink_log";
+
+			if (modname &&
+			    nft_request_module(ctx->net, "%s", modname) == -EAGAIN)
+				return -EAGAIN;
+		}
+
		return ret;
+	}

	/* The standard target cannot be used */
	if (!target->target)
@@ -44,6 +44,7 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par)
 static int log_tg_check(const struct xt_tgchk_param *par)
 {
	const struct xt_log_info *loginfo = par->targinfo;
+	int ret;

	if (par->family != NFPROTO_IPV4 && par->family != NFPROTO_IPV6)
		return -EINVAL;
@@ -58,7 +59,14 @@ static int log_tg_check(const struct xt_tgchk_param *par)
		return -EINVAL;
	}

-	return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+	ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+	if (ret != 0 && !par->nft_compat) {
+		request_module("%s", "nf_log_syslog");
+
+		ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+	}
+
+	return ret;
 }

 static void log_tg_destroy(const struct xt_tgdtor_param *par)
@@ -42,13 +42,21 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
 static int nflog_tg_check(const struct xt_tgchk_param *par)
 {
	const struct xt_nflog_info *info = par->targinfo;
+	int ret;

	if (info->flags & ~XT_NFLOG_MASK)
		return -EINVAL;
	if (info->prefix[sizeof(info->prefix) - 1] != '\0')
		return -EINVAL;

-	return nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
+	ret = nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
+	if (ret != 0 && !par->nft_compat) {
+		request_module("%s", "nfnetlink_log");
+
+		ret = nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
+	}
+
+	return ret;
 }

 static void nflog_tg_destroy(const struct xt_tgdtor_param *par)
@@ -2188,18 +2188,24 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,

	arg->count = arg->skip;

+	rcu_read_lock();
	idr_for_each_entry_continue_ul(&head->handle_idr, f, tmp, id) {
		/* don't return filters that are being deleted */
		if (!refcount_inc_not_zero(&f->refcnt))
			continue;
+		rcu_read_unlock();
+
		if (arg->fn(tp, f, arg) < 0) {
			__fl_put(f);
			arg->stop = 1;
+			rcu_read_lock();
			break;
		}
		__fl_put(f);
		arg->count++;
+		rcu_read_lock();
	}
+	rcu_read_unlock();
	arg->cookie = id;
 }
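Note: fl_walk() can drop the RCU lock around the callback because each filter is pinned by refcount first, and the IDR iterator resumes by id. A generic sketch of the same shape, assuming objects are freed with an RCU grace period (all names hypothetical):

#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/list.h>

struct walk_obj {
	struct list_head node;
	refcount_t ref;
};

void walk_obj_put(struct walk_obj *o);	/* frees via kfree_rcu() internally */

static void walk_objs(struct list_head *head,
		      void (*visit)(struct walk_obj *))
{
	struct walk_obj *o;

	rcu_read_lock();
	list_for_each_entry_rcu(o, head, node) {
		if (!refcount_inc_not_zero(&o->ref))
			continue;	/* being deleted, skip it */
		rcu_read_unlock();

		visit(o);		/* may sleep here */

		rcu_read_lock();
		walk_obj_put(o);	/* RCU-deferred free keeps the
					 * iterator's next pointer valid */
	}
	rcu_read_unlock();
}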
@@ -513,6 +513,12 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
		return stab;
	}

+	if (s->size_log > STAB_SIZE_LOG_MAX ||
+	    s->cell_log > STAB_SIZE_LOG_MAX) {
+		NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
+		return ERR_PTR(-EINVAL);
+	}
+
	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);
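Note: size_log and cell_log come straight from a netlink attribute and are later used as shift counts; without a bound, a value above 31 makes the shift undefined. A tiny sketch of the failure mode being closed:

#define STAB_SIZE_LOG_MAX	30

/* A user-supplied cell_log of e.g. 40 would make 1 << cell_log
 * undefined behaviour; rejecting anything above STAB_SIZE_LOG_MAX
 * keeps the shift well-defined.
 */
static unsigned int stab_cell_bytes(unsigned int cell_log)
{
	if (cell_log > STAB_SIZE_LOG_MAX)
		return 0;	/* caller treats 0 as invalid */
	return 1U << cell_log;
}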
@@ -702,7 +702,7 @@ static int sctp_rcv_ootb(struct sk_buff *skb)
		ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch);

		/* Break out if chunk length is less then minimal. */
-		if (ntohs(ch->length) < sizeof(_ch))
+		if (!ch || ntohs(ch->length) < sizeof(_ch))
			break;

		ch_end = offset + SCTP_PAD4(ntohs(ch->length));
net/unix/af_unix.c:
@@ -608,20 +608,42 @@ static void unix_release_sock(struct sock *sk, int embrion)

 static void init_peercred(struct sock *sk)
 {
-        put_pid(sk->sk_peer_pid);
-        if (sk->sk_peer_cred)
-                put_cred(sk->sk_peer_cred);
+        const struct cred *old_cred;
+        struct pid *old_pid;
+
+        spin_lock(&sk->sk_peer_lock);
+        old_pid = sk->sk_peer_pid;
+        old_cred = sk->sk_peer_cred;
         sk->sk_peer_pid = get_pid(task_tgid(current));
         sk->sk_peer_cred = get_current_cred();
+        spin_unlock(&sk->sk_peer_lock);
+
+        put_pid(old_pid);
+        put_cred(old_cred);
 }

 static void copy_peercred(struct sock *sk, struct sock *peersk)
 {
-        put_pid(sk->sk_peer_pid);
-        if (sk->sk_peer_cred)
-                put_cred(sk->sk_peer_cred);
+        const struct cred *old_cred;
+        struct pid *old_pid;
+
+        if (sk < peersk) {
+                spin_lock(&sk->sk_peer_lock);
+                spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+        } else {
+                spin_lock(&peersk->sk_peer_lock);
+                spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+        }
+        old_pid = sk->sk_peer_pid;
+        old_cred = sk->sk_peer_cred;
         sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
+
+        spin_unlock(&sk->sk_peer_lock);
+        spin_unlock(&peersk->sk_peer_lock);
+
+        put_pid(old_pid);
+        put_cred(old_cred);
 }

 static int unix_listen(struct socket *sock, int backlog)
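copy_peercred() now takes both peer locks ordered by object address, the classic way to rule out ABBA deadlock when two lockers can meet from opposite directions (that is what the sk < peersk comparison does). A minimal pthread sketch of the same discipline; struct peer and copy_pid() are invented for the example:

#include <pthread.h>
#include <stdio.h>

struct peer {
        pthread_mutex_t lock;
        int pid;
};

/* Take both locks in a global order (lowest address first) so that two
 * threads doing copy_pid(a, b) and copy_pid(b, a) can never deadlock. */
static void lock_pair(struct peer *a, struct peer *b)
{
        if (a < b) {
                pthread_mutex_lock(&a->lock);
                pthread_mutex_lock(&b->lock);
        } else {
                pthread_mutex_lock(&b->lock);
                pthread_mutex_lock(&a->lock);
        }
}

static void unlock_pair(struct peer *a, struct peer *b)
{
        pthread_mutex_unlock(&a->lock);
        pthread_mutex_unlock(&b->lock);
}

static void copy_pid(struct peer *dst, struct peer *src)
{
        lock_pair(dst, src);
        dst->pid = src->pid;    /* both fields stable while both locks held */
        unlock_pair(dst, src);
}

int main(void)
{
        struct peer a = { PTHREAD_MUTEX_INITIALIZER, 0 };
        struct peer b = { PTHREAD_MUTEX_INITIALIZER, 42 };

        copy_pid(&a, &b);
        printf("%d\n", a.pid);
        return 0;
}

Also note the shape of the fix itself: the old pid/cred are only dropped with put_pid()/put_cred() after the lock is released, so no sleeping or reentrant work happens under the spinlock.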
net/unix/af_unix.c (continued):
@@ -828,20 +850,25 @@ struct proto unix_stream_proto = {

 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
 {
-        struct sock *sk = NULL;
         struct unix_sock *u;
+        struct sock *sk;
+        int err;

         atomic_long_inc(&unix_nr_socks);
-        if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
-                goto out;
+        if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
+                err = -ENFILE;
+                goto err;
+        }

         if (type == SOCK_STREAM)
                 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
         else /*dgram and seqpacket */
                 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);

-        if (!sk)
-                goto out;
+        if (!sk) {
+                err = -ENOMEM;
+                goto err;
+        }

         sock_init_data(sock, sk);
@@ -861,20 +888,23 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
         memset(&u->scm_stat, 0, sizeof(struct scm_stat));
         unix_insert_socket(unix_sockets_unbound(sk), sk);
-out:
-        if (sk == NULL)
-                atomic_long_dec(&unix_nr_socks);
-        else {
-                local_bh_disable();
-                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-                local_bh_enable();
-        }
+
+        local_bh_disable();
+        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+        local_bh_enable();
+
         return sk;
+
+err:
+        atomic_long_dec(&unix_nr_socks);
+        return ERR_PTR(err);
 }

 static int unix_create(struct net *net, struct socket *sock, int protocol,
                        int kern)
 {
+        struct sock *sk;
+
         if (protocol && protocol != PF_UNIX)
                 return -EPROTONOSUPPORT;
@@ -901,7 +931,11 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
                 return -ESOCKTNOSUPPORT;
         }

-        return unix_create1(net, sock, kern, sock->type) ? 0 : -ENOMEM;
+        sk = unix_create1(net, sock, kern, sock->type);
+        if (IS_ERR(sk))
+                return PTR_ERR(sk);
+
+        return 0;
 }

 static int unix_release(struct socket *sock)
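This rework switches unix_create1() from "NULL means some error" to the kernel's ERR_PTR() convention, so callers such as unix_create() and unix_stream_connect() can propagate the precise errno (-ENFILE over the fs.file-max limit, -ENOMEM on allocation failure) instead of a blanket -ENOMEM. A user-space mimic of the encoding; the kernel's real helpers live in include/linux/err.h:

#include <errno.h>
#include <stdio.h>

/* User-space mimic of ERR_PTR()/IS_ERR()/PTR_ERR(): errors are encoded
 * as pointers in the top MAX_ERRNO bytes of the address space, which no
 * valid allocation ever occupies. */
#define MAX_ERRNO 4095

static inline void *ERR_PTR(long err) { return (void *)err; }
static inline long PTR_ERR(const void *p) { return (long)p; }
static inline int IS_ERR(const void *p)
{
        return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

static void *create_thing(int fail)
{
        if (fail)
                return ERR_PTR(-ENFILE);  /* caller learns *why* it failed */
        static int thing = 1;
        return &thing;
}

int main(void)
{
        void *p = create_thing(1);

        if (IS_ERR(p))
                printf("create_thing failed: %ld\n", PTR_ERR(p));
        return 0;
}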
@@ -1314,12 +1348,15 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
            we will have to recheck all again in any case.
          */

-        err = -ENOMEM;
-
         /* create new sock for complete connection */
         newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
-        if (newsk == NULL)
+        if (IS_ERR(newsk)) {
+                err = PTR_ERR(newsk);
+                newsk = NULL;
                 goto out;
+        }
+
+        err = -ENOMEM;

         /* Allocate skb for sending to listening sock */
         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
tools/lib/bpf/linker.c:
@@ -1649,11 +1649,17 @@ static bool btf_is_non_static(const struct btf_type *t)
 static int find_glob_sym_btf(struct src_obj *obj, Elf64_Sym *sym, const char *sym_name,
                              int *out_btf_sec_id, int *out_btf_id)
 {
-        int i, j, n = btf__get_nr_types(obj->btf), m, btf_id = 0;
+        int i, j, n, m, btf_id = 0;
         const struct btf_type *t;
         const struct btf_var_secinfo *vi;
         const char *name;

+        if (!obj->btf) {
+                pr_warn("failed to find BTF info for object '%s'\n", obj->filename);
+                return -EINVAL;
+        }
+
+        n = btf__get_nr_types(obj->btf);
         for (i = 1; i <= n; i++) {
                 t = btf__type_by_id(obj->btf, i);
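The reordering is the point: the old declarator initialized n by calling btf__get_nr_types() on a possibly-NULL obj->btf before any check could run. Moving the call below the explicit NULL test lets objects built without BTF fail with a clean -EINVAL and a diagnostic instead.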
tools/testing/selftests/bpf/Makefile:
@@ -375,7 +375,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
                      $(TRUNNER_BPF_PROGS_DIR)/%.c \
                      $(TRUNNER_BPF_PROGS_DIR)/*.h \
                      $$(INCLUDE_DIR)/vmlinux.h \
-                     $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT)
+                     $(wildcard $(BPFDIR)/bpf_*.h) \
+                     | $(TRUNNER_OUTPUT) $$(BPFOBJ)
         $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
                                           $(TRUNNER_BPF_CFLAGS))
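In make, everything to the right of the | is an order-only prerequisite: it must exist before the recipe runs, but being newer does not mark the target out of date. Adding $$(BPFOBJ) there ensures libbpf is built before any test BPF object is compiled, without forcing a rebuild of every object each time libbpf changes.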
tools/testing/selftests/bpf/test_lwt_ip_encap.sh:
@@ -112,6 +112,14 @@ setup()
         ip netns add "${NS2}"
         ip netns add "${NS3}"

+        # rp_filter gets confused by what these tests are doing, so disable it
+        ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
+        ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
+        ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
+        ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0
+        ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0
+        ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0
+
         ip link add veth1 type veth peer name veth2
         ip link add veth3 type veth peer name veth4
         ip link add veth5 type veth peer name veth6
@@ -236,11 +244,6 @@ setup()
         ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF}
         ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF}

-        # rp_filter gets confused by what these tests are doing, so disable it
-        ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
-        ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
-        ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
-
         TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX)

         sleep 1  # reduce flakiness
tools/testing/selftests/netfilter/nft_nat_zones.sh (new executable file, 309 lines):
@@ -0,0 +1,309 @@
#!/bin/bash
#
# Test connection tracking zone and NAT source port reallocation support.
#

# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

# Don't increase too much, 2000 clients should work
# just fine but script can then take several minutes with
# KASAN/debug builds.
maxclients=100

have_iperf=1
ret=0

# client1---.
#            veth1-.
#                  |
#            NAT Gateway --veth0--> Server
#                  |            |
#            veth2-'            |
# client2---'                   |
#  ....                         |
# clientX----vethX---'

# All clients share identical IP address.
# NAT Gateway uses policy routing and conntrack zones to isolate client
# namespaces. Each client connects to Server, each with colliding tuples:
#   clientsaddr:10000 -> serveraddr:dport
# NAT Gateway is supposed to do port reallocation for each of the
# connections.

sfx=$(mktemp -u "XXXXXXXX")
gw="ns-gw-$sfx"
cl1="ns-cl1-$sfx"
cl2="ns-cl2-$sfx"
srv="ns-srv-$sfx"

v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null)
v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null)
v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null)
v6gc1=$(sysctl -n net.ipv6.neigh.default.gc_thresh1 2>/dev/null)
v6gc2=$(sysctl -n net.ipv6.neigh.default.gc_thresh2 2>/dev/null)
v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null)

cleanup()
{
        ip netns del $gw
        ip netns del $srv
        for i in $(seq 1 $maxclients); do
                ip netns del ns-cl$i-$sfx 2>/dev/null
        done

        sysctl -q net.ipv4.neigh.default.gc_thresh1=$v4gc1 2>/dev/null
        sysctl -q net.ipv4.neigh.default.gc_thresh2=$v4gc2 2>/dev/null
        sysctl -q net.ipv4.neigh.default.gc_thresh3=$v4gc3 2>/dev/null
        sysctl -q net.ipv6.neigh.default.gc_thresh1=$v6gc1 2>/dev/null
        sysctl -q net.ipv6.neigh.default.gc_thresh2=$v6gc2 2>/dev/null
        sysctl -q net.ipv6.neigh.default.gc_thresh3=$v6gc3 2>/dev/null
}

nft --version > /dev/null 2>&1
if [ $? -ne 0 ];then
        echo "SKIP: Could not run test without nft tool"
        exit $ksft_skip
fi

ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
        echo "SKIP: Could not run test without ip tool"
        exit $ksft_skip
fi

conntrack -V > /dev/null 2>&1
if [ $? -ne 0 ];then
        echo "SKIP: Could not run test without conntrack tool"
        exit $ksft_skip
fi

iperf3 -v >/dev/null 2>&1
if [ $? -ne 0 ];then
        have_iperf=0
fi

ip netns add "$gw"
if [ $? -ne 0 ];then
        echo "SKIP: Could not create net namespace $gw"
        exit $ksft_skip
fi
ip -net "$gw" link set lo up

trap cleanup EXIT

ip netns add "$srv"
if [ $? -ne 0 ];then
        echo "SKIP: Could not create server netns $srv"
        exit $ksft_skip
fi

ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv"
ip -net "$gw" link set veth0 up
ip -net "$srv" link set lo up
ip -net "$srv" link set eth0 up

sysctl -q net.ipv6.neigh.default.gc_thresh1=512 2>/dev/null
sysctl -q net.ipv6.neigh.default.gc_thresh2=1024 2>/dev/null
sysctl -q net.ipv6.neigh.default.gc_thresh3=4096 2>/dev/null
sysctl -q net.ipv4.neigh.default.gc_thresh1=512 2>/dev/null
sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null
sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null

for i in $(seq 1 $maxclients);do
        cl="ns-cl$i-$sfx"

        ip netns add "$cl"
        if [ $? -ne 0 ];then
                echo "SKIP: Could not create client netns $cl"
                exit $ksft_skip
        fi
        ip link add veth$i netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1
        if [ $? -ne 0 ];then
                echo "SKIP: No virtual ethernet pair device support in kernel"
                exit $ksft_skip
        fi
done

for i in $(seq 1 $maxclients);do
        cl="ns-cl$i-$sfx"
        echo netns exec "$cl" ip link set lo up
        echo netns exec "$cl" ip link set eth0 up
        echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
        echo netns exec "$gw" ip link set veth$i up
        echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.arp_ignore=2
        echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.rp_filter=0

        # clients have same IP addresses.
        echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
        echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0
        echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0
        echo netns exec "$cl" ip route add default via dead:1::2 dev eth0

        # NB: same addresses on client-facing interfaces.
        echo netns exec "$gw" ip addr add 10.1.0.2/24 dev veth$i
        echo netns exec "$gw" ip addr add dead:1::2/64 dev veth$i

        # gw: policy routing
        echo netns exec "$gw" ip route add 10.1.0.0/24 dev veth$i table $((1000+i))
        echo netns exec "$gw" ip route add dead:1::0/64 dev veth$i table $((1000+i))
        echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i))
        echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i))
        echo netns exec "$gw" ip rule add fwmark $i lookup $((1000+i))
done | ip -batch /dev/stdin

ip -net "$gw" addr add 10.3.0.1/24 dev veth0
ip -net "$gw" addr add dead:3::1/64 dev veth0

ip -net "$srv" addr add 10.3.0.99/24 dev eth0
ip -net "$srv" addr add dead:3::99/64 dev eth0

ip netns exec $gw nft -f /dev/stdin<<EOF
table inet raw {
        map iiftomark {
                type ifname : mark
        }

        map iiftozone {
                typeof iifname : ct zone
        }

        set inicmp {
                flags dynamic
                type ipv4_addr . ifname . ipv4_addr
        }
        set inflows {
                flags dynamic
                type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service
        }

        set inflows6 {
                flags dynamic
                type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service
        }

        chain prerouting {
                type filter hook prerouting priority -64000; policy accept;
                ct original zone set meta iifname map @iiftozone
                meta mark set meta iifname map @iiftomark

                tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter }
                add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter }
                ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter }
        }

        chain nat_postrouting {
                type nat hook postrouting priority 0; policy accept;
                ct mark set meta mark meta oifname veth0 masquerade
        }

        chain mangle_prerouting {
                type filter hook prerouting priority -100; policy accept;
                ct direction reply meta mark set ct mark
        }
}
EOF

( echo add element inet raw iiftomark \{
        for i in $(seq 1 $((maxclients-1))); do
                echo \"veth$i\" : $i,
        done
        echo \"veth$maxclients\" : $maxclients \}
        echo add element inet raw iiftozone \{
        for i in $(seq 1 $((maxclients-1))); do
                echo \"veth$i\" : $i,
        done
        echo \"veth$maxclients\" : $maxclients \}
) | ip netns exec $gw nft -f /dev/stdin

ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null

# useful for debugging: allows to use 'ping' from clients to gateway.
ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null

for i in $(seq 1 $maxclients); do
        cl="ns-cl$i-$sfx"
        ip netns exec $cl ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
        if [ $? -ne 0 ]; then
                echo FAIL: Ping failure from $cl 1>&2
                ret=1
                break
        fi
done

wait

for i in $(seq 1 $maxclients); do
        ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"
        if [ $? -ne 0 ];then
                ret=1
                echo "FAIL: counter icmp mismatch for veth$i" 1>&2
                ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2
                break
        fi
done

ip netns exec $gw nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
if [ $? -ne 0 ];then
        ret=1
        echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
        ip netns exec $gw nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" 1>&2
fi

if [ $ret -eq 0 ]; then
        echo "PASS: ping test from all $maxclients namespaces"
fi

if [ $have_iperf -eq 0 ];then
        echo "SKIP: iperf3 not installed"
        if [ $ret -ne 0 ];then
                exit $ret
        fi
        exit $ksft_skip
fi

ip netns exec $srv iperf3 -s > /dev/null 2>&1 &
iperfpid=$!
sleep 1

for i in $(seq 1 $maxclients); do
        if [ $ret -ne 0 ]; then
                break
        fi
        cl="ns-cl$i-$sfx"
        ip netns exec $cl iperf3 -c 10.3.0.99 --cport 10000 -n 1 > /dev/null
        if [ $? -ne 0 ]; then
                echo FAIL: Failure to connect for $cl 1>&2
                ip netns exec $gw conntrack -S 1>&2
                ret=1
        fi
done
if [ $ret -eq 0 ];then
        echo "PASS: iperf3 connections for all $maxclients net namespaces"
fi

kill $iperfpid
wait

for i in $(seq 1 $maxclients); do
        ip netns exec $gw nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null
        if [ $? -ne 0 ];then
                ret=1
                echo "FAIL: can't find expected tcp entry for veth$i" 1>&2
                break
        fi
done
if [ $ret -eq 0 ];then
        echo "PASS: Found client connection for all $maxclients net namespaces"
fi

ip netns exec $gw nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null
if [ $? -ne 0 ];then
        ret=1
        echo "FAIL: cannot find return entry on veth0" 1>&2
fi

exit $ret
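Like the other scripts in tools/testing/selftests/netfilter, this presumably runs under the kselftest harness once listed in that directory's Makefile (something like make -C tools/testing/selftests TARGETS=netfilter run_tests); it needs root plus the nft, conntrack and, optionally, iperf3 userspace tools, and skips (code 4) when those are missing.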
tools/testing/selftests/netfilter/nft_zones_many.sh (new executable file, 156 lines):
@@ -0,0 +1,156 @@
#!/bin/bash

# Test insertion speed for packets with identical addresses/ports
# that are all placed in distinct conntrack zones.

sfx=$(mktemp -u "XXXXXXXX")
ns="ns-$sfx"

# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

zones=20000
have_ct_tool=0
ret=0

cleanup()
{
        ip netns del $ns
}

ip netns add $ns
if [ $? -ne 0 ];then
        echo "SKIP: Could not create net namespace $ns"
        exit $ksft_skip
fi

trap cleanup EXIT

conntrack -V > /dev/null 2>&1
if [ $? -eq 0 ];then
        have_ct_tool=1
fi

ip -net "$ns" link set lo up

test_zones() {
        local max_zones=$1

        ip netns exec $ns sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600
        ip netns exec $ns nft -f /dev/stdin<<EOF
flush ruleset
table inet raw {
        map rndzone {
                typeof numgen inc mod $max_zones : ct zone
        }

        chain output {
                type filter hook output priority -64000; policy accept;
                udp dport 12345 ct zone set numgen inc mod 65536 map @rndzone
        }
}
EOF
        (
                echo "add element inet raw rndzone {"
                for i in $(seq 1 $max_zones);do
                        echo -n "$i : $i"
                        if [ $i -lt $max_zones ]; then
                                echo ","
                        else
                                echo "}"
                        fi
                done
        ) | ip netns exec $ns nft -f /dev/stdin

        local i=0
        local j=0
        local outerstart=$(date +%s%3N)
        local stop=$outerstart

        while [ $i -lt $max_zones ]; do
                local start=$(date +%s%3N)
                i=$((i + 10000))
                j=$((j + 1))
                dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" nc -w 1 -q 1 -u -p 12345 127.0.0.1 12345 > /dev/null
                if [ $? -ne 0 ] ;then
                        ret=1
                        break
                fi

                stop=$(date +%s%3N)
                local duration=$((stop-start))
                echo "PASS: added 10000 entries in $duration ms (now $i total, loop $j)"
        done

        if [ $have_ct_tool -eq 1 ]; then
                local count=$(ip netns exec "$ns" conntrack -C)
                local duration=$((stop-outerstart))

                if [ $count -eq $max_zones ]; then
                        echo "PASS: inserted $count entries from packet path in $duration ms total"
                else
                        ip netns exec $ns conntrack -S 1>&2
                        echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries"
                        ret=1
                fi
        fi

        if [ $ret -ne 0 ];then
                echo "FAIL: insert $max_zones entries from packet path" 1>&2
        fi
}

test_conntrack_tool() {
        local max_zones=$1

        ip netns exec $ns conntrack -F >/dev/null 2>/dev/null

        local outerstart=$(date +%s%3N)
        local start=$(date +%s%3N)
        local stop=$start
        local i=0
        while [ $i -lt $max_zones ]; do
                i=$((i + 1))
                ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
                        --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1
                if [ $? -ne 0 ];then
                        ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
                                --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null
                        echo "FAIL: conntrack -I returned an error"
                        ret=1
                        break
                fi

                if [ $((i%10000)) -eq 0 ];then
                        stop=$(date +%s%3N)

                        local duration=$((stop-start))
                        echo "PASS: added 10000 entries in $duration ms (now $i total)"
                        start=$stop
                fi
        done

        local count=$(ip netns exec "$ns" conntrack -C)
        local duration=$((stop-outerstart))

        if [ $count -eq $max_zones ]; then
                echo "PASS: inserted $count entries via ctnetlink in $duration ms"
        else
                ip netns exec $ns conntrack -S 1>&2
                echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)"
                ret=1
        fi
}

test_zones $zones

if [ $have_ct_tool -eq 1 ];then
        test_conntrack_tool $zones
else
        echo "SKIP: Could not run ctnetlink insertion test without conntrack tool"
        if [ $ret -eq 0 ];then
                exit $ksft_skip
        fi
fi

exit $ret
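The per-batch timing output is the point of this test: if the conntrack hash failed to mix in the zone id, all of these identically-addressed entries would pile into the same buckets and each 10000-entry batch would take visibly longer than the last, so flat timings double as a regression check for the zone-hashing fix in this series.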