mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-13 14:24:11 +08:00
bpf: Add support for changing congestion control
Add support for changing congestion control for SOCK_OPS bpf programs through the setsockopt bpf helper function. Also add a new SOCK_OPS op, BPF_SOCK_OPS_NEEDS_ECN, which is needed for congestion controls, like dctcp, that need to enable ECN in the SYN packets. Signed-off-by: Lawrence Brakmo <brakmo@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
d9925368a6
commit
91b5b21c7c
@ -1004,7 +1004,9 @@ void tcp_get_default_congestion_control(char *name);
|
|||||||
void tcp_get_available_congestion_control(char *buf, size_t len);
|
void tcp_get_available_congestion_control(char *buf, size_t len);
|
||||||
void tcp_get_allowed_congestion_control(char *buf, size_t len);
|
void tcp_get_allowed_congestion_control(char *buf, size_t len);
|
||||||
int tcp_set_allowed_congestion_control(char *allowed);
|
int tcp_set_allowed_congestion_control(char *allowed);
|
||||||
int tcp_set_congestion_control(struct sock *sk, const char *name);
|
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load);
|
||||||
|
void tcp_reinit_congestion_control(struct sock *sk,
|
||||||
|
const struct tcp_congestion_ops *ca);
|
||||||
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
|
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
|
||||||
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
|
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
|
||||||
|
|
||||||
@ -2078,4 +2080,9 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
|
|||||||
rwnd = 0;
|
rwnd = 0;
|
||||||
return rwnd;
|
return rwnd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Report whether the BPF_SOCK_OPS_NEEDS_ECN sock_ops call made for @sk via tcp_call_bpf() returned exactly 1; any other result yields false. */
static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
|
||||||
|
{
|
||||||
|
return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
|
||||||
|
}
|
||||||
#endif /* _TCP_H */
|
#endif /* _TCP_H */
|
||||||
|
@ -778,6 +778,9 @@ enum {
|
|||||||
* passive connection is
|
* passive connection is
|
||||||
* established
|
* established
|
||||||
*/
|
*/
|
||||||
|
BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control
|
||||||
|
* needs ECN
|
||||||
|
*/
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* _UAPI__LINUX_BPF_H__ */
|
#endif /* _UAPI__LINUX_BPF_H__ */
|
||||||
|
@ -2719,8 +2719,24 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
|
|||||||
}
|
}
|
||||||
} else if (level == SOL_TCP &&
|
} else if (level == SOL_TCP &&
|
||||||
sk->sk_prot->setsockopt == tcp_setsockopt) {
|
sk->sk_prot->setsockopt == tcp_setsockopt) {
|
||||||
/* Place holder */
|
#ifdef CONFIG_INET
|
||||||
|
if (optname == TCP_CONGESTION) {
|
||||||
|
char name[TCP_CA_NAME_MAX];
|
||||||
|
|
||||||
|
strncpy(name, optval, min_t(long, optlen,
|
||||||
|
TCP_CA_NAME_MAX-1));
|
||||||
|
name[TCP_CA_NAME_MAX-1] = 0;
|
||||||
|
ret = tcp_set_congestion_control(sk, name, false);
|
||||||
|
if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
|
||||||
|
/* replacing an existing ca */
|
||||||
|
tcp_reinit_congestion_control(sk,
|
||||||
|
inet_csk(sk)->icsk_ca_ops);
|
||||||
|
} else {
|
||||||
|
ret = -EINVAL;
|
||||||
|
}
|
||||||
|
#else
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
}
|
}
|
||||||
|
@ -2481,7 +2481,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
|
|||||||
name[val] = 0;
|
name[val] = 0;
|
||||||
|
|
||||||
lock_sock(sk);
|
lock_sock(sk);
|
||||||
err = tcp_set_congestion_control(sk, name);
|
err = tcp_set_congestion_control(sk, name, true);
|
||||||
release_sock(sk);
|
release_sock(sk);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
@ -189,8 +189,8 @@ void tcp_init_congestion_control(struct sock *sk)
|
|||||||
INET_ECN_dontxmit(sk);
|
INET_ECN_dontxmit(sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void tcp_reinit_congestion_control(struct sock *sk,
|
void tcp_reinit_congestion_control(struct sock *sk,
|
||||||
const struct tcp_congestion_ops *ca)
|
const struct tcp_congestion_ops *ca)
|
||||||
{
|
{
|
||||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
|
|
||||||
@ -333,8 +333,12 @@ out:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Change congestion control for socket */
|
/* Change congestion control for socket. If load is false, then it is the
|
||||||
int tcp_set_congestion_control(struct sock *sk, const char *name)
|
* responsibility of the caller to call tcp_init_congestion_control or
|
||||||
|
* tcp_reinit_congestion_control (if the current congestion control was
|
||||||
|
* already initialized).
|
||||||
|
*/
|
||||||
|
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
|
||||||
{
|
{
|
||||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
const struct tcp_congestion_ops *ca;
|
const struct tcp_congestion_ops *ca;
|
||||||
@ -344,21 +348,29 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
|
|||||||
return -EPERM;
|
return -EPERM;
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
ca = __tcp_ca_find_autoload(name);
|
if (!load)
|
||||||
|
ca = tcp_ca_find(name);
|
||||||
|
else
|
||||||
|
ca = __tcp_ca_find_autoload(name);
|
||||||
/* No change asking for existing value */
|
/* No change asking for existing value */
|
||||||
if (ca == icsk->icsk_ca_ops) {
|
if (ca == icsk->icsk_ca_ops) {
|
||||||
icsk->icsk_ca_setsockopt = 1;
|
icsk->icsk_ca_setsockopt = 1;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
if (!ca)
|
if (!ca) {
|
||||||
err = -ENOENT;
|
err = -ENOENT;
|
||||||
else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
|
} else if (!load) {
|
||||||
ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)))
|
icsk->icsk_ca_ops = ca;
|
||||||
|
if (!try_module_get(ca->owner))
|
||||||
|
err = -EBUSY;
|
||||||
|
} else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
|
||||||
|
ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
|
||||||
err = -EPERM;
|
err = -EPERM;
|
||||||
else if (!try_module_get(ca->owner))
|
} else if (!try_module_get(ca->owner)) {
|
||||||
err = -EBUSY;
|
err = -EBUSY;
|
||||||
else
|
} else {
|
||||||
tcp_reinit_congestion_control(sk, ca);
|
tcp_reinit_congestion_control(sk, ca);
|
||||||
|
}
|
||||||
out:
|
out:
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
return err;
|
return err;
|
||||||
|
@ -6191,7 +6191,8 @@ static void tcp_ecn_create_request(struct request_sock *req,
|
|||||||
ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
|
ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
|
||||||
|
|
||||||
if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
|
if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
|
||||||
(ecn_ok_dst & DST_FEATURE_ECN_CA))
|
(ecn_ok_dst & DST_FEATURE_ECN_CA) ||
|
||||||
|
tcp_bpf_ca_needs_ecn((struct sock *)req))
|
||||||
inet_rsk(req)->ecn_ok = 1;
|
inet_rsk(req)->ecn_ok = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -316,7 +316,8 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
|
|||||||
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
|
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
|
||||||
if (!(tp->ecn_flags & TCP_ECN_OK))
|
if (!(tp->ecn_flags & TCP_ECN_OK))
|
||||||
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
|
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
|
||||||
else if (tcp_ca_needs_ecn(sk))
|
else if (tcp_ca_needs_ecn(sk) ||
|
||||||
|
tcp_bpf_ca_needs_ecn(sk))
|
||||||
INET_ECN_xmit(sk);
|
INET_ECN_xmit(sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -324,8 +325,9 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
|
|||||||
static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
|
static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
|
||||||
bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
|
bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
|
||||||
tcp_ca_needs_ecn(sk);
|
tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
|
||||||
|
|
||||||
if (!use_ecn) {
|
if (!use_ecn) {
|
||||||
const struct dst_entry *dst = __sk_dst_get(sk);
|
const struct dst_entry *dst = __sk_dst_get(sk);
|
||||||
@ -339,7 +341,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
|
|||||||
if (use_ecn) {
|
if (use_ecn) {
|
||||||
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
|
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
|
||||||
tp->ecn_flags = TCP_ECN_OK;
|
tp->ecn_flags = TCP_ECN_OK;
|
||||||
if (tcp_ca_needs_ecn(sk))
|
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
|
||||||
INET_ECN_xmit(sk);
|
INET_ECN_xmit(sk);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user