mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-20 19:23:57 +08:00
net/sched: act_skbmod: Add SKBMOD_F_ECN option support
Currently, when doing rate limiting using the tc-police(8) action, the easiest way is to simply drop the packets which exceed or conform to the configured bandwidth limit. Add a new option to tc-skbmod(8), so that users may use the ECN [1] extension to explicitly inform the receiver about the congestion instead of dropping packets "on the floor".

The 2 least significant bits of the Traffic Class field in IPv4 and IPv6 headers are used to represent different ECN states [2]:

    0b00: "Non ECN-Capable Transport", Non-ECT
    0b10: "ECN Capable Transport", ECT(0)
    0b01: "ECN Capable Transport", ECT(1)
    0b11: "Congestion Encountered", CE

As an example:

    $ tc filter add dev eth0 parent 1: protocol ip prio 10 \
            matchall action skbmod ecn

Doing the above marks all ECT(0) and ECT(1) packets as CE. It does NOT affect Non-ECT or non-IP packets. In the tc-police scenario mentioned above, users may pipe a tc-police action and a tc-skbmod "ecn" action together to achieve ECN-based rate limiting.

For TCP connections, upon receiving a CE packet, the receiver will respond with an ECE packet, asking the sender to reduce their congestion window. However ECN also works with other L4 protocols e.g. DCCP and SCTP [2], and our implementation does not touch or care about L4 headers.

The updated tc-skbmod SYNOPSIS looks like the following:

    tc ... action skbmod { set SETTABLE | swap SWAPPABLE | ecn } ...

Only one of "set", "swap" or "ecn" shall be used in a single tc-skbmod command. Trying to use more than one of them at a time is considered undefined behavior; pipe multiple tc-skbmod commands together instead. "set" and "swap" only affect Ethernet packets, while "ecn" only affects IPv{4,6} packets.

It is also worth mentioning that, in theory, the same effect could be achieved by piping a "police" action and a "bpf" action using the bpf_skb_ecn_set_ce() helper, but this requires eBPF programming from the user and is thus impractical.

Depends on patch "net/sched: act_skbmod: Skip non-Ethernet packets".
[1] https://datatracker.ietf.org/doc/html/rfc3168
[2] https://en.wikipedia.org/wiki/Explicit_Congestion_Notification

Reviewed-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Peilin Ye <peilin.ye@bytedance.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
d80f6d6665
commit
56af5e749f
@ -17,6 +17,7 @@
|
||||
#define SKBMOD_F_SMAC 0x2
|
||||
#define SKBMOD_F_ETYPE 0x4
|
||||
#define SKBMOD_F_SWAPMAC 0x8
|
||||
#define SKBMOD_F_ECN 0x10
|
||||
|
||||
struct tc_skbmod {
|
||||
tc_gen;
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/skbuff.h>
|
||||
#include <linux/rtnetlink.h>
|
||||
#include <net/inet_ecn.h>
|
||||
#include <net/netlink.h>
|
||||
#include <net/pkt_sched.h>
|
||||
#include <net/pkt_cls.h>
|
||||
@ -21,15 +22,13 @@
|
||||
static unsigned int skbmod_net_id;
|
||||
static struct tc_action_ops act_skbmod_ops;
|
||||
|
||||
#define MAX_EDIT_LEN ETH_HLEN
|
||||
static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
|
||||
struct tcf_result *res)
|
||||
{
|
||||
struct tcf_skbmod *d = to_skbmod(a);
|
||||
int action;
|
||||
int action, max_edit_len, err;
|
||||
struct tcf_skbmod_params *p;
|
||||
u64 flags;
|
||||
int err;
|
||||
|
||||
tcf_lastuse_update(&d->tcf_tm);
|
||||
bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
|
||||
@ -38,19 +37,34 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
|
||||
if (unlikely(action == TC_ACT_SHOT))
|
||||
goto drop;
|
||||
|
||||
if (!skb->dev || skb->dev->type != ARPHRD_ETHER)
|
||||
return action;
|
||||
max_edit_len = skb_mac_header_len(skb);
|
||||
p = rcu_dereference_bh(d->skbmod_p);
|
||||
flags = p->flags;
|
||||
|
||||
/* XXX: if you are going to edit more fields beyond ethernet header
|
||||
* (example when you add IP header replacement or vlan swap)
|
||||
* then MAX_EDIT_LEN needs to change appropriately
|
||||
*/
|
||||
err = skb_ensure_writable(skb, MAX_EDIT_LEN);
|
||||
/* tcf_skbmod_init() guarantees "flags" to be one of the following:
|
||||
* 1. a combination of SKBMOD_F_{DMAC,SMAC,ETYPE}
|
||||
* 2. SKBMOD_F_SWAPMAC
|
||||
* 3. SKBMOD_F_ECN
|
||||
* SKBMOD_F_ECN only works with IP packets; all other flags only work with Ethernet
|
||||
* packets.
|
||||
*/
|
||||
if (flags == SKBMOD_F_ECN) {
|
||||
switch (skb_protocol(skb, true)) {
|
||||
case cpu_to_be16(ETH_P_IP):
|
||||
case cpu_to_be16(ETH_P_IPV6):
|
||||
max_edit_len += skb_network_header_len(skb);
|
||||
break;
|
||||
default:
|
||||
goto out;
|
||||
}
|
||||
} else if (!skb->dev || skb->dev->type != ARPHRD_ETHER) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = skb_ensure_writable(skb, max_edit_len);
|
||||
if (unlikely(err)) /* best policy is to drop on the floor */
|
||||
goto drop;
|
||||
|
||||
p = rcu_dereference_bh(d->skbmod_p);
|
||||
flags = p->flags;
|
||||
if (flags & SKBMOD_F_DMAC)
|
||||
ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
|
||||
if (flags & SKBMOD_F_SMAC)
|
||||
@ -66,6 +80,10 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
|
||||
ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr);
|
||||
}
|
||||
|
||||
if (flags & SKBMOD_F_ECN)
|
||||
INET_ECN_set_ce(skb);
|
||||
|
||||
out:
|
||||
return action;
|
||||
|
||||
drop:
|
||||
@ -129,6 +147,8 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
|
||||
index = parm->index;
|
||||
if (parm->flags & SKBMOD_F_SWAPMAC)
|
||||
lflags = SKBMOD_F_SWAPMAC;
|
||||
if (parm->flags & SKBMOD_F_ECN)
|
||||
lflags = SKBMOD_F_ECN;
|
||||
|
||||
err = tcf_idr_check_alloc(tn, &index, a, bind);
|
||||
if (err < 0)
|
||||
|
Loading…
Reference in New Issue
Block a user