mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-11 04:18:39 +08:00
Merge branch 'net-fib_rules-add-dscp-selector-support'
Ido Schimmel says: ==================== net: fib_rules: Add DSCP selector support Currently, the kernel rejects IPv4 FIB rules that try to match on the upper three DSCP bits: # ip -4 rule add tos 0x1c table 100 # ip -4 rule add tos 0x3c table 100 Error: Invalid tos. The reason for that is that historically users of the FIB lookup API only populated the lower three DSCP bits in the TOS field of the IPv4 flow key ('flowi4_tos'), which fits the TOS definition from the initial IPv4 specification (RFC 791). This is not very useful nowadays and instead some users want to be able to match on the six bits DSCP field, which replaced the TOS and IP precedence fields over 25 years ago (RFC 2474). In addition, the current behavior differs between IPv4 and IPv6 which does allow users to match on the entire DSCP field using the TOS selector. Recent patchsets made sure that callers of the FIB lookup API now populate the entire DSCP field in the IPv4 flow key. Therefore, it is now possible to extend FIB rules to match on DSCP. This is done by adding a new DSCP attribute which is implemented for both IPv4 and IPv6 to provide user space programs a consistent behavior between both address families. The behavior of the old TOS selector is unchanged and IPv4 FIB rules using it will only match on the lower three DSCP bits. The kernel will reject rules that try to use both selectors. Patch #1 adds the new DSCP attribute but rejects its usage. Patches #2-#3 implement IPv4 and IPv6 support. Patch #4 allows user space to use the new attribute. Patches #5-#6 add selftests. ==================== Link: https://patch.msgid.link/20240911093748.3662015-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
7bb50f30c1
@ -67,6 +67,7 @@ enum {
|
||||
FRA_IP_PROTO, /* ip proto */
|
||||
FRA_SPORT_RANGE, /* sport */
|
||||
FRA_DPORT_RANGE, /* dport */
|
||||
FRA_DSCP, /* dscp */
|
||||
__FRA_MAX
|
||||
};
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <linux/list.h>
|
||||
#include <linux/module.h>
|
||||
#include <net/net_namespace.h>
|
||||
#include <net/inet_dscp.h>
|
||||
#include <net/sock.h>
|
||||
#include <net/fib_rules.h>
|
||||
#include <net/ip_tunnels.h>
|
||||
@ -766,7 +767,8 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = {
|
||||
[FRA_PROTOCOL] = { .type = NLA_U8 },
|
||||
[FRA_IP_PROTO] = { .type = NLA_U8 },
|
||||
[FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
|
||||
[FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }
|
||||
[FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
|
||||
[FRA_DSCP] = NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2),
|
||||
};
|
||||
|
||||
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
|
||||
|
@ -37,6 +37,7 @@ struct fib4_rule {
|
||||
u8 dst_len;
|
||||
u8 src_len;
|
||||
dscp_t dscp;
|
||||
u8 dscp_full:1; /* DSCP or TOS selector */
|
||||
__be32 src;
|
||||
__be32 srcmask;
|
||||
__be32 dst;
|
||||
@ -186,7 +187,15 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
|
||||
((daddr ^ r->dst) & r->dstmask))
|
||||
return 0;
|
||||
|
||||
if (r->dscp && !fib_dscp_masked_match(r->dscp, fl4))
|
||||
/* When DSCP selector is used we need to match on the entire DSCP field
|
||||
* in the flow information structure. When TOS selector is used we need
|
||||
* to mask the upper three DSCP bits prior to matching to maintain
|
||||
* legacy behavior.
|
||||
*/
|
||||
if (r->dscp_full && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
|
||||
return 0;
|
||||
else if (!r->dscp_full && r->dscp &&
|
||||
!fib_dscp_masked_match(r->dscp, fl4))
|
||||
return 0;
|
||||
|
||||
if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
|
||||
@ -217,6 +226,20 @@ static struct fib_table *fib_empty_table(struct net *net)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int fib4_nl2rule_dscp(const struct nlattr *nla, struct fib4_rule *rule4,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
if (rule4->dscp) {
|
||||
NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
rule4->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
|
||||
rule4->dscp_full = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
|
||||
struct fib_rule_hdr *frh,
|
||||
struct nlattr **tb,
|
||||
@ -238,6 +261,10 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
|
||||
}
|
||||
rule4->dscp = inet_dsfield_to_dscp(frh->tos);
|
||||
|
||||
if (tb[FRA_DSCP] &&
|
||||
fib4_nl2rule_dscp(tb[FRA_DSCP], rule4, extack) < 0)
|
||||
goto errout;
|
||||
|
||||
/* split local/main if they are not already split */
|
||||
err = fib_unmerge(net);
|
||||
if (err)
|
||||
@ -320,9 +347,19 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
|
||||
if (frh->dst_len && (rule4->dst_len != frh->dst_len))
|
||||
return 0;
|
||||
|
||||
if (frh->tos && inet_dscp_to_dsfield(rule4->dscp) != frh->tos)
|
||||
if (frh->tos &&
|
||||
(rule4->dscp_full ||
|
||||
inet_dscp_to_dsfield(rule4->dscp) != frh->tos))
|
||||
return 0;
|
||||
|
||||
if (tb[FRA_DSCP]) {
|
||||
dscp_t dscp;
|
||||
|
||||
dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2);
|
||||
if (!rule4->dscp_full || rule4->dscp != dscp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_IP_ROUTE_CLASSID
|
||||
if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
|
||||
return 0;
|
||||
@ -344,7 +381,15 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
|
||||
|
||||
frh->dst_len = rule4->dst_len;
|
||||
frh->src_len = rule4->src_len;
|
||||
frh->tos = inet_dscp_to_dsfield(rule4->dscp);
|
||||
|
||||
if (rule4->dscp_full) {
|
||||
frh->tos = 0;
|
||||
if (nla_put_u8(skb, FRA_DSCP,
|
||||
inet_dscp_to_dsfield(rule4->dscp) >> 2))
|
||||
goto nla_put_failure;
|
||||
} else {
|
||||
frh->tos = inet_dscp_to_dsfield(rule4->dscp);
|
||||
}
|
||||
|
||||
if ((rule4->dst_len &&
|
||||
nla_put_in_addr(skb, FRA_DST, rule4->dst)) ||
|
||||
@ -366,7 +411,8 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
|
||||
{
|
||||
return nla_total_size(4) /* dst */
|
||||
+ nla_total_size(4) /* src */
|
||||
+ nla_total_size(4); /* flow */
|
||||
+ nla_total_size(4) /* flow */
|
||||
+ nla_total_size(1); /* dscp */
|
||||
}
|
||||
|
||||
static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
|
||||
|
@ -27,6 +27,7 @@ struct fib6_rule {
|
||||
struct rt6key src;
|
||||
struct rt6key dst;
|
||||
dscp_t dscp;
|
||||
u8 dscp_full:1; /* DSCP or TOS selector */
|
||||
};
|
||||
|
||||
static bool fib6_rule_matchall(const struct fib_rule *rule)
|
||||
@ -345,6 +346,20 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
if (rule6->dscp) {
|
||||
NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
rule6->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
|
||||
rule6->dscp_full = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
|
||||
struct fib_rule_hdr *frh,
|
||||
struct nlattr **tb,
|
||||
@ -361,6 +376,9 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
|
||||
}
|
||||
rule6->dscp = inet_dsfield_to_dscp(frh->tos);
|
||||
|
||||
if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0)
|
||||
goto errout;
|
||||
|
||||
if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
|
||||
if (rule->table == RT6_TABLE_UNSPEC) {
|
||||
NL_SET_ERR_MSG(extack, "Invalid table");
|
||||
@ -413,9 +431,19 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
|
||||
if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
|
||||
return 0;
|
||||
|
||||
if (frh->tos && inet_dscp_to_dsfield(rule6->dscp) != frh->tos)
|
||||
if (frh->tos &&
|
||||
(rule6->dscp_full ||
|
||||
inet_dscp_to_dsfield(rule6->dscp) != frh->tos))
|
||||
return 0;
|
||||
|
||||
if (tb[FRA_DSCP]) {
|
||||
dscp_t dscp;
|
||||
|
||||
dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2);
|
||||
if (!rule6->dscp_full || rule6->dscp != dscp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (frh->src_len &&
|
||||
nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
|
||||
return 0;
|
||||
@ -434,7 +462,15 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
|
||||
|
||||
frh->dst_len = rule6->dst.plen;
|
||||
frh->src_len = rule6->src.plen;
|
||||
frh->tos = inet_dscp_to_dsfield(rule6->dscp);
|
||||
|
||||
if (rule6->dscp_full) {
|
||||
frh->tos = 0;
|
||||
if (nla_put_u8(skb, FRA_DSCP,
|
||||
inet_dscp_to_dsfield(rule6->dscp) >> 2))
|
||||
goto nla_put_failure;
|
||||
} else {
|
||||
frh->tos = inet_dscp_to_dsfield(rule6->dscp);
|
||||
}
|
||||
|
||||
if ((rule6->dst.plen &&
|
||||
nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
|
||||
@ -450,7 +486,8 @@ nla_put_failure:
|
||||
/*
 * Size of the IPv6-specific netlink attributes of a FIB rule.
 *
 * @rule: unused; the result is a constant sum.
 *
 * Adds up the attribute sizes for the destination and source addresses
 * (16 bytes each) and the one-byte DSCP attribute that fib6_rule_fill()
 * emits with nla_put_u8(). The whole sum must stay in a single return
 * expression: ending it after the "src" term leaves the DSCP attribute
 * uncounted.
 *
 * Return: the summed nla_total_size() of the three attributes.
 */
static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
{
	return nla_total_size(16) /* dst */
	       + nla_total_size(16) /* src */
	       + nla_total_size(1); /* dscp */
}
|
||||
|
||||
static void fib6_rule_flush_cache(struct fib_rules_ops *ops)
|
||||
|
@ -274,6 +274,23 @@ fib_rule6_test()
|
||||
"$getnomatch" "ipproto ipv6-icmp match" \
|
||||
"ipproto ipv6-tcp no match"
|
||||
fi
|
||||
|
||||
fib_check_iproute_support "dscp" "tos"
|
||||
if [ $? -eq 0 ]; then
|
||||
match="dscp 0x3f"
|
||||
getmatch="tos 0xfc"
|
||||
getnomatch="tos 0xf4"
|
||||
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
|
||||
"$getnomatch" "dscp redirect to table" \
|
||||
"dscp no redirect to table"
|
||||
|
||||
match="dscp 0x3f"
|
||||
getmatch="from $SRC_IP6 iif $DEV tos 0xfc"
|
||||
getnomatch="from $SRC_IP6 iif $DEV tos 0xf4"
|
||||
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
|
||||
"$getnomatch" "iif dscp redirect to table" \
|
||||
"iif dscp no redirect to table"
|
||||
fi
|
||||
}
|
||||
|
||||
fib_rule6_vrf_test()
|
||||
@ -319,6 +336,34 @@ fib_rule6_connect_test()
|
||||
log_test $? 1 "rule6 dsfield tcp no connect (dsfield 0x20)"
|
||||
|
||||
$IP -6 rule del dsfield 0x04 table $RTABLE_PEER
|
||||
|
||||
ip rule help 2>&1 | grep -q dscp
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "SKIP: iproute2 iprule too old, missing dscp match"
|
||||
cleanup_peer
|
||||
return
|
||||
fi
|
||||
|
||||
$IP -6 rule add dscp 0x3f table $RTABLE_PEER
|
||||
|
||||
nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xfc \
|
||||
-l 2001:db8::1:11 -r 2001:db8::1:11
|
||||
log_test $? 0 "rule6 dscp udp connect"
|
||||
|
||||
nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xfc \
|
||||
-l 2001:db8::1:11 -r 2001:db8::1:11
|
||||
log_test $? 0 "rule6 dscp tcp connect"
|
||||
|
||||
nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xf4 \
|
||||
-l 2001:db8::1:11 -r 2001:db8::1:11
|
||||
log_test $? 1 "rule6 dscp udp no connect"
|
||||
|
||||
nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xf4 \
|
||||
-l 2001:db8::1:11 -r 2001:db8::1:11
|
||||
log_test $? 1 "rule6 dscp tcp no connect"
|
||||
|
||||
$IP -6 rule del dscp 0x3f table $RTABLE_PEER
|
||||
|
||||
cleanup_peer
|
||||
}
|
||||
|
||||
@ -468,6 +513,23 @@ fib_rule4_test()
|
||||
"$getnomatch" "ipproto icmp match" \
|
||||
"ipproto tcp no match"
|
||||
fi
|
||||
|
||||
fib_check_iproute_support "dscp" "tos"
|
||||
if [ $? -eq 0 ]; then
|
||||
match="dscp 0x3f"
|
||||
getmatch="tos 0xfc"
|
||||
getnomatch="tos 0xf4"
|
||||
fib_rule4_test_match_n_redirect "$match" "$getmatch" \
|
||||
"$getnomatch" "dscp redirect to table" \
|
||||
"dscp no redirect to table"
|
||||
|
||||
match="dscp 0x3f"
|
||||
getmatch="from $SRC_IP iif $DEV tos 0xfc"
|
||||
getnomatch="from $SRC_IP iif $DEV tos 0xf4"
|
||||
fib_rule4_test_match_n_redirect "$match" "$getmatch" \
|
||||
"$getnomatch" "iif dscp redirect to table" \
|
||||
"iif dscp no redirect to table"
|
||||
fi
|
||||
}
|
||||
|
||||
fib_rule4_vrf_test()
|
||||
@ -513,6 +575,34 @@ fib_rule4_connect_test()
|
||||
log_test $? 1 "rule4 dsfield tcp no connect (dsfield 0x20)"
|
||||
|
||||
$IP -4 rule del dsfield 0x04 table $RTABLE_PEER
|
||||
|
||||
ip rule help 2>&1 | grep -q dscp
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "SKIP: iproute2 iprule too old, missing dscp match"
|
||||
cleanup_peer
|
||||
return
|
||||
fi
|
||||
|
||||
$IP -4 rule add dscp 0x3f table $RTABLE_PEER
|
||||
|
||||
nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xfc \
|
||||
-l 198.51.100.11 -r 198.51.100.11
|
||||
log_test $? 0 "rule4 dscp udp connect"
|
||||
|
||||
nettest -q -B -t 5 -N $testns -O $peerns -Q 0xfc \
|
||||
-l 198.51.100.11 -r 198.51.100.11
|
||||
log_test $? 0 "rule4 dscp tcp connect"
|
||||
|
||||
nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xf4 \
|
||||
-l 198.51.100.11 -r 198.51.100.11
|
||||
log_test $? 1 "rule4 dscp udp no connect"
|
||||
|
||||
nettest -q -B -t 5 -N $testns -O $peerns -Q 0xf4 \
|
||||
-l 198.51.100.11 -r 198.51.100.11
|
||||
log_test $? 1 "rule4 dscp tcp no connect"
|
||||
|
||||
$IP -4 rule del dscp 0x3f table $RTABLE_PEER
|
||||
|
||||
cleanup_peer
|
||||
}
|
||||
################################################################################
|
||||
|
Loading…
Reference in New Issue
Block a user