From 54a59aed395ce0f4177b5212e5746a6462de3ad9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 9 Oct 2023 11:26:54 +0200 Subject: [PATCH] net, sched: Make tc-related drop reason more flexible Currently, the kfree_skb_reason() in sch_handle_{ingress,egress}() can only express a basic SKB_DROP_REASON_TC_INGRESS or SKB_DROP_REASON_TC_EGRESS reason. Victor kicked-off an initial proposal to make this more flexible by disambiguating verdict from return code by moving the verdict into struct tcf_result and letting tcf_classify() return a negative error. If hit, then two new drop reasons were added in the proposal, that is SKB_DROP_REASON_TC_INGRESS_ERROR as well as SKB_DROP_REASON_TC_EGRESS_ERROR. Further analysis of the actual error codes would have required to attach to tcf_classify via kprobe/kretprobe to more deeply debug skb and the returned error. In order to make the kfree_skb_reason() in sch_handle_{ingress,egress}() more extensible, it can be addressed in a more straight forward way, that is: Instead of placing the verdict into struct tcf_result, we can just put the drop reason in there, which does not require changes throughout various classful schedulers given the existing verdict logic can stay as is. Then, SKB_DROP_REASON_TC_ERROR{,_*} can be added to the enum skb_drop_reason to disambiguate between an error or an intentional drop. New drop reason error codes can be added successively to the tc code base. For internal error locations which have not yet been annotated with a SKB_DROP_REASON_TC_ERROR{,_*}, the fallback is SKB_DROP_REASON_TC_INGRESS and SKB_DROP_REASON_TC_EGRESS, respectively. Generic errors could be marked with a SKB_DROP_REASON_TC_ERROR code until they are converted to more specific ones if it is found that they would be useful for troubleshooting. While drop reasons have infrastructure for subsystem specific error codes which are currently used by mac80211 and ovs, Jakub mentioned that it is preferred for tc to use the enum skb_drop_reason core codes given it is a better fit and currently the tooling support is better, too. With regards to the latter: [...] I think Alastair (bpftrace) is working on auto-prettifying enums when bpftrace outputs maps. So we can do something like: $ bpftrace -e 'tracepoint:skb:kfree_skb { @[args->reason] = count(); }' Attaching 1 probe... ^C @[SKB_DROP_REASON_TC_INGRESS]: 2 @[SKB_CONSUMED]: 34 ^^^^^^^^^^^^ names!! Auto-magically. [...] Add a small helper tcf_set_drop_reason() which can be used to set the drop reason into the tcf_result. Signed-off-by: Daniel Borkmann Cc: Jamal Hadi Salim Cc: Victor Nogueira Link: https://lore.kernel.org/netdev/20231006063233.74345d36@kernel.org Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20231009092655.22025-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- include/net/pkt_cls.h | 6 ++++++ include/net/sch_generic.h | 3 +-- net/core/dev.c | 15 ++++++++++----- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index f308e8268651..a76c9171db0e 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -154,6 +154,12 @@ __cls_set_class(unsigned long *clp, unsigned long cl) return xchg(clp, cl); } +static inline void tcf_set_drop_reason(struct tcf_result *res, + enum skb_drop_reason reason) +{ + res->drop_reason = reason; +} + static inline void __tcf_bind_filter(struct Qdisc *q, struct tcf_result *r, unsigned long base) { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c7318c73cfd6..dcb9160e6467 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -324,7 +324,6 @@ struct Qdisc_ops { struct module *owner; }; - struct tcf_result { union { struct { @@ -332,8 +331,8 @@ struct tcf_result { u32 classid; }; const struct tcf_proto *goto_tp; - }; + enum skb_drop_reason drop_reason; }; struct tcf_chain; diff --git a/net/core/dev.c b/net/core/dev.c index 3ca746a5f0ad..97e7b9833db9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3914,7 +3914,8 @@ EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue); #endif /* CONFIG_NET_EGRESS */ #ifdef CONFIG_NET_XGRESS -static int tc_run(struct tcx_entry *entry, struct sk_buff *skb) +static int tc_run(struct tcx_entry *entry, struct sk_buff *skb, + enum skb_drop_reason *drop_reason) { int ret = TC_ACT_UNSPEC; #ifdef CONFIG_NET_CLS_ACT @@ -3926,12 +3927,14 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb) tc_skb_cb(skb)->mru = 0; tc_skb_cb(skb)->post_ct = false; + res.drop_reason = *drop_reason; mini_qdisc_bstats_cpu_update(miniq, skb); ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false); /* Only tcf related quirks below. */ switch (ret) { case TC_ACT_SHOT: + *drop_reason = res.drop_reason; mini_qdisc_qstats_cpu_drop(miniq); break; case TC_ACT_OK: @@ -3981,6 +3984,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev, bool *another) { struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress); + enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_INGRESS; int sch_ret; if (!entry) @@ -3998,7 +4002,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, if (sch_ret != TC_ACT_UNSPEC) goto ingress_verdict; } - sch_ret = tc_run(tcx_entry(entry), skb); + sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason); ingress_verdict: switch (sch_ret) { case TC_ACT_REDIRECT: @@ -4015,7 +4019,7 @@ ingress_verdict: *ret = NET_RX_SUCCESS; return NULL; case TC_ACT_SHOT: - kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS); + kfree_skb_reason(skb, drop_reason); *ret = NET_RX_DROP; return NULL; /* used by tc_run */ @@ -4036,6 +4040,7 @@ static __always_inline struct sk_buff * sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) { struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress); + enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_EGRESS; int sch_ret; if (!entry) @@ -4049,7 +4054,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) if (sch_ret != TC_ACT_UNSPEC) goto egress_verdict; } - sch_ret = tc_run(tcx_entry(entry), skb); + sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason); egress_verdict: switch (sch_ret) { case TC_ACT_REDIRECT: @@ -4058,7 +4063,7 @@ egress_verdict: *ret = NET_XMIT_SUCCESS; return NULL; case TC_ACT_SHOT: - kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS); + kfree_skb_reason(skb, drop_reason); *ret = NET_XMIT_DROP; return NULL; /* used by tc_run */