From eed4baeb040aa41323d0091c18d36cc5a895792d Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 28 Feb 2018 10:44:58 +0100 Subject: [PATCH 01/10] mlxsw: spectrum: qdiscs: Support qdisc per tclass Add the option to set a qdisc per tclass. Match the qdisc to the tclass by parent ID. Supported currently for sch_red only. It allows offloading sch_prio as root qdisc and sch_red as its child. (However, doing so might corrupt the stats for both parent and child.) Signed-off-by: Nogah Frankel Reviewed-by: Yuval Mintz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.h | 1 + .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 62 ++++++++++++++++--- 2 files changed, 53 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 2673310f92da..cc9786ff3b05 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -247,6 +247,7 @@ struct mlxsw_sp_port { struct mlxsw_sp_port_sample *sample; struct list_head vlans_list; struct mlxsw_sp_qdisc *root_qdisc; + struct mlxsw_sp_qdisc *tclass_qdiscs; unsigned acl_rule_count; struct mlxsw_sp_acl_block *ing_acl_block; struct mlxsw_sp_acl_block *eg_acl_block; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 0b7670459051..858846a019ae 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -42,6 +42,8 @@ #include "reg.h" #define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - band - 1) +#define MLXSW_SP_PRIO_CHILD_TO_TCLASS(child) \ + MLXSW_SP_PRIO_BAND_TO_TCLASS((child - 1)) enum mlxsw_sp_qdisc_type { MLXSW_SP_QDISC_NO_QDISC, @@ -99,6 +101,26 @@ mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle, mlxsw_sp_qdisc->handle == handle; } +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent, + bool root_only) +{ + int tclass, child_index; + + if (parent == TC_H_ROOT) + return mlxsw_sp_port->root_qdisc; + + if (root_only || !mlxsw_sp_port->root_qdisc || + !mlxsw_sp_port->root_qdisc->ops || + TC_H_MAJ(parent) != mlxsw_sp_port->root_qdisc->handle || + TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS) + return NULL; + + child_index = TC_H_MIN(parent); + tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index); + return &mlxsw_sp_port->tclass_qdiscs[tclass]; +} + static int mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) @@ -406,11 +428,10 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; - if (p->parent != TC_H_ROOT) + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false); + if (!mlxsw_sp_qdisc) return -EOPNOTSUPP; - mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc; - if (p->command == TC_RED_REPLACE) return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, mlxsw_sp_qdisc, @@ -441,9 +462,12 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, { int i; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, MLXSW_SP_PORT_DEFAULT_TCLASS); + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, + &mlxsw_sp_port->tclass_qdiscs[i]); + } return 0; } @@ -569,10 +593,10 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; - if (p->parent != TC_H_ROOT) + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true); + if (!mlxsw_sp_qdisc) return -EOPNOTSUPP; - mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc; if (p->command == TC_PRIO_REPLACE) return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, mlxsw_sp_qdisc, @@ -596,17 +620,35 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) { - mlxsw_sp_port->root_qdisc = kzalloc(sizeof(*mlxsw_sp_port->root_qdisc), - GFP_KERNEL); - if (!mlxsw_sp_port->root_qdisc) - return -ENOMEM; + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + int i; + mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc), GFP_KERNEL); + if (!mlxsw_sp_qdisc) + goto err_root_qdisc_init; + + mlxsw_sp_port->root_qdisc = mlxsw_sp_qdisc; mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS; + mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc) * IEEE_8021QAZ_MAX_TCS, + GFP_KERNEL); + if (!mlxsw_sp_qdisc) + goto err_tclass_qdiscs_init; + + mlxsw_sp_port->tclass_qdiscs = mlxsw_sp_qdisc; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_sp_port->tclass_qdiscs[i].tclass_num = i; + return 0; + +err_tclass_qdiscs_init: + kfree(mlxsw_sp_port->root_qdisc); +err_root_qdisc_init: + return -ENOMEM; } void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port) { + kfree(mlxsw_sp_port->tclass_qdiscs); kfree(mlxsw_sp_port->root_qdisc); } From 2f88047ec42cc0cc8228247033a8f83960ce7f81 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 28 Feb 2018 10:44:59 +0100 Subject: [PATCH 02/10] mlxsw: spectrum: Add priority counters Add TX packets and bytes counters per switch priority per port. Signed-off-by: Nogah Frankel Reviewed-by: Yuval Mintz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 10 ++++++++++ drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 8d2d140d7910..7c6204f701ae 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1040,6 +1040,16 @@ mlxsw_sp_port_get_hw_xstats(struct net_device *dev, xstats->tail_drop[i] = mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl); } + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_PRIO_CNT, + i, ppcnt_pl); + if (err) + continue; + + xstats->tx_packets[i] = mlxsw_reg_ppcnt_tx_frames_get(ppcnt_pl); + xstats->tx_bytes[i] = mlxsw_reg_ppcnt_tx_octets_get(ppcnt_pl); + } } static void update_stats_cache(struct work_struct *work) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index cc9786ff3b05..d5e711d8ad71 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -210,6 +210,8 @@ struct mlxsw_sp_port_xstats { u64 wred_drop[TC_MAX_QUEUE]; u64 tail_drop[TC_MAX_QUEUE]; u64 backlog[TC_MAX_QUEUE]; + u64 tx_bytes[IEEE_8021QAZ_MAX_TCS]; + u64 tx_packets[IEEE_8021QAZ_MAX_TCS]; }; struct mlxsw_sp_port { From 1631ab2e8d31577d65f4cf857f609ef3131ff61e Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 28 Feb 2018 10:45:00 +0100 Subject: [PATCH 03/10] mlxsw: spectrum: qdiscs: Add priority map per qdisc Add priority map per qdisc, to indicate which priorities are being directed through this qdisc. Signed-off-by: Nogah Frankel Reviewed-by: Yuval Mintz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 858846a019ae..0e0299020d82 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -78,6 +78,7 @@ struct mlxsw_sp_qdisc_ops { struct mlxsw_sp_qdisc { u32 handle; u8 tclass_num; + u8 prio_bitmap; union { struct red_stats red; } xstats_base; @@ -467,6 +468,7 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, MLXSW_SP_PORT_DEFAULT_TCLASS); mlxsw_sp_qdisc_destroy(mlxsw_sp_port, &mlxsw_sp_port->tclass_qdiscs[i]); + mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0; } return 0; @@ -494,11 +496,15 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, int tclass, i; int err; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]); err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass); if (err) return err; + mlxsw_sp_port->tclass_qdiscs[tclass].prio_bitmap |= BIT(i); } return 0; @@ -628,6 +634,7 @@ int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) goto err_root_qdisc_init; mlxsw_sp_port->root_qdisc = mlxsw_sp_qdisc; + mlxsw_sp_port->root_qdisc->prio_bitmap = 0xff; mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS; mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc) * IEEE_8021QAZ_MAX_TCS, From 04cc0bf5d6ceb0ff19e8636fb39695620e587b06 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 28 Feb 2018 10:45:01 +0100 Subject: [PATCH 04/10] mlxsw: spectrum: qdiscs: Collect stats for sch_red based on priomap Priority counters count packets according to their packet priority. Collect the stats for sch_red based on these counters, so the qdisc bstats will be the sum of counters matching the priorities marked in the qdisc priomap. Changing the mapping of the priorities to bands while traffic is running can result in losing the stats of the bands qdiscs from their last dump call to this change, as if the qdisc was unoffloaded and re-offloaded. It will not affect the traffic behaviour according to sch_red. Signed-off-by: Nogah Frankel Reviewed-by: Yuval Mintz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 69 ++++++++++++++----- 1 file changed, 50 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 0e0299020d82..b722af360475 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -208,6 +208,23 @@ mlxsw_sp_qdisc_get_xstats(struct mlxsw_sp_port *mlxsw_sp_port, return -EOPNOTSUPP; } +static void +mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats, + u8 prio_bitmap, u64 *tx_packets, + u64 *tx_bytes) +{ + int i; + + *tx_packets = 0; + *tx_bytes = 0; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (prio_bitmap & BIT(i)) { + *tx_packets += xstats->tx_packets[i]; + *tx_bytes += xstats->tx_bytes[i]; + } + } +} + static int mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, int tclass_num, u32 min, u32 max, @@ -253,17 +270,16 @@ mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, u8 tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_qdisc_stats *stats_base; struct mlxsw_sp_port_xstats *xstats; - struct rtnl_link_stats64 *stats; struct red_stats *red_base; xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; - stats = &mlxsw_sp_port->periodic_hw_stats.stats; stats_base = &mlxsw_sp_qdisc->stats_base; red_base = &mlxsw_sp_qdisc->xstats_base.red; - stats_base->tx_packets = stats->tx_packets; - stats_base->tx_bytes = stats->tx_bytes; - + mlxsw_sp_qdisc_bstats_per_priority_get(xstats, + mlxsw_sp_qdisc->prio_bitmap, + &stats_base->tx_packets, + &stats_base->tx_bytes); red_base->prob_mark = xstats->ecn; red_base->prob_drop = xstats->wred_drop[tclass_num]; red_base->pdrop = xstats->tail_drop[tclass_num]; @@ -380,14 +396,16 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, u8 tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_qdisc_stats *stats_base; struct mlxsw_sp_port_xstats *xstats; - struct rtnl_link_stats64 *stats; xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; - stats = &mlxsw_sp_port->periodic_hw_stats.stats; stats_base = &mlxsw_sp_qdisc->stats_base; - tx_bytes = stats->tx_bytes - stats_base->tx_bytes; - tx_packets = stats->tx_packets - stats_base->tx_packets; + mlxsw_sp_qdisc_bstats_per_priority_get(xstats, + mlxsw_sp_qdisc->prio_bitmap, + &tx_packets, &tx_bytes); + tx_bytes = tx_bytes - stats_base->tx_bytes; + tx_packets = tx_packets - stats_base->tx_packets; + overlimits = xstats->wred_drop[tclass_num] + xstats->ecn - stats_base->overlimits; drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] - @@ -493,18 +511,31 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, void *params) { struct tc_prio_qopt_offload_params *p = params; - int tclass, i; + struct mlxsw_sp_qdisc *child_qdisc; + int tclass, i, band; + u8 old_priomap; int err; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0; - - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]); - err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass); - if (err) - return err; - mlxsw_sp_port->tclass_qdiscs[tclass].prio_bitmap |= BIT(i); + for (band = 0; band < p->bands; band++) { + tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); + child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; + old_priomap = child_qdisc->prio_bitmap; + child_qdisc->prio_bitmap = 0; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (p->priomap[i] == band) { + child_qdisc->prio_bitmap |= BIT(i); + if (BIT(i) & old_priomap) + continue; + err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, + i, tclass); + if (err) + return err; + } + } + if (old_priomap != child_qdisc->prio_bitmap && + child_qdisc->ops && child_qdisc->ops->clean_stats) + child_qdisc->ops->clean_stats(mlxsw_sp_port, + child_qdisc); } return 0; From cc6e5c13af8b592cfed49f4854eddddbb58c6949 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 28 Feb 2018 10:45:02 +0100 Subject: [PATCH 05/10] mlxsw: spectrum: qdiscs: Update backlog handling of a child qdiscs When removing a child qdisc its backlog will be decreased from the parent backlog. The driver backlog count should do the same. When the parent changes its configuration, the child might need to clean its stats. However, the backlog can't be cleaned with the rest of the stats, because it reflects a momentary value that needs to be synced with the core, not the history of the qdisc. Signed-off-by: Nogah Frankel Reviewed-by: Yuval Mintz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_qdisc.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index b722af360475..5ddaafc8aa18 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -294,6 +294,12 @@ static int mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { + struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc; + + if (root_qdisc != mlxsw_sp_qdisc) + root_qdisc->stats_base.backlog -= + mlxsw_sp_qdisc->stats_base.backlog; + return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port, mlxsw_sp_qdisc->tclass_num); } @@ -358,6 +364,7 @@ mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc->stats_base.backlog); p->qstats->backlog -= backlog; + mlxsw_sp_qdisc->stats_base.backlog = 0; } static int @@ -512,7 +519,7 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, { struct tc_prio_qopt_offload_params *p = params; struct mlxsw_sp_qdisc *child_qdisc; - int tclass, i, band; + int tclass, i, band, backlog; u8 old_priomap; int err; @@ -533,9 +540,12 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, } } if (old_priomap != child_qdisc->prio_bitmap && - child_qdisc->ops && child_qdisc->ops->clean_stats) + child_qdisc->ops && child_qdisc->ops->clean_stats) { + backlog = child_qdisc->stats_base.backlog; child_qdisc->ops->clean_stats(mlxsw_sp_port, child_qdisc); + child_qdisc->stats_base.backlog = backlog; + } } return 0; From fd5ac14a1aae63ef95b22cc2eb23e3a25b3436be Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 28 Feb 2018 10:45:03 +0100 Subject: [PATCH 06/10] net: sch: Don't warn on missmatching qlen and backlog for offloaded qdiscs Offloaded qdiscs are allowed to expose only parts of their statistics. It means that if backlog is being exposed and qlen is not, it might trigger a warning in qdisc_tree_reduce_backlog. Do not warn in case the qdisc that was removed was an offloaded one. Signed-off-by: Nogah Frankel Reviewed-by: Yuval Mintz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/sch_api.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 27e672c12492..68f9d942bed4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -739,6 +739,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev) void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, unsigned int len) { + bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; @@ -760,8 +761,12 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, * If child was empty even before update then backlog * counter is screwed and we skip notification because * parent class is already passive. + * + * If the original child was offloaded then it is allowed + * to be seem as empty, so the parent is notified anyway. */ - notify = !sch->q.qlen && !WARN_ON_ONCE(!n); + notify = !sch->q.qlen && !WARN_ON_ONCE(!n && + !qdisc_is_offloaded); /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { From 23f2b4048cc49d786c452f681dd5d8920ca095a7 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 28 Feb 2018 10:45:04 +0100 Subject: [PATCH 07/10] mlxsw: spectrum: Update sch_prio stats to include sch_red related drops sch_prio as root qdisc should count all the drops its children have. Since it is possible for it to have sch_red children, it needs to count RED early drops. Signed-off-by: Nogah Frankel Reviewed-by: Yuval Mintz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 5ddaafc8aa18..bed7495d35d6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -584,6 +584,7 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port, for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { drops += xstats->tail_drop[i]; + drops += xstats->wred_drop[i]; backlog += xstats->backlog[i]; } drops = drops - stats_base->drops; @@ -619,8 +620,10 @@ mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, stats_base->tx_bytes = stats->tx_bytes; stats_base->drops = 0; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { stats_base->drops += xstats->tail_drop[i]; + stats_base->drops += xstats->wred_drop[i]; + } mlxsw_sp_qdisc->stats_base.backlog = 0; } From 98ceb7b6d64552f995973be1a0ee9af0bf85fb3d Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 28 Feb 2018 10:45:05 +0100 Subject: [PATCH 08/10] mlxsw: spectrum: qdiscs: prio: Delete child qdiscs when removing bands When the number the bands of sch_prio is decreased, child qdiscs on the deleted bands would get deleted as well. This change and deletions are being done under sch_tree_lock of the sch_prio qdisc. Part of the destruction of qdisc is unoffloading it, if it is offloaded. Un-offloading can't be done inside this lock. Move the offload command to be done before reducing the number of bands, so unoffloading of the qdiscs that are about to be deleted could be done outside of the lock. Signed-off-by: Nogah Frankel Reviewed-by: Yuval Mintz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_qdisc.c | 7 ++++++- net/sched/sch_prio.c | 13 ++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index bed7495d35d6..a2a3ac09c3bc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -547,7 +547,12 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, child_qdisc->stats_base.backlog = backlog; } } - + for (; band < IEEE_8021QAZ_MAX_TCS; band++) { + tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); + child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; + child_qdisc->prio_bitmap = 0; + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc); + } return 0; } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index efbf51f35778..ba2d6d17d95a 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -142,9 +142,8 @@ prio_reset(struct Qdisc *sch) sch->q.qlen = 0; } -static int prio_offload(struct Qdisc *sch, bool enable) +static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt) { - struct prio_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct tc_prio_qopt_offload opt = { .handle = sch->handle, @@ -154,10 +153,10 @@ static int prio_offload(struct Qdisc *sch, bool enable) if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return -EOPNOTSUPP; - if (enable) { + if (qopt) { opt.command = TC_PRIO_REPLACE; - opt.replace_params.bands = q->bands; - memcpy(&opt.replace_params.priomap, q->prio2band, + opt.replace_params.bands = qopt->bands; + memcpy(&opt.replace_params.priomap, qopt->priomap, TC_PRIO_MAX + 1); opt.replace_params.qstats = &sch->qstats; } else { @@ -174,7 +173,7 @@ prio_destroy(struct Qdisc *sch) struct prio_sched_data *q = qdisc_priv(sch); tcf_block_put(q->block); - prio_offload(sch, false); + prio_offload(sch, NULL); for (prio = 0; prio < q->bands; prio++) qdisc_destroy(q->queues[prio]); } @@ -211,6 +210,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, } } + prio_offload(sch, qopt); sch_tree_lock(sch); q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); @@ -230,7 +230,6 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, } sch_tree_unlock(sch); - prio_offload(sch, true); return 0; } From b9c7a7acc749f3d0667a2ab44ea38110d5a1f286 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 28 Feb 2018 10:45:06 +0100 Subject: [PATCH 09/10] net: sch: prio: Add offload ability for grafting a child Offload sch_prio graft command for capable drivers. Warn in case of a failure, unless the graft was done as part of a destroy operation (the new qdisc is a noop) or if all the qdiscs (the parent, the old child, and the new one) are not offloaded. Signed-off-by: Nogah Frankel Reviewed-by: Yuval Mintz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 8 ++++++++ net/sched/sch_prio.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 87406252f0a3..e828d31be5da 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -806,6 +806,7 @@ enum tc_prio_command { TC_PRIO_REPLACE, TC_PRIO_DESTROY, TC_PRIO_STATS, + TC_PRIO_GRAFT, }; struct tc_prio_qopt_offload_params { @@ -818,6 +819,11 @@ struct tc_prio_qopt_offload_params { struct gnet_stats_queue *qstats; }; +struct tc_prio_qopt_offload_graft_params { + u8 band; + u32 child_handle; +}; + struct tc_prio_qopt_offload { enum tc_prio_command command; u32 handle; @@ -825,6 +831,8 @@ struct tc_prio_qopt_offload { union { struct tc_prio_qopt_offload_params replace_params; struct tc_qopt_offload_stats stats; + struct tc_prio_qopt_offload_graft_params graft_params; }; }; + #endif diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index ba2d6d17d95a..222e53d3d27a 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -308,12 +308,44 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { struct prio_sched_data *q = qdisc_priv(sch); + struct tc_prio_qopt_offload graft_offload; + struct net_device *dev = qdisc_dev(sch); unsigned long band = arg - 1; + bool any_qdisc_is_offloaded; + int err; if (new == NULL) new = &noop_qdisc; *old = qdisc_replace(sch, new, &q->queues[band]); + + if (!tc_can_offload(dev)) + return 0; + + graft_offload.handle = sch->handle; + graft_offload.parent = sch->parent; + graft_offload.graft_params.band = band; + graft_offload.graft_params.child_handle = new->handle; + graft_offload.command = TC_PRIO_GRAFT; + + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, + &graft_offload); + + /* Don't report error if the graft is part of destroy operation. */ + if (err && new != &noop_qdisc) { + /* Don't report error if the parent, the old child and the new + * one are not offloaded. + */ + any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; + any_qdisc_is_offloaded |= new->flags & TCQ_F_OFFLOADED; + if (*old) + any_qdisc_is_offloaded |= (*old)->flags & + TCQ_F_OFFLOADED; + + if (any_qdisc_is_offloaded) + NL_SET_ERR_MSG(extack, "Offloading graft operation failed."); + } + return 0; } From 32dc5efc6cb40dea9ee0797b7c237efc470e0c35 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 28 Feb 2018 10:45:07 +0100 Subject: [PATCH 10/10] mlxsw: spectrum: qdiscs: prio: Handle graft command Handle graft command for an offloaded sch_prio. Grafting a qdisc to any place other than under its original parent is not supported by mlxsw and will cause the grafted qdisc to stop being offloaded. Signed-off-by: Nogah Frankel Reviewed-by: Yuval Mintz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index a2a3ac09c3bc..91262b0573e3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -122,6 +122,24 @@ mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent, return &mlxsw_sp_port->tclass_qdiscs[tclass]; } +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle) +{ + int i; + + if (mlxsw_sp_port->root_qdisc->handle == handle) + return mlxsw_sp_port->root_qdisc; + + if (mlxsw_sp_port->root_qdisc->handle == TC_H_UNSPEC) + return NULL; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + if (mlxsw_sp_port->tclass_qdiscs[i].handle == handle) + return &mlxsw_sp_port->tclass_qdiscs[i]; + + return NULL; +} + static int mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) @@ -643,6 +661,39 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = { .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, }; +/* Grafting is not supported in mlxsw. It will result in un-offloading of the + * grafted qdisc as well as the qdisc in the qdisc new location. + * (However, if the graft is to the location where the qdisc is already at, it + * will be ignored completely and won't cause un-offloading). + */ +static int +mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_prio_qopt_offload_graft_params *p) +{ + int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band); + struct mlxsw_sp_qdisc *old_qdisc; + + /* Check if the grafted qdisc is already in its "new" location. If so - + * nothing needs to be done. + */ + if (p->band < IEEE_8021QAZ_MAX_TCS && + mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle) + return 0; + + /* See if the grafted qdisc is already offloaded on any tclass. If so, + * unoffload it. + */ + old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port, + p->child_handle); + if (old_qdisc) + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc); + + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, + &mlxsw_sp_port->tclass_qdiscs[tclass_num]); + return -EOPNOTSUPP; +} + int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p) { @@ -668,6 +719,9 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, case TC_PRIO_STATS: return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, &p->stats); + case TC_PRIO_GRAFT: + return mlxsw_sp_qdisc_prio_graft(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->graft_params); default: return -EOPNOTSUPP; }