linux/net/sched/sch_mqprio_lib.c

// SPDX-License-Identifier: GPL-2.0-only

#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/types.h>
#include <net/pkt_sched.h>

#include "sch_mqprio_lib.h"

/* Tests whether the half-open ranges [a, b) and [c, d) have at least one
 * point in common. That is the case exactly when the larger of the two
 * start points lies strictly below the smaller of the two end points.
 */
static bool intervals_overlap(int a, int b, int c, int d)
{
	return max(a, c) < min(b, d);
}

/* Check the per-traffic-class TXQ ranges (offset[], count[]) of @qopt
 * against the TX queues of @dev.
 *
 * The first qopt->num_tc traffic classes are visited in order, and for each
 * one the following are rejected:
 *  - a queue count of zero;
 *  - an offset at or beyond dev->real_num_tx_queues, or a range which ends
 *    past dev->real_num_tx_queues;
 *  - unless @allow_overlapping_txqs is true, a range which overlaps the
 *    range of any higher-numbered traffic class.
 *
 * Returns 0 when every check passes, or -EINVAL for the first violation
 * found, with a descriptive message stored in @extack.
 */
static int mqprio_validate_queue_counts(struct net_device *dev,
					const struct tc_mqprio_qopt *qopt,
					bool allow_overlapping_txqs,
					struct netlink_ext_ack *extack)
{
	int tc, other;

	for (tc = 0; tc < qopt->num_tc; tc++) {
		/* One past the last TXQ assigned to this traffic class */
		unsigned int end = qopt->offset[tc] + qopt->count[tc];

		if (qopt->count[tc] == 0) {
			NL_SET_ERR_MSG_FMT_MOD(extack, "No queues for TC %d",
					       tc);
			return -EINVAL;
		}

		/* The range has to start on an existing TXQ. Its end may be
		 * equal to real_num_tx_queues, which simply means that the
		 * last TXQ is in use.
		 */
		if (qopt->offset[tc] >= dev->real_num_tx_queues ||
		    end > dev->real_num_tx_queues) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Queues %d:%d for TC %d exceed the %d TX queues available",
					       qopt->count[tc], qopt->offset[tc],
					       tc, dev->real_num_tx_queues);
			return -EINVAL;
		}

		if (allow_overlapping_txqs)
			continue;

		/* Compare against every later traffic class; the pairs with
		 * earlier ones were covered by previous iterations.
		 */
		for (other = tc + 1; other < qopt->num_tc; other++) {
			if (!intervals_overlap(qopt->offset[tc], end,
					       qopt->offset[other],
					       qopt->offset[other] +
					       qopt->count[other]))
				continue;

			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "TC %d queues %d@%d overlap with TC %d queues %d@%d",
					       tc, qopt->count[tc], qopt->offset[tc],
					       other, qopt->count[other], qopt->offset[other]);
			return -EINVAL;
		}
	}

	return 0;
}

/* Validate the mqprio configuration in @qopt.
 *
 * Checks that qopt->num_tc does not exceed TC_MAX_QUEUE and that every entry
 * of qopt->prio_tc_map (indices 0..TC_BITMASK) names a traffic class below
 * qopt->num_tc. When @validate_queue_counts is true, the per-TC TXQ ranges
 * are additionally checked against @dev by mqprio_validate_queue_counts(),
 * with @allow_overlapping_txqs passed through to it.
 *
 * Returns 0 on success, or a negative error code with a message stored in
 * @extack.
 */
int mqprio_validate_qopt(struct net_device *dev, struct tc_mqprio_qopt *qopt,
			 bool validate_queue_counts,
			 bool allow_overlapping_txqs,
			 struct netlink_ext_ack *extack)
{
	int prio;

	/* Reject more traffic classes than TC_MAX_QUEUE */
	if (qopt->num_tc > TC_MAX_QUEUE) {
		NL_SET_ERR_MSG(extack,
			       "Number of traffic classes is outside valid range");
		return -EINVAL;
	}

	/* Each priority must be mapped to one of the num_tc traffic classes */
	for (prio = 0; prio <= TC_BITMASK; prio++) {
		if (qopt->prio_tc_map[prio] < qopt->num_tc)
			continue;

		NL_SET_ERR_MSG(extack,
			       "Invalid traffic class in priority to traffic class mapping");
		return -EINVAL;
	}

	if (!validate_queue_counts)
		return 0;

	return mqprio_validate_queue_counts(dev, qopt, allow_overlapping_txqs,
					    extack);
}
EXPORT_SYMBOL_GPL(mqprio_validate_qopt);

/* Fill @qopt from the settings currently held by @dev.
 *
 * Copies the traffic class count reported by netdev_get_num_tc(), the whole
 * priority to traffic class map, and the TXQ offset and count of each of
 * those traffic classes. Entries of qopt->offset[] and qopt->count[] at or
 * above that traffic class count are left untouched.
 */
void mqprio_qopt_reconstruct(struct net_device *dev, struct tc_mqprio_qopt *qopt)
{
	int num_tc = netdev_get_num_tc(dev);
	int tc;

	memcpy(qopt->prio_tc_map, dev->prio_tc_map, sizeof(qopt->prio_tc_map));
	qopt->num_tc = num_tc;

	for (tc = 0; tc < num_tc; tc++) {
		qopt->offset[tc] = dev->tc_to_txq[tc].offset;
		qopt->count[tc] = dev->tc_to_txq[tc].count;
	}
}
EXPORT_SYMBOL_GPL(mqprio_qopt_reconstruct);

/* Translate the per-traffic-class array @fp into a bitmask.
 *
 * Bit N of mqprio->preemptible_tcs is set when fp[N] equals
 * TC_FP_PREEMPTIBLE and cleared otherwise, for every N below
 * TC_QOPT_MAX_QUEUE. Any previous value of mqprio->preemptible_tcs is
 * overwritten.
 */
void mqprio_fp_to_offload(u32 fp[TC_QOPT_MAX_QUEUE],
			  struct tc_mqprio_qopt_offload *mqprio)
{
	unsigned long mask = 0;
	int tc;

	for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
		if (fp[tc] != TC_FP_PREEMPTIBLE)
			continue;

		mask |= BIT(tc);
	}

	mqprio->preemptible_tcs = mask;
}
EXPORT_SYMBOL_GPL(mqprio_fp_to_offload);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Shared mqprio qdisc code currently between taprio and mqprio");
net/sched: taprio: centralize mqprio qopt validation There is a lot of code in taprio which is "borrowed" from mqprio. It makes sense to put a stop to the "borrowing" and start actually reusing code. Because taprio and mqprio are built as part of different kernel modules, code reuse can only take place either by writing it as static inline (limiting), putting it in sch_generic.o (not generic enough), or creating a third auto-selectable kernel module which only holds library code. I opted for the third variant. In a previous change, mqprio gained support for reverse TC:TXQ mappings, something which taprio still denies. Make taprio use the same validation logic so that it supports this configuration as well. The taprio code didn't enforce TXQ overlaps in txtime-assist mode and that looks intentional, even if I've no idea why that might be. Preserve that, but add a comment. There isn't any dedicated MAINTAINERS entry for mqprio, so nothing to update there. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Simon Horman <simon.horman@corigine.com> Reviewed-by: Gerhard Engleder <gerhard@engleder-embedded.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2023-02-04 21:53:01 +08:00			`// SPDX-License-Identifier: GPL-2.0-only`

			`#include <linux/net.h>`
			`#include <linux/netdevice.h>`
			`#include <linux/netlink.h>`
			`#include <linux/types.h>`
			`#include <net/pkt_sched.h>`

			`#include "sch_mqprio_lib.h"`

			`/* Returns true if the intervals [a, b) and [c, d) overlap. */`
			`static bool intervals_overlap(int a, int b, int c, int d)`
			`{`
			`int left = max(a, c), right = min(b, d);`

			`return left < right;`
			`}`

			`static int mqprio_validate_queue_counts(struct net_device *dev,`
			`const struct tc_mqprio_qopt *qopt,`
			`bool allow_overlapping_txqs,`
			`struct netlink_ext_ack *extack)`
			`{`
			`int i, j;`

			`for (i = 0; i < qopt->num_tc; i++) {`
			`unsigned int last = qopt->offset[i] + qopt->count[i];`

			`if (!qopt->count[i]) {`
			`NL_SET_ERR_MSG_FMT_MOD(extack, "No queues for TC %d",`
			`i);`
			`return -EINVAL;`
			`}`

			`/* Verify the queue count is in tx range being equal to the`
			`* real_num_tx_queues indicates the last queue is in use.`
			`*/`
			`if (qopt->offset[i] >= dev->real_num_tx_queues ||`
			`last > dev->real_num_tx_queues) {`
			`NL_SET_ERR_MSG_FMT_MOD(extack,`
			`"Queues %d:%d for TC %d exceed the %d TX queues available",`
			`qopt->count[i], qopt->offset[i],`
			`i, dev->real_num_tx_queues);`
			`return -EINVAL;`
			`}`

			`if (allow_overlapping_txqs)`
			`continue;`

			`/* Verify that the offset and counts do not overlap */`
			`for (j = i + 1; j < qopt->num_tc; j++) {`
			`if (intervals_overlap(qopt->offset[i], last,`
			`qopt->offset[j],`
			`qopt->offset[j] +`
			`qopt->count[j])) {`
			`NL_SET_ERR_MSG_FMT_MOD(extack,`
			`"TC %d queues %d@%d overlap with TC %d queues %d@%d",`
			`i, qopt->count[i], qopt->offset[i],`
			`j, qopt->count[j], qopt->offset[j]);`
			`return -EINVAL;`
			`}`
			`}`
			`}`

			`return 0;`
			`}`

			`int mqprio_validate_qopt(struct net_device *dev, struct tc_mqprio_qopt *qopt,`
			`bool validate_queue_counts,`
			`bool allow_overlapping_txqs,`
			`struct netlink_ext_ack *extack)`
			`{`
			`int i, err;`

			`/* Verify num_tc is not out of max range */`
			`if (qopt->num_tc > TC_MAX_QUEUE) {`
			`NL_SET_ERR_MSG(extack,`
			`"Number of traffic classes is outside valid range");`
			`return -EINVAL;`
			`}`

			`/* Verify priority mapping uses valid tcs */`
			`for (i = 0; i <= TC_BITMASK; i++) {`
			`if (qopt->prio_tc_map[i] >= qopt->num_tc) {`
			`NL_SET_ERR_MSG(extack,`
			`"Invalid traffic class in priority to traffic class mapping");`
			`return -EINVAL;`
			`}`
			`}`

			`if (validate_queue_counts) {`
			`err = mqprio_validate_queue_counts(dev, qopt,`
			`allow_overlapping_txqs,`
			`extack);`
			`if (err)`
			`return err;`
			`}`

			`return 0;`
			`}`
			`EXPORT_SYMBOL_GPL(mqprio_validate_qopt);`

net/sched: refactor mqprio qopt reconstruction to a library function The taprio qdisc will need to reconstruct a struct tc_mqprio_qopt from netdev settings once more in a future patch, but this code was already written twice, once in taprio and once in mqprio. Refactor the code to a helper in the common mqprio library. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Simon Horman <simon.horman@corigine.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2023-02-04 21:53:02 +08:00			`void mqprio_qopt_reconstruct(struct net_device *dev, struct tc_mqprio_qopt *qopt)`
			`{`
			`int tc, num_tc = netdev_get_num_tc(dev);`

			`qopt->num_tc = num_tc;`
			`memcpy(qopt->prio_tc_map, dev->prio_tc_map, sizeof(qopt->prio_tc_map));`

			`for (tc = 0; tc < num_tc; tc++) {`
			`qopt->count[tc] = dev->tc_to_txq[tc].count;`
			`qopt->offset[tc] = dev->tc_to_txq[tc].offset;`
			`}`
			`}`
			`EXPORT_SYMBOL_GPL(mqprio_qopt_reconstruct);`

net/sched: mqprio: allow per-TC user input of FP adminStatus IEEE 802.1Q-2018 clause 6.7.2 Frame preemption specifies that each packet priority can be assigned to a "frame preemption status" value of either "express" or "preemptible". Express priorities are transmitted by the local device through the eMAC, and preemptible priorities through the pMAC (the concepts of eMAC and pMAC come from the 802.3 MAC Merge layer). The FP adminStatus is defined per packet priority, but 802.1Q clause 12.30.1.1.1 framePreemptionAdminStatus also says that: "Priorities that all map to the same traffic class should be constrained to use the same value of preemption status." It is impossible to ignore the cognitive dissonance in the standard here, because it practically means that the FP adminStatus only takes distinct values per traffic class, even though it is defined per priority. I can see no valid use case which is prevented by having the kernel take the FP adminStatus as input per traffic class (what we do here). In addition, this also enforces the above constraint by construction. User space network managers which wish to expose FP adminStatus per priority are free to do so; they must only observe the prio_tc_map of the netdev (which presumably is also under their control, when constructing the mqprio netlink attributes). The reason for configuring frame preemption as a property of the Qdisc layer is that the information about "preemptible TCs" is closest to the place which handles the num_tc and prio_tc_map of the netdev. If the UAPI would have been any other layer, it would be unclear what to do with the FP information when num_tc collapses to 0. A key assumption is that only mqprio/taprio change the num_tc and prio_tc_map of the netdev. Not sure if that's a great assumption to make. Having FP in tc-mqprio can be seen as an implementation of the use case defined in 802.1Q Annex S.2 "Preemption used in isolation". 
There will be a separate implementation of FP in tc-taprio, for the other use cases. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Ferenc Fejes <fejes@inf.elte.hu> Reviewed-by: Simon Horman <simon.horman@corigine.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org> 2023-04-12 02:01:54 +08:00			`void mqprio_fp_to_offload(u32 fp[TC_QOPT_MAX_QUEUE],`
			`struct tc_mqprio_qopt_offload *mqprio)`
			`{`
			`unsigned long preemptible_tcs = 0;`
			`int tc;`

			`for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)`
			`if (fp[tc] == TC_FP_PREEMPTIBLE)`
			`preemptible_tcs |= BIT(tc);`

			`mqprio->preemptible_tcs = preemptible_tcs;`
			`}`
			`EXPORT_SYMBOL_GPL(mqprio_fp_to_offload);`

net/sched: taprio: centralize mqprio qopt validation There is a lot of code in taprio which is "borrowed" from mqprio. It makes sense to put a stop to the "borrowing" and start actually reusing code. Because taprio and mqprio are built as part of different kernel modules, code reuse can only take place either by writing it as static inline (limiting), putting it in sch_generic.o (not generic enough), or creating a third auto-selectable kernel module which only holds library code. I opted for the third variant. In a previous change, mqprio gained support for reverse TC:TXQ mappings, something which taprio still denies. Make taprio use the same validation logic so that it supports this configuration as well. The taprio code didn't enforce TXQ overlaps in txtime-assist mode and that looks intentional, even if I've no idea why that might be. Preserve that, but add a comment. There isn't any dedicated MAINTAINERS entry for mqprio, so nothing to update there. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Simon Horman <simon.horman@corigine.com> Reviewed-by: Gerhard Engleder <gerhard@engleder-embedded.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2023-02-04 21:53:01 +08:00			`MODULE_LICENSE("GPL");`
net: sched: Fill in missing MODULE_DESCRIPTION for qdiscs W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Fill in missing MODULE_DESCRIPTIONs for TC qdiscs. Signed-off-by: Victor Nogueira <victor@mojatatu.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Reviewed-by: Vinicius Costa Gomes <vinicius.gomes@intel.com> Link: https://lore.kernel.org/r/20231027155045.46291-4-victor@mojatatu.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> 2023-10-27 23:50:45 +08:00			`MODULE_DESCRIPTION("Shared mqprio qdisc code currently between taprio and mqprio");`