linux/include/net/dcbnl.h

137 lines
5.0 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2008, Intel Corporation.
*
* Author: Lucy Liu <lucy.liu@intel.com>
*/
#ifndef __NET_DCBNL_H__
#define __NET_DCBNL_H__
#include <linux/dcbnl.h>
struct net_device;
struct dcb_app_type {
int ifindex;
struct dcb_app app;
struct list_head list;
u8 dcbx;
};
u16 dcb_getrewr(struct net_device *dev, struct dcb_app *app);
int dcb_setrewr(struct net_device *dev, struct dcb_app *app);
int dcb_delrewr(struct net_device *dev, struct dcb_app *app);
int dcb_setapp(struct net_device *, struct dcb_app *);
u8 dcb_getapp(struct net_device *, struct dcb_app *);
int dcb_ieee_setapp(struct net_device *, struct dcb_app *);
int dcb_ieee_delapp(struct net_device *, struct dcb_app *);
u8 dcb_ieee_getapp_mask(struct net_device *, struct dcb_app *);
struct dcb_rewr_prio_pcp_map {
u16 map[IEEE_8021QAZ_MAX_TCS];
};
void dcb_getrewr_prio_pcp_mask_map(const struct net_device *dev,
struct dcb_rewr_prio_pcp_map *p_map);
net: dcb: Add priority-to-DSCP map getters On ingress, a network device such as a switch assigns to packets priority based on various criteria. Common options include interpreting PCP and DSCP fields according to user configuration. When a packet egresses the switch, a reverse process may rewrite PCP and/or DSCP values according to packet priority. The following three functions support a) obtaining a DSCP-to-priority map or vice versa, and b) finding default-priority entries in APP database. The DCB subsystem supports for APP entries a very generous M:N mapping between priorities and protocol identifiers. Understandably, several (say) DSCP values can map to the same priority. But this asymmetry holds the other way around as well--one priority can map to several DSCP values. For this reason, the following functions operate in terms of bitmaps, with ones in positions that match some APP entry. - dcb_ieee_getapp_dscp_prio_mask_map() to compute for a given netdevice a map of DSCP-to-priority-mask, which gives for each DSCP value a bitmap of priorities related to that DSCP value by APP, along the lines of dcb_ieee_getapp_mask(). - dcb_ieee_getapp_prio_dscp_mask_map() similarly to compute for a given netdevice a map from priorities to a bitmap of DSCPs. - dcb_ieee_getapp_default_prio_mask() which finds all default-priority rules for a given port in APP database, and returns a mask of priorities allowed by these default-priority rules. Signed-off-by: Petr Machata <petrm@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-07-27 20:26:56 +08:00
struct dcb_ieee_app_prio_map {
u64 map[IEEE_8021QAZ_MAX_TCS];
};
void dcb_ieee_getapp_prio_dscp_mask_map(const struct net_device *dev,
struct dcb_ieee_app_prio_map *p_map);
void dcb_getrewr_prio_dscp_mask_map(const struct net_device *dev,
struct dcb_ieee_app_prio_map *p_map);
net: dcb: Add priority-to-DSCP map getters On ingress, a network device such as a switch assigns to packets priority based on various criteria. Common options include interpreting PCP and DSCP fields according to user configuration. When a packet egresses the switch, a reverse process may rewrite PCP and/or DSCP values according to packet priority. The following three functions support a) obtaining a DSCP-to-priority map or vice versa, and b) finding default-priority entries in APP database. The DCB subsystem supports for APP entries a very generous M:N mapping between priorities and protocol identifiers. Understandably, several (say) DSCP values can map to the same priority. But this asymmetry holds the other way around as well--one priority can map to several DSCP values. For this reason, the following functions operate in terms of bitmaps, with ones in positions that match some APP entry. - dcb_ieee_getapp_dscp_prio_mask_map() to compute for a given netdevice a map of DSCP-to-priority-mask, which gives for each DSCP value a bitmap of priorities related to that DSCP value by APP, along the lines of dcb_ieee_getapp_mask(). - dcb_ieee_getapp_prio_dscp_mask_map() similarly to compute for a given netdevice a map from priorities to a bitmap of DSCPs. - dcb_ieee_getapp_default_prio_mask() which finds all default-priority rules for a given port in APP database, and returns a mask of priorities allowed by these default-priority rules. Signed-off-by: Petr Machata <petrm@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-07-27 20:26:56 +08:00
struct dcb_ieee_app_dscp_map {
u8 map[64];
};
void dcb_ieee_getapp_dscp_prio_mask_map(const struct net_device *dev,
struct dcb_ieee_app_dscp_map *p_map);
u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev);
int dcbnl_ieee_notify(struct net_device *dev, int event, int cmd,
u32 seq, u32 pid);
int dcbnl_cee_notify(struct net_device *dev, int event, int cmd,
u32 seq, u32 pid);
net: dcbnl, add multicast group for DCB Now that dcbnl is being used in many cases by more than a single agent it is beneficial to be notified when some entity either driver or user space has changed the DCB attributes. Today applications either end up polling the interface or relying on a user space database to maintain the DCB state and post events. Polling is a poor solution for obvious reasons. And relying on a user space database has its own downside. Namely it has created strange boot dependencies requiring the database be populated before any applications dependent on DCB attributes starts or the application goes into a polling loop. Populating the database requires negotiating link setting with the peer and can take anywhere from less than a second up to a few seconds depending on the switch implementation. Perhaps more importantly if another application or an embedded agent sets a DCB link attribute the database has no way of knowing other than polling the kernel. This prevents applications from responding quickly to changes in link events which at least in the FCoE case and probably any other protocols expecting a lossless link may result in IO errors. By adding a multicast group for DCB we have clean way to disseminate kernel DCB link attributes up to user space. Avoiding the need for user space to maintain a coherant database and disperse events that potentially do not reflect the current link state. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2011-06-21 15:34:37 +08:00
/*
* Ops struct for the netlink callbacks. Used by DCB-enabled drivers through
* the netdevice struct.
*/
struct dcbnl_rtnl_ops {
/* IEEE 802.1Qaz std */
int (*ieee_getets) (struct net_device *, struct ieee_ets *);
int (*ieee_setets) (struct net_device *, struct ieee_ets *);
int (*ieee_getmaxrate) (struct net_device *, struct ieee_maxrate *);
int (*ieee_setmaxrate) (struct net_device *, struct ieee_maxrate *);
int (*ieee_getqcn) (struct net_device *, struct ieee_qcn *);
int (*ieee_setqcn) (struct net_device *, struct ieee_qcn *);
int (*ieee_getqcnstats) (struct net_device *, struct ieee_qcn_stats *);
int (*ieee_getpfc) (struct net_device *, struct ieee_pfc *);
int (*ieee_setpfc) (struct net_device *, struct ieee_pfc *);
int (*ieee_getapp) (struct net_device *, struct dcb_app *);
int (*ieee_setapp) (struct net_device *, struct dcb_app *);
int (*ieee_delapp) (struct net_device *, struct dcb_app *);
int (*ieee_peer_getets) (struct net_device *, struct ieee_ets *);
int (*ieee_peer_getpfc) (struct net_device *, struct ieee_pfc *);
/* CEE std */
u8 (*getstate)(struct net_device *);
u8 (*setstate)(struct net_device *, u8);
void (*getpermhwaddr)(struct net_device *, u8 *);
void (*setpgtccfgtx)(struct net_device *, int, u8, u8, u8, u8);
void (*setpgbwgcfgtx)(struct net_device *, int, u8);
void (*setpgtccfgrx)(struct net_device *, int, u8, u8, u8, u8);
void (*setpgbwgcfgrx)(struct net_device *, int, u8);
void (*getpgtccfgtx)(struct net_device *, int, u8 *, u8 *, u8 *, u8 *);
void (*getpgbwgcfgtx)(struct net_device *, int, u8 *);
void (*getpgtccfgrx)(struct net_device *, int, u8 *, u8 *, u8 *, u8 *);
void (*getpgbwgcfgrx)(struct net_device *, int, u8 *);
void (*setpfccfg)(struct net_device *, int, u8);
void (*getpfccfg)(struct net_device *, int, u8 *);
u8 (*setall)(struct net_device *);
u8 (*getcap)(struct net_device *, int, u8 *);
int (*getnumtcs)(struct net_device *, int, u8 *);
int (*setnumtcs)(struct net_device *, int, u8);
u8 (*getpfcstate)(struct net_device *);
void (*setpfcstate)(struct net_device *, u8);
void (*getbcncfg)(struct net_device *, int, u32 *);
void (*setbcncfg)(struct net_device *, int, u32);
void (*getbcnrp)(struct net_device *, int, u8 *);
void (*setbcnrp)(struct net_device *, int, u8);
int (*setapp)(struct net_device *, u8, u16, u8);
int (*getapp)(struct net_device *, u8, u16);
u8 (*getfeatcfg)(struct net_device *, int, u8 *);
u8 (*setfeatcfg)(struct net_device *, int, u8);
/* DCBX configuration */
u8 (*getdcbx)(struct net_device *);
u8 (*setdcbx)(struct net_device *, u8);
/* peer apps */
int (*peer_getappinfo)(struct net_device *, struct dcb_peer_app_info *,
u16 *);
int (*peer_getapptable)(struct net_device *, struct dcb_app *);
/* CEE peer */
int (*cee_peer_getpg) (struct net_device *, struct cee_pg *);
int (*cee_peer_getpfc) (struct net_device *, struct cee_pfc *);
net/dcb: Add dcbnl buffer attribute In this patch, we add dcbnl buffer attribute to allow user change the NIC's buffer configuration such as priority to buffer mapping and buffer size of individual buffer. This attribute combined with pfc attribute allows advanced user to fine tune the qos setting for specific priority queue. For example, user can give dedicated buffer for one or more priorities or user can give large buffer to certain priorities. The dcb buffer configuration will be controlled by lldptool. lldptool -T -i eth2 -V BUFFER prio 0,2,5,7,1,2,3,6 maps priorities 0,1,2,3,4,5,6,7 to receive buffer 0,2,5,7,1,2,3,6 lldptool -T -i eth2 -V BUFFER size 87296,87296,0,87296,0,0,0,0 sets receive buffer size for buffer 0,1,2,3,4,5,6,7 respectively After discussion on mailing list with Jakub, Jiri, Ido and John, we agreed to choose dcbnl over devlink interface since this feature is intended to set port attributes which are governed by the netdev instance of that port, where devlink API is more suitable for global ASIC configurations. We present an use case scenario where dcbnl buffer attribute configured by advance user helps reduce the latency of messages of different sizes. Scenarios description: On ConnectX-5, we run latency sensitive traffic with small/medium message sizes ranging from 64B to 256KB and bandwidth sensitive traffic with large messages sizes 512KB and 1MB. We group small, medium, and large message sizes to their own pfc enables priorities as follow. Priorities 1 & 2 (64B, 256B and 1KB) Priorities 3 & 4 (4KB, 8KB, 16KB, 64KB, 128KB and 256KB) Priorities 5 & 6 (512KB and 1MB) By default, ConnectX-5 maps all pfc enabled priorities to a single lossless fixed buffer size of 50% of total available buffer space. The other 50% is assigned to lossy buffer. Using dcbnl buffer attribute, we create three equal size lossless buffers. Each buffer has 25% of total available buffer space. Thus, the lossy buffer size reduces to 25%. Priority to lossless buffer mappings are set as follow. Priorities 1 & 2 on lossless buffer #1 Priorities 3 & 4 on lossless buffer #2 Priorities 5 & 6 on lossless buffer #3 We observe improvements in latency for small and medium message sizes as follows. Please note that the large message sizes bandwidth performance is reduced but the total bandwidth remains the same. 256B message size (42 % latency reduction) 4K message size (21% latency reduction) 64K message size (16% latency reduction) CC: Ido Schimmel <idosch@idosch.org> CC: Jakub Kicinski <jakub.kicinski@netronome.com> CC: Jiri Pirko <jiri@resnulli.us> CC: Or Gerlitz <gerlitz.or@gmail.com> CC: Parav Pandit <parav@mellanox.com> CC: Aron Silverton <aron.silverton@oracle.com> Signed-off-by: Huy Nguyen <huyn@mellanox.com> Reviewed-by: Parav Pandit <parav@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
2018-02-23 01:57:10 +08:00
/* buffer settings */
int (*dcbnl_getbuffer)(struct net_device *, struct dcbnl_buffer *);
int (*dcbnl_setbuffer)(struct net_device *, struct dcbnl_buffer *);
/* apptrust */
int (*dcbnl_setapptrust)(struct net_device *, u8 *, int);
int (*dcbnl_getapptrust)(struct net_device *, u8 *, int *);
/* rewrite */
int (*dcbnl_setrewr)(struct net_device *dev, struct dcb_app *app);
int (*dcbnl_delrewr)(struct net_device *dev, struct dcb_app *app);
};
#endif /* __NET_DCBNL_H__ */