Merge branch 'mlx5-sriov-updates'

Saeed Mahameed says:

====================
Mellanox 100G ethernet SRIOV Upgrades

This series introduces new features and upgrades for mlx5 etherenet SRIOV,
while the first patch provides a bug fixes for a compilation issue introduced
buy the previous aRFS series for when CONFIG_RFS_ACCEL=y and CONFIG_MLX5_CORE_EN=n.

Changes from V0:
    - 1st patch: Don't add a new Kconfig flag.  Instead, compile out en_arfs.c \
contents when CONFIG_RFS_ACCEL=n

SRIOV upgrades:
    - Use synchronize_irq instead of the vport events spin_lock
    - Fix memory leak in error flow
    - Added full VST support
    - Spoofcheck support
    - Trusted VF promiscuous and allmulti support

VST and Spoofcheck in details:
    - Adding Low level firmware commands support for creating ACLs
     (Access Control Lists) Flow tables.  ACLs are regular flow tables with
     the only exception that they are bound to a specific e-Switch vport (VF)
     and they can be one of two types
        > egress ACL: filters traffic going from e-Switch to VF.
        > ingress ACL: filters traffic going from VF to e-Switch.
    - Ingress/Egress ACLs (per vport) for VF VST mode filtering.
    - Ingress/Egress ACLs (per vport) for VF spoofcheck filtering.
    - Ingress/Egress ACLs (per vport) configuration:
        > Created only when at least one of (VST, spoofcheck) is configured.
	> if (!spoofchk && !vst) allow all traffic.  i.e. no ACLs.
        > if (spoofchk && vst) allow only untagged traffic with smac=original mac \
                sent from the VF.
        > if (spoofchk && !vst) allow only traffic with smac=original mac sent from \
the VF.  > if (!spoofchk && vst) allow only untagged traffic.

Trusted VF promiscuous and allmulti support in details:
    - Added two flow groups for allmulti and promisc VFs to the e-Switch FDB table
        > Allmulti group: One rule that forwards any mcast traffic coming from
                          either uplink or VFs/PF vports.
        > Promisc group: One rule that forwards all unmatched traffic coming from \
                uplink.
    - Add vport context change event handling for promisc and allmulti
      If VF is trusted respect the request and:
        > if allmulti request: add the vport to the allmulti group.
          and to all other L2 mcast address in the FDB table.
        > if promisc request: add the vport to the promisc group.
        > Note: A promisc VF can only see traffic that was not explicitly matched to
                or requested by any other VF.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2016-05-04 14:04:49 -04:00
commit 3f7496aa72
13 changed files with 1118 additions and 73 deletions

View File

@ -6,7 +6,6 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \
en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \
en_txrx.o en_clock.o vxlan.o en_tc.o
en_txrx.o en_clock.o vxlan.o en_tc.o en_arfs.o
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o
mlx5_core-$(CONFIG_RFS_ACCEL) += en_arfs.o

View File

@ -30,6 +30,8 @@
* SOFTWARE.
*/
#ifdef CONFIG_RFS_ACCEL
#include <linux/hash.h>
#include <linux/mlx5/fs.h>
#include <linux/ip.h>
@ -747,3 +749,4 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
spin_unlock_bh(&arfs->arfs_lock);
return arfs_rule->filter_id;
}
#endif

View File

@ -2438,6 +2438,21 @@ static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos)
vlan, qos);
}
static int mlx5e_set_vf_spoofchk(struct net_device *dev, int vf, bool setting)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
return mlx5_eswitch_set_vport_spoofchk(mdev->priv.eswitch, vf + 1, setting);
}
static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting);
}
static int mlx5_vport_link2ifla(u8 esw_link)
{
switch (esw_link) {
@ -2607,6 +2622,8 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = {
#endif
.ndo_set_vf_mac = mlx5e_set_vf_mac,
.ndo_set_vf_vlan = mlx5e_set_vf_vlan,
.ndo_set_vf_spoofchk = mlx5e_set_vf_spoofchk,
.ndo_set_vf_trust = mlx5e_set_vf_trust,
.ndo_get_vf_config = mlx5e_get_vf_config,
.ndo_set_vf_link_state = mlx5e_set_vf_link_state,
.ndo_get_vf_stats = mlx5e_get_vf_stats,

File diff suppressed because it is too large Load Diff

View File

@ -88,18 +88,40 @@ struct l2addr_node {
kfree(ptr); \
})
struct vport_ingress {
struct mlx5_flow_table *acl;
struct mlx5_flow_group *allow_untagged_spoofchk_grp;
struct mlx5_flow_group *allow_spoofchk_only_grp;
struct mlx5_flow_group *allow_untagged_only_grp;
struct mlx5_flow_group *drop_grp;
struct mlx5_flow_rule *allow_rule;
struct mlx5_flow_rule *drop_rule;
};
struct vport_egress {
struct mlx5_flow_table *acl;
struct mlx5_flow_group *allowed_vlans_grp;
struct mlx5_flow_group *drop_grp;
struct mlx5_flow_rule *allowed_vlan;
struct mlx5_flow_rule *drop_rule;
};
struct mlx5_vport {
struct mlx5_core_dev *dev;
int vport;
struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE];
struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE];
struct mlx5_flow_rule *promisc_rule;
struct mlx5_flow_rule *allmulti_rule;
struct work_struct vport_change_handler;
/* This spinlock protects access to vport data, between
* "esw_vport_disable" and ongoing interrupt "mlx5_eswitch_vport_event"
* once vport marked as disabled new interrupts are discarded.
*/
spinlock_t lock; /* vport events sync */
struct vport_ingress ingress;
struct vport_egress egress;
u16 vlan;
u8 qos;
bool spoofchk;
bool trusted;
bool enabled;
u16 enabled_events;
};
@ -113,6 +135,8 @@ struct mlx5_l2_table {
struct mlx5_eswitch_fdb {
void *fdb;
struct mlx5_flow_group *addr_grp;
struct mlx5_flow_group *allmulti_grp;
struct mlx5_flow_group *promisc_grp;
};
struct mlx5_eswitch {
@ -124,6 +148,11 @@ struct mlx5_eswitch {
struct mlx5_vport *vports;
int total_vports;
int enabled_vports;
/* Synchronize between vport change events
* and async SRIOV admin state changes
*/
struct mutex state_lock;
struct esw_mc_addr *mc_promisc;
};
/* E-Switch API */
@ -138,6 +167,10 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
int vport, int link_state);
int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
int vport, u16 vlan, u8 qos);
int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
int vport, bool spoofchk);
int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
int vport_num, bool setting);
int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
int vport, struct ifla_vf_info *ivi);
int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,

View File

@ -50,6 +50,10 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
MLX5_SET(set_flow_table_root_in, in, table_id, ft->id);
if (ft->vport) {
MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport);
MLX5_SET(set_flow_table_root_in, in, other_vport, 1);
}
memset(out, 0, sizeof(out));
return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
@ -57,6 +61,7 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
}
int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
u16 vport,
enum fs_flow_table_type type, unsigned int level,
unsigned int log_size, struct mlx5_flow_table
*next_ft, unsigned int *table_id)
@ -77,6 +82,10 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
MLX5_SET(create_flow_table_in, in, table_type, type);
MLX5_SET(create_flow_table_in, in, level, level);
MLX5_SET(create_flow_table_in, in, log_size, log_size);
if (vport) {
MLX5_SET(create_flow_table_in, in, vport_number, vport);
MLX5_SET(create_flow_table_in, in, other_vport, 1);
}
memset(out, 0, sizeof(out));
err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
@ -101,6 +110,10 @@ int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
MLX5_CMD_OP_DESTROY_FLOW_TABLE);
MLX5_SET(destroy_flow_table_in, in, table_type, ft->type);
MLX5_SET(destroy_flow_table_in, in, table_id, ft->id);
if (ft->vport) {
MLX5_SET(destroy_flow_table_in, in, vport_number, ft->vport);
MLX5_SET(destroy_flow_table_in, in, other_vport, 1);
}
return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
sizeof(out));
@ -120,6 +133,10 @@ int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
MLX5_CMD_OP_MODIFY_FLOW_TABLE);
MLX5_SET(modify_flow_table_in, in, table_type, ft->type);
MLX5_SET(modify_flow_table_in, in, table_id, ft->id);
if (ft->vport) {
MLX5_SET(modify_flow_table_in, in, vport_number, ft->vport);
MLX5_SET(modify_flow_table_in, in, other_vport, 1);
}
MLX5_SET(modify_flow_table_in, in, modify_field_select,
MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID);
if (next_ft) {
@ -148,6 +165,10 @@ int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
MLX5_CMD_OP_CREATE_FLOW_GROUP);
MLX5_SET(create_flow_group_in, in, table_type, ft->type);
MLX5_SET(create_flow_group_in, in, table_id, ft->id);
if (ft->vport) {
MLX5_SET(create_flow_group_in, in, vport_number, ft->vport);
MLX5_SET(create_flow_group_in, in, other_vport, 1);
}
err = mlx5_cmd_exec_check_status(dev, in,
inlen, out,
@ -174,6 +195,10 @@ int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
MLX5_SET(destroy_flow_group_in, in, table_type, ft->type);
MLX5_SET(destroy_flow_group_in, in, table_id, ft->id);
MLX5_SET(destroy_flow_group_in, in, group_id, group_id);
if (ft->vport) {
MLX5_SET(destroy_flow_group_in, in, vport_number, ft->vport);
MLX5_SET(destroy_flow_group_in, in, other_vport, 1);
}
return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
sizeof(out));
@ -207,6 +232,10 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(set_fte_in, in, table_type, ft->type);
MLX5_SET(set_fte_in, in, table_id, ft->id);
MLX5_SET(set_fte_in, in, flow_index, fte->index);
if (ft->vport) {
MLX5_SET(set_fte_in, in, vport_number, ft->vport);
MLX5_SET(set_fte_in, in, other_vport, 1);
}
in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
MLX5_SET(flow_context, in_flow_context, group_id, group_id);
@ -285,6 +314,10 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
MLX5_SET(delete_fte_in, in, table_type, ft->type);
MLX5_SET(delete_fte_in, in, table_id, ft->id);
MLX5_SET(delete_fte_in, in, flow_index, index);
if (ft->vport) {
MLX5_SET(delete_fte_in, in, vport_number, ft->vport);
MLX5_SET(delete_fte_in, in, other_vport, 1);
}
err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));

View File

@ -34,6 +34,7 @@
#define _MLX5_FS_CMD_
int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
u16 vport,
enum fs_flow_table_type type, unsigned int level,
unsigned int log_size, struct mlx5_flow_table
*next_ft, unsigned int *table_id);

View File

@ -457,7 +457,7 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in)
return fg;
}
static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte,
static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte,
enum fs_flow_table_type table_type)
{
struct mlx5_flow_table *ft;
@ -469,6 +469,7 @@ static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte,
ft->level = level;
ft->node.type = FS_TYPE_FLOW_TABLE;
ft->type = table_type;
ft->vport = vport;
ft->max_fte = max_fte;
INIT_LIST_HEAD(&ft->fwd_rules);
mutex_init(&ft->lock);
@ -700,9 +701,9 @@ static void list_add_flow_table(struct mlx5_flow_table *ft,
list_add(&ft->node.list, prev);
}
struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
int prio, int max_fte,
u32 level)
static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
u16 vport, int prio,
int max_fte, u32 level)
{
struct mlx5_flow_table *next_ft = NULL;
struct mlx5_flow_table *ft;
@ -732,6 +733,7 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
*/
level += fs_prio->start_level;
ft = alloc_flow_table(level,
vport,
roundup_pow_of_two(max_fte),
root->table_type);
if (!ft) {
@ -742,7 +744,7 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
tree_init_node(&ft->node, 1, del_flow_table);
log_table_sz = ilog2(ft->max_fte);
next_ft = find_next_chained_ft(fs_prio);
err = mlx5_cmd_create_flow_table(root->dev, ft->type, ft->level,
err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->type, ft->level,
log_table_sz, next_ft, &ft->id);
if (err)
goto free_ft;
@ -766,6 +768,20 @@ unlock_root:
return ERR_PTR(err);
}
struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
int prio, int max_fte,
u32 level)
{
return __mlx5_create_flow_table(ns, 0, prio, max_fte, level);
}
struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
int prio, int max_fte,
u32 level, u16 vport)
{
return __mlx5_create_flow_table(ns, vport, prio, max_fte, level);
}
struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
int prio,
int num_flow_table_entries,
@ -1319,6 +1335,16 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
return &dev->priv.fdb_root_ns->ns;
else
return NULL;
case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
if (dev->priv.esw_egress_root_ns)
return &dev->priv.esw_egress_root_ns->ns;
else
return NULL;
case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
if (dev->priv.esw_ingress_root_ns)
return &dev->priv.esw_ingress_root_ns->ns;
else
return NULL;
default:
return NULL;
}
@ -1699,6 +1725,8 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
{
cleanup_root_ns(dev);
cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns);
cleanup_single_prio_root_ns(dev, dev->priv.esw_egress_root_ns);
cleanup_single_prio_root_ns(dev, dev->priv.esw_ingress_root_ns);
}
static int init_fdb_root_ns(struct mlx5_core_dev *dev)
@ -1719,6 +1747,38 @@ static int init_fdb_root_ns(struct mlx5_core_dev *dev)
}
}
static int init_egress_acl_root_ns(struct mlx5_core_dev *dev)
{
struct fs_prio *prio;
dev->priv.esw_egress_root_ns = create_root_ns(dev, FS_FT_ESW_EGRESS_ACL);
if (!dev->priv.esw_egress_root_ns)
return -ENOMEM;
/* create 1 prio*/
prio = fs_create_prio(&dev->priv.esw_egress_root_ns->ns, 0, MLX5_TOTAL_VPORTS(dev));
if (IS_ERR(prio))
return PTR_ERR(prio);
else
return 0;
}
static int init_ingress_acl_root_ns(struct mlx5_core_dev *dev)
{
struct fs_prio *prio;
dev->priv.esw_ingress_root_ns = create_root_ns(dev, FS_FT_ESW_INGRESS_ACL);
if (!dev->priv.esw_ingress_root_ns)
return -ENOMEM;
/* create 1 prio*/
prio = fs_create_prio(&dev->priv.esw_ingress_root_ns->ns, 0, MLX5_TOTAL_VPORTS(dev));
if (IS_ERR(prio))
return PTR_ERR(prio);
else
return 0;
}
int mlx5_init_fs(struct mlx5_core_dev *dev)
{
int err = 0;
@ -1731,8 +1791,21 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
err = init_fdb_root_ns(dev);
if (err)
cleanup_root_ns(dev);
goto err;
}
if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
err = init_egress_acl_root_ns(dev);
if (err)
goto err;
}
if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
err = init_ingress_acl_root_ns(dev);
if (err)
goto err;
}
return 0;
err:
mlx5_cleanup_fs(dev);
return err;
}

View File

@ -45,8 +45,10 @@ enum fs_node_type {
};
enum fs_flow_table_type {
FS_FT_NIC_RX = 0x0,
FS_FT_FDB = 0X4,
FS_FT_NIC_RX = 0x0,
FS_FT_ESW_EGRESS_ACL = 0x2,
FS_FT_ESW_INGRESS_ACL = 0x3,
FS_FT_FDB = 0X4,
};
enum fs_fte_status {
@ -79,6 +81,7 @@ struct mlx5_flow_rule {
struct mlx5_flow_table {
struct fs_node node;
u32 id;
u16 vport;
unsigned int max_fte;
unsigned int level;
enum fs_flow_table_type type;

View File

@ -42,6 +42,8 @@
#define DRIVER_VERSION "3.0-1"
#define DRIVER_RELDATE "January 2015"
#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev))
extern int mlx5_core_debug_mask;
#define mlx5_core_dbg(__dev, format, ...) \

View File

@ -1349,6 +1349,18 @@ enum mlx5_cap_type {
#define MLX5_CAP_ESW_FLOWTABLE_FDB_MAX(mdev, cap) \
MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_nic_esw_fdb.cap)
#define MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) \
MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_esw_acl_egress.cap)
#define MLX5_CAP_ESW_EGRESS_ACL_MAX(mdev, cap) \
MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_esw_acl_egress.cap)
#define MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) \
MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_esw_acl_ingress.cap)
#define MLX5_CAP_ESW_INGRESS_ACL_MAX(mdev, cap) \
MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_esw_acl_ingress.cap)
#define MLX5_CAP_ESW(mdev, cap) \
MLX5_GET(e_switch_cap, \
mdev->hca_caps_cur[MLX5_CAP_ESWITCH], cap)

View File

@ -518,6 +518,8 @@ struct mlx5_priv {
unsigned long pci_dev_data;
struct mlx5_flow_root_namespace *root_ns;
struct mlx5_flow_root_namespace *fdb_root_ns;
struct mlx5_flow_root_namespace *esw_egress_root_ns;
struct mlx5_flow_root_namespace *esw_ingress_root_ns;
};
enum mlx5_device_state {

View File

@ -58,6 +58,8 @@ enum mlx5_flow_namespace_type {
MLX5_FLOW_NAMESPACE_LEFTOVERS,
MLX5_FLOW_NAMESPACE_ANCHOR,
MLX5_FLOW_NAMESPACE_FDB,
MLX5_FLOW_NAMESPACE_ESW_EGRESS,
MLX5_FLOW_NAMESPACE_ESW_INGRESS,
};
struct mlx5_flow_table;
@ -90,6 +92,11 @@ mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
int prio,
int num_flow_table_entries,
u32 level);
struct mlx5_flow_table *
mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
int prio,
int num_flow_table_entries,
u32 level, u16 vport);
int mlx5_destroy_flow_table(struct mlx5_flow_table *ft);
/* inbox should be set with the following values: