Merge branch 'nfp-ring-reconfig-and-xdp-support'

Jakub Kicinski says:

====================
ring reconfiguration and XDP support

This set adds support for ethtool channel API and XDP.

I kick off with ethtool get_channels() implementation.
set_channels() needs some preparations to get right.  I follow
the prepare/commit paradigm and allocate all resources before
stopping the device.  It has already been done for ndo_change_mtu
and ethtool set_ringparam(), it makes sense now to consolidate all
the required logic in one place.

XDP support requires splitting TX rings into two classes -
for the stack and for XDP.  The ring structures are identical.
The differences are in how they are connected to IRQ vector
structs and how the completion/cleanup works.  When XDP is enabled
I switch from the frag allocator to page-per-packet and map buffers
BIDIRECTIONALly.

Last but not least XDP offload is added (the patch just takes
care of the small formal differences between cls_bpf and XDP).

There is a tiny & trivial DebugFS patch in the mix, I hope it can
be taken via net-next provided we have the right Acks.

Resending with improved commit message and CCing more people on patch 10.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2016-11-04 14:56:17 -04:00
commit 95ae31a9a9
9 changed files with 881 additions and 306 deletions

View File

@ -62,6 +62,7 @@ enum nfp_bpf_action_type {
NN_ACT_TC_DROP,
NN_ACT_TC_REDIR,
NN_ACT_DIRECT,
NN_ACT_XDP,
};
/* Software register representation, hardware encoding in asm.h */

View File

@ -1126,7 +1126,7 @@ static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
meta->insn.src_reg * 2, true, 4);
}
static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
static int mem_ldx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
if (meta->insn.off == offsetof(struct sk_buff, len))
emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2),
@ -1134,12 +1134,42 @@ static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
else
return -ENOTSUPP;
wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
return 0;
}
static int mem_ldx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
u32 dst = reg_both(meta->insn.dst_reg * 2);
if (meta->insn.off != offsetof(struct xdp_md, data) &&
meta->insn.off != offsetof(struct xdp_md, data_end))
return -ENOTSUPP;
emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT);
if (meta->insn.off == offsetof(struct xdp_md, data))
return 0;
emit_alu(nfp_prog, dst, dst, ALU_OP_ADD, NFP_BPF_ABI_LEN);
return 0;
}
static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
int ret;
if (nfp_prog->act == NN_ACT_XDP)
ret = mem_ldx4_xdp(nfp_prog, meta);
else
ret = mem_ldx4_skb(nfp_prog, meta);
wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
return ret;
}
static int mem_stx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
if (meta->insn.off == offsetof(struct sk_buff, mark))
return wrp_set_mark(nfp_prog, meta->insn.src_reg * 2);
@ -1147,6 +1177,18 @@ static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return -ENOTSUPP;
}
static int mem_stx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return -ENOTSUPP;
}
static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
if (nfp_prog->act == NN_ACT_XDP)
return mem_stx4_xdp(nfp_prog, meta);
return mem_stx4_skb(nfp_prog, meta);
}
static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
if (meta->insn.off < 0) /* TODO */
@ -1530,6 +1572,47 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
}
static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
{
/* XDP return codes:
* 0 aborted 0x82 -> drop, count as stat3
* 1 drop 0x22 -> drop, count as stat1
* 2 pass 0x11 -> pass, count as stat0
* 3 tx 0x44 -> redir, count as stat2
* * unknown 0x82 -> drop, count as stat3
*/
/* Target for aborts */
nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
emit_alu(nfp_prog, reg_a(0),
reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16);
/* Target for normal exits */
nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
/* if R0 > 3 jump to abort */
emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0));
emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
wrp_immed(nfp_prog, reg_b(2), 0x44112282);
emit_shf(nfp_prog, reg_a(1),
reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3);
emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
emit_shf(nfp_prog, reg_b(2),
reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
emit_alu(nfp_prog, reg_a(0),
reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
}
static void nfp_outro(struct nfp_prog *nfp_prog)
{
switch (nfp_prog->act) {
@ -1540,6 +1623,9 @@ static void nfp_outro(struct nfp_prog *nfp_prog)
case NN_ACT_TC_REDIR:
nfp_outro_tc_legacy(nfp_prog);
break;
case NN_ACT_XDP:
nfp_outro_xdp(nfp_prog);
break;
}
}

View File

@ -80,6 +80,9 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
{
const struct bpf_reg_state *reg0 = &env->cur_state.regs[0];
if (nfp_prog->act == NN_ACT_XDP)
return 0;
if (reg0->type != CONST_IMM) {
pr_info("unsupported exit state: %d, imm: %llx\n",
reg0->type, reg0->imm);

View File

@ -171,7 +171,10 @@ struct nfp_net_tx_desc {
* on the head's buffer). Equal to skb->len for non-TSO packets.
*/
struct nfp_net_tx_buf {
struct sk_buff *skb;
union {
struct sk_buff *skb;
void *frag;
};
dma_addr_t dma_addr;
short int fidx;
u16 pkt_cnt;
@ -341,6 +344,7 @@ struct nfp_net_rx_ring {
* @napi: NAPI structure for this ring vec
* @tx_ring: Pointer to TX ring
* @rx_ring: Pointer to RX ring
* @xdp_ring: Pointer to an extra TX ring for XDP
* @irq_idx: Index into MSI-X table
* @rx_sync: Seqlock for atomic updates of RX stats
* @rx_pkts: Number of received packets
@ -384,6 +388,8 @@ struct nfp_net_r_vector {
u64 hw_csum_rx_inner_ok;
u64 hw_csum_rx_error;
struct nfp_net_tx_ring *xdp_ring;
struct u64_stats_sync tx_sync;
u64 tx_pkts;
u64 tx_bytes;
@ -429,9 +435,11 @@ struct nfp_stat_pair {
* @is_vf: Is the driver attached to a VF?
* @fw_loaded: Is the firmware loaded?
* @bpf_offload_skip_sw: Offloaded BPF program will not be rerun by cls_bpf
* @bpf_offload_xdp: Offloaded BPF program is XDP
* @ctrl: Local copy of the control register/word.
* @fl_bufsz: Currently configured size of the freelist buffers
* @rx_offset: Offset in the RX buffers where packet data starts
* @xdp_prog: Installed XDP program
* @cpp: Pointer to the CPP handle
* @nfp_dev_cpp: Pointer to the NFP Device handle
* @ctrl_area: Pointer to the CPP area for the control BAR
@ -451,6 +459,7 @@ struct nfp_stat_pair {
* @max_tx_rings: Maximum number of TX rings supported by the Firmware
* @max_rx_rings: Maximum number of RX rings supported by the Firmware
* @num_tx_rings: Currently configured number of TX rings
* @num_stack_tx_rings: Number of TX rings used by the stack (not XDP)
* @num_rx_rings: Currently configured number of RX rings
* @txd_cnt: Size of the TX ring in number of descriptors
* @rxd_cnt: Size of the RX ring in number of descriptors
@ -494,12 +503,15 @@ struct nfp_net {
unsigned is_vf:1;
unsigned fw_loaded:1;
unsigned bpf_offload_skip_sw:1;
unsigned bpf_offload_xdp:1;
u32 ctrl;
u32 fl_bufsz;
u32 rx_offset;
struct bpf_prog *xdp_prog;
struct nfp_net_tx_ring *tx_rings;
struct nfp_net_rx_ring *rx_rings;
@ -532,6 +544,7 @@ struct nfp_net {
unsigned int max_rx_rings;
unsigned int num_tx_rings;
unsigned int num_stack_tx_rings;
unsigned int num_rx_rings;
int stride_tx;
@ -583,6 +596,13 @@ struct nfp_net {
struct dentry *debugfs_dir;
};
struct nfp_net_ring_set {
unsigned int n_rings;
unsigned int mtu;
unsigned int dcnt;
void *rings;
};
/* Functions to read/write from/to a BAR
* Performs any endian conversion necessary.
*/
@ -771,7 +791,9 @@ void nfp_net_rss_write_key(struct nfp_net *nn);
void nfp_net_coalesce_write_cfg(struct nfp_net *nn);
int nfp_net_irqs_alloc(struct nfp_net *nn);
void nfp_net_irqs_disable(struct nfp_net *nn);
int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt);
int
nfp_net_ring_reconfig(struct nfp_net *nn, struct bpf_prog **xdp_prog,
struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx);
#ifdef CONFIG_NFP_NET_DEBUG
void nfp_net_debugfs_create(void);
@ -797,8 +819,6 @@ static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn)
#endif /* CONFIG_NFP_NET_DEBUG */
void nfp_net_filter_stats_timer(unsigned long data);
int
nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
struct tc_cls_bpf_offload *cls_bpf);
int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf);
#endif /* _NFP_NET_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -114,6 +114,16 @@ static const struct file_operations nfp_rx_q_fops = {
.llseek = seq_lseek
};
static int nfp_net_debugfs_tx_q_open(struct inode *inode, struct file *f);
static const struct file_operations nfp_tx_q_fops = {
.owner = THIS_MODULE,
.open = nfp_net_debugfs_tx_q_open,
.release = single_release,
.read = seq_read,
.llseek = seq_lseek
};
static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data)
{
struct nfp_net_r_vector *r_vec = file->private;
@ -126,10 +136,13 @@ static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data)
rtnl_lock();
if (!r_vec->nfp_net || !r_vec->tx_ring)
if (debugfs_real_fops(file->file) == &nfp_tx_q_fops)
tx_ring = r_vec->tx_ring;
else
tx_ring = r_vec->xdp_ring;
if (!r_vec->nfp_net || !tx_ring)
goto out;
nn = r_vec->nfp_net;
tx_ring = r_vec->tx_ring;
if (!netif_running(nn->netdev))
goto out;
@ -148,9 +161,14 @@ static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data)
txd->vals[2], txd->vals[3]);
skb = READ_ONCE(tx_ring->txbufs[i].skb);
if (skb)
seq_printf(file, " skb->head=%p skb->data=%p",
skb->head, skb->data);
if (skb) {
if (tx_ring == r_vec->tx_ring)
seq_printf(file, " skb->head=%p skb->data=%p",
skb->head, skb->data);
else
seq_printf(file, " frag=%p", skb);
}
if (tx_ring->txbufs[i].dma_addr)
seq_printf(file, " dma_addr=%pad",
&tx_ring->txbufs[i].dma_addr);
@ -176,7 +194,7 @@ static int nfp_net_debugfs_tx_q_open(struct inode *inode, struct file *f)
return single_open(f, nfp_net_debugfs_tx_q_read, inode->i_private);
}
static const struct file_operations nfp_tx_q_fops = {
static const struct file_operations nfp_xdp_q_fops = {
.owner = THIS_MODULE,
.open = nfp_net_debugfs_tx_q_open,
.release = single_release,
@ -186,7 +204,7 @@ static const struct file_operations nfp_tx_q_fops = {
void nfp_net_debugfs_adapter_add(struct nfp_net *nn)
{
struct dentry *queues, *tx, *rx;
struct dentry *queues, *tx, *rx, *xdp;
char int_name[16];
int i;
@ -204,16 +222,19 @@ void nfp_net_debugfs_adapter_add(struct nfp_net *nn)
rx = debugfs_create_dir("rx", queues);
tx = debugfs_create_dir("tx", queues);
if (IS_ERR_OR_NULL(rx) || IS_ERR_OR_NULL(tx))
xdp = debugfs_create_dir("xdp", queues);
if (IS_ERR_OR_NULL(rx) || IS_ERR_OR_NULL(tx) || IS_ERR_OR_NULL(xdp))
return;
for (i = 0; i < nn->num_rx_rings; i++) {
for (i = 0; i < min(nn->max_rx_rings, nn->max_r_vecs); i++) {
sprintf(int_name, "%d", i);
debugfs_create_file(int_name, S_IRUSR, rx,
&nn->r_vecs[i], &nfp_rx_q_fops);
debugfs_create_file(int_name, S_IRUSR, xdp,
&nn->r_vecs[i], &nfp_xdp_q_fops);
}
for (i = 0; i < nn->num_tx_rings; i++) {
for (i = 0; i < min(nn->max_tx_rings, nn->max_r_vecs); i++) {
sprintf(int_name, "%d", i);
debugfs_create_file(int_name, S_IRUSR, tx,
&nn->r_vecs[i], &nfp_tx_q_fops);

View File

@ -158,6 +158,28 @@ static void nfp_net_get_ringparam(struct net_device *netdev,
ring->tx_pending = nn->txd_cnt;
}
static int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt)
{
struct nfp_net_ring_set *reconfig_rx = NULL, *reconfig_tx = NULL;
struct nfp_net_ring_set rx = {
.n_rings = nn->num_rx_rings,
.mtu = nn->netdev->mtu,
.dcnt = rxd_cnt,
};
struct nfp_net_ring_set tx = {
.n_rings = nn->num_tx_rings,
.dcnt = txd_cnt,
};
if (nn->rxd_cnt != rxd_cnt)
reconfig_rx = &rx;
if (nn->txd_cnt != txd_cnt)
reconfig_tx = &tx;
return nfp_net_ring_reconfig(nn, &nn->xdp_prog,
reconfig_rx, reconfig_tx);
}
static int nfp_net_set_ringparam(struct net_device *netdev,
struct ethtool_ringparam *ring)
{
@ -614,6 +636,76 @@ static int nfp_net_set_coalesce(struct net_device *netdev,
return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD);
}
static void nfp_net_get_channels(struct net_device *netdev,
struct ethtool_channels *channel)
{
struct nfp_net *nn = netdev_priv(netdev);
unsigned int num_tx_rings;
num_tx_rings = nn->num_tx_rings;
if (nn->xdp_prog)
num_tx_rings -= nn->num_rx_rings;
channel->max_rx = min(nn->max_rx_rings, nn->max_r_vecs);
channel->max_tx = min(nn->max_tx_rings, nn->max_r_vecs);
channel->max_combined = min(channel->max_rx, channel->max_tx);
channel->max_other = NFP_NET_NON_Q_VECTORS;
channel->combined_count = min(nn->num_rx_rings, num_tx_rings);
channel->rx_count = nn->num_rx_rings - channel->combined_count;
channel->tx_count = num_tx_rings - channel->combined_count;
channel->other_count = NFP_NET_NON_Q_VECTORS;
}
static int nfp_net_set_num_rings(struct nfp_net *nn, unsigned int total_rx,
unsigned int total_tx)
{
struct nfp_net_ring_set *reconfig_rx = NULL, *reconfig_tx = NULL;
struct nfp_net_ring_set rx = {
.n_rings = total_rx,
.mtu = nn->netdev->mtu,
.dcnt = nn->rxd_cnt,
};
struct nfp_net_ring_set tx = {
.n_rings = total_tx,
.dcnt = nn->txd_cnt,
};
if (nn->num_rx_rings != total_rx)
reconfig_rx = &rx;
if (nn->num_stack_tx_rings != total_tx ||
(nn->xdp_prog && reconfig_rx))
reconfig_tx = &tx;
/* nfp_net_check_config() will catch tx.n_rings > nn->max_tx_rings */
if (nn->xdp_prog)
tx.n_rings += total_rx;
return nfp_net_ring_reconfig(nn, &nn->xdp_prog,
reconfig_rx, reconfig_tx);
}
static int nfp_net_set_channels(struct net_device *netdev,
struct ethtool_channels *channel)
{
struct nfp_net *nn = netdev_priv(netdev);
unsigned int total_rx, total_tx;
/* Reject unsupported */
if (!channel->combined_count ||
channel->other_count != NFP_NET_NON_Q_VECTORS ||
(channel->rx_count && channel->tx_count))
return -EINVAL;
total_rx = channel->combined_count + channel->rx_count;
total_tx = channel->combined_count + channel->tx_count;
if (total_rx > min(nn->max_rx_rings, nn->max_r_vecs) ||
total_tx > min(nn->max_tx_rings, nn->max_r_vecs))
return -EINVAL;
return nfp_net_set_num_rings(nn, total_rx, total_tx);
}
static const struct ethtool_ops nfp_net_ethtool_ops = {
.get_drvinfo = nfp_net_get_drvinfo,
.get_link = ethtool_op_get_link,
@ -632,6 +724,8 @@ static const struct ethtool_ops nfp_net_ethtool_ops = {
.get_regs = nfp_net_get_regs,
.get_coalesce = nfp_net_get_coalesce,
.set_coalesce = nfp_net_set_coalesce,
.get_channels = nfp_net_get_channels,
.set_channels = nfp_net_set_channels,
};
void nfp_net_set_ethtool_ops(struct net_device *netdev)

View File

@ -111,6 +111,9 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
const struct tc_action *a;
LIST_HEAD(actions);
if (!cls_bpf->exts)
return NN_ACT_XDP;
/* TC direct action */
if (cls_bpf->exts_integrated) {
if (tc_no_actions(cls_bpf->exts))
@ -233,9 +236,7 @@ static int nfp_net_bpf_stop(struct nfp_net *nn)
return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
}
int
nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
struct tc_cls_bpf_offload *cls_bpf)
int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
{
struct nfp_bpf_result res;
dma_addr_t dma_addr;

View File

@ -52,7 +52,8 @@ extern struct srcu_struct debugfs_srcu;
* Must only be called under the protection established by
* debugfs_use_file_start().
*/
static inline const struct file_operations *debugfs_real_fops(struct file *filp)
static inline const struct file_operations *
debugfs_real_fops(const struct file *filp)
__must_hold(&debugfs_srcu)
{
/*