mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-12-12 13:34:10 +08:00
tun: support NAPI for packets received from batched XDP buffs
In tun, NAPI is supported and we can also use NAPI in the path of batched XDP buffs to accelerate packet processing. What is more, after we use NAPI, GRO is also supported. The iperf test shows that the throughput of a single stream could be improved from 4.5Gbps to 9.2Gbps. Additionally, 9.2 Gbps nearly reaches the line speed of the phy nic and there is still about 15% idle cpu core remaining on the vhost thread. Test topology: [iperf server]<--->tap<--->dpdk testpmd<--->phy nic<--->[iperf client] Iperf stream: iperf3 -c 10.0.0.2 -i 1 -t 10 Before: ... [ 5] 5.00-6.00 sec 558 MBytes 4.68 Gbits/sec 0 1.50 MBytes [ 5] 6.00-7.00 sec 556 MBytes 4.67 Gbits/sec 1 1.35 MBytes [ 5] 7.00-8.00 sec 556 MBytes 4.67 Gbits/sec 2 1.18 MBytes [ 5] 8.00-9.00 sec 559 MBytes 4.69 Gbits/sec 0 1.48 MBytes [ 5] 9.00-10.00 sec 556 MBytes 4.67 Gbits/sec 1 1.33 MBytes - - - - - - - - - - - - - - - - - - - - - - - - - [ ID] Interval Transfer Bitrate Retr [ 5] 0.00-10.00 sec 5.39 GBytes 4.63 Gbits/sec 72 sender [ 5] 0.00-10.04 sec 5.39 GBytes 4.61 Gbits/sec receiver After: ... [ 5] 5.00-6.00 sec 1.07 GBytes 9.19 Gbits/sec 0 1.55 MBytes [ 5] 6.00-7.00 sec 1.08 GBytes 9.30 Gbits/sec 0 1.63 MBytes [ 5] 7.00-8.00 sec 1.08 GBytes 9.25 Gbits/sec 0 1.72 MBytes [ 5] 8.00-9.00 sec 1.08 GBytes 9.25 Gbits/sec 77 1.31 MBytes [ 5] 9.00-10.00 sec 1.08 GBytes 9.24 Gbits/sec 0 1.48 MBytes - - - - - - - - - - - - - - - - - - - - - - - - - [ ID] Interval Transfer Bitrate Retr [ 5] 0.00-10.00 sec 10.8 GBytes 9.28 Gbits/sec 166 sender [ 5] 0.00-10.04 sec 10.8 GBytes 9.24 Gbits/sec receiver Reported-at: https://lore.kernel.org/all/CACGkMEvTLG0Ayg+TtbN4q4pPW-ycgCCs3sC3-TF8cuRTf7Pp1A@mail.gmail.com Signed-off-by: Harold Huang <baymaxhuang@gmail.com> Acked-by: Jason Wang <jasowang@redhat.com> Link: https://lore.kernel.org/r/20220228033805.1579435-1-baymaxhuang@gmail.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
422ce83667
commit
fb3f903769
@ -2388,9 +2388,10 @@ static int tun_xdp_one(struct tun_struct *tun,
|
||||
struct virtio_net_hdr *gso = &hdr->gso;
|
||||
struct bpf_prog *xdp_prog;
|
||||
struct sk_buff *skb = NULL;
|
||||
struct sk_buff_head *queue;
|
||||
u32 rxhash = 0, act;
|
||||
int buflen = hdr->buflen;
|
||||
int err = 0;
|
||||
int ret = 0;
|
||||
bool skb_xdp = false;
|
||||
struct page *page;
|
||||
|
||||
@ -2405,13 +2406,13 @@ static int tun_xdp_one(struct tun_struct *tun,
|
||||
xdp_set_data_meta_invalid(xdp);
|
||||
|
||||
act = bpf_prog_run_xdp(xdp_prog, xdp);
|
||||
err = tun_xdp_act(tun, xdp_prog, xdp, act);
|
||||
if (err < 0) {
|
||||
ret = tun_xdp_act(tun, xdp_prog, xdp, act);
|
||||
if (ret < 0) {
|
||||
put_page(virt_to_head_page(xdp->data));
|
||||
return err;
|
||||
return ret;
|
||||
}
|
||||
|
||||
switch (err) {
|
||||
switch (ret) {
|
||||
case XDP_REDIRECT:
|
||||
*flush = true;
|
||||
fallthrough;
|
||||
@ -2435,7 +2436,7 @@ static int tun_xdp_one(struct tun_struct *tun,
|
||||
build:
|
||||
skb = build_skb(xdp->data_hard_start, buflen);
|
||||
if (!skb) {
|
||||
err = -ENOMEM;
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -2445,7 +2446,7 @@ build:
|
||||
if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
|
||||
atomic_long_inc(&tun->rx_frame_errors);
|
||||
kfree_skb(skb);
|
||||
err = -EINVAL;
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -2455,16 +2456,27 @@ build:
|
||||
skb_record_rx_queue(skb, tfile->queue_index);
|
||||
|
||||
if (skb_xdp) {
|
||||
err = do_xdp_generic(xdp_prog, skb);
|
||||
if (err != XDP_PASS)
|
||||
ret = do_xdp_generic(xdp_prog, skb);
|
||||
if (ret != XDP_PASS) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (!rcu_dereference(tun->steering_prog) && tun->numqueues > 1 &&
|
||||
!tfile->detached)
|
||||
rxhash = __skb_get_hash_symmetric(skb);
|
||||
|
||||
netif_receive_skb(skb);
|
||||
if (tfile->napi_enabled) {
|
||||
queue = &tfile->sk.sk_write_queue;
|
||||
spin_lock(&queue->lock);
|
||||
__skb_queue_tail(queue, skb);
|
||||
spin_unlock(&queue->lock);
|
||||
ret = 1;
|
||||
} else {
|
||||
netif_receive_skb(skb);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
/* No need to disable preemption here since this function is
|
||||
* always called with bh disabled
|
||||
@ -2475,7 +2487,7 @@ build:
|
||||
tun_flow_update(tun, rxhash, tfile);
|
||||
|
||||
out:
|
||||
return err;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
|
||||
@ -2492,7 +2504,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
|
||||
if (ctl && (ctl->type == TUN_MSG_PTR)) {
|
||||
struct tun_page tpage;
|
||||
int n = ctl->num;
|
||||
int flush = 0;
|
||||
int flush = 0, queued = 0;
|
||||
|
||||
memset(&tpage, 0, sizeof(tpage));
|
||||
|
||||
@ -2501,12 +2513,17 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
xdp = &((struct xdp_buff *)ctl->ptr)[i];
|
||||
tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
|
||||
ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
|
||||
if (ret > 0)
|
||||
queued += ret;
|
||||
}
|
||||
|
||||
if (flush)
|
||||
xdp_do_flush();
|
||||
|
||||
if (tfile->napi_enabled && queued > 0)
|
||||
napi_schedule(&tfile->napi);
|
||||
|
||||
rcu_read_unlock();
|
||||
local_bh_enable();
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user