mirror of
https://github.com/edk2-porting/linux-next.git
synced 2025-01-09 14:14:00 +08:00
net: Fix packet reordering caused by GRO and listified RX cooperation
Commit323ebb61e3
("net: use listified RX for handling GRO_NORMAL skbs") introduces batching of GRO_NORMAL packets in napi_frags_finish, and commit6570bc79c0
("net: core: use listified Rx for GRO_NORMAL in napi_gro_receive()") adds the same to napi_skb_finish. However, dev_gro_receive (that is called just before napi_{frags,skb}_finish) can also pass skbs to the networking stack: e.g., when the GRO session is flushed, napi_gro_complete is called, which passes pp directly to netif_receive_skb_internal, skipping napi->rx_list. It means that the packet stored in pp will be handled by the stack earlier than the packets that arrived before, but are still waiting in napi->rx_list. It leads to TCP reorderings that can be observed in the TCPOFOQueue counter in netstat. This commit fixes the reordering issue by making napi_gro_complete also use napi->rx_list, so that all packets going through GRO will keep their order. In order to keep napi_gro_flush working properly, gro_normal_list calls are moved after the flush to clear napi->rx_list. iwlwifi calls napi_gro_flush directly and does the same thing that is done by gro_normal_list, so the same change is applied there: napi_gro_flush is moved to be before the flush of napi->rx_list. A few other drivers also use napi_gro_flush (brocade/bna/bnad.c, cortina/gemini.c, hisilicon/hns3/hns3_enet.c). The first two also use napi_complete_done afterwards, which performs the gro_normal_list flush, so they are fine. The latter calls napi_gro_receive right after napi_gro_flush, so it can end up with non-empty napi->rx_list anyway. Fixes:323ebb61e3
("net: use listified RX for handling GRO_NORMAL skbs") Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com> Cc: Alexander Lobakin <alobakin@dlink.ru> Cc: Edward Cree <ecree@solarflare.com> Acked-by: Alexander Lobakin <alobakin@dlink.ru> Acked-by: Saeed Mahameed <saeedm@mellanox.com> Acked-by: Edward Cree <ecree@solarflare.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
0ace17d568
commit
c80794323e
@ -1529,13 +1529,13 @@ out:
|
|||||||
|
|
||||||
napi = &rxq->napi;
|
napi = &rxq->napi;
|
||||||
if (napi->poll) {
|
if (napi->poll) {
|
||||||
|
napi_gro_flush(napi, false);
|
||||||
|
|
||||||
if (napi->rx_count) {
|
if (napi->rx_count) {
|
||||||
netif_receive_skb_list(&napi->rx_list);
|
netif_receive_skb_list(&napi->rx_list);
|
||||||
INIT_LIST_HEAD(&napi->rx_list);
|
INIT_LIST_HEAD(&napi->rx_list);
|
||||||
napi->rx_count = 0;
|
napi->rx_count = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
napi_gro_flush(napi, false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
iwl_pcie_rxq_restock(trans, rxq);
|
iwl_pcie_rxq_restock(trans, rxq);
|
||||||
|
@ -5491,9 +5491,29 @@ static void flush_all_backlogs(void)
|
|||||||
put_online_cpus();
|
put_online_cpus();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
|
||||||
|
static void gro_normal_list(struct napi_struct *napi)
|
||||||
|
{
|
||||||
|
if (!napi->rx_count)
|
||||||
|
return;
|
||||||
|
netif_receive_skb_list_internal(&napi->rx_list);
|
||||||
|
INIT_LIST_HEAD(&napi->rx_list);
|
||||||
|
napi->rx_count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
|
||||||
|
* pass the whole batch up to the stack.
|
||||||
|
*/
|
||||||
|
static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
list_add_tail(&skb->list, &napi->rx_list);
|
||||||
|
if (++napi->rx_count >= gro_normal_batch)
|
||||||
|
gro_normal_list(napi);
|
||||||
|
}
|
||||||
|
|
||||||
INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
|
INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
|
||||||
INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
|
INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
|
||||||
static int napi_gro_complete(struct sk_buff *skb)
|
static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
struct packet_offload *ptype;
|
struct packet_offload *ptype;
|
||||||
__be16 type = skb->protocol;
|
__be16 type = skb->protocol;
|
||||||
@ -5526,7 +5546,8 @@ static int napi_gro_complete(struct sk_buff *skb)
|
|||||||
}
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
return netif_receive_skb_internal(skb);
|
gro_normal_one(napi, skb);
|
||||||
|
return NET_RX_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
|
static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
|
||||||
@ -5539,7 +5560,7 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
|
|||||||
if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
|
if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
|
||||||
return;
|
return;
|
||||||
skb_list_del_init(skb);
|
skb_list_del_init(skb);
|
||||||
napi_gro_complete(skb);
|
napi_gro_complete(napi, skb);
|
||||||
napi->gro_hash[index].count--;
|
napi->gro_hash[index].count--;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5641,7 +5662,7 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void gro_flush_oldest(struct list_head *head)
|
static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
|
||||||
{
|
{
|
||||||
struct sk_buff *oldest;
|
struct sk_buff *oldest;
|
||||||
|
|
||||||
@ -5657,7 +5678,7 @@ static void gro_flush_oldest(struct list_head *head)
|
|||||||
* SKB to the chain.
|
* SKB to the chain.
|
||||||
*/
|
*/
|
||||||
skb_list_del_init(oldest);
|
skb_list_del_init(oldest);
|
||||||
napi_gro_complete(oldest);
|
napi_gro_complete(napi, oldest);
|
||||||
}
|
}
|
||||||
|
|
||||||
INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
|
INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
|
||||||
@ -5733,7 +5754,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
|
|||||||
|
|
||||||
if (pp) {
|
if (pp) {
|
||||||
skb_list_del_init(pp);
|
skb_list_del_init(pp);
|
||||||
napi_gro_complete(pp);
|
napi_gro_complete(napi, pp);
|
||||||
napi->gro_hash[hash].count--;
|
napi->gro_hash[hash].count--;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5744,7 +5765,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
|
|||||||
goto normal;
|
goto normal;
|
||||||
|
|
||||||
if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
|
if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
|
||||||
gro_flush_oldest(gro_head);
|
gro_flush_oldest(napi, gro_head);
|
||||||
} else {
|
} else {
|
||||||
napi->gro_hash[hash].count++;
|
napi->gro_hash[hash].count++;
|
||||||
}
|
}
|
||||||
@ -5802,26 +5823,6 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL(gro_find_complete_by_type);
|
EXPORT_SYMBOL(gro_find_complete_by_type);
|
||||||
|
|
||||||
/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
|
|
||||||
static void gro_normal_list(struct napi_struct *napi)
|
|
||||||
{
|
|
||||||
if (!napi->rx_count)
|
|
||||||
return;
|
|
||||||
netif_receive_skb_list_internal(&napi->rx_list);
|
|
||||||
INIT_LIST_HEAD(&napi->rx_list);
|
|
||||||
napi->rx_count = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
|
|
||||||
* pass the whole batch up to the stack.
|
|
||||||
*/
|
|
||||||
static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
|
|
||||||
{
|
|
||||||
list_add_tail(&skb->list, &napi->rx_list);
|
|
||||||
if (++napi->rx_count >= gro_normal_batch)
|
|
||||||
gro_normal_list(napi);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void napi_skb_free_stolen_head(struct sk_buff *skb)
|
static void napi_skb_free_stolen_head(struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
skb_dst_drop(skb);
|
skb_dst_drop(skb);
|
||||||
@ -6200,8 +6201,6 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
|
|||||||
NAPIF_STATE_IN_BUSY_POLL)))
|
NAPIF_STATE_IN_BUSY_POLL)))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
gro_normal_list(n);
|
|
||||||
|
|
||||||
if (n->gro_bitmask) {
|
if (n->gro_bitmask) {
|
||||||
unsigned long timeout = 0;
|
unsigned long timeout = 0;
|
||||||
|
|
||||||
@ -6217,6 +6216,9 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
|
|||||||
hrtimer_start(&n->timer, ns_to_ktime(timeout),
|
hrtimer_start(&n->timer, ns_to_ktime(timeout),
|
||||||
HRTIMER_MODE_REL_PINNED);
|
HRTIMER_MODE_REL_PINNED);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
gro_normal_list(n);
|
||||||
|
|
||||||
if (unlikely(!list_empty(&n->poll_list))) {
|
if (unlikely(!list_empty(&n->poll_list))) {
|
||||||
/* If n->poll_list is not empty, we need to mask irqs */
|
/* If n->poll_list is not empty, we need to mask irqs */
|
||||||
local_irq_save(flags);
|
local_irq_save(flags);
|
||||||
@ -6548,8 +6550,6 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
|
|||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
gro_normal_list(n);
|
|
||||||
|
|
||||||
if (n->gro_bitmask) {
|
if (n->gro_bitmask) {
|
||||||
/* flush too old packets
|
/* flush too old packets
|
||||||
* If HZ < 1000, flush all packets.
|
* If HZ < 1000, flush all packets.
|
||||||
@ -6557,6 +6557,8 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
|
|||||||
napi_gro_flush(n, HZ >= 1000);
|
napi_gro_flush(n, HZ >= 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
gro_normal_list(n);
|
||||||
|
|
||||||
/* Some drivers may have called napi_schedule
|
/* Some drivers may have called napi_schedule
|
||||||
* prior to exhausting their budget.
|
* prior to exhausting their budget.
|
||||||
*/
|
*/
|
||||||
|
Loading…
Reference in New Issue
Block a user