mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-11 12:28:41 +08:00
dev: add per net_device packet type chains
When many pf_packet listeners are created on a lot of interfaces, the current implementation using global packet type lists scales poorly. This patch adds per net_device packet type lists to fix this problem.
The patch was originally written by Eric Biederman for linux-2.6.29. Tested on linux-3.16.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Salam Noureddine <noureddine@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
7b4ce694b2
commit
7866a62104
@ -1514,6 +1514,8 @@ struct net_device {
|
|||||||
struct list_head napi_list;
|
struct list_head napi_list;
|
||||||
struct list_head unreg_list;
|
struct list_head unreg_list;
|
||||||
struct list_head close_list;
|
struct list_head close_list;
|
||||||
|
struct list_head ptype_all;
|
||||||
|
struct list_head ptype_specific;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
struct list_head upper;
|
struct list_head upper;
|
||||||
|
@ -371,9 +371,10 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
|
|||||||
static inline struct list_head *ptype_head(const struct packet_type *pt)
|
static inline struct list_head *ptype_head(const struct packet_type *pt)
|
||||||
{
|
{
|
||||||
if (pt->type == htons(ETH_P_ALL))
|
if (pt->type == htons(ETH_P_ALL))
|
||||||
return &ptype_all;
|
return pt->dev ? &pt->dev->ptype_all : &ptype_all;
|
||||||
else
|
else
|
||||||
return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
|
return pt->dev ? &pt->dev->ptype_specific :
|
||||||
|
&ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1734,6 +1735,23 @@ static inline int deliver_skb(struct sk_buff *skb,
|
|||||||
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
|
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void deliver_ptype_list_skb(struct sk_buff *skb,
|
||||||
|
struct packet_type **pt,
|
||||||
|
struct net_device *dev, __be16 type,
|
||||||
|
struct list_head *ptype_list)
|
||||||
|
{
|
||||||
|
struct packet_type *ptype, *pt_prev = *pt;
|
||||||
|
|
||||||
|
list_for_each_entry_rcu(ptype, ptype_list, list) {
|
||||||
|
if (ptype->type != type)
|
||||||
|
continue;
|
||||||
|
if (pt_prev)
|
||||||
|
deliver_skb(skb, pt_prev, dev);
|
||||||
|
pt_prev = ptype;
|
||||||
|
}
|
||||||
|
*pt = pt_prev;
|
||||||
|
}
|
||||||
|
|
||||||
static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
|
static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
if (!ptype->af_packet_priv || !skb->sk)
|
if (!ptype->af_packet_priv || !skb->sk)
|
||||||
@ -1757,29 +1775,33 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
|
|||||||
struct packet_type *ptype;
|
struct packet_type *ptype;
|
||||||
struct sk_buff *skb2 = NULL;
|
struct sk_buff *skb2 = NULL;
|
||||||
struct packet_type *pt_prev = NULL;
|
struct packet_type *pt_prev = NULL;
|
||||||
|
struct list_head *ptype_list = &ptype_all;
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
list_for_each_entry_rcu(ptype, &ptype_all, list) {
|
again:
|
||||||
|
list_for_each_entry_rcu(ptype, ptype_list, list) {
|
||||||
/* Never send packets back to the socket
|
/* Never send packets back to the socket
|
||||||
* they originated from - MvS (miquels@drinkel.ow.org)
|
* they originated from - MvS (miquels@drinkel.ow.org)
|
||||||
*/
|
*/
|
||||||
if ((ptype->dev == dev || !ptype->dev) &&
|
if (skb_loop_sk(ptype, skb))
|
||||||
(!skb_loop_sk(ptype, skb))) {
|
continue;
|
||||||
|
|
||||||
if (pt_prev) {
|
if (pt_prev) {
|
||||||
deliver_skb(skb2, pt_prev, skb->dev);
|
deliver_skb(skb2, pt_prev, skb->dev);
|
||||||
pt_prev = ptype;
|
pt_prev = ptype;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* need to clone skb, done only once */
|
||||||
skb2 = skb_clone(skb, GFP_ATOMIC);
|
skb2 = skb_clone(skb, GFP_ATOMIC);
|
||||||
if (!skb2)
|
if (!skb2)
|
||||||
break;
|
goto out_unlock;
|
||||||
|
|
||||||
net_timestamp_set(skb2);
|
net_timestamp_set(skb2);
|
||||||
|
|
||||||
/* skb->nh should be correctly
|
/* skb->nh should be correctly
|
||||||
set by sender, so that the second statement is
|
* set by sender, so that the second statement is
|
||||||
just protection against buggy protocols.
|
* just protection against buggy protocols.
|
||||||
*/
|
*/
|
||||||
skb_reset_mac_header(skb2);
|
skb_reset_mac_header(skb2);
|
||||||
|
|
||||||
@ -1795,7 +1817,12 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
|
|||||||
skb2->pkt_type = PACKET_OUTGOING;
|
skb2->pkt_type = PACKET_OUTGOING;
|
||||||
pt_prev = ptype;
|
pt_prev = ptype;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ptype_list == &ptype_all) {
|
||||||
|
ptype_list = &dev->ptype_all;
|
||||||
|
goto again;
|
||||||
}
|
}
|
||||||
|
out_unlock:
|
||||||
if (pt_prev)
|
if (pt_prev)
|
||||||
pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
|
pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
@ -2617,7 +2644,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
|
|||||||
unsigned int len;
|
unsigned int len;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
if (!list_empty(&ptype_all))
|
if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all))
|
||||||
dev_queue_xmit_nit(skb, dev);
|
dev_queue_xmit_nit(skb, dev);
|
||||||
|
|
||||||
len = skb->len;
|
len = skb->len;
|
||||||
@ -3615,7 +3642,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
|
|||||||
struct packet_type *ptype, *pt_prev;
|
struct packet_type *ptype, *pt_prev;
|
||||||
rx_handler_func_t *rx_handler;
|
rx_handler_func_t *rx_handler;
|
||||||
struct net_device *orig_dev;
|
struct net_device *orig_dev;
|
||||||
struct net_device *null_or_dev;
|
|
||||||
bool deliver_exact = false;
|
bool deliver_exact = false;
|
||||||
int ret = NET_RX_DROP;
|
int ret = NET_RX_DROP;
|
||||||
__be16 type;
|
__be16 type;
|
||||||
@ -3658,11 +3684,15 @@ another_round:
|
|||||||
goto skip_taps;
|
goto skip_taps;
|
||||||
|
|
||||||
list_for_each_entry_rcu(ptype, &ptype_all, list) {
|
list_for_each_entry_rcu(ptype, &ptype_all, list) {
|
||||||
if (!ptype->dev || ptype->dev == skb->dev) {
|
|
||||||
if (pt_prev)
|
if (pt_prev)
|
||||||
ret = deliver_skb(skb, pt_prev, orig_dev);
|
ret = deliver_skb(skb, pt_prev, orig_dev);
|
||||||
pt_prev = ptype;
|
pt_prev = ptype;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
|
||||||
|
if (pt_prev)
|
||||||
|
ret = deliver_skb(skb, pt_prev, orig_dev);
|
||||||
|
pt_prev = ptype;
|
||||||
}
|
}
|
||||||
|
|
||||||
skip_taps:
|
skip_taps:
|
||||||
@ -3718,19 +3748,21 @@ ncls:
|
|||||||
skb->vlan_tci = 0;
|
skb->vlan_tci = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* deliver only exact match when indicated */
|
|
||||||
null_or_dev = deliver_exact ? skb->dev : NULL;
|
|
||||||
|
|
||||||
type = skb->protocol;
|
type = skb->protocol;
|
||||||
list_for_each_entry_rcu(ptype,
|
|
||||||
&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
|
/* deliver only exact match when indicated */
|
||||||
if (ptype->type == type &&
|
if (likely(!deliver_exact)) {
|
||||||
(ptype->dev == null_or_dev || ptype->dev == skb->dev ||
|
deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
|
||||||
ptype->dev == orig_dev)) {
|
&ptype_base[ntohs(type) &
|
||||||
if (pt_prev)
|
PTYPE_HASH_MASK]);
|
||||||
ret = deliver_skb(skb, pt_prev, orig_dev);
|
|
||||||
pt_prev = ptype;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
|
||||||
|
&orig_dev->ptype_specific);
|
||||||
|
|
||||||
|
if (unlikely(skb->dev != orig_dev)) {
|
||||||
|
deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
|
||||||
|
&skb->dev->ptype_specific);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pt_prev) {
|
if (pt_prev) {
|
||||||
@ -6579,6 +6611,8 @@ void netdev_run_todo(void)
|
|||||||
|
|
||||||
/* paranoia */
|
/* paranoia */
|
||||||
BUG_ON(netdev_refcnt_read(dev));
|
BUG_ON(netdev_refcnt_read(dev));
|
||||||
|
BUG_ON(!list_empty(&dev->ptype_all));
|
||||||
|
BUG_ON(!list_empty(&dev->ptype_specific));
|
||||||
WARN_ON(rcu_access_pointer(dev->ip_ptr));
|
WARN_ON(rcu_access_pointer(dev->ip_ptr));
|
||||||
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
|
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
|
||||||
WARN_ON(dev->dn_ptr);
|
WARN_ON(dev->dn_ptr);
|
||||||
@ -6761,6 +6795,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
|
|||||||
INIT_LIST_HEAD(&dev->adj_list.lower);
|
INIT_LIST_HEAD(&dev->adj_list.lower);
|
||||||
INIT_LIST_HEAD(&dev->all_adj_list.upper);
|
INIT_LIST_HEAD(&dev->all_adj_list.upper);
|
||||||
INIT_LIST_HEAD(&dev->all_adj_list.lower);
|
INIT_LIST_HEAD(&dev->all_adj_list.lower);
|
||||||
|
INIT_LIST_HEAD(&dev->ptype_all);
|
||||||
|
INIT_LIST_HEAD(&dev->ptype_specific);
|
||||||
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
|
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
|
||||||
setup(dev);
|
setup(dev);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user