From 76f084bc10004b3050b2cff9cfac29148f1f6088 Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Wed, 16 Jul 2014 15:57:34 -0700 Subject: [PATCH 01/27] ipvs: Maintain all DSCP and ECN bits for ipv6 tun forwarding Previously, only the four high bits of the tclass were maintained in the ipv6 case. This matches the behavior of ipv4, though whether or not we should reflect ECN bits may be up for debate. Signed-off-by: Alex Gartrell Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_xmit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 73ba1cc7a88d..6f70bdd3a90a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -967,8 +967,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, iph->nexthdr = IPPROTO_IPV6; iph->payload_len = old_iph->payload_len; be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); - iph->priority = old_iph->priority; memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); + ipv6_change_dsfield(iph, 0, ipv6_get_dsfield(old_iph)); iph->daddr = cp->daddr.in6; iph->saddr = saddr; iph->hop_limit = old_iph->hop_limit; From 8452e6ff3efce76335eba9d29d2a5b3b524b9f1f Mon Sep 17 00:00:00 2001 From: Jiri Prchal Date: Fri, 25 Jul 2014 15:58:33 +0200 Subject: [PATCH 02/27] netfilter: xt_LED: fix too short led-always-blink If led-always-blink is set, then the time between switching the LED OFF and ON is almost zero, so the blink is invisible. This uses the oneshot LED trigger with a fixed time of 50 ms, which is enough to see the blink.
Signed-off-by: Jiri Prchal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_LED.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index 993de2ba89d3..92c71cdc4739 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -50,11 +50,14 @@ struct xt_led_info_internal { struct timer_list timer; }; +#define XT_LED_BLINK_DELAY 50 /* ms */ + static unsigned int led_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_led_info *ledinfo = par->targinfo; struct xt_led_info_internal *ledinternal = ledinfo->internal_data; + unsigned long led_delay = XT_LED_BLINK_DELAY; /* * If "always blink" is enabled, and there's still some time until the @@ -62,9 +65,10 @@ led_tg(struct sk_buff *skb, const struct xt_action_param *par) */ if ((ledinfo->delay > 0) && ledinfo->always_blink && timer_pending(&ledinternal->timer)) - led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF); - - led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL); + led_trigger_blink_oneshot(&ledinternal->netfilter_led_trigger, + &led_delay, &led_delay, 1); + else + led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL); /* If there's a positive delay, start/update the timer */ if (ledinfo->delay > 0) { From dabf24d168ae7da5c3dc8c383db64b200e9ab483 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Mon, 28 Jul 2014 15:03:52 +0200 Subject: [PATCH 03/27] bna: fill the magic in bnad_get_eeprom() instead of validating A driver should fill magic field of ethtool_eeprom struct in .get_eeprom and validate it in .set_eeprom. The bna incorrectly validates it in both and this makes its .get_eeprom interface unusable. Signed-off-by: Ivan Vecera Signed-off-by: David S. 
Miller --- drivers/net/ethernet/brocade/bna/bnad_ethtool.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c index 882cad71ad62..d26adac6ab99 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c +++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c @@ -997,10 +997,8 @@ bnad_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, unsigned long flags = 0; int ret = 0; - /* Check if the flash read request is valid */ - if (eeprom->magic != (bnad->pcidev->vendor | - (bnad->pcidev->device << 16))) - return -EFAULT; + /* Fill the magic value */ + eeprom->magic = bnad->pcidev->vendor | (bnad->pcidev->device << 16); /* Query the flash partition based on the offset */ flash_part = bnad_get_flash_partition_by_offset(bnad, From 95cb5745983c222867cc9ac593aebb2ad67d72c0 Mon Sep 17 00:00:00 2001 From: Dmitry Popov Date: Tue, 29 Jul 2014 03:07:52 +0400 Subject: [PATCH 04/27] ip_tunnel(ipv4): fix tunnels with "local any remote $remote_ip" Ipv4 tunnels created with "local any remote $ip" didn't work properly since 7d442fab0 (ipv4: Cache dst in tunnels). 99% of packets sent via those tunnels had src addr = 0.0.0.0. That was because only dst_entry was cached, although fl4.saddr has to be cached too. Every time ip_tunnel_xmit used cached dst_entry (tunnel_rtable_get returned non-NULL), fl4.saddr was initialized with tnl_params->saddr (= 0 in our case), and wasn't changed until iptunnel_xmit(). This patch adds saddr to ip_tunnel->dst_cache, fixing this issue. Reported-by: Sergey Popov Signed-off-by: Dmitry Popov Signed-off-by: David S. 
Miller --- include/net/ip_tunnels.h | 1 + net/ipv4/ip_tunnel.c | 29 ++++++++++++++++++----------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index a4daf9eb8562..8dd8cab88b87 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -40,6 +40,7 @@ struct ip_tunnel_prl_entry { struct ip_tunnel_dst { struct dst_entry __rcu *dst; + __be32 saddr; }; struct ip_tunnel { diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 6f9de61dce5f..45920d928341 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -69,23 +69,25 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote) } static void __tunnel_dst_set(struct ip_tunnel_dst *idst, - struct dst_entry *dst) + struct dst_entry *dst, __be32 saddr) { struct dst_entry *old_dst; dst_clone(dst); old_dst = xchg((__force struct dst_entry **)&idst->dst, dst); dst_release(old_dst); + idst->saddr = saddr; } -static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst) +static void tunnel_dst_set(struct ip_tunnel *t, + struct dst_entry *dst, __be32 saddr) { - __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst); + __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst, saddr); } static void tunnel_dst_reset(struct ip_tunnel *t) { - tunnel_dst_set(t, NULL); + tunnel_dst_set(t, NULL, 0); } void ip_tunnel_dst_reset_all(struct ip_tunnel *t) @@ -93,20 +95,25 @@ void ip_tunnel_dst_reset_all(struct ip_tunnel *t) int i; for_each_possible_cpu(i) - __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); + __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0); } EXPORT_SYMBOL(ip_tunnel_dst_reset_all); -static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie) +static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, + u32 cookie, __be32 *saddr) { + struct ip_tunnel_dst *idst; struct dst_entry *dst; rcu_read_lock(); - dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst); + idst = this_cpu_ptr(t->dst_cache); + dst = 
rcu_dereference(idst->dst); if (dst && !atomic_inc_not_zero(&dst->__refcnt)) dst = NULL; if (dst) { - if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) { + if (!dst->obsolete || dst->ops->check(dst, cookie)) { + *saddr = idst->saddr; + } else { tunnel_dst_reset(t); dst_release(dst); dst = NULL; @@ -367,7 +374,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) if (!IS_ERR(rt)) { tdev = rt->dst.dev; - tunnel_dst_set(tunnel, &rt->dst); + tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); ip_rt_put(rt); } if (dev->type != ARPHRD_ETHER) @@ -610,7 +617,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); - rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL; + rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL; if (!rt) { rt = ip_route_output_key(tunnel->net, &fl4); @@ -620,7 +627,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } if (connected) - tunnel_dst_set(tunnel, &rt->dst); + tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); } if (rt->dst.dev == dev) { From 45a07695bc64b3ab5d6d2215f9677e5b8c05a7d0 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Tue, 29 Jul 2014 12:07:27 +0200 Subject: [PATCH 05/27] tcp: Fix integer-overflows in TCP veno In veno we do a multiplication of the cwnd and the rtt. This may overflow and thus their result is stored in a u64. However, we first need to cast the cwnd so that actually 64-bit arithmetic is done. A first attempt at fixing 76f1017757aa0 ([TCP]: TCP Veno congestion control) was made by 159131149c2 (tcp: Overflow bug in Vegas), but it failed to add the required cast in tcp_veno_cong_avoid(). Fixes: 76f1017757aa0 ([TCP]: TCP Veno congestion control) Signed-off-by: Christoph Paasch Signed-off-by: David S. 
Miller --- net/ipv4/tcp_veno.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 27b9825753d1..8276977d2c85 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -144,7 +144,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) rtt = veno->minrtt; - target_cwnd = (tp->snd_cwnd * veno->basertt); + target_cwnd = (u64)tp->snd_cwnd * veno->basertt; target_cwnd <<= V_PARAM_SHIFT; do_div(target_cwnd, rtt); From 1f74e613ded11517db90b2bd57e9464d9e0fb161 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Tue, 29 Jul 2014 13:40:57 +0200 Subject: [PATCH 06/27] tcp: Fix integer-overflow in TCP vegas In vegas we do a multiplication of the cwnd and the rtt. This may overflow and thus their result is stored in a u64. However, we first need to cast the cwnd so that actually 64-bit arithmetic is done. Then, we need to do do_div to allow this to be used on 32-bit arches. Cc: Stephen Hemminger Cc: Neal Cardwell Cc: Eric Dumazet Cc: David Laight Cc: Doug Leith Fixes: 8d3a564da34e (tcp: tcp_vegas cong avoid fix) Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- net/ipv4/tcp_vegas.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 9a5e05f27f4f..b40ad897f945 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -218,7 +218,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) * This is: * (actual rate in segments) * baseRTT */ - target_cwnd = tp->snd_cwnd * vegas->baseRTT / rtt; + target_cwnd = (u64)tp->snd_cwnd * vegas->baseRTT; + do_div(target_cwnd, rtt); /* Calculate the difference between the window we had, * and the window we would like to have. 
This quantity From c36c9d50cc6af5c5bfcc195f21b73f55520c15f9 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 29 Jul 2014 16:29:30 +0200 Subject: [PATCH 07/27] bna: fix performance regression The recent commit "e29aa33 bna: Enable Multi Buffer RX" is causing a performance regression. It does not properly update 'cmpl' pointer at the end of the loop in NAPI handler bnad_cq_process(). The result is only one packet / per NAPI-schedule is processed. Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bnad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 3a77f9ead004..556aab75f490 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -600,9 +600,9 @@ bnad_cq_process(struct bnad *bnad, struct bna_ccb *ccb, int budget) prefetch(bnad->netdev); cq = ccb->sw_q; - cmpl = &cq[ccb->producer_index]; while (packets < budget) { + cmpl = &cq[ccb->producer_index]; if (!cmpl->valid) break; /* The 'valid' field is set by the adapter, only after writing From 7afcaec4969652e177cf0b247a1530ac927a20f8 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Tue, 29 Jul 2014 13:27:43 +0200 Subject: [PATCH 08/27] bonding: use kobject_put instead of _del after kobject_add Otherwise the name of the kobject isn't getting freed and other stuff from kobject_cleanup() isn't getting called. kobject_put() will call kobject_del() on its own in kobject_cleanup(). CC: Jay Vosburgh CC: Andy Gospodarek Signed-off-by: Veaceslav Falico Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_sysfs_slave.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c index 198677f58ce0..5cd532ca1cfe 100644 --- a/drivers/net/bonding/bond_sysfs_slave.c +++ b/drivers/net/bonding/bond_sysfs_slave.c @@ -125,7 +125,7 @@ int bond_sysfs_slave_add(struct slave *slave) for (a = slave_attrs; *a; ++a) { err = sysfs_create_file(&slave->kobj, &((*a)->attr)); if (err) { - kobject_del(&slave->kobj); + kobject_put(&slave->kobj); return err; } } @@ -140,5 +140,5 @@ void bond_sysfs_slave_del(struct slave *slave) for (a = slave_attrs; *a; ++a) sysfs_remove_file(&slave->kobj, &((*a)->attr)); - kobject_del(&slave->kobj); + kobject_put(&slave->kobj); } From 47fab41ab51a18a2e5fc4ec63f16b4c6702809b6 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Wed, 30 Jul 2014 13:31:51 +0900 Subject: [PATCH 09/27] bridge: Don't include NDA_VLAN for FDB entries with vid 0 An FDB entry with vlan_id 0 doesn't mean it is used in vlan 0, but used when vlan_filtering is disabled. There is inconsistency around NDA_VLAN whose payload is 0 - even if we add an entry by RTM_NEWNEIGH without any NDA_VLAN, and even though adding an entry with NDA_VLAN 0 is prohibited, we get an entry with NDA_VLAN 0 by RTM_GETNEIGH. Dumping an FDB entry with vlan_id 0 shouldn't include NDA_VLAN. Signed-off-by: Toshiaki Makita Signed-off-by: David S. 
Miller --- net/bridge/br_fdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index b524c36c1273..02359e81f86e 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -631,7 +631,7 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; - if (nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) + if (fdb->vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) goto nla_put_failure; return nlmsg_end(skb, nlh); From 081e83a78db9b0ae1f5eabc2dedecc865f509b98 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 31 Jul 2014 10:30:25 -0400 Subject: [PATCH 10/27] macvlan: Initialize vlan_features to turn on offload support. Macvlan devices do not initialize vlan_features. As a result, any vlan devices configured on top of macvlans perform very poorly. Initialize vlan_features based on the vlan features of the lower-level device. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 958df383068a..ef8a5c20236a 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -646,6 +646,7 @@ static int macvlan_init(struct net_device *dev) (lowerdev->state & MACVLAN_STATE_MASK); dev->features = lowerdev->features & MACVLAN_FEATURES; dev->features |= ALWAYS_ON_FEATURES; + dev->vlan_features = lowerdev->vlan_features & MACVLAN_FEATURES; dev->gso_max_size = lowerdev->gso_max_size; dev->iflink = lowerdev->ifindex; dev->hard_header_len = lowerdev->hard_header_len; From db8c8ab61a28d7e3eb86d247b342a853263262c3 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 31 Jul 2014 17:38:22 +0100 Subject: [PATCH 11/27] xen-netfront: fix locking in connect error path If no queues could be created when connecting to the backend, one of the error paths would deadlock. 
Signed-off-by: David Vrabel Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 055222bae6e4..1cc46d00d20a 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -2001,7 +2001,7 @@ abort_transaction_no_dev_fatal: info->queues = NULL; rtnl_lock(); netif_set_real_num_tx_queues(info->netdev, 0); - rtnl_lock(); + rtnl_unlock(); out: return err; } From a5b5dc3ce4df4f05f4d81c7d3c56a7604b242093 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 31 Jul 2014 17:38:23 +0100 Subject: [PATCH 12/27] xen-netfront: release per-queue Tx and Rx resource when disconnecting Since netfront may reconnect to a backend with a different number of queues, all per-queue Rx and Tx resources (skbs and grant references) should be freed when disconnecting. Without this fix, the Tx and Rx grant refs are not released and netfront will exhaust them after only a few reconnections. netfront will fail to connect when no free grant references are available. Since all Rx bufs are freed and reallocated instead of reused this will add some additional delay to the reconnection but this is expected to be small compared to the time taken by any backend hotplug scripts etc. Signed-off-by: David Vrabel Signed-off-by: David S. 
Miller --- drivers/net/xen-netfront.c | 68 ++++---------------------------------- 1 file changed, 7 insertions(+), 61 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 1cc46d00d20a..0b133a3d4312 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1196,22 +1196,6 @@ static void xennet_release_rx_bufs(struct netfront_queue *queue) spin_unlock_bh(&queue->rx_lock); } -static void xennet_uninit(struct net_device *dev) -{ - struct netfront_info *np = netdev_priv(dev); - unsigned int num_queues = dev->real_num_tx_queues; - struct netfront_queue *queue; - unsigned int i; - - for (i = 0; i < num_queues; ++i) { - queue = &np->queues[i]; - xennet_release_tx_bufs(queue); - xennet_release_rx_bufs(queue); - gnttab_free_grant_references(queue->gref_tx_head); - gnttab_free_grant_references(queue->gref_rx_head); - } -} - static netdev_features_t xennet_fix_features(struct net_device *dev, netdev_features_t features) { @@ -1313,7 +1297,6 @@ static void xennet_poll_controller(struct net_device *dev) static const struct net_device_ops xennet_netdev_ops = { .ndo_open = xennet_open, - .ndo_uninit = xennet_uninit, .ndo_stop = xennet_close, .ndo_start_xmit = xennet_start_xmit, .ndo_change_mtu = xennet_change_mtu, @@ -1455,6 +1438,11 @@ static void xennet_disconnect_backend(struct netfront_info *info) napi_synchronize(&queue->napi); + xennet_release_tx_bufs(queue); + xennet_release_rx_bufs(queue); + gnttab_free_grant_references(queue->gref_tx_head); + gnttab_free_grant_references(queue->gref_rx_head); + /* End access and free the pages */ xennet_end_access(queue->tx_ring_ref, queue->tx.sring); xennet_end_access(queue->rx_ring_ref, queue->rx.sring); @@ -2010,10 +1998,7 @@ static int xennet_connect(struct net_device *dev) { struct netfront_info *np = netdev_priv(dev); unsigned int num_queues = 0; - int i, requeue_idx, err; - struct sk_buff *skb; - grant_ref_t ref; - struct xen_netif_rx_request *req; + int err; unsigned int 
feature_rx_copy; unsigned int j = 0; struct netfront_queue *queue = NULL; @@ -2040,47 +2025,8 @@ static int xennet_connect(struct net_device *dev) netdev_update_features(dev); rtnl_unlock(); - /* By now, the queue structures have been set up */ - for (j = 0; j < num_queues; ++j) { - queue = &np->queues[j]; - - /* Step 1: Discard all pending TX packet fragments. */ - spin_lock_irq(&queue->tx_lock); - xennet_release_tx_bufs(queue); - spin_unlock_irq(&queue->tx_lock); - - /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */ - spin_lock_bh(&queue->rx_lock); - - for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { - skb_frag_t *frag; - const struct page *page; - if (!queue->rx_skbs[i]) - continue; - - skb = queue->rx_skbs[requeue_idx] = xennet_get_rx_skb(queue, i); - ref = queue->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(queue, i); - req = RING_GET_REQUEST(&queue->rx, requeue_idx); - - frag = &skb_shinfo(skb)->frags[0]; - page = skb_frag_page(frag); - gnttab_grant_foreign_access_ref( - ref, queue->info->xbdev->otherend_id, - pfn_to_mfn(page_to_pfn(page)), - 0); - req->gref = ref; - req->id = requeue_idx; - - requeue_idx++; - } - - queue->rx.req_prod_pvt = requeue_idx; - - spin_unlock_bh(&queue->rx_lock); - } - /* - * Step 3: All public and private state should now be sane. Get + * All public and private state should now be sane. Get * ready to start sending and receiving packets and give the driver * domain a kick because we've probably just requeued some * packets. From 69cb85242f4ff1cbbac5a45c05223600084760e8 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 31 Jul 2014 17:38:24 +0100 Subject: [PATCH 13/27] xen-netfront: print correct number of queues When less than the requested number of queues could be created, include the actual number in the warning (instead of the requested number). Signed-off-by: David Vrabel Signed-off-by: David S. 
Miller --- drivers/net/xen-netfront.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 0b133a3d4312..28204bc4f369 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1815,8 +1815,8 @@ static int xennet_create_queues(struct netfront_info *info, ret = xennet_init_queue(queue); if (ret < 0) { - dev_warn(&info->netdev->dev, "only created %d queues\n", - num_queues); + dev_warn(&info->netdev->dev, + "only created %d queues\n", i); num_queues = i; break; } From fcdfe3a7fa4cb74391d42b6a26dc07c20dab1d82 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 31 Jul 2014 10:33:06 -0400 Subject: [PATCH 14/27] net: Correctly set segment mac_len in skb_segment(). When performing segmentation, the mac_len value is copied right out of the original skb. However, this value is not always set correctly (like when the packet is VLAN-tagged) and we'll end up copying a bad value. One way to demonstrate this is to configure a VM which tags packets internally and turn off VLAN acceleration on the forwarding bridge port. The packets show up corrupt like this: 16:18:24.985548 52:54:00:ab:be:25 > 52:54:00:26:ce:a3, ethertype 802.1Q (0x8100), length 1518: vlan 100, p 0, ethertype 0x05e0, 0x0000: 8cdb 1c7c 8cdb 0064 4006 b59d 0a00 6402 ...|...d@.....d. 0x0010: 0a00 6401 9e0d b441 0a5e 64ec 0330 14fa ..d....A.^d..0.. 0x0020: 29e3 01c9 f871 0000 0101 080a 000a e833 )....q.........3 0x0030: 000f 8c75 6e65 7470 6572 6600 6e65 7470 ...unetperf.netp 0x0040: 6572 6600 6e65 7470 6572 6600 6e65 7470 erf.netperf.netp 0x0050: 6572 6600 6e65 7470 6572 6600 6e65 7470 erf.netperf.netp 0x0060: 6572 6600 6e65 7470 6572 6600 6e65 7470 erf.netperf.netp ... This also leads to awful throughput as GSO packets are dropped and cause retransmissions. The solution is to set the mac_len using the values already available in the new skb.
We've already adjusted all of the header offset, so we might as well correctly figure out the mac_len using skb_reset_mac_len(). After this change, packets are segmented correctly and performance is restored. CC: Eric Dumazet Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c1a33033cbe2..58ff88edbefd 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2976,9 +2976,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, tail = nskb; __copy_skb_header(nskb, head_skb); - nskb->mac_len = head_skb->mac_len; skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom); + skb_reset_mac_len(nskb); skb_copy_from_linear_data_offset(head_skb, -tnl_hlen, nskb->data - tnl_hlen, From 0dc1362562a2e8b82a6be8d3ae307a234f28f9bc Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 1 Aug 2014 17:25:38 +0200 Subject: [PATCH 15/27] netfilter: nf_tables: Avoid duplicate call to nft_data_uninit() for same key nft_del_setelem() currently calls nft_data_uninit() twice on the same key. Once to release the key which is guaranteed to be NFT_DATA_VALUE and a second time in the error path to which it falls through. The second call has been harmless so far though because the type passed is always NFT_DATA_VALUE which is currently a no-op. 
Signed-off-by: Thomas Graf Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8746ff9a8357..b35ba833e13c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3218,6 +3218,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (set->flags & NFT_SET_MAP) nft_data_uninit(&elem.data, set->dtype); + return 0; err2: nft_data_uninit(&elem.key, desc.type); err1: From 06ebb06d49486676272a3c030bfeef4bd969a8e6 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 31 Jul 2014 23:00:35 -0400 Subject: [PATCH 16/27] iovec: make sure the caller actually wants anything in memcpy_fromiovecend Check for cases when the caller requests 0 bytes instead of running off and dereferencing potentially invalid iovecs. Signed-off-by: Sasha Levin Signed-off-by: David S. Miller --- lib/iovec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/iovec.c b/lib/iovec.c index 7a7c2da4cddf..df3abd1eaa4a 100644 --- a/lib/iovec.c +++ b/lib/iovec.c @@ -85,6 +85,10 @@ EXPORT_SYMBOL(memcpy_toiovecend); int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, int offset, int len) { + /* No data? Done! */ + if (len == 0) + return 0; + /* Skip over the finished iovecs */ while (offset >= iov->iov_len) { offset -= iov->iov_len; From dbcdd4d58c7230bea3157d56d6ef77c493b3865b Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Fri, 1 Aug 2014 14:01:51 +0200 Subject: [PATCH 17/27] cdc_subset: deal with a device that needs reset for timeout This device needs to be reset to recover from a timeout. Unfortunately this can be handled only at the level of the subdrivers. Signed-off-by: Oliver Neukum Signed-off-by: David S. 
Miller --- drivers/net/usb/cdc_subset.c | 27 ++++++++++++++++++++++++++- drivers/net/usb/usbnet.c | 8 ++++++-- include/linux/usb/usbnet.h | 3 +++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/cdc_subset.c b/drivers/net/usb/cdc_subset.c index 91f0919fe278..6ea98cff2d3b 100644 --- a/drivers/net/usb/cdc_subset.c +++ b/drivers/net/usb/cdc_subset.c @@ -85,14 +85,28 @@ static int always_connected (struct usbnet *dev) * *-------------------------------------------------------------------------*/ +static void m5632_recover(struct usbnet *dev) +{ + struct usb_device *udev = dev->udev; + struct usb_interface *intf = dev->intf; + int r; + + r = usb_lock_device_for_reset(udev, intf); + if (r < 0) + return; + + usb_reset_device(udev); + usb_unlock_device(udev); +} + static const struct driver_info ali_m5632_info = { .description = "ALi M5632", .flags = FLAG_POINTTOPOINT, + .recover = m5632_recover, }; #endif - #ifdef CONFIG_USB_AN2720 #define HAVE_HARDWARE @@ -326,12 +340,23 @@ static const struct usb_device_id products [] = { MODULE_DEVICE_TABLE(usb, products); /*-------------------------------------------------------------------------*/ +static int dummy_prereset(struct usb_interface *intf) +{ + return 0; +} + +static int dummy_postreset(struct usb_interface *intf) +{ + return 0; +} static struct usb_driver cdc_subset_driver = { .name = "cdc_subset", .probe = usbnet_probe, .suspend = usbnet_suspend, .resume = usbnet_resume, + .pre_reset = dummy_prereset, + .post_reset = dummy_postreset, .disconnect = usbnet_disconnect, .id_table = products, .disable_hub_initiated_lpm = 1, diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index f9e96c427558..5173821a9575 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1218,8 +1218,12 @@ void usbnet_tx_timeout (struct net_device *net) unlink_urbs (dev, &dev->txq); tasklet_schedule (&dev->bh); - - // FIXME: device recovery -- reset? 
+ /* this needs to be handled individually because the generic layer + * doesn't know what is sufficient and could not restore private + * information if a remedy of an unconditional reset were used. + */ + if (dev->driver_info->recover) + (dev->driver_info->recover)(dev); } EXPORT_SYMBOL_GPL(usbnet_tx_timeout); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 0662e98fef72..26088feb6608 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -148,6 +148,9 @@ struct driver_info { struct sk_buff *(*tx_fixup)(struct usbnet *dev, struct sk_buff *skb, gfp_t flags); + /* recover from timeout */ + void (*recover)(struct usbnet *dev); + /* early initialization code, can sleep. This is for minidrivers * having 'subminidrivers' that need to do extra initialization * right after minidriver have initialized hardware. */ From 166bd890a3d851b7cfdf3e417917878bfe4671f1 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 1 Aug 2014 14:41:10 +0200 Subject: [PATCH 18/27] ipv6: data of fwmark_reflect sysctl needs to be updated on netns construction Fixes: e110861f86094cd ("net: add a sysctl to reflect the fwmark on replies") Cc: Lorenzo Colitti Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/ipv6/sysctl_net_ipv6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 058f3eca2e53..818334619abb 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -74,6 +74,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply; ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency; + ipv6_table[3].data = &net->ipv6.sysctl.fwmark_reflect; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) From 4f933f414bf629852f361edf0fc5e765e3e78388 Mon Sep 17 00:00:00 2001 From: JF Le Fillatre Date: Sat, 2 Aug 2014 01:26:40 +0200 Subject: [PATCH 19/27] r8152: add missing Makefile rule Add missing Makefile rule for r8152 driver In the current kernel the r8152 driver is *never* built because of a missing rule in drivers/net/Makefile, despite being selected as built-in or module in the .config file. There is no error message or warning to indicate that the driver isn't built. This change adds the rule and lets the driver build. Tested as built-in and module for 3.15.8. Signed-off by: JF Le Fillatre Signed-off-by: David S. 
Miller --- drivers/net/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 3fef8a81c0f6..331d9cfa6193 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_USB_CATC) += usb/ obj-$(CONFIG_USB_KAWETH) += usb/ obj-$(CONFIG_USB_PEGASUS) += usb/ obj-$(CONFIG_USB_RTL8150) += usb/ +obj-$(CONFIG_USB_RTL8152) += usb/ obj-$(CONFIG_USB_HSO) += usb/ obj-$(CONFIG_USB_USBNET) += usb/ obj-$(CONFIG_USB_ZD1201) += usb/ From d9124268d84a836f14a6ead54ff9d8eee4c43be5 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 26 May 2014 17:21:39 +0200 Subject: [PATCH 20/27] batman-adv: Fix out-of-order fragmentation support batadv_frag_insert_packet was unable to handle out-of-order packets because it dropped them directly. This is caused by the way the fragmentation list is checked for the correct place to insert a fragmentation entry. The fragmentation code keeps the fragments in lists. The fragmentation entries are kept in descending order of sequence number. The list is traversed and each entry is compared with the new fragment. If the current entry has a smaller sequence number than the new fragment then the new one has to be inserted before the current entry. This ensures that the list is still in descending order. An out-of-order packet with a smaller sequence number than all entries in the list still has to be added to the end of the list. The used hlist has no information about the last entry in the list inside hlist_head and thus the last entry has to be calculated differently. Currently the code assumes that the iterator variable of hlist_for_each_entry can be used for this purpose after hlist_for_each_entry has finished. This is obviously wrong because the iterator variable is always NULL when the list was completely traversed. Instead the information about the last entry has to be stored in a different variable.
This problem was introduced in 610bfc6bc99bc83680d190ebc69359a05fc7f605 ("batman-adv: Receive fragmented packets and merge"). Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/fragmentation.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index f14e54a05691..022d18ab27a6 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -128,6 +128,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, { struct batadv_frag_table_entry *chain; struct batadv_frag_list_entry *frag_entry_new = NULL, *frag_entry_curr; + struct batadv_frag_list_entry *frag_entry_last = NULL; struct batadv_frag_packet *frag_packet; uint8_t bucket; uint16_t seqno, hdr_size = sizeof(struct batadv_frag_packet); @@ -180,11 +181,14 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, ret = true; goto out; } + + /* store current entry because it could be the last in list */ + frag_entry_last = frag_entry_curr; } - /* Reached the end of the list, so insert after 'frag_entry_curr'. */ - if (likely(frag_entry_curr)) { - hlist_add_after(&frag_entry_curr->list, &frag_entry_new->list); + /* Reached the end of the list, so insert after 'frag_entry_last'. */ + if (likely(frag_entry_last)) { + hlist_add_after(&frag_entry_last->list, &frag_entry_new->list); chain->size += skb->len - hdr_size; chain->timestamp = jiffies; ret = true; From a0eaf75c03712b491b7a840b5836c8f1e2a09277 Mon Sep 17 00:00:00 2001 From: Rajesh Borundia Date: Mon, 4 Aug 2014 11:51:16 -0400 Subject: [PATCH 21/27] qlcnic: Fix update of ethtool stats. o Aggregating tx stats in adapter variable was resulting in an increase in stats even after no traffic was run and user runs ifconfig/ethtool command. 
o qlcnic_update_stats used to accumulate stats in adapter struct at each function call, instead accumulate tx stats in local variable and then assign it to adapter structure. Reported-by: Holger Kiehl Signed-off-by: Rajesh Borundia Signed-off-by: David S. Miller --- .../ethernet/qlogic/qlcnic/qlcnic_ethtool.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index 1b7f3dbae289..141f116eb868 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -1290,17 +1290,25 @@ static u64 *qlcnic_fill_stats(u64 *data, void *stats, int type) void qlcnic_update_stats(struct qlcnic_adapter *adapter) { + struct qlcnic_tx_queue_stats tx_stats; struct qlcnic_host_tx_ring *tx_ring; int ring; + memset(&tx_stats, 0, sizeof(tx_stats)); for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; - adapter->stats.xmit_on += tx_ring->tx_stats.xmit_on; - adapter->stats.xmit_off += tx_ring->tx_stats.xmit_off; - adapter->stats.xmitcalled += tx_ring->tx_stats.xmit_called; - adapter->stats.xmitfinished += tx_ring->tx_stats.xmit_finished; - adapter->stats.txbytes += tx_ring->tx_stats.tx_bytes; + tx_stats.xmit_on += tx_ring->tx_stats.xmit_on; + tx_stats.xmit_off += tx_ring->tx_stats.xmit_off; + tx_stats.xmit_called += tx_ring->tx_stats.xmit_called; + tx_stats.xmit_finished += tx_ring->tx_stats.xmit_finished; + tx_stats.tx_bytes += tx_ring->tx_stats.tx_bytes; } + + adapter->stats.xmit_on = tx_stats.xmit_on; + adapter->stats.xmit_off = tx_stats.xmit_off; + adapter->stats.xmitcalled = tx_stats.xmit_called; + adapter->stats.xmitfinished = tx_stats.xmit_finished; + adapter->stats.txbytes = tx_stats.tx_bytes; } static u64 *qlcnic_fill_tx_queue_stats(u64 *data, void *stats) From bf63014f108aaff49e4382b7adc7d0a2b6365744 Mon Sep 17 00:00:00 2001 From: Rajesh Borundia 
Date: Mon, 4 Aug 2014 11:51:17 -0400 Subject: [PATCH 22/27] qlcnic: Set driver version before registering netdev o Earlier, set_drv_version was getting called after register_netdev. This was resulting in a race between set_drv_version and FLR called from open(). Moving set_drv_version before register_netdev avoids the race. o Log response code in error message on CDRP failure. Signed-off-by: Rajesh Borundia Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c | 2 +- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c index 304e247bdf33..ffbae293cef5 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c @@ -136,7 +136,7 @@ int qlcnic_82xx_issue_cmd(struct qlcnic_adapter *adapter, rsp = qlcnic_poll_rsp(adapter); if (rsp == QLCNIC_CDRP_RSP_TIMEOUT) { - dev_err(&pdev->dev, "card response timeout.\n"); + dev_err(&pdev->dev, "command timeout, response = 0x%x\n", rsp); cmd->rsp.arg[0] = QLCNIC_RCODE_TIMEOUT; } else if (rsp == QLCNIC_CDRP_RSP_FAIL) { cmd->rsp.arg[0] = QLCRD32(adapter, QLCNIC_CDRP_ARG(1), &err); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 4fc186713b66..158e1d9f255f 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -2623,13 +2623,13 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_out_disable_mbx_intr; + if (adapter->portnum == 0) + qlcnic_set_drv_version(adapter); + err = qlcnic_setup_netdev(adapter, netdev, pci_using_dac); if (err) goto err_out_disable_mbx_intr; - if (adapter->portnum == 0) - qlcnic_set_drv_version(adapter); - pci_set_drvdata(pdev, adapter); if (qlcnic_82xx_check(adapter)) From 
cd1560e2b60fc2fbfadb9200c366eb59fe04f10d Mon Sep 17 00:00:00 2001 From: Rajesh Borundia Date: Mon, 4 Aug 2014 11:51:18 -0400 Subject: [PATCH 23/27] qlcnic: Initialize dcbnl_ops before register_netdev o Initialization of dcbnl_ops after register netdev may result in dcbnl_ops not getting set before it is being accessed from open. So, moving it before register_netdev. Signed-off-by: Rajesh Borundia Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 158e1d9f255f..3187bc0c471b 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -2323,14 +2323,14 @@ qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev, if (err) return err; + qlcnic_dcb_init_dcbnl_ops(adapter->dcb); + err = register_netdev(netdev); if (err) { dev_err(&pdev->dev, "failed to register net device\n"); return err; } - qlcnic_dcb_init_dcbnl_ops(adapter->dcb); - return 0; } From ce7991e8198b80eb6b4441b6f6114bea4a665d66 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 5 Aug 2014 08:13:42 -0300 Subject: [PATCH 24/27] Revert "net: phy: Set the driver when registering an MDIO bus device" Commit a71e3c37960ce5f9 ("net: phy: Set the driver when registering an MDIO bus device") caused the following regression on the fec driver: root@imx6qsabresd:~# echo mem > /sys/power/state PM: Syncing filesystems ... done. Freezing user space processes ... (elapsed 0.003 seconds) done. Freezing remaining freezable tasks ... (elapsed 0.002 seconds) done. 
Unable to handle kernel NULL pointer dereference at virtual address 0000002c pgd = bcd14000 [0000002c] *pgd=4d9e0831, *pte=00000000, *ppte=00000000 Internal error: Oops: 17 [#1] SMP ARM Modules linked in: CPU: 0 PID: 617 Comm: sh Not tainted 3.16.0 #17 task: bc0c4e00 ti: bceb6000 task.ti: bceb6000 PC is at fec_suspend+0x10/0x70 LR is at dpm_run_callback.isra.7+0x34/0x6c pc : [<803f8a98>] lr : [<80361f44>] psr: 600f0013 sp : bceb7d70 ip : bceb7d88 fp : bceb7d84 r10: 8091523c r9 : 00000000 r8 : bd88f478 r7 : 803f8a88 r6 : 81165988 r5 : 00000000 r4 : 00000000 r3 : 00000000 r2 : 00000000 r1 : bd88f478 r0 : bd88f478 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user Control: 10c5387d Table: 4cd1404a DAC: 00000015 Process sh (pid: 617, stack limit = 0xbceb6240) Stack: (0xbceb7d70 to 0xbceb8000) .... The problem with the original commit is explained by Russell King: "It has the effect (as can be seen from the oops) of attaching the MDIO bus device (itself is a bus-less device) to the platform driver, which means that if the platform driver supports power management, it will be called to power manage the MDIO bus device. Moreover, drivers do not expect to be called for power management operations for devices which they haven't probed, and certainly not for devices which aren't part of the same bus that the driver is registered against." This reverts commit a71e3c37960ce5f9c6a519bc1215e3ba9fa83e75. Cc: #3.16 Signed-off-by: Fabio Estevam Signed-off-by: David S. 
Miller --- drivers/net/phy/mdio_bus.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 203651ebccb0..4eaadcfcb0fe 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -255,7 +255,6 @@ int mdiobus_register(struct mii_bus *bus) bus->dev.parent = bus->parent; bus->dev.class = &mdio_bus_class; - bus->dev.driver = bus->parent->driver; bus->dev.groups = NULL; dev_set_name(&bus->dev, "%s", bus->id); From 757efd32d5ce31f67193cc0e6a56e4dffcc42fb1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Aug 2014 16:49:52 +0200 Subject: [PATCH 25/27] sctp: fix possible seqlock deadlock in sctp_packet_transmit() Dave reported the following splat, caused by improper use of IP_INC_STATS_BH() in process context. BUG: using __this_cpu_add() in preemptible [00000000] code: trinity-c117/14551 caller is __this_cpu_preempt_check+0x13/0x20 CPU: 3 PID: 14551 Comm: trinity-c117 Not tainted 3.16.0+ #33 ffffffff9ec898f0 0000000047ea7e23 ffff88022d32f7f0 ffffffff9e7ee207 0000000000000003 ffff88022d32f818 ffffffff9e397eaa ffff88023ee70b40 ffff88022d32f970 ffff8801c026d580 ffff88022d32f828 ffffffff9e397ee3 Call Trace: [] dump_stack+0x4e/0x7a [] check_preemption_disabled+0xfa/0x100 [] __this_cpu_preempt_check+0x13/0x20 [] sctp_packet_transmit+0x692/0x710 [sctp] [] sctp_outq_flush+0x2a2/0xc30 [sctp] [] ? mark_held_locks+0x7c/0xb0 [] ? _raw_spin_unlock_irqrestore+0x5d/0x80 [] sctp_outq_uncork+0x1a/0x20 [sctp] [] sctp_cmd_interpreter.isra.23+0x1142/0x13f0 [sctp] [] sctp_do_sm+0xdb/0x330 [sctp] [] ? preempt_count_sub+0xab/0x100 [] ? sctp_cname+0x70/0x70 [sctp] [] sctp_primitive_ASSOCIATE+0x3a/0x50 [sctp] [] sctp_sendmsg+0x88f/0xe30 [sctp] [] ? lock_release_holdtime.part.28+0x9a/0x160 [] ? put_lock_stats.isra.27+0xe/0x30 [] inet_sendmsg+0x104/0x220 [] ? inet_sendmsg+0x5/0x220 [] sock_sendmsg+0x9e/0xe0 [] ? might_fault+0xb9/0xc0 [] ? might_fault+0x5e/0xc0 [] SYSC_sendto+0x124/0x1c0 [] ?
syscall_trace_enter+0x250/0x330 [] SyS_sendto+0xe/0x10 [] tracesys+0xdd/0xe2 This is a followup of commits f1d8cba61c3c4b ("inet: fix possible seqlock deadlocks") and 7f88c6b23afbd315 ("ipv6: fix possible seqlock deadlock in ip6_finish_output2") Signed-off-by: Eric Dumazet Cc: Hannes Frederic Sowa Reported-by: Dave Jones Acked-by: Neil Horman Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/sctp/output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index 01ab8e0723f0..407ae2bf97b0 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -599,7 +599,7 @@ out: return err; no_route: kfree_skb(nskb); - IP_INC_STATS_BH(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); /* FIXME: Returning the 'err' will effect all the associations * associated with a socket, although only one of the paths of the From 2670cc699a66c4cf268cb3e3f6dfc325ec14f224 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 5 Aug 2014 16:44:39 +0100 Subject: [PATCH 26/27] net: sun4i-emac: fix memory leak on bad packet Upon reception of a new frame, the emac driver checks for a number of error conditions, and flags the packet as "bad" if any of these are present. It then allocates a skb unconditionally, but only uses it if the packet is "good". On the error path, the skb is just forgotten, and the system leaks memory. The piece of junk I have on my desk seems to encounter such errors frequently enough so that the box goes OOM after a couple of days, which makes me grumpy. Fix this by moving the allocation to the "good_packet" path (and convert it to netdev_alloc_skb while we're at it). Tested on a random Allwinner A20 board. Cc: Stefan Roese Cc: Maxime Ripard Cc: # 3.11+ Signed-off-by: Marc Zyngier Acked-by: Maxime Ripard Signed-off-by: David S.
Miller --- drivers/net/ethernet/allwinner/sun4i-emac.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index d81e7167a8b5..29b9f082475d 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -633,8 +633,10 @@ static void emac_rx(struct net_device *dev) } /* Move data from EMAC */ - skb = dev_alloc_skb(rxlen + 4); - if (good_packet && skb) { + if (good_packet) { + skb = netdev_alloc_skb(dev, rxlen + 4); + if (!skb) + continue; skb_reserve(skb, 2); rdptr = (u8 *) skb_put(skb, rxlen - 4); From 4d8fdc95c60e90d84c8257a0067ff4b1729a3757 Mon Sep 17 00:00:00 2001 From: Prashant Sreedharan Date: Tue, 5 Aug 2014 16:02:02 -0700 Subject: [PATCH 27/27] tg3: Modify tg3_tso_bug() to handle multiple TX rings tg3_tso_bug() was originally designed to handle only HW TX ring 0, Commit d3f6f3a1d818410c17445bce4f4caab52eb102f1 ("tg3: Prevent page allocation failure during TSO workaround") changed the driver logic to use tg3_tso_bug() for all HW TX rings that are enabled. This patch fixes the regression by modifying tg3_tso_bug() to handle multiple HW TX rings. Signed-off-by: Prashant Sreedharan Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 8afa579e7c40..a3dd5dc64f4c 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -7830,17 +7830,18 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi, static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *); -/* Use GSO to workaround a rare TSO bug that may be triggered when the - * TSO header is greater than 80 bytes. 
+/* Use GSO to workaround all TSO packets that meet HW bug conditions + * indicated in tg3_tx_frag_set() */ -static int tg3_tso_bug(struct tg3 *tp, struct sk_buff *skb) +static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi, + struct netdev_queue *txq, struct sk_buff *skb) { struct sk_buff *segs, *nskb; u32 frag_cnt_est = skb_shinfo(skb)->gso_segs * 3; /* Estimate the number of fragments in the worst case */ - if (unlikely(tg3_tx_avail(&tp->napi[0]) <= frag_cnt_est)) { - netif_stop_queue(tp->dev); + if (unlikely(tg3_tx_avail(tnapi) <= frag_cnt_est)) { + netif_tx_stop_queue(txq); /* netif_tx_stop_queue() must be done before checking * checking tx index in tg3_tx_avail() below, because in @@ -7848,13 +7849,14 @@ static int tg3_tso_bug(struct tg3 *tp, struct sk_buff *skb) * netif_tx_queue_stopped(). */ smp_mb(); - if (tg3_tx_avail(&tp->napi[0]) <= frag_cnt_est) + if (tg3_tx_avail(tnapi) <= frag_cnt_est) return NETDEV_TX_BUSY; - netif_wake_queue(tp->dev); + netif_tx_wake_queue(txq); } - segs = skb_gso_segment(skb, tp->dev->features & ~(NETIF_F_TSO | NETIF_F_TSO6)); + segs = skb_gso_segment(skb, tp->dev->features & + ~(NETIF_F_TSO | NETIF_F_TSO6)); if (IS_ERR(segs) || !segs) goto tg3_tso_bug_end; @@ -7930,7 +7932,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) if (!skb_is_gso_v6(skb)) { if (unlikely((ETH_HLEN + hdr_len) > 80) && tg3_flag(tp, TSO_BUG)) - return tg3_tso_bug(tp, skb); + return tg3_tso_bug(tp, tnapi, txq, skb); ip_csum = iph->check; ip_tot_len = iph->tot_len; @@ -8061,7 +8063,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) iph->tot_len = ip_tot_len; } tcph->check = tcp_csum; - return tg3_tso_bug(tp, skb); + return tg3_tso_bug(tp, tnapi, txq, skb); } /* If the workaround fails due to memory/mapping