From 628e341f319f1a64a4639088faba952e4ec8f0a8 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 14 Aug 2013 13:05:23 +0200 Subject: [PATCH 01/80] xfrm: make local error reporting more robust In xfrm4 and xfrm6 we need to take care about sockets of the other address family. This could happen because a 6in4 or 4in6 tunnel could get protected by ipsec. Because we don't want to have a run-time dependency on ipv6 when only using ipv4 xfrm we have to embed a pointer to the correct local_error function in xfrm_state_afinet and look it up when returning an error depending on the socket address family. Thanks to vi0ss for the great bug report: v2: a) fix two more unsafe interpretations of skb->sk as ipv6 socket (xfrm6_local_dontfrag and __xfrm6_output) v3: a) add an EXPORT_SYMBOL_GPL(xfrm_local_error) to fix a link error when building ipv6 as a module (thanks to Steffen Klassert) Reported-by: Cc: Steffen Klassert Signed-off-by: Hannes Frederic Sowa Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 6 ++++++ net/ipv4/xfrm4_output.c | 12 ++++++++++-- net/ipv4/xfrm4_state.c | 1 + net/ipv6/xfrm6_output.c | 10 ++++++---- net/ipv6/xfrm6_state.c | 1 + net/xfrm/xfrm_output.c | 13 +++++++++++++ net/xfrm/xfrm_state.c | 7 ++----- 7 files changed, 39 insertions(+), 11 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 94ce082b29dc..e823786e7c66 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -341,10 +341,13 @@ struct xfrm_state_afinfo { struct sk_buff *skb); int (*transport_finish)(struct sk_buff *skb, int async); + void (*local_error)(struct sk_buff *skb, u32 mtu); }; extern int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo); extern int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo); +extern struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); +extern void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); extern void xfrm_state_delete_tunnel(struct xfrm_state *x); @@ -1477,6 +1480,7 @@ extern int xfrm_input_resume(struct sk_buff *skb, int nexthdr); extern int xfrm_output_resume(struct sk_buff *skb, int err); extern int xfrm_output(struct sk_buff *skb); extern int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); +extern void xfrm_local_error(struct sk_buff *skb, int mtu); extern int xfrm4_extract_header(struct sk_buff *skb); extern int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, @@ -1497,6 +1501,7 @@ extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short fam extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel *handler); extern int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel *handler); +extern void xfrm4_local_error(struct sk_buff *skb, u32 mtu); extern int xfrm6_extract_header(struct sk_buff *skb); extern int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi); @@ -1514,6 +1519,7 @@ extern int xfrm6_output(struct sk_buff *skb); extern int xfrm6_output_finish(struct sk_buff *skb); extern int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr); +extern void xfrm6_local_error(struct sk_buff *skb, u32 mtu); #ifdef CONFIG_XFRM extern int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 327a617d594c..7a5491ffa4de 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -33,8 +33,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) mtu = dst_mtu(dst); if (skb->len > mtu) { if (skb->sk) - ip_local_error(skb->sk, EMSGSIZE, ip_hdr(skb)->daddr, - inet_sk(skb->sk)->inet_dport, mtu); + xfrm_local_error(skb, mtu); else icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); @@ -99,3 +98,12 @@ int xfrm4_output(struct sk_buff *skb) x->outer_mode->afinfo->output_finish, !(IPCB(skb)->flags & IPSKB_REROUTED)); } + +void xfrm4_local_error(struct sk_buff *skb, u32 mtu) +{ + struct iphdr *hdr; + + hdr = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); + ip_local_error(skb->sk, EMSGSIZE, hdr->daddr, + inet_sk(skb->sk)->inet_dport, mtu); +} diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 9258e751baba..0b2a0641526a 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -83,6 +83,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .extract_input = xfrm4_extract_input, .extract_output = xfrm4_extract_output, .transport_finish = xfrm4_transport_finish, + .local_error = xfrm4_local_error, }; void __init xfrm4_state_init(void) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 8755a3079d0f..b64fff30eb06 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -34,8 +34,10 @@ static int xfrm6_local_dontfrag(struct sk_buff *skb) struct sock *sk = skb->sk; if (sk) { - proto = sk->sk_protocol; + if (sk->sk_family != AF_INET6) + return 0; + proto = sk->sk_protocol; if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) return inet6_sk(sk)->dontfrag; } @@ -54,7 +56,7 @@ static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) ipv6_local_rxpmtu(sk, &fl6, mtu); } -static void xfrm6_local_error(struct sk_buff *skb, u32 mtu) +void xfrm6_local_error(struct sk_buff *skb, u32 mtu) { struct flowi6 fl6; struct sock *sk = skb->sk; @@ -80,7 +82,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) if (xfrm6_local_dontfrag(skb)) xfrm6_local_rxpmtu(skb, mtu); else if (skb->sk) - xfrm6_local_error(skb, mtu); + xfrm_local_error(skb, mtu); else icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ret = -EMSGSIZE; @@ -142,7 +144,7 @@ static int __xfrm6_output(struct sk_buff *skb) xfrm6_local_rxpmtu(skb, mtu); return -EMSGSIZE; } else if (!skb->local_df && skb->len > mtu && skb->sk) { - xfrm6_local_error(skb, mtu); + xfrm_local_error(skb, mtu); return -EMSGSIZE; } diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index d8c70b8efc24..3fc970135fc6 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -183,6 +183,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .extract_input = xfrm6_extract_input, .extract_output = xfrm6_extract_output, .transport_finish = xfrm6_transport_finish, + .local_error = xfrm6_local_error, }; int __init xfrm6_state_init(void) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index eb4a84288648..6f5fc612b162 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -214,5 +214,18 @@ int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) return inner_mode->afinfo->extract_output(x, skb); } +void xfrm_local_error(struct sk_buff *skb, int mtu) +{ + struct xfrm_state_afinfo *afinfo; + + afinfo = xfrm_state_get_afinfo(skb->sk->sk_family); + if (!afinfo) + return; + + afinfo->local_error(skb, mtu); + xfrm_state_put_afinfo(afinfo); +} + EXPORT_SYMBOL_GPL(xfrm_output); EXPORT_SYMBOL_GPL(xfrm_inner_extract_output); +EXPORT_SYMBOL_GPL(xfrm_local_error); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 78f66fa92449..54c0acd29468 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -39,9 +39,6 @@ static DEFINE_SPINLOCK(xfrm_state_lock); static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; -static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); -static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); - static inline unsigned int xfrm_dst_hash(struct net *net, const xfrm_address_t *daddr, const xfrm_address_t *saddr, @@ -1860,7 +1857,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_state_unregister_afinfo); -static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) +struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) { struct xfrm_state_afinfo *afinfo; if (unlikely(family >= NPROTO)) @@ -1872,7 +1869,7 @@ static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) return afinfo; } -static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) +void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) { rcu_read_unlock(); } From 0ea9d5e3e0e03a63b11392f5613378977dae7eca Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 13 Aug 2013 04:35:58 +0200 Subject: [PATCH 02/80] xfrm: introduce helper for safe determination of mtu skb->sk socket can be of AF_INET or AF_INET6 address family. Thus we always have to make sure we a referring to the correct interpretation of skb->sk. We only depend on header defines to query the mtu, so we don't introduce a new dependency to ipv6 by this change. Cc: Steffen Klassert Signed-off-by: Hannes Frederic Sowa Signed-off-by: Steffen Klassert --- include/net/route.h | 8 ++++++++ include/net/xfrm.h | 12 ++++++++++++ net/ipv4/ip_output.c | 8 -------- net/ipv4/xfrm4_output.c | 4 +--- net/ipv6/xfrm6_output.c | 5 ++++- 5 files changed, 25 insertions(+), 12 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index 2ea40c1b5e00..afdeeb5bec25 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -317,4 +317,12 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) return hoplimit; } +static inline int ip_skb_dst_mtu(struct sk_buff *skb) +{ + struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL; + + return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ? + skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); +} + #endif /* _ROUTE_H */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e823786e7c66..b41d2d10ff0e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -1723,4 +1724,15 @@ static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m) return ret; } +static inline int xfrm_skb_dst_mtu(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + if (sk && sk->sk_family == AF_INET6) + return ip6_skb_dst_mtu(skb); + else if (sk && sk->sk_family == AF_INET) + return ip_skb_dst_mtu(skb); + return dst_mtu(skb_dst(skb)); +} + #endif /* _NET_XFRM_H */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4bcabf3ab4ca..9ee17e3d11c3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -211,14 +211,6 @@ static inline int ip_finish_output2(struct sk_buff *skb) return -EINVAL; } -static inline int ip_skb_dst_mtu(struct sk_buff *skb) -{ - struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL; - - return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ? - skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); -} - static int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 7a5491ffa4de..80baf4a3b1b5 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -21,7 +21,6 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) { int mtu, ret = 0; - struct dst_entry *dst; if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) goto out; @@ -29,8 +28,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df) goto out; - dst = skb_dst(skb); - mtu = dst_mtu(dst); + mtu = xfrm_skb_dst_mtu(skb); if (skb->len > mtu) { if (skb->sk) xfrm_local_error(skb, mtu); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index b64fff30eb06..3ac5ab264fed 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -138,7 +138,10 @@ static int __xfrm6_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; - int mtu = ip6_skb_dst_mtu(skb); + int mtu = xfrm_skb_dst_mtu(skb); + + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); From 3f0fa9a808f98fa10a18ba2a73f13d65fda990fb Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Wed, 14 Aug 2013 16:05:02 -0700 Subject: [PATCH 03/80] regmap: Add another missing header for !CONFIG_REGMAP stubs The use of WARN_ON() needs the definitions from bug.h, without it you can get: include/linux/regmap.h: In function 'regmap_write': include/linux/regmap.h:525:2: error: implicit declaration of function 'WARN_ONCE' [-Werror=implicit-function-declaration] Signed-off-by: Kevin Hilman Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- include/linux/regmap.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 580a5320cc96..6d91fcb4c5cb 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -16,6 +16,7 @@ #include #include #include +#include struct module; struct device; From 44512449c0ab368889dd13ae0031fba74ee7e1d2 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Thu, 15 Aug 2013 15:36:49 -0500 Subject: [PATCH 04/80] jfs: fix readdir cookie incompatibility with NFSv4 NFSv4 reserves readdir cookie values 0-2 for special entries (. and ..), but jfs allows a value of 2 for a non-special entry. This incompatibility can result in the nfs client reporting a readdir loop. This patch doesn't change the value stored internally, but adds one to the value exposed to the iterate method. Signed-off-by: Dave Kleikamp Tested-by: Christian Kujau --- fs/jfs/jfs_dtree.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 8743ba9c6742..984c2bbf4f61 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -3047,6 +3047,14 @@ int jfs_readdir(struct file *file, struct dir_context *ctx) dir_index = (u32) ctx->pos; + /* + * NFSv4 reserves cookies 1 and 2 for . and .. so the value + * we return to the vfs is one greater than the one we use + * internally. + */ + if (dir_index) + dir_index--; + if (dir_index > 1) { struct dir_table_slot dirtab_slot; @@ -3086,7 +3094,7 @@ int jfs_readdir(struct file *file, struct dir_context *ctx) if (p->header.flag & BT_INTERNAL) { jfs_err("jfs_readdir: bad index table"); DT_PUTPAGE(mp); - ctx->pos = -1; + ctx->pos = DIREND; return 0; } } else { @@ -3094,14 +3102,14 @@ int jfs_readdir(struct file *file, struct dir_context *ctx) /* * self "." */ - ctx->pos = 0; + ctx->pos = 1; if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) return 0; } /* * parent ".." */ - ctx->pos = 1; + ctx->pos = 2; if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR)) return 0; @@ -3122,22 +3130,23 @@ int jfs_readdir(struct file *file, struct dir_context *ctx) /* * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 * - * pn = index = 0: First entry "." - * pn = 0; index = 1: Second entry ".." + * pn = 0; index = 1: First entry "." + * pn = 0; index = 2: Second entry ".." * pn > 0: Real entries, pn=1 -> leftmost page * pn = index = -1: No more entries */ dtpos = ctx->pos; - if (dtpos == 0) { + if (dtpos < 2) { /* build "." entry */ + ctx->pos = 1; if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) return 0; - dtoffset->index = 1; + dtoffset->index = 2; ctx->pos = dtpos; } if (dtoffset->pn == 0) { - if (dtoffset->index == 1) { + if (dtoffset->index == 2) { /* build ".." entry */ if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR)) return 0; @@ -3228,6 +3237,12 @@ int jfs_readdir(struct file *file, struct dir_context *ctx) } jfs_dirent->position = unique_pos++; } + /* + * We add 1 to the index because we may + * use a value of 2 internally, and NFSv4 + * doesn't like that. + */ + jfs_dirent->position++; } else { jfs_dirent->position = dtpos; len = min(d_namleft, DTLHDRDATALEN_LEGACY); From 3d483058c8c8b87a167155ca9ddd776dd730bc39 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 18 Aug 2013 13:46:52 +0200 Subject: [PATCH 05/80] ipv6: wire up skb->encapsulation When pushing a new header before current one call skb_reset_inner_headers to record the position of the inner headers in the various ipv6 tunnel protocols. We later need this to correctly identify the addresses needed to send back an error in the xfrm layer. This change is safe, because skb->protocol is always checked before dereferencing data from the inner protocol. Cc: Steffen Klassert Cc: YOSHIFUJI Hideaki Cc: Nicolas Dichtel Acked-by: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: Steffen Klassert --- net/ipv6/ip6_gre.c | 5 +++++ net/ipv6/ip6_tunnel.c | 6 ++++++ net/ipv6/sit.c | 5 +++++ 3 files changed, 16 insertions(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index ecd60733e5e2..90747f1973fe 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -724,6 +724,11 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); } + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + skb_push(skb, gre_hlen); skb_reset_network_header(skb); skb_set_transport_header(skb, sizeof(*ipv6h)); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 1e55866cead7..46ba243605a3 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1027,6 +1027,12 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, init_tel_txopt(&opt, encap_limit); ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); } + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a3437a4cd07e..fbfc5a83867f 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -888,6 +888,11 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, ttl = iph6->hop_limit; tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + err = iptunnel_xmit(dev_net(dev), rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos, ttl, df); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); From 5d0ff542d0264f61dc4bdb34eba39ffb4ea3bc23 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 18 Aug 2013 13:46:57 +0200 Subject: [PATCH 06/80] ipv6: xfrm: dereference inner ipv6 header if encapsulated In xfrm6_local_error use inner_header if the packet was encapsulated. Cc: Steffen Klassert Acked-by: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_output.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 3ac5ab264fed..e092e306882d 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -59,10 +59,12 @@ static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) void xfrm6_local_error(struct sk_buff *skb, u32 mtu) { struct flowi6 fl6; + const struct ipv6hdr *hdr; struct sock *sk = skb->sk; + hdr = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); fl6.fl6_dport = inet_sk(sk)->inet_dport; - fl6.daddr = ipv6_hdr(skb)->daddr; + fl6.daddr = hdr->daddr; ipv6_local_error(sk, EMSGSIZE, &fl6, mtu); } From 844d48746e4b281a933aedc0428048a1219b42f4 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 18 Aug 2013 13:47:01 +0200 Subject: [PATCH 07/80] xfrm: choose protocol family by skb protocol We need to choose the protocol family by skb->protocol. Otherwise we call the wrong xfrm{4,6}_local_error handler in case an ipv6 sockets is used in ipv4 mode, in which case we should call down to xfrm4_local_error (ip6 sockets are a superset of ip4 ones). We are called before before ip_output functions, so skb->protocol is not reset. Cc: Steffen Klassert Acked-by: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 4 ++-- net/xfrm/xfrm_output.c | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b41d2d10ff0e..ac5b02515355 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1728,9 +1728,9 @@ static inline int xfrm_skb_dst_mtu(struct sk_buff *skb) { struct sock *sk = skb->sk; - if (sk && sk->sk_family == AF_INET6) + if (sk && skb->protocol == htons(ETH_P_IPV6)) return ip6_skb_dst_mtu(skb); - else if (sk && sk->sk_family == AF_INET) + else if (sk && skb->protocol == htons(ETH_P_IP)) return ip_skb_dst_mtu(skb); return dst_mtu(skb_dst(skb)); } diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 6f5fc612b162..3bb2cdc13b46 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -216,9 +216,17 @@ int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) void xfrm_local_error(struct sk_buff *skb, int mtu) { + unsigned int proto; struct xfrm_state_afinfo *afinfo; - afinfo = xfrm_state_get_afinfo(skb->sk->sk_family); + if (skb->protocol == htons(ETH_P_IP)) + proto = AF_INET; + else if (skb->protocol == htons(ETH_P_IPV6)) + proto = AF_INET6; + else + return; + + afinfo = xfrm_state_get_afinfo(proto); if (!afinfo) return; From d3d3835ce919438c00c5d1270d6f9d6ffea59d03 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 19 Aug 2013 20:05:50 +0200 Subject: [PATCH 08/80] ALSA: hda - Add inverted digital mic fixup for Acer Aspire One Yet another entry, just use the existing fixup for this machine, too. Reported-by: "Nathanael D. Noblet" Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f303cd898515..389db4c2801b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4336,6 +4336,7 @@ static const struct hda_fixup alc662_fixups[] = { static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1019, 0x9087, "ECS", ALC662_FIXUP_ASUS_MODE2), + SND_PCI_QUIRK(0x1025, 0x022f, "Acer Aspire One", ALC662_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x0308, "Acer Aspire 8942G", ALC662_FIXUP_ASPIRE), SND_PCI_QUIRK(0x1025, 0x031c, "Gateway NV79", ALC662_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x1025, 0x0349, "eMachines eM250", ALC662_FIXUP_INV_DMIC), From 527bf129f9a780e11b251cf2467dc30118a57d16 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 12 Aug 2013 16:43:24 -0700 Subject: [PATCH 09/80] x86/mm: Fix boot crash with DEBUG_PAGE_ALLOC=y and more than 512G RAM Dave Hansen reported that systems between 500G and 600G RAM crash early if DEBUG_PAGEALLOC is selected. > [ 0.000000] init_memory_mapping: [mem 0x00000000-0x000fffff] > [ 0.000000] [mem 0x00000000-0x000fffff] page 4k > [ 0.000000] BRK [0x02086000, 0x02086fff] PGTABLE > [ 0.000000] BRK [0x02087000, 0x02087fff] PGTABLE > [ 0.000000] BRK [0x02088000, 0x02088fff] PGTABLE > [ 0.000000] init_memory_mapping: [mem 0xe80ee00000-0xe80effffff] > [ 0.000000] [mem 0xe80ee00000-0xe80effffff] page 4k > [ 0.000000] BRK [0x02089000, 0x02089fff] PGTABLE > [ 0.000000] BRK [0x0208a000, 0x0208afff] PGTABLE > [ 0.000000] Kernel panic - not syncing: alloc_low_page: ran out of memory It turns out that we missed increasing needed pages in BRK to mapping initial 2M and [0,1M) when we switched to use the #PF handler to set memory mappings: > commit 8170e6bed465b4b0c7687f93e9948aca4358a33b > Author: H. Peter Anvin > Date: Thu Jan 24 12:19:52 2013 -0800 > > x86, 64bit: Use a #PF handler to materialize early mappings on demand Before that, we had the maping from [0,512M) in head_64.S, and we can spare two pages [0-1M). After that change, we can not reuse pages anymore. When we have more than 512M ram, we need an extra page for pgd page with [512G, 1024g). Increase pages in BRK for page table to solve the boot crash. Reported-by: Dave Hansen Bisected-by: Dave Hansen Tested-by: Dave Hansen Signed-off-by: Yinghai Lu Cc: # v3.9 and later Link: http://lkml.kernel.org/r/1376351004-4015-1-git-send-email-yinghai@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 2ec29ac78ae6..04664cdb7fda 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -78,8 +78,8 @@ __ref void *alloc_low_pages(unsigned int num) return __va(pfn << PAGE_SHIFT); } -/* need 4 4k for initial PMD_SIZE, 4k for 0-ISA_END_ADDRESS */ -#define INIT_PGT_BUF_SIZE (5 * PAGE_SIZE) +/* need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS */ +#define INIT_PGT_BUF_SIZE (6 * PAGE_SIZE) RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE); void __init early_alloc_pgt_buf(void) { From 2a3ba63c235fdcd37f6451bdf4a0c7865a3930cf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 20 Aug 2013 11:28:50 +0200 Subject: [PATCH 10/80] mac80211: add missing channel context release IBSS needs to release the channel context when leaving but I evidently missed that. Fix it. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index ea7b9c2c7e66..5e8bb3bee3c2 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1138,6 +1138,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_IBSS); + ieee80211_vif_release_channel(sdata); synchronize_rcu(); kfree(presp); From 2dfca312a91631311c1cf7c090246cc8103de038 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 20 Aug 2013 19:43:54 +0200 Subject: [PATCH 11/80] mac80211: add a flag to indicate CCK support for HT clients brcm80211 cannot handle sending frames with CCK rates as part of an A-MPDU session. Other drivers may have issues too. Set the flag in all drivers that have been tested with CCK rates. This fixes a reported brcmsmac regression introduced in commit ef47a5e4f1aaf1d0e2e6875e34b2c9595897bef6 "mac80211/minstrel_ht: fix cck rate sampling" Cc: stable@vger.kernel.org # 3.10 Reported-by: Tom Gundersen Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath9k/init.c | 3 ++- drivers/net/wireless/ath/carl9170/main.c | 3 ++- drivers/net/wireless/rt2x00/rt2800lib.c | 3 ++- include/net/mac80211.h | 1 + net/mac80211/rc80211_minstrel_ht.c | 3 +++ 5 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index 16f8b201642b..026a2a067b46 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -802,7 +802,8 @@ void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw) IEEE80211_HW_PS_NULLFUNC_STACK | IEEE80211_HW_SPECTRUM_MGMT | IEEE80211_HW_REPORTS_TX_ACK_STATUS | - IEEE80211_HW_SUPPORTS_RC_TABLE; + IEEE80211_HW_SUPPORTS_RC_TABLE | + IEEE80211_HW_SUPPORTS_HT_CCK_RATES; if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_HT) { hw->flags |= IEEE80211_HW_AMPDU_AGGREGATION; diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 4a33c6e39ca2..349fa22a921a 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1860,7 +1860,8 @@ void *carl9170_alloc(size_t priv_size) IEEE80211_HW_PS_NULLFUNC_STACK | IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC | IEEE80211_HW_SUPPORTS_RC_TABLE | - IEEE80211_HW_SIGNAL_DBM; + IEEE80211_HW_SIGNAL_DBM | + IEEE80211_HW_SUPPORTS_HT_CCK_RATES; if (!modparam_noht) { /* diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index 1f80ea5e29dd..1b41c8eda12d 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -6133,7 +6133,8 @@ static int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev) IEEE80211_HW_SUPPORTS_PS | IEEE80211_HW_PS_NULLFUNC_STACK | IEEE80211_HW_AMPDU_AGGREGATION | - IEEE80211_HW_REPORTS_TX_ACK_STATUS; + IEEE80211_HW_REPORTS_TX_ACK_STATUS | + IEEE80211_HW_SUPPORTS_HT_CCK_RATES; /* * Don't set IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING for USB devices diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5b7a3dadadde..551ba6a6a073 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1499,6 +1499,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_RC_TABLE = 1<<24, IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF = 1<<25, IEEE80211_HW_TIMING_BEACON_ONLY = 1<<26, + IEEE80211_HW_SUPPORTS_HT_CCK_RATES = 1<<27, }; /** diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index f5aed963b22e..f3bbea1eb9e7 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -828,6 +828,9 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, if (sband->band != IEEE80211_BAND_2GHZ) return; + if (!(mp->hw->flags & IEEE80211_HW_SUPPORTS_HT_CCK_RATES)) + return; + mi->cck_supported = 0; mi->cck_supported_short = 0; for (i = 0; i < 4; i++) { From 75a423f493ffdf741acae27bf179cd560f7813d7 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 21 Aug 2013 15:30:25 +0200 Subject: [PATCH 12/80] mac80211: ibss: fix ignored channel parameter my earlier patch "mac80211: change IBSS channel state to chandef" created a regression by ignoring the channel parameter in __ieee80211_sta_join_ibss, which breaks IBSS channel selection. This patch fixes this situation by using the right channel and adopting the selected bandwidth mode. Cc: stable@vger.kernel.org Signed-off-by: Simon Wunderlich Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 5e8bb3bee3c2..2d45643c964e 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -36,7 +36,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, const u8 *bssid, const int beacon_int, - struct ieee80211_channel *chan, + struct cfg80211_chan_def *req_chandef, const u32 basic_rates, const u16 capability, u64 tsf, bool creator) @@ -51,6 +51,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, u32 bss_change; u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; struct cfg80211_chan_def chandef; + struct ieee80211_channel *chan; struct beacon_data *presp; int frame_len; @@ -81,7 +82,9 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0; - chandef = ifibss->chandef; + /* make a copy of the chandef, it could be modified below. */ + chandef = *req_chandef; + chan = chandef.chan; if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) { chandef.width = NL80211_CHAN_WIDTH_20; chandef.center_freq1 = chan->center_freq; @@ -259,10 +262,12 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct cfg80211_bss *cbss = container_of((void *)bss, struct cfg80211_bss, priv); struct ieee80211_supported_band *sband; + struct cfg80211_chan_def chandef; u32 basic_rates; int i, j; u16 beacon_int = cbss->beacon_interval; const struct cfg80211_bss_ies *ies; + enum nl80211_channel_type chan_type; u64 tsf; sdata_assert_lock(sdata); @@ -270,6 +275,26 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, if (beacon_int < 10) beacon_int = 10; + switch (sdata->u.ibss.chandef.width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_40: + chan_type = cfg80211_get_chandef_type(&sdata->u.ibss.chandef); + cfg80211_chandef_create(&chandef, cbss->channel, chan_type); + break; + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: + cfg80211_chandef_create(&chandef, cbss->channel, + NL80211_CHAN_WIDTH_20_NOHT); + chandef.width = sdata->u.ibss.chandef.width; + break; + default: + /* fall back to 20 MHz for unsupported modes */ + cfg80211_chandef_create(&chandef, cbss->channel, + NL80211_CHAN_WIDTH_20_NOHT); + break; + } + sband = sdata->local->hw.wiphy->bands[cbss->channel->band]; basic_rates = 0; @@ -294,7 +319,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, __ieee80211_sta_join_ibss(sdata, cbss->bssid, beacon_int, - cbss->channel, + &chandef, basic_rates, cbss->capability, tsf, false); @@ -736,7 +761,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) sdata->drop_unencrypted = 0; __ieee80211_sta_join_ibss(sdata, bssid, sdata->vif.bss_conf.beacon_int, - ifibss->chandef.chan, ifibss->basic_rates, + &ifibss->chandef, ifibss->basic_rates, capability, 0, true); } From 4e67fb5f5e336250db944921e3c68057d6203034 Mon Sep 17 00:00:00 2001 From: David Jander Date: Wed, 21 Aug 2013 17:37:22 +0200 Subject: [PATCH 13/80] regmap: rbtree: Fix overlapping rbnodes. Avoid overlapping register regions by making the initial blklen of a new node 1. If a register write occurs to a yet uncached register, that is lower than but near an existing node's base_reg, a new node is created and it's blklen is set to an arbitrary value (sizeof(*rbnode)). That may cause this node to overlap with another node. Those nodes should be merged, but this merge doesn't happen yet, so this patch at least makes the initial blklen small enough to avoid hitting the wrong node, which may otherwise lead to severe breakage. Signed-off-by: David Jander Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/base/regmap/regcache-rbtree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index 5c1435c4e210..0fccc99881fd 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -332,7 +332,7 @@ regcache_rbtree_node_alloc(struct regmap *map, unsigned int reg) } if (!rbnode->blklen) { - rbnode->blklen = sizeof(*rbnode); + rbnode->blklen = 1; rbnode->base_reg = reg; } From b2fcc0aee58a3435566dd6d8501a0b355552f28b Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 21 Aug 2013 10:18:19 +0200 Subject: [PATCH 14/80] iwl4965: fix rfkill set state regression My current 3.11 fix: commit 788f7a56fce1bcb2067b62b851a086fca48a0056 Author: Stanislaw Gruszka Date: Thu Aug 1 12:07:55 2013 +0200 iwl4965: reset firmware after rfkill off broke rfkill notification to user-space . I missed that bug, because I compiled without CONFIG_RFKILL, sorry about that. Cc: stable@vger.kernel.org Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- drivers/net/wireless/iwlegacy/4965-mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c index f2ed62e37340..7acf5ee23582 100644 --- a/drivers/net/wireless/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/iwlegacy/4965-mac.c @@ -4464,9 +4464,9 @@ il4965_irq_tasklet(struct il_priv *il) set_bit(S_RFKILL, &il->status); } else { clear_bit(S_RFKILL, &il->status); - wiphy_rfkill_set_hw_state(il->hw->wiphy, hw_rf_kill); il_force_reset(il, true); } + wiphy_rfkill_set_hw_state(il->hw->wiphy, hw_rf_kill); handled |= CSR_INT_BIT_RF_KILL; } From d2e9fc141e2aa21f4b35ee27072d84e9aa6e2ba0 Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Fri, 16 Aug 2013 21:39:40 +0200 Subject: [PATCH 15/80] ath9k_htc: Restore skb headroom when returning skb to mac80211 ath9k_htc adds padding between the 802.11 header and the payload during TX by moving the header. When handing the frame back to mac80211 for TX status handling the header is not moved back into its original position. This can result in a too small skb headroom when entering ath9k_htc again (due to a soft retransmission for example) causing an skb_under_panic oops. Fix this by moving the 802.11 header back into its original position before returning the frame to mac80211 as other drivers like rt2x00 or ath5k do. Reported-by: Marc Kleine-Budde Signed-off-by: Helmut Schaa Tested-by: Marc Kleine-Budde Signed-off-by: Marc Kleine-Budde Cc: stable@vger.kernel.org Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index e602c9519709..c028df76b564 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -448,6 +448,7 @@ static void ath9k_htc_tx_process(struct ath9k_htc_priv *priv, struct ieee80211_conf *cur_conf = &priv->hw->conf; bool txok; int slot; + int hdrlen, padsize; slot = strip_drv_header(priv, skb); if (slot < 0) { @@ -504,6 +505,15 @@ send_mac80211: ath9k_htc_tx_clear_slot(priv, slot); + /* Remove padding before handing frame back to mac80211 */ + hdrlen = ieee80211_get_hdrlen_from_skb(skb); + + padsize = hdrlen & 3; + if (padsize && skb->len > hdrlen + padsize) { + memmove(skb->data + padsize, skb->data, hdrlen); + skb_pull(skb, padsize); + } + /* Send status to mac80211 */ ieee80211_tx_status(priv->hw, skb); } From 19c361608ce3e73f352e323262f7e0a8264be3af Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Tue, 20 Aug 2013 10:05:59 +0530 Subject: [PATCH 16/80] ath9k: Enable PLL fix only for AR9340/AR9330 The PLL hang workaround is required only for AR9330 and AR9340. This issue was first observed on an AP121 and the WAR is enabled for AR9340 also (DB120 etc.), since it uses a PLL design identical to AR9330. This is not required for AR9485 and AR9550. Various bugs have been reported regarding this: https://bugzilla.redhat.com/show_bug.cgi?id=997217 https://bugzilla.redhat.com/show_bug.cgi?id=994648 Cc: stable@vger.kernel.org Signed-off-by: Sujith Manoharan Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 1737a3e33685..cb5a65553ac7 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -173,8 +173,7 @@ static void ath_restart_work(struct ath_softc *sc) { ieee80211_queue_delayed_work(sc->hw, &sc->tx_complete_work, 0); - if (AR_SREV_9340(sc->sc_ah) || AR_SREV_9485(sc->sc_ah) || - AR_SREV_9550(sc->sc_ah)) + if (AR_SREV_9340(sc->sc_ah) || AR_SREV_9330(sc->sc_ah)) ieee80211_queue_delayed_work(sc->hw, &sc->hw_pll_work, msecs_to_jiffies(ATH_PLL_WORK_INTERVAL)); From 2ca320e294a738c9134a71b5029de05edbfc7aad Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 22 Aug 2013 09:55:36 +0200 Subject: [PATCH 17/80] ALSA: hda - Fix NULL dereference with CONFIG_SND_DYNAMIC_MINORS=n Without the dynamic minor assignment, HDMI codec may have less PCM instances than the number of pins, which eventually leads to Oops. Reported-by: Stratos Karafotis Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 030ca8652a1c..9f3586276871 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1781,6 +1781,9 @@ static int generic_hdmi_build_controls(struct hda_codec *codec) struct snd_pcm_chmap *chmap; struct snd_kcontrol *kctl; int i; + + if (!codec->pcm_info[pin_idx].pcm) + break; err = snd_pcm_add_chmap_ctls(codec->pcm_info[pin_idx].pcm, SNDRV_PCM_STREAM_PLAYBACK, NULL, 0, pin_idx, &chmap); From f41a64eeabbb2daa70f2b39d0dfb59c91904a1b0 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Wed, 21 Aug 2013 09:14:49 -0700 Subject: [PATCH 18/80] Input: wacom - add support for 0x300 and 0x301 Tested-by: Arjuna Rao Chavala Signed-off-by: Ping Cheng Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/wacom_wac.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index 384fbcd0cee0..f3e91f0b57ae 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -2112,7 +2112,7 @@ static const struct wacom_features wacom_features_0xDA = { "Wacom Bamboo 2FG 4x5 SE", WACOM_PKGLEN_BBFUN, 14720, 9200, 1023, 31, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, .touch_max = 2 }; -static struct wacom_features wacom_features_0xDB = +static const struct wacom_features wacom_features_0xDB = { "Wacom Bamboo 2FG 6x8 SE", WACOM_PKGLEN_BBFUN, 21648, 13700, 1023, 31, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, .touch_max = 2 }; @@ -2127,6 +2127,12 @@ static const struct wacom_features wacom_features_0xDF = { "Wacom Bamboo 16FG 6x8", WACOM_PKGLEN_BBPEN, 21648, 13700, 1023, 31, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, .touch_max = 16 }; +static const struct wacom_features wacom_features_0x300 = + { "Wacom Bamboo One S", WACOM_PKGLEN_BBPEN, 14720, 9225, 1023, + 31, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; +static const struct wacom_features wacom_features_0x301 = + { "Wacom Bamboo One M", WACOM_PKGLEN_BBPEN, 21648, 13530, 1023, + 31, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; static const struct wacom_features wacom_features_0x6004 = { "ISD-V4", WACOM_PKGLEN_GRAPHIRE, 12800, 8000, 255, 0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; @@ -2253,6 +2259,8 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0x100) }, { USB_DEVICE_WACOM(0x101) }, { USB_DEVICE_WACOM(0x10D) }, + { USB_DEVICE_WACOM(0x300) }, + { USB_DEVICE_WACOM(0x301) }, { USB_DEVICE_WACOM(0x304) }, { USB_DEVICE_WACOM(0x4001) }, { USB_DEVICE_WACOM(0x47) }, From acc0444bf83ea559fa84b527b1f49cf20b0df253 Mon Sep 17 00:00:00 2001 From: Matteo Delfino Date: Thu, 15 Aug 2013 00:09:41 -0700 Subject: [PATCH 19/80] Input: elantech - fix packet check for v3 and v4 hardware The signatures of v3 and v4 packets change depending on the value of a hardware flag called 'crc_enabled'. The packet type detection must change accordingly. This patch also restores a consistency check for v4 packets inadvertently removed by commit: 9eebed7de660c0b5ab129a9de4f89d20b60de68c Input: elantech - fix for newer hardware versions (v7) A note about the naming convention: v3 hardware is associated with IC body v5 while v4 hardware is associated with IC body v6 and v7. The above commit refers to IC body v7, not to v7 hardware. Tested on Samsung NP730U3E (fw = 0x675f05, ICv7, crc_enabled = 1) Tested-by: Giovanni Frigione Signed-off-by: Matteo Delfino Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 44 ++++++++++++++++++++++++++++++---- drivers/input/mouse/elantech.h | 1 + 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 57b2637e153a..8551dcaf24db 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -672,6 +672,7 @@ static int elantech_packet_check_v2(struct psmouse *psmouse) */ static int elantech_packet_check_v3(struct psmouse *psmouse) { + struct elantech_data *etd = psmouse->private; const u8 debounce_packet[] = { 0xc4, 0xff, 0xff, 0x02, 0xff, 0xff }; unsigned char *packet = psmouse->packet; @@ -682,19 +683,48 @@ static int elantech_packet_check_v3(struct psmouse *psmouse) if (!memcmp(packet, debounce_packet, sizeof(debounce_packet))) return PACKET_DEBOUNCE; - if ((packet[0] & 0x0c) == 0x04 && (packet[3] & 0xcf) == 0x02) - return PACKET_V3_HEAD; + /* + * If the hardware flag 'crc_enabled' is set the packets have + * different signatures. + */ + if (etd->crc_enabled) { + if ((packet[3] & 0x09) == 0x08) + return PACKET_V3_HEAD; - if ((packet[0] & 0x0c) == 0x0c && (packet[3] & 0xce) == 0x0c) - return PACKET_V3_TAIL; + if ((packet[3] & 0x09) == 0x09) + return PACKET_V3_TAIL; + } else { + if ((packet[0] & 0x0c) == 0x04 && (packet[3] & 0xcf) == 0x02) + return PACKET_V3_HEAD; + + if ((packet[0] & 0x0c) == 0x0c && (packet[3] & 0xce) == 0x0c) + return PACKET_V3_TAIL; + } return PACKET_UNKNOWN; } static int elantech_packet_check_v4(struct psmouse *psmouse) { + struct elantech_data *etd = psmouse->private; unsigned char *packet = psmouse->packet; unsigned char packet_type = packet[3] & 0x03; + bool sanity_check; + + /* + * Sanity check based on the constant bits of a packet. + * The constant bits change depending on the value of + * the hardware flag 'crc_enabled' but are the same for + * every packet, regardless of the type. + */ + if (etd->crc_enabled) + sanity_check = ((packet[3] & 0x08) == 0x00); + else + sanity_check = ((packet[0] & 0x0c) == 0x04 && + (packet[3] & 0x1c) == 0x10); + + if (!sanity_check) + return PACKET_UNKNOWN; switch (packet_type) { case 0: @@ -1313,6 +1343,12 @@ static int elantech_set_properties(struct elantech_data *etd) etd->reports_pressure = true; } + /* + * The signatures of v3 and v4 packets change depending on the + * value of this hardware flag. + */ + etd->crc_enabled = ((etd->fw_version & 0x4000) == 0x4000); + return 0; } diff --git a/drivers/input/mouse/elantech.h b/drivers/input/mouse/elantech.h index 46db3be45ac9..036a04abaef7 100644 --- a/drivers/input/mouse/elantech.h +++ b/drivers/input/mouse/elantech.h @@ -129,6 +129,7 @@ struct elantech_data { bool paritycheck; bool jumpy_cursor; bool reports_pressure; + bool crc_enabled; unsigned char hw_version; unsigned int fw_version; unsigned int single_finger_reports; From 764480413b7206d295eb6ff58efb057d56aff08b Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 22 Aug 2013 14:03:24 -0700 Subject: [PATCH 20/80] dma/Kconfig: TI_EDMA needs to be boolean Fix: arch/arm/common/built-in.o: undefined reference to `edma_filter_fn' seen with "make ARCH=arm allmodconfig" Commit 6cba4355 (ARM: edma: Add DT and runtime PM support to the private EDMA API) adds a dependency on edma_filter_fn() into arch/arm/common/edma.c. Since this file is always built into the kernel, edma_filter_fn() must be built into the kernel as well. Signed-off-by: Guenter Roeck Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 6825957c97fb..643d7c7a0d8e 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -194,7 +194,7 @@ config SIRF_DMA Enable support for the CSR SiRFprimaII DMA engine. config TI_EDMA - tristate "TI EDMA support" + bool "TI EDMA support" depends on ARCH_DAVINCI || ARCH_OMAP select DMA_ENGINE select DMA_VIRTUAL_CHANNELS From ed06349fe8d12dcb718984862b6e839fc8606c34 Mon Sep 17 00:00:00 2001 From: Mag Date: Mon, 26 Aug 2013 00:22:01 -0700 Subject: [PATCH 21/80] Input: xpad - add signature for Razer Onza Classic Edition Signed-off-by: Nol "Mag" Archinova Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index fa061d46527f..75e3b102ce45 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -167,6 +167,7 @@ static const struct xpad_device { { 0x1430, 0x8888, "TX6500+ Dance Pad (first generation)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 }, { 0x1689, 0xfd00, "Razer Onza Tournament Edition", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, + { 0x1689, 0xfd01, "Razer Onza Classic Edition", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf016, "Mad Catz Xbox 360 Controller", 0, XTYPE_XBOX360 }, From 9504a923924d663e1953f872f0a828e6454a6cfc Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 26 Jul 2013 18:43:45 +0200 Subject: [PATCH 22/80] [SCSI] pm80xx: fix Adaptec 71605H hang The IO command size is 128 bytes for these new controllers as opposed to 64 for the old 8001 controller. The Adaptec out-of-tree driver did this correctly. After comparing the two this turned out to be the crucial difference. So don't hardcode the IO command size, instead use pm8001_ha->iomb_size as that is the correct value for both old and new controllers. Signed-off-by: Hans Verkuil Acked-by: Anand Kumar Santhanam Acked-by: Jack Wang Cc: stable@vger.kernel.org # for v3.10 and up Signed-off-by: James Bottomley --- drivers/scsi/pm8001/pm8001_hwi.c | 4 ++-- drivers/scsi/pm8001/pm80xx_hwi.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index 5456f5c73593..4a2195752198 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -221,7 +221,7 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha) pm8001_ha->main_cfg_tbl.pm8001_tbl.fatal_err_interrupt = 0x01; for (i = 0; i < PM8001_MAX_INB_NUM; i++) { pm8001_ha->inbnd_q_tbl[i].element_pri_size_cnt = - PM8001_MPI_QUEUE | (64 << 16) | (0x00<<30); + PM8001_MPI_QUEUE | (pm8001_ha->iomb_size << 16) | (0x00<<30); pm8001_ha->inbnd_q_tbl[i].upper_base_addr = pm8001_ha->memoryMap.region[IB + i].phys_addr_hi; pm8001_ha->inbnd_q_tbl[i].lower_base_addr = @@ -247,7 +247,7 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha) } for (i = 0; i < PM8001_MAX_OUTB_NUM; i++) { pm8001_ha->outbnd_q_tbl[i].element_size_cnt = - PM8001_MPI_QUEUE | (64 << 16) | (0x01<<30); + PM8001_MPI_QUEUE | (pm8001_ha->iomb_size << 16) | (0x01<<30); pm8001_ha->outbnd_q_tbl[i].upper_base_addr = pm8001_ha->memoryMap.region[OB + i].phys_addr_hi; pm8001_ha->outbnd_q_tbl[i].lower_base_addr = diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 7f77210f5cf3..9f91030211e8 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -275,7 +275,7 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha) for (i = 0; i < PM8001_MAX_SPCV_INB_NUM; i++) { pm8001_ha->inbnd_q_tbl[i].element_pri_size_cnt = - PM8001_MPI_QUEUE | (64 << 16) | (0x00<<30); + PM8001_MPI_QUEUE | (pm8001_ha->iomb_size << 16) | (0x00<<30); pm8001_ha->inbnd_q_tbl[i].upper_base_addr = pm8001_ha->memoryMap.region[IB + i].phys_addr_hi; pm8001_ha->inbnd_q_tbl[i].lower_base_addr = @@ -301,7 +301,7 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha) } for (i = 0; i < PM8001_MAX_SPCV_OUTB_NUM; i++) { pm8001_ha->outbnd_q_tbl[i].element_size_cnt = - PM8001_MPI_QUEUE | (64 << 16) | (0x01<<30); + PM8001_MPI_QUEUE | (pm8001_ha->iomb_size << 16) | (0x01<<30); pm8001_ha->outbnd_q_tbl[i].upper_base_addr = pm8001_ha->memoryMap.region[OB + i].phys_addr_hi; pm8001_ha->outbnd_q_tbl[i].lower_base_addr = From 5a25cf1e310888eb333f9e034be84a8117111d30 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 26 Aug 2013 12:31:19 +0200 Subject: [PATCH 23/80] xfrm: revert ipv4 mtu determination to dst_mtu In commit 0ea9d5e3e0e03a63b11392f5613378977dae7eca ("xfrm: introduce helper for safe determination of mtu") I switched the determination of ipv4 mtus from dst_mtu to ip_skb_dst_mtu. This was an error because in case of IP_PMTUDISC_PROBE we fall back to the interface mtu, which is never correct for ipv4 ipsec. This patch partly reverts 0ea9d5e3e0e03a63b11392f5613378977dae7eca ("xfrm: introduce helper for safe determination of mtu"). Cc: Steffen Klassert Signed-off-by: Hannes Frederic Sowa Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 12 ------------ net/ipv4/xfrm4_output.c | 2 +- net/ipv6/xfrm6_output.c | 8 +++++--- 3 files changed, 6 insertions(+), 16 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index ac5b02515355..e823786e7c66 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -1724,15 +1723,4 @@ static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m) return ret; } -static inline int xfrm_skb_dst_mtu(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - - if (sk && skb->protocol == htons(ETH_P_IPV6)) - return ip6_skb_dst_mtu(skb); - else if (sk && skb->protocol == htons(ETH_P_IP)) - return ip_skb_dst_mtu(skb); - return dst_mtu(skb_dst(skb)); -} - #endif /* _NET_XFRM_H */ diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 80baf4a3b1b5..baa0f63731fd 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -28,7 +28,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df) goto out; - mtu = xfrm_skb_dst_mtu(skb); + mtu = dst_mtu(skb_dst(skb)); if (skb->len > mtu) { if (skb->sk) xfrm_local_error(skb, mtu); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index e092e306882d..6cd625e37706 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -140,10 +140,12 @@ static int __xfrm6_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; - int mtu = xfrm_skb_dst_mtu(skb); + int mtu; - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; + if (skb->protocol == htons(ETH_P_IPV6)) + mtu = ip6_skb_dst_mtu(skb); + else + mtu = dst_mtu(skb_dst(skb)); if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); From 9c9c9ad5fae7e9ef56a38acb508a01919b225e9a Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 26 Aug 2013 12:31:23 +0200 Subject: [PATCH 24/80] ipv6: set skb->protocol on tcp, raw and ip6_append_data genereated skbs Currently we don't initialize skb->protocol when transmitting data via tcp, raw(with and without inclhdr) or udp+ufo or appending data directly to the socket transmit queue (via ip6_append_data). This needs to be done so that we can get the correct mtu in the xfrm layer. Setting of skb->protocol happens only in functions where we also have a transmitting socket and a new skb, so we don't overwrite old values. Cc: Steffen Klassert Cc: Eric Dumazet Acked-by: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: Steffen Klassert --- net/ipv6/ip6_output.c | 3 +++ net/ipv6/raw.c | 1 + 2 files changed, 4 insertions(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6e3ddf806ec2..e7ceb6c871d1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -238,6 +238,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, hdr->saddr = fl6->saddr; hdr->daddr = *first_hop; + skb->protocol = htons(ETH_P_IPV6); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; @@ -1057,6 +1058,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, /* initialize protocol header pointer */ skb->transport_header = skb->network_header + fragheaderlen; + skb->protocol = htons(ETH_P_IPV6); skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; } @@ -1359,6 +1361,7 @@ alloc_new_skb: /* * Fill in the control structures */ + skb->protocol = htons(ETH_P_IPV6); skb->ip_summed = CHECKSUM_NONE; skb->csum = 0; /* reserve for fragmentation and ipsec header */ diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index c45f7a5c36e9..cdaed47ba932 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -628,6 +628,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, goto error; skb_reserve(skb, hlen); + skb->protocol = htons(ETH_P_IPV6); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; skb_dst_set(skb, &rt->dst); From dd5746bf6b48bb837e9f5af14b9b241fc4fdc1ef Mon Sep 17 00:00:00 2001 From: Sarveshwar Bandi Date: Fri, 23 Aug 2013 14:59:33 +0530 Subject: [PATCH 25/80] be2net: Check for POST state in suspend-resume sequence In suspend-resume sequence, the OS could attempt to initialize the controller before it is ready, check for POST state before going ahead. Signed-off-by: Sarveshwar Bandi Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 4559c35eea13..3d91a5ec61a4 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4373,6 +4373,10 @@ static int be_resume(struct pci_dev *pdev) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); + status = be_fw_wait_ready(adapter); + if (status) + return status; + /* tell fw we're ready to fire cmds */ status = be_cmd_fw_init(adapter); if (status) From d3474049ab6cfcf14274f5ab9f20c8f50b083eab Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 26 Aug 2013 10:53:53 -0400 Subject: [PATCH 26/80] USB: OHCI: fix build error related to ohci_suspend/resume Commit 9a11899c5e69 (USB: OHCI: add missing PCI PM callbacks to ohci-pci.c) added missing ohci_suspend and ohci_resume callback pointers, but forgot that these callbacks are declared and defined only when CONFIG_PM is enabled. This patch adds a preprocessor conditional to avoid build errors when PM is disabled. Reported-by: Guenter Roeck Tested-by: Guenter Roeck Reported-by: Meelis Roos , Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c index 0f1d193fef02..279b04910f00 100644 --- a/drivers/usb/host/ohci-pci.c +++ b/drivers/usb/host/ohci-pci.c @@ -305,9 +305,11 @@ static int __init ohci_pci_init(void) ohci_init_driver(&ohci_pci_hc_driver, &pci_overrides); +#ifdef CONFIG_PM /* Entries for the PCI suspend/resume callbacks are special */ ohci_pci_hc_driver.pci_suspend = ohci_suspend; ohci_pci_hc_driver.pci_resume = ohci_resume; +#endif return pci_register_driver(&ohci_pci_driver); } From f5f6cbb61610b7bf9d9d96db9c3979d62a424bab Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 27 Aug 2013 16:38:33 +1000 Subject: [PATCH 27/80] powerpc: Don't Oops when accessing /proc/powerpc/lparcfg without hypervisor /proc/powerpc/lparcfg is an ancient facility (though still actively used) which allows access to some informations relative to the partition when running underneath a PAPR compliant hypervisor. It makes no sense on non-pseries machines. However, currently, not only can it be created on these if the kernel has pseries support, but accessing it on such a machine will crash due to trying to do hypervisor calls. In fact, it should also not do HV calls on older pseries that didn't have an hypervisor either. Finally, it has the plumbing to be a module but is a "bool" Kconfig option. This fixes the whole lot by turning it into a machine_device_initcall that is only created on pseries, and adding the necessary hypervisor check before calling the H_GET_EM_PARMS hypercall CC: Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/lparcfg.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index d92f3871e9cf..e2a0a162299b 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -35,7 +35,13 @@ #include #include #include +#include + +/* + * This isn't a module but we expose that to userspace + * via /proc so leave the definitions here + */ #define MODULE_VERS "1.9" #define MODULE_NAME "lparcfg" @@ -418,7 +424,8 @@ static void parse_em_data(struct seq_file *m) { unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - if (plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS) + if (firmware_has_feature(FW_FEATURE_LPAR) && + plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS) seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]); } @@ -677,7 +684,6 @@ static int lparcfg_open(struct inode *inode, struct file *file) } static const struct file_operations lparcfg_fops = { - .owner = THIS_MODULE, .read = seq_read, .write = lparcfg_write, .open = lparcfg_open, @@ -699,14 +705,4 @@ static int __init lparcfg_init(void) } return 0; } - -static void __exit lparcfg_cleanup(void) -{ - remove_proc_subtree("powerpc/lparcfg", NULL); -} - -module_init(lparcfg_init); -module_exit(lparcfg_cleanup); -MODULE_DESCRIPTION("Interface for LPAR configuration data"); -MODULE_AUTHOR("Dave Engebretsen"); -MODULE_LICENSE("GPL"); +machine_device_initcall(pseries, lparcfg_init); From bdbc29c19b2633b1d9c52638fb732bcde7a2031a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 27 Aug 2013 16:07:49 +1000 Subject: [PATCH 28/80] powerpc: Work around gcc miscompilation of __pa() on 64-bit On 64-bit, __pa(&static_var) gets miscompiled by recent versions of gcc as something like: addis 3,2,.LANCHOR1+4611686018427387904@toc@ha addi 3,3,.LANCHOR1+4611686018427387904@toc@l This ends up effectively ignoring the offset, since its bottom 32 bits are zero, and means that the result of __pa() still has 0xC in the top nibble. This happens with gcc 4.8.1, at least. To work around this, for 64-bit we make __pa() use an AND operator, and for symmetry, we make __va() use an OR operator. Using an AND operator rather than a subtraction ends up with slightly shorter code since it can be done with a single clrldi instruction, whereas it takes three instructions to form the constant (-PAGE_OFFSET) and add it on. (Note that MEMORY_START is always 0 on 64-bit.) CC: Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/page.h | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index dbd9d3c991e8..9cf59816d3e9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -979,6 +979,7 @@ config RELOCATABLE must live at a different physical address than the primary kernel. +# This value must have zeroes in the bottom 60 bits otherwise lots will break config PAGE_OFFSET hex default "0xc000000000000000" diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 988c812aab5b..b9f426212d3a 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -211,9 +211,19 @@ extern long long virt_phys_offset; #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET)) #define __pa(x) ((unsigned long)(x) - VIRT_PHYS_OFFSET) #else +#ifdef CONFIG_PPC64 +/* + * gcc miscompiles (unsigned long)(&static_var) - PAGE_OFFSET + * with -mcmodel=medium, so we use & and | instead of - and + on 64-bit. + */ +#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET)) +#define __pa(x) ((unsigned long)(x) & 0x0fffffffffffffffUL) + +#else /* 32-bit, non book E */ #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + PAGE_OFFSET - MEMORY_START)) #define __pa(x) ((unsigned long)(x) - PAGE_OFFSET + MEMORY_START) #endif +#endif /* * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI, From d220980b701d838560a70de691b53be007e99e78 Mon Sep 17 00:00:00 2001 From: Eugene Surovegin Date: Mon, 26 Aug 2013 11:53:32 -0700 Subject: [PATCH 29/80] powerpc/hvsi: Increase handshake timeout from 200ms to 400ms. This solves a problem observed in kexec'ed kernel where 200ms timeout is too short and bootconsole fails to initialize. Console did eventually become workable but much later into the boot process. Observed timeout was around 260ms, but I decided to make it a little bigger for more reliability. This has been tested on Power7 machine with Petitboot as a primary bootloader and PowerNV firmware. CC: Signed-off-by: Eugene Surovegin Signed-off-by: Benjamin Herrenschmidt --- drivers/tty/hvc/hvsi_lib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/hvc/hvsi_lib.c b/drivers/tty/hvc/hvsi_lib.c index 3396eb9d57a3..ac2767100df5 100644 --- a/drivers/tty/hvc/hvsi_lib.c +++ b/drivers/tty/hvc/hvsi_lib.c @@ -341,8 +341,8 @@ void hvsilib_establish(struct hvsi_priv *pv) pr_devel("HVSI@%x: ... waiting handshake\n", pv->termno); - /* Try for up to 200s */ - for (timeout = 0; timeout < 20; timeout++) { + /* Try for up to 400ms */ + for (timeout = 0; timeout < 40; timeout++) { if (pv->established) goto established; if (!hvsi_get_packet(pv)) From 6dec97dc92946eb479e6ebb54a61f8226cceefec Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 27 Aug 2013 12:37:18 +0400 Subject: [PATCH 30/80] mm: move_ptes -- Set soft dirty bit depending on pte type Dave reported corrupted swap entries | [ 4588.541886] swap_free: Unused swap offset entry 00002d15 | [ 4588.541952] BUG: Bad page map in process trinity-kid12 pte:005a2a80 pmd:22c01f067 and Hugh pointed that in move_ptes _PAGE_SOFT_DIRTY bit set regardless the type of entry pte consists of. The trick here is that when we carry soft dirty status in swap entries we are to use _PAGE_SWP_SOFT_DIRTY instead, because this is the only place in pte which can be used for own needs without intersecting with bits owned by swap entry type/offset. Reported-and-tested-by: Dave Jones Signed-off-by: Cyrill Gorcunov Cc: Pavel Emelyanov Analyzed-by: Hugh Dickins Cc: Hillf Danton Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mremap.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/mm/mremap.c b/mm/mremap.c index 457d34ef3bf2..0843feb66f3d 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -69,6 +70,23 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma, return pmd; } +static pte_t move_soft_dirty_pte(pte_t pte) +{ + /* + * Set soft dirty bit so we can notice + * in userspace the ptes were moved. + */ +#ifdef CONFIG_MEM_SOFT_DIRTY + if (pte_present(pte)) + pte = pte_mksoft_dirty(pte); + else if (is_swap_pte(pte)) + pte = pte_swp_mksoft_dirty(pte); + else if (pte_file(pte)) + pte = pte_file_mksoft_dirty(pte); +#endif + return pte; +} + static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, unsigned long old_addr, unsigned long old_end, struct vm_area_struct *new_vma, pmd_t *new_pmd, @@ -126,7 +144,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, continue; pte = ptep_get_and_clear(mm, old_addr, old_pte); pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); - set_pte_at(mm, new_addr, new_pte, pte_mksoft_dirty(pte)); + pte = move_soft_dirty_pte(pte); + set_pte_at(mm, new_addr, new_pte, pte); } arch_leave_lazy_mmu_mode(); From d661684cf6820331feae71146c35da83d794467e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 22 Aug 2013 11:39:15 -0700 Subject: [PATCH 31/80] net: Check the correct namespace when spoofing pid over SCM_RIGHTS This is a security bug. The follow-up will fix nsproxy to discourage this type of issue from happening again. Cc: stable@vger.kernel.org Signed-off-by: Andy Lutomirski Reviewed-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/scm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/scm.c b/net/core/scm.c index 03795d0147f2..b4da80b1cc07 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -54,7 +54,7 @@ static __inline__ int scm_check_creds(struct ucred *creds) return -EINVAL; if ((creds->pid == task_tgid_vnr(current) || - ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) && + ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || From c2b1df2eb42978073ec27c99cc199d20ae48b849 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 22 Aug 2013 11:39:16 -0700 Subject: [PATCH 32/80] Rename nsproxy.pid_ns to nsproxy.pid_ns_for_children nsproxy.pid_ns is *not* the task's pid namespace. The name should clarify that. This makes it more obvious that setns on a pid namespace is weird -- it won't change the pid namespace shown in procfs. Signed-off-by: Andy Lutomirski Reviewed-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/nsproxy.h | 6 +++++- kernel/fork.c | 5 +++-- kernel/nsproxy.c | 27 ++++++++++++++------------- kernel/pid_namespace.c | 4 ++-- 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 10e5947491c7..b4ec59d159ac 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -14,6 +14,10 @@ struct fs_struct; * A structure to contain pointers to all per-process * namespaces - fs (mount), uts, network, sysvipc, etc. * + * The pid namespace is an exception -- it's accessed using + * task_active_pid_ns. The pid namespace here is the + * namespace that children will use. + * * 'count' is the number of tasks holding a reference. * The count for each namespace, then, will be the number * of nsproxies pointing to it, not the number of tasks. @@ -27,7 +31,7 @@ struct nsproxy { struct uts_namespace *uts_ns; struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; - struct pid_namespace *pid_ns; + struct pid_namespace *pid_ns_for_children; struct net *net_ns; }; extern struct nsproxy init_nsproxy; diff --git a/kernel/fork.c b/kernel/fork.c index e23bb19e2a3e..bf46287c91a4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1177,7 +1177,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, * don't allow the creation of threads. */ if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) && - (task_active_pid_ns(current) != current->nsproxy->pid_ns)) + (task_active_pid_ns(current) != + current->nsproxy->pid_ns_for_children)) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); @@ -1351,7 +1352,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (pid != &init_struct_pid) { retval = -ENOMEM; - pid = alloc_pid(p->nsproxy->pid_ns); + pid = alloc_pid(p->nsproxy->pid_ns_for_children); if (!pid) goto bad_fork_cleanup_io; } diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 364ceab15f0c..997cbb951a3b 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -29,15 +29,15 @@ static struct kmem_cache *nsproxy_cachep; struct nsproxy init_nsproxy = { - .count = ATOMIC_INIT(1), - .uts_ns = &init_uts_ns, + .count = ATOMIC_INIT(1), + .uts_ns = &init_uts_ns, #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC) - .ipc_ns = &init_ipc_ns, + .ipc_ns = &init_ipc_ns, #endif - .mnt_ns = NULL, - .pid_ns = &init_pid_ns, + .mnt_ns = NULL, + .pid_ns_for_children = &init_pid_ns, #ifdef CONFIG_NET - .net_ns = &init_net, + .net_ns = &init_net, #endif }; @@ -85,9 +85,10 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, goto out_ipc; } - new_nsp->pid_ns = copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns); - if (IS_ERR(new_nsp->pid_ns)) { - err = PTR_ERR(new_nsp->pid_ns); + new_nsp->pid_ns_for_children = + copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children); + if (IS_ERR(new_nsp->pid_ns_for_children)) { + err = PTR_ERR(new_nsp->pid_ns_for_children); goto out_pid; } @@ -100,8 +101,8 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, return new_nsp; out_net: - if (new_nsp->pid_ns) - put_pid_ns(new_nsp->pid_ns); + if (new_nsp->pid_ns_for_children) + put_pid_ns(new_nsp->pid_ns_for_children); out_pid: if (new_nsp->ipc_ns) put_ipc_ns(new_nsp->ipc_ns); @@ -174,8 +175,8 @@ void free_nsproxy(struct nsproxy *ns) put_uts_ns(ns->uts_ns); if (ns->ipc_ns) put_ipc_ns(ns->ipc_ns); - if (ns->pid_ns) - put_pid_ns(ns->pid_ns); + if (ns->pid_ns_for_children) + put_pid_ns(ns->pid_ns_for_children); put_net(ns->net_ns); kmem_cache_free(nsproxy_cachep, ns); } diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 6917e8edb48e..601bb361c235 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -349,8 +349,8 @@ static int pidns_install(struct nsproxy *nsproxy, void *ns) if (ancestor != active) return -EINVAL; - put_pid_ns(nsproxy->pid_ns); - nsproxy->pid_ns = get_pid_ns(new); + put_pid_ns(nsproxy->pid_ns_for_children); + nsproxy->pid_ns_for_children = get_pid_ns(new); return 0; } From 449cfcc1cd28a1c16321c47b1e7e77138411a72a Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 23 Aug 2013 15:40:53 +0200 Subject: [PATCH 33/80] jme: lower NAPI weight Since commit 82dc3c63 ("net: introduce NAPI_POLL_WEIGHT") netif_napi_add() produces an error message if a NAPI poll weight greater than 64 is requested. jme requests a quarter of the rx ring size as the NAPI weight. jme's rx ring size is 1 << 9 = 512. Use the standard NAPI weight. v2: proper reference to the related commit Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller --- drivers/net/ethernet/jme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 7fbe6abf6054..23de82a9da82 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -3069,7 +3069,7 @@ jme_init_one(struct pci_dev *pdev, jwrite32(jme, JME_APMC, apmc); } - NETIF_NAPI_SET(netdev, &jme->napi, jme_poll, jme->rx_ring_size >> 2) + NETIF_NAPI_SET(netdev, &jme->napi, jme_poll, NAPI_POLL_WEIGHT) spin_lock_init(&jme->phy_lock); spin_lock_init(&jme->macaddr_lock); From 1e4a5282b4791fb9ba68478ecddae9642f9cfefb Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 23 Aug 2013 15:41:09 +0200 Subject: [PATCH 34/80] netxen: lower NAPI weight Since commit 82dc3c63 ("net: introduce NAPI_POLL_WEIGHT") netif_napi_add() produces an error message if a NAPI poll weight greater than 64 is requested. Use the standard NAPI weight. v2: proper reference to the related commit Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/netxen/netxen_nic.h | 1 - drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h index 3fe09ab2d7c9..32675e16021e 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h @@ -1171,7 +1171,6 @@ typedef struct { #define NETXEN_DB_MAPSIZE_BYTES 0x1000 -#define NETXEN_NETDEV_WEIGHT 128 #define NETXEN_ADAPTER_UP_MAGIC 777 #define NETXEN_NIC_PEG_TUNE 0 diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index c401b0b4353d..ec4cf7fd4123 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -197,7 +197,7 @@ netxen_napi_add(struct netxen_adapter *adapter, struct net_device *netdev) for (ring = 0; ring < adapter->max_sds_rings; ring++) { sds_ring = &recv_ctx->sds_rings[ring]; netif_napi_add(netdev, &sds_ring->napi, - netxen_nic_poll, NETXEN_NETDEV_WEIGHT); + netxen_nic_poll, NAPI_POLL_WEIGHT); } return 0; From ae24f261e11ec30d343c7c5ee805ff29782f726b Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 23 Aug 2013 15:41:19 +0200 Subject: [PATCH 35/80] ps3_gelic: lower NAPI weight Since commit 82dc3c63 ("net: introduce NAPI_POLL_WEIGHT") netif_napi_add() produces an error message if a NAPI poll weight greater than 64 is requested. GELIC_NET_NAPI_WEIGHT is defined to GELIC_NET_RX_DESCRIPTORS, which is 128. Use the standard NAPI weight. v2: proper reference to the related commit Signed-off-by: Michal Schmidt Acked-by: Geoff Levand Signed-off-by: David S. Miller --- drivers/net/ethernet/toshiba/ps3_gelic_net.c | 3 +-- drivers/net/ethernet/toshiba/ps3_gelic_net.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index ad32af67e618..9c805e0c0cae 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1466,8 +1466,7 @@ static void gelic_ether_setup_netdev_ops(struct net_device *netdev, { netdev->watchdog_timeo = GELIC_NET_WATCHDOG_TIMEOUT; /* NAPI */ - netif_napi_add(netdev, napi, - gelic_net_poll, GELIC_NET_NAPI_WEIGHT); + netif_napi_add(netdev, napi, gelic_net_poll, NAPI_POLL_WEIGHT); netdev->ethtool_ops = &gelic_ether_ethtool_ops; netdev->netdev_ops = &gelic_netdevice_ops; } diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h index a93df6ac1909..309abb472aa2 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h @@ -37,7 +37,6 @@ #define GELIC_NET_RXBUF_ALIGN 128 #define GELIC_CARD_RX_CSUM_DEFAULT 1 /* hw chksum */ #define GELIC_NET_WATCHDOG_TIMEOUT 5*HZ -#define GELIC_NET_NAPI_WEIGHT (GELIC_NET_RX_DESCRIPTORS) #define GELIC_NET_BROADCAST_ADDR 0xffffffffffffL #define GELIC_NET_MC_COUNT_MAX 32 /* multicast address list */ From c730b170456d9139c3c1f8a9c1f91837be657d60 Mon Sep 17 00:00:00 2001 From: Ariel Elior Date: Wed, 28 Aug 2013 01:13:00 +0300 Subject: [PATCH 36/80] bnx2x: vf mark stats started Solve issue where no stats were being collected for VF devices due to missing configuration in the stats' atomic synchronization mechanism. Signed-off-by: Ariel Elior Signed-off-by: Yuval Mintz Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_stats.c | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c index d63d1327b051..fed9b1543b9f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c @@ -522,20 +522,16 @@ static void bnx2x_func_stats_init(struct bnx2x *bp) /* should be called under stats_sema */ static void __bnx2x_stats_start(struct bnx2x *bp) { - /* vfs travel through here as part of the statistics FSM, but no action - * is required - */ - if (IS_VF(bp)) - return; + if (IS_PF(bp)) { + if (bp->port.pmf) + bnx2x_port_stats_init(bp); - if (bp->port.pmf) - bnx2x_port_stats_init(bp); + else if (bp->func_stx) + bnx2x_func_stats_init(bp); - else if (bp->func_stx) - bnx2x_func_stats_init(bp); - - bnx2x_hw_stats_post(bp); - bnx2x_storm_stats_post(bp); + bnx2x_hw_stats_post(bp); + bnx2x_storm_stats_post(bp); + } bp->stats_started = true; } From 34d5626afc39c43d63ec7781b648091e92fae45a Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 28 Aug 2013 01:13:01 +0300 Subject: [PATCH 37/80] bnx2x: Fix move FP memory deallocations If driver will fail to allocate all queues, it will shrink the number of queues and move the storage queue to its correct place (i.e., the last queue among the newly supported number). When changing the pointers of the new location of the FCoE queue, we need to pay special attention to the aggregations pointer - that memory is allocated during probe and released upon driver removal. Current implementation has 2 pointers pointing to the same chunk of allocated memory, meaning upon removal there will be two kfree() of the same chunk while the other won't be released. Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index f2d1ff10054b..26b4dfcc0087 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -53,6 +53,7 @@ static inline void bnx2x_move_fp(struct bnx2x *bp, int from, int to) struct bnx2x_fp_stats *to_fp_stats = &bp->fp_stats[to]; int old_max_eth_txqs, new_max_eth_txqs; int old_txdata_index = 0, new_txdata_index = 0; + struct bnx2x_agg_info *old_tpa_info = to_fp->tpa_info; /* Copy the NAPI object as it has been already initialized */ from_fp->napi = to_fp->napi; @@ -61,6 +62,11 @@ static inline void bnx2x_move_fp(struct bnx2x *bp, int from, int to) memcpy(to_fp, from_fp, sizeof(*to_fp)); to_fp->index = to; + /* Retain the tpa_info of the original `to' version as we don't want + * 2 FPs to contain the same tpa_info pointer. + */ + to_fp->tpa_info = old_tpa_info; + /* move sp_objs contents as well, as their indices match fp ones */ memcpy(to_sp_objs, from_sp_objs, sizeof(*to_sp_objs)); From 35a04aa35c2929f24c7f063f42b6d776ad848c24 Mon Sep 17 00:00:00 2001 From: Ariel Elior Date: Wed, 28 Aug 2013 01:13:02 +0300 Subject: [PATCH 38/80] bnx2x: Fix functionality of configuring vlan list The check on return code of bnx2x_vfop_config_vlan0() would lead to error handling flow as the return value indicating an existing pending ramrod would be erroneously considered as an error. Signed-off-by: Ariel Elior Signed-off-by: Yuval Mintz Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 37 +------------------ 1 file changed, 2 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index ad83f4b48777..b7efe27f845c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -522,23 +522,6 @@ static int bnx2x_vfop_set_user_req(struct bnx2x *bp, return 0; } -static int -bnx2x_vfop_config_vlan0(struct bnx2x *bp, - struct bnx2x_vlan_mac_ramrod_params *vlan_mac, - bool add) -{ - int rc; - - vlan_mac->user_req.cmd = add ? BNX2X_VLAN_MAC_ADD : - BNX2X_VLAN_MAC_DEL; - vlan_mac->user_req.u.vlan.vlan = 0; - - rc = bnx2x_config_vlan_mac(bp, vlan_mac); - if (rc == -EEXIST) - rc = 0; - return rc; -} - static int bnx2x_vfop_config_list(struct bnx2x *bp, struct bnx2x_vfop_filters *filters, struct bnx2x_vlan_mac_ramrod_params *vlan_mac) @@ -643,30 +626,14 @@ static void bnx2x_vfop_vlan_mac(struct bnx2x *bp, struct bnx2x_virtf *vf) case BNX2X_VFOP_VLAN_CONFIG_LIST: /* next state */ - vfop->state = BNX2X_VFOP_VLAN_CONFIG_LIST_0; + vfop->state = BNX2X_VFOP_VLAN_MAC_CHK_DONE; - /* remove vlan0 - could be no-op */ - vfop->rc = bnx2x_vfop_config_vlan0(bp, vlan_mac, false); - if (vfop->rc) - goto op_err; - - /* Do vlan list config. if this operation fails we try to - * restore vlan0 to keep the queue is working order - */ + /* do list config */ vfop->rc = bnx2x_vfop_config_list(bp, filters, vlan_mac); if (!vfop->rc) { set_bit(RAMROD_CONT, &vlan_mac->ramrod_flags); vfop->rc = bnx2x_config_vlan_mac(bp, vlan_mac); } - bnx2x_vfop_finalize(vf, vfop->rc, VFOP_CONT); /* fall-through */ - - case BNX2X_VFOP_VLAN_CONFIG_LIST_0: - /* next state */ - vfop->state = BNX2X_VFOP_VLAN_MAC_CHK_DONE; - - if (list_empty(&obj->head)) - /* add vlan0 */ - vfop->rc = bnx2x_vfop_config_vlan0(bp, vlan_mac, true); bnx2x_vfop_finalize(vf, vfop->rc, VFOP_DONE); default: From b4cddbd6dd9b3b9e08c26d8b7247e4e011092117 Mon Sep 17 00:00:00 2001 From: Ariel Elior Date: Wed, 28 Aug 2013 01:13:03 +0300 Subject: [PATCH 39/80] bnx2x: Fix VF memory leak unload Due to incorrect VF/PF conditions, when unloading a VF it will not release part of the memory it has previously allocated. Signed-off-by: Ariel Elior Signed-off-by: Yuval Mintz Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 3 ++- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 26b4dfcc0087..0cc26110868d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2962,8 +2962,9 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) if (IS_PF(bp)) { if (CNIC_LOADED(bp)) bnx2x_free_mem_cnic(bp); - bnx2x_free_mem(bp); } + bnx2x_free_mem(bp); + bp->state = BNX2X_STATE_CLOSED; bp->cnic_loaded = false; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 8bdc8b973007..1627a4e09c32 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -7855,12 +7855,15 @@ void bnx2x_free_mem(struct bnx2x *bp) { int i; - BNX2X_PCI_FREE(bp->def_status_blk, bp->def_status_blk_mapping, - sizeof(struct host_sp_status_block)); - BNX2X_PCI_FREE(bp->fw_stats, bp->fw_stats_mapping, bp->fw_stats_data_sz + bp->fw_stats_req_sz); + if (IS_VF(bp)) + return; + + BNX2X_PCI_FREE(bp->def_status_blk, bp->def_status_blk_mapping, + sizeof(struct host_sp_status_block)); + BNX2X_PCI_FREE(bp->slowpath, bp->slowpath_mapping, sizeof(struct bnx2x_slowpath)); From a3097bda78c7fb41fd3091ffb70bf7bd946e6997 Mon Sep 17 00:00:00 2001 From: Ariel Elior Date: Wed, 28 Aug 2013 01:13:04 +0300 Subject: [PATCH 40/80] bnx2x: Fix VF stats sync Since the PF gathers statistics for the VF, when the VF is about to unload we must synchronize the release of its statistics buffer with the PF, so that no DMA operation will be made to that address after the buffer release. Signed-off-by: Ariel Elior Signed-off-by: Yuval Mintz Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 26 ++++++++++++++++++- .../net/ethernet/broadcom/bnx2x/bnx2x_stats.c | 11 ++++++++ .../net/ethernet/broadcom/bnx2x/bnx2x_stats.h | 3 +++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index b7efe27f845c..e8706e19f96f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -2786,6 +2786,18 @@ int bnx2x_vf_init(struct bnx2x *bp, struct bnx2x_virtf *vf, dma_addr_t *sb_map) return 0; } +struct set_vf_state_cookie { + struct bnx2x_virtf *vf; + u8 state; +}; + +void bnx2x_set_vf_state(void *cookie) +{ + struct set_vf_state_cookie *p = (struct set_vf_state_cookie *)cookie; + + p->vf->state = p->state; +} + /* VFOP close (teardown the queues, delete mcasts and close HW) */ static void bnx2x_vfop_close(struct bnx2x *bp, struct bnx2x_virtf *vf) { @@ -2836,7 +2848,19 @@ static void bnx2x_vfop_close(struct bnx2x *bp, struct bnx2x_virtf *vf) op_err: BNX2X_ERR("VF[%d] CLOSE error: rc %d\n", vf->abs_vfid, vfop->rc); op_done: - vf->state = VF_ACQUIRED; + + /* need to make sure there are no outstanding stats ramrods which may + * cause the device to access the VF's stats buffer which it will free + * as soon as we return from the close flow. + */ + { + struct set_vf_state_cookie cookie; + + cookie.vf = vf; + cookie.state = VF_ACQUIRED; + bnx2x_stats_safe_exec(bp, bnx2x_set_vf_state, &cookie); + } + DP(BNX2X_MSG_IOV, "set state to acquired\n"); bnx2x_vfop_end(bp, vf, vfop); } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c index fed9b1543b9f..86436c77af03 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c @@ -1993,3 +1993,14 @@ void bnx2x_afex_collect_stats(struct bnx2x *bp, void *void_afex_stats, estats->mac_discard); } } + +void bnx2x_stats_safe_exec(struct bnx2x *bp, + void (func_to_exec)(void *cookie), + void *cookie){ + if (down_timeout(&bp->stats_sema, HZ/10)) + BNX2X_ERR("Unable to acquire stats lock\n"); + bnx2x_stats_comp(bp); + func_to_exec(cookie); + __bnx2x_stats_start(bp); + up(&bp->stats_sema); +} diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h index 853824d258e8..f35845006cdd 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h @@ -539,6 +539,9 @@ struct bnx2x; void bnx2x_memset_stats(struct bnx2x *bp); void bnx2x_stats_init(struct bnx2x *bp); void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event); +void bnx2x_stats_safe_exec(struct bnx2x *bp, + void (func_to_exec)(void *cookie), + void *cookie); /** * bnx2x_save_statistics - save statistics when unloading. From fb615499f0ad28ed74201c1cdfddf9e64e205424 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 27 Aug 2013 12:03:01 +0200 Subject: [PATCH 41/80] ALSA: opti9xx: Fix conflicting driver object name The recent commit to delay the release of kobject triggered NULL dereferences of opti9xx drivers. The cause is that all snd-opti92x-ad1848, snd-opti92x-cs4231 and snd-opti93x drivers register the PnP card driver with the very same name, and also snd-opti92x-ad1848 and -cs4231 drivers register the ISA driver with the same name, too. When these drivers are built in, quick "register-release-and-re-register" actions occur, and this results in Oops because of the same name is assigned to the kobject. The fix is simply to assign individual names. As a bonus, by using KBUILD_MODNAME, the patch reduces more lines than it adds. The fix is based on the suggestion by Russell King. Reported-and-tested-by: Fengguang Wu Cc: Signed-off-by: Takashi Iwai --- sound/isa/opti9xx/opti92x-ad1848.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sound/isa/opti9xx/opti92x-ad1848.c b/sound/isa/opti9xx/opti92x-ad1848.c index 103b33373fd4..6effe99bbb9c 100644 --- a/sound/isa/opti9xx/opti92x-ad1848.c +++ b/sound/isa/opti9xx/opti92x-ad1848.c @@ -173,11 +173,7 @@ MODULE_DEVICE_TABLE(pnp_card, snd_opti9xx_pnpids); #endif /* CONFIG_PNP */ -#ifdef OPTi93X -#define DEV_NAME "opti93x" -#else -#define DEV_NAME "opti92x" -#endif +#define DEV_NAME KBUILD_MODNAME static char * snd_opti9xx_names[] = { "unknown", @@ -1167,7 +1163,7 @@ static int snd_opti9xx_pnp_resume(struct pnp_card_link *pcard) static struct pnp_card_driver opti9xx_pnpc_driver = { .flags = PNP_DRIVER_RES_DISABLE, - .name = "opti9xx", + .name = DEV_NAME, .id_table = snd_opti9xx_pnpids, .probe = snd_opti9xx_pnp_probe, .remove = snd_opti9xx_pnp_remove, From 302a50bc941010d7a67f288fd0db31981e4d722d Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 28 Aug 2013 08:47:14 +0200 Subject: [PATCH 42/80] xfrm: Fix potential null pointer dereference in xdst_queue_output The net_device might be not set on the skb when we try refcounting. This leads to a null pointer dereference in xdst_queue_output(). It turned out that the refcount to the net_device is not needed after all. The dst_entry has a refcount to the net_device before we queue the skb, so it can't go away. Therefore we can remove the refcount on queueing to fix the null pointer dereference. Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index e52cab3591dd..f77c371ea72b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -320,10 +320,8 @@ static void xfrm_queue_purge(struct sk_buff_head *list) { struct sk_buff *skb; - while ((skb = skb_dequeue(list)) != NULL) { - dev_put(skb->dev); + while ((skb = skb_dequeue(list)) != NULL) kfree_skb(skb); - } } /* Rule must be locked. Release descentant resources, announce @@ -1758,7 +1756,6 @@ static void xfrm_policy_queue_process(unsigned long arg) struct sk_buff *skb; struct sock *sk; struct dst_entry *dst; - struct net_device *dev; struct xfrm_policy *pol = (struct xfrm_policy *)arg; struct xfrm_policy_queue *pq = &pol->polq; struct flowi fl; @@ -1805,7 +1802,6 @@ static void xfrm_policy_queue_process(unsigned long arg) dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path, &fl, skb->sk, 0); if (IS_ERR(dst)) { - dev_put(skb->dev); kfree_skb(skb); continue; } @@ -1814,9 +1810,7 @@ static void xfrm_policy_queue_process(unsigned long arg) skb_dst_drop(skb); skb_dst_set(skb, dst); - dev = skb->dev; err = dst_output(skb); - dev_put(dev); } return; @@ -1839,7 +1833,6 @@ static int xdst_queue_output(struct sk_buff *skb) } skb_dst_force(skb); - dev_hold(skb->dev); spin_lock_bh(&pq->hold_queue.lock); From f0cc6ffb8ce8961db587e5072168cac0cbc25f05 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 28 Aug 2013 09:18:05 -0700 Subject: [PATCH 43/80] Revert "fs: Allow unprivileged linkat(..., AT_EMPTY_PATH) aka flink" This reverts commit bb2314b47996491bbc5add73633905c3120b6268. It wasn't necessarily wrong per se, but we're still busily discussing the exact details of this all, so I'm going to revert it for now. It's true that you can already do flink() through /proc and that flink() isn't new. But as Brad Spengler points out, some secure environments do not mount proc, and flink adds a new interface that can avoid path lookup of the source for those kinds of environments. We may re-do this (and even mark it for stable backporting back in 3.11 and possibly earlier) once the whole discussion about the interface is done. Cc: Andy Lutomirski Cc: Al Viro Cc: Oleg Nesterov Cc: Brad Spengler Signed-off-by: Linus Torvalds --- fs/namei.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 89a612e392eb..8b61d103a8a7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3671,11 +3671,15 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) return -EINVAL; /* - * Using empty names is equivalent to using AT_SYMLINK_FOLLOW - * on /proc/self/fd/. + * To use null names we require CAP_DAC_READ_SEARCH + * This ensures that not everyone will be able to create + * handlink using the passed filedescriptor. */ - if (flags & AT_EMPTY_PATH) + if (flags & AT_EMPTY_PATH) { + if (!capable(CAP_DAC_READ_SEARCH)) + return -ENOENT; how = LOOKUP_EMPTY; + } if (flags & AT_SYMLINK_FOLLOW) how |= LOOKUP_FOLLOW; From 347e2233b7667e336d9f671f1a52dfa3f0416e2c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 28 Aug 2013 13:35:13 -0400 Subject: [PATCH 44/80] SUNRPC: Fix memory corruption issue on 32-bit highmem systems Some architectures, such as ARM-32 do not return the same base address when you call kmap_atomic() twice on the same page. This causes problems for the memmove() call in the XDR helper routine "_shift_data_right_pages()", since it defeats the detection of overlapping memory ranges, and has been seen to corrupt memory. The fix is to distinguish between the case where we're doing an inter-page copy or not. In the former case of we know that the memory ranges cannot possibly overlap, so we can additionally micro-optimise by replacing memmove() with memcpy(). Reported-by: Mark Young Reported-by: Matt Craighead Cc: Bruce Fields Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Tested-by: Matt Craighead --- net/sunrpc/xdr.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 75edcfad6e26..1504bb11e4f3 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -207,10 +207,13 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base, pgfrom_base -= copy; vto = kmap_atomic(*pgto); - vfrom = kmap_atomic(*pgfrom); - memmove(vto + pgto_base, vfrom + pgfrom_base, copy); + if (*pgto != *pgfrom) { + vfrom = kmap_atomic(*pgfrom); + memcpy(vto + pgto_base, vfrom + pgfrom_base, copy); + kunmap_atomic(vfrom); + } else + memmove(vto + pgto_base, vto + pgfrom_base, copy); flush_dcache_page(*pgto); - kunmap_atomic(vfrom); kunmap_atomic(vto); } while ((len -= copy) != 0); From 9b96309c5b0b9e466773c07a5bc8b7b68fcf010a Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 23 Aug 2013 12:44:55 -0700 Subject: [PATCH 45/80] genl: Fix genl dumpit() locking. In case of genl-family with parallel ops off, dumpif() callback is expected to run under genl_lock, But commit def3117493eafd9df (genl: Allow concurrent genl callbacks.) changed this behaviour where only first dumpit() op was called under genl-lock. For subsequent dump, only nlk->cb_lock was taken. Following patch fixes it by defining locked dumpit() and done() callback which takes care of genl-locking. CC: Jesse Gross CC: Johannes Berg Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 51 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 512718adb0d5..7e0f4d199ade 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -544,6 +544,30 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, } EXPORT_SYMBOL(genlmsg_put); +static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct genl_ops *ops = cb->data; + int rc; + + genl_lock(); + rc = ops->dumpit(skb, cb); + genl_unlock(); + return rc; +} + +static int genl_lock_done(struct netlink_callback *cb) +{ + struct genl_ops *ops = cb->data; + int rc = 0; + + if (ops->done) { + genl_lock(); + rc = ops->done(cb); + genl_unlock(); + } + return rc; +} + static int genl_family_rcv_msg(struct genl_family *family, struct sk_buff *skb, struct nlmsghdr *nlh) @@ -572,15 +596,32 @@ static int genl_family_rcv_msg(struct genl_family *family, return -EPERM; if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { - struct netlink_dump_control c = { - .dump = ops->dumpit, - .done = ops->done, - }; + int rc; if (ops->dumpit == NULL) return -EOPNOTSUPP; - return netlink_dump_start(net->genl_sock, skb, nlh, &c); + if (!family->parallel_ops) { + struct netlink_dump_control c = { + .data = ops, + .dump = genl_lock_dumpit, + .done = genl_lock_done, + }; + + genl_unlock(); + rc = netlink_dump_start(net->genl_sock, skb, nlh, &c); + genl_lock(); + + } else { + struct netlink_dump_control c = { + .dump = ops->dumpit, + .done = ops->done, + }; + + rc = netlink_dump_start(net->genl_sock, skb, nlh, &c); + } + + return rc; } if (ops->doit == NULL) From 33c6b1f6b154894321f5734e50c66621e9134e7e Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 23 Aug 2013 12:45:04 -0700 Subject: [PATCH 46/80] genl: Hold reference on correct module while netlink-dump. netlink dump operations take module as parameter to hold reference for entire netlink dump duration. Currently it holds ref only on genl module which is not correct when we use ops registered to genl from another module. Following patch adds module pointer to genl_ops so that netlink can hold ref count on it. CC: Jesse Gross CC: Johannes Berg Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/genetlink.h | 20 ++++++++++++++++++-- net/netlink/genetlink.c | 20 +++++++++++--------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 93024a47e0e2..8e0b6c856a13 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -61,6 +61,7 @@ struct genl_family { struct list_head ops_list; /* private */ struct list_head family_list; /* private */ struct list_head mcast_groups; /* private */ + struct module *module; }; /** @@ -121,9 +122,24 @@ struct genl_ops { struct list_head ops_list; }; -extern int genl_register_family(struct genl_family *family); -extern int genl_register_family_with_ops(struct genl_family *family, +extern int __genl_register_family(struct genl_family *family); + +static inline int genl_register_family(struct genl_family *family) +{ + family->module = THIS_MODULE; + return __genl_register_family(family); +} + +extern int __genl_register_family_with_ops(struct genl_family *family, struct genl_ops *ops, size_t n_ops); + +static inline int genl_register_family_with_ops(struct genl_family *family, + struct genl_ops *ops, size_t n_ops) +{ + family->module = THIS_MODULE; + return __genl_register_family_with_ops(family, ops, n_ops); +} + extern int genl_unregister_family(struct genl_family *family); extern int genl_register_ops(struct genl_family *, struct genl_ops *ops); extern int genl_unregister_ops(struct genl_family *, struct genl_ops *ops); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 7e0f4d199ade..0c741cec4d0d 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -364,7 +364,7 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops) EXPORT_SYMBOL(genl_unregister_ops); /** - * genl_register_family - register a generic netlink family + * __genl_register_family - register a generic netlink family * @family: generic netlink family * * Registers the specified family after validating it first. Only one @@ -374,7 +374,7 @@ EXPORT_SYMBOL(genl_unregister_ops); * * Return 0 on success or a negative error code. */ -int genl_register_family(struct genl_family *family) +int __genl_register_family(struct genl_family *family) { int err = -EINVAL; @@ -430,10 +430,10 @@ errout_locked: errout: return err; } -EXPORT_SYMBOL(genl_register_family); +EXPORT_SYMBOL(__genl_register_family); /** - * genl_register_family_with_ops - register a generic netlink family + * __genl_register_family_with_ops - register a generic netlink family * @family: generic netlink family * @ops: operations to be registered * @n_ops: number of elements to register @@ -457,12 +457,12 @@ EXPORT_SYMBOL(genl_register_family); * * Return 0 on success or a negative error code. */ -int genl_register_family_with_ops(struct genl_family *family, +int __genl_register_family_with_ops(struct genl_family *family, struct genl_ops *ops, size_t n_ops) { int err, i; - err = genl_register_family(family); + err = __genl_register_family(family); if (err) return err; @@ -476,7 +476,7 @@ err_out: genl_unregister_family(family); return err; } -EXPORT_SYMBOL(genl_register_family_with_ops); +EXPORT_SYMBOL(__genl_register_family_with_ops); /** * genl_unregister_family - unregister generic netlink family @@ -603,22 +603,24 @@ static int genl_family_rcv_msg(struct genl_family *family, if (!family->parallel_ops) { struct netlink_dump_control c = { + .module = family->module, .data = ops, .dump = genl_lock_dumpit, .done = genl_lock_done, }; genl_unlock(); - rc = netlink_dump_start(net->genl_sock, skb, nlh, &c); + rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c); genl_lock(); } else { struct netlink_dump_control c = { + .module = family->module, .dump = ops->dumpit, .done = ops->done, }; - rc = netlink_dump_start(net->genl_sock, skb, nlh, &c); + rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c); } return rc; From 64c3b252e9fc8256ef7b1b99ba60492163cd06e8 Mon Sep 17 00:00:00 2001 From: Byungho An Date: Sat, 24 Aug 2013 15:31:43 +0900 Subject: [PATCH 47/80] net: stmmac: fixed the pbl setting with DT This patch fixed the pbl(programmable burst length) setting using DT. Even though the default pbl is 8, If there is no pbl property in device tree file, pbl is set 0 and it causes bandwidth degradation. Signed-off-by: Byungho An Signed-off-by: David S. Miller --- .../ethernet/stmicro/stmmac/stmmac_platform.c | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 03de76c7a177..1c83a44c547b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -71,14 +71,18 @@ static int stmmac_probe_config_dt(struct platform_device *pdev, plat->force_sf_dma_mode = 1; } - dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*dma_cfg), GFP_KERNEL); - if (!dma_cfg) - return -ENOMEM; - - plat->dma_cfg = dma_cfg; - of_property_read_u32(np, "snps,pbl", &dma_cfg->pbl); - dma_cfg->fixed_burst = of_property_read_bool(np, "snps,fixed-burst"); - dma_cfg->mixed_burst = of_property_read_bool(np, "snps,mixed-burst"); + if (of_find_property(np, "snps,pbl", NULL)) { + dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*dma_cfg), + GFP_KERNEL); + if (!dma_cfg) + return -ENOMEM; + plat->dma_cfg = dma_cfg; + of_property_read_u32(np, "snps,pbl", &dma_cfg->pbl); + dma_cfg->fixed_burst = + of_property_read_bool(np, "snps,fixed-burst"); + dma_cfg->mixed_burst = + of_property_read_bool(np, "snps,mixed-burst"); + } return 0; } From 3046e2f5b79a86044ac0a29c69610d6ac6a4b882 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Sun, 25 Aug 2013 10:23:46 +0300 Subject: [PATCH 48/80] net: add cpu_relax to busy poll loop Add a cpu_relaxt to sk_busy_loop. Julie Cummings reported performance issues when hyperthreading is on. Arjan van de Ven observed that we should have a cpu_relax() in the busy poll loop. Reported-by: Julie Cummings Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- include/net/busy_poll.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 8a358a2c97e6..829627d7b846 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -123,6 +123,7 @@ static inline bool sk_busy_loop(struct sock *sk, int nonblock) /* local bh are disabled so it is ok to use _BH */ NET_ADD_STATS_BH(sock_net(sk), LINUX_MIB_BUSYPOLLRXPACKETS, rc); + cpu_relax(); } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && !need_resched() && !busy_loop_timeout(end_time)); From 03803a59e32453ee5737c6096a295f748f03cc49 Mon Sep 17 00:00:00 2001 From: Rob Gardner Date: Sun, 25 Aug 2013 16:02:23 -0600 Subject: [PATCH 49/80] net: usb: Add HP hs2434 device to ZLP exception table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds another entry (HP hs2434 Mobile Broadband) to the list of exceptional devices that require a zero length packet in order to function properly. This list was added in commit 844e88f0. The hs2434 is manufactured by Sierra Wireless, who also produces the MC7710, which the ZLP exception list was created for in the first place. So hopefully it is just this one producer's devices that will need this workaround. Tested on a DM1-4310NR HP notebook, which does not function without this change. Signed-off-by: Rob Gardner Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_mbim.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 872819851aef..25ba7eca9a13 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -400,6 +400,10 @@ static const struct usb_device_id mbim_devs[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x68a2, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info_zlp, }, + /* HP hs2434 Mobile Broadband Module needs ZLPs */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3f0, 0x4b1d, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&cdc_mbim_info_zlp, + }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info, }, From 282a1dffc1b9976cdf1b0eea3f6f68fda23a7c7e Mon Sep 17 00:00:00 2001 From: Libo Chen Date: Mon, 26 Aug 2013 11:30:55 +0800 Subject: [PATCH 50/80] net: xilinx: fix memleak decrease device_node refcount np1 in err case. Signed-off-by: Libo Chen Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c index e90e1f46121e..64b4639f43b6 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c @@ -175,6 +175,7 @@ int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np) printk(KERN_WARNING "Setting MDIO clock divisor to " "default %d\n", DEFAULT_CLOCK_DIVISOR); clk_div = DEFAULT_CLOCK_DIVISOR; + of_node_put(np1); goto issue; } From 0f8f2aaaab0b0f9c13635cb02e7d19bdaa9aa1bb Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 28 Aug 2013 18:13:26 -0700 Subject: [PATCH 51/80] Add new lockref infrastructure reference implementation This introduces a new "lockref" structure that supports the concept of lockless updates of reference counts that still honor an attached spinlock. NOTE! This reference implementation is not the optimized lockless version, rather it is the fallback implementation using standard spinlocks. The actual optimized versions will be merged into 3.12, but I wanted to get the infrastructure in place and document the new interfaces. [ Also note that this particular commit is drastically cut-down minimal version of the original patch by Waiman. In order to properly credit the original author I'm marking Waiman as the author here, but in the end this patch bears little resemblance to the patch by Waiman. So blame any errors on me editing things down to the point where I can introduce the infrastructure before the merge window for 3.12 actually opens. - Linus ] Signed-off-by: Waiman Long Signed-off-by: Linus Torvalds --- include/linux/lockref.h | 71 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 include/linux/lockref.h diff --git a/include/linux/lockref.h b/include/linux/lockref.h new file mode 100644 index 000000000000..01233e01627a --- /dev/null +++ b/include/linux/lockref.h @@ -0,0 +1,71 @@ +#ifndef __LINUX_LOCKREF_H +#define __LINUX_LOCKREF_H + +/* + * Locked reference counts. + * + * These are different from just plain atomic refcounts in that they + * are atomic with respect to the spinlock that goes with them. In + * particular, there can be implementations that don't actually get + * the spinlock for the common decrement/increment operations, but they + * still have to check that the operation is done semantically as if + * the spinlock had been taken (using a cmpxchg operation that covers + * both the lock and the count word, or using memory transactions, for + * example). + */ + +#include + +struct lockref { + spinlock_t lock; + unsigned int count; +}; + +/** + * lockref_get - Increments reference count unconditionally + * @lockcnt: pointer to lockref structure + * + * This operation is only valid if you already hold a reference + * to the object, so you know the count cannot be zero. + */ +static inline void lockref_get(struct lockref *lockref) +{ + spin_lock(&lockref->lock); + lockref->count++; + spin_unlock(&lockref->lock); +} + +/** + * lockref_get_not_zero - Increments count unless the count is 0 + * @lockcnt: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count is 0 + */ +static inline int lockref_get_not_zero(struct lockref *lockref) +{ + int retval = 0; + + spin_lock(&lockref->lock); + if (lockref->count) { + lockref->count++; + retval = 1; + } + spin_unlock(&lockref->lock); + return retval; +} + +/** + * lockref_put_or_lock - decrements count unless count <= 1 before decrement + * @lockcnt: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken + */ +static inline int lockref_put_or_lock(struct lockref *lockref) +{ + spin_lock(&lockref->lock); + if (lockref->count <= 1) + return 0; + lockref->count--; + spin_unlock(&lockref->lock); + return 1; +} + +#endif /* __LINUX_LOCKREF_H */ From 98474236f72e5a8b89c14cd7c74f0bb77a4b1a99 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 28 Aug 2013 18:24:59 -0700 Subject: [PATCH 52/80] vfs: make the dentry cache use the lockref infrastructure This just replaces the dentry count/lock combination with the lockref structure that contains both a count and a spinlock, and does the mechanical conversion to use the lockref infrastructure. There are no semantic changes here, it's purely syntactic. The reference lockref implementation uses the spinlock exactly the same way that the old dcache code did, and the bulk of this patch is just expanding the internal "d_count" use in the dcache code to use "d_lockref.count" instead. This is purely preparation for the real change to make the reference count updates be lockless during the 3.12 merge window. [ As with the previous commit, this is a rewritten version of a concept originally from Waiman, so credit goes to him, blame for any errors goes to me. Waiman's patch had some semantic differences for taking advantage of the lockless update in dget_parent(), while this patch is intentionally a pure search-and-replace change with no semantic changes. - Linus ] Signed-off-by: Waiman Long Signed-off-by: Linus Torvalds --- fs/dcache.c | 57 +++++++++++++++++------------------------- fs/namei.c | 6 ++--- include/linux/dcache.h | 19 +++++++------- 3 files changed, 35 insertions(+), 47 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 83cfb834db03..b949af850cd6 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -229,7 +229,7 @@ static void __d_free(struct rcu_head *head) */ static void d_free(struct dentry *dentry) { - BUG_ON(dentry->d_count); + BUG_ON(dentry->d_lockref.count); this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); @@ -467,7 +467,7 @@ relock: } if (ref) - dentry->d_count--; + dentry->d_lockref.count--; /* * inform the fs via d_prune that this dentry is about to be * unhashed and destroyed. @@ -513,15 +513,10 @@ void dput(struct dentry *dentry) return; repeat: - if (dentry->d_count == 1) + if (dentry->d_lockref.count == 1) might_sleep(); - spin_lock(&dentry->d_lock); - BUG_ON(!dentry->d_count); - if (dentry->d_count > 1) { - dentry->d_count--; - spin_unlock(&dentry->d_lock); + if (lockref_put_or_lock(&dentry->d_lockref)) return; - } if (dentry->d_flags & DCACHE_OP_DELETE) { if (dentry->d_op->d_delete(dentry)) @@ -535,7 +530,7 @@ repeat: dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); - dentry->d_count--; + dentry->d_lockref.count--; spin_unlock(&dentry->d_lock); return; @@ -590,7 +585,7 @@ int d_invalidate(struct dentry * dentry) * We also need to leave mountpoints alone, * directory or not. */ - if (dentry->d_count > 1 && dentry->d_inode) { + if (dentry->d_lockref.count > 1 && dentry->d_inode) { if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) { spin_unlock(&dentry->d_lock); return -EBUSY; @@ -606,14 +601,12 @@ EXPORT_SYMBOL(d_invalidate); /* This must be called with d_lock held */ static inline void __dget_dlock(struct dentry *dentry) { - dentry->d_count++; + dentry->d_lockref.count++; } static inline void __dget(struct dentry *dentry) { - spin_lock(&dentry->d_lock); - __dget_dlock(dentry); - spin_unlock(&dentry->d_lock); + lockref_get(&dentry->d_lockref); } struct dentry *dget_parent(struct dentry *dentry) @@ -634,8 +627,8 @@ repeat: goto repeat; } rcu_read_unlock(); - BUG_ON(!ret->d_count); - ret->d_count++; + BUG_ON(!ret->d_lockref.count); + ret->d_lockref.count++; spin_unlock(&ret->d_lock); return ret; } @@ -718,7 +711,7 @@ restart: spin_lock(&inode->i_lock); hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); - if (!dentry->d_count) { + if (!dentry->d_lockref.count) { __dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); @@ -763,12 +756,8 @@ static void try_prune_one_dentry(struct dentry *dentry) /* Prune ancestors. */ dentry = parent; while (dentry) { - spin_lock(&dentry->d_lock); - if (dentry->d_count > 1) { - dentry->d_count--; - spin_unlock(&dentry->d_lock); + if (lockref_put_or_lock(&dentry->d_lockref)) return; - } dentry = dentry_kill(dentry, 1); } } @@ -793,7 +782,7 @@ static void shrink_dentry_list(struct list_head *list) * the LRU because of laziness during lookup. Do not free * it - just keep it off the LRU list. */ - if (dentry->d_count) { + if (dentry->d_lockref.count) { dentry_lru_del(dentry); spin_unlock(&dentry->d_lock); continue; @@ -913,7 +902,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) dentry_lru_del(dentry); __d_shrink(dentry); - if (dentry->d_count != 0) { + if (dentry->d_lockref.count != 0) { printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%s}" " still in use (%d)" @@ -922,7 +911,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) dentry->d_inode ? dentry->d_inode->i_ino : 0UL, dentry->d_name.name, - dentry->d_count, + dentry->d_lockref.count, dentry->d_sb->s_type->name, dentry->d_sb->s_id); BUG(); @@ -933,7 +922,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) list_del(&dentry->d_u.d_child); } else { parent = dentry->d_parent; - parent->d_count--; + parent->d_lockref.count--; list_del(&dentry->d_u.d_child); } @@ -981,7 +970,7 @@ void shrink_dcache_for_umount(struct super_block *sb) dentry = sb->s_root; sb->s_root = NULL; - dentry->d_count--; + dentry->d_lockref.count--; shrink_dcache_for_umount_subtree(dentry); while (!hlist_bl_empty(&sb->s_anon)) { @@ -1147,7 +1136,7 @@ resume: * loop in shrink_dcache_parent() might not make any progress * and loop forever. */ - if (dentry->d_count) { + if (dentry->d_lockref.count) { dentry_lru_del(dentry); } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { dentry_lru_move_list(dentry, dispose); @@ -1269,7 +1258,7 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) smp_wmb(); dentry->d_name.name = dname; - dentry->d_count = 1; + dentry->d_lockref.count = 1; dentry->d_flags = 0; spin_lock_init(&dentry->d_lock); seqcount_init(&dentry->d_seq); @@ -1970,7 +1959,7 @@ struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name) goto next; } - dentry->d_count++; + dentry->d_lockref.count++; found = dentry; spin_unlock(&dentry->d_lock); break; @@ -2069,7 +2058,7 @@ again: spin_lock(&dentry->d_lock); inode = dentry->d_inode; isdir = S_ISDIR(inode->i_mode); - if (dentry->d_count == 1) { + if (dentry->d_lockref.count == 1) { if (!spin_trylock(&inode->i_lock)) { spin_unlock(&dentry->d_lock); cpu_relax(); @@ -2948,7 +2937,7 @@ resume: } if (!(dentry->d_flags & DCACHE_GENOCIDE)) { dentry->d_flags |= DCACHE_GENOCIDE; - dentry->d_count--; + dentry->d_lockref.count--; } spin_unlock(&dentry->d_lock); } @@ -2956,7 +2945,7 @@ resume: struct dentry *child = this_parent; if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { this_parent->d_flags |= DCACHE_GENOCIDE; - this_parent->d_count--; + this_parent->d_lockref.count--; } this_parent = try_to_ascend(this_parent, locked, seq); if (!this_parent) diff --git a/fs/namei.c b/fs/namei.c index 8b61d103a8a7..7720fbd5277b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -536,8 +536,8 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) * a reference at this point. */ BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent); - BUG_ON(!parent->d_count); - parent->d_count++; + BUG_ON(!parent->d_lockref.count); + parent->d_lockref.count++; spin_unlock(&dentry->d_lock); } spin_unlock(&parent->d_lock); @@ -3327,7 +3327,7 @@ void dentry_unhash(struct dentry *dentry) { shrink_dcache_parent(dentry); spin_lock(&dentry->d_lock); - if (dentry->d_count == 1) + if (dentry->d_lockref.count == 1) __d_drop(dentry); spin_unlock(&dentry->d_lock); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 4a12532da8c4..efdc94434c30 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -9,6 +9,7 @@ #include #include #include +#include struct nameidata; struct path; @@ -100,6 +101,8 @@ extern unsigned int full_name_hash(const unsigned char *, unsigned int); # endif #endif +#define d_lock d_lockref.lock + struct dentry { /* RCU lookup touched fields */ unsigned int d_flags; /* protected by d_lock */ @@ -112,8 +115,7 @@ struct dentry { unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ /* Ref lookup also touches following */ - unsigned int d_count; /* protected by d_lock */ - spinlock_t d_lock; /* per dentry lock */ + struct lockref d_lockref; /* per-dentry lock and refcount */ const struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ unsigned long d_time; /* used by d_revalidate */ @@ -318,7 +320,7 @@ static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq) assert_spin_locked(&dentry->d_lock); if (!read_seqcount_retry(&dentry->d_seq, seq)) { ret = 1; - dentry->d_count++; + dentry->d_lockref.count++; } return ret; @@ -326,7 +328,7 @@ static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq) static inline unsigned d_count(const struct dentry *dentry) { - return dentry->d_count; + return dentry->d_lockref.count; } /* validate "insecure" dentry pointer */ @@ -357,17 +359,14 @@ extern char *dentry_path(struct dentry *, char *, int); static inline struct dentry *dget_dlock(struct dentry *dentry) { if (dentry) - dentry->d_count++; + dentry->d_lockref.count++; return dentry; } static inline struct dentry *dget(struct dentry *dentry) { - if (dentry) { - spin_lock(&dentry->d_lock); - dget_dlock(dentry); - spin_unlock(&dentry->d_lock); - } + if (dentry) + lockref_get(&dentry->d_lockref); return dentry; } From 84a78a6504f5c5394a8e558702e5b54131f01d14 Mon Sep 17 00:00:00 2001 From: Nathan Zimmer Date: Wed, 28 Aug 2013 16:35:14 -0700 Subject: [PATCH 53/80] timer_list: correct the iterator for timer_list Correct an issue with /proc/timer_list reported by Holger. When reading from the proc file with a sufficiently small buffer, 2k so not really that small, there was one could get hung trying to read the file a chunk at a time. The timer_list_start function failed to account for the possibility that the offset was adjusted outside the timer_list_next. Signed-off-by: Nathan Zimmer Reported-by: Holger Hans Peter Freyther Cc: John Stultz Cc: Thomas Gleixner Cc: Berke Durak Cc: Jeff Layton Tested-by: Al Viro Cc: # 3.10.x Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/timer_list.c | 43 +++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 3bdf28323012..61ed862cdd37 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -265,10 +265,9 @@ static inline void timer_list_header(struct seq_file *m, u64 now) static int timer_list_show(struct seq_file *m, void *v) { struct timer_list_iter *iter = v; - u64 now = ktime_to_ns(ktime_get()); if (iter->cpu == -1 && !iter->second_pass) - timer_list_header(m, now); + timer_list_header(m, iter->now); else if (!iter->second_pass) print_cpu(m, iter->cpu, iter->now); #ifdef CONFIG_GENERIC_CLOCKEVENTS @@ -298,33 +297,41 @@ void sysrq_timer_list_show(void) return; } +static void *move_iter(struct timer_list_iter *iter, loff_t offset) +{ + for (; offset; offset--) { + iter->cpu = cpumask_next(iter->cpu, cpu_online_mask); + if (iter->cpu >= nr_cpu_ids) { +#ifdef CONFIG_GENERIC_CLOCKEVENTS + if (!iter->second_pass) { + iter->cpu = -1; + iter->second_pass = true; + } else + return NULL; +#else + return NULL; +#endif + } + } + return iter; +} + static void *timer_list_start(struct seq_file *file, loff_t *offset) { struct timer_list_iter *iter = file->private; - if (!*offset) { - iter->cpu = -1; + if (!*offset) iter->now = ktime_to_ns(ktime_get()); - } else if (iter->cpu >= nr_cpu_ids) { -#ifdef CONFIG_GENERIC_CLOCKEVENTS - if (!iter->second_pass) { - iter->cpu = -1; - iter->second_pass = true; - } else - return NULL; -#else - return NULL; -#endif - } - return iter; + iter->cpu = -1; + iter->second_pass = false; + return move_iter(iter, *offset); } static void *timer_list_next(struct seq_file *file, void *v, loff_t *offset) { struct timer_list_iter *iter = file->private; - iter->cpu = cpumask_next(iter->cpu, cpu_online_mask); ++*offset; - return timer_list_start(file, offset); + return move_iter(iter, 1); } static void timer_list_stop(struct seq_file *seq, void *v) From aaaafb7f953c60d9c9eeb1b10ecdbe6970421b24 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 28 Aug 2013 16:35:16 -0700 Subject: [PATCH 54/80] Omnikey Cardman 4000: pull in ioctl.h in user header This file uses the ioctl helpers (_IOR/_IOW/etc...), so include ioctl.h for the definitions. Signed-off-by: Mike Frysinger Cc: Harald Welte Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/cm4000_cs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/cm4000_cs.h b/include/uapi/linux/cm4000_cs.h index bc51f77db918..1217f751a1bc 100644 --- a/include/uapi/linux/cm4000_cs.h +++ b/include/uapi/linux/cm4000_cs.h @@ -2,6 +2,7 @@ #define _UAPI_CM4000_H_ #include +#include #define MAX_ATR 33 From 368ae537e056acd3f751fa276f48423f06803922 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Svenning=20S=C3=B8rensen?= Date: Wed, 28 Aug 2013 16:35:17 -0700 Subject: [PATCH 55/80] IPC: bugfix for msgrcv with msgtyp < 0 According to 'man msgrcv': "If msgtyp is less than 0, the first message of the lowest type that is less than or equal to the absolute value of msgtyp shall be received." Bug: The kernel only returns a message if its type is 1; other messages with type < abs(msgtype) will never get returned. Fix: After having traversed the list to find the first message with the lowest type, we need to actually return that message. This regression was introduced by commit daaf74cf0867 ("ipc: refactor msg list search into separate function") Signed-off-by: Svenning Soerensen Reviewed-by: Peter Hurley Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/msg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ipc/msg.c b/ipc/msg.c index bd60d7e159e8..9f29d9e89bac 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -839,7 +839,7 @@ static inline void free_copy(struct msg_msg *copy) static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) { - struct msg_msg *msg; + struct msg_msg *msg, *found = NULL; long count = 0; list_for_each_entry(msg, &msq->q_messages, m_list) { @@ -848,6 +848,7 @@ static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) *msgtyp, mode)) { if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) { *msgtyp = msg->m_type - 1; + found = msg; } else if (mode == SEARCH_NUMBER) { if (*msgtyp == count) return msg; @@ -857,7 +858,7 @@ static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) } } - return ERR_PTR(-EAGAIN); + return found ?: ERR_PTR(-EAGAIN); } long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, From 21ea9f5ace3a7317cc3ba1fbc749758021a83136 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Wed, 28 Aug 2013 16:35:18 -0700 Subject: [PATCH 56/80] drivers/base/memory.c: fix show_mem_removable() to handle missing sections "cat /sys/devices/system/memory/memory*/removable" crashed the system. The problem is that show_mem_removable() is passing a bad pfn to is_mem_section_removable(), which causes if (!node_online(page_to_nid(page))) to blow up. Why is it passing in a bad pfn? The reason is that show_mem_removable() will loop sections_per_block times. sections_per_block is 16, but mem->section_count is 8, indicating holes in this memory block. Checking that the memory section is present before checking to see if the memory section is removable fixes the problem. harp5-sys:~ # cat /sys/devices/system/memory/memory*/removable 0 1 1 1 1 1 1 1 1 1 1 1 1 1 BUG: unable to handle kernel paging request at ffffea00c3200000 IP: [] is_pageblock_removable_nolock+0x1/0x90 PGD 83ffd4067 PUD 37bdfce067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: autofs4 binfmt_misc rdma_ucm rdma_cm iw_cm ib_addr ib_srp scsi_transport_srp scsi_tgt ib_ipoib ib_cm ib_uverbs ib_umad iw_cxgb3 cxgb3 mdio mlx4_en mlx4_ib ib_sa mlx4_core ib_mthca ib_mad ib_core fuse nls_iso8859_1 nls_cp437 vfat fat joydev loop hid_generic usbhid hid hwperf(O) numatools(O) dm_mod iTCO_wdt ipv6 iTCO_vendor_support igb i2c_i801 ioatdma i2c_algo_bit ehci_pci pcspkr lpc_ich i2c_core ehci_hcd ptp sg mfd_core dca rtc_cmos pps_core mperf button xhci_hcd sd_mod crc_t10dif usbcore usb_common scsi_dh_emc scsi_dh_hp_sw scsi_dh_alua scsi_dh_rdac scsi_dh gru(O) xvma(O) xfs crc32c libcrc32c thermal sata_nv processor piix mptsas mptscsih scsi_transport_sas mptbase megaraid_sas fan thermal_sys hwmon ext3 jbd ata_piix ahci libahci libata scsi_mod CPU: 4 PID: 5991 Comm: cat Tainted: G O 3.11.0-rc5-rja-uv+ #10 Hardware name: SGI UV2000/ROMLEY, BIOS SGI UV 2000/3000 series BIOS 01/15/2013 task: ffff88081f034580 ti: ffff880820022000 task.ti: ffff880820022000 RIP: 0010:[] [] is_pageblock_removable_nolock+0x1/0x90 RSP: 0018:ffff880820023df8 EFLAGS: 00010287 RAX: 0000000000040000 RBX: ffffea00c3200000 RCX: 0000000000000004 RDX: ffffea00c30b0000 RSI: 00000000001c0000 RDI: ffffea00c3200000 RBP: ffff880820023e38 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000001 R12: ffffea00c33c0000 R13: 0000160000000000 R14: 6db6db6db6db6db7 R15: 0000000000000001 FS: 00007ffff7fb2700(0000) GS:ffff88083fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffea00c3200000 CR3: 000000081b954000 CR4: 00000000000407e0 Call Trace: show_mem_removable+0x41/0x70 dev_attr_show+0x2a/0x60 sysfs_read_file+0xf7/0x1c0 vfs_read+0xc8/0x130 SyS_read+0x5d/0xa0 system_call_fastpath+0x16/0x1b Signed-off-by: Russ Anderson Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Yinghai Lu Reviewed-by: Yasuaki Ishimatsu Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 2b7813ec6d02..ec386ee9cb22 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -141,6 +141,8 @@ static ssize_t show_mem_removable(struct device *dev, container_of(dev, struct memory_block, dev); for (i = 0; i < sections_per_block; i++) { + if (!present_section_nr(mem->start_section_nr + i)) + continue; pfn = section_nr_to_pfn(mem->start_section_nr + i); ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION); } From 6f6b8951897e487ea6f77b90ea01f70a9c363770 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 28 Aug 2013 16:35:20 -0700 Subject: [PATCH 57/80] memcg: check that kmem_cache has memcg_params before accessing it If the system had a few memory groups and all of them were destroyed, memcg_limited_groups_array_size has non-zero value, but all new caches are created without memcg_params, because memcg_kmem_enabled() returns false. We try to enumirate child caches in a few places and all of them are potentially dangerous. For example my kernel is compiled with CONFIG_SLAB and it crashed when I tryed to mount a NFS share after a few experiments with kmemcg. BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [] do_tune_cpucache+0x8a/0xd0 PGD b942a067 PUD b999f067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: fscache(+) ip6table_filter ip6_tables iptable_filter ip_tables i2c_piix4 pcspkr virtio_net virtio_balloon i2c_core floppy CPU: 0 PID: 357 Comm: modprobe Not tainted 3.11.0-rc7+ #59 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff8800b9f98240 ti: ffff8800ba32e000 task.ti: ffff8800ba32e000 RIP: 0010:[] [] do_tune_cpucache+0x8a/0xd0 RSP: 0018:ffff8800ba32fb70 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006 RDX: 0000000000000000 RSI: ffff8800b9f98910 RDI: 0000000000000246 RBP: ffff8800ba32fba0 R08: 0000000000000002 R09: 0000000000000004 R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000010 R13: 0000000000000008 R14: 00000000000000d0 R15: ffff8800375d0200 FS: 00007f55f1378740(0000) GS:ffff8800bfa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f24feba57a0 CR3: 0000000037b51000 CR4: 00000000000006f0 Call Trace: enable_cpucache+0x49/0x100 setup_cpu_cache+0x215/0x280 __kmem_cache_create+0x2fa/0x450 kmem_cache_create_memcg+0x214/0x350 kmem_cache_create+0x2b/0x30 fscache_init+0x19b/0x230 [fscache] do_one_initcall+0xfa/0x1b0 load_module+0x1c41/0x26d0 SyS_finit_module+0x86/0xb0 system_call_fastpath+0x16/0x1b Signed-off-by: Andrey Vagin Cc: Pekka Enberg Cc: Christoph Lameter Cc: Glauber Costa Cc: Joonsoo Kim Cc: Michal Hocko Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/slab.h b/mm/slab.h index 620ceeddbe1a..a535033f7e9a 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -162,6 +162,8 @@ static inline const char *cache_name(struct kmem_cache *s) static inline struct kmem_cache *cache_from_memcg(struct kmem_cache *s, int idx) { + if (!s->memcg_params) + return NULL; return s->memcg_params->memcg_caches[idx]; } From 49fa8140e487b492da24c76ac3cccad0a0ae63e4 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Wed, 28 Aug 2013 16:35:21 -0700 Subject: [PATCH 58/80] fs/ocfs2/super.c: Use bigger nodestr to accomodate 32-bit node numbers While using pacemaker/corosync, the node numbers are generated using IP address as opposed to serial node number generation. This may not fit in a 8-byte string. Use a bigger string to print the complete node number. Signed-off-by: Goldwyn Rodrigues Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 854d80955bf8..121da2dc3be8 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1022,7 +1022,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) struct inode *inode = NULL; struct ocfs2_super *osb = NULL; struct buffer_head *bh = NULL; - char nodestr[8]; + char nodestr[12]; struct ocfs2_blockcheck_stats stats; trace_ocfs2_fill_super(sb, data, silent); From b22ce2785d97423846206cceec4efee0c4afd980 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 28 Aug 2013 17:33:37 -0400 Subject: [PATCH 59/80] workqueue: cond_resched() after processing each work item If !PREEMPT, a kworker running work items back to back can hog CPU. This becomes dangerous when a self-requeueing work item which is waiting for something to happen races against stop_machine. Such self-requeueing work item would requeue itself indefinitely hogging the kworker and CPU it's running on while stop_machine would wait for that CPU to enter stop_machine while preventing anything else from happening on all other CPUs. The two would deadlock. Jamie Liu reports that this deadlock scenario exists around scsi_requeue_run_queue() and libata port multiplier support, where one port may exclude command processing from other ports. With the right timing, scsi_requeue_run_queue() can end up requeueing itself trying to execute an IO which is asked to be retried while another device has an exclusive access, which in turn can't make forward progress due to stop_machine. Fix it by invoking cond_resched() after executing each work item. Signed-off-by: Tejun Heo Reported-by: Jamie Liu References: http://thread.gmane.org/gmane.linux.kernel/1552567 Cc: stable@vger.kernel.org -- kernel/workqueue.c | 9 +++++++++ 1 file changed, 9 insertions(+) --- kernel/workqueue.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7f5d4be22034..e93f7b9067d8 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2201,6 +2201,15 @@ __acquires(&pool->lock) dump_stack(); } + /* + * The following prevents a kworker from hogging CPU on !PREEMPT + * kernels, where a requeueing work item waiting for something to + * happen could deadlock with stop_machine as such work item could + * indefinitely requeue itself while all other CPUs are trapped in + * stop_machine. + */ + cond_resched(); + spin_lock_irq(&pool->lock); /* clear cpu intensive status */ From bb78a92f47696b2da49f2692b6a9fa56d07c444a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 28 Aug 2013 16:31:23 -0700 Subject: [PATCH 60/80] cgroup: fix rmdir EBUSY regression in 3.11 On 3.11-rc we are seeing cgroup directories left behind when they should have been removed. Here's a trivial reproducer: cd /sys/fs/cgroup/memory mkdir parent parent/child; rmdir parent/child parent rmdir: failed to remove `parent': Device or resource busy It's because cgroup_destroy_locked() (step 1 of destruction) leaves cgroup on parent's children list, letting cgroup_offline_fn() (step 2 of destruction) remove it; but step 2 is run by work queue, which may not yet have removed the children when parent destruction checks the list. Fix that by checking through a non-empty list of children: if every one of them has already been marked CGRP_DEAD, then it's safe to proceed: those children are invisible to userspace, and should not obstruct rmdir. (I didn't see any reason to keep the cgrp->children checks under the unrelated css_set_lock, so moved them out.) tj: Flattened nested ifs a bit and updated comment so that it's correct on both for-3.11-fixes and for-3.12. Signed-off-by: Hugh Dickins Signed-off-by: Tejun Heo --- kernel/cgroup.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 781845a013ab..e91963302c0d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4480,6 +4480,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) struct dentry *d = cgrp->dentry; struct cgroup_event *event, *tmp; struct cgroup_subsys *ss; + struct cgroup *child; bool empty; lockdep_assert_held(&d->d_inode->i_mutex); @@ -4490,11 +4491,27 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) * @cgrp from being removed while __put_css_set() is in progress. */ read_lock(&css_set_lock); - empty = list_empty(&cgrp->cset_links) && list_empty(&cgrp->children); + empty = list_empty(&cgrp->cset_links); read_unlock(&css_set_lock); if (!empty) return -EBUSY; + /* + * Make sure there's no live children. We can't test ->children + * emptiness as dead children linger on it while being destroyed; + * otherwise, "rmdir parent/child parent" may fail with -EBUSY. + */ + empty = true; + rcu_read_lock(); + list_for_each_entry_rcu(child, &cgrp->children, sibling) { + empty = cgroup_is_dead(child); + if (!empty) + break; + } + rcu_read_unlock(); + if (!empty) + return -EBUSY; + /* * Block new css_tryget() by killing css refcnts. cgroup core * guarantees that, by the time ->css_offline() is invoked, no new From fa46c7984092f3dbdbb3bcd7338d81a1168d9d2b Mon Sep 17 00:00:00 2001 From: Mischa Jonker Date: Thu, 29 Aug 2013 09:29:40 -0700 Subject: [PATCH 61/80] Input: i8042 - disable the driver on ARC platforms It causes crashes when enabled, and we don't have such a peripheral anyway on ARC platforms. Signed-off-by: Mischa Jonker Signed-off-by: Dmitry Torokhov --- drivers/input/serio/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/serio/Kconfig b/drivers/input/serio/Kconfig index 94c17c28d268..1e691a3a79cb 100644 --- a/drivers/input/serio/Kconfig +++ b/drivers/input/serio/Kconfig @@ -22,7 +22,8 @@ config SERIO_I8042 tristate "i8042 PC Keyboard controller" if EXPERT || !X86 default y depends on !PARISC && (!ARM || ARCH_SHARK || FOOTBRIDGE_HOST) && \ - (!SUPERH || SH_CAYMAN) && !M68K && !BLACKFIN && !S390 + (!SUPERH || SH_CAYMAN) && !M68K && !BLACKFIN && !S390 && \ + !ARC help i8042 is the chip over which the standard AT keyboard and PS/2 mouse are connected to the computer. If you use these devices, From 29eb51a728f9a67a1e1140490fa188561de56588 Mon Sep 17 00:00:00 2001 From: Barry Song <21cnbao@gmail.com> Date: Tue, 6 Aug 2013 13:37:13 +0800 Subject: [PATCH 62/80] irqchip: sirf: move from legacy mode to linear irqdomain the series of patches for irqdomain core in 3.11 has broken sirf irq which uses legacy mapping. all users fail in the new kernel while setupping irq. this patch moves to linear irqdomain and drop old legacy irqdomain codes since we don't need it any more, and at the same time, it also fixes the broken interrupts of sirfsoc in 3.11. on the other hand, we actually only have 64 interrupt sources for prima2 and atlas6, but there are 128 interrupt souces for marco which uses GIC. in the legacy codes, sirf gpio also uses legacy irqdomain, so to make gpio interrupt mapping not depend on the prima2/atlas6/marco an use unified marco,we enlarge prima2/atlas6 interrupt number to 128. here we don't need this workaround any more as sirf gpio also moved to linear mode before. so we move SIRFSOC_NUM_IRQS back to 64 too. Signed-off-by: Barry Song Signed-off-by: Olof Johansson --- drivers/irqchip/irq-sirfsoc.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/irqchip/irq-sirfsoc.c b/drivers/irqchip/irq-sirfsoc.c index 69ea44ebcf61..4851afae38dc 100644 --- a/drivers/irqchip/irq-sirfsoc.c +++ b/drivers/irqchip/irq-sirfsoc.c @@ -23,7 +23,7 @@ #define SIRFSOC_INT_RISC_LEVEL1 0x0024 #define SIRFSOC_INIT_IRQ_ID 0x0038 -#define SIRFSOC_NUM_IRQS 128 +#define SIRFSOC_NUM_IRQS 64 static struct irq_domain *sirfsoc_irqdomain; @@ -32,15 +32,18 @@ sirfsoc_alloc_gc(void __iomem *base, unsigned int irq_start, unsigned int num) { struct irq_chip_generic *gc; struct irq_chip_type *ct; + int ret; + unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; - gc = irq_alloc_generic_chip("SIRFINTC", 1, irq_start, base, handle_level_irq); + ret = irq_alloc_domain_generic_chips(sirfsoc_irqdomain, num, 1, "irq_sirfsoc", + handle_level_irq, clr, 0, IRQ_GC_INIT_MASK_CACHE); + + gc = irq_get_domain_generic_chip(sirfsoc_irqdomain, irq_start); + gc->reg_base = base; ct = gc->chip_types; - ct->chip.irq_mask = irq_gc_mask_clr_bit; ct->chip.irq_unmask = irq_gc_mask_set_bit; ct->regs.mask = SIRFSOC_INT_RISC_MASK0; - - irq_setup_generic_chip(gc, IRQ_MSK(num), IRQ_GC_INIT_MASK_CACHE, IRQ_NOREQUEST, 0); } static asmlinkage void __exception_irq_entry sirfsoc_handle_irq(struct pt_regs *regs) @@ -60,9 +63,8 @@ static int __init sirfsoc_irq_init(struct device_node *np, struct device_node *p if (!base) panic("unable to map intc cpu registers\n"); - /* using legacy because irqchip_generic does not work with linear */ - sirfsoc_irqdomain = irq_domain_add_legacy(np, SIRFSOC_NUM_IRQS, 0, 0, - &irq_domain_simple_ops, base); + sirfsoc_irqdomain = irq_domain_add_linear(np, SIRFSOC_NUM_IRQS, + &irq_generic_chip_ops, base); sirfsoc_alloc_gc(base, 0, 32); sirfsoc_alloc_gc(base + 4, 32, SIRFSOC_NUM_IRQS - 32); From f8ab658b5da3fd11893cad085e0e21b67987c10b Mon Sep 17 00:00:00 2001 From: Barry Song <21cnbao@gmail.com> Date: Tue, 6 Aug 2013 13:37:14 +0800 Subject: [PATCH 63/80] arm: prima2: drop nr_irqs in mach as we moved to linear irqdomain we don't need nr_irqs in machine any more after we move to linear irqdomain for sirfsoc irqchip, so drop them. Signed-off-by: Barry Song Signed-off-by: Olof Johansson --- arch/arm/mach-prima2/common.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/mach-prima2/common.c b/arch/arm/mach-prima2/common.c index 2c70f74fed5d..e110b6d4ae8c 100644 --- a/arch/arm/mach-prima2/common.c +++ b/arch/arm/mach-prima2/common.c @@ -42,7 +42,6 @@ static const char *atlas6_dt_match[] __initdata = { DT_MACHINE_START(ATLAS6_DT, "Generic ATLAS6 (Flattened Device Tree)") /* Maintainer: Barry Song */ - .nr_irqs = 128, .map_io = sirfsoc_map_io, .init_time = sirfsoc_init_time, .init_late = sirfsoc_init_late, @@ -59,7 +58,6 @@ static const char *prima2_dt_match[] __initdata = { DT_MACHINE_START(PRIMA2_DT, "Generic PRIMA2 (Flattened Device Tree)") /* Maintainer: Barry Song */ - .nr_irqs = 128, .map_io = sirfsoc_map_io, .init_time = sirfsoc_init_time, .dma_zone_size = SZ_256M, From c7781a6e3c4a9a17e144ec2db00ebfea327bd627 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Tue, 27 Aug 2013 12:20:40 +0400 Subject: [PATCH 64/80] tcp: initialize rcv_tstamp for restored sockets u32 rcv_tstamp; /* timestamp of last received ACK */ Its value used in tcp_retransmit_timer, which closes socket if the last ack was received more then TCP_RTO_MAX ago. Currently rcv_tstamp is initialized to zero and if tcp_retransmit_timer is called before receiving a first ack, the connection is closed. This patch initializes rcv_tstamp to a timestamp, when a socket was restored. Cc: Pavel Emelyanov Cc: Eric Dumazet Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Reported-by: Cyrill Gorcunov Signed-off-by: Andrey Vagin Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 92fde8d1aa82..e2972993c671 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2814,6 +2814,8 @@ void tcp_connect_init(struct sock *sk) if (likely(!tp->repair)) tp->rcv_nxt = 0; + else + tp->rcv_tstamp = tcp_time_stamp; tp->rcv_wup = tp->rcv_nxt; tp->copied_seq = tp->rcv_nxt; From e3e12028315749b7fa2edbc37328e5847be9ede9 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Tue, 27 Aug 2013 12:21:55 +0400 Subject: [PATCH 65/80] tcp: don't apply tsoffset if rcv_tsecr is zero The zero value means that tsecr is not valid, so it's a special case. tsoffset is used to customize tcp_time_stamp for one socket. tsoffset is usually zero, it's used when a socket was moved from one host to another host. Currently this issue affects logic of tcp_rcv_rtt_measure_ts. Due to incorrect value of rcv_tsecr, tcp_rcv_rtt_measure_ts sets rto to TCP_RTO_MAX. Cc: Pavel Emelyanov Cc: Eric Dumazet Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Reported-by: Cyrill Gorcunov Signed-off-by: Andrey Vagin Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 28af45abe062..3ca2139a130b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3535,7 +3535,10 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr ++ptr; tp->rx_opt.rcv_tsval = ntohl(*ptr); ++ptr; - tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset; + if (*ptr) + tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset; + else + tp->rx_opt.rcv_tsecr = 0; return true; } return false; @@ -3560,7 +3563,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, } tcp_parse_options(skb, &tp->rx_opt, 1, NULL); - if (tp->rx_opt.saw_tstamp) + if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; return true; @@ -5316,7 +5319,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, int saved_clamp = tp->rx_opt.mss_clamp; tcp_parse_options(skb, &tp->rx_opt, 0, &foc); - if (tp->rx_opt.saw_tstamp) + if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; if (th->ack) { From c27c9322d015dc1d9dfdf31724fca71c0476c4d1 Mon Sep 17 00:00:00 2001 From: Chris Clark Date: Tue, 27 Aug 2013 12:02:15 -0600 Subject: [PATCH 66/80] ipv4: sendto/hdrincl: don't use destination address found in header ipv4: raw_sendmsg: don't use header's destination address A sendto() regression was bisected and found to start with commit f8126f1d5136be1 (ipv4: Adjust semantics of rt->rt_gateway.) The problem is that it tries to ARP-lookup the constructed packet's destination address rather than the explicitly provided address. Fix this using FLOWI_FLAG_KNOWN_NH so that given nexthop is used. cf. commit 2ad5b9e4bd314fc685086b99e90e5de3bc59e26b Reported-by: Chris Clark Bisected-by: Chris Clark Tested-by: Chris Clark Suggested-by: Julian Anastasov Signed-off-by: Chris Clark Signed-off-by: David S. Miller --- net/ipv4/raw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index dd44e0ab600c..61e60d67adca 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -571,7 +571,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP, + inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP | + (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0); if (!inet->hdrincl) { From 1f324e38870cc09659cf23bc626f1b8869e201f2 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 28 Aug 2013 01:07:25 +0200 Subject: [PATCH 67/80] ipv6: Don't depend on per socket memory for neighbour discovery messages Allocating skbs when sending out neighbour discovery messages currently uses sock_alloc_send_skb() based on a per net namespace socket and thus share a socket wmem buffer space. If a netdevice is temporarily unable to transmit due to carrier loss or for other reasons, the queued up ndisc messages will cosnume all of the wmem space and will thus prevent from any more skbs to be allocated even for netdevices that are able to transmit packets. The number of neighbour discovery messages sent is very limited, simply use alloc_skb() and don't depend on any socket wmem space any longer. This patch has orginally been posted by Eric Dumazet in a modified form. Signed-off-by: Thomas Graf Cc: Eric Dumazet Acked-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 04d31c2fbef1..5cb98df966c2 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -370,16 +370,12 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, { int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; struct sk_buff *skb; - int err; - skb = sock_alloc_send_skb(sk, - hlen + sizeof(struct ipv6hdr) + len + tlen, - 1, &err); + skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC); if (!skb) { - ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n", - __func__, err); + ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n", + __func__); return NULL; } From 77fa4cbd5fa389e28419bbe8ac491b5fdd54840d Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 23 Aug 2013 23:50:23 +0300 Subject: [PATCH 68/80] drm/i915: ivb: fix edp voltage swing reg val Fix the typo introduced in commit 1a2eb4604b85c5efb343da8a4dcf41288fcfca85 Author: Keith Packard Date: Wed Nov 16 16:26:07 2011 -0800 drm/i915: Hook up Ivybridge eDP This fixes eDP link-training failures and cases where all voltage swing /pre-emphasis levels were tried and failed during clock recovery and - as a fallback - we go on to do channel equalization with the last voltage swing/pre-emphasis level which will succeed. Both issues can lead to a blank screen. v2: - improve commit message CC: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=64880 Tested-by: Jeremy Moles Signed-off-by: Imre Deak Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 53cddd985406..342f1f336168 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4440,7 +4440,7 @@ #define EDP_LINK_TRAIN_600MV_0DB_IVB (0x30 <<22) #define EDP_LINK_TRAIN_600MV_3_5DB_IVB (0x36 <<22) #define EDP_LINK_TRAIN_800MV_0DB_IVB (0x38 <<22) -#define EDP_LINK_TRAIN_800MV_3_5DB_IVB (0x33 <<22) +#define EDP_LINK_TRAIN_800MV_3_5DB_IVB (0x3e <<22) /* legacy values */ #define EDP_LINK_TRAIN_500MV_0DB_IVB (0x00 <<22) From 6e4dcff3adbf25acb87e74500a58e3c07bdec40f Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 29 Aug 2013 02:32:53 +0200 Subject: [PATCH 69/80] drm/vmwgfx: Split GMR2_REMAP commands if they are to large This fixes the piglit test texturing/max-texture-size causing the VM to die due to a too large SVGA command. Signed-off-by: Jakob Bornecrantz Reviewed-by: Biran Paul Reviewed-by: Zack Rusin Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c | 58 +++++++++++++++++++---------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c index 3751730764a5..1a0bf07fe54b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c @@ -29,7 +29,9 @@ #include #include -#define VMW_PPN_SIZE sizeof(unsigned long) +#define VMW_PPN_SIZE (sizeof(unsigned long)) +/* A future safe maximum remap size. */ +#define VMW_PPN_PER_REMAP ((31 * 1024) / VMW_PPN_SIZE) static int vmw_gmr2_bind(struct vmw_private *dev_priv, struct page *pages[], @@ -38,43 +40,61 @@ static int vmw_gmr2_bind(struct vmw_private *dev_priv, { SVGAFifoCmdDefineGMR2 define_cmd; SVGAFifoCmdRemapGMR2 remap_cmd; - uint32_t define_size = sizeof(define_cmd) + 4; - uint32_t remap_size = VMW_PPN_SIZE * num_pages + sizeof(remap_cmd) + 4; uint32_t *cmd; uint32_t *cmd_orig; + uint32_t define_size = sizeof(define_cmd) + sizeof(*cmd); + uint32_t remap_num = num_pages / VMW_PPN_PER_REMAP + ((num_pages % VMW_PPN_PER_REMAP) > 0); + uint32_t remap_size = VMW_PPN_SIZE * num_pages + (sizeof(remap_cmd) + sizeof(*cmd)) * remap_num; + uint32_t remap_pos = 0; + uint32_t cmd_size = define_size + remap_size; uint32_t i; - cmd_orig = cmd = vmw_fifo_reserve(dev_priv, define_size + remap_size); + cmd_orig = cmd = vmw_fifo_reserve(dev_priv, cmd_size); if (unlikely(cmd == NULL)) return -ENOMEM; define_cmd.gmrId = gmr_id; define_cmd.numPages = num_pages; + *cmd++ = SVGA_CMD_DEFINE_GMR2; + memcpy(cmd, &define_cmd, sizeof(define_cmd)); + cmd += sizeof(define_cmd) / sizeof(*cmd); + + /* + * Need to split the command if there are too many + * pages that goes into the gmr. + */ + remap_cmd.gmrId = gmr_id; remap_cmd.flags = (VMW_PPN_SIZE > sizeof(*cmd)) ? SVGA_REMAP_GMR2_PPN64 : SVGA_REMAP_GMR2_PPN32; - remap_cmd.offsetPages = 0; - remap_cmd.numPages = num_pages; - *cmd++ = SVGA_CMD_DEFINE_GMR2; - memcpy(cmd, &define_cmd, sizeof(define_cmd)); - cmd += sizeof(define_cmd) / sizeof(uint32); + while (num_pages > 0) { + unsigned long nr = min(num_pages, (unsigned long)VMW_PPN_PER_REMAP); - *cmd++ = SVGA_CMD_REMAP_GMR2; - memcpy(cmd, &remap_cmd, sizeof(remap_cmd)); - cmd += sizeof(remap_cmd) / sizeof(uint32); + remap_cmd.offsetPages = remap_pos; + remap_cmd.numPages = nr; - for (i = 0; i < num_pages; ++i) { - if (VMW_PPN_SIZE <= 4) - *cmd = page_to_pfn(*pages++); - else - *((uint64_t *)cmd) = page_to_pfn(*pages++); + *cmd++ = SVGA_CMD_REMAP_GMR2; + memcpy(cmd, &remap_cmd, sizeof(remap_cmd)); + cmd += sizeof(remap_cmd) / sizeof(*cmd); - cmd += VMW_PPN_SIZE / sizeof(*cmd); + for (i = 0; i < nr; ++i) { + if (VMW_PPN_SIZE <= 4) + *cmd = page_to_pfn(*pages++); + else + *((uint64_t *)cmd) = page_to_pfn(*pages++); + + cmd += VMW_PPN_SIZE / sizeof(*cmd); + } + + num_pages -= nr; + remap_pos += nr; } - vmw_fifo_commit(dev_priv, define_size + remap_size); + BUG_ON(cmd != cmd_orig + cmd_size / sizeof(*cmd)); + + vmw_fifo_commit(dev_priv, cmd_size); return 0; } From cc0fdd802859eaeb00e1c87dbb655594bed2844c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Fri, 30 Aug 2013 17:28:17 +0200 Subject: [PATCH 70/80] bridge: separate querier and query timer into IGMP/IPv4 and MLD/IPv6 ones MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we would still potentially suffer multicast packet loss if there is just either an IGMP or an MLD querier: For the former case, we would possibly drop IPv6 multicast packets, for the latter IPv4 ones. This is because we are currently assuming that if either an IGMP or MLD querier is present that the other one is present, too. This patch makes the behaviour and fix added in "bridge: disable snooping if there is no querier" (b00589af3b04) to also work if there is either just an IGMP or an MLD querier on the link: It refines the deactivation of the snooping to be protocol specific by using separate timers for the snooped IGMP and MLD queries as well as separate timers for our internal IGMP and MLD queriers. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- net/bridge/br_device.c | 2 +- net/bridge/br_input.c | 2 +- net/bridge/br_mdb.c | 14 +- net/bridge/br_multicast.c | 262 +++++++++++++++++++++++++++----------- net/bridge/br_private.h | 57 +++++++-- 5 files changed, 242 insertions(+), 95 deletions(-) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 69363bd37f64..89659d4ed1f9 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -71,7 +71,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) mdst = br_mdb_get(br, skb, vid); if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && - br_multicast_querier_exists(br)) + br_multicast_querier_exists(br, eth_hdr(skb))) br_multicast_deliver(mdst, skb); else br_flood_deliver(br, skb, false); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 8c561c0aa636..a2fd37ec35f7 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -102,7 +102,7 @@ int br_handle_frame_finish(struct sk_buff *skb) } else if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb, vid); if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && - br_multicast_querier_exists(br)) { + br_multicast_querier_exists(br, eth_hdr(skb))) { if ((mdst && mdst->mglist) || br_multicast_is_router(br)) skb2 = skb; diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 0daae3ec2355..6319c4333c39 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -414,16 +414,20 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (!netif_running(br->dev) || br->multicast_disabled) return -EINVAL; - if (timer_pending(&br->multicast_querier_timer)) - return -EBUSY; - ip.proto = entry->addr.proto; - if (ip.proto == htons(ETH_P_IP)) + if (ip.proto == htons(ETH_P_IP)) { + if (timer_pending(&br->ip4_querier.timer)) + return -EBUSY; + ip.u.ip4 = entry->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) - else + } else { + if (timer_pending(&br->ip6_querier.timer)) + return -EBUSY; + ip.u.ip6 = entry->addr.u.ip6; #endif + } spin_lock_bh(&br->multicast_lock); mdb = mlock_dereference(br->mdb, br); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 08e576ada0b2..9d1d0e66c357 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -33,7 +33,8 @@ #include "br_private.h" -static void br_multicast_start_querier(struct net_bridge *br); +static void br_multicast_start_querier(struct net_bridge *br, + struct bridge_mcast_query *query); unsigned int br_mdb_rehash_seq; static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) @@ -755,20 +756,35 @@ static void br_multicast_local_router_expired(unsigned long data) { } -static void br_multicast_querier_expired(unsigned long data) +static void br_multicast_querier_expired(struct net_bridge *br, + struct bridge_mcast_query *query) { - struct net_bridge *br = (void *)data; - spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || br->multicast_disabled) goto out; - br_multicast_start_querier(br); + br_multicast_start_querier(br, query); out: spin_unlock(&br->multicast_lock); } +static void br_ip4_multicast_querier_expired(unsigned long data) +{ + struct net_bridge *br = (void *)data; + + br_multicast_querier_expired(br, &br->ip4_query); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_querier_expired(unsigned long data) +{ + struct net_bridge *br = (void *)data; + + br_multicast_querier_expired(br, &br->ip6_query); +} +#endif + static void __br_multicast_send_query(struct net_bridge *br, struct net_bridge_port *port, struct br_ip *ip) @@ -789,37 +805,45 @@ static void __br_multicast_send_query(struct net_bridge *br, } static void br_multicast_send_query(struct net_bridge *br, - struct net_bridge_port *port, u32 sent) + struct net_bridge_port *port, + struct bridge_mcast_query *query) { unsigned long time; struct br_ip br_group; + struct bridge_mcast_querier *querier = NULL; if (!netif_running(br->dev) || br->multicast_disabled || - !br->multicast_querier || - timer_pending(&br->multicast_querier_timer)) + !br->multicast_querier) return; memset(&br_group.u, 0, sizeof(br_group.u)); - br_group.proto = htons(ETH_P_IP); - __br_multicast_send_query(br, port, &br_group); - + if (port ? (query == &port->ip4_query) : + (query == &br->ip4_query)) { + querier = &br->ip4_querier; + br_group.proto = htons(ETH_P_IP); #if IS_ENABLED(CONFIG_IPV6) - br_group.proto = htons(ETH_P_IPV6); - __br_multicast_send_query(br, port, &br_group); + } else { + querier = &br->ip6_querier; + br_group.proto = htons(ETH_P_IPV6); #endif + } + + if (!querier || timer_pending(&querier->timer)) + return; + + __br_multicast_send_query(br, port, &br_group); time = jiffies; - time += sent < br->multicast_startup_query_count ? + time += query->startup_sent < br->multicast_startup_query_count ? br->multicast_startup_query_interval : br->multicast_query_interval; - mod_timer(port ? &port->multicast_query_timer : - &br->multicast_query_timer, time); + mod_timer(&query->timer, time); } -static void br_multicast_port_query_expired(unsigned long data) +static void br_multicast_port_query_expired(struct net_bridge_port *port, + struct bridge_mcast_query *query) { - struct net_bridge_port *port = (void *)data; struct net_bridge *br = port->br; spin_lock(&br->multicast_lock); @@ -827,25 +851,43 @@ static void br_multicast_port_query_expired(unsigned long data) port->state == BR_STATE_BLOCKING) goto out; - if (port->multicast_startup_queries_sent < - br->multicast_startup_query_count) - port->multicast_startup_queries_sent++; + if (query->startup_sent < br->multicast_startup_query_count) + query->startup_sent++; - br_multicast_send_query(port->br, port, - port->multicast_startup_queries_sent); + br_multicast_send_query(port->br, port, query); out: spin_unlock(&br->multicast_lock); } +static void br_ip4_multicast_port_query_expired(unsigned long data) +{ + struct net_bridge_port *port = (void *)data; + + br_multicast_port_query_expired(port, &port->ip4_query); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_port_query_expired(unsigned long data) +{ + struct net_bridge_port *port = (void *)data; + + br_multicast_port_query_expired(port, &port->ip6_query); +} +#endif + void br_multicast_add_port(struct net_bridge_port *port) { port->multicast_router = 1; setup_timer(&port->multicast_router_timer, br_multicast_router_expired, (unsigned long)port); - setup_timer(&port->multicast_query_timer, - br_multicast_port_query_expired, (unsigned long)port); + setup_timer(&port->ip4_query.timer, br_ip4_multicast_port_query_expired, + (unsigned long)port); +#if IS_ENABLED(CONFIG_IPV6) + setup_timer(&port->ip6_query.timer, br_ip6_multicast_port_query_expired, + (unsigned long)port); +#endif } void br_multicast_del_port(struct net_bridge_port *port) @@ -853,13 +895,13 @@ void br_multicast_del_port(struct net_bridge_port *port) del_timer_sync(&port->multicast_router_timer); } -static void __br_multicast_enable_port(struct net_bridge_port *port) +static void br_multicast_enable(struct bridge_mcast_query *query) { - port->multicast_startup_queries_sent = 0; + query->startup_sent = 0; - if (try_to_del_timer_sync(&port->multicast_query_timer) >= 0 || - del_timer(&port->multicast_query_timer)) - mod_timer(&port->multicast_query_timer, jiffies); + if (try_to_del_timer_sync(&query->timer) >= 0 || + del_timer(&query->timer)) + mod_timer(&query->timer, jiffies); } void br_multicast_enable_port(struct net_bridge_port *port) @@ -870,7 +912,10 @@ void br_multicast_enable_port(struct net_bridge_port *port) if (br->multicast_disabled || !netif_running(br->dev)) goto out; - __br_multicast_enable_port(port); + br_multicast_enable(&port->ip4_query); +#if IS_ENABLED(CONFIG_IPV6) + br_multicast_enable(&port->ip6_query); +#endif out: spin_unlock(&br->multicast_lock); @@ -889,7 +934,10 @@ void br_multicast_disable_port(struct net_bridge_port *port) if (!hlist_unhashed(&port->rlist)) hlist_del_init_rcu(&port->rlist); del_timer(&port->multicast_router_timer); - del_timer(&port->multicast_query_timer); + del_timer(&port->ip4_query.timer); +#if IS_ENABLED(CONFIG_IPV6) + del_timer(&port->ip6_query.timer); +#endif spin_unlock(&br->multicast_lock); } @@ -1014,14 +1062,15 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, } #endif -static void br_multicast_update_querier_timer(struct net_bridge *br, - unsigned long max_delay) +static void +br_multicast_update_querier_timer(struct net_bridge *br, + struct bridge_mcast_querier *querier, + unsigned long max_delay) { - if (!timer_pending(&br->multicast_querier_timer)) - br->multicast_querier_delay_time = jiffies + max_delay; + if (!timer_pending(&querier->timer)) + querier->delay_time = jiffies + max_delay; - mod_timer(&br->multicast_querier_timer, - jiffies + br->multicast_querier_interval); + mod_timer(&querier->timer, jiffies + br->multicast_querier_interval); } /* @@ -1074,12 +1123,13 @@ timer: static void br_multicast_query_received(struct net_bridge *br, struct net_bridge_port *port, + struct bridge_mcast_querier *querier, int saddr, unsigned long max_delay) { if (saddr) - br_multicast_update_querier_timer(br, max_delay); - else if (timer_pending(&br->multicast_querier_timer)) + br_multicast_update_querier_timer(br, querier, max_delay); + else if (timer_pending(&querier->timer)) return; br_multicast_mark_router(br, port); @@ -1129,7 +1179,8 @@ static int br_ip4_multicast_query(struct net_bridge *br, IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; } - br_multicast_query_received(br, port, !!iph->saddr, max_delay); + br_multicast_query_received(br, port, &br->ip4_querier, !!iph->saddr, + max_delay); if (!group) goto out; @@ -1206,8 +1257,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(ntohs(mld2q->mld2q_mrc)) : 1; } - br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr), - max_delay); + br_multicast_query_received(br, port, &br->ip6_querier, + !ipv6_addr_any(&ip6h->saddr), max_delay); if (!group) goto out; @@ -1244,7 +1295,9 @@ out: static void br_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, - struct br_ip *group) + struct br_ip *group, + struct bridge_mcast_querier *querier, + struct bridge_mcast_query *query) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; @@ -1255,7 +1308,7 @@ static void br_multicast_leave_group(struct net_bridge *br, spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || (port && port->state == BR_STATE_DISABLED) || - timer_pending(&br->multicast_querier_timer)) + timer_pending(&querier->timer)) goto out; mdb = mlock_dereference(br->mdb, br); @@ -1263,14 +1316,13 @@ static void br_multicast_leave_group(struct net_bridge *br, if (!mp) goto out; - if (br->multicast_querier && - !timer_pending(&br->multicast_querier_timer)) { + if (br->multicast_querier) { __br_multicast_send_query(br, port, &mp->addr); time = jiffies + br->multicast_last_member_count * br->multicast_last_member_interval; - mod_timer(port ? &port->multicast_query_timer : - &br->multicast_query_timer, time); + + mod_timer(&query->timer, time); for (p = mlock_dereference(mp->ports, br); p != NULL; @@ -1323,7 +1375,6 @@ static void br_multicast_leave_group(struct net_bridge *br, mod_timer(&mp->timer, time); } } - out: spin_unlock(&br->multicast_lock); } @@ -1334,6 +1385,8 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, __u16 vid) { struct br_ip br_group; + struct bridge_mcast_query *query = port ? &port->ip4_query : + &br->ip4_query; if (ipv4_is_local_multicast(group)) return; @@ -1342,7 +1395,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, br_group.proto = htons(ETH_P_IP); br_group.vid = vid; - br_multicast_leave_group(br, port, &br_group); + br_multicast_leave_group(br, port, &br_group, &br->ip4_querier, query); } #if IS_ENABLED(CONFIG_IPV6) @@ -1352,6 +1405,9 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, __u16 vid) { struct br_ip br_group; + struct bridge_mcast_query *query = port ? &port->ip6_query : + &br->ip6_query; + if (!ipv6_is_transient_multicast(group)) return; @@ -1360,7 +1416,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, br_group.proto = htons(ETH_P_IPV6); br_group.vid = vid; - br_multicast_leave_group(br, port, &br_group); + br_multicast_leave_group(br, port, &br_group, &br->ip6_querier, query); } #endif @@ -1622,20 +1678,33 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, return 0; } -static void br_multicast_query_expired(unsigned long data) +static void br_multicast_query_expired(struct net_bridge *br, + struct bridge_mcast_query *query) +{ + spin_lock(&br->multicast_lock); + if (query->startup_sent < br->multicast_startup_query_count) + query->startup_sent++; + + br_multicast_send_query(br, NULL, query); + spin_unlock(&br->multicast_lock); +} + +static void br_ip4_multicast_query_expired(unsigned long data) { struct net_bridge *br = (void *)data; - spin_lock(&br->multicast_lock); - if (br->multicast_startup_queries_sent < - br->multicast_startup_query_count) - br->multicast_startup_queries_sent++; - - br_multicast_send_query(br, NULL, br->multicast_startup_queries_sent); - - spin_unlock(&br->multicast_lock); + br_multicast_query_expired(br, &br->ip4_query); } +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_query_expired(unsigned long data) +{ + struct net_bridge *br = (void *)data; + + br_multicast_query_expired(br, &br->ip6_query); +} +#endif + void br_multicast_init(struct net_bridge *br) { br->hash_elasticity = 4; @@ -1654,25 +1723,43 @@ void br_multicast_init(struct net_bridge *br) br->multicast_querier_interval = 255 * HZ; br->multicast_membership_interval = 260 * HZ; - br->multicast_querier_delay_time = 0; + br->ip4_querier.delay_time = 0; +#if IS_ENABLED(CONFIG_IPV6) + br->ip6_querier.delay_time = 0; +#endif spin_lock_init(&br->multicast_lock); setup_timer(&br->multicast_router_timer, br_multicast_local_router_expired, 0); - setup_timer(&br->multicast_querier_timer, - br_multicast_querier_expired, (unsigned long)br); - setup_timer(&br->multicast_query_timer, br_multicast_query_expired, + setup_timer(&br->ip4_querier.timer, br_ip4_multicast_querier_expired, (unsigned long)br); + setup_timer(&br->ip4_query.timer, br_ip4_multicast_query_expired, + (unsigned long)br); +#if IS_ENABLED(CONFIG_IPV6) + setup_timer(&br->ip6_querier.timer, br_ip6_multicast_querier_expired, + (unsigned long)br); + setup_timer(&br->ip6_query.timer, br_ip6_multicast_query_expired, + (unsigned long)br); +#endif } -void br_multicast_open(struct net_bridge *br) +static void __br_multicast_open(struct net_bridge *br, + struct bridge_mcast_query *query) { - br->multicast_startup_queries_sent = 0; + query->startup_sent = 0; if (br->multicast_disabled) return; - mod_timer(&br->multicast_query_timer, jiffies); + mod_timer(&query->timer, jiffies); +} + +void br_multicast_open(struct net_bridge *br) +{ + __br_multicast_open(br, &br->ip4_query); +#if IS_ENABLED(CONFIG_IPV6) + __br_multicast_open(br, &br->ip6_query); +#endif } void br_multicast_stop(struct net_bridge *br) @@ -1684,8 +1771,12 @@ void br_multicast_stop(struct net_bridge *br) int i; del_timer_sync(&br->multicast_router_timer); - del_timer_sync(&br->multicast_querier_timer); - del_timer_sync(&br->multicast_query_timer); + del_timer_sync(&br->ip4_querier.timer); + del_timer_sync(&br->ip4_query.timer); +#if IS_ENABLED(CONFIG_IPV6) + del_timer_sync(&br->ip6_querier.timer); + del_timer_sync(&br->ip6_query.timer); +#endif spin_lock_bh(&br->multicast_lock); mdb = mlock_dereference(br->mdb, br); @@ -1788,18 +1879,24 @@ unlock: return err; } -static void br_multicast_start_querier(struct net_bridge *br) +static void br_multicast_start_querier(struct net_bridge *br, + struct bridge_mcast_query *query) { struct net_bridge_port *port; - br_multicast_open(br); + __br_multicast_open(br, query); list_for_each_entry(port, &br->port_list, list) { if (port->state == BR_STATE_DISABLED || port->state == BR_STATE_BLOCKING) continue; - __br_multicast_enable_port(port); + if (query == &br->ip4_query) + br_multicast_enable(&port->ip4_query); +#if IS_ENABLED(CONFIG_IPV6) + else + br_multicast_enable(&port->ip6_query); +#endif } } @@ -1834,7 +1931,10 @@ rollback: goto rollback; } - br_multicast_start_querier(br); + br_multicast_start_querier(br, &br->ip4_query); +#if IS_ENABLED(CONFIG_IPV6) + br_multicast_start_querier(br, &br->ip6_query); +#endif unlock: spin_unlock_bh(&br->multicast_lock); @@ -1857,10 +1957,18 @@ int br_multicast_set_querier(struct net_bridge *br, unsigned long val) goto unlock; max_delay = br->multicast_query_response_interval; - if (!timer_pending(&br->multicast_querier_timer)) - br->multicast_querier_delay_time = jiffies + max_delay; - br_multicast_start_querier(br); + if (!timer_pending(&br->ip4_querier.timer)) + br->ip4_querier.delay_time = jiffies + max_delay; + + br_multicast_start_querier(br, &br->ip4_query); + +#if IS_ENABLED(CONFIG_IPV6) + if (!timer_pending(&br->ip6_querier.timer)) + br->ip6_querier.delay_time = jiffies + max_delay; + + br_multicast_start_querier(br, &br->ip6_query); +#endif unlock: spin_unlock_bh(&br->multicast_lock); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2f7da41851bf..263ba9034468 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -66,6 +66,20 @@ struct br_ip __u16 vid; }; +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +/* our own querier */ +struct bridge_mcast_query { + struct timer_list timer; + u32 startup_sent; +}; + +/* other querier */ +struct bridge_mcast_querier { + struct timer_list timer; + unsigned long delay_time; +}; +#endif + struct net_port_vlans { u16 port_idx; u16 pvid; @@ -162,10 +176,12 @@ struct net_bridge_port #define BR_FLOOD 0x00000040 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING - u32 multicast_startup_queries_sent; + struct bridge_mcast_query ip4_query; +#if IS_ENABLED(CONFIG_IPV6) + struct bridge_mcast_query ip6_query; +#endif /* IS_ENABLED(CONFIG_IPV6) */ unsigned char multicast_router; struct timer_list multicast_router_timer; - struct timer_list multicast_query_timer; struct hlist_head mglist; struct hlist_node rlist; #endif @@ -258,7 +274,6 @@ struct net_bridge u32 hash_max; u32 multicast_last_member_count; - u32 multicast_startup_queries_sent; u32 multicast_startup_query_count; unsigned long multicast_last_member_interval; @@ -267,15 +282,18 @@ struct net_bridge unsigned long multicast_query_interval; unsigned long multicast_query_response_interval; unsigned long multicast_startup_query_interval; - unsigned long multicast_querier_delay_time; spinlock_t multicast_lock; struct net_bridge_mdb_htable __rcu *mdb; struct hlist_head router_list; struct timer_list multicast_router_timer; - struct timer_list multicast_querier_timer; - struct timer_list multicast_query_timer; + struct bridge_mcast_querier ip4_querier; + struct bridge_mcast_query ip4_query; +#if IS_ENABLED(CONFIG_IPV6) + struct bridge_mcast_querier ip6_querier; + struct bridge_mcast_query ip6_query; +#endif /* IS_ENABLED(CONFIG_IPV6) */ #endif struct timer_list hello_timer; @@ -503,11 +521,27 @@ static inline bool br_multicast_is_router(struct net_bridge *br) timer_pending(&br->multicast_router_timer)); } -static inline bool br_multicast_querier_exists(struct net_bridge *br) +static inline bool +__br_multicast_querier_exists(struct net_bridge *br, + struct bridge_mcast_querier *querier) { - return time_is_before_jiffies(br->multicast_querier_delay_time) && - (br->multicast_querier || - timer_pending(&br->multicast_querier_timer)); + return time_is_before_jiffies(querier->delay_time) && + (br->multicast_querier || timer_pending(&querier->timer)); +} + +static inline bool br_multicast_querier_exists(struct net_bridge *br, + struct ethhdr *eth) +{ + switch (eth->h_proto) { + case (htons(ETH_P_IP)): + return __br_multicast_querier_exists(br, &br->ip4_querier); +#if IS_ENABLED(CONFIG_IPV6) + case (htons(ETH_P_IPV6)): + return __br_multicast_querier_exists(br, &br->ip6_querier); +#endif + default: + return false; + } } #else static inline int br_multicast_rcv(struct net_bridge *br, @@ -565,7 +599,8 @@ static inline bool br_multicast_is_router(struct net_bridge *br) { return 0; } -static inline bool br_multicast_querier_exists(struct net_bridge *br) +static inline bool br_multicast_querier_exists(struct net_bridge *br, + struct ethhdr *eth) { return false; } From eb8895debe1baba41fcb62c78a16f0c63c21662a Mon Sep 17 00:00:00 2001 From: Phil Oester Date: Tue, 27 Aug 2013 16:41:40 -0700 Subject: [PATCH 71/80] tcp: tcp_make_synack() should use sock_wmalloc In commit 90ba9b19 (tcp: tcp_make_synack() can use alloc_skb()), Eric changed the call to sock_wmalloc in tcp_make_synack to alloc_skb. In doing so, the netfilter owner match lost its ability to block the SYNACK packet on outbound listening sockets. Revert the change, restoring the owner match functionality. This closes netfilter bugzilla #847. Signed-off-by: Phil Oester Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e2972993c671..170737a9d56d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2670,7 +2670,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, int tcp_header_size; int mss; - skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC)); + skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); if (unlikely(!skb)) { dst_release(dst); return NULL; From 2c8d85182348021fc0a1bed193a4be4161dc8364 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Wed, 28 Aug 2013 09:29:58 +0200 Subject: [PATCH 72/80] tipc: set sk_err correctly when connection fails Should a connect fail, if the publication/server is unavailable or due to some other error, a positive value will be returned and errno is never set. If the application code checks for an explicit zero return from connect (success) or a negative return (failure), it will not catch the error and subsequent send() calls will fail as shown from the strace snippet below. socket(0x1e /* PF_??? */, SOCK_SEQPACKET, 0) = 3 connect(3, {sa_family=0x1e /* AF_??? */, sa_data="\2\1\322\4\0\0\322\4\0\0\0\0\0\0"}, 16) = 111 sendto(3, "test", 4, 0, NULL, 0) = -1 EPIPE (Broken pipe) The reason for this behaviour is that TIPC wrongly inverts error codes set in sk_err. Signed-off-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ce8249c76827..6cc7ddd2fb7c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1257,7 +1257,7 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) /* Accept only ACK or NACK message */ if (unlikely(msg_errcode(msg))) { sock->state = SS_DISCONNECTING; - sk->sk_err = -ECONNREFUSED; + sk->sk_err = ECONNREFUSED; retval = TIPC_OK; break; } @@ -1268,7 +1268,7 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) res = auto_connect(sock, msg); if (res) { sock->state = SS_DISCONNECTING; - sk->sk_err = res; + sk->sk_err = -res; retval = TIPC_OK; break; } From 737e828bdbdaf2f9d7de07f20a0308ac46ce5178 Mon Sep 17 00:00:00 2001 From: Li Hongjun Date: Wed, 28 Aug 2013 11:54:50 +0200 Subject: [PATCH 73/80] ipv4 tunnels: fix an oops when using ipip/sit with IPsec Since commit 3d7b46cd20e3 (ip_tunnel: push generic protocol handling to ip_tunnel module.), an Oops is triggered when an xfrm policy is configured on an IPv4 over IPv4 tunnel. xfrm4_policy_check() calls __xfrm_policy_check2(), which uses skb_dst(skb). But this field is NULL because iptunnel_pull_header() calls skb_dst_drop(skb). Signed-off-by: Li Hongjun Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/ipip.c | 5 ++--- net/ipv6/sit.c | 6 ++---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 51fc2a1dcdd3..b3ac3c3f6219 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -190,15 +190,14 @@ static int ipip_rcv(struct sk_buff *skb) struct ip_tunnel *tunnel; const struct iphdr *iph; - if (iptunnel_pull_header(skb, 0, tpi.proto)) - goto drop; - iph = ip_hdr(skb); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); if (tunnel) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; + if (iptunnel_pull_header(skb, 0, tpi.proto)) + goto drop; return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index fbfc5a83867f..21b25dd8466b 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -645,11 +645,7 @@ static int ipip_rcv(struct sk_buff *skb) const struct iphdr *iph; struct ip_tunnel *tunnel; - if (iptunnel_pull_header(skb, 0, tpi.proto)) - goto drop; - iph = ip_hdr(skb); - tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { @@ -659,6 +655,8 @@ static int ipip_rcv(struct sk_buff *skb) if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; + if (iptunnel_pull_header(skb, 0, tpi.proto)) + goto drop; return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); } From 25ad6117e73656071b38fd19fa67ae325471c758 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 30 Aug 2013 17:39:33 -0400 Subject: [PATCH 74/80] Revert "ipv6: Don't depend on per socket memory for neighbour discovery messages" This reverts commit 1f324e38870cc09659cf23bc626f1b8869e201f2. It seems to cause regressions, and in particular the output path really depends upon there being a socket attached to skb->sk for checks such as sk_mc_loop(skb->sk) for example. See ip6_output_finish2(). Reported-by: Stephen Warren Reported-by: Fabio Estevam Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 5cb98df966c2..04d31c2fbef1 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -370,12 +370,16 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, { int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; + struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; struct sk_buff *skb; + int err; - skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC); + skb = sock_alloc_send_skb(sk, + hlen + sizeof(struct ipv6hdr) + len + tlen, + 1, &err); if (!skb) { - ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n", - __func__); + ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n", + __func__, err); return NULL; } From 702821f4ea6f68db18aa1de7d8ed62c6ba586a64 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Aug 2013 18:10:43 -0700 Subject: [PATCH 75/80] net: revert 8728c544a9c ("net: dev_pick_tx() fix") commit 8728c544a9cbdc ("net: dev_pick_tx() fix") and commit b6fe83e9525a ("bonding: refine IFF_XMIT_DST_RELEASE capability") are quite incompatible : Queue selection is disabled because skb dst was dropped before entering bonding device. This causes major performance regression, mainly because TCP packets for a given flow can be sent to multiple queues. This is particularly visible when using the new FQ packet scheduler with MQ + FQ setup on the slaves. We can safely revert the first commit now that 416186fbf8c5b ("net: Split core bits of netdev_pick_tx into __netdev_pick_tx") properly caps the queue_index. Reported-by: Xi Wang Diagnosed-by: Xi Wang Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Alexander Duyck Cc: Denys Fedorysychenko Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index b84a1b155bc1..d12e3a9a5356 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -346,14 +346,9 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) if (new_index < 0) new_index = skb_tx_hash(dev, skb); - if (queue_index != new_index && sk) { - struct dst_entry *dst = - rcu_dereference_check(sk->sk_dst_cache, 1); - - if (dst && skb_dst(skb) == dst) - sk_tx_queue_set(sk, queue_index); - - } + if (queue_index != new_index && sk && + rcu_access_pointer(sk->sk_dst_cache)) + sk_tx_queue_set(sk, queue_index); queue_index = new_index; } From 0d63c27d9e879a0b54eb405636d60ab12040ca46 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 29 Aug 2013 11:47:00 +0300 Subject: [PATCH 76/80] mISDN: return -EINVAL on error in dsp_control_req() If skb->len is too short then we should return an error. Otherwise we read beyond the end of skb->data for several bytes. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/isdn/mISDN/dsp_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c index 22b720ec80cb..77025f5cb57d 100644 --- a/drivers/isdn/mISDN/dsp_core.c +++ b/drivers/isdn/mISDN/dsp_core.c @@ -288,8 +288,10 @@ dsp_control_req(struct dsp *dsp, struct mISDNhead *hh, struct sk_buff *skb) u8 *data; int len; - if (skb->len < sizeof(int)) + if (skb->len < sizeof(int)) { printk(KERN_ERR "%s: PH_CONTROL message too short\n", __func__); + return -EINVAL; + } cont = *((int *)skb->data); len = skb->len - sizeof(int); data = skb->data + sizeof(int); From 2d98c29b6fb3de44d9eaa73c09f9cf7209346383 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 29 Aug 2013 23:55:05 +0200 Subject: [PATCH 77/80] net: bridge: convert MLDv2 Query MRC into msecs_to_jiffies for max_delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While looking into MLDv1/v2 code, I noticed that bridging code does not convert it's max delay into jiffies for MLDv2 messages as we do in core IPv6' multicast code. RFC3810, 5.1.3. Maximum Response Code says: The Maximum Response Code field specifies the maximum time allowed before sending a responding Report. The actual time allowed, called the Maximum Response Delay, is represented in units of milliseconds, and is derived from the Maximum Response Code as follows: [...] As we update timers that work with jiffies, we need to convert it. Signed-off-by: Daniel Borkmann Cc: Linus Lüssing Cc: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 9d1d0e66c357..bbcb43582496 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1254,7 +1254,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, mld2q = (struct mld2_query *)icmp6_hdr(skb); if (!mld2q->mld2q_nsrcs) group = &mld2q->mld2q_mca; - max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(ntohs(mld2q->mld2q_mrc)) : 1; + + max_delay = max(msecs_to_jiffies(MLDV2_MRC(ntohs(mld2q->mld2q_mrc))), 1UL); } br_multicast_query_received(br, port, &br->ip6_querier, From 0affdf347ffc0c3a4595661c091e8cc5f1346e92 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Fri, 30 Aug 2013 20:28:10 +0200 Subject: [PATCH 78/80] net: fec: fix time stamping logic after napi conversion Commit dc975382 "net: fec: add napi support to improve proformance" converted the fec driver to the napi model. However, that commit forgot to remove the call to skb_defer_rx_timestamp which is only needed in non-napi drivers. (The function napi_gro_receive eventually calls netif_receive_skb, which in turn calls skb_defer_rx_timestamp.) This patch should also be applied to the 3.9 and 3.10 kernels. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 77ea0db0bbfc..c610a2716be4 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -971,8 +971,7 @@ fec_enet_rx(struct net_device *ndev, int budget) htons(ETH_P_8021Q), vlan_tag); - if (!skb_defer_rx_timestamp(skb)) - napi_gro_receive(&fep->napi, skb); + napi_gro_receive(&fep->napi, skb); } bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data, From de80963e6108d08f337c55d9b85838b0ba2bde44 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 30 Aug 2013 09:50:42 +0100 Subject: [PATCH 79/80] MAINTAINERS: change my DT related maintainer address Filtering capabilities on my work email are pretty much non-existent and this has turned out to be something of a firehose... Cc: Stephen Warren Cc: Rob Herring Cc: Olof Johansson Cc: Linus Walleij Signed-off-by: Ian Campbell Acked-by: Pawel Moll Acked-by: Mark Rutland Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8197fbd70a3e..b140c8123098 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6066,7 +6066,7 @@ M: Rob Herring M: Pawel Moll M: Mark Rutland M: Stephen Warren -M: Ian Campbell +M: Ian Campbell L: devicetree@vger.kernel.org S: Maintained F: Documentation/devicetree/ From 6e4664525b1db28f8c4e1130957f70a94c19213e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 2 Sep 2013 13:46:10 -0700 Subject: [PATCH 80/80] Linux 3.11 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 369882e4fc77..fe8204be566d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = NAME = Linux for Workgroups # *DOCUMENTATION*