From cd64647f043e3fd3569bcf068f47f030198ff93a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 23 Sep 2013 16:43:58 +0800 Subject: [PATCH 01/21] hung_task: Change sysctl_hung_task_check_count to 'int' As 'sysctl_hung_task_check_count' is 'unsigned long' when this value is assigned to max_count in check_hung_uninterruptible_tasks(), it's truncated to 'int' type. This causes a minor artifact: if we write 2^32 to sysctl.hung_task_check_count, hung task detection will be effectively disabled. With this fix, it will still truncate the user input to 32 bits, but reading sysctl.hung_task_check_count reflects the actual truncated value. Signed-off-by: Li Zefan Acked-by: Ingo Molnar Link: http://lkml.kernel.org/r/523FFF4E.9050401@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched/sysctl.h | 2 +- kernel/hung_task.c | 2 +- kernel/sysctl.c | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index bf8086b2506e..9552afa733d8 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -2,8 +2,8 @@ #define _SCHED_SYSCTL_H #ifdef CONFIG_DETECT_HUNG_TASK +extern int sysctl_hung_task_check_count; extern unsigned int sysctl_hung_task_panic; -extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_warnings; extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 3e97fb126e6b..042252383fd2 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -20,7 +20,7 @@ /* * The number of tasks checked: */ -unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; +int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; /* * Limit number of tasks checked in a batch. diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b2f06f3c6a3f..b24ed7f87a14 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -962,9 +962,10 @@ static struct ctl_table kern_table[] = { { .procname = "hung_task_check_count", .data = &sysctl_hung_task_check_count, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_doulongvec_minmax, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "hung_task_timeout_secs", From 2a929242ee50db84c1a561c81897bb0551f2c32f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 27 Sep 2013 16:34:42 +0200 Subject: [PATCH 02/21] lockdep, x86/alternatives: Drop ancient lockdep fixup message It messes up the output of the nodes/cores bootup table and it is obsolete anyway, see 17abecfe651c x86: fix up alternatives with lockdep enabled Signed-off-by: Borislav Petkov Cc: huawei.libin@huawei.com Cc: wangyijing@huawei.com Cc: fenghua.yu@intel.com Cc: guohanjun@huawei.com Cc: paul.gortmaker@windriver.com Link: http://lkml.kernel.org/r/20130927143442.GE4422@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 15e8563e5c24..df94598ad05a 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -402,17 +402,6 @@ void alternatives_enable_smp(void) { struct smp_alt_module *mod; -#ifdef CONFIG_LOCKDEP - /* - * Older binutils section handling bug prevented - * alternatives-replacement from working reliably. - * - * If this still occurs then you should see a hang - * or crash shortly after this line: - */ - pr_info("lockdep: fixing up alternatives\n"); -#endif - /* Why bother if there are no other CPUs? */ BUG_ON(num_possible_cpus() == 1); From 1232e3807f597748d437ab8680873af21fa81da9 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 8 Oct 2013 20:37:16 -0700 Subject: [PATCH 03/21] lockstat: Report avg wait and hold times While both the nr and total times are showed, having the avg lock hold and wait times show in the report is quite useful when working on performance related issues. Furthermore, I find myself constantly doing the calculations manually. In addition, some of the documentation examples were changed to easily update them to show the two new columns. No textual change otherwise, as descriptions match the lockstat output. Signed-off-by: Davidlohr Bueso Cc: aswin@hp.com Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1380746928.2313.14.camel@buesod1.americas.hpqcorp.net [ Fixlets: changed a seq_printf() to seq_puts(), converted spaces to tabs. ] Signed-off-by: Ingo Molnar --- Documentation/lockstat.txt | 123 ++++++++++++++++++------------------- kernel/lockdep_proc.c | 15 +++-- 2 files changed, 70 insertions(+), 68 deletions(-) diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt index dd2f7b26ca30..72d010689751 100644 --- a/Documentation/lockstat.txt +++ b/Documentation/lockstat.txt @@ -46,16 +46,14 @@ With these hooks we provide the following statistics: contentions - number of lock acquisitions that had to wait wait time min - shortest (non-0) time we ever had to wait for a lock max - longest time we ever had to wait for a lock - total - total time we spend waiting on this lock + total - total time we spend waiting on this lock + avg - average time spent waiting on this lock acq-bounces - number of lock acquisitions that involved x-cpu data acquisitions - number of times we took the lock hold time min - shortest (non-0) time we ever held the lock - max - longest time we ever held the lock - total - total time this lock was held - -From these number various other statistics can be derived, such as: - - hold time average = hold time total / acquisitions + max - longest time we ever held the lock + total - total time this lock was held + avg - average time this lock was held These numbers are gathered per lock class, per read/write state (when applicable). @@ -84,37 +82,38 @@ Look at the current lock statistics: # less /proc/lock_stat -01 lock_stat version 0.3 -02 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -03 class name con-bounces contentions waittime-min waittime-max waittime-total acq-bounces acquisitions holdtime-min holdtime-max holdtime-total -04 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +01 lock_stat version 0.4 +02----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +03 class name con-bounces contentions waittime-min waittime-max waittime-total waittime-avg acq-bounces acquisitions holdtime-min holdtime-max holdtime-total holdtime-avg +04----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 05 -06 &mm->mmap_sem-W: 233 538 18446744073708 22924.27 607243.51 1342 45806 1.71 8595.89 1180582.34 -07 &mm->mmap_sem-R: 205 587 18446744073708 28403.36 731975.00 1940 412426 0.58 187825.45 6307502.88 -08 --------------- -09 &mm->mmap_sem 487 [] do_page_fault+0x466/0x928 -10 &mm->mmap_sem 179 [] sys_mprotect+0xcd/0x21d -11 &mm->mmap_sem 279 [] sys_mmap+0x75/0xce -12 &mm->mmap_sem 76 [] sys_munmap+0x32/0x59 -13 --------------- -14 &mm->mmap_sem 270 [] sys_mmap+0x75/0xce -15 &mm->mmap_sem 431 [] do_page_fault+0x466/0x928 -16 &mm->mmap_sem 138 [] sys_munmap+0x32/0x59 -17 &mm->mmap_sem 145 [] sys_mprotect+0xcd/0x21d +06 &mm->mmap_sem-W: 46 84 0.26 939.10 16371.53 194.90 47291 2922365 0.16 2220301.69 17464026916.32 5975.99 +07 &mm->mmap_sem-R: 37 100 1.31 299502.61 325629.52 3256.30 212344 34316685 0.10 7744.91 95016910.20 2.77 +08 --------------- +09 &mm->mmap_sem 1 [] khugepaged_scan_mm_slot+0x57/0x280 +19 &mm->mmap_sem 96 [] __do_page_fault+0x1d4/0x510 +11 &mm->mmap_sem 34 [] vm_mmap_pgoff+0x87/0xd0 +12 &mm->mmap_sem 17 [] vm_munmap+0x41/0x80 +13 --------------- +14 &mm->mmap_sem 1 [] dup_mmap+0x2a/0x3f0 +15 &mm->mmap_sem 60 [] SyS_mprotect+0xe9/0x250 +16 &mm->mmap_sem 41 [] __do_page_fault+0x1d4/0x510 +17 &mm->mmap_sem 68 [] vm_mmap_pgoff+0x87/0xd0 18 -19 ............................................................................................................................................................................................... +19............................................................................................................................................................................................................................. 20 -21 dcache_lock: 621 623 0.52 118.26 1053.02 6745 91930 0.29 316.29 118423.41 -22 ----------- -23 dcache_lock 179 [] _atomic_dec_and_lock+0x34/0x54 -24 dcache_lock 113 [] d_alloc+0x19a/0x1eb -25 dcache_lock 99 [] d_rehash+0x1b/0x44 -26 dcache_lock 104 [] d_instantiate+0x36/0x8a -27 ----------- -28 dcache_lock 192 [] _atomic_dec_and_lock+0x34/0x54 -29 dcache_lock 98 [] d_rehash+0x1b/0x44 -30 dcache_lock 72 [] d_alloc+0x19a/0x1eb -31 dcache_lock 112 [] d_instantiate+0x36/0x8a +21 unix_table_lock: 110 112 0.21 49.24 163.91 1.46 21094 66312 0.12 624.42 31589.81 0.48 +22 --------------- +23 unix_table_lock 45 [] unix_create1+0x16e/0x1b0 +24 unix_table_lock 47 [] unix_release_sock+0x31/0x250 +25 unix_table_lock 15 [] unix_find_other+0x117/0x230 +26 unix_table_lock 5 [] unix_autobind+0x11f/0x1b0 +27 --------------- +28 unix_table_lock 39 [] unix_release_sock+0x31/0x250 +29 unix_table_lock 49 [] unix_create1+0x16e/0x1b0 +30 unix_table_lock 20 [] unix_find_other+0x117/0x230 +31 unix_table_lock 4 [] unix_autobind+0x11f/0x1b0 + This excerpt shows the first two lock class statistics. Line 01 shows the output version - each time the format changes this will be updated. Line 02-04 @@ -131,30 +130,30 @@ The integer part of the time values is in us. Dealing with nested locks, subclasses may appear: -32............................................................................................................................................................................................... +32........................................................................................................................................................................................................................... 33 -34 &rq->lock: 13128 13128 0.43 190.53 103881.26 97454 3453404 0.00 401.11 13224683.11 +34 &rq->lock: 13128 13128 0.43 190.53 103881.26 7.91 97454 3453404 0.00 401.11 13224683.11 3.82 35 --------- -36 &rq->lock 645 [] task_rq_lock+0x43/0x75 -37 &rq->lock 297 [] try_to_wake_up+0x127/0x25a -38 &rq->lock 360 [] select_task_rq_fair+0x1f0/0x74a -39 &rq->lock 428 [] scheduler_tick+0x46/0x1fb +36 &rq->lock 645 [] task_rq_lock+0x43/0x75 +37 &rq->lock 297 [] try_to_wake_up+0x127/0x25a +38 &rq->lock 360 [] select_task_rq_fair+0x1f0/0x74a +39 &rq->lock 428 [] scheduler_tick+0x46/0x1fb 40 --------- -41 &rq->lock 77 [] task_rq_lock+0x43/0x75 -42 &rq->lock 174 [] try_to_wake_up+0x127/0x25a -43 &rq->lock 4715 [] double_rq_lock+0x42/0x54 -44 &rq->lock 893 [] schedule+0x157/0x7b8 +41 &rq->lock 77 [] task_rq_lock+0x43/0x75 +42 &rq->lock 174 [] try_to_wake_up+0x127/0x25a +43 &rq->lock 4715 [] double_rq_lock+0x42/0x54 +44 &rq->lock 893 [] schedule+0x157/0x7b8 45 -46............................................................................................................................................................................................... +46........................................................................................................................................................................................................................... 47 -48 &rq->lock/1: 11526 11488 0.33 388.73 136294.31 21461 38404 0.00 37.93 109388.53 +48 &rq->lock/1: 1526 11488 0.33 388.73 136294.31 11.86 21461 38404 0.00 37.93 109388.53 2.84 49 ----------- -50 &rq->lock/1 11526 [] double_rq_lock+0x4f/0x54 +50 &rq->lock/1 11526 [] double_rq_lock+0x4f/0x54 51 ----------- -52 &rq->lock/1 5645 [] double_rq_lock+0x42/0x54 -53 &rq->lock/1 1224 [] schedule+0x157/0x7b8 -54 &rq->lock/1 4336 [] double_rq_lock+0x4f/0x54 -55 &rq->lock/1 181 [] try_to_wake_up+0x127/0x25a +52 &rq->lock/1 5645 [] double_rq_lock+0x42/0x54 +53 &rq->lock/1 1224 [] schedule+0x157/0x7b8 +54 &rq->lock/1 4336 [] double_rq_lock+0x4f/0x54 +55 &rq->lock/1 181 [] try_to_wake_up+0x127/0x25a Line 48 shows statistics for the second subclass (/1) of &rq->lock class (subclass starts from 0), since in this case, as line 50 suggests, @@ -163,16 +162,16 @@ double_rq_lock actually acquires a nested lock of two spinlocks. View the top contending locks: # grep : /proc/lock_stat | head - &inode->i_data.tree_lock-W: 15 21657 0.18 1093295.30 11547131054.85 58 10415 0.16 87.51 6387.60 - &inode->i_data.tree_lock-R: 0 0 0.00 0.00 0.00 23302 231198 0.25 8.45 98023.38 - dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24 - &inode->i_mutex: 161 286 18446744073709 62882.54 1244614.55 3653 20598 18446744073709 62318.60 1693822.74 - &zone->lru_lock: 94 94 0.53 7.33 92.10 4366 32690 0.29 59.81 16350.06 - &inode->i_data.i_mmap_mutex: 79 79 0.40 3.77 53.03 11779 87755 0.28 116.93 29898.44 - &q->__queue_lock: 48 50 0.52 31.62 86.31 774 13131 0.17 113.08 12277.52 - &rq->rq_lock_key: 43 47 0.74 68.50 170.63 3706 33929 0.22 107.99 17460.62 - &rq->rq_lock_key#2: 39 46 0.75 6.68 49.03 2979 32292 0.17 125.17 17137.63 - tasklist_lock-W: 15 15 1.45 10.87 32.70 1201 7390 0.58 62.55 13648.47 + clockevents_lock: 2926159 2947636 0.15 46882.81 1784540466.34 605.41 3381345 3879161 0.00 2260.97 53178395.68 13.71 + tick_broadcast_lock: 346460 346717 0.18 2257.43 39364622.71 113.54 3642919 4242696 0.00 2263.79 49173646.60 11.59 + &mapping->i_mmap_mutex: 203896 203899 3.36 645530.05 31767507988.39 155800.21 3361776 8893984 0.17 2254.15 14110121.02 1.59 + &rq->lock: 135014 136909 0.18 606.09 842160.68 6.15 1540728 10436146 0.00 728.72 17606683.41 1.69 + &(&zone->lru_lock)->rlock: 93000 94934 0.16 59.18 188253.78 1.98 1199912 3809894 0.15 391.40 3559518.81 0.93 + tasklist_lock-W: 40667 41130 0.23 1189.42 428980.51 10.43 270278 510106 0.16 653.51 3939674.91 7.72 + tasklist_lock-R: 21298 21305 0.20 1310.05 215511.12 10.12 186204 241258 0.14 1162.33 1179779.23 4.89 + rcu_node_1: 47656 49022 0.16 635.41 193616.41 3.95 844888 1865423 0.00 764.26 1656226.96 0.89 + &(&dentry->d_lockref.lock)->rlock: 39791 40179 0.15 1302.08 88851.96 2.21 2790851 12527025 0.10 1910.75 3379714.27 0.27 + rcu_node_0: 29203 30064 0.16 786.55 1555573.00 51.74 88963 244254 0.00 398.87 428872.51 1.76 Clear the statistics: diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index b2c71c5873e4..09220656d888 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -421,6 +421,7 @@ static void seq_lock_time(struct seq_file *m, struct lock_time *lt) seq_time(m, lt->min); seq_time(m, lt->max); seq_time(m, lt->total); + seq_time(m, lt->nr ? do_div(lt->total, lt->nr) : 0); } static void seq_stats(struct seq_file *m, struct lock_stat_data *data) @@ -518,20 +519,20 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) } if (i) { seq_puts(m, "\n"); - seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1)); + seq_line(m, '.', 0, 40 + 1 + 12 * (14 + 1)); seq_puts(m, "\n"); } } static void seq_header(struct seq_file *m) { - seq_printf(m, "lock_stat version 0.3\n"); + seq_puts(m, "lock_stat version 0.4\n"); if (unlikely(!debug_locks)) seq_printf(m, "*WARNING* lock debugging disabled!! - possibly due to a lockdep warning\n"); - seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); - seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " + seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1)); + seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s " "%14s %14s\n", "class name", "con-bounces", @@ -539,12 +540,14 @@ static void seq_header(struct seq_file *m) "waittime-min", "waittime-max", "waittime-total", + "waittime-avg", "acq-bounces", "acquisitions", "holdtime-min", "holdtime-max", - "holdtime-total"); - seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); + "holdtime-total", + "holdtime-avg"); + seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1)); seq_printf(m, "\n"); } From 4c4e4f61368164f326fe59e2156c70d7faa72c17 Mon Sep 17 00:00:00 2001 From: Raghavendra K T Date: Mon, 21 Oct 2013 21:35:08 +0530 Subject: [PATCH 04/21] x86/locking/kconfig: Update paravirt spinlock Kconfig description Since the paravirt spinlock optimizations went into the v3.12 kernel we have a very good performance benefit for paravirtualized KVM / Xen kernels. Also we no longer suffer from 5% side effect on native kernel that is mentioned in the Kconfig entry. So update the Kconfig entry accordingly. pvspinlock benefit on KVM link: https://lkml.org/lkml/2013/8/6/178 Attilio's tests on native kernel impact: http://blog.xen.org/index.php/2012/05/11/benchmarking-the-new-pv-ticketlock-implementation/ Signed-off-by: Raghavendra K T Cc: Cc: Cc: Cc: Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1382371508-3843-1-git-send-email-raghavendra.kt@linux.vnet.ibm.com [ Updated the changelog. ] Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ee2fb9d37745..a92572de2ac1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -638,10 +638,10 @@ config PARAVIRT_SPINLOCKS spinlock implementation with something virtualization-friendly (for example, block the virtual CPU rather than spinning). - Unfortunately the downside is an up to 5% performance hit on - native kernels, with various workloads. + It has a minimal impact on native kernels and gives a nice performance + benefit on paravirtualized KVM / Xen kernels. - If you are unsure how to answer this question, answer N. + If you are unsure how to answer this question, answer Y. source "arch/x86/xen/Kconfig" From 6a716c90a51338009c3bc1f460829afaed8f922d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 19 Oct 2013 18:18:28 +0200 Subject: [PATCH 05/21] hung_task debugging: Add tracepoint to report the hang Currently check_hung_task() prints a warning if it detects the problem, but it is not convenient to watch the system logs if user-space wants to be notified about the hang. Add the new trace_sched_process_hang() into check_hung_task(), this way a user-space monitor can easily wait for the hang and potentially resolve a problem. Signed-off-by: Oleg Nesterov Cc: Dave Sullivan Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20131019161828.GA7439@redhat.com Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 19 +++++++++++++++++++ kernel/hung_task.c | 4 ++++ 2 files changed, 23 insertions(+) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 2e7d9947a10d..2a652d124fbb 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -424,6 +424,25 @@ TRACE_EVENT(sched_pi_setprio, __entry->oldprio, __entry->newprio) ); +#ifdef CONFIG_DETECT_HUNG_TASK +TRACE_EVENT(sched_process_hang, + TP_PROTO(struct task_struct *tsk), + TP_ARGS(tsk), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + ), + + TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) +); +#endif /* CONFIG_DETECT_HUNG_TASK */ + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 042252383fd2..8807061ca004 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -16,6 +16,7 @@ #include #include #include +#include /* * The number of tasks checked: @@ -92,6 +93,9 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) t->last_switch_count = switch_count; return; } + + trace_sched_process_hang(t); + if (!sysctl_hung_task_warnings) return; sysctl_hung_task_warnings--; From 01768b42dc97a67b4fb33a2535c49fc1969880df Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 18:11:53 +0100 Subject: [PATCH 06/21] locking: Move the mutex code to kernel/locking/ Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-1ditvncg30dgbpvrz2bxfmke@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/Makefile | 6 ++---- kernel/locking/Makefile | 9 +++++++++ kernel/{ => locking}/mutex-debug.c | 0 kernel/{ => locking}/mutex-debug.h | 0 kernel/{ => locking}/mutex.c | 0 kernel/{ => locking}/mutex.h | 0 6 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 kernel/locking/Makefile rename kernel/{ => locking}/mutex-debug.c (100%) rename kernel/{ => locking}/mutex-debug.h (100%) rename kernel/{ => locking}/mutex.c (100%) rename kernel/{ => locking}/mutex.h (100%) diff --git a/kernel/Makefile b/kernel/Makefile index a4d1aa8da9bc..330b14666475 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -7,7 +7,7 @@ obj-y = fork.o exec_domain.o panic.o \ sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ extable.o params.o posix-timers.o \ - kthread.o sys_ni.o posix-cpu-timers.o mutex.o \ + kthread.o sys_ni.o posix-cpu-timers.o \ hrtimer.o rwsem.o nsproxy.o semaphore.o \ notifier.o ksysfs.o cred.o reboot.o \ async.o range.o groups.o lglock.o smpboot.o @@ -16,13 +16,12 @@ ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files CFLAGS_REMOVE_lockdep.o = -pg CFLAGS_REMOVE_lockdep_proc.o = -pg -CFLAGS_REMOVE_mutex-debug.o = -pg -CFLAGS_REMOVE_rtmutex-debug.o = -pg CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_irq_work.o = -pg endif obj-y += sched/ +obj-y += locking/ obj-y += power/ obj-y += printk/ obj-y += cpu/ @@ -34,7 +33,6 @@ obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ -obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o obj-$(CONFIG_LOCKDEP) += lockdep.o ifeq ($(CONFIG_PROC_FS),y) obj-$(CONFIG_LOCKDEP) += lockdep_proc.o diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile new file mode 100644 index 000000000000..fe8bd58b22f8 --- /dev/null +++ b/kernel/locking/Makefile @@ -0,0 +1,9 @@ + +obj-y += mutex.o + +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_mutex-debug.o = -pg +CFLAGS_REMOVE_rtmutex-debug.o = -pg +endif + +obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o diff --git a/kernel/mutex-debug.c b/kernel/locking/mutex-debug.c similarity index 100% rename from kernel/mutex-debug.c rename to kernel/locking/mutex-debug.c diff --git a/kernel/mutex-debug.h b/kernel/locking/mutex-debug.h similarity index 100% rename from kernel/mutex-debug.h rename to kernel/locking/mutex-debug.h diff --git a/kernel/mutex.c b/kernel/locking/mutex.c similarity index 100% rename from kernel/mutex.c rename to kernel/locking/mutex.c diff --git a/kernel/mutex.h b/kernel/locking/mutex.h similarity index 100% rename from kernel/mutex.h rename to kernel/locking/mutex.h From 8eddac3f103736163f49255bcb109edadea167f6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 18:14:17 +0100 Subject: [PATCH 07/21] locking: Move the lockdep code to kernel/locking/ Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-wl7s3tta5isufzfguc23et06@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/Makefile | 6 ------ kernel/locking/Makefile | 6 ++++++ kernel/{ => locking}/lockdep.c | 0 kernel/{ => locking}/lockdep_internals.h | 0 kernel/{ => locking}/lockdep_proc.c | 0 kernel/{ => locking}/lockdep_states.h | 0 6 files changed, 6 insertions(+), 6 deletions(-) rename kernel/{ => locking}/lockdep.c (100%) rename kernel/{ => locking}/lockdep_internals.h (100%) rename kernel/{ => locking}/lockdep_proc.c (100%) rename kernel/{ => locking}/lockdep_states.h (100%) diff --git a/kernel/Makefile b/kernel/Makefile index 330b14666475..4fffd6ee42c1 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -14,8 +14,6 @@ obj-y = fork.o exec_domain.o panic.o \ ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files -CFLAGS_REMOVE_lockdep.o = -pg -CFLAGS_REMOVE_lockdep_proc.o = -pg CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_irq_work.o = -pg endif @@ -33,10 +31,6 @@ obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ -obj-$(CONFIG_LOCKDEP) += lockdep.o -ifeq ($(CONFIG_PROC_FS),y) -obj-$(CONFIG_LOCKDEP) += lockdep_proc.o -endif obj-$(CONFIG_FUTEX) += futex.o ifeq ($(CONFIG_COMPAT),y) obj-$(CONFIG_FUTEX) += futex_compat.o diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index fe8bd58b22f8..c103599fc1ba 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -2,8 +2,14 @@ obj-y += mutex.o ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_lockdep.o = -pg +CFLAGS_REMOVE_lockdep_proc.o = -pg CFLAGS_REMOVE_mutex-debug.o = -pg CFLAGS_REMOVE_rtmutex-debug.o = -pg endif obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o +obj-$(CONFIG_LOCKDEP) += lockdep.o +ifeq ($(CONFIG_PROC_FS),y) +obj-$(CONFIG_LOCKDEP) += lockdep_proc.o +endif diff --git a/kernel/lockdep.c b/kernel/locking/lockdep.c similarity index 100% rename from kernel/lockdep.c rename to kernel/locking/lockdep.c diff --git a/kernel/lockdep_internals.h b/kernel/locking/lockdep_internals.h similarity index 100% rename from kernel/lockdep_internals.h rename to kernel/locking/lockdep_internals.h diff --git a/kernel/lockdep_proc.c b/kernel/locking/lockdep_proc.c similarity index 100% rename from kernel/lockdep_proc.c rename to kernel/locking/lockdep_proc.c diff --git a/kernel/lockdep_states.h b/kernel/locking/lockdep_states.h similarity index 100% rename from kernel/lockdep_states.h rename to kernel/locking/lockdep_states.h From 60fc28746a7b61775ae28950ddf7a4ac15955639 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 18:15:36 +0100 Subject: [PATCH 08/21] locking: Move the spinlock code to kernel/locking/ Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-b81ol0z3mon45m51o131yc9j@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/Makefile | 3 --- kernel/locking/Makefile | 4 ++++ kernel/{ => locking}/spinlock.c | 0 {lib => kernel/locking}/spinlock_debug.c | 0 lib/Makefile | 1 - 5 files changed, 4 insertions(+), 4 deletions(-) rename kernel/{ => locking}/spinlock.c (100%) rename {lib => kernel/locking}/spinlock_debug.c (100%) diff --git a/kernel/Makefile b/kernel/Makefile index 4fffd6ee42c1..4bce165dce5d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -43,9 +43,6 @@ obj-$(CONFIG_SMP) += smp.o ifneq ($(CONFIG_SMP),y) obj-y += up.o endif -obj-$(CONFIG_SMP) += spinlock.o -obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o -obj-$(CONFIG_PROVE_LOCKING) += spinlock.o obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o modsign_certificate.o diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index c103599fc1ba..674d2152d10f 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -13,3 +13,7 @@ obj-$(CONFIG_LOCKDEP) += lockdep.o ifeq ($(CONFIG_PROC_FS),y) obj-$(CONFIG_LOCKDEP) += lockdep_proc.o endif +obj-$(CONFIG_SMP) += spinlock.o +obj-$(CONFIG_PROVE_LOCKING) += spinlock.o +obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o +obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o diff --git a/kernel/spinlock.c b/kernel/locking/spinlock.c similarity index 100% rename from kernel/spinlock.c rename to kernel/locking/spinlock.c diff --git a/lib/spinlock_debug.c b/kernel/locking/spinlock_debug.c similarity index 100% rename from lib/spinlock_debug.c rename to kernel/locking/spinlock_debug.c diff --git a/lib/Makefile b/lib/Makefile index f3bb2cb98adf..bee27e1d3431 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -42,7 +42,6 @@ obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o -obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o From e25a64c4017e3a3cda17454b040737e410a12991 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 18:16:43 +0100 Subject: [PATCH 09/21] locking: Move the semaphore core to kernel/locking/ Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-vmw5sf6vzmua1z6nx1cg69h2@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/Makefile | 2 +- kernel/locking/Makefile | 2 +- kernel/{ => locking}/semaphore.c | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename kernel/{ => locking}/semaphore.c (100%) diff --git a/kernel/Makefile b/kernel/Makefile index 4bce165dce5d..45e5ae26dc03 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -8,7 +8,7 @@ obj-y = fork.o exec_domain.o panic.o \ signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ extable.o params.o posix-timers.o \ kthread.o sys_ni.o posix-cpu-timers.o \ - hrtimer.o rwsem.o nsproxy.o semaphore.o \ + hrtimer.o rwsem.o nsproxy.o \ notifier.o ksysfs.o cred.o reboot.o \ async.o range.o groups.o lglock.o smpboot.o diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 674d2152d10f..5978fddf1412 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -1,5 +1,5 @@ -obj-y += mutex.o +obj-y += mutex.o semaphore.o ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_lockdep.o = -pg diff --git a/kernel/semaphore.c b/kernel/locking/semaphore.c similarity index 100% rename from kernel/semaphore.c rename to kernel/locking/semaphore.c From 1696a8bee390929fed05c6297164816ae2ced280 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 18:18:19 +0100 Subject: [PATCH 10/21] locking: Move the rtmutex code to kernel/locking/ Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-p9ijt8div0hwldexwfm4nlhj@git.kernel.org [ Fixed build failure in kernel/rcu/tree_plugin.h. ] Signed-off-by: Ingo Molnar --- kernel/Makefile | 3 --- kernel/futex.c | 2 +- kernel/locking/Makefile | 3 +++ kernel/{ => locking}/rtmutex-debug.c | 0 kernel/{ => locking}/rtmutex-debug.h | 0 kernel/{ => locking}/rtmutex-tester.c | 0 kernel/{ => locking}/rtmutex.c | 0 kernel/{ => locking}/rtmutex.h | 0 kernel/{ => locking}/rtmutex_common.h | 0 kernel/rcu/tree_plugin.h | 2 +- 10 files changed, 5 insertions(+), 5 deletions(-) rename kernel/{ => locking}/rtmutex-debug.c (100%) rename kernel/{ => locking}/rtmutex-debug.h (100%) rename kernel/{ => locking}/rtmutex-tester.c (100%) rename kernel/{ => locking}/rtmutex.c (100%) rename kernel/{ => locking}/rtmutex.h (100%) rename kernel/{ => locking}/rtmutex_common.h (100%) diff --git a/kernel/Makefile b/kernel/Makefile index 45e5ae26dc03..9c2ad1852223 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -35,9 +35,6 @@ obj-$(CONFIG_FUTEX) += futex.o ifeq ($(CONFIG_COMPAT),y) obj-$(CONFIG_FUTEX) += futex_compat.o endif -obj-$(CONFIG_RT_MUTEXES) += rtmutex.o -obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o -obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += smp.o ifneq ($(CONFIG_SMP),y) diff --git a/kernel/futex.c b/kernel/futex.c index c3a1a55a5214..80ba086f021d 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -66,7 +66,7 @@ #include -#include "rtmutex_common.h" +#include "locking/rtmutex_common.h" int __read_mostly futex_cmpxchg_enabled; diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 5978fddf1412..59f66dec2bf9 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -15,5 +15,8 @@ obj-$(CONFIG_LOCKDEP) += lockdep_proc.o endif obj-$(CONFIG_SMP) += spinlock.o obj-$(CONFIG_PROVE_LOCKING) += spinlock.o +obj-$(CONFIG_RT_MUTEXES) += rtmutex.o +obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o +obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o diff --git a/kernel/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c similarity index 100% rename from kernel/rtmutex-debug.c rename to kernel/locking/rtmutex-debug.c diff --git a/kernel/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h similarity index 100% rename from kernel/rtmutex-debug.h rename to kernel/locking/rtmutex-debug.h diff --git a/kernel/rtmutex-tester.c b/kernel/locking/rtmutex-tester.c similarity index 100% rename from kernel/rtmutex-tester.c rename to kernel/locking/rtmutex-tester.c diff --git a/kernel/rtmutex.c b/kernel/locking/rtmutex.c similarity index 100% rename from kernel/rtmutex.c rename to kernel/locking/rtmutex.c diff --git a/kernel/rtmutex.h b/kernel/locking/rtmutex.h similarity index 100% rename from kernel/rtmutex.h rename to kernel/locking/rtmutex.h diff --git a/kernel/rtmutex_common.h b/kernel/locking/rtmutex_common.h similarity index 100% rename from kernel/rtmutex_common.h rename to kernel/locking/rtmutex_common.h diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 3822ac0c4b27..6abb03dff5c0 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1133,7 +1133,7 @@ void exit_rcu(void) #ifdef CONFIG_RCU_BOOST -#include "../rtmutex_common.h" +#include "../locking/rtmutex_common.h" #ifdef CONFIG_RCU_TRACE From ed428bfc3caaa4b1e6cd15ea12c90c30291903f0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 18:19:28 +0100 Subject: [PATCH 11/21] locking: Move the rwsem code to kernel/locking/ Notably: changed lib/rwsem* targets from lib- to obj-, no idea about the ramifications of that. Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-g0kynfh5feriwc6p3h6kpbw6@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/Makefile | 2 +- kernel/locking/Makefile | 4 +++- {lib => kernel/locking}/rwsem-spinlock.c | 0 lib/rwsem.c => kernel/locking/rwsem-xadd.c | 0 kernel/{ => locking}/rwsem.c | 0 lib/Makefile | 2 -- 6 files changed, 4 insertions(+), 4 deletions(-) rename {lib => kernel/locking}/rwsem-spinlock.c (100%) rename lib/rwsem.c => kernel/locking/rwsem-xadd.c (100%) rename kernel/{ => locking}/rwsem.c (100%) diff --git a/kernel/Makefile b/kernel/Makefile index 9c2ad1852223..1aef002aa56b 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -8,7 +8,7 @@ obj-y = fork.o exec_domain.o panic.o \ signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ extable.o params.o posix-timers.o \ kthread.o sys_ni.o posix-cpu-timers.o \ - hrtimer.o rwsem.o nsproxy.o \ + hrtimer.o nsproxy.o \ notifier.o ksysfs.o cred.o reboot.o \ async.o range.o groups.o lglock.o smpboot.o diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 59f66dec2bf9..b0e0d73516e3 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -1,5 +1,5 @@ -obj-y += mutex.o semaphore.o +obj-y += mutex.o semaphore.o rwsem.o ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_lockdep.o = -pg @@ -20,3 +20,5 @@ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o +obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o +obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o \ No newline at end of file diff --git a/lib/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c similarity index 100% rename from lib/rwsem-spinlock.c rename to kernel/locking/rwsem-spinlock.c diff --git a/lib/rwsem.c b/kernel/locking/rwsem-xadd.c similarity index 100% rename from lib/rwsem.c rename to kernel/locking/rwsem-xadd.c diff --git a/kernel/rwsem.c b/kernel/locking/rwsem.c similarity index 100% rename from kernel/rwsem.c rename to kernel/locking/rwsem.c diff --git a/lib/Makefile b/lib/Makefile index bee27e1d3431..ca8cadc8fe4d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -42,8 +42,6 @@ obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o -lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o -lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) From cd4d241d57c99c6b00ef1799ad797d90f75a1da9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 4 Nov 2013 11:51:33 +0100 Subject: [PATCH 12/21] locking: Move the lglocks code to kernel/locking/ Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-amd6pg1mif6tikbyktfvby3y@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/Makefile | 2 +- kernel/locking/Makefile | 2 +- kernel/{ => locking}/lglock.c | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename kernel/{ => locking}/lglock.c (100%) diff --git a/kernel/Makefile b/kernel/Makefile index 1aef002aa56b..09a9c94f42bd 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o \ kthread.o sys_ni.o posix-cpu-timers.o \ hrtimer.o nsproxy.o \ notifier.o ksysfs.o cred.o reboot.o \ - async.o range.o groups.o lglock.o smpboot.o + async.o range.o groups.o smpboot.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index b0e0d73516e3..bdd313a3411d 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -1,5 +1,5 @@ -obj-y += mutex.o semaphore.o rwsem.o +obj-y += mutex.o semaphore.o rwsem.o lglock.o ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_lockdep.o = -pg diff --git a/kernel/lglock.c b/kernel/locking/lglock.c similarity index 100% rename from kernel/lglock.c rename to kernel/locking/lglock.c From 32cf7c3c94623514eb882addae307212c1507239 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 4 Nov 2013 18:05:09 +0100 Subject: [PATCH 13/21] locking: Move the percpu-rwsem code to kernel/locking/ Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-52bjmtty46we26hbfd9sc9iy@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/locking/Makefile | 3 ++- {lib => kernel/locking}/percpu-rwsem.c | 0 lib/Makefile | 1 - 3 files changed, 2 insertions(+), 2 deletions(-) rename {lib => kernel/locking}/percpu-rwsem.c (100%) diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index bdd313a3411d..baab8e5e7f66 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -21,4 +21,5 @@ obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o -obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o \ No newline at end of file +obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o +obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o diff --git a/lib/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c similarity index 100% rename from lib/percpu-rwsem.c rename to kernel/locking/percpu-rwsem.c diff --git a/lib/Makefile b/lib/Makefile index ca8cadc8fe4d..8d3c62936c8b 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -42,7 +42,6 @@ obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o -lib-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o From 827da44c61419f29ae3be198c342e2147f1a10cb Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Oct 2013 15:51:58 -0700 Subject: [PATCH 14/21] net: Explicitly initialize u64_stats_sync structures for lockdep In order to enable lockdep on seqcount/seqlock structures, we must explicitly initialize any locks. The u64_stats_sync structure, uses a seqcount, and thus we need to introduce a u64_stats_init() function and use it to initialize the structure. This unfortunately adds a lot of fairly trivial initialization code to a number of drivers. But the benefit of ensuring correctness makes this worth while. Because these changes are required for lockdep to be enabled, and the changes are quite trivial, I've not yet split this patch out into 30-some separate patches, as I figured it would be better to get the various maintainers thoughts on how to best merge this change along with the seqcount lockdep enablement. Feedback would be appreciated! Signed-off-by: John Stultz Acked-by: Julian Anastasov Signed-off-by: Peter Zijlstra Cc: Alexey Kuznetsov Cc: "David S. Miller" Cc: Eric Dumazet Cc: Hideaki YOSHIFUJI Cc: James Morris Cc: Jesse Gross Cc: Mathieu Desnoyers Cc: "Michael S. Tsirkin" Cc: Mirko Lindner Cc: Patrick McHardy Cc: Roger Luethi Cc: Rusty Russell Cc: Simon Horman Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Thomas Petazzoni Cc: Wensong Zhang Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/1381186321-4906-2-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- drivers/net/dummy.c | 6 +++++ drivers/net/ethernet/emulex/benet/be_main.c | 4 +++ drivers/net/ethernet/intel/igb/igb_main.c | 5 ++++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 +++ drivers/net/ethernet/marvell/mvneta.c | 3 +++ drivers/net/ethernet/marvell/sky2.c | 3 +++ .../net/ethernet/neterion/vxge/vxge-main.c | 4 +++ drivers/net/ethernet/nvidia/forcedeth.c | 2 ++ drivers/net/ethernet/realtek/8139too.c | 3 +++ drivers/net/ethernet/tile/tilepro.c | 2 ++ drivers/net/ethernet/via/via-rhine.c | 3 +++ drivers/net/ifb.c | 5 ++++ drivers/net/loopback.c | 6 +++++ drivers/net/macvlan.c | 7 ++++++ drivers/net/nlmon.c | 8 ++++++ drivers/net/team/team.c | 6 +++++ drivers/net/team/team_mode_loadbalance.c | 9 ++++++- drivers/net/veth.c | 8 ++++++ drivers/net/virtio_net.c | 8 ++++++ drivers/net/vxlan.c | 8 ++++++ drivers/net/xen-netfront.c | 6 +++++ include/linux/u64_stats_sync.h | 7 ++++++ net/8021q/vlan_dev.c | 9 ++++++- net/bridge/br_device.c | 7 ++++++ net/ipv4/af_inet.c | 14 +++++++++++ net/ipv4/ip_tunnel.c | 8 +++++- net/ipv6/addrconf.c | 14 +++++++++++ net/ipv6/af_inet6.c | 14 +++++++++++ net/ipv6/ip6_gre.c | 15 +++++++++++ net/ipv6/ip6_tunnel.c | 7 ++++++ net/ipv6/sit.c | 15 +++++++++++ net/netfilter/ipvs/ip_vs_ctl.c | 25 ++++++++++++++++--- net/openvswitch/datapath.c | 6 +++++ net/openvswitch/vport.c | 8 ++++++ 34 files changed, 253 insertions(+), 6 deletions(-) diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index b710c6b2d659..bd8f84b0b894 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -88,10 +88,16 @@ static netdev_tx_t dummy_xmit(struct sk_buff *skb, struct net_device *dev) static int dummy_dev_init(struct net_device *dev) { + int i; dev->dstats = alloc_percpu(struct pcpu_dstats); if (!dev->dstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_dstats *dstats; + dstats = per_cpu_ptr(dev->dstats, i); + u64_stats_init(&dstats->syncp); + } return 0; } diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 2c38cc402119..edd75950855a 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2047,6 +2047,9 @@ static int be_tx_qs_create(struct be_adapter *adapter) if (status) return status; + u64_stats_init(&txo->stats.sync); + u64_stats_init(&txo->stats.sync_compl); + /* If num_evt_qs is less than num_tx_qs, then more than * one txq share an eq */ @@ -2108,6 +2111,7 @@ static int be_rx_cqs_create(struct be_adapter *adapter) if (rc) return rc; + u64_stats_init(&rxo->stats.sync); eq = &adapter->eq_obj[i % adapter->num_evt_qs].q; rc = be_cmd_cq_create(adapter, cq, eq, false, 3); if (rc) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 8cf44f2a8ccd..b6edb93a8fc1 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1223,6 +1223,9 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, ring->count = adapter->tx_ring_count; ring->queue_index = txr_idx; + u64_stats_init(&ring->tx_syncp); + u64_stats_init(&ring->tx_syncp2); + /* assign ring to adapter */ adapter->tx_ring[txr_idx] = ring; @@ -1256,6 +1259,8 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, ring->count = adapter->rx_ring_count; ring->queue_index = rxr_idx; + u64_stats_init(&ring->rx_syncp); + /* assign ring to adapter */ adapter->rx_ring[rxr_idx] = ring; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0ade0cd5ef53..c1750364ddc0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4867,6 +4867,8 @@ int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring) if (!tx_ring->tx_buffer_info) goto err; + u64_stats_init(&tx_ring->syncp); + /* round up to nearest 4K */ tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc); tx_ring->size = ALIGN(tx_ring->size, 4096); @@ -4949,6 +4951,8 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) if (!rx_ring->rx_buffer_info) goto err; + u64_stats_init(&rx_ring->syncp); + /* Round up to nearest 4K */ rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index e35bac7cfdf1..cb4635c0cd73 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2792,6 +2792,9 @@ static int mvneta_probe(struct platform_device *pdev) pp = netdev_priv(dev); + u64_stats_init(&pp->tx_stats.syncp); + u64_stats_init(&pp->rx_stats.syncp); + pp->weight = MVNETA_RX_POLL_WEIGHT; pp->phy_node = phy_node; pp->phy_interface = phy_mode; diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index e09a8c6f8536..339d841a538b 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4763,6 +4763,9 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port, sky2->hw = hw; sky2->msg_enable = netif_msg_init(debug, default_msg); + u64_stats_init(&sky2->tx_stats.syncp); + u64_stats_init(&sky2->rx_stats.syncp); + /* Auto speed and flow control */ sky2->flags = SKY2_FLAG_AUTO_SPEED | SKY2_FLAG_AUTO_PAUSE; if (hw->chip_id != CHIP_ID_YUKON_XL) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 5a20eaf903dd..44626ec1127b 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -2072,6 +2072,10 @@ static int vxge_open_vpaths(struct vxgedev *vdev) vdev->config.tx_steering_type; vpath->fifo.ndev = vdev->ndev; vpath->fifo.pdev = vdev->pdev; + + u64_stats_init(&vpath->fifo.stats.syncp); + u64_stats_init(&vpath->ring.stats.syncp); + if (vdev->config.tx_steering_type) vpath->fifo.txq = netdev_get_tx_queue(vdev->ndev, i); diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 098b96dad66f..2d045be4b5cf 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -5619,6 +5619,8 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) spin_lock_init(&np->lock); spin_lock_init(&np->hwstats_lock); SET_NETDEV_DEV(dev, &pci_dev->dev); + u64_stats_init(&np->swstats_rx_syncp); + u64_stats_init(&np->swstats_tx_syncp); init_timer(&np->oom_kick); np->oom_kick.data = (unsigned long) dev; diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 3ccedeb8aba0..c40e984868ad 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -791,6 +791,9 @@ static struct net_device *rtl8139_init_board(struct pci_dev *pdev) pci_set_master (pdev); + u64_stats_init(&tp->rx_stats.syncp); + u64_stats_init(&tp->tx_stats.syncp); + retry: /* PIO bar register comes first. */ bar = !use_io; diff --git a/drivers/net/ethernet/tile/tilepro.c b/drivers/net/ethernet/tile/tilepro.c index 106be47716e7..edb2e12a0fe2 100644 --- a/drivers/net/ethernet/tile/tilepro.c +++ b/drivers/net/ethernet/tile/tilepro.c @@ -1008,6 +1008,8 @@ static void tile_net_register(void *dev_ptr) info->egress_timer.data = (long)info; info->egress_timer.function = tile_net_handle_egress_timer; + u64_stats_init(&info->stats.syncp); + priv->cpu[my_cpu] = info; /* diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index bdf697b184ae..dd99715e6645 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -987,6 +987,9 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rp->base = ioaddr; + u64_stats_init(&rp->tx_stats.syncp); + u64_stats_init(&rp->rx_stats.syncp); + /* Get chip registers into a sane state */ rhine_power_init(dev); rhine_hw_init(dev, pioaddr); diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index a3bed28197d2..c14d39bf32d0 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -265,6 +265,7 @@ MODULE_PARM_DESC(numifbs, "Number of ifb devices"); static int __init ifb_init_one(int index) { struct net_device *dev_ifb; + struct ifb_private *dp; int err; dev_ifb = alloc_netdev(sizeof(struct ifb_private), @@ -273,6 +274,10 @@ static int __init ifb_init_one(int index) if (!dev_ifb) return -ENOMEM; + dp = netdev_priv(dev_ifb); + u64_stats_init(&dp->rsync); + u64_stats_init(&dp->tsync); + dev_ifb->rtnl_link_ops = &ifb_link_ops; err = register_netdevice(dev_ifb); if (err < 0) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index a17d85a331f1..ac24c27b4b2d 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -137,10 +137,16 @@ static const struct ethtool_ops loopback_ethtool_ops = { static int loopback_dev_init(struct net_device *dev) { + int i; dev->lstats = alloc_percpu(struct pcpu_lstats); if (!dev->lstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_lstats *lb_stats; + lb_stats = per_cpu_ptr(dev->lstats, i); + u64_stats_init(&lb_stats->syncp); + } return 0; } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 9bf46bd19b87..0924e51b9ee0 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -501,6 +501,7 @@ static int macvlan_init(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); const struct net_device *lowerdev = vlan->lowerdev; + int i; dev->state = (dev->state & ~MACVLAN_STATE_MASK) | (lowerdev->state & MACVLAN_STATE_MASK); @@ -516,6 +517,12 @@ static int macvlan_init(struct net_device *dev) if (!vlan->pcpu_stats) return -ENOMEM; + for_each_possible_cpu(i) { + struct macvlan_pcpu_stats *mvlstats; + mvlstats = per_cpu_ptr(vlan->pcpu_stats, i); + u64_stats_init(&mvlstats->syncp); + } + return 0; } diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c index b57ce5f48962..d2bb12bfabd5 100644 --- a/drivers/net/nlmon.c +++ b/drivers/net/nlmon.c @@ -47,8 +47,16 @@ static int nlmon_change_mtu(struct net_device *dev, int new_mtu) static int nlmon_dev_init(struct net_device *dev) { + int i; + dev->lstats = alloc_percpu(struct pcpu_lstats); + for_each_possible_cpu(i) { + struct pcpu_lstats *nlmstats; + nlmstats = per_cpu_ptr(dev->lstats, i); + u64_stats_init(&nlmstats->syncp); + } + return dev->lstats == NULL ? -ENOMEM : 0; } diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 50e43e64d51d..6574eb8766f9 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1540,6 +1540,12 @@ static int team_init(struct net_device *dev) if (!team->pcpu_stats) return -ENOMEM; + for_each_possible_cpu(i) { + struct team_pcpu_stats *team_stats; + team_stats = per_cpu_ptr(team->pcpu_stats, i); + u64_stats_init(&team_stats->syncp); + } + for (i = 0; i < TEAM_PORT_HASHENTRIES; i++) INIT_HLIST_HEAD(&team->en_port_hlist[i]); INIT_LIST_HEAD(&team->port_list); diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index 829a9cd2b4da..d671fc3ac5ac 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -570,7 +570,7 @@ static int lb_init(struct team *team) { struct lb_priv *lb_priv = get_lb_priv(team); lb_select_tx_port_func_t *func; - int err; + int i, err; /* set default tx port selector */ func = lb_select_tx_port_get_func("hash"); @@ -588,6 +588,13 @@ static int lb_init(struct team *team) goto err_alloc_pcpu_stats; } + for_each_possible_cpu(i) { + struct lb_pcpu_stats *team_lb_stats; + team_lb_stats = per_cpu_ptr(lb_priv->pcpu_stats, i); + u64_stats_init(&team_lb_stats->syncp); + } + + INIT_DELAYED_WORK(&lb_priv->ex->stats.refresh_dw, lb_stats_refresh); err = team_options_register(team, lb_options, ARRAY_SIZE(lb_options)); diff --git a/drivers/net/veth.c b/drivers/net/veth.c index eee1f19ef1e9..46e83e3fe999 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -230,10 +230,18 @@ static int veth_change_mtu(struct net_device *dev, int new_mtu) static int veth_dev_init(struct net_device *dev) { + int i; + dev->vstats = alloc_percpu(struct pcpu_vstats); if (!dev->vstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_vstats *veth_stats; + veth_stats = per_cpu_ptr(dev->vstats, i); + u64_stats_init(&veth_stats->syncp); + } + return 0; } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9fbdfcd1e1a0..ee384f3d612b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1569,6 +1569,14 @@ static int virtnet_probe(struct virtio_device *vdev) if (vi->stats == NULL) goto free; + for_each_possible_cpu(i) { + struct virtnet_stats *virtnet_stats; + virtnet_stats = per_cpu_ptr(vi->stats, i); + u64_stats_init(&virtnet_stats->tx_syncp); + u64_stats_init(&virtnet_stats->rx_syncp); + } + + vi->vq_index = alloc_percpu(int); if (vi->vq_index == NULL) goto free_stats; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 2ef5b6219f3f..01ab64d5f9a4 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1884,11 +1884,19 @@ static int vxlan_init(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); struct vxlan_sock *vs; + int i; dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *vxlan_stats; + vxlan_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&vxlan_stats->syncp); + } + + spin_lock(&vn->sock_lock); vs = vxlan_find_sock(dev_net(dev), vxlan->dst_port); if (vs) { diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 36808bf25677..54223ac6d8a6 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1338,6 +1338,12 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) if (np->stats == NULL) goto exit; + for_each_possible_cpu(i) { + struct netfront_stats *xen_nf_stats; + xen_nf_stats = per_cpu_ptr(np->stats, i); + u64_stats_init(&xen_nf_stats->syncp); + } + /* Initialise tx_skbs as a free chain containing every entry. */ np->tx_skb_freelist = 0; for (i = 0; i < NET_TX_RING_SIZE; i++) { diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 8da8c4e87da3..7bfabd20204c 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -67,6 +67,13 @@ struct u64_stats_sync { #endif }; + +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) +# define u64_stats_init(syncp) seqcount_init(syncp.seq) +#else +# define u64_stats_init(syncp) do { } while (0) +#endif + static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 09bf1c38805b..4deff3ed1da1 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -558,7 +558,7 @@ static const struct net_device_ops vlan_netdev_ops; static int vlan_dev_init(struct net_device *dev) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - int subclass = 0; + int subclass = 0, i; netif_carrier_off(dev); @@ -612,6 +612,13 @@ static int vlan_dev_init(struct net_device *dev) if (!vlan_dev_priv(dev)->vlan_pcpu_stats) return -ENOMEM; + for_each_possible_cpu(i) { + struct vlan_pcpu_stats *vlan_stat; + vlan_stat = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); + u64_stats_init(&vlan_stat->syncp); + } + + return 0; } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ca04163635da..7893d641775b 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -88,11 +88,18 @@ out: static int br_dev_init(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); + int i; br->stats = alloc_percpu(struct br_cpu_netstats); if (!br->stats) return -ENOMEM; + for_each_possible_cpu(i) { + struct br_cpu_netstats *br_dev_stats; + br_dev_stats = per_cpu_ptr(br->stats, i); + u64_stats_init(&br_dev_stats->syncp); + } + return 0; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index cfeb85cff4f0..5f4617e377b8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1518,6 +1518,7 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align) ptr[0] = __alloc_percpu(mibsize, align); if (!ptr[0]) return -ENOMEM; + #if SNMP_ARRAY_SZ == 2 ptr[1] = __alloc_percpu(mibsize, align); if (!ptr[1]) { @@ -1561,6 +1562,8 @@ static const struct net_protocol icmp_protocol = { static __net_init int ipv4_mib_init_net(struct net *net) { + int i; + if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics, sizeof(struct tcp_mib), __alignof__(struct tcp_mib)) < 0) @@ -1569,6 +1572,17 @@ static __net_init int ipv4_mib_init_net(struct net *net) sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) goto err_ip_mib; + + for_each_possible_cpu(i) { + struct ipstats_mib *af_inet_stats; + af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[0], i); + u64_stats_init(&af_inet_stats->syncp); +#if SNMP_ARRAY_SZ == 2 + af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[1], i); + u64_stats_init(&af_inet_stats->syncp); +#endif + } + if (snmp_mib_init((void __percpu **)net->mib.net_statistics, sizeof(struct linux_mib), __alignof__(struct linux_mib)) < 0) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 63a6d6d6b875..caf01176a5e4 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -976,13 +976,19 @@ int ip_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; - int err; + int i, err; dev->destructor = ip_tunnel_dev_free; dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ipt_stats; + ipt_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ipt_stats->syncp); + } + err = gro_cells_init(&tunnel->gro_cells, dev); if (err) { free_percpu(dev->tstats); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cd3fb301da38..d62d589bb622 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -281,10 +281,24 @@ static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp, static int snmp6_alloc_dev(struct inet6_dev *idev) { + int i; + if (snmp_mib_init((void __percpu **)idev->stats.ipv6, sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) goto err_ip; + + for_each_possible_cpu(i) { + struct ipstats_mib *addrconf_stats; + addrconf_stats = per_cpu_ptr(idev->stats.ipv6[0], i); + u64_stats_init(&addrconf_stats->syncp); +#if SNMP_ARRAY_SZ == 2 + addrconf_stats = per_cpu_ptr(idev->stats.ipv6[1], i); + u64_stats_init(&addrconf_stats->syncp); +#endif + } + + idev->stats.icmpv6dev = kzalloc(sizeof(struct icmpv6_mib_device), GFP_KERNEL); if (!idev->stats.icmpv6dev) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7c96100b021e..a8f8559b3dce 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -719,6 +719,8 @@ static void ipv6_packet_cleanup(void) static int __net_init ipv6_init_mibs(struct net *net) { + int i; + if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6, sizeof(struct udp_mib), __alignof__(struct udp_mib)) < 0) @@ -731,6 +733,18 @@ static int __net_init ipv6_init_mibs(struct net *net) sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) goto err_ip_mib; + + for_each_possible_cpu(i) { + struct ipstats_mib *af_inet6_stats; + af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[0], i); + u64_stats_init(&af_inet6_stats->syncp); +#if SNMP_ARRAY_SZ == 2 + af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[1], i); + u64_stats_init(&af_inet6_stats->syncp); +#endif + } + + if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics, sizeof(struct icmpv6_mib), __alignof__(struct icmpv6_mib)) < 0) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index bf4a9a084de5..8acb28621f9c 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1252,6 +1252,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev) static int ip6gre_tunnel_init(struct net_device *dev) { struct ip6_tnl *tunnel; + int i; tunnel = netdev_priv(dev); @@ -1269,6 +1270,13 @@ static int ip6gre_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ip6gre_tunnel_stats; + ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ip6gre_tunnel_stats->syncp); + } + + return 0; } @@ -1449,6 +1457,7 @@ static void ip6gre_netlink_parms(struct nlattr *data[], static int ip6gre_tap_init(struct net_device *dev) { struct ip6_tnl *tunnel; + int i; tunnel = netdev_priv(dev); @@ -1462,6 +1471,12 @@ static int ip6gre_tap_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ip6gre_tap_stats; + ip6gre_tap_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ip6gre_tap_stats->syncp); + } + return 0; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 583b77e2f69b..df1fa58528c6 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1494,12 +1494,19 @@ static inline int ip6_tnl_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); + int i; t->dev = dev; t->net = dev_net(dev); dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + + for_each_possible_cpu(i) { + struct pcpu_tstats *ip6_tnl_stats; + ip6_tnl_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ip6_tnl_stats->syncp); + } return 0; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 19269453a8ea..365dc5473eb2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1310,6 +1310,7 @@ static void ipip6_tunnel_setup(struct net_device *dev) static int ipip6_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); + int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1322,6 +1323,12 @@ static int ipip6_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ipip6_tunnel_stats; + ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ipip6_tunnel_stats->syncp); + } + return 0; } @@ -1331,6 +1338,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) struct iphdr *iph = &tunnel->parms.iph; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); + int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1344,6 +1352,13 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + + for_each_possible_cpu(i) { + struct pcpu_tstats *ipip6_fb_stats; + ipip6_fb_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ipip6_fb_stats->syncp); + } + dev_hold(dev); rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); return 0; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a3df9bddc4f7..3825725907f6 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -842,7 +842,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, struct ip_vs_dest **dest_p) { struct ip_vs_dest *dest; - unsigned int atype; + unsigned int atype, i; EnterFunction(2); @@ -869,6 +869,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, if (!dest->stats.cpustats) goto err_alloc; + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *ip_vs_dest_stats; + ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i); + u64_stats_init(&ip_vs_dest_stats->syncp); + } + dest->af = svc->af; dest->protocol = svc->protocol; dest->vaddr = svc->addr; @@ -1134,7 +1140,7 @@ static int ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p) { - int ret = 0; + int ret = 0, i; struct ip_vs_scheduler *sched = NULL; struct ip_vs_pe *pe = NULL; struct ip_vs_service *svc = NULL; @@ -1184,6 +1190,13 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, goto out_err; } + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *ip_vs_stats; + ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i); + u64_stats_init(&ip_vs_stats->syncp); + } + + /* I'm the first user of the service */ atomic_set(&svc->refcnt, 0); @@ -3780,7 +3793,7 @@ static struct notifier_block ip_vs_dst_notifier = { int __net_init ip_vs_control_net_init(struct net *net) { - int idx; + int i, idx; struct netns_ipvs *ipvs = net_ipvs(net); /* Initialize rs_table */ @@ -3799,6 +3812,12 @@ int __net_init ip_vs_control_net_init(struct net *net) if (!ipvs->tot_stats.cpustats) return -ENOMEM; + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *ipvs_tot_stats; + ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i); + u64_stats_init(&ipvs_tot_stats->syncp); + } + spin_lock_init(&ipvs->tot_stats.lock); proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 2aa13bd7f2b2..b92553c02279 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1698,6 +1698,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_table; } + for_each_possible_cpu(i) { + struct dp_stats_percpu *dpath_stats; + dpath_stats = per_cpu_ptr(dp->stats_percpu, i); + u64_stats_init(&dpath_stats->sync); + } + dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), GFP_KERNEL); if (!dp->ports) { diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 6f65dbe13812..d830a95f03a4 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -118,6 +118,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, { struct vport *vport; size_t alloc_size; + int i; alloc_size = sizeof(struct vport); if (priv_size) { @@ -141,6 +142,13 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, return ERR_PTR(-ENOMEM); } + for_each_possible_cpu(i) { + struct pcpu_tstats *vport_stats; + vport_stats = per_cpu_ptr(vport->percpu_stats, i); + u64_stats_init(&vport_stats->syncp); + } + + spin_lock_init(&vport->stats_lock); return vport; From 1ca7d67cf5d5a2aef26a8d9afd789006fa098347 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Oct 2013 15:51:59 -0700 Subject: [PATCH 15/21] seqcount: Add lockdep functionality to seqcount/seqlock structures Currently seqlocks and seqcounts don't support lockdep. After running across a seqcount related deadlock in the timekeeping code, I used a less-refined and more focused variant of this patch to narrow down the cause of the issue. This is a first-pass attempt to properly enable lockdep functionality on seqlocks and seqcounts. Since seqcounts are used in the vdso gettimeofday code, I've provided non-lockdep accessors for those needs. I've also handled one case where there were nested seqlock writers and there may be more edge cases. Comments and feedback would be appreciated! Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra Cc: Eric Dumazet Cc: Li Zefan Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: "David S. Miller" Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/1381186321-4906-3-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/vdso/vclock_gettime.c | 8 ++-- fs/dcache.c | 4 +- fs/fs_struct.c | 2 +- include/linux/init_task.h | 8 ++-- include/linux/lockdep.h | 8 +++- include/linux/seqlock.h | 79 +++++++++++++++++++++++++++++++--- mm/filemap_xip.c | 2 +- 7 files changed, 90 insertions(+), 21 deletions(-) diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 72074d528400..2ada505067cc 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -178,7 +178,7 @@ notrace static int __always_inline do_realtime(struct timespec *ts) ts->tv_nsec = 0; do { - seq = read_seqcount_begin(>od->seq); + seq = read_seqcount_begin_no_lockdep(>od->seq); mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->wall_time_sec; ns = gtod->wall_time_snsec; @@ -198,7 +198,7 @@ notrace static int do_monotonic(struct timespec *ts) ts->tv_nsec = 0; do { - seq = read_seqcount_begin(>od->seq); + seq = read_seqcount_begin_no_lockdep(>od->seq); mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->monotonic_time_sec; ns = gtod->monotonic_time_snsec; @@ -214,7 +214,7 @@ notrace static int do_realtime_coarse(struct timespec *ts) { unsigned long seq; do { - seq = read_seqcount_begin(>od->seq); + seq = read_seqcount_begin_no_lockdep(>od->seq); ts->tv_sec = gtod->wall_time_coarse.tv_sec; ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; } while (unlikely(read_seqcount_retry(>od->seq, seq))); @@ -225,7 +225,7 @@ notrace static int do_monotonic_coarse(struct timespec *ts) { unsigned long seq; do { - seq = read_seqcount_begin(>od->seq); + seq = read_seqcount_begin_no_lockdep(>od->seq); ts->tv_sec = gtod->monotonic_time_coarse.tv_sec; ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec; } while (unlikely(read_seqcount_retry(>od->seq, seq))); diff --git a/fs/dcache.c b/fs/dcache.c index ae6ebb88ceff..f750be22c08c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2574,7 +2574,7 @@ static void __d_move(struct dentry * dentry, struct dentry * target) dentry_lock_for_move(dentry, target); write_seqcount_begin(&dentry->d_seq); - write_seqcount_begin(&target->d_seq); + write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); /* __d_drop does write_seqcount_barrier, but they're OK to nest. */ @@ -2706,7 +2706,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) dentry_lock_for_move(anon, dentry); write_seqcount_begin(&dentry->d_seq); - write_seqcount_begin(&anon->d_seq); + write_seqcount_begin_nested(&anon->d_seq, DENTRY_D_LOCK_NESTED); dparent = dentry->d_parent; diff --git a/fs/fs_struct.c b/fs/fs_struct.c index d8ac61d0c932..7dca743b2ce1 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -161,6 +161,6 @@ EXPORT_SYMBOL(current_umask); struct fs_struct init_fs = { .users = 1, .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), - .seq = SEQCNT_ZERO, + .seq = SEQCNT_ZERO(init_fs.seq), .umask = 0022, }; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 5cd0f0949927..b0ed422e4e4a 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -32,10 +32,10 @@ extern struct fs_struct init_fs; #endif #ifdef CONFIG_CPUSETS -#define INIT_CPUSET_SEQ \ - .mems_allowed_seq = SEQCNT_ZERO, +#define INIT_CPUSET_SEQ(tsk) \ + .mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq), #else -#define INIT_CPUSET_SEQ +#define INIT_CPUSET_SEQ(tsk) #endif #define INIT_SIGNALS(sig) { \ @@ -220,7 +220,7 @@ extern struct task_group root_task_group; INIT_FTRACE_GRAPH \ INIT_TRACE_RECURSION \ INIT_TASK_RCU_PREEMPT(tsk) \ - INIT_CPUSET_SEQ \ + INIT_CPUSET_SEQ(tsk) \ INIT_VTIME(tsk) \ } diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index cfc2f119779a..92b1bfc5da60 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -497,6 +497,10 @@ static inline void print_irqtrace_events(struct task_struct *curr) #define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) #define rwlock_release(l, n, i) lock_release(l, n, i) +#define seqcount_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) +#define seqcount_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) +#define seqcount_release(l, n, i) lock_release(l, n, i) + #define mutex_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define mutex_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) #define mutex_release(l, n, i) lock_release(l, n, i) @@ -504,11 +508,11 @@ static inline void print_irqtrace_events(struct task_struct *curr) #define rwsem_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define rwsem_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) #define rwsem_acquire_read(l, s, t, i) lock_acquire_shared(l, s, t, NULL, i) -# define rwsem_release(l, n, i) lock_release(l, n, i) +#define rwsem_release(l, n, i) lock_release(l, n, i) #define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_) #define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_) -# define lock_map_release(l) lock_release(l, 1, _THIS_IP_) +#define lock_map_release(l) lock_release(l, 1, _THIS_IP_) #ifdef CONFIG_PROVE_LOCKING # define might_lock(lock) \ diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 21a209336e79..1e8a8b6e837d 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -34,6 +34,7 @@ #include #include +#include #include /* @@ -44,10 +45,50 @@ */ typedef struct seqcount { unsigned sequence; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif } seqcount_t; -#define SEQCNT_ZERO { 0 } -#define seqcount_init(x) do { *(x) = (seqcount_t) SEQCNT_ZERO; } while (0) +static inline void __seqcount_init(seqcount_t *s, const char *name, + struct lock_class_key *key) +{ + /* + * Make sure we are not reinitializing a held lock: + */ + lockdep_init_map(&s->dep_map, name, key, 0); + s->sequence = 0; +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define SEQCOUNT_DEP_MAP_INIT(lockname) \ + .dep_map = { .name = #lockname } \ + +# define seqcount_init(s) \ + do { \ + static struct lock_class_key __key; \ + __seqcount_init((s), #s, &__key); \ + } while (0) + +static inline void seqcount_lockdep_reader_access(const seqcount_t *s) +{ + seqcount_t *l = (seqcount_t *)s; + unsigned long flags; + + local_irq_save(flags); + seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_); + seqcount_release(&l->dep_map, 1, _RET_IP_); + local_irq_restore(flags); +} + +#else +# define SEQCOUNT_DEP_MAP_INIT(lockname) +# define seqcount_init(s) __seqcount_init(s, NULL, NULL) +# define seqcount_lockdep_reader_access(x) +#endif + +#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)} + /** * __read_seqcount_begin - begin a seq-read critical section (without barrier) @@ -75,6 +116,22 @@ repeat: return ret; } +/** + * read_seqcount_begin_no_lockdep - start seq-read critical section w/o lockdep + * @s: pointer to seqcount_t + * Returns: count to be passed to read_seqcount_retry + * + * read_seqcount_begin_no_lockdep opens a read critical section of the given + * seqcount, but without any lockdep checking. Validity of the critical + * section is tested by checking read_seqcount_retry function. + */ +static inline unsigned read_seqcount_begin_no_lockdep(const seqcount_t *s) +{ + unsigned ret = __read_seqcount_begin(s); + smp_rmb(); + return ret; +} + /** * read_seqcount_begin - begin a seq-read critical section * @s: pointer to seqcount_t @@ -86,9 +143,8 @@ repeat: */ static inline unsigned read_seqcount_begin(const seqcount_t *s) { - unsigned ret = __read_seqcount_begin(s); - smp_rmb(); - return ret; + seqcount_lockdep_reader_access(s); + return read_seqcount_begin_no_lockdep(s); } /** @@ -108,6 +164,8 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s) static inline unsigned raw_seqcount_begin(const seqcount_t *s) { unsigned ret = ACCESS_ONCE(s->sequence); + + seqcount_lockdep_reader_access(s); smp_rmb(); return ret & ~1; } @@ -152,14 +210,21 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) * Sequence counter only version assumes that callers are using their * own mutexing. */ -static inline void write_seqcount_begin(seqcount_t *s) +static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) { s->sequence++; smp_wmb(); + seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); +} + +static inline void write_seqcount_begin(seqcount_t *s) +{ + write_seqcount_begin_nested(s, 0); } static inline void write_seqcount_end(seqcount_t *s) { + seqcount_release(&s->dep_map, 1, _RET_IP_); smp_wmb(); s->sequence++; } @@ -188,7 +253,7 @@ typedef struct { */ #define __SEQLOCK_UNLOCKED(lockname) \ { \ - .seqcount = SEQCNT_ZERO, \ + .seqcount = SEQCNT_ZERO(lockname), \ .lock = __SPIN_LOCK_UNLOCKED(lockname) \ } diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 28fe26b64f8a..d8d9fe3f685c 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -26,7 +26,7 @@ * of ZERO_PAGE(), such as /dev/zero */ static DEFINE_MUTEX(xip_sparse_mutex); -static seqcount_t xip_sparse_seq = SEQCNT_ZERO; +static seqcount_t xip_sparse_seq = SEQCNT_ZERO(xip_sparse_seq); static struct page *__xip_sparse_page; /* called under xip_sparse_mutex */ From db751fe3ea6880ff5ac5abe60cb7b80deb5a4140 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Oct 2013 15:52:00 -0700 Subject: [PATCH 16/21] cpuset: Fix potential deadlock w/ set_mems_allowed After adding lockdep support to seqlock/seqcount structures, I started seeing the following warning: [ 1.070907] ====================================================== [ 1.072015] [ INFO: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected ] [ 1.073181] 3.11.0+ #67 Not tainted [ 1.073801] ------------------------------------------------------ [ 1.074882] kworker/u4:2/708 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: [ 1.076088] (&p->mems_allowed_seq){+.+...}, at: [] new_slab+0x5f/0x280 [ 1.077572] [ 1.077572] and this task is already holding: [ 1.078593] (&(&q->__queue_lock)->rlock){..-...}, at: [] blk_execute_rq_nowait+0x53/0xf0 [ 1.080042] which would create a new lock dependency: [ 1.080042] (&(&q->__queue_lock)->rlock){..-...} -> (&p->mems_allowed_seq){+.+...} [ 1.080042] [ 1.080042] but this new dependency connects a SOFTIRQ-irq-safe lock: [ 1.080042] (&(&q->__queue_lock)->rlock){..-...} [ 1.080042] ... which became SOFTIRQ-irq-safe at: [ 1.080042] [] __lock_acquire+0x5b9/0x1db0 [ 1.080042] [] lock_acquire+0x95/0x130 [ 1.080042] [] _raw_spin_lock+0x41/0x80 [ 1.080042] [] scsi_device_unbusy+0x7e/0xd0 [ 1.080042] [] scsi_finish_command+0x32/0xf0 [ 1.080042] [] scsi_softirq_done+0xa1/0x130 [ 1.080042] [] blk_done_softirq+0x73/0x90 [ 1.080042] [] __do_softirq+0x110/0x2f0 [ 1.080042] [] run_ksoftirqd+0x2d/0x60 [ 1.080042] [] smpboot_thread_fn+0x156/0x1e0 [ 1.080042] [] kthread+0xd6/0xe0 [ 1.080042] [] ret_from_fork+0x7c/0xb0 [ 1.080042] [ 1.080042] to a SOFTIRQ-irq-unsafe lock: [ 1.080042] (&p->mems_allowed_seq){+.+...} [ 1.080042] ... which became SOFTIRQ-irq-unsafe at: [ 1.080042] ... [] __lock_acquire+0x613/0x1db0 [ 1.080042] [] lock_acquire+0x95/0x130 [ 1.080042] [] kthreadd+0x82/0x180 [ 1.080042] [] ret_from_fork+0x7c/0xb0 [ 1.080042] [ 1.080042] other info that might help us debug this: [ 1.080042] [ 1.080042] Possible interrupt unsafe locking scenario: [ 1.080042] [ 1.080042] CPU0 CPU1 [ 1.080042] ---- ---- [ 1.080042] lock(&p->mems_allowed_seq); [ 1.080042] local_irq_disable(); [ 1.080042] lock(&(&q->__queue_lock)->rlock); [ 1.080042] lock(&p->mems_allowed_seq); [ 1.080042] [ 1.080042] lock(&(&q->__queue_lock)->rlock); [ 1.080042] [ 1.080042] *** DEADLOCK *** The issue stems from the kthreadd() function calling set_mems_allowed with irqs enabled. While its possibly unlikely for the actual deadlock to trigger, a fix is fairly simple: disable irqs before taking the mems_allowed_seq lock. Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra Acked-by: Li Zefan Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: "David S. Miller" Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/1381186321-4906-4-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- include/linux/cpuset.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index cc1b01cf2035..3fe661fe96d1 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -110,10 +110,14 @@ static inline bool put_mems_allowed(unsigned int seq) static inline void set_mems_allowed(nodemask_t nodemask) { + unsigned long flags; + task_lock(current); + local_irq_save(flags); write_seqcount_begin(¤t->mems_allowed_seq); current->mems_allowed = nodemask; write_seqcount_end(¤t->mems_allowed_seq); + local_irq_restore(flags); task_unlock(current); } From 5ac68e7c34a4797aa4ca9615e5a6603bda1abe9b Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Oct 2013 15:52:01 -0700 Subject: [PATCH 17/21] ipv6: Fix possible ipv6 seqlock deadlock While enabling lockdep on seqlocks, I ran across the warning below caused by the ipv6 stats being updated in both irq and non-irq context. This patch changes from IP6_INC_STATS_BH to IP6_INC_STATS (suggested by Eric Dumazet) to resolve this problem. [ 11.120383] ================================= [ 11.121024] [ INFO: inconsistent lock state ] [ 11.121663] 3.12.0-rc1+ #68 Not tainted [ 11.122229] --------------------------------- [ 11.122867] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [ 11.123741] init/4483 [HC0[0]:SC1[3]:HE1:SE0] takes: [ 11.124505] (&stats->syncp.seq#6){+.?...}, at: [] ndisc_send_ns+0xe2/0x130 [ 11.125736] {SOFTIRQ-ON-W} state was registered at: [ 11.126447] [] __lock_acquire+0x5c7/0x1af0 [ 11.127222] [] lock_acquire+0x96/0xd0 [ 11.127925] [] write_seqcount_begin+0x33/0x40 [ 11.128766] [] ip6_dst_lookup_tail+0x3a3/0x460 [ 11.129582] [] ip6_dst_lookup_flow+0x2e/0x80 [ 11.130014] [] ip6_datagram_connect+0x150/0x4e0 [ 11.130014] [] inet_dgram_connect+0x25/0x70 [ 11.130014] [] SYSC_connect+0xa1/0xc0 [ 11.130014] [] SyS_connect+0x11/0x20 [ 11.130014] [] SyS_socketcall+0x12b/0x300 [ 11.130014] [] syscall_call+0x7/0xb [ 11.130014] irq event stamp: 1184 [ 11.130014] hardirqs last enabled at (1184): [] local_bh_enable+0x71/0x110 [ 11.130014] hardirqs last disabled at (1183): [] local_bh_enable+0x3d/0x110 [ 11.130014] softirqs last enabled at (0): [] copy_process.part.42+0x45d/0x11a0 [ 11.130014] softirqs last disabled at (1147): [] irq_exit+0xa5/0xb0 [ 11.130014] [ 11.130014] other info that might help us debug this: [ 11.130014] Possible unsafe locking scenario: [ 11.130014] [ 11.130014] CPU0 [ 11.130014] ---- [ 11.130014] lock(&stats->syncp.seq#6); [ 11.130014] [ 11.130014] lock(&stats->syncp.seq#6); [ 11.130014] [ 11.130014] *** DEADLOCK *** [ 11.130014] [ 11.130014] 3 locks held by init/4483: [ 11.130014] #0: (rcu_read_lock){.+.+..}, at: [] SyS_setpriority+0x4c/0x620 [ 11.130014] #1: (((&ifa->dad_timer))){+.-...}, at: [] call_timer_fn+0x0/0xf0 [ 11.130014] #2: (rcu_read_lock){.+.+..}, at: [] ndisc_send_skb+0x54/0x5d0 [ 11.130014] [ 11.130014] stack backtrace: [ 11.130014] CPU: 0 PID: 4483 Comm: init Not tainted 3.12.0-rc1+ #68 [ 11.130014] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 11.130014] 00000000 00000000 c55e5c10 c1bb0e71 c57128b0 c55e5c4c c1badf79 c1ec1123 [ 11.130014] c1ec1484 00001183 00000000 00000000 00000001 00000003 00000001 00000000 [ 11.130014] c1ec1484 00000004 c5712dcc 00000000 c55e5c84 c10de492 00000004 c10755f2 [ 11.130014] Call Trace: [ 11.130014] [] dump_stack+0x4b/0x66 [ 11.130014] [] print_usage_bug+0x1d3/0x1dd [ 11.130014] [] mark_lock+0x282/0x2f0 [ 11.130014] [] ? kvm_clock_read+0x22/0x30 [ 11.130014] [] ? check_usage_backwards+0x150/0x150 [ 11.130014] [] __lock_acquire+0x584/0x1af0 [ 11.130014] [] ? sched_clock_cpu+0xef/0x190 [ 11.130014] [] ? mark_held_locks+0x8c/0xf0 [ 11.130014] [] lock_acquire+0x96/0xd0 [ 11.130014] [] ? ndisc_send_ns+0xe2/0x130 [ 11.130014] [] ndisc_send_skb+0x293/0x5d0 [ 11.130014] [] ? ndisc_send_ns+0xe2/0x130 [ 11.130014] [] ndisc_send_ns+0xe2/0x130 [ 11.130014] [] ? mod_timer+0xf2/0x160 [ 11.130014] [] ? addrconf_dad_timer+0xce/0x150 [ 11.130014] [] addrconf_dad_timer+0x10a/0x150 [ 11.130014] [] ? addrconf_dad_completed+0x1c0/0x1c0 [ 11.130014] [] call_timer_fn+0x73/0xf0 [ 11.130014] [] ? __internal_add_timer+0xb0/0xb0 [ 11.130014] [] ? addrconf_dad_completed+0x1c0/0x1c0 [ 11.130014] [] run_timer_softirq+0x141/0x1e0 [ 11.130014] [] ? __do_softirq+0x70/0x1b0 [ 11.130014] [] __do_softirq+0xc0/0x1b0 [ 11.130014] [] irq_exit+0xa5/0xb0 [ 11.130014] [] smp_apic_timer_interrupt+0x35/0x50 [ 11.130014] [] apic_timer_interrupt+0x32/0x38 [ 11.130014] [] ? SyS_setpriority+0xfd/0x620 [ 11.130014] [] ? lock_release+0x9/0x240 [ 11.130014] [] ? SyS_setpriority+0xe7/0x620 [ 11.130014] [] ? _raw_read_unlock+0x1d/0x30 [ 11.130014] [] SyS_setpriority+0x111/0x620 [ 11.130014] [] ? SyS_setpriority+0x4c/0x620 [ 11.130014] [] syscall_call+0x7/0xb Signed-off-by: John Stultz Acked-by: Eric Dumazet Signed-off-by: Peter Zijlstra Cc: Alexey Kuznetsov Cc: "David S. Miller" Cc: Hideaki YOSHIFUJI Cc: James Morris Cc: Mathieu Desnoyers Cc: Patrick McHardy Cc: Steven Rostedt Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/1381186321-4906-5-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 91fb4e8212f5..ad138b187fb2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -909,7 +909,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, out_err_release: if (err == -ENETUNREACH) - IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); dst_release(*dst); *dst = NULL; return err; From 67a6de49bf545c34eb8dee99abbb92d9ea268200 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 Nov 2013 08:26:39 +0100 Subject: [PATCH 18/21] locking/doc: Update references to kernel/mutex.c Fix this docbook error: >> docproc: kernel/mutex.c: No such file or directory by updating the stale references to kernel/mutex.c. Reported-by: fengguang.wu@intel.com Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-34pikw1tlsskj65rrt5iusrq@git.kernel.org Signed-off-by: Ingo Molnar --- Documentation/DocBook/kernel-locking.tmpl | 2 +- Documentation/mutex-design.txt | 10 +++++----- include/linux/mutex.h | 2 +- kernel/locking/mutex.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 09e884e5b9f5..19f2a5a5a5b4 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -1958,7 +1958,7 @@ machines due to caching. Mutex API reference !Iinclude/linux/mutex.h -!Ekernel/mutex.c +!Ekernel/locking/mutex.c diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt index 38c10fd7f411..1dfe62c3641d 100644 --- a/Documentation/mutex-design.txt +++ b/Documentation/mutex-design.txt @@ -116,11 +116,11 @@ using mutexes at the moment, please let me know if you find any. ] Implementation of mutexes ------------------------- -'struct mutex' is the new mutex type, defined in include/linux/mutex.h -and implemented in kernel/mutex.c. It is a counter-based mutex with a -spinlock and a wait-list. The counter has 3 states: 1 for "unlocked", -0 for "locked" and negative numbers (usually -1) for "locked, potential -waiters queued". +'struct mutex' is the new mutex type, defined in include/linux/mutex.h and +implemented in kernel/locking/mutex.c. It is a counter-based mutex with a +spinlock and a wait-list. The counter has 3 states: 1 for "unlocked", 0 for +"locked" and negative numbers (usually -1) for "locked, potential waiters +queued". the APIs of 'struct mutex' have been streamlined: diff --git a/include/linux/mutex.h b/include/linux/mutex.h index bab49da8a0f0..d3181936c138 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -131,7 +131,7 @@ static inline int mutex_is_locked(struct mutex *lock) } /* - * See kernel/mutex.c for detailed documentation of these APIs. + * See kernel/locking/mutex.c for detailed documentation of these APIs. * Also see Documentation/mutex-design.txt. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index d24105b1b794..4dd6e4c219de 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -1,5 +1,5 @@ /* - * kernel/mutex.c + * kernel/locking/mutex.c * * Mutexes: blocking mutual exclusion locks * From 838cc7b488f89ee642fd8336e8e1b620c8c3ece2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Nov 2013 17:42:30 +0100 Subject: [PATCH 19/21] lockdep/proc: Fix lock-time avg computation > kernel/locking/lockdep_proc.c: In function 'seq_lock_time': > >> kernel/locking/lockdep_proc.c:424:23: warning: comparison of distinct pointer types lacks a cast [enabled by default] > > 418 static void seq_lock_time(struct seq_file *m, struct lock_time *lt) > 419 { > 420 seq_printf(m, "%14lu", lt->nr); > 421 seq_time(m, lt->min); > 422 seq_time(m, lt->max); > 423 seq_time(m, lt->total); > > 424 seq_time(m, lt->nr ? do_div(lt->total, lt->nr) : 0); > 425 } My compiler refuses to actually say that; but it looks wrong in that do_div() returns the remainder, not the divisor. Reported-by: Fengguang Wu Tested-by: Fengguang Wu Signed-off-by: Peter Zijlstra Cc: Davidlohr Bueso Link: http://lkml.kernel.org/r/20131106164230.GE16117@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/locking/lockdep_proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index 09220656d888..ef43ac4bafb5 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -421,7 +421,7 @@ static void seq_lock_time(struct seq_file *m, struct lock_time *lt) seq_time(m, lt->min); seq_time(m, lt->max); seq_time(m, lt->total); - seq_time(m, lt->nr ? do_div(lt->total, lt->nr) : 0); + seq_time(m, lt->nr ? div_s64(lt->total, lt->nr) : 0); } static void seq_stats(struct seq_file *m, struct lock_stat_data *data) From 5216d530bbd8581c927b250a11533c2a31b57510 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Sat, 9 Nov 2013 00:55:35 +0800 Subject: [PATCH 20/21] locking/lockdep: Mark __lockdep_count_forward_deps() as static There are new Sparse warnings: >> kernel/locking/lockdep.c:1235:15: sparse: symbol '__lockdep_count_forward_deps' was not declared. Should it be static? >> kernel/locking/lockdep.c:1261:15: sparse: symbol '__lockdep_count_backward_deps' was not declared. Should it be static? Please consider folding the attached diff :-) Signed-off-by: Fengguang Wu Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/527d1787.ThzXGoUspZWehFDl\%fengguang.wu@intel.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 4e8e14c34e42..576ba756a32d 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1232,7 +1232,7 @@ static int noop_count(struct lock_list *entry, void *data) return 0; } -unsigned long __lockdep_count_forward_deps(struct lock_list *this) +static unsigned long __lockdep_count_forward_deps(struct lock_list *this) { unsigned long count = 0; struct lock_list *uninitialized_var(target_entry); @@ -1258,7 +1258,7 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class) return ret; } -unsigned long __lockdep_count_backward_deps(struct lock_list *this) +static unsigned long __lockdep_count_backward_deps(struct lock_list *this) { unsigned long count = 0; struct lock_list *uninitialized_var(target_entry); From 90d3839b90fe379557dae4a44735a6af78f42885 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 Nov 2013 19:42:14 -0800 Subject: [PATCH 21/21] block: Use u64_stats_init() to initialize seqcounts Now that seqcounts are lockdep enabled objects, we need to explicitly initialize runtime allocated seqcounts so that lockdep can track them. Without this patch, Fengguang was seeing: [ 4.127282] INFO: trying to register non-static key. [ 4.128027] the code is fine but needs lockdep annotation. [ 4.128027] turning off the locking correctness validator. [ 4.128027] CPU: 0 PID: 96 Comm: kworker/u4:1 Not tainted 3.12.0-next-20131108-10601-gbad570d #2 [ 4.128027] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ ... ] [ 4.128027] Call Trace: [ 4.128027] [<7908e744>] ? console_unlock+0x353/0x380 [ 4.128027] [<79dc7cf2>] dump_stack+0x48/0x60 [ 4.128027] [<7908953e>] __lock_acquire.isra.26+0x7e3/0xceb [ 4.128027] [<7908a1c5>] lock_acquire+0x71/0x9a [ 4.128027] [<794079aa>] ? blk_throtl_bio+0x1c3/0x485 [ 4.128027] [<7940658b>] throtl_update_dispatch_stats+0x7c/0x153 [ 4.128027] [<794079aa>] ? blk_throtl_bio+0x1c3/0x485 [ 4.128027] [<794079aa>] blk_throtl_bio+0x1c3/0x485 ... Use u64_stats_init() for all affected data structures, which initializes the seqcount. Reported-and-Tested-by: Fengguang Wu Cc: Vivek Goyal Cc: Jens Axboe Signed-off-by: Peter Zijlstra [ Folded in another fix from the mailing list as well as a fix to that fix. Tweaked commit message. ] Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1384314134-6895-1-git-send-email-john.stultz@linaro.org [ So I actually think that the two SOBs from PeterZ are the right depiction of the patch route. ] Signed-off-by: Ingo Molnar --- block/blk-cgroup.h | 10 ++++++++++ block/blk-throttle.c | 10 ++++++++++ block/cfq-iosched.c | 25 +++++++++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index ae6969a7ffd4..1610b22edf09 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -402,6 +402,11 @@ struct request_list *__blk_queue_next_rl(struct request_list *rl, #define blk_queue_for_each_rl(rl, q) \ for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) +static inline void blkg_stat_init(struct blkg_stat *stat) +{ + u64_stats_init(&stat->syncp); +} + /** * blkg_stat_add - add a value to a blkg_stat * @stat: target blkg_stat @@ -458,6 +463,11 @@ static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from) blkg_stat_add(to, blkg_stat_read(from)); } +static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat) +{ + u64_stats_init(&rwstat->syncp); +} + /** * blkg_rwstat_add - add a value to a blkg_rwstat * @rwstat: target blkg_rwstat diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 8331aba9426f..06534049afba 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -256,6 +256,12 @@ static struct throtl_data *sq_to_td(struct throtl_service_queue *sq) } \ } while (0) +static void tg_stats_init(struct tg_stats_cpu *tg_stats) +{ + blkg_rwstat_init(&tg_stats->service_bytes); + blkg_rwstat_init(&tg_stats->serviced); +} + /* * Worker for allocating per cpu stat for tgs. This is scheduled on the * system_wq once there are some groups on the alloc_list waiting for @@ -269,12 +275,16 @@ static void tg_stats_alloc_fn(struct work_struct *work) alloc_stats: if (!stats_cpu) { + int cpu; + stats_cpu = alloc_percpu(struct tg_stats_cpu); if (!stats_cpu) { /* allocation failed, try again after some time */ schedule_delayed_work(dwork, msecs_to_jiffies(10)); return; } + for_each_possible_cpu(cpu) + tg_stats_init(per_cpu_ptr(stats_cpu, cpu)); } spin_lock_irq(&tg_stats_alloc_lock); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 434944cbd761..4d5cec1ad80d 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1508,6 +1508,29 @@ static void cfq_init_cfqg_base(struct cfq_group *cfqg) } #ifdef CONFIG_CFQ_GROUP_IOSCHED +static void cfqg_stats_init(struct cfqg_stats *stats) +{ + blkg_rwstat_init(&stats->service_bytes); + blkg_rwstat_init(&stats->serviced); + blkg_rwstat_init(&stats->merged); + blkg_rwstat_init(&stats->service_time); + blkg_rwstat_init(&stats->wait_time); + blkg_rwstat_init(&stats->queued); + + blkg_stat_init(&stats->sectors); + blkg_stat_init(&stats->time); + +#ifdef CONFIG_DEBUG_BLK_CGROUP + blkg_stat_init(&stats->unaccounted_time); + blkg_stat_init(&stats->avg_queue_size_sum); + blkg_stat_init(&stats->avg_queue_size_samples); + blkg_stat_init(&stats->dequeue); + blkg_stat_init(&stats->group_wait_time); + blkg_stat_init(&stats->idle_time); + blkg_stat_init(&stats->empty_time); +#endif +} + static void cfq_pd_init(struct blkcg_gq *blkg) { struct cfq_group *cfqg = blkg_to_cfqg(blkg); @@ -1515,6 +1538,8 @@ static void cfq_pd_init(struct blkcg_gq *blkg) cfq_init_cfqg_base(cfqg); cfqg->weight = blkg->blkcg->cfq_weight; cfqg->leaf_weight = blkg->blkcg->cfq_leaf_weight; + cfqg_stats_init(&cfqg->stats); + cfqg_stats_init(&cfqg->dead_stats); } static void cfq_pd_offline(struct blkcg_gq *blkg)