diff --git a/include/linux/mm.h b/include/linux/mm.h
index f31c75dc190e..f5487a86b154 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -628,12 +628,6 @@ struct vm_operations_struct {
 };
 
 #ifdef CONFIG_PER_VMA_LOCK
-static inline void vma_init_lock(struct vm_area_struct *vma)
-{
-	init_rwsem(&vma->lock);
-	vma->vm_lock_seq = -1;
-}
-
 /*
  * Try to read-lock a vma. The function is allowed to occasionally yield false
  * locked result to avoid performance overhead, in which case we fall back to
@@ -645,17 +639,17 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
 	if (vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))
 		return false;
 
-	if (unlikely(down_read_trylock(&vma->lock) == 0))
+	if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0))
 		return false;
 
 	/*
 	 * Overflow might produce false locked result.
 	 * False unlocked result is impossible because we modify and check
-	 * vma->vm_lock_seq under vma->lock protection and mm->mm_lock_seq
+	 * vma->vm_lock_seq under vma->vm_lock protection and mm->mm_lock_seq
	 * modification invalidates all existing locks.
	 */
 	if (unlikely(vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))) {
-		up_read(&vma->lock);
+		up_read(&vma->vm_lock->lock);
 		return false;
 	}
 	return true;
@@ -664,7 +658,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
 static inline void vma_end_read(struct vm_area_struct *vma)
 {
 	rcu_read_lock(); /* keeps vma alive till the end of up_read */
-	up_read(&vma->lock);
+	up_read(&vma->vm_lock->lock);
 	rcu_read_unlock();
 }
 
@@ -687,9 +681,9 @@ static inline void vma_start_write(struct vm_area_struct *vma)
 	if (__is_vma_write_locked(vma, &mm_lock_seq))
 		return;
 
-	down_write(&vma->lock);
+	down_write(&vma->vm_lock->lock);
 	vma->vm_lock_seq = mm_lock_seq;
-	up_write(&vma->lock);
+	up_write(&vma->vm_lock->lock);
 }
 
 static inline bool vma_try_start_write(struct vm_area_struct *vma)
@@ -740,6 +734,10 @@ static inline void vma_mark_detached(struct vm_area_struct *vma,
 
 #endif /* CONFIG_PER_VMA_LOCK */
 
+/*
+ * WARNING: vma_init does not initialize vma->vm_lock.
+ * Use vm_area_alloc()/vm_area_free() if vma needs locking.
+ */
 static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
 {
 	static const struct vm_operations_struct dummy_vm_ops = {};
@@ -749,7 +747,6 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
 	vma->vm_ops = &dummy_vm_ops;
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma_mark_detached(vma, false);
-	vma_init_lock(vma);
 }
 
 /* Use when VMA is not part of the VMA tree and needs no locking */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 3a6cf4c2edd0..1b3031e95215 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -471,6 +471,10 @@ struct anon_vma_name {
 	char name[];
 };
 
+struct vma_lock {
+	struct rw_semaphore lock;
+};
+
 /*
  * This struct describes a virtual memory area. There is one of these
  * per VM-area/task. A VM area is any part of the process virtual memory
@@ -505,7 +509,7 @@ struct vm_area_struct {
 
 #ifdef CONFIG_PER_VMA_LOCK
 	int vm_lock_seq;
-	struct rw_semaphore lock;
+	struct vma_lock *vm_lock;
 
 	/* Flag to indicate areas detached from the mm->mm_mt tree */
 	bool detached;
diff --git a/kernel/fork.c b/kernel/fork.c
index f86f7e917954..c75088367bb4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -451,13 +451,49 @@ static struct kmem_cache *vm_area_cachep;
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
+#ifdef CONFIG_PER_VMA_LOCK
+
+/* SLAB cache for vm_area_struct.lock */
+static struct kmem_cache *vma_lock_cachep;
+
+static bool vma_lock_alloc(struct vm_area_struct *vma)
+{
+	vma->vm_lock = kmem_cache_alloc(vma_lock_cachep, GFP_KERNEL);
+	if (!vma->vm_lock)
+		return false;
+
+	init_rwsem(&vma->vm_lock->lock);
+	vma->vm_lock_seq = -1;
+
+	return true;
+}
+
+static inline void vma_lock_free(struct vm_area_struct *vma)
+{
+	kmem_cache_free(vma_lock_cachep, vma->vm_lock);
+}
+
+#else /* CONFIG_PER_VMA_LOCK */
+
+static inline bool vma_lock_alloc(struct vm_area_struct *vma) { return true; }
+static inline void vma_lock_free(struct vm_area_struct *vma) {}
+
+#endif /* CONFIG_PER_VMA_LOCK */
+
 struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
 
 	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
-	if (vma)
-		vma_init(vma, mm);
+	if (!vma)
+		return NULL;
+
+	vma_init(vma, mm);
+	if (!vma_lock_alloc(vma)) {
+		kmem_cache_free(vm_area_cachep, vma);
+		return NULL;
+	}
+
 	return vma;
 }
 
@@ -465,24 +501,30 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
 {
 	struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 
-	if (new) {
-		ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
-		ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
-		/*
-		 * orig->shared.rb may be modified concurrently, but the clone
-		 * will be reinitialized.
-		 */
-		data_race(memcpy(new, orig, sizeof(*new)));
-		INIT_LIST_HEAD(&new->anon_vma_chain);
-		vma_init_lock(new);
-		dup_anon_vma_name(orig, new);
+	if (!new)
+		return NULL;
+
+	ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
+	ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
+	/*
+	 * orig->shared.rb may be modified concurrently, but the clone
+	 * will be reinitialized.
+	 */
+	data_race(memcpy(new, orig, sizeof(*new)));
+	if (!vma_lock_alloc(new)) {
+		kmem_cache_free(vm_area_cachep, new);
+		return NULL;
 	}
+	INIT_LIST_HEAD(&new->anon_vma_chain);
+	dup_anon_vma_name(orig, new);
+
 	return new;
 }
 
 void __vm_area_free(struct vm_area_struct *vma)
 {
 	free_anon_vma_name(vma);
+	vma_lock_free(vma);
 	kmem_cache_free(vm_area_cachep, vma);
 }
 
@@ -493,7 +535,7 @@ static void vm_area_free_rcu_cb(struct rcu_head *head)
 						  vm_rcu);
 
 	/* The vma should not be locked while being destroyed. */
-	VM_BUG_ON_VMA(rwsem_is_locked(&vma->lock), vma);
+	VM_BUG_ON_VMA(rwsem_is_locked(&vma->vm_lock->lock), vma);
 	__vm_area_free(vma);
 }
 #endif
@@ -3152,6 +3194,9 @@ void __init proc_caches_init(void)
 			NULL);
 
 	vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);
+#ifdef CONFIG_PER_VMA_LOCK
+	vma_lock_cachep = KMEM_CACHE(vma_lock, SLAB_PANIC|SLAB_ACCOUNT);
+#endif
 	mmap_init();
 	nsproxy_cache_init();
 }
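Illustrative usage (not part of the patch): a minimal sketch, assuming kernel context, of how the two initialization paths differ after this change. The example_vma_usage() function below is hypothetical; vm_area_alloc(), vm_area_free(), vma_init(), vma_start_write() and the mmap_lock helpers are the existing kernel APIs touched or relied on by this series.

#include <linux/mm.h>
#include <linux/errno.h>

/* Hypothetical caller: heap VMAs get a vm_lock, stack VMAs do not. */
static int example_vma_usage(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	struct vm_area_struct tmp;

	/* vm_area_alloc() runs vma_init() and then vma_lock_alloc() to attach vma->vm_lock. */
	vma = vm_area_alloc(mm);
	if (!vma)
		return -ENOMEM;

	/* Per-VMA write-locking is done with mmap_lock held for write. */
	mmap_write_lock(mm);
	vma_start_write(vma);	/* takes vma->vm_lock->lock briefly, records mm_lock_seq */
	/* ... modify the VMA, e.g. set vm_start/vm_end ... */
	mmap_write_unlock(mm);

	vm_area_free(vma);	/* vma_lock_free() releases vma->vm_lock */

	/*
	 * A stack-allocated VMA set up with vma_init() has no vm_lock after
	 * this patch, so it must stay outside the VMA tree and must never be
	 * locked with vma_start_read()/vma_start_write().
	 */
	vma_init(&tmp, mm);

	return 0;
}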