mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-26 05:34:13 +08:00
slub: Invert locking and avoid slab lock
Locking slabs is no longer necessary if the arch supports cmpxchg operations and if no debugging features are used on a slab. If the arch does not support cmpxchg then we fall back to using the slab lock to do a cmpxchg-like operation. The patch also changes the lock order. Slab locks are subsumed to the node lock now. With that approach slab_trylocking is no longer necessary. Signed-off-by: Christoph Lameter <cl@linux.com> Signed-off-by: Pekka Enberg <penberg@kernel.org>
This commit is contained in:
parent
2cfb7455d2
commit
881db7fb03
129
mm/slub.c
129
mm/slub.c
@ -2,10 +2,11 @@
|
|||||||
* SLUB: A slab allocator that limits cache line use instead of queuing
|
* SLUB: A slab allocator that limits cache line use instead of queuing
|
||||||
* objects in per cpu and per node lists.
|
* objects in per cpu and per node lists.
|
||||||
*
|
*
|
||||||
* The allocator synchronizes using per slab locks and only
|
* The allocator synchronizes using per slab locks or atomic operations
|
||||||
* uses a centralized lock to manage a pool of partial slabs.
|
* and only uses a centralized lock to manage a pool of partial slabs.
|
||||||
*
|
*
|
||||||
* (C) 2007 SGI, Christoph Lameter
|
* (C) 2007 SGI, Christoph Lameter
|
||||||
|
* (C) 2011 Linux Foundation, Christoph Lameter
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
@ -32,15 +33,27 @@
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Lock order:
|
* Lock order:
|
||||||
* 1. slab_lock(page)
|
* 1. slub_lock (Global Semaphore)
|
||||||
* 2. slab->list_lock
|
* 2. node->list_lock
|
||||||
|
* 3. slab_lock(page) (Only on some arches and for debugging)
|
||||||
*
|
*
|
||||||
* The slab_lock protects operations on the object of a particular
|
* slub_lock
|
||||||
* slab and its metadata in the page struct. If the slab lock
|
*
|
||||||
* has been taken then no allocations nor frees can be performed
|
* The role of the slub_lock is to protect the list of all the slabs
|
||||||
* on the objects in the slab nor can the slab be added or removed
|
* and to synchronize major metadata changes to slab cache structures.
|
||||||
* from the partial or full lists since this would mean modifying
|
*
|
||||||
* the page_struct of the slab.
|
* The slab_lock is only used for debugging and on arches that do not
|
||||||
|
* have the ability to do a cmpxchg_double. It only protects the second
|
||||||
|
* double word in the page struct. Meaning
|
||||||
|
* A. page->freelist -> List of free objects in a page
|
||||||
|
* B. page->counters -> Counters of objects
|
||||||
|
* C. page->frozen -> frozen state
|
||||||
|
*
|
||||||
|
* If a slab is frozen then it is exempt from list management. It is not
|
||||||
|
* on any list. The processor that froze the slab is the one who can
|
||||||
|
* perform list operations on the page. Other processors may put objects
|
||||||
|
* onto the freelist but the processor that froze the slab is the only
|
||||||
|
* one that can retrieve the objects from the page's freelist.
|
||||||
*
|
*
|
||||||
* The list_lock protects the partial and full list on each node and
|
* The list_lock protects the partial and full list on each node and
|
||||||
* the partial slab counter. If taken then no new slabs may be added or
|
* the partial slab counter. If taken then no new slabs may be added or
|
||||||
@ -53,20 +66,6 @@
|
|||||||
* slabs, operations can continue without any centralized lock. F.e.
|
* slabs, operations can continue without any centralized lock. F.e.
|
||||||
* allocating a long series of objects that fill up slabs does not require
|
* allocating a long series of objects that fill up slabs does not require
|
||||||
* the list lock.
|
* the list lock.
|
||||||
*
|
|
||||||
* The lock order is sometimes inverted when we are trying to get a slab
|
|
||||||
* off a list. We take the list_lock and then look for a page on the list
|
|
||||||
* to use. While we do that objects in the slabs may be freed. We can
|
|
||||||
* only operate on the slab if we have also taken the slab_lock. So we use
|
|
||||||
* a slab_trylock() on the slab. If trylock was successful then no frees
|
|
||||||
* can occur anymore and we can use the slab for allocations etc. If the
|
|
||||||
* slab_trylock() does not succeed then frees are in progress in the slab and
|
|
||||||
* we must stay away from it for a while since we may cause a bouncing
|
|
||||||
* cacheline if we try to acquire the lock. So go onto the next slab.
|
|
||||||
* If all pages are busy then we may allocate a new slab instead of reusing
|
|
||||||
* a partial slab. A new slab has no one operating on it and thus there is
|
|
||||||
* no danger of cacheline contention.
|
|
||||||
*
|
|
||||||
* Interrupts are disabled during allocation and deallocation in order to
|
* Interrupts are disabled during allocation and deallocation in order to
|
||||||
* make the slab allocator safe to use in the context of an irq. In addition
|
* make the slab allocator safe to use in the context of an irq. In addition
|
||||||
* interrupts are disabled to ensure that the processor does not change
|
* interrupts are disabled to ensure that the processor does not change
|
||||||
@ -342,6 +341,19 @@ static inline int oo_objects(struct kmem_cache_order_objects x)
|
|||||||
return x.x & OO_MASK;
|
return x.x & OO_MASK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Per slab locking using the pagelock
|
||||||
|
*/
|
||||||
|
static __always_inline void slab_lock(struct page *page)
|
||||||
|
{
|
||||||
|
bit_spin_lock(PG_locked, &page->flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __always_inline void slab_unlock(struct page *page)
|
||||||
|
{
|
||||||
|
__bit_spin_unlock(PG_locked, &page->flags);
|
||||||
|
}
|
||||||
|
|
||||||
static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
|
static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
|
||||||
void *freelist_old, unsigned long counters_old,
|
void *freelist_old, unsigned long counters_old,
|
||||||
void *freelist_new, unsigned long counters_new,
|
void *freelist_new, unsigned long counters_new,
|
||||||
@ -356,11 +368,14 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
|
|||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
|
slab_lock(page);
|
||||||
if (page->freelist == freelist_old && page->counters == counters_old) {
|
if (page->freelist == freelist_old && page->counters == counters_old) {
|
||||||
page->freelist = freelist_new;
|
page->freelist = freelist_new;
|
||||||
page->counters = counters_new;
|
page->counters = counters_new;
|
||||||
|
slab_unlock(page);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
slab_unlock(page);
|
||||||
}
|
}
|
||||||
|
|
||||||
cpu_relax();
|
cpu_relax();
|
||||||
@ -377,7 +392,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
|
|||||||
/*
|
/*
|
||||||
* Determine a map of object in use on a page.
|
* Determine a map of object in use on a page.
|
||||||
*
|
*
|
||||||
* Slab lock or node listlock must be held to guarantee that the page does
|
* Node listlock must be held to guarantee that the page does
|
||||||
* not vanish from under us.
|
* not vanish from under us.
|
||||||
*/
|
*/
|
||||||
static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
|
static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
|
||||||
@ -808,10 +823,11 @@ static int check_slab(struct kmem_cache *s, struct page *page)
|
|||||||
static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
|
static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
|
||||||
{
|
{
|
||||||
int nr = 0;
|
int nr = 0;
|
||||||
void *fp = page->freelist;
|
void *fp;
|
||||||
void *object = NULL;
|
void *object = NULL;
|
||||||
unsigned long max_objects;
|
unsigned long max_objects;
|
||||||
|
|
||||||
|
fp = page->freelist;
|
||||||
while (fp && nr <= page->objects) {
|
while (fp && nr <= page->objects) {
|
||||||
if (fp == search)
|
if (fp == search)
|
||||||
return 1;
|
return 1;
|
||||||
@ -1024,6 +1040,8 @@ bad:
|
|||||||
static noinline int free_debug_processing(struct kmem_cache *s,
|
static noinline int free_debug_processing(struct kmem_cache *s,
|
||||||
struct page *page, void *object, unsigned long addr)
|
struct page *page, void *object, unsigned long addr)
|
||||||
{
|
{
|
||||||
|
slab_lock(page);
|
||||||
|
|
||||||
if (!check_slab(s, page))
|
if (!check_slab(s, page))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
@ -1059,10 +1077,12 @@ static noinline int free_debug_processing(struct kmem_cache *s,
|
|||||||
set_track(s, object, TRACK_FREE, addr);
|
set_track(s, object, TRACK_FREE, addr);
|
||||||
trace(s, page, object, 0);
|
trace(s, page, object, 0);
|
||||||
init_object(s, object, SLUB_RED_INACTIVE);
|
init_object(s, object, SLUB_RED_INACTIVE);
|
||||||
|
slab_unlock(page);
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
slab_fix(s, "Object at 0x%p not freed", object);
|
slab_fix(s, "Object at 0x%p not freed", object);
|
||||||
|
slab_unlock(page);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1393,27 +1413,6 @@ static void discard_slab(struct kmem_cache *s, struct page *page)
|
|||||||
free_slab(s, page);
|
free_slab(s, page);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Per slab locking using the pagelock
|
|
||||||
*/
|
|
||||||
static __always_inline void slab_lock(struct page *page)
|
|
||||||
{
|
|
||||||
bit_spin_lock(PG_locked, &page->flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
static __always_inline void slab_unlock(struct page *page)
|
|
||||||
{
|
|
||||||
__bit_spin_unlock(PG_locked, &page->flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
static __always_inline int slab_trylock(struct page *page)
|
|
||||||
{
|
|
||||||
int rc = 1;
|
|
||||||
|
|
||||||
rc = bit_spin_trylock(PG_locked, &page->flags);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Management of partially allocated slabs.
|
* Management of partially allocated slabs.
|
||||||
*
|
*
|
||||||
@ -1445,17 +1444,13 @@ static inline void remove_partial(struct kmem_cache_node *n,
|
|||||||
*
|
*
|
||||||
* Must hold list_lock.
|
* Must hold list_lock.
|
||||||
*/
|
*/
|
||||||
static inline int lock_and_freeze_slab(struct kmem_cache *s,
|
static inline int acquire_slab(struct kmem_cache *s,
|
||||||
struct kmem_cache_node *n, struct page *page)
|
struct kmem_cache_node *n, struct page *page)
|
||||||
{
|
{
|
||||||
void *freelist;
|
void *freelist;
|
||||||
unsigned long counters;
|
unsigned long counters;
|
||||||
struct page new;
|
struct page new;
|
||||||
|
|
||||||
|
|
||||||
if (!slab_trylock(page))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Zap the freelist and set the frozen bit.
|
* Zap the freelist and set the frozen bit.
|
||||||
* The old freelist is the list of objects for the
|
* The old freelist is the list of objects for the
|
||||||
@ -1491,7 +1486,6 @@ static inline int lock_and_freeze_slab(struct kmem_cache *s,
|
|||||||
*/
|
*/
|
||||||
printk(KERN_ERR "SLUB: %s : Page without available objects on"
|
printk(KERN_ERR "SLUB: %s : Page without available objects on"
|
||||||
" partial list\n", s->name);
|
" partial list\n", s->name);
|
||||||
slab_unlock(page);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1515,7 +1509,7 @@ static struct page *get_partial_node(struct kmem_cache *s,
|
|||||||
|
|
||||||
spin_lock(&n->list_lock);
|
spin_lock(&n->list_lock);
|
||||||
list_for_each_entry(page, &n->partial, lru)
|
list_for_each_entry(page, &n->partial, lru)
|
||||||
if (lock_and_freeze_slab(s, n, page))
|
if (acquire_slab(s, n, page))
|
||||||
goto out;
|
goto out;
|
||||||
page = NULL;
|
page = NULL;
|
||||||
out:
|
out:
|
||||||
@ -1804,8 +1798,6 @@ redo:
|
|||||||
"unfreezing slab"))
|
"unfreezing slab"))
|
||||||
goto redo;
|
goto redo;
|
||||||
|
|
||||||
slab_unlock(page);
|
|
||||||
|
|
||||||
if (lock)
|
if (lock)
|
||||||
spin_unlock(&n->list_lock);
|
spin_unlock(&n->list_lock);
|
||||||
|
|
||||||
@ -1819,7 +1811,6 @@ redo:
|
|||||||
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
|
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
|
||||||
{
|
{
|
||||||
stat(s, CPUSLAB_FLUSH);
|
stat(s, CPUSLAB_FLUSH);
|
||||||
slab_lock(c->page);
|
|
||||||
deactivate_slab(s, c);
|
deactivate_slab(s, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1968,7 +1959,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
|
|||||||
if (!page)
|
if (!page)
|
||||||
goto new_slab;
|
goto new_slab;
|
||||||
|
|
||||||
slab_lock(page);
|
|
||||||
if (unlikely(!node_match(c, node)))
|
if (unlikely(!node_match(c, node)))
|
||||||
goto another_slab;
|
goto another_slab;
|
||||||
|
|
||||||
@ -1994,8 +1984,6 @@ load_freelist:
|
|||||||
|
|
||||||
stat(s, ALLOC_REFILL);
|
stat(s, ALLOC_REFILL);
|
||||||
|
|
||||||
slab_unlock(page);
|
|
||||||
|
|
||||||
c->freelist = get_freepointer(s, object);
|
c->freelist = get_freepointer(s, object);
|
||||||
c->tid = next_tid(c->tid);
|
c->tid = next_tid(c->tid);
|
||||||
local_irq_restore(flags);
|
local_irq_restore(flags);
|
||||||
@ -2031,7 +2019,6 @@ new_slab:
|
|||||||
page->inuse = page->objects;
|
page->inuse = page->objects;
|
||||||
|
|
||||||
stat(s, ALLOC_SLAB);
|
stat(s, ALLOC_SLAB);
|
||||||
slab_lock(page);
|
|
||||||
c->node = page_to_nid(page);
|
c->node = page_to_nid(page);
|
||||||
c->page = page;
|
c->page = page;
|
||||||
goto load_freelist;
|
goto load_freelist;
|
||||||
@ -2205,7 +2192,6 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
|
|||||||
unsigned long uninitialized_var(flags);
|
unsigned long uninitialized_var(flags);
|
||||||
|
|
||||||
local_irq_save(flags);
|
local_irq_save(flags);
|
||||||
slab_lock(page);
|
|
||||||
stat(s, FREE_SLOWPATH);
|
stat(s, FREE_SLOWPATH);
|
||||||
|
|
||||||
if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr))
|
if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr))
|
||||||
@ -2271,7 +2257,6 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
|
|||||||
spin_unlock(&n->list_lock);
|
spin_unlock(&n->list_lock);
|
||||||
|
|
||||||
out_unlock:
|
out_unlock:
|
||||||
slab_unlock(page);
|
|
||||||
local_irq_restore(flags);
|
local_irq_restore(flags);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -2285,7 +2270,6 @@ slab_empty:
|
|||||||
}
|
}
|
||||||
|
|
||||||
spin_unlock(&n->list_lock);
|
spin_unlock(&n->list_lock);
|
||||||
slab_unlock(page);
|
|
||||||
local_irq_restore(flags);
|
local_irq_restore(flags);
|
||||||
stat(s, FREE_SLAB);
|
stat(s, FREE_SLAB);
|
||||||
discard_slab(s, page);
|
discard_slab(s, page);
|
||||||
@ -3202,14 +3186,8 @@ int kmem_cache_shrink(struct kmem_cache *s)
|
|||||||
* list_lock. page->inuse here is the upper limit.
|
* list_lock. page->inuse here is the upper limit.
|
||||||
*/
|
*/
|
||||||
list_for_each_entry_safe(page, t, &n->partial, lru) {
|
list_for_each_entry_safe(page, t, &n->partial, lru) {
|
||||||
if (!page->inuse && slab_trylock(page)) {
|
if (!page->inuse) {
|
||||||
/*
|
|
||||||
* Must hold slab lock here because slab_free
|
|
||||||
* may have freed the last object and be
|
|
||||||
* waiting to release the slab.
|
|
||||||
*/
|
|
||||||
remove_partial(n, page);
|
remove_partial(n, page);
|
||||||
slab_unlock(page);
|
|
||||||
discard_slab(s, page);
|
discard_slab(s, page);
|
||||||
} else {
|
} else {
|
||||||
list_move(&page->lru,
|
list_move(&page->lru,
|
||||||
@ -3797,12 +3775,9 @@ static int validate_slab(struct kmem_cache *s, struct page *page,
|
|||||||
static void validate_slab_slab(struct kmem_cache *s, struct page *page,
|
static void validate_slab_slab(struct kmem_cache *s, struct page *page,
|
||||||
unsigned long *map)
|
unsigned long *map)
|
||||||
{
|
{
|
||||||
if (slab_trylock(page)) {
|
slab_lock(page);
|
||||||
validate_slab(s, page, map);
|
validate_slab(s, page, map);
|
||||||
slab_unlock(page);
|
slab_unlock(page);
|
||||||
} else
|
|
||||||
printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
|
|
||||||
s->name, page);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int validate_slab_node(struct kmem_cache *s,
|
static int validate_slab_node(struct kmem_cache *s,
|
||||||
|
Loading…
Reference in New Issue
Block a user