linux/lib/stackdepot.c
Andrey Ryabinin b417170cb4 stackdepot: respect __GFP_NOLOCKDEP allocation flag
commit 6fe60465e1 upstream.

If stack_depot_save_flags() allocates memory, it always drops the
__GFP_NOLOCKDEP flag.  So when KASAN tries to track a __GFP_NOLOCKDEP
allocation, we may end up with a lockdep splat like the one below:

======================================================
 WARNING: possible circular locking dependency detected
 6.9.0-rc3+ #49 Not tainted
 ------------------------------------------------------
 kswapd0/149 is trying to acquire lock:
 ffff88811346a920
(&xfs_nondir_ilock_class){++++}-{4:4}, at: xfs_reclaim_inode+0x3ac/0x590
[xfs]

 but task is already holding lock:
 ffffffff8bb33100 (fs_reclaim){+.+.}-{0:0}, at:
balance_pgdat+0x5d9/0xad0

 which lock already depends on the new lock.

 the existing dependency chain (in reverse order) is:
 -> #1 (fs_reclaim){+.+.}-{0:0}:
        __lock_acquire+0x7da/0x1030
        lock_acquire+0x15d/0x400
        fs_reclaim_acquire+0xb5/0x100
 prepare_alloc_pages.constprop.0+0xc5/0x230
        __alloc_pages+0x12a/0x3f0
        alloc_pages_mpol+0x175/0x340
        stack_depot_save_flags+0x4c5/0x510
        kasan_save_stack+0x30/0x40
        kasan_save_track+0x10/0x30
        __kasan_slab_alloc+0x83/0x90
        kmem_cache_alloc+0x15e/0x4a0
        __alloc_object+0x35/0x370
        __create_object+0x22/0x90
 __kmalloc_node_track_caller+0x477/0x5b0
        krealloc+0x5f/0x110
        xfs_iext_insert_raw+0x4b2/0x6e0 [xfs]
        xfs_iext_insert+0x2e/0x130 [xfs]
        xfs_iread_bmbt_block+0x1a9/0x4d0 [xfs]
        xfs_btree_visit_block+0xfb/0x290 [xfs]
        xfs_btree_visit_blocks+0x215/0x2c0 [xfs]
        xfs_iread_extents+0x1a2/0x2e0 [xfs]
 xfs_buffered_write_iomap_begin+0x376/0x10a0 [xfs]
        iomap_iter+0x1d1/0x2d0
 iomap_file_buffered_write+0x120/0x1a0
        xfs_file_buffered_write+0x128/0x4b0 [xfs]
        vfs_write+0x675/0x890
        ksys_write+0xc3/0x160
        do_syscall_64+0x94/0x170
 entry_SYSCALL_64_after_hwframe+0x71/0x79

Always preserve __GFP_NOLOCKDEP to fix this.

Link: https://lkml.kernel.org/r/20240418141133.22950-1-ryabinin.a.a@gmail.com
Fixes: cd11016e5f ("mm, kasan: stackdepot implementation. Enable stackdepot for SLAB")
Signed-off-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Reported-by: Xiubo Li <xiubli@redhat.com>
Closes: https://lore.kernel.org/all/a0caa289-ca02-48eb-9bf2-d86fd47b71f4@redhat.com/
Reported-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Closes: https://lore.kernel.org/all/f9ff999a-e170-b66b-7caf-293f2b147ac2@opensource.wdc.com/
Suggested-by: Dave Chinner <david@fromorbit.com>
Tested-by: Xiubo Li <xiubli@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Alexander Potapenko <glider@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2024-05-02 16:35:31 +02:00

809 lines
22 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Stack depot - a stack trace storage that avoids duplication.
*
* Internally, stack depot maintains a hash table of unique stacktraces. The
* stack traces themselves are stored contiguously one after another in a set
* of separate page allocations.
*
* Author: Alexander Potapenko <glider@google.com>
* Copyright (C) 2016 Google, Inc.
*
* Based on the code by Dmitry Chernenkov.
*/
#define pr_fmt(fmt) "stackdepot: " fmt
#include <linux/debugfs.h>
#include <linux/gfp.h>
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/kmsan.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/poison.h>
#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/stacktrace.h>
#include <linux/stackdepot.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/memblock.h>
#include <linux/kasan-enabled.h>
/* Hard upper bound on the number of pools, whatever the handle could address. */
#define DEPOT_POOLS_CAP 8192
/*
* Maximum number of pools: the smaller of DEPOT_POOLS_CAP and the largest
* value representable in DEPOT_POOL_INDEX_BITS bits.
* The pool_index is offset by 1 so the first record does not have a 0 handle.
*/
#define DEPOT_MAX_POOLS \
(((1LL << (DEPOT_POOL_INDEX_BITS)) - 1 < DEPOT_POOLS_CAP) ? \
(1LL << (DEPOT_POOL_INDEX_BITS)) - 1 : DEPOT_POOLS_CAP)
/*
* Set by the "stack_depot_disable" boot parameter, or when allocating the hash
* table fails. The public entry points return early while this is set.
*/
static bool stack_depot_disabled;
/*
* Whether stack_depot_early_init() should allocate the hash table. Defaults to
* CONFIG_STACKDEPOT_ALWAYS_INIT; set by stack_depot_request_early_init().
*/
static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
/* Set once stack_depot_early_init() has run; later requests trigger a warning. */
static bool __stack_depot_early_init_passed __initdata;
/* Use one hash table bucket per 16 KB of memory. */
#define STACK_HASH_TABLE_SCALE 14
/* Limit the number of buckets between 4K and 1M. */
#define STACK_BUCKET_NUMBER_ORDER_MIN 12
#define STACK_BUCKET_NUMBER_ORDER_MAX 20
/* Initial seed for jhash2. */
#define STACK_HASH_SEED 0x9747b28c
/* Hash table of stored stack records. NULL until one of the init paths runs. */
static struct list_head *stack_table;
/* Fixed order of the number of table buckets. Used when KASAN is enabled. */
static unsigned int stack_bucket_number_order;
/* Hash mask for indexing the table. */
static unsigned int stack_hash_mask;
/* Array of memory regions that store stack records. */
static void *stack_pools[DEPOT_MAX_POOLS];
/* Newly allocated pool that is not yet added to stack_pools. */
static void *new_pool;
/* Number of pools in stack_pools. */
static int pools_num;
/*
* Offset to the unused space in the currently used pool.
* Starts at DEPOT_POOL_SIZE so that the very first allocation finds no room
* and goes through depot_init_pool().
*/
static size_t pool_offset = DEPOT_POOL_SIZE;
/* Freelist of stack records within stack_pools. */
static LIST_HEAD(free_stacks);
/* The lock must be held when performing pool or freelist modifications. */
static DEFINE_RAW_SPINLOCK(pool_lock);
/* Statistics counters for debugfs. */
enum depot_counter_id {
/* Records allocated with STACK_DEPOT_FLAG_GET. */
DEPOT_COUNTER_REFD_ALLOCS,
/* Refcounted records moved to the freelist. */
DEPOT_COUNTER_REFD_FREES,
/* Refcounted records currently allocated and not freed. */
DEPOT_COUNTER_REFD_INUSE,
/* Records currently sitting on the freelist. */
DEPOT_COUNTER_FREELIST_SIZE,
/* Records allocated without STACK_DEPOT_FLAG_GET. */
DEPOT_COUNTER_PERSIST_COUNT,
/* Total record bytes of the persistent records. */
DEPOT_COUNTER_PERSIST_BYTES,
/* Number of counters; must stay last. */
DEPOT_COUNTER_COUNT,
};
/*
* Counter values, indexed by enum depot_counter_id. Updated with pool_lock
* held; stats_show() reads them without the lock.
*/
static long counters[DEPOT_COUNTER_COUNT];
/* Names printed by stats_show(), indexed by enum depot_counter_id. */
static const char *const counter_names[] = {
[DEPOT_COUNTER_REFD_ALLOCS] = "refcounted_allocations",
[DEPOT_COUNTER_REFD_FREES] = "refcounted_frees",
[DEPOT_COUNTER_REFD_INUSE] = "refcounted_in_use",
[DEPOT_COUNTER_FREELIST_SIZE] = "freelist_size",
[DEPOT_COUNTER_PERSIST_COUNT] = "persistent_count",
[DEPOT_COUNTER_PERSIST_BYTES] = "persistent_bytes",
};
/* Every counter must have a name. */
static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT);
/*
 * Handler for the "stack_depot_disable" early boot parameter: parses @str as
 * a boolean into stack_depot_disabled and hands back kstrtobool()'s result.
 */
static int __init disable_stack_depot(char *str)
{
	int err = kstrtobool(str, &stack_depot_disabled);

	return err;
}
early_param("stack_depot_disable", disable_stack_depot);
/*
 * Asks stack_depot_early_init() to allocate the hash table.
 * Warns if stack_depot_early_init() has already run, because the request
 * then comes too late to have any effect there.
 */
void __init stack_depot_request_early_init(void)
{
	const bool too_late = __stack_depot_early_init_passed;

	WARN_ON(too_late);
	__stack_depot_early_init_requested = true;
}
/* Initialize list_head's within the hash table. */
static void init_stack_table(unsigned long entries)
{
unsigned long i;
for (i = 0; i < entries; i++)
INIT_LIST_HEAD(&stack_table[i]);
}
/*
 * Early-boot setup: allocates the hash table with alloc_large_system_hash()
 * when early init has been requested. Can only be used during early boot.
 *
 * Returns 0 on success and whenever there is nothing to do (already called,
 * depot disabled, early init not requested), or -ENOMEM when the table could
 * not be allocated, in which case stack depot is disabled.
 */
int __init stack_depot_early_init(void)
{
	unsigned long nr_buckets;

	/* Meant to run exactly once, from mm_init(). */
	if (WARN_ON(__stack_depot_early_init_passed))
		return 0;
	__stack_depot_early_init_passed = true;

	/*
	 * Report the disabled state here even when early init was not
	 * requested, since stack_depot_init() stays silent about it.
	 */
	if (stack_depot_disabled) {
		pr_info("disabled\n");
		return 0;
	}

	/*
	 * With KASAN, go straight for the largest table: fuzzing workloads
	 * tend to produce a very large number of distinct stack traces.
	 */
	if (kasan_enabled() && !stack_bucket_number_order)
		stack_bucket_number_order = STACK_BUCKET_NUMBER_ORDER_MAX;

	/*
	 * Only bail out after stack_bucket_number_order has been settled:
	 * stack_depot_init() reads it later.
	 */
	if (!__stack_depot_early_init_requested)
		return 0;

	/*
	 * A zero bucket count tells alloc_large_system_hash() to size the
	 * table automatically.
	 */
	nr_buckets = stack_bucket_number_order ?
			1UL << stack_bucket_number_order : 0;

	pr_info("allocating hash table via alloc_large_system_hash\n");
	stack_table = alloc_large_system_hash("stackdepot",
					      sizeof(struct list_head),
					      nr_buckets,
					      STACK_HASH_TABLE_SCALE,
					      HASH_EARLY,
					      NULL,
					      &stack_hash_mask,
					      1UL << STACK_BUCKET_NUMBER_ORDER_MIN,
					      1UL << STACK_BUCKET_NUMBER_ORDER_MAX);
	if (!stack_table) {
		pr_err("hash table allocation failed, disabling\n");
		stack_depot_disabled = true;
		return -ENOMEM;
	}

	/* For an auto-sized table, recover the bucket count from the mask. */
	if (!nr_buckets)
		nr_buckets = stack_hash_mask + 1;

	init_stack_table(nr_buckets);
	return 0;
}
/*
 * Allocates the hash table with kvcalloc(). Can be used after boot.
 *
 * Calls are serialized by a function-local mutex. Does nothing when stack
 * depot is disabled or a table already exists.
 *
 * Returns 0 on success or when there was nothing to do, -ENOMEM when the
 * allocation failed (stack depot is then disabled).
 */
int stack_depot_init(void)
{
	static DEFINE_MUTEX(stack_depot_init_mutex);
	const unsigned long min_buckets = 1UL << STACK_BUCKET_NUMBER_ORDER_MIN;
	const unsigned long max_buckets = 1UL << STACK_BUCKET_NUMBER_ORDER_MAX;
	unsigned long nr_buckets;
	int err = 0;

	mutex_lock(&stack_depot_init_mutex);

	if (stack_depot_disabled || stack_table)
		goto out_unlock;

	/*
	 * As in stack_depot_early_init(): honour stack_bucket_number_order if
	 * it has been set, otherwise scale with the amount of free memory.
	 */
	if (stack_bucket_number_order) {
		nr_buckets = 1UL << stack_bucket_number_order;
	} else {
		const int scale = STACK_HASH_TABLE_SCALE;

		nr_buckets = nr_free_buffer_pages();
		nr_buckets = roundup_pow_of_two(nr_buckets);
		if (scale > PAGE_SHIFT)
			nr_buckets >>= (scale - PAGE_SHIFT);
		else
			nr_buckets <<= (PAGE_SHIFT - scale);
	}

	/* Keep the bucket count within [min_buckets, max_buckets]. */
	if (nr_buckets < min_buckets)
		nr_buckets = min_buckets;
	else if (nr_buckets > max_buckets)
		nr_buckets = max_buckets;

	pr_info("allocating hash table of %lu entries via kvcalloc\n", nr_buckets);
	stack_table = kvcalloc(nr_buckets, sizeof(struct list_head), GFP_KERNEL);
	if (!stack_table) {
		pr_err("hash table allocation failed, disabling\n");
		stack_depot_disabled = true;
		err = -ENOMEM;
		goto out_unlock;
	}

	stack_hash_mask = nr_buckets - 1;
	init_stack_table(nr_buckets);

out_unlock:
	mutex_unlock(&stack_depot_init_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(stack_depot_init);
/*
 * Initializes new stack pool, and updates the list of pools.
 *
 * Must be called with pool_lock held. Installs new_pool, or failing that
 * *@prealloc (which is then set to NULL), as the next entry of stack_pools
 * and resets pool_offset to 0.
 *
 * Returns true if a new pool was installed, false if the pool limit has been
 * reached or if neither new_pool nor *@prealloc provides memory.
 */
static bool depot_init_pool(void **prealloc)
{
	lockdep_assert_held(&pool_lock);

	if (unlikely(pools_num >= DEPOT_MAX_POOLS)) {
		/* Bail out if we reached the pool limit. */
		WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */
		WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */
		WARN_ONCE(1, "Stack depot reached limit capacity");
		return false;
	}

	if (!new_pool && *prealloc) {
		/* We have preallocated memory, use it. */
		WRITE_ONCE(new_pool, *prealloc);
		*prealloc = NULL;
	}

	if (!new_pool)
		return false; /* new_pool and *prealloc are NULL */

	/* Save reference to the pool to be used by depot_fetch_stack(). */
	stack_pools[pools_num] = new_pool;

	/*
	 * Stack depot tries to keep an extra pool allocated even before it runs
	 * out of space in the currently used pool.
	 *
	 * To indicate that a new preallocation is needed new_pool is reset to
	 * NULL; do not reset to NULL if we have reached the maximum number of
	 * pools.
	 *
	 * The comparison uses the pool count as it will be once this pool is
	 * accounted for: pools_num itself is always below DEPOT_MAX_POOLS here
	 * because of the guard above, so testing it directly would never
	 * select the poison value and callers would keep preallocating pages
	 * that can never be used.
	 */
	if (pools_num + 1 < DEPOT_MAX_POOLS)
		WRITE_ONCE(new_pool, NULL);
	else
		WRITE_ONCE(new_pool, STACK_DEPOT_POISON);

	/* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */
	WRITE_ONCE(pools_num, pools_num + 1);
	ASSERT_EXCLUSIVE_WRITER(pools_num);

	pool_offset = 0;

	return true;
}
/*
 * Stashes *@prealloc in new_pool for a later depot_init_pool() call and clears
 * *@prealloc to mark the memory as taken. Requires pool_lock to be held.
 *
 * Leaves *@prealloc untouched when new_pool is already non-NULL, i.e. a spare
 * pool is already saved (depot_init_pool() documents a non-NULL marker for the
 * pool limit as well).
 */
static void depot_keep_new_pool(void **prealloc)
{
	lockdep_assert_held(&pool_lock);

	if (!new_pool) {
		WRITE_ONCE(new_pool, *prealloc);
		*prealloc = NULL;
	}
}
/*
 * Carves a new stack record of @size bytes out of the current pool, switching
 * to a fresh pool (spare or *@prealloc) first when the current one is too
 * full. Requires pool_lock to be held.
 *
 * Returns the record with its handle and hash_list initialized, or NULL when
 * no pool space could be obtained.
 */
static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size)
{
	struct stack_record *rec;
	void *pool;
	u32 idx;

	lockdep_assert_held(&pool_lock);

	/* Not enough room left: a new pool is needed before going on. */
	if (pool_offset + size > DEPOT_POOL_SIZE && !depot_init_pool(prealloc))
		return NULL;

	if (WARN_ON_ONCE(pools_num < 1))
		return NULL;

	idx = pools_num - 1;
	pool = stack_pools[idx];
	if (WARN_ON_ONCE(!pool))
		return NULL;

	rec = pool + pool_offset;

	/* The handle is filled in once here and kept for the record's lifetime. */
	rec->handle.pool_index_plus_1 = idx + 1;
	rec->handle.offset = pool_offset >> DEPOT_STACK_ALIGN;
	rec->handle.extra = 0;
	INIT_LIST_HEAD(&rec->hash_list);

	pool_offset += size;

	return rec;
}
/*
 * Takes the oldest record off the freelist if its RCU grace period has
 * elapsed. Requires pool_lock to be held.
 *
 * Returns the record, or NULL when the freelist is empty or its first entry
 * is not yet safe to reuse.
 */
static struct stack_record *depot_pop_free(void)
{
	struct stack_record *oldest;

	lockdep_assert_held(&pool_lock);

	if (list_empty(&free_stacks))
		return NULL;

	/*
	 * Entries are queued at the tail, so the head is the least recently
	 * freed one and the most likely to be past its grace period. If the
	 * head is not ready, nothing behind it is checked.
	 */
	oldest = list_first_entry(&free_stacks, struct stack_record, free_list);
	if (!poll_state_synchronize_rcu(oldest->rcu_state))
		return NULL;

	list_del(&oldest->free_list);
	counters[DEPOT_COUNTER_FREELIST_SIZE]--;

	return oldest;
}
/*
 * Returns the number of bytes a record holding @nr_entries frames occupies:
 * sizeof(struct stack_record) minus the unused tail of its entries array,
 * rounded up to a multiple of 1 << DEPOT_STACK_ALIGN.
 *
 * @s is only used for type information and is never dereferenced. Warns once
 * if @nr_entries does not fit in the entries array.
 */
static inline size_t depot_stack_record_size(struct stack_record *s, unsigned int nr_entries)
{
	const size_t capacity = sizeof(s->entries);
	const size_t needed = flex_array_size(s, entries, nr_entries);
	const size_t slack = capacity - needed;
	const size_t trimmed = sizeof(struct stack_record) - slack;

	WARN_ON_ONCE(capacity < needed);

	return ALIGN(trimmed, 1 << DEPOT_STACK_ALIGN);
}
/*
 * Allocates a record for the given trace and fills it in. Requires pool_lock
 * to be held.
 *
 * With STACK_DEPOT_FLAG_GET in @flags the record is refcounted (count starts
 * at 1) and may come from the freelist; otherwise its refcount is set to
 * REFCOUNT_SATURATED. At most CONFIG_STACKDEPOT_MAX_FRAMES frames are stored.
 *
 * Returns the record, or NULL when @nr_entries is 0 or no space is available.
 */
static struct stack_record *
depot_alloc_stack(unsigned long *entries, unsigned int nr_entries, u32 hash, depot_flags_t flags, void **prealloc)
{
	const bool refcounted = flags & STACK_DEPOT_FLAG_GET;
	struct stack_record *rec = NULL;
	size_t rec_size;

	lockdep_assert_held(&pool_lock);

	/* The public API entry points are expected to have filtered this out. */
	if (WARN_ON_ONCE(!nr_entries))
		return NULL;

	/* Store no more than CONFIG_STACKDEPOT_MAX_FRAMES frames. */
	if (nr_entries > CONFIG_STACKDEPOT_MAX_FRAMES)
		nr_entries = CONFIG_STACKDEPOT_MAX_FRAMES;

	/*
	 * Refcounted (evictable) records always get the maximum size, so that
	 * a freed record can later hold a trace of any length.
	 */
	rec_size = depot_stack_record_size(rec, refcounted ?
					   CONFIG_STACKDEPOT_MAX_FRAMES : nr_entries);
	if (refcounted)
		rec = depot_pop_free();

	/* Nothing reusable: carve a new record out of a pool. */
	if (!rec)
		rec = depot_pop_free_pool(prealloc, rec_size);
	if (!rec)
		return NULL;

	/* rec->handle was set up by depot_pop_free_pool(); fill in the rest. */
	rec->hash = hash;
	rec->size = nr_entries;
	memcpy(rec->entries, entries, flex_array_size(rec, entries, nr_entries));

	if (refcounted) {
		refcount_set(&rec->count, 1);
		counters[DEPOT_COUNTER_REFD_ALLOCS]++;
		counters[DEPOT_COUNTER_REFD_INUSE]++;
	} else {
		/* Saturate so any attempt to refcount this entry warns. */
		refcount_set(&rec->count, REFCOUNT_SATURATED);
		counters[DEPOT_COUNTER_PERSIST_COUNT]++;
		counters[DEPOT_COUNTER_PERSIST_BYTES] += rec_size;
	}

	/*
	 * Tell KMSAN the record is initialized, to avoid false positives when
	 * instrumented code reads it.
	 */
	kmsan_unpoison_memory(rec, rec_size);

	return rec;
}
/*
 * Translates @handle into the stack record it refers to. Must be called
 * without pool_lock held.
 *
 * Returns NULL, with a warning, when the pool index is out of range, the pool
 * pointer is NULL, or the record's refcount reads as zero.
 */
static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle)
{
	const int nr_pools = READ_ONCE(pools_num);
	union handle_parts parts = { .handle = handle };
	const u32 idx = parts.pool_index_plus_1 - 1;
	const size_t byte_off = parts.offset << DEPOT_STACK_ALIGN;
	struct stack_record *rec;
	void *pool;

	lockdep_assert_not_held(&pool_lock);

	if (idx >= nr_pools) {
		WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n",
		     idx, nr_pools, handle);
		return NULL;
	}

	pool = stack_pools[idx];
	if (WARN_ON(!pool))
		return NULL;

	rec = pool + byte_off;
	if (WARN_ON(!refcount_read(&rec->count)))
		return NULL;

	return rec;
}
/*
* Links stack into the freelist.
*
* Unlinks @stack from its hash bucket, records an RCU cookie in it and appends
* it to the tail of free_stacks, updating the statistics counters.
* Takes pool_lock itself with interrupts disabled, so the caller must not
* hold it. depot_pop_free() later polls the cookie before reusing the record.
*/
static void depot_free_stack(struct stack_record *stack)
{
unsigned long flags;
lockdep_assert_not_held(&pool_lock);
raw_spin_lock_irqsave(&pool_lock, flags);
/*
* NOTE(review): printk is deferred for the whole locked section, presumably
* so that a message emitted here cannot re-enter stack depot - confirm.
*/
printk_deferred_enter();
/*
* Remove the entry from the hash list. Concurrent list traversal may
* still observe the entry, but since the refcount is zero, this entry
* will no longer be considered as valid.
*/
list_del_rcu(&stack->hash_list);
/*
* Due to being used from constrained contexts such as the allocators,
* NMI, or even RCU itself, stack depot cannot rely on primitives that
* would sleep (such as synchronize_rcu()) or recursively call into
* stack depot again (such as call_rcu()).
*
* Instead, get an RCU cookie, so that we can ensure this entry isn't
* moved onto another list until the next grace period, and concurrent
* RCU list traversal remains safe.
*/
stack->rcu_state = get_state_synchronize_rcu();
/*
* Add the entry to the freelist tail, so that older entries are
* considered first - their RCU cookie is more likely to no longer be
* associated with the current grace period.
*/
list_add_tail(&stack->free_list, &free_stacks);
counters[DEPOT_COUNTER_FREELIST_SIZE]++;
counters[DEPOT_COUNTER_REFD_FREES]++;
counters[DEPOT_COUNTER_REFD_INUSE]--;
printk_deferred_exit();
raw_spin_unlock_irqrestore(&pool_lock, flags);
}
/* Calculates the hash for a stack. */
static inline u32 hash_stack(unsigned long *entries, unsigned int size)
{
return jhash2((u32 *)entries,
array_size(size, sizeof(*entries)) / sizeof(u32),
STACK_HASH_SEED);
}
/*
 * Non-instrumented stand-in for memcmp(), written as a plain loop.
 * Compares the first @n elements of @u1 and @u2 for equality only: returns 0
 * if they all match and 1 otherwise. The result carries no ordering.
 */
static inline
int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2,
		      unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		if (u1[i] != u2[i])
			return 1;
	}

	return 0;
}
/*
* Finds a stack in a bucket of the hash table.
*
* Walks @bucket inside an RCU-sched read-side section, looking for a record
* whose hash, size and frames equal @hash, @size and @entries. It is called
* both with and without pool_lock held.
*
* With STACK_DEPOT_FLAG_GET in @flags, a match is returned only if its
* refcount could be incremented from a non-zero value; without the flag the
* refcount is left untouched.
*
* Returns the matching record, or NULL if the bucket holds none.
*/
static inline struct stack_record *find_stack(struct list_head *bucket,
unsigned long *entries, int size,
u32 hash, depot_flags_t flags)
{
struct stack_record *stack, *ret = NULL;
/*
* Stack depot may be used from instrumentation that instruments RCU or
* tracing itself; use variant that does not call into RCU and cannot be
* traced.
*
* Note: Such use cases must take care when using refcounting to evict
* unused entries, because the stack record free-then-reuse code paths
* do call into RCU.
*/
rcu_read_lock_sched_notrace();
list_for_each_entry_rcu(stack, bucket, hash_list) {
/* Cheap rejects first: the hash and the length must both match. */
if (stack->hash != hash || stack->size != size)
continue;
/*
* This may race with depot_free_stack() accessing the freelist
* management state unioned with @entries. The refcount is zero
* in that case and the below refcount_inc_not_zero() will fail.
*/
if (data_race(stackdepot_memcmp(entries, stack->entries, size)))
continue;
/*
* Try to increment refcount. If this succeeds, the stack record
* is valid and has not yet been freed.
*
* If STACK_DEPOT_FLAG_GET is not used, it is undefined behavior
* to then call stack_depot_put() later, and we can assume that
* a stack record is never placed back on the freelist.
*/
if ((flags & STACK_DEPOT_FLAG_GET) && !refcount_inc_not_zero(&stack->count))
continue;
ret = stack;
break;
}
rcu_read_unlock_sched_notrace();
return ret;
}
/*
* Saves a stack trace in stack depot and returns its handle.
*
* @entries, @nr_entries: the stack trace; it is passed through
* filter_irq_stacks() before being hashed and stored.
* @alloc_flags: GFP flags used if a page for a new pool has to be allocated.
* Zone modifiers are cleared, only bits within
* GFP_ATOMIC | GFP_KERNEL | __GFP_NOLOCKDEP are kept, and __GFP_NOWARN is
* added.
* @depot_flags: STACK_DEPOT_FLAG_* bits. Pages are allocated only with
* STACK_DEPOT_FLAG_CAN_ALLOC; STACK_DEPOT_FLAG_GET makes the returned record
* refcounted.
*
* Returns the handle of the existing or newly stored record, or 0 when
* @depot_flags has bits outside STACK_DEPOT_FLAGS_MASK, the filtered trace is
* empty, stack depot is disabled, or no record could be allocated.
*/
depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
unsigned int nr_entries,
gfp_t alloc_flags,
depot_flags_t depot_flags)
{
struct list_head *bucket;
struct stack_record *found = NULL;
depot_stack_handle_t handle = 0;
struct page *page = NULL;
void *prealloc = NULL;
bool can_alloc = depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC;
unsigned long flags;
u32 hash;
if (WARN_ON(depot_flags & ~STACK_DEPOT_FLAGS_MASK))
return 0;
/*
* If this stack trace is from an interrupt, including anything before
* interrupt entry usually leads to unbounded stack depot growth.
*
* Since use of filter_irq_stacks() is a requirement to ensure stack
* depot can efficiently deduplicate interrupt stacks, always
* filter_irq_stacks() to simplify all callers' use of stack depot.
*/
nr_entries = filter_irq_stacks(entries, nr_entries);
if (unlikely(nr_entries == 0) || stack_depot_disabled)
return 0;
hash = hash_stack(entries, nr_entries);
bucket = &stack_table[hash & stack_hash_mask];
/* Fast path: look the stack trace up without locking. */
found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
if (found)
goto exit;
/*
* Allocate memory for a new pool if required now:
* we won't be able to do that under the lock.
*/
if (unlikely(can_alloc && !READ_ONCE(new_pool))) {
/*
* Zero out zone modifiers, as we don't have specific zone
* requirements. Keep the flags related to allocation in atomic
* contexts, I/O, nolockdep.
*/
alloc_flags &= ~GFP_ZONEMASK;
alloc_flags &= (GFP_ATOMIC | GFP_KERNEL | __GFP_NOLOCKDEP);
alloc_flags |= __GFP_NOWARN;
page = alloc_pages(alloc_flags, DEPOT_POOL_ORDER);
if (page)
prealloc = page_address(page);
}
raw_spin_lock_irqsave(&pool_lock, flags);
printk_deferred_enter();
/* Try to find again, to avoid concurrently inserting duplicates. */
found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
if (!found) {
struct stack_record *new =
depot_alloc_stack(entries, nr_entries, hash, depot_flags, &prealloc);
if (new) {
/*
* This releases the stack record into the bucket and
* makes it visible to readers in find_stack().
*/
list_add_rcu(&new->hash_list, bucket);
found = new;
}
}
if (prealloc) {
/*
* Either stack depot already contains this stack trace, or
* depot_alloc_stack() did not consume the preallocated memory.
* Try to keep the preallocated memory for future.
*/
depot_keep_new_pool(&prealloc);
}
printk_deferred_exit();
raw_spin_unlock_irqrestore(&pool_lock, flags);
exit:
/* Reached with pool_lock released, both from the fast path and the slow path. */
if (prealloc) {
/* Stack depot didn't use this memory, free it. */
free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER);
}
if (found)
handle = found->handle.handle;
return handle;
}
EXPORT_SYMBOL_GPL(stack_depot_save_flags);
/*
 * Saves a stack trace, allowing stack depot to allocate memory: a shorthand
 * for stack_depot_save_flags() with STACK_DEPOT_FLAG_CAN_ALLOC as the only
 * depot flag. Returns whatever stack_depot_save_flags() returns.
 */
depot_stack_handle_t stack_depot_save(unsigned long *entries,
				      unsigned int nr_entries,
				      gfp_t alloc_flags)
{
	const depot_flags_t depot_flags = STACK_DEPOT_FLAG_CAN_ALLOC;

	return stack_depot_save_flags(entries, nr_entries, alloc_flags,
				      depot_flags);
}
EXPORT_SYMBOL_GPL(stack_depot_save);
/*
 * Looks up the stack trace behind @handle.
 *
 * On success *@entries points at the stored frames and the number of frames
 * is returned. Otherwise *@entries is NULL and 0 is returned: for a zero
 * handle, when stack depot is disabled, or (with a warning) when the handle
 * does not resolve to a live record.
 */
unsigned int stack_depot_fetch(depot_stack_handle_t handle,
			       unsigned long **entries)
{
	struct stack_record *rec;

	*entries = NULL;
	/*
	 * Mark *entries as initialized for KMSAN, so instrumented callers
	 * reading it do not produce false positive reports.
	 */
	kmsan_unpoison_memory(entries, sizeof(*entries));

	if (handle == 0 || stack_depot_disabled)
		return 0;

	rec = depot_fetch_stack(handle);
	/*
	 * A NULL record here means use-after-put or a corrupt handle; it
	 * should never happen with a valid handle.
	 */
	if (WARN(!rec, "corrupt handle or use after stack_depot_put()"))
		return 0;

	*entries = rec->entries;

	return rec->size;
}
EXPORT_SYMBOL_GPL(stack_depot_fetch);
/*
 * Drops one reference on the record behind @handle and moves the record to
 * the freelist when that was the last reference. Does nothing for a zero
 * handle or when stack depot is disabled.
 */
void stack_depot_put(depot_stack_handle_t handle)
{
	struct stack_record *rec;

	if (handle == 0 || stack_depot_disabled)
		return;

	rec = depot_fetch_stack(handle);
	/*
	 * Failing to resolve the handle indicates an unbalanced put or a
	 * corrupt handle.
	 */
	if (WARN(!rec, "corrupt handle or unbalanced stack_depot_put()"))
		return;

	if (!refcount_dec_and_test(&rec->count))
		return;

	depot_free_stack(rec);
}
EXPORT_SYMBOL_GPL(stack_depot_put);
/*
 * Prints the stack trace behind @stack through stack_trace_print() with no
 * extra indentation. Prints nothing if the handle yields no frames.
 */
void stack_depot_print(depot_stack_handle_t stack)
{
	unsigned long *frames;
	unsigned int nr_frames = stack_depot_fetch(stack, &frames);

	if (!nr_frames)
		return;

	stack_trace_print(frames, nr_frames, 0);
}
EXPORT_SYMBOL_GPL(stack_depot_print);
/*
 * Formats the stack trace behind @handle into @buf (of @size bytes) through
 * stack_trace_snprint(), indenting by @spaces.
 *
 * Returns stack_trace_snprint()'s result, or 0 if the handle yields no frames.
 */
int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
			int spaces)
{
	unsigned long *frames;
	unsigned int nr_frames = stack_depot_fetch(handle, &frames);

	if (!nr_frames)
		return 0;

	return stack_trace_snprint(buf, size, frames, nr_frames, spaces);
}
EXPORT_SYMBOL_GPL(stack_depot_snprint);
/*
 * Returns a copy of @handle with its "extra" field replaced by @extra_bits.
 * A zero (empty) handle is returned as 0 without setting any bits.
 */
depot_stack_handle_t __must_check stack_depot_set_extra_bits(
			depot_stack_handle_t handle, unsigned int extra_bits)
{
	union handle_parts parts;

	/* Empty handles stay empty. */
	if (!handle)
		return 0;

	parts.handle = handle;
	parts.extra = extra_bits;

	return parts.handle;
}
EXPORT_SYMBOL(stack_depot_set_extra_bits);
/* Returns the "extra" field encoded in @handle. */
unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle)
{
	union handle_parts parts;

	parts.handle = handle;

	return parts.extra;
}
EXPORT_SYMBOL(stack_depot_get_extra_bits);
/*
 * seq_file show callback for the debugfs "stats" file: prints the number of
 * pools followed by one "name: value" line per statistics counter.
 */
static int stats_show(struct seq_file *seq, void *v)
{
	int id;

	/*
	 * The values are read without locking on purpose: they are statistics
	 * for debugging only, so approximate numbers are acceptable.
	 */
	seq_printf(seq, "pools: %d\n", data_race(pools_num));

	for (id = 0; id < DEPOT_COUNTER_COUNT; id++) {
		const long value = data_race(counters[id]);

		seq_printf(seq, "%s: %ld\n", counter_names[id], value);
	}

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(stats);
/*
 * Late initcall that creates the debugfs directory "stackdepot" with a
 * read-only "stats" file, unless stack depot is disabled. Always returns 0.
 */
static int depot_debugfs_init(void)
{
	struct dentry *dir;

	if (!stack_depot_disabled) {
		dir = debugfs_create_dir("stackdepot", NULL);
		debugfs_create_file("stats", 0444, dir, NULL, &stats_fops);
	}

	return 0;
}
late_initcall(depot_debugfs_init);