mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-12-01 16:14:13 +08:00
xfs: can't use kmem_zalloc() for attribute buffers
Because heap allocation of 64kB buffers will fail: .... XFS: fs_mark(8414) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8417) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8409) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8428) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8430) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8437) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8433) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8406) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8412) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8432) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8424) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) .... I'd use kvmalloc() instead, but.... - 48.19% xfs_attr_create_intent - 46.89% xfs_attri_init - kvmalloc_node - 46.04% __kmalloc_node - kmalloc_large_node - 45.99% __alloc_pages - 39.39% __alloc_pages_slowpath.constprop.0 - 38.89% __alloc_pages_direct_compact - 38.71% try_to_compact_pages - compact_zone_order - compact_zone - 21.09% isolate_migratepages_block 10.31% PageHuge 5.82% set_pfnblock_flags_mask 0.86% get_pfnblock_flags_mask - 4.48% __reset_isolation_suitable 4.44% __reset_isolation_pfn - 3.56% __pageblock_pfn_to_page 1.33% pfn_to_online_page 2.83% get_pfnblock_flags_mask - 0.87% migrate_pages 0.86% compaction_alloc 0.84% find_suitable_fallback - 6.60% get_page_from_freelist 4.99% clear_page_erms - 1.19% _raw_spin_lock_irqsave - do_raw_spin_lock __pv_queued_spin_lock_slowpath - 0.86% __vmalloc_node_range 0.65% __alloc_pages_bulk .... this is just yet another reminder of how much kvmalloc() sucks. So lift xlog_cil_kvmalloc(), rename it to xlog_kvmalloc() and use that instead.... We also clean up the attribute name and value lengths as they no longer need to be rounded out to sizes compatible with log vectors. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Allison Henderson <allison.henderson@oracle.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Dave Chinner <david@fromorbit.com>
This commit is contained in:
parent
51e6104fdb
commit
45ff8b471c
@ -44,7 +44,7 @@ xfs_attri_item_free(
|
||||
struct xfs_attri_log_item *attrip)
|
||||
{
|
||||
kmem_free(attrip->attri_item.li_lv_shadow);
|
||||
kmem_free(attrip);
|
||||
kvfree(attrip);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -119,11 +119,11 @@ xfs_attri_item_format(
|
||||
sizeof(struct xfs_attri_log_format));
|
||||
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NAME,
|
||||
attrip->attri_name,
|
||||
xlog_calc_iovec_len(attrip->attri_name_len));
|
||||
attrip->attri_name_len);
|
||||
if (attrip->attri_value_len > 0)
|
||||
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_VALUE,
|
||||
attrip->attri_value,
|
||||
xlog_calc_iovec_len(attrip->attri_value_len));
|
||||
attrip->attri_value_len);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -163,26 +163,21 @@ xfs_attri_init(
|
||||
|
||||
{
|
||||
struct xfs_attri_log_item *attrip;
|
||||
uint32_t name_vec_len = 0;
|
||||
uint32_t value_vec_len = 0;
|
||||
uint32_t buffer_size;
|
||||
|
||||
if (name_len)
|
||||
name_vec_len = xlog_calc_iovec_len(name_len);
|
||||
if (value_len)
|
||||
value_vec_len = xlog_calc_iovec_len(value_len);
|
||||
|
||||
buffer_size = name_vec_len + value_vec_len;
|
||||
uint32_t buffer_size = name_len + value_len;
|
||||
|
||||
if (buffer_size) {
|
||||
attrip = kmem_zalloc(sizeof(struct xfs_attri_log_item) +
|
||||
buffer_size, KM_NOFS);
|
||||
if (attrip == NULL)
|
||||
return NULL;
|
||||
/*
|
||||
* This could be over 64kB in length, so we have to use
|
||||
* kvmalloc() for this. But kvmalloc() utterly sucks, so we
|
||||
* use own version.
|
||||
*/
|
||||
attrip = xlog_kvmalloc(sizeof(struct xfs_attri_log_item) +
|
||||
buffer_size);
|
||||
} else {
|
||||
attrip = kmem_cache_zalloc(xfs_attri_cache,
|
||||
GFP_NOFS | __GFP_NOFAIL);
|
||||
attrip = kmem_cache_alloc(xfs_attri_cache,
|
||||
GFP_NOFS | __GFP_NOFAIL);
|
||||
}
|
||||
memset(attrip, 0, sizeof(struct xfs_attri_log_item));
|
||||
|
||||
attrip->attri_name_len = name_len;
|
||||
if (name_len)
|
||||
@ -195,7 +190,7 @@ xfs_attri_init(
|
||||
if (value_len)
|
||||
attrip->attri_value = ((char *)attrip) +
|
||||
sizeof(struct xfs_attri_log_item) +
|
||||
name_vec_len;
|
||||
name_len;
|
||||
else
|
||||
attrip->attri_value = NULL;
|
||||
|
||||
|
@ -134,39 +134,6 @@ xlog_cil_iovec_space(
|
||||
sizeof(uint64_t));
|
||||
}
|
||||
|
||||
/*
|
||||
* shadow buffers can be large, so we need to use kvmalloc() here to ensure
|
||||
* success. Unfortunately, kvmalloc() only allows GFP_KERNEL contexts to fall
|
||||
* back to vmalloc, so we can't actually do anything useful with gfp flags to
|
||||
* control the kmalloc() behaviour within kvmalloc(). Hence kmalloc() will do
|
||||
* direct reclaim and compaction in the slow path, both of which are
|
||||
* horrendously expensive. We just want kmalloc to fail fast and fall back to
|
||||
* vmalloc if it can't get somethign straight away from the free lists or buddy
|
||||
* allocator. Hence we have to open code kvmalloc outselves here.
|
||||
*
|
||||
* Also, we are in memalloc_nofs_save task context here, so despite the use of
|
||||
* GFP_KERNEL here, we are actually going to be doing GFP_NOFS allocations. This
|
||||
* is actually the only way to make vmalloc() do GFP_NOFS allocations, so lets
|
||||
* just all pretend this is a GFP_KERNEL context operation....
|
||||
*/
|
||||
static inline void *
|
||||
xlog_cil_kvmalloc(
|
||||
size_t buf_size)
|
||||
{
|
||||
gfp_t flags = GFP_KERNEL;
|
||||
void *p;
|
||||
|
||||
flags &= ~__GFP_DIRECT_RECLAIM;
|
||||
flags |= __GFP_NOWARN | __GFP_NORETRY;
|
||||
do {
|
||||
p = kmalloc(buf_size, flags);
|
||||
if (!p)
|
||||
p = vmalloc(buf_size);
|
||||
} while (!p);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate or pin log vector buffers for CIL insertion.
|
||||
*
|
||||
@ -283,7 +250,7 @@ xlog_cil_alloc_shadow_bufs(
|
||||
* storage.
|
||||
*/
|
||||
kmem_free(lip->li_lv_shadow);
|
||||
lv = xlog_cil_kvmalloc(buf_size);
|
||||
lv = xlog_kvmalloc(buf_size);
|
||||
|
||||
memset(lv, 0, xlog_cil_iovec_space(niovecs));
|
||||
|
||||
|
@ -651,4 +651,38 @@ xlog_valid_lsn(
|
||||
return valid;
|
||||
}
|
||||
|
||||
/*
|
||||
* Log vector and shadow buffers can be large, so we need to use kvmalloc() here
|
||||
* to ensure success. Unfortunately, kvmalloc() only allows GFP_KERNEL contexts
|
||||
* to fall back to vmalloc, so we can't actually do anything useful with gfp
|
||||
* flags to control the kmalloc() behaviour within kvmalloc(). Hence kmalloc()
|
||||
* will do direct reclaim and compaction in the slow path, both of which are
|
||||
* horrendously expensive. We just want kmalloc to fail fast and fall back to
|
||||
* vmalloc if it can't get somethign straight away from the free lists or
|
||||
* buddy allocator. Hence we have to open code kvmalloc outselves here.
|
||||
*
|
||||
* This assumes that the caller uses memalloc_nofs_save task context here, so
|
||||
* despite the use of GFP_KERNEL here, we are going to be doing GFP_NOFS
|
||||
* allocations. This is actually the only way to make vmalloc() do GFP_NOFS
|
||||
* allocations, so lets just all pretend this is a GFP_KERNEL context
|
||||
* operation....
|
||||
*/
|
||||
static inline void *
|
||||
xlog_kvmalloc(
|
||||
size_t buf_size)
|
||||
{
|
||||
gfp_t flags = GFP_KERNEL;
|
||||
void *p;
|
||||
|
||||
flags &= ~__GFP_DIRECT_RECLAIM;
|
||||
flags |= __GFP_NOWARN | __GFP_NORETRY;
|
||||
do {
|
||||
p = kmalloc(buf_size, flags);
|
||||
if (!p)
|
||||
p = vmalloc(buf_size);
|
||||
} while (!p);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
#endif /* __XFS_LOG_PRIV_H__ */
|
||||
|
Loading…
Reference in New Issue
Block a user